diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/Add-riscv64-support.patch similarity index 64% rename from 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch rename to Add-riscv64-support.patch index 13815b7..59017ae 100644 --- a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +++ b/Add-riscv64-support.patch @@ -1,228 +1,198 @@ -From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 -From: misaka00251 -Date: Wed, 9 Aug 2023 02:24:23 +0800 -Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) +From dfa792539047c39d0d25244265bc8368163d5768 Mon Sep 17 00:00:00 2001 +From: Fei Yang +Date: Thu, 24 Mar 2022 09:22:46 +0000 +Subject: [PATCH 001/140] Cherry-picked JDK-8276799: initial load of RISC-V + backend (cannot pass compilation) --- - make/autoconf/build-aux/config.sub | 7 + + make/autoconf/build-aux/config.guess | 2 +- make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 4 +- - make/autoconf/platform.m4 | 10 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- - src/hotspot/cpu/aarch64/aarch64.ad | 40 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- - .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + - .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + - src/hotspot/cpu/arm/arm.ad | 10 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- - src/hotspot/cpu/ppc/ppc.ad | 16 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 +++ + make/autoconf/libraries.m4 | 8 +- + make/autoconf/platform.m4 | 6 +- + make/hotspot/gensrc/GensrcAdlc.gmk | 9 +- + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 6 +- + src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 7 +- + src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 8 +- + .../cpu/riscv/abstractInterpreter_riscv.cpp | 177 + + src/hotspot/cpu/riscv/assembler_riscv.cpp | 372 + + src/hotspot/cpu/riscv/assembler_riscv.hpp | 3047 +++++ .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ + src/hotspot/cpu/riscv/bytes_riscv.hpp | 167 + + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 353 + + src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 84 + + .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 30 + + .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 32 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 388 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 148 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 281 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 37 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 388 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 52 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2267 ++++ .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1075 ++ src/hotspot/cpu/riscv/c1_LIR_riscv.cpp 
| 55 + src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + + src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 83 + + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 432 + + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 120 + + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1172 ++ + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 65 + + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1646 +++ + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 193 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 83 + src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 + src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 149 + + src/hotspot/cpu/riscv/copy_riscv.hpp | 136 + + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 58 + + .../cpu/riscv/foreign_globals_riscv.cpp | 44 + + .../cpu/riscv/foreign_globals_riscv.hpp | 32 + + src/hotspot/cpu/riscv/frame_riscv.cpp | 697 + + src/hotspot/cpu/riscv/frame_riscv.hpp | 202 + + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 248 + + .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 484 + .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + - .../cardTableBarrierSetAssembler_riscv.cpp | 120 + - .../cardTableBarrierSetAssembler_riscv.hpp | 43 + - .../modRefBarrierSetAssembler_riscv.cpp | 54 + + .../cpu/riscv/gc/g1/g1Globals_riscv.hpp | 31 + + .../gc/shared/barrierSetAssembler_riscv.cpp | 302 + + .../gc/shared/barrierSetAssembler_riscv.hpp | 79 + + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 + + .../cardTableBarrierSetAssembler_riscv.cpp | 111 + + .../cardTableBarrierSetAssembler_riscv.hpp | 42 + + .../modRefBarrierSetAssembler_riscv.cpp | 55 + .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + + .../c1/shenandoahBarrierSetC1_riscv.cpp | 117 + + .../shenandoahBarrierSetAssembler_riscv.cpp | 712 ++ + .../shenandoahBarrierSetAssembler_riscv.hpp | 88 + + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 285 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 + + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 + + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 + + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 + + .../cpu/riscv/globalDefinitions_riscv.hpp | 52 + + src/hotspot/cpu/riscv/globals_riscv.hpp | 99 + src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + + 
src/hotspot/cpu/riscv/icache_riscv.cpp | 51 + src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1940 +++ + src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 285 + + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 295 + src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + - .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + - .../cpu/riscv/register_definitions_riscv.cpp | 193 + - src/hotspot/cpu/riscv/register_riscv.cpp | 69 + - src/hotspot/cpu/riscv/register_riscv.hpp | 337 + + .../cpu/riscv/javaFrameAnchor_riscv.hpp | 86 + + .../cpu/riscv/jniFastGetField_riscv.cpp | 214 + + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 106 + + .../cpu/riscv/macroAssembler_riscv.cpp | 4016 ++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 858 ++ + .../cpu/riscv/macroAssembler_riscv.inline.hpp | 31 + + src/hotspot/cpu/riscv/matcher_riscv.hpp | 169 + + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 461 + + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 57 + + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 429 + + src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 572 + + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 + + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 43 + + src/hotspot/cpu/riscv/register_riscv.cpp | 73 + + src/hotspot/cpu/riscv/register_riscv.hpp | 324 + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + - src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + - src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 605 + - src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + - .../templateInterpreterGenerator_riscv.cpp | 1841 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ + src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 44 + + src/hotspot/cpu/riscv/riscv.ad | 10611 ++++++++++++++++ + src/hotspot/cpu/riscv/riscv_b.ad | 527 + + src/hotspot/cpu/riscv/riscv_v.ad | 2065 +++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2761 ++++ + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3864 ++++++ + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 58 + + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 161 + + .../templateInterpreterGenerator_riscv.cpp | 1794 +++ + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 3951 ++++++ src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + - .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + - 
src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + + .../riscv/universalNativeInvoker_riscv.cpp | 33 + + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 + + src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 42 + + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 230 + + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 72 + + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 64 + + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 68 + + src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 46 + src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- - src/hotspot/cpu/s390/s390.ad | 16 +- - src/hotspot/cpu/sparc/sparc.ad | 10 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- - src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + - src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + - src/hotspot/cpu/x86/x86.ad | 14 +- - src/hotspot/cpu/x86/x86_32.ad | 19 +- - src/hotspot/cpu/x86/x86_64.ad | 24 +- - src/hotspot/os/linux/os_linux.cpp | 11 +- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + - .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + - .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + + src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 9 +- + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 7 +- + src/hotspot/os/linux/os_linux.cpp | 2 + + .../linux_riscv/assembler_linux_riscv.cpp | 26 + + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 134 + + .../os_cpu/linux_riscv/bytes_linux_riscv.hpp | 45 + + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 + + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 + .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + + .../linux_riscv/orderAccess_linux_riscv.hpp | 63 + + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 466 + + .../os_cpu/linux_riscv/os_linux_riscv.hpp | 59 + .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 92 + + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 48 + .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - .../linux_riscv/vm_version_linux_riscv.cpp | 116 + - src/hotspot/share/adlc/archDesc.cpp | 5 + - src/hotspot/share/adlc/formssel.cpp | 2 + - src/hotspot/share/c1/c1_LIR.cpp | 113 +- - src/hotspot/share/c1/c1_LIR.hpp | 208 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 118 + + src/hotspot/share/c1/c1_LIR.cpp | 112 +- + src/hotspot/share/c1/c1_LIR.hpp | 209 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- - src/hotspot/share/classfile/vmSymbols.cpp | 2 + - src/hotspot/share/classfile/vmSymbols.hpp | 1 + - .../gc/shenandoah/shenandoahArguments.cpp | 2 +- + src/hotspot/share/c1/c1_LIRAssembler.hpp | 5 +- + src/hotspot/share/c1/c1_LinearScan.cpp | 18 +- + .../gc/shenandoah/shenandoahArguments.cpp | 4 +- + src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp | 4 +- .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/c2compiler.cpp | 1 + - src/hotspot/share/opto/chaitin.cpp | 90 +- - src/hotspot/share/opto/chaitin.hpp | 32 +- - src/hotspot/share/opto/intrinsicnode.hpp | 5 +- - src/hotspot/share/opto/library_call.cpp | 13 +- - src/hotspot/share/opto/machnode.cpp | 2 +- - src/hotspot/share/opto/machnode.hpp | 4 + - src/hotspot/share/opto/matcher.cpp | 41 +- - src/hotspot/share/opto/matcher.hpp | 6 +- - 
src/hotspot/share/opto/node.cpp | 21 + - src/hotspot/share/opto/node.hpp | 5 + - src/hotspot/share/opto/opcodes.cpp | 4 +- - src/hotspot/share/opto/opcodes.hpp | 2 + - src/hotspot/share/opto/phase.cpp | 2 + - src/hotspot/share/opto/phase.hpp | 1 + - src/hotspot/share/opto/postaloc.cpp | 53 +- - src/hotspot/share/opto/regmask.cpp | 46 +- - src/hotspot/share/opto/regmask.hpp | 10 +- - src/hotspot/share/opto/superword.cpp | 7 +- - src/hotspot/share/opto/type.cpp | 14 +- - src/hotspot/share/opto/type.hpp | 12 +- - src/hotspot/share/opto/vectornode.cpp | 4 +- - .../share/runtime/abstract_vm_version.cpp | 12 +- + src/hotspot/share/opto/regmask.hpp | 2 +- + .../share/runtime/abstract_vm_version.cpp | 3 +- + src/hotspot/share/runtime/synchronizer.cpp | 2 +- src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 2 +- - src/hotspot/share/utilities/debug.cpp | 1 + + src/hotspot/share/runtime/thread.inline.hpp | 4 +- src/hotspot/share/utilities/macros.hpp | 26 + - .../share/classes/java/lang/StringLatin1.java | 5 + .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 2 + - .../linux/native/libsaproc/ps_proc.c | 4 + - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + + .../linux/native/libsaproc/libproc.h | 4 +- + .../classes/sun/jvm/hotspot/HotSpotAgent.java | 3 + .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 11 +- + .../debugger/linux/LinuxCDebugger.java | 13 +- .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../debugger/proc/ProcDebuggerLocal.java | 6 + .../proc/riscv64/ProcRISCV64Thread.java | 88 + .../riscv64/ProcRISCV64ThreadContext.java | 48 + .../riscv64/ProcRISCV64ThreadFactory.java | 46 + .../remote/riscv64/RemoteRISCV64Thread.java | 55 + .../riscv64/RemoteRISCV64ThreadContext.java | 48 + .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../riscv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 3 + - .../LinuxRISCV64JavaThreadPDAccess.java | 132 + + .../debugger/risv64/RISCV64ThreadContext.java | 172 + + .../sun/jvm/hotspot/runtime/Threads.java | 5 +- + .../LinuxRISCV64JavaThreadPDAccess.java | 134 + .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + - .../riscv64/RISCV64JavaCallWrapper.java | 58 + + .../hotspot/runtime/riscv64/RISCV64Frame.java | 556 + + .../riscv64/RISCV64JavaCallWrapper.java | 61 + .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- - src/utils/hsdis/hsdis.c | 6 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + - .../testcases/GenericTestCaseForOtherCPU.java | 10 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + - .../string/TestStringLatin1IndexOfChar.java | 153 + - .../loopopts/superword/ProdRed_Double.java | 2 +- - .../loopopts/superword/ProdRed_Float.java | 2 +- - .../loopopts/superword/ProdRed_Int.java | 2 +- - .../loopopts/superword/ReductionPerf.java | 2 +- - .../superword/SumRedAbsNeg_Double.java | 2 +- - .../superword/SumRedAbsNeg_Float.java | 2 +- - .../loopopts/superword/SumRedSqrt_Double.java | 2 +- - .../loopopts/superword/SumRed_Double.java | 2 +- - .../loopopts/superword/SumRed_Float.java | 2 +- - 
.../loopopts/superword/SumRed_Int.java | 2 +- - .../argumentcorruption/CheckLongArgs.java | 2 +- - .../criticalnatives/lookup/LookUp.java | 2 +- - .../sha/predicate/IntrinsicPredicates.java | 9 +- - .../NMT/CheckForProperDetailStackTrace.java | 3 +- - .../ReservedStack/ReservedStackTest.java | 3 +- - test/hotspot/jtreg/test_env.sh | 5 + - ...stMutuallyExclusivePlatformPredicates.java | 3 +- - .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- - test/lib/jdk/test/lib/Platform.java | 5 + - .../bench/java/lang/StringIndexOfChar.java | 221 + - 218 files changed, 57653 insertions(+), 221 deletions(-) + .../jvm/hotspot/utilities/PlatformInfo.java | 4 +- + test/hotspot/jtreg/compiler/c2/TestBit.java | 7 +- + ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 5 +- + .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 5 +- + .../testcases/GenericTestCaseForOtherCPU.java | 11 +- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 115 + + .../loopopts/superword/ProdRed_Double.java | 4 +- + .../loopopts/superword/ProdRed_Float.java | 4 +- + .../loopopts/superword/ProdRed_Int.java | 4 +- + .../loopopts/superword/ReductionPerf.java | 4 +- + .../superword/SumRedAbsNeg_Double.java | 4 +- + .../superword/SumRedAbsNeg_Float.java | 4 +- + .../loopopts/superword/SumRedSqrt_Double.java | 4 +- + .../loopopts/superword/SumRed_Double.java | 4 +- + .../loopopts/superword/SumRed_Float.java | 4 +- + .../loopopts/superword/SumRed_Int.java | 4 +- + .../sha/predicate/IntrinsicPredicates.java | 11 +- + .../NMT/CheckForProperDetailStackTrace.java | 4 +- + .../ReservedStack/ReservedStackTest.java | 4 +- + .../HeapMonitorEventsForTwoThreadsTest.java | 1 - + ...stMutuallyExclusivePlatformPredicates.java | 2 +- + .../jdk/jfr/event/os/TestCPUInformation.java | 6 +- + test/lib/jdk/test/lib/Platform.java | 4 + + 187 files changed, 59079 insertions(+), 189 deletions(-) create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -248,20 +218,26 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp create mode 
100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp @@ -270,6 +246,11 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -285,12 +266,13 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -305,18 +287,20 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp create mode 100644 
src/hotspot/cpu/riscv/vtableStubs_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -335,101 +319,95 @@ Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java - create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub -index 3c280ac7c..eda408e01 100644 ---- a/make/autoconf/build-aux/config.sub -+++ b/make/autoconf/build-aux/config.sub -@@ -48,6 +48,13 @@ if ! echo $* | grep '^aarch64-' >/dev/null ; then - exit - fi - -+# Canonicalize for riscv which autoconf-config.sub doesn't handle -+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then -+ result=`echo $@ | sed 's/linux/unknown-linux/'` -+ echo $result -+ exit -+fi -+ - while test $# -gt 0 ; do - case $1 in - -- ) # Stop option processing +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index a88a9adec3f..15111d827ab 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ # diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2..01ef26c10 100644 +index 9bb34363e5c..f84e8f84c60 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 -@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], AC_MSG_CHECKING([if shenandoah can be built]) if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ -+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc..c01fdbcce 100644 +index 16e906bdc6a..5c49fd9285d 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 -@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - GLOBAL_LIBS="" +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -130,6 +130,12 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lthread" fi -- BASIC_JDKLIB_LIBS="" -+ BASIC_JDKLIB_LIBS="-latomic" - if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then - BASIC_JDKLIB_LIBS="-ljava -ljvm" - fi -@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - wsock32.lib winmm.lib version.lib psapi.lib" - fi - -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" ++ # Because RISC-V only has word-sized atomics, it requries libatomic where ++ # other common architectures do not. So link libatomic by default. ++ if test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xriscv64; then ++ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" ++ fi + - JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" - JDKEXE_LIBS="" - JVM_LIBS="$BASIC_JVM_LIBS" + # perfstat lib + if test "x$OPENJDK_TARGET_OS" = xaix; then + BASIC_JVM_LIBS="$BASIC_JVM_LIBS -lperfstat" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5f..48d615992 100644 +index 26a58eb2ee8..67972d89248 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 -@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], - VAR_CPU_BITS=64 - VAR_CPU_ENDIAN=little - ;; -+ riscv32) -+ VAR_CPU=riscv32 -+ VAR_CPU_ARCH=riscv -+ VAR_CPU_BITS=32 -+ VAR_CPU_ENDIAN=little -+ ;; - riscv64) - VAR_CPU=riscv64 - VAR_CPU_ARCH=riscv -@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xriscv64; then ++ HOTSPOT_$1_CPU_DEFINE=RISCV64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +@@ -564,8 +566,6 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=S390 elif test "x$OPENJDK_$1_CPU" = xs390x; then HOTSPOT_$1_CPU_DEFINE=S390 -+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then -+ HOTSPOT_$1_CPU_DEFINE=RISCV32 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then +- elif test "x$OPENJDK_$1_CPU" = xriscv64; then - HOTSPOT_$1_CPU_DEFINE=RISCV -+ HOTSPOT_$1_CPU_DEFINE=RISCV64 + elif test "x$OPENJDK_$1_CPU" = xloongarch64; then + HOTSPOT_$1_CPU_DEFINE=LOONGARCH64 elif test "x$OPENJDK_$1_CPU" != x; then - HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) - fi diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac572..9de6f663c 100644 +index c5a3ac5724b..67f4c6f0574 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -439,17 +417,10 @@ index c5a3ac572..9de6f663c 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) -+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ -+ ))) -+ endif -+ + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ @@ -460,95 +431,17 @@ index c5a3ac572..9de6f663c 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 1e4ee33a9..ac5d56f0f 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return size; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg. 
- const uint Matcher::vector_ideal_reg(int len) { - switch(len) { -@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, - ins_pipe(pipe_class_memory); - %} - --instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} - - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - ins_pipe(pipe_class_memory); - %} - -+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); -+ -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ - instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, rFlagsReg cr) - %{ diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3..1a35be210 100644 +index fdd2c0ca3d7..63f193de86e 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -556,131 +449,21 @@ index fdd2c0ca3..1a35be210 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on aarch64"); Assembler::Condition acond, ncond; switch (condition) { -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 5753cc9a6..21c6fdf19 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, - BIND(DONE); - } - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3) -+{ -+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; -+ Register cnt1_neg = cnt1; -+ Register ch1 = rscratch1; -+ Register result_tmp = rscratch2; -+ -+ cbz(cnt1, NOMATCH); -+ -+ cmp(cnt1, (u1)8); -+ br(LT, DO1_SHORT); -+ -+ orr(ch, ch, ch, LSL, 8); -+ orr(ch, ch, ch, LSL, 16); -+ orr(ch, ch, ch, LSL, 32); -+ -+ sub(cnt1, cnt1, 8); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ -+ mov(tmp3, 0x0101010101010101); -+ -+ BIND(CH1_LOOP); -+ ldr(ch1, Address(str1, cnt1_neg)); -+ eor(ch1, ch, ch1); -+ sub(tmp1, ch1, tmp3); -+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); -+ bics(tmp1, tmp1, tmp2); -+ br(NE, HAS_ZERO); -+ adds(cnt1_neg, cnt1_neg, 8); -+ br(LT, CH1_LOOP); -+ -+ cmp(cnt1_neg, (u1)8); -+ mov(cnt1_neg, 0); -+ br(LT, CH1_LOOP); -+ b(NOMATCH); -+ -+ BIND(HAS_ZERO); -+ rev(tmp1, tmp1); -+ clz(tmp1, tmp1); -+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); -+ b(MATCH); -+ -+ BIND(DO1_SHORT); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ BIND(DO1_LOOP); -+ ldrb(ch1, Address(str1, cnt1_neg)); -+ cmp(ch, ch1); -+ br(EQ, MATCH); -+ adds(cnt1_neg, cnt1_neg, 1); -+ br(LT, DO1_LOOP); -+ BIND(NOMATCH); -+ mov(result, -1); -+ b(DONE); -+ BIND(MATCH); -+ add(result, result_tmp, cnt1_neg); -+ BIND(DONE); -+} -+ - // Compare strings. 
- void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7e23c16a4..c3d472a9a 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -1260,6 +1260,9 @@ public: - void string_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); -+ void stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3); - void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, - FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, -diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 51f2d9ce7..71f83521e 100644 ---- a/src/hotspot/cpu/arm/arm.ad -+++ b/src/hotspot/cpu/arm/arm.ad -@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { - return MaxVectorSize; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa..2d06d3d58 100644 +index f0a7229aa18..cb095052534 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } @@ -688,75 +471,44 @@ index f0a7229aa..2d06d3d58 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on arm"); + AsmCondition acond = al; AsmCondition ncond = nv; if (opr1 != opr2) { diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d..d081116be 100644 +index 847f7d61d2f..d74db914331 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2012, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2021 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1553,8 +1553,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { + } } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); -+ ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on ppc"); + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { load_to_reg(this, opr1, result); // Condition doesn't matter. return; -diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index ebbe80a26..df66a46dc 100644 ---- a/src/hotspot/cpu/ppc/ppc.ad -+++ b/src/hotspot/cpu/ppc/ppc.ad -@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // PPC implementation uses VSX load/store instructions (if - // SuperwordUseVSX) which support 4 byte but not arbitrary alignment - const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 000000000..5661b7425 +index 00000000000..31c63abe71d --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp -@@ -0,0 +1,185 @@ +@@ -0,0 +1,177 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -784,13 +536,13 @@ index 000000000..5661b7425 +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" ++#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + -+ +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { @@ -880,7 +632,6 @@ index 000000000..5661b7425 + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state -+ + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * @@ -894,14 +645,6 @@ index 000000000..5661b7425 + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp -+ // -+ // The interpreted method entry on riscv aligns SP to 16 bytes -+ // before generating the fixed part of the activation frame. So there -+ // may be a gap between the locals block and the saved sender SP. For -+ // an interpreted caller we need to recreate this gap and exactly -+ // align the incoming parameters with the caller's temporary -+ // expression stack. For other types of caller frame it doesn't -+ // matter. 
+ intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -935,6 +678,7 @@ index 000000000..5661b7425 + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } ++ + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = @@ -942,14 +686,14 @@ index 000000000..5661b7425 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 000000000..40ecf1a6c +index 00000000000..f15ef5304c5 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp -@@ -0,0 +1,365 @@ +@@ -0,0 +1,372 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -969,6 +713,7 @@ index 000000000..40ecf1a6c + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + +#include @@ -983,8 +728,6 @@ index 000000000..40ecf1a6c +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + -+#define __ _masm. -+ +int AbstractAssembler::code_fill_byte() { + return 0; +} @@ -999,7 +742,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -1019,7 +762,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -1033,11 +776,12 @@ index 000000000..40ecf1a6c + add_uw(Rd, Rs, zr); +} + -+void Assembler::li(Register Rd, int64_t imm) { ++void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; -+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ // Split imm to a lower 12-bit sign-extended part and the remainder, ++ // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + @@ -1051,8 +795,7 @@ index 000000000..40ecf1a6c + if (lower != 0) { + addi(Rd, Rd, lower); + } -+ } -+ else { ++ } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { @@ -1066,30 +809,30 @@ index 000000000..40ecf1a6c +} + +void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), -+ // upper = imm[63:32] + 1. -+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ // Load upper 32 bits. 
upper = imm[63:32], but if imm[31] == 1 or ++ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; + -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); + -+ // Load the rest 32 bits. -+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); ++ // Load the rest 32 bits. ++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { @@ -1162,15 +905,16 @@ index 000000000..40ecf1a6c + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ -+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ -+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ int32_t offset = 0; \ ++ baseOffset(temp, adr, offset); \ ++ jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ @@ -1230,9 +974,9 @@ index 000000000..40ecf1a6c + } +#endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), -+ "bit 47 overflows in address constant"); -+ // Load upper 31 bits -+ int32_t imm = imm64 >> 17; ++ "48-bit overflow in address constant"); ++ // Load upper 32 bits ++ int32_t imm = imm64 >> 16; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; @@ -1240,13 +984,13 @@ index 000000000..40ecf1a6c + lui(Rd, upper); + addi(Rd, Rd, lower); + -+ // Load the rest 17 bits. ++ // Load the rest 16 bits. + slli(Rd, Rd, 11); -+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); -+ slli(Rd, Rd, 6); ++ addi(Rd, Rd, (imm64 >> 5) & 0x7ff); ++ slli(Rd, Rd, 5); + -+ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. -+ offset = imm64 & 0x3f; ++ // This offset will be used by following jalr/ld. ++ offset = imm64 & 0x1f; +} + +void Assembler::movptr(Register Rd, uintptr_t imm64) { @@ -1259,6 +1003,13 @@ index 000000000..40ecf1a6c + addi(Rd, Rd, offset); +} + ++void Assembler::ifence() { ++ fence_i(); ++ if (UseConservativeFence) { ++ fence(ir, ir); ++ } ++} ++ +#define INSN(NAME, NEG_INSN) \ + void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ + NEG_INSN(Rt, Rs, dest); \ @@ -1313,14 +1064,14 @@ index 000000000..40ecf1a6c +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 000000000..d4da30ed6 +index 00000000000..4923962a496 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,2004 @@ +@@ -0,0 +1,3047 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1348,6 +1099,7 @@ index 000000000..d4da30ed6 + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" ++#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + @@ -1359,10 +1111,10 @@ index 000000000..d4da30ed6 +class Argument { + public: + enum { -+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) -+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + -+ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; @@ -1386,7 +1138,21 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + -+// java function register(caller-save registers) ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. A suitable jni method is non-static and a ++// small number of arguments. 
++// ++// |------------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |------------------------------------------------------------------------| ++// | x10 x11 x12 x13 x14 x15 x16 x17 | ++// |------------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |------------------------------------------------------------------------| ++ +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -1396,6 +1162,8 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + ++// Java floating args are passed as per C ++ +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -1412,6 +1180,9 @@ index 000000000..d4da30ed6 +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register @@ -1434,9 +1205,6 @@ index 000000000..d4da30ed6 +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + -+/* If you use x4(tp) as java thread pointer according to the instruction manual, -+ * it overlaps with the register used by c++ thread. -+ */ +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer @@ -1446,13 +1214,13 @@ index 000000000..d4da30ed6 +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + -+// tempory register(caller-save registers) ++// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -1469,6 +1237,7 @@ index 000000000..d4da30ed6 + + private: + Register _base; ++ Register _index; + int64_t _offset; + enum mode _mode; + @@ -1481,46 +1250,40 @@ index 000000000..d4da30ed6 + + public: + Address() -+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { } ++ : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) -+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+ Address(Register r, unsigned long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } -+#ifdef ASSERT ++ : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } ++ ++ template::value)> ++ 
Address(Register r, T o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} ++ + Address(Register r, ByteSize disp) -+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } -+#endif ++ : Address(r, in_bytes(disp)) {} + Address(address target, RelocationHolder const& rspec) + : _base(noreg), ++ _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), -+ _target(target) { } ++ _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { -+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); ++ guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } -+ ++ Register index() const { ++ return _index; ++ } + mode getMode() const { + return _mode; + } + -+ bool uses(Register reg) const { return _base == reg;} ++ bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { @@ -1575,6 +1338,14 @@ index 000000000..d4da30ed6 + + enum { instruction_size = 4 }; + ++ //---< calculate length of instruction >--- ++ // We just use the values set above. ++ // instruction must start at passed address ++ static unsigned int instr_len(unsigned char *instr) { return instruction_size; } ++ ++ //---< longest instructions >--- ++ static unsigned int instr_maxlen() { return instruction_size; } ++ + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero @@ -1584,34 +1355,41 @@ index 000000000..d4da30ed6 + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. 
+ }; + -+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ assert_different_registers(noreg, temp, base); -+ int64_t upper = offset, lower = offset; -+ -+ int8_t shift = 64 - expect_offbits; -+ lower = (offset << shift) >> shift; -+ upper -= lower; -+ -+ li(temp, upper); -+ add(temp, temp, base); -+ return Address(temp, lower); -+ } -+ -+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ if (is_offset_in_range(offset, expect_offbits)) { -+ return Address(base, offset); ++ void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ guarantee(Rd != adr.base(), "should use different registers!"); ++ if (is_offset_in_range(adr.offset(), 32)) { ++ int32_t imm = adr.offset(); ++ int32_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ lui(Rd, upper); ++ offset = lower; ++ } else { ++ movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); + } -+ return form_address_complex(base, offset, expect_offbits, temp); ++ add(Rd, Rd, adr.base()); + } + -+ void li(Register Rd, int64_t imm); // optimized load immediate ++ void baseOffset(Register Rd, const Address &adr, int32_t &offset) { ++ if (is_offset_in_range(adr.offset(), 12)) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ addi(Rd, adr.base(), adr.offset()); ++ offset = 0; ++ } else { ++ baseOffset32(Rd, adr, offset); ++ } ++ } ++ ++ void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); ++ void ifence(); + void j(const address &dest, Register temp = t0); -+ void j(const Address &adr, Register temp = t0) ; ++ void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); @@ -1633,7 +1411,7 @@ index 000000000..d4da30ed6 + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ uint32_t mask = checked_cast(right_n_bits(nbits)); ++ uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; @@ -1650,8 +1428,8 @@ index 000000000..d4da30ed6 + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ guarantee(val < (1ULL << nbits), "Field too big for insn"); -+ unsigned mask = checked_cast(right_n_bits(nbits)); ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; @@ -1680,11 +1458,11 @@ index 000000000..d4da30ed6 + emit_int32((jint)insn); + } + -+ void halt() { ++ void _halt() { + emit_int32(0); + } + -+// Rigster Instruction ++// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ @@ -1697,18 +1475,18 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(add, 0b0110011, 0b000, 0b0000000); -+ INSN(sub, 0b0110011, 0b000, 0b0100000); -+ INSN(andr, 0b0110011, 0b111, 0b0000000); -+ INSN(orr, 0b0110011, 0b110, 0b0000000); -+ INSN(xorr, 0b0110011, 0b100, 0b0000000); ++ INSN(_add, 0b0110011, 
0b000, 0b0000000); ++ INSN(_sub, 0b0110011, 0b000, 0b0100000); ++ INSN(_andr, 0b0110011, 0b111, 0b0000000); ++ INSN(_orr, 0b0110011, 0b110, 0b0000000); ++ INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); -+ INSN(addw, 0b0111011, 0b000, 0b0000000); -+ INSN(subw, 0b0111011, 0b000, 0b0100000); ++ INSN(_addw, 0b0111011, 0b000, 0b0000000); ++ INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -1726,22 +1504,20 @@ index 000000000..d4da30ed6 + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + -+ // Vector Configuration Instruction -+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); -+ +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ ++ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(pc(), InternalAddress(dest).rspec()); ++ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1749,7 +1525,19 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(_lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ INSN(_ld, 0b0000011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1766,7 +1554,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ @@ -1776,7 +1564,14 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? 
temp : Rd)); \ ++ int32_t offset = 0; \ ++ if (Rd == adr.base()) { \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ ++ } else { \ ++ baseOffset32(Rd, adr, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ + } \ + break; \ + } \ @@ -1788,20 +1583,20 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, &Assembler::NAME); \ + } + -+ INSN(lb, 0b0000011, 0b000); -+ INSN(lbu, 0b0000011, 0b100); -+ INSN(ld, 0b0000011, 0b011); -+ INSN(lh, 0b0000011, 0b001); -+ INSN(lhu, 0b0000011, 0b101); -+ INSN(lw, 0b0000011, 0b010); -+ INSN(lwu, 0b0000011, 0b110); ++ INSN(lb); ++ INSN(lbu); ++ INSN(lh); ++ INSN(lhu); ++ INSN(lw); ++ INSN(lwu); ++ INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1809,7 +1604,14 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(_fld, 0b0000111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1826,7 +1628,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ @@ -1836,7 +1638,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ + } \ + break; \ + } \ @@ -1845,14 +1649,14 @@ index 000000000..d4da30ed6 + } \ + } + -+ INSN(flw, 0b0000111, 0b010); -+ INSN(fld, 0b0000111, 0b011); ++ INSN(flw); ++ INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ @@ -1867,7 +1671,18 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_beq, 0b1100011, 0b000); ++ INSN(_bne, 0b1100011, 0b001); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ @@ -1878,12 +1693,12 @@ index 000000000..d4da30ed6 + NAME(Rs1, Rs2, dest); \ + } + -+ INSN(beq, 0b1100011, 0b000); -+ INSN(bge, 0b1100011, 0b101); -+ INSN(bgeu, 0b1100011, 0b111); -+ INSN(blt, 0b1100011, 0b100); -+ INSN(bltu, 0b1100011, 0b110); -+ INSN(bne, 0b1100011, 0b001); ++ INSN(beq); ++ INSN(bne); ++ INSN(bge); ++ INSN(bgeu); ++ INSN(blt); ++ INSN(bltu); + +#undef INSN + @@ -1903,8 +1718,8 @@ index 000000000..d4da30ed6 + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ @@ -1916,16 +1731,27 @@ index 000000000..d4da30ed6 + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(_sw, Register, 0b0100011, 0b010); ++ INSN(_sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(_fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + -+ INSN(sb, Register, 0b0100011, 0b000); -+ INSN(sh, Register, 0b0100011, 0b001); -+ INSN(sw, Register, 0b0100011, 0b010); -+ INSN(sd, Register, 0b0100011, 0b011); -+ INSN(fsw, FloatRegister, 0b0100111, 0b010); -+ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ INSN(sb, Register); ++ INSN(sh, Register); ++ INSN(sw, Register); ++ INSN(sd, Register); ++ INSN(fsw, FloatRegister); ++ INSN(fsd, FloatRegister); + +#undef INSN + @@ -1944,7 +1770,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ @@ -1955,8 +1781,10 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ ++ int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -1986,7 +1814,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ @@ -1996,7 +1824,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -2050,8 +1880,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -2059,7 +1889,13 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_jal, 0b1101111); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ @@ -2077,7 +1913,7 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, temp, &Assembler::NAME); 
\ + } + -+ INSN(jal, 0b1101111); ++ INSN(jal); + +#undef INSN + @@ -2085,8 +1921,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ @@ -2096,7 +1932,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(jalr, 0b1100111, 0b000); ++ INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + @@ -2130,8 +1966,10 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + ++ INSN(fence_i, 0b0001111, 0b001, 0b000000000000); + INSN(ecall, 0b1110011, 0b000, 0b000000000000); -+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++ INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); ++ +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -2239,12 +2077,12 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(addi, 0b0010011, 0b000); -+ INSN(slti, 0b0010011, 0b010); -+ INSN(addiw, 0b0011011, 0b000); -+ INSN(and_imm12, 0b0010011, 0b111); -+ INSN(ori, 0b0010011, 0b110); -+ INSN(xori, 0b0010011, 0b100); ++ INSN(_addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(_addiw, 0b0011011, 0b000); ++ INSN(_and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); + +#undef INSN + @@ -2278,9 +2116,9 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(slli, 0b0010011, 0b001, 0b000000); -+ INSN(srai, 0b0010011, 0b101, 0b010000); -+ INSN(srli, 0b0010011, 0b101, 0b000000); ++ INSN(_slli, 0b0010011, 0b001, 0b000000); ++ INSN(_srai, 0b0010011, 0b101, 0b010000); ++ INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + @@ -2316,7 +2154,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(lui, 0b0110111); ++ INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN @@ -2592,6 +2430,23 @@ index 000000000..d4da30ed6 + +#undef patch_vtype + ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ // Vector Configuration Instruction ++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); ++ ++#undef INSN ++ +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 @@ -3159,7 +3014,6 @@ index 000000000..d4da30ed6 + +// ==================================== +// RISC-V Bit-Manipulation Extension -+// Currently only support Zba and Zbb. +// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3238,7 +3092,7 @@ index 000000000..d4da30ed6 +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ -+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ @@ -3251,9 +3105,966 @@ index 000000000..d4da30ed6 + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); -+ ++ +#undef INSN + ++// ======================================== ++// RISC-V Compressed Instructions Extension ++// ======================================== ++// Note: ++// 1. 
When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be ++// transformed to 16-bit instructions if compressible. ++// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', ++// but most of time we have no need to explicitly use these instructions. ++// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range ++// are qualified to be compressed with their 2-byte versions. ++// An example: ++// ++// CompressibleRegion cr(_masm); ++// __ andr(...); // this instruction could change to c.and if able to ++// ++// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from ++// normal ones. ++// ++ ++private: ++ bool _in_compressible_region; ++public: ++ bool in_compressible_region() const { return _in_compressible_region; } ++ void set_in_compressible_region(bool b) { _in_compressible_region = b; } ++public: ++ ++ // a compressible region ++ class CompressibleRegion : public StackObj { ++ protected: ++ Assembler *_masm; ++ bool _saved_in_compressible_region; ++ public: ++ CompressibleRegion(Assembler *_masm) ++ : _masm(_masm) ++ , _saved_in_compressible_region(_masm->in_compressible_region()) { ++ _masm->set_in_compressible_region(true); ++ } ++ ~CompressibleRegion() { ++ _masm->set_in_compressible_region(_saved_in_compressible_region); ++ } ++ }; ++ ++ // patch a 16-bit instruction. ++ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { ++ assert_cond(a != NULL); ++ assert_cond(msb >= lsb && msb <= 15); ++ unsigned nbits = msb - lsb + 1; ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ uint16_t mask = (1U << nbits) - 1; ++ val <<= lsb; ++ mask <<= lsb; ++ uint16_t target = *(uint16_t *)a; ++ target &= ~mask; ++ target |= val; ++ *(uint16_t *)a = target; ++ } ++ ++ static void c_patch(address a, unsigned bit, uint16_t val) { ++ c_patch(a, bit, bit, val); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++// -------------- RVC Instruction Definitions -------------- ++ ++ void c_nop() { ++ c_addi(x0, 0); ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi, 0b000, 0b01); ++ INSN(c_addiw, 0b001, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t imm) { \ ++ 
assert_cond(is_imm_in_range(imm, 10, 0)); \ ++ assert_cond((imm & 0b1111) == 0); \ ++ assert_cond(imm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ ++ c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ ++ c_patch_reg((address)&insn, 7, sp); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi16sp, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(uimm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ ++ c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi4spn, 0b000, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_slli, 0b000, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_srli, 0b100, 0b00, 0b01); ++ INSN(c_srai, 0b100, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_andi, 0b100, 0b10, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct6, funct2, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 6, 5, funct2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 10, funct6); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sub, 0b100011, 
0b00, 0b01); ++ INSN(c_xor, 0b100011, 0b01, 0b01); ++ INSN(c_or, 0b100011, 0b10, 0b01); ++ INSN(c_and, 0b100011, 0b11, 0b01); ++ INSN(c_subw, 0b100111, 0b00, 0b01); ++ INSN(c_addw, 0b100111, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_mv, 0b1000, 0b10); ++ INSN(c_add, 0b1001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rs1) { \ ++ assert_cond(Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, x0); \ ++ c_patch_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_jr, 0b1000, 0b10); ++ INSN(c_jalr, 0b1001, 0b10); ++ ++#undef INSN ++ ++ typedef void (Assembler::* j_c_insn)(address dest); ++ typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); ++ ++ void wrap_label(Label &L, j_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(pc()); ++ } ++ } ++ ++ void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(r, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r, pc()); ++ } ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t offset) { \ ++ assert_cond(is_imm_in_range(offset, 11, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ ++ c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ ++ c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ ++ c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ ++ c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ ++ c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 11, 1)); \ ++ c_j(distance); \ ++ } \ ++ void NAME(Label &L) { \ ++ wrap_label(L, &Assembler::NAME); \ ++ } ++ ++ INSN(c_j, 0b101, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 8, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ ++ c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(Register Rs1, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 8, 1)); \ ++ NAME(Rs1, distance); \ ++ } \ ++ void NAME(Register Rs1, 
Label &L) { \ ++ wrap_label(L, Rs1, &Assembler::NAME); \ ++ } ++ ++ INSN(c_beqz, 0b110, 0b01); ++ INSN(c_bnez, 0b111, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 18, 0)); \ ++ assert_cond((imm & 0xfff) == 0); \ ++ assert_cond(imm != 0); \ ++ assert_cond(Rd != x0 && Rd != x2); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lui, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_li, 0b010, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ldsp, 0b011, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(FloatRegister Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_fldsp, 0b001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ld, 0b011, 0b00, Register); ++ INSN(c_sd, 0b111, 0b00, Register); ++ INSN(c_fld, 0b001, 0b00, FloatRegister); ++ INSN(c_fsd, 0b101, 0b00, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ 
c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sdsp, 0b111, 0b10, Register); ++ INSN(c_fsdsp, 0b101, 0b10, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_swsp, 0b110, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lwsp, 0b010, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lw, 0b010, 0b00); ++ INSN(c_sw, 0b110, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME() { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 11, 2, 0x0); \ ++ c_patch((address)&insn, 12, 12, 0b1); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ebreak, 0b100, 0b10); ++ ++#undef INSN ++ ++// -------------- RVC Transformation Functions -------------- ++ ++// -------------------------- ++// Register instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* add -> c.add */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ c_add(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ _add(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(add); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* sub/subw -> c.sub/c.subw */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ ++ C_NAME(Rd, Rs2); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(sub, c_sub, _sub); ++ INSN(subw, c_subw, _subw); ++ ++#undef INSN ++ ++// 
-------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ ++ ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ C_NAME(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(andr, c_and, _andr); ++ INSN(orr, c_or, _orr); ++ INSN(xorr, c_xor, _xorr); ++ INSN(addw, c_addw, _addw); ++ ++#undef INSN ++ ++private: ++// some helper functions ++ bool do_compress() const { ++ return UseRVC && in_compressible_region(); ++ } ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0 && \ ++ (!ld || rd_rs2 != x0); \ ++ } \ ++ ++ FUNC(is_c_ldsdsp, 0b111, 9); ++ FUNC(is_c_lwswsp, 0b011, 8); ++ ++#undef FUNC ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, int32_t imm12) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_fldsdsp, 0b111, 9); ++ ++#undef FUNC ++ ++#define FUNC(NAME, REG_TYPE, funct3, bits) \ ++ bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ ++ return rs1->is_compressed_valid() && \ ++ rd_rs2->is_compressed_valid() && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_ldsd, Register, 0b111, 8); ++ FUNC(is_c_lwsw, Register, 0b011, 7); ++ FUNC(is_c_fldsd, FloatRegister, 0b111, 8); ++ ++#undef FUNC ++ ++public: ++// -------------------------- ++// Load/store register ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* lw -> c.lwsp/c.lw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, true)) { \ ++ c_lwsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_lw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _lw(Rd, Rs, offset); \ ++ } ++ ++ INSN(lw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* ld -> c.ldsp/c.ld */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ ++ c_ldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_ld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _ld(Rd, Rs, offset); \ ++ } ++ ++ INSN(ld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fld -> c.fldsp/c.fld */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fld(Rd, Rs, offset); \ ++ } ++ ++ INSN(fld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sd -> c.sdsp/c.sd */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ ++ c_sdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_sd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sd(Rd, Rs, offset); \ ++ } ++ ++ INSN(sd); ++ ++#undef INSN ++ ++// -------------------------- ++#define 
INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sw -> c.swsp/c.sw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, false)) { \ ++ c_swsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_sw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sw(Rd, Rs, offset); \ ++ } ++ ++ INSN(sw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fsd -> c.fsdsp/c.fsd */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fsdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fsd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fsd(Rd, Rs, offset); \ ++ } ++ ++ INSN(fsd); ++ ++#undef INSN ++ ++// -------------------------- ++// Conditional branch instructions ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ /* beq/bne -> c.beqz/c.bnez */ \ ++ if (do_compress() && \ ++ (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ ++ is_imm_in_range(offset, 8, 1))) { \ ++ C_NAME(Rs1, offset); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rs1, Rs2, offset); \ ++ } ++ ++ INSN(beq, c_beqz, _beq); ++ INSN(bne, c_beqz, _bne); ++ ++#undef INSN ++ ++// -------------------------- ++// Unconditional branch instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ /* jal -> c.j */ \ ++ if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ ++ c_j(offset); \ ++ return; \ ++ } \ ++ _jal(Rd, offset); \ ++ } ++ ++ INSN(jal); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* jalr -> c.jr/c.jalr */ \ ++ if (do_compress() && (offset == 0 && Rs != x0)) { \ ++ if (Rd == x1) { \ ++ c_jalr(Rs); \ ++ return; \ ++ } else if (Rd == x0) { \ ++ c_jr(Rs); \ ++ return; \ ++ } \ ++ } \ ++ _jalr(Rd, Rs, offset); \ ++ } ++ ++ INSN(jalr); ++ ++#undef INSN ++ ++// -------------------------- ++// Miscellaneous Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* ebreak -> c.ebreak */ \ ++ if (do_compress()) { \ ++ c_ebreak(); \ ++ return; \ ++ } \ ++ _ebreak(); \ ++ } ++ ++ INSN(ebreak); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* The illegal instruction in RVC is presented by a 16-bit 0. 
*/ \ ++ if (do_compress()) { \ ++ emit_int16(0); \ ++ return; \ ++ } \ ++ _halt(); \ ++ } ++ ++ INSN(halt); ++ ++#undef INSN ++ ++// -------------------------- ++// Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int64_t imm) { \ ++ /* li -> c.li */ \ ++ if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ ++ c_li(Rd, imm); \ ++ return; \ ++ } \ ++ _li(Rd, imm); \ ++ } ++ ++ INSN(li); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ ++ if (do_compress()) { \ ++ if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ ++ c_addi(Rd, imm); \ ++ return; \ ++ } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ ++ c_mv(Rd, Rs1); \ ++ return; \ ++ } else if (Rs1 == sp && imm != 0) { \ ++ if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ ++ c_addi16sp(imm); \ ++ return; \ ++ } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ ++ c_addi4spn(Rd, imm); \ ++ return; \ ++ } \ ++ } \ ++ } \ ++ _addi(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addi); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addiw -> c.addiw */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ ++ c_addiw(Rd, imm); \ ++ return; \ ++ } \ ++ _addiw(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addiw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* and_imm12 -> c.andi */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ ++ c_andi(Rd, imm); \ ++ return; \ ++ } \ ++ _and_imm12(Rd, Rs1, imm); \ ++ } ++ ++ INSN(and_imm12); ++ ++#undef INSN ++ ++// -------------------------- ++// Shift Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* slli -> c.slli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ ++ c_slli(Rd, shamt); \ ++ return; \ ++ } \ ++ _slli(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(slli); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* srai/srli -> c.srai/c.srli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ ++ C_NAME(Rd, shamt); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(srai, c_srai, _srai); ++ INSN(srli, c_srli, _srli); ++ ++#undef INSN ++ ++// -------------------------- ++// Upper Immediate Instruction ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ /* lui -> c.lui */ \ ++ if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ ++ c_lui(Rd, imm); \ ++ return; \ ++ } \ ++ _lui(Rd, imm); \ ++ } ++ ++ INSN(lui); ++ ++#undef INSN ++ ++// --------------------------------------------------------------------------------------- ++ + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); @@ -3273,25 +4084,17 @@ index 000000000..d4da30ed6 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void 
wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // Computational pseudo instructions ++ // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); -+ ++ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + -+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { -+ } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ ShouldNotCallThis(); -+ return RegisterOrConstant(); ++ Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + // Stack overflow checking @@ -3301,34 +4104,25 @@ index 000000000..d4da30ed6 + return is_imm_in_range(imm, 12, 0); + } + -+ // The maximum range of a branch is fixed for the riscv -+ // architecture. ++ // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + -+ static Assembler::SEW elemBytes_to_sew(int esize) { -+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); -+ return (Assembler::SEW) exact_log2(esize); -+ } -+ + virtual ~Assembler() {} -+ +}; + -+class BiasedLockingCounters; -+ +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 000000000..82b825db7 +index 00000000000..7ffe8803985 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3376,14 +4170,14 @@ index 000000000..82b825db7 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 000000000..d0ac7ef46 +index 00000000000..23d982f9abd --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,169 @@ +@@ -0,0 +1,167 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -3409,7 +4203,7 @@ index 000000000..d0ac7ef46 +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allocation.hpp" ++#include "memory/allStatic.hpp" + +class Bytes: AllStatic { + public: @@ -3457,7 +4251,6 @@ index 000000000..d0ac7ef46 + ((u8)(((u4*)p)[0])); + + case 2: -+ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -3471,7 +4264,7 @@ index 000000000..d0ac7ef46 + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | -+ (u8)(p[0]); ++ ((u8)(p[0])); + } + } + @@ -3516,7 +4309,6 @@ index 000000000..d0ac7ef46 + break; + + case 2: -+ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -3546,17 +4338,17 @@ index 000000000..d0ac7ef46 + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + -+#include OS_CPU_HEADER_INLINE(bytes) ++#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 000000000..522eedd29 +index 00000000000..dcd0472c540 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,352 @@ +@@ -0,0 +1,353 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -3588,6 +4380,7 @@ index 000000000..522eedd29 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" @@ -3595,8 +4388,21 @@ index 000000000..522eedd29 + +#define __ ce->masm()-> + -+void CounterOverflowStub::emit_code(LIR_Assembler* ce) -+{ ++void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { ++ __ bind(_entry); ++ InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); ++ __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ ++ __ far_jump(RuntimeAddress(stub)); ++} ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); @@ -3608,22 +4414,19 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) -+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -+{ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -+{ ++ : 
_index(index), _array(), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + -+void RangeCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -3643,7 +4446,7 @@ index 000000000..522eedd29 + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { -+ assert(_array != NULL, "sanity"); ++ assert(_array != LIR_Opr::nullOpr(), "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } @@ -3655,13 +4458,11 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -+{ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + -+void PredicateFailedStub::emit_code(LIR_Assembler* ce) -+{ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); @@ -3670,8 +4471,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void DivByZeroStub::emit_code(LIR_Assembler* ce) -+{ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } @@ -3685,21 +4485,19 @@ index 000000000..522eedd29 +} + +// Implementation of NewInstanceStub -+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -+{ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); -+ assert(stub_id == Runtime1::new_instance_id || -+ stub_id == Runtime1::fast_new_instance_id || ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + -+void NewInstanceStub::emit_code(LIR_Assembler* ce) -+{ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); @@ -3711,16 +4509,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewTypeArrayStub -+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + -+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3733,16 +4529,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewObjectArrayStub -+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr 
result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + -+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3756,13 +4550,11 @@ index 000000000..522eedd29 + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -+: MonitorAccessStub(obj_reg, lock_reg) -+{ ++: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + -+void MonitorEnterStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); @@ -3779,8 +4571,7 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+void MonitorExitStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -3798,18 +4589,23 @@ index 000000000..522eedd29 + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + -+// RISCV don't use C1 runtime patching. When need patch, just deoptimize. -+void PatchingStub::emit_code(LIR_Assembler* ce) -+{ ++void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + -+void DeoptimizeStub::emit_code(LIR_Assembler* ce) -+{ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -3817,8 +4613,7 @@ index 000000000..522eedd29 + DEBUG_ONLY(__ should_not_reach_here()); +} + -+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
@@ -3835,8 +4630,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -+{ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); @@ -3845,32 +4639,29 @@ index 000000000..522eedd29 + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } -+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + -+void ArrayCopyStub::emit_code(LIR_Assembler* ce) -+{ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. -+ // + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; -+ SharedRuntime::java_calling_convention(signature, args, args_num, true); ++ SharedRuntime::java_calling_convention(signature, args, args_num); + + // push parameters + Register r[args_num]; -+ int i = 0; -+ r[i++] = src()->as_register(); -+ r[i++] = src_pos()->as_register(); -+ r[i++] = dst()->as_register(); -+ r[i++] = dst_pos()->as_register(); -+ r[i++] = length()->as_register(); ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { @@ -3879,7 +4670,7 @@ index 000000000..522eedd29 + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { -+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + @@ -3899,8 +4690,10 @@ index 000000000..522eedd29 + ce->add_call_info_here(info()); + +#ifndef PRODUCT -+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ incrementw(Address(t1)); ++ if (PrintC1Statistics) { ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ add_memory_int32(Address(t1), 1); ++ } +#endif + + __ j(_continuation); @@ -3909,13 +4702,12 @@ index 000000000..522eedd29 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 000000000..a0f411352 +index 00000000000..4417ad63091 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,84 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -3973,7 +4765,7 @@ index 000000000..a0f411352 + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan -+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, @@ -4000,13 +4792,12 @@ index 000000000..a0f411352 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 000000000..d4876625c +index 00000000000..e3a2606c532 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4037,13 +4828,12 @@ index 000000000..d4876625c +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 000000000..4b43bc4d7 +index 00000000000..7bc3d311501 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4076,13 +4866,12 @@ index 000000000..4b43bc4d7 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 000000000..94b4e0f0b +index 00000000000..172031941b2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -0,0 +1,391 @@ +@@ -0,0 +1,388 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4112,8 +4901,7 @@ index 000000000..94b4e0f0b +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + -+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -+{ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); @@ -4129,7 +4917,7 @@ index 000000000..94b4e0f0b + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); -+ } else if (type == T_OBJECT || type == T_ARRAY) { ++ } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); @@ -4240,8 +5028,8 @@ index 000000000..94b4e0f0b +LIR_Opr FrameMap::fpu10_float_opr; +LIR_Opr FrameMap::fpu10_double_opr; + -+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; -+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; + +//-------------------------------------------------------- +// FrameMap @@ -4398,7 +5186,7 @@ index 000000000..94b4e0f0b + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; -+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { @@ -4413,7 +5201,7 @@ index 000000000..94b4e0f0b + + +// ----------------mapping----------------------- -+// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + @@ -4430,7 +5218,7 @@ index 000000000..94b4e0f0b +// | .........| <- TOS +// | locals | +// +----------+ -+// | old fp, | ++// | old fp, | +// +----------+ +// | ret addr | +// +----------+ @@ -4458,8 +5246,7 @@ index 000000000..94b4e0f0b + return as_FloatRegister(n)->as_VMReg(); +} + -+LIR_Opr FrameMap::stack_pointer() -+{ ++LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + @@ -4473,13 +5260,12 @@ index 000000000..94b4e0f0b +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 000000000..f600c2f6f +index 00000000000..01281f5c9e1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,148 @@ +/* -+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4628,13 +5414,12 @@ index 000000000..f600c2f6f +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 000000000..a846d60ae +index 00000000000..4c1c13dc290 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,287 @@ +@@ -0,0 +1,281 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4671,16 +5456,15 @@ index 000000000..a846d60ae + +#define __ _masm-> + -+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { -+ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, ++ LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); -+ -+ // operand check -+ assert(left->is_single_cpu(), "left must be register"); -+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); -+ assert(result->is_single_cpu(), "result must be register"); ++ // opreand check ++ assert(left->is_single_cpu(), "left must be a register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); ++ assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + @@ -4754,7 +5538,7 @@ index 000000000..a846d60ae + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { @@ -4762,7 +5546,7 @@ index 000000000..a846d60ae + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + default: + ShouldNotReachHere(); + } @@ -4817,7 +5601,7 @@ index 000000000..a846d60ae + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { -+ case lir_add: ++ case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); @@ -4831,7 +5615,7 @@ index 000000000..a846d60ae + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { @@ -4849,7 +5633,7 @@ index 000000000..a846d60ae + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); @@ -4874,9 +5658,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -4889,9 +5671,7 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), 
left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -4921,13 +5701,12 @@ index 000000000..a846d60ae +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 000000000..93530ef58 +index 00000000000..ab0a9963fc1 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp -@@ -0,0 +1,36 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4950,6 +5729,7 @@ index 000000000..93530ef58 + * questions. + * + */ ++ +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + @@ -4960,17 +5740,17 @@ index 000000000..93530ef58 + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 000000000..31f8d6a4a +index 00000000000..b7f53e395f3 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp -@@ -0,0 +1,387 @@ +@@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -4999,6 +5779,7 @@ index 000000000..31f8d6a4a +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -5026,7 +5807,7 @@ index 000000000..31f8d6a4a + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5064,14 +5845,14 @@ index 000000000..31f8d6a4a + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5133,7 +5914,7 @@ index 000000000..31f8d6a4a + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif @@ -5142,7 +5923,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -5214,6 +5995,7 @@ index 000000000..31f8d6a4a +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); ++ + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the @@ -5269,7 +6051,7 @@ index 000000000..31f8d6a4a + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? 
default_type->element_type()->basic_type() : T_ILLEGAL; -+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { @@ -5292,7 +6074,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -5356,13 +6138,12 @@ index 000000000..31f8d6a4a +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 000000000..872fd2ef6 +index 00000000000..06a0f248ca6 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -5388,6 +6169,7 @@ index 000000000..872fd2ef6 + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); @@ -5410,17 +6192,18 @@ index 000000000..872fd2ef6 + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 000000000..222e3e97e +index 00000000000..742c2126e60 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2275 @@ +@@ -0,0 +1,2267 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -5455,14 +6238,12 @@ index 000000000..222e3e97e +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -5512,6 +6293,17 @@ index 000000000..222e3e97e + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + ++void LIR_Assembler::clinit_barrier(ciMethod* method) { ++ assert(VM_Version::supports_fast_class_init_checks(), "sanity"); ++ assert(!method->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(t1, method->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; @@ -5521,25 +6313,11 @@ index 000000000..222e3e97e + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + -+//--------------fpu register translations----------------------- -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -+//------------------------------------------- + +static jlong as_long(LIR_Opr data) { + jlong result; @@ -5557,6 +6335,43 @@ index 000000000..222e3e97e + return result; +} + ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_opr = addr->index(); ++ ++ if (index_opr->is_illegal()) { ++ return Address(base, addr->disp()); ++ } ++ ++ int scale = addr->scale(); ++ if (index_opr->is_cpu_register()) { ++ Register index; ++ if (index_opr->is_single_cpu()) { ++ index = index_opr->as_register(); ++ } else { ++ index = index_opr->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_opr->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); @@ -5572,7 +6387,7 @@ index 000000000..222e3e97e + +// Ensure a valid Address (base + offset) to a stack-slot. 
If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address -+// calculation to hold the address in a temporary register. ++// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); @@ -5690,10 +6505,7 @@ index 000000000..222e3e97e +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! + -+ // The frame_map records size in slots (32bit word) -+ -+ // subtract two words to account for return address and link -+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { @@ -5757,7 +6569,11 @@ index 000000000..222e3e97e + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); -+ __ unlock_object(x15, x14, x10, *stub->entry()); ++ if (UseHeavyMonitors) { ++ __ j(*stub->entry()); ++ } else { ++ __ unlock_object(x15, x14, x10, *stub->entry()); ++ } + __ bind(*stub->continuation()); + } + @@ -5810,7 +6626,7 @@ index 000000000..222e3e97e + return offset; +} + -+void LIR_Assembler::return_op(LIR_Opr result) { ++void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code @@ -5820,20 +6636,18 @@ index 000000000..222e3e97e + __ reserved_stack_check(); + } + -+ address polling_page(os::get_polling_page()); -+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); ++ code_stub->set_safepoint_offset(__ offset()); ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); + __ ret(); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { -+ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); -+ assert(os::is_poll_address(polling_page), "should be"); -+ int32_t offset = 0; -+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); ++ __ get_polling_page(t0, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map -+ __ read_polling_page(t0, offset, relocInfo::poll_type); ++ __ read_polling_page(t0, 0, relocInfo::poll_type); + return __ offset(); +} + @@ -6007,7 +6821,7 @@ index 000000000..222e3e97e + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { -+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; @@ -6064,8 +6878,7 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ 
-6075,7 +6888,7 @@ index 000000000..222e3e97e + return; + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { @@ -6187,8 +7000,7 @@ index 000000000..222e3e97e + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool wide, bool /* unaligned */) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -6233,11 +7045,7 @@ index 000000000..222e3e97e + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: -+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ __ lwu(dest->as_register(), as_Address(from_addr)); -+ } else { -+ __ ld(dest->as_register(), as_Address(from_addr)); -+ } ++ __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); @@ -6261,21 +7069,21 @@ index 000000000..222e3e97e + ShouldNotReachHere(); + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } -+ __ verify_oop(dest->as_register()); -+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -+ if (UseCompressedClassPointers) { -+ __ decode_klass_not_null(dest->as_register()); ++ ++ if (!UseZGC) { ++ // Load barrier has not yet been applied, so ZGC can't verify the oop here ++ __ verify_oop(dest->as_register()); + } + } +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { -+ case lir_idiv: ++ case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), @@ -6311,13 +7119,11 @@ index 000000000..222e3e97e + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack -+ false, // unaligned + false); // wide + __ bind(done); +} @@ -6431,8 +7237,8 @@ index 000000000..222e3e97e + Register len = op->len()->as_register(); + + if (UseSlowPath || -+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || -+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); @@ -6467,7 +7273,7 @@ index 000000000..222e3e97e + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -6479,7 +7285,7 @@ index 000000000..222e3e97e + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ mv(t1, DataLayout::counter_increment); ++ __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, 
md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -6505,7 +7311,7 @@ index 000000000..222e3e97e + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit -+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne(k_RInfo, t0, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump @@ -6550,10 +7356,7 @@ index 000000000..222e3e97e + // Object is null, update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); -+ Address data_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ lbu(t0, data_addr); + __ ori(t0, t0, BitData::null_seen_byte_constant()); + __ sb(t0, data_addr); @@ -6667,7 +7470,7 @@ index 000000000..222e3e97e + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); -+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); @@ -6758,7 +7561,12 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::align_call(LIR_Code code) { } ++void LIR_Assembler::align_call(LIR_Code code) { ++ // With RVC a call instruction may get 2-byte aligned. ++ // The address of the call instruction needs to be 4-byte aligned to ++ // ensure that it does not span a cache line so that it can be patched. ++ __ align(4); ++} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -6778,10 +7586,9 @@ index 000000000..222e3e97e + add_call_info(code_offset(), op->info()); +} + -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } -+ +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); ++ assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -6793,7 +7600,8 @@ index 000000000..222e3e97e + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + -+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + @@ -6838,7 +7646,6 @@ index 000000000..222e3e97e + __ j(_unwind_handler_entry); +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6866,7 +7673,6 @@ index 000000000..222e3e97e + } +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6901,22 +7707,16 @@ index 000000000..222e3e97e + } +} + -+ -+ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); -+ if (!UseFastLocking) { ++ if (UseHeavyMonitors) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { -+ Register scratch = noreg; -+ if (UseBiasedLocking) { -+ scratch = op->scratch_opr()->as_register(); -+ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible -+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } @@ -6929,6 +7729,23 @@ index 000000000..222e3e97e + __ bind(*op->stub()->continuation()); +} + ++void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { ++ Register obj = op->obj()->as_pointer_register(); ++ Register result = op->result_opr()->as_pointer_register(); ++ ++ CodeEmitInfo* info = op->info(); ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ if (UseCompressedClassPointers) { ++ __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ __ decode_klass_not_null(result); ++ } else { ++ __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); @@ -6962,7 +7779,7 @@ index 000000000..222e3e97e + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6978,7 +7795,7 @@ index 000000000..222e3e97e + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6988,13 +7805,13 @@ index 000000000..222e3e97e + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + @@ -7029,7 +7846,7 @@ index 000000000..222e3e97e + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7079,7 +7896,7 @@ index 000000000..222e3e97e + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -7199,32 +8016,30 @@ index 000000000..222e3e97e + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC && patch_code != lir_patch_none) { ++ if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } -+#endif -+ assert(patch_code == lir_patch_none, "Patch code not supported"); ++ + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); -+ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { -+ ++ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { ++ int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); -+ int scale = adr->scale(); -+ if(index_op->is_constant()) { ++ if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + -+ if(!is_imm_in_range(offset, 12, 0)) { ++ if (!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } ++ + __ la(dst, as_Address(adr)); +} + @@ -7248,8 +8063,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, -+ /* unaligned */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -7326,7 +8140,7 @@ index 000000000..222e3e97e +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + + get_op(type); + @@ -7376,41 +8190,6 @@ index 000000000..222e3e97e + return exact_log2(elem_size); +} + -+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { -+ if (addr->base()->is_illegal()) { -+ assert(addr->index()->is_illegal(), "must be illegal too"); -+ __ movptr(tmp, addr->disp()); -+ return Address(tmp, 0); -+ } -+ -+ Register base = addr->base()->as_pointer_register(); -+ LIR_Opr index_op = addr->index(); -+ int scale = addr->scale(); -+ -+ if (index_op->is_illegal()) { -+ return Address(base, addr->disp()); -+ } else if 
(index_op->is_cpu_register()) { -+ Register index; -+ if (index_op->is_single_cpu()) { -+ index = index_op->as_register(); -+ } else { -+ index = index_op->as_register_lo(); -+ } -+ if (scale != 0) { -+ __ shadd(tmp, index, base, tmp, scale); -+ } else { -+ __ add(tmp, base, index); -+ } -+ return Address(tmp, addr->disp()); -+ } else if (index_op->is_constant()) { -+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); -+ return Address(base, addr_offset); -+ } -+ -+ Unimplemented(); -+ return Address(); -+} -+ +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. @@ -7444,16 +8223,6 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} -+ +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); @@ -7498,7 +8267,6 @@ index 000000000..222e3e97e + add_call_info_here(info); +} + -+ +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); @@ -7588,6 +8356,16 @@ index 000000000..222e3e97e + __ bind(done); +} + ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -7602,10 +8380,7 @@ index 000000000..222e3e97e + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); -+ Address counter_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); @@ -7687,21 +8462,21 @@ index 000000000..222e3e97e + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ mv(t0, c); ++ __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 000000000..11a47fd6e +index 00000000000..051328c3a8a --- /dev/null +++ 
b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp @@ -0,0 +1,132 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -7743,9 +8518,6 @@ index 000000000..11a47fd6e + + Address as_Address(LIR_Address* addr, Register tmp); + -+ // Ensure we have a valid Address (base+offset) to a stack-slot. -+ Address stack_slot_address(int index, uint shift, int adjust = 0); -+ + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. @@ -7753,6 +8525,9 @@ index 000000000..11a47fd6e + address double_constant(double d); + address int_constant(jlong n); + ++ // Ensure we have a valid Address (base + offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, @@ -7768,17 +8543,15 @@ index 000000000..11a47fd6e + + void deoptimize_trap(CodeEmitInfo *info); + -+ enum -+ { -+ // see emit_static_call_stub for detail: ++ enum { ++ // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), -+ _call_aot_stub_size = 0, -+ // see emit_exception_handler for detail: ++ // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -+ // see emit_deopt_handler for detail ++ // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller @@ -7789,10 +8562,12 @@ index 000000000..11a47fd6e + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ + void get_op(BasicType type); + + // emit_typecheck_helper sub functions @@ -7832,12 +8607,12 @@ index 000000000..11a47fd6e +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 000000000..8ba9ed66d +index 00000000000..e126f148cdf --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1083 @@ +@@ -0,0 +1,1075 @@ +/* -+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -7876,6 +8651,7 @@ index 000000000..8ba9ed66d +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifdef ASSERT @@ -7980,7 +8756,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; @@ -7996,7 +8771,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} @@ -8004,7 +8778,7 @@ index 000000000..8ba9ed66d +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); -+ ++ + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; @@ -8031,17 +8805,22 @@ index 000000000..8ba9ed66d + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); -+ + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; + switch (type) { -+ case T_LONG: return LIR_OprFact::longConst(x); -+ case T_INT: return LIR_OprFact::intConst(x); -+ default: ShouldNotReachHere(); ++ case T_LONG: ++ r = LIR_OprFact::longConst(x); ++ break; ++ case T_INT: ++ r = LIR_OprFact::intConst(x); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ return NULL; ++ return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -8111,11 +8890,6 @@ index 000000000..8ba9ed66d + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); -+ // Need a tmp register for biased locking -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (UseBiasedLocking) { -+ tmp = new_register(T_INT); -+ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { @@ -8124,7 +8898,7 @@ index 000000000..8ba9ed66d + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, + x->monitor_no(), info_for_exception, info); +} + @@ -8194,12 +8968,7 @@ index 000000000..8ba9ed66d + right.load_item(); + + LIR_Opr reg = rlock(x); -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; -+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { -+ tmp = new_register(T_DOUBLE); -+ } -+ -+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); + + set_result(x, round_item(reg)); +} @@ -8208,7 +8977,7 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap -+ LIRItem left(x->x(), this); ++ LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == 
Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -8232,7 +9001,7 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -8306,16 +9075,16 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; -+ + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } ++ + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || @@ -8389,7 +9158,7 @@ index 000000000..8ba9ed66d + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); -+ if(right.is_constant() && ++ if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); @@ -8438,7 +9207,7 @@ index 000000000..8ba9ed66d + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); -+ if (type == T_OBJECT || type == T_ARRAY) { ++ if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); @@ -8452,7 +9221,7 @@ index 000000000..8ba9ed66d +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); @@ -8485,14 +9254,16 @@ index 000000000..8ba9ed66d + do_LibmIntrinsic(x); + break; + case vmIntrinsics::_dabs: // fall through -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { -+ case vmIntrinsics::_dsqrt: { ++ case vmIntrinsics::_dsqrt: // fall through ++ case vmIntrinsics::_dsqrt_strict: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } @@ -8892,9 +9663,9 @@ index 000000000..8ba9ed66d + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { -+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), x->tsux(), x->usux()); + } else { -+ __ branch(lir_cond(cond), right->type(), x->tsux()); ++ __ branch(lir_cond(cond), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); @@ -8913,20 +9684,16 @@ 
index 000000000..8ba9ed66d + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { -+ if (!UseBarriersForVolatile) { -+ __ membar(); -+ } -+ + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 000000000..00e33e882 +index 00000000000..5f1c394ab3d --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -8954,40 +9721,40 @@ index 000000000..00e33e882 +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + -+FloatRegister LIR_OprDesc::as_float_reg() const { ++FloatRegister LIR_Opr::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + -+FloatRegister LIR_OprDesc::as_double_reg() const { ++FloatRegister LIR_Opr::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); -+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | -+ (reg1 << LIR_OprDesc::reg2_shift) | -+ LIR_OprDesc::double_type | -+ LIR_OprDesc::fpu_register | -+ LIR_OprDesc::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | ++ (reg1 << LIR_Opr::reg2_shift) | ++ LIR_Opr::double_type | ++ LIR_Opr::fpu_register | ++ LIR_Opr::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); -+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, -+ "wrong type for addresses"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || ++ base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 000000000..60dcdc0e1 +index 00000000000..78a61128bdd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -9021,14 +9788,14 @@ index 000000000..60dcdc0e1 +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 000000000..f0aa08a39 +index 00000000000..d7ca7b0fd05 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -9063,7 +9830,6 @@ index 000000000..f0aa08a39 + return 1; +} + -+ +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} @@ -9085,8 +9851,8 @@ index 000000000..f0aa08a39 + return false; +} + -+ +inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // No special case behaviours yet +} + + @@ -9099,8 +9865,8 @@ index 000000000..f0aa08a39 + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; -+ } else if (cur->type() == T_INT || cur->type() == T_LONG || -+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; @@ -9108,18 +9874,17 @@ index 000000000..f0aa08a39 + return false; +} + -+ +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..370ec45c6 +index 00000000000..6f656c8c533 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,441 @@ +@@ -0,0 +1,432 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9143,15 +9908,16 @@ index 000000000..370ec45c6 + */ + +#include "precompiled.hpp" ++#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" ++#include "oops/markWord.hpp" +#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -9167,7 +9933,7 @@ index 000000000..370ec45c6 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9179,17 +9945,19 @@ index 000000000..370ec45c6 + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + -+ if (UseBiasedLocking) { -+ assert(tmp != noreg, "should have tmp register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); -+ } else { -+ null_check_offset = offset(); ++ null_check_offset = offset(); ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(hdr, obj); ++ lwu(hdr, Address(hdr, Klass::access_flags_offset())); ++ andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(t0, slow_case, true /* is_far */); + } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked -+ ori(hdr, hdr, markOopDesc::unlocked_value); ++ ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the @@ -9212,7 +9980,7 @@ index 000000000..370ec45c6 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ mv(t0, aligned_mask - os::vm_page_size()); ++ li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9220,10 +9988,6 @@ index 000000000..370ec45c6 + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); -+ if (PrintBiasedLockingStatistics) { -+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ incrementw(Address(t1, 0)); -+ } + return null_check_offset; +} + @@ -9233,21 +9997,13 @@ index 000000000..370ec45c6 + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + -+ if (UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ biased_locking_exit(obj, hdr, done); -+ } -+ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); -+ if (!UseBiasedLocking) { -+ // load object -+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); -+ } ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to @@ -9274,13 +10030,8 @@ index 000000000..370ec45c6 + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); -+ if (UseBiasedLocking && !len->is_valid()) { -+ assert_different_registers(obj, klass, len, tmp1, tmp2); -+ ld(tmp1, Address(klass, Klass::prototype_header_offset())); -+ } else { -+ // This assumes that all prototype bits fitr in an int32_t -+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); -+ } ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass @@ -9298,7 +10049,7 @@ index 000000000..370ec45c6 +} + +// preserves obj, destroys len_in_bytes -+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + @@ -9310,7 +10061,7 @@ index 000000000..370ec45c6 + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } -+ zero_memory(obj, len_in_bytes, tmp1); ++ zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } @@ -9434,24 +10185,29 @@ index 000000000..370ec45c6 +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { -+ // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. 
For this action to be legal we -+ // must ensure that this first instruction is a J, JAL or NOP. -+ // Make it a NOP. -+ nop(); + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. -+ // Note that we do this before doing an enter(). ++ // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); -+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multipler for wordSize ++ MacroAssembler::build_frame(framesize); ++ ++ // Insert nmethod entry barrier into frame. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(this); +} + +void C1_MacroAssembler::remove_frame(int framesize) { -+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiper for wordSize ++ MacroAssembler::remove_frame(framesize); +} + + -+void C1_MacroAssembler::verified_entry() { ++void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ ++ nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { @@ -9539,9 +10295,9 @@ index 000000000..370ec45c6 + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { -+ oop_equal(op1, op2, label, is_far); ++ beq(op1, op2, label, is_far); + } else { -+ oop_nequal(op1, op2, label, is_far); ++ bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), @@ -9559,14 +10315,14 @@ index 000000000..370ec45c6 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5d0cefe89 +index 00000000000..dfd3c17d7c7 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp -@@ -0,0 +1,121 @@ +@@ -0,0 +1,120 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9614,7 +10370,7 @@ index 000000000..5d0cefe89 + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); -+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, @@ -9624,9 +10380,8 @@ index 000000000..5d0cefe89 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved -+ // tmp : temporary register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -9686,14 +10441,14 @@ index 000000000..5d0cefe89 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 000000000..f06e7b51c +index 00000000000..f523c9ed50a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1206 @@ +@@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9723,9 +10478,11 @@ index 000000000..f06e7b51c +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" @@ -9733,18 +10490,20 @@ index 000000000..f06e7b51c +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" ++#include "utilities/powerOfTwo.hpp" +#include "vmreg_riscv.inline.hpp" + + +// Implementation of StubAssembler + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers -+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); -+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + @@ -9780,7 +10539,7 @@ index 000000000..f06e7b51c + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared -+ if (oop_result1->is_valid()) { ++ if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { @@ -9797,8 +10556,8 @@ index 000000000..f06e7b51c + bind(L); + } + // get oop results if there are any and reset the values in the thread -+ if (oop_result1->is_valid()) { -+ get_vm_result(oop_result1, xthread); ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); @@ -9806,12 +10565,12 @@ index 000000000..f06e7b51c + return call_offset; +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); -+ return call_RT(oop_result1, metadata_result, entry, 1); ++ return call_RT(oop_result, metadata_result, entry, 1); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { @@ -9826,10 +10585,10 @@ index 000000000..f06e7b51c + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++int StubAssembler::call_RT(Register oop_result, 
Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || @@ -9838,31 +10597,36 @@ index 000000000..f06e7b51c + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; -+ addi(sp, sp, -(arg_num * wordSize)); -+ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); -+ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); -+ addi(sp, sp, arg_num * wordSize); ++ addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + ++enum return_state_t { ++ does_not_return, requires_return ++}; ++ +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; ++ bool _return_state; + + public: -+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); @@ -9880,8 +10644,9 @@ index 000000000..f06e7b51c + +#define __ _sasm-> + -+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { + _sasm = sasm; ++ _return_state = return_state; + __ prologue(name, must_gc_arguments); +} + @@ -9893,7 +10658,11 @@ index 000000000..f06e7b51c + + +StubFrame::~StubFrame() { -+ __ epilogue(); ++ if (_return_state == requires_return) { ++ __ epilogue(); ++ } else { ++ __ should_not_reach_here(); ++ } + _sasm = NULL; +} + @@ -9919,7 +10688,7 @@ index 000000000..f06e7b51c +}; + +// Save off registers which might be killed by calls into the runtime. -+// Tries to smart of about FP registers. In particular we separate ++// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. @@ -9936,11 +10705,12 @@ index 000000000..f06e7b51c + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + -+ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. 
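++ // Note (added for clarity): the thirteen registers listed below -- x7 (t2),
++ // x10-x17 (a0-a7) and x28-x31 (t3-t6) -- are exactly the caller-saved
++ // registers of the standard RISC-V calling convention that C1 allocates.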
-+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, -+ x13, x14, x15, x16, x17, -+ x28, x29, x30, x31}; ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { ++ x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 ++ }; ++ + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -10055,7 +10825,6 @@ index 000000000..f06e7b51c + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + -+ __ should_not_reach_here(); + return oop_maps; +} + @@ -10103,9 +10872,7 @@ index 000000000..f06e7b51c + sasm->set_frame_size(frame_size); + break; + } -+ default: -+ __ should_not_reach_here(); -+ break; ++ default: ShouldNotReachHere(); + } + + // verify that only x10 and x13 are valid at this time @@ -10161,11 +10928,8 @@ index 000000000..f06e7b51c + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: -+ // Pop the return address. -+ __ leave(); -+ __ ret(); // jump to exception handler + break; -+ default: ShouldNotReachHere(); ++ default: ShouldNotReachHere(); + } + + return oop_maps; @@ -10268,80 +11032,37 @@ index 000000000..f06e7b51c +#endif + __ reset_last_Java_frame(true); + -+ // check for pending exceptions -+ { -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, L); -+ // exception pending => remove activation and forward to exception handler -+ -+ { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? -+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); -+ __ bind(L1); -+ } -+ -+ // the deopt blob expects exceptions in the special fields of -+ // JavaThread, so copy and clear pending exception. -+ -+ // load and clear pending exception -+ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); -+ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); -+ -+ // check that there is really a valid exception -+ __ verify_not_null_oop(x10); -+ -+ // load throwing pc: this is the return address of the stub -+ __ ld(x13, Address(fp, wordSize)); -+ +#ifdef ASSERT -+ // check that fields in JavaThread for exception oop and issuing pc are empty -+ Label oop_empty; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ beqz(t0, oop_empty); -+ __ stop("exception oop must be empty"); -+ __ bind(oop_empty); ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); + -+ Label pc_empty; -+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); -+ __ beqz(t0, pc_empty); -+ __ stop("exception pc must be empty"); -+ __ bind(pc_empty); ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); +#endif + -+ // store exception oop and throwing pc to JavaThread -+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); -+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ // Runtime will return true if the nmethod has been deoptimized, this is the ++ // expected scenario and anything else is an error. 
Note that we maintain a ++ // check on the result purely as a defensive measure. ++ Label no_deopt; ++ __ beqz(x10, no_deopt); // Have we deoptimized? + -+ restore_live_registers(sasm); ++ // Perform a re-execute. The proper return address is already on the stack, ++ // we just need to restore registers, pop all of our frames but the return ++ // address and jump to the deopt blob. + -+ __ leave(); -+ -+ // Forward the exception directly to deopt blob. We can blow no -+ // registers and must leave throwing pc on the stack. A patch may -+ // have values live in registers so the entry point with the -+ // exception in tls. -+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); -+ -+ __ bind(L); -+ } -+ -+ // Runtime will return true if the nmethod has been deoptimized during -+ // the patching process. In that case we must do a deopt reexecute instead. -+ Label cont; -+ -+ __ beqz(x10, cont); // have we deoptimized? -+ -+ // Will reexecute. Proper return address is already on the stack we just restore -+ // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + -+ __ bind(cont); -+ restore_live_registers(sasm); -+ __ leave(); -+ __ ret(); ++ __ bind(no_deopt); ++ __ stop("deopt not performed"); + + return oop_maps; +} @@ -10367,13 +11088,13 @@ index 000000000..f06e7b51c + + case throw_div0_exception_id: + { -+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: -+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; @@ -10652,14 +11373,14 @@ index 000000000..f06e7b51c + + case throw_class_cast_exception_id: + { -+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { -+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } @@ -10693,7 +11414,7 @@ index 000000000..f06e7b51c + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -10753,7 +11474,7 @@ index 000000000..f06e7b51c + + case deoptimize_id: + { -+ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); @@ -10772,7 +11493,7 @@ index 000000000..f06e7b51c + + case throw_range_check_failed_id: 
+ { -+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; @@ -10788,7 +11509,7 @@ index 000000000..f06e7b51c + + case access_field_patching_id: + { -+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } @@ -10796,7 +11517,7 @@ index 000000000..f06e7b51c + + case load_klass_patching_id: + { -+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } @@ -10804,7 +11525,7 @@ index 000000000..f06e7b51c + + case load_mirror_patching_id: + { -+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } @@ -10812,7 +11533,7 @@ index 000000000..f06e7b51c + + case load_appendix_patching_id: + { -+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } @@ -10835,14 +11556,14 @@ index 000000000..f06e7b51c + + case throw_index_exception_id: + { -+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { -+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); @@ -10851,7 +11572,7 @@ index 000000000..f06e7b51c + + case predicate_failed_trap_id: + { -+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); @@ -10874,7 +11595,7 @@ index 000000000..f06e7b51c + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), c_rarg0); + + restore_live_registers(sasm); + } @@ -10882,8 +11603,8 @@ index 000000000..f06e7b51c + + default: + { -+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); -+ __ mv(x10, (int)id); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -10898,14 +11619,13 @@ index 
000000000..f06e7b51c +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 000000000..974c8fe76 +index 00000000000..fe46f7b21c8 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,65 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -10937,10 +11657,8 @@ index 000000000..974c8fe76 +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) + -+#ifndef TIERED ++#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); -+define_pd_global(bool, UseTLAB, true ); -+define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -10949,7 +11667,6 @@ index 000000000..974c8fe76 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); -+define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -10960,30 +11677,1877 @@ index 000000000..974c8fe76 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); -+define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); -+#endif // !TIERED ++#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); -+define_pd_global(bool, RoundFPResults, true ); + -+define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); -+define_pd_global(bool, TwoOperandLIRForm, false ); ++define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +new file mode 100644 +index 00000000000..27770dc17aa +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -0,0 +1,1646 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "opto/c2_MacroAssembler.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; ++ ++ BLOCK_COMMENT("string_indexof_char_short {"); ++ ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ ++ mv(result, -1); ++ mv(index, zr); ++ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); ++ ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); ++ ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); ++ ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); ++ ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); ++ ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); ++ ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); ++ ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); ++ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); ++ ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); ++} ++ ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; ++ ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } ++ ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); ++ ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); ++ ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } ++ ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++ ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void C2_MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_indexof {"); ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. 
++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ ++ sub(sp, sp, ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); ++ ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. 
Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} ++ ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ Register first = tmp3; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); ++ ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); ++ ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); ++ ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, 
MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); ++ ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; ++ ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); ++} ++ ++// Compare strings. ++void C2_MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; ++ ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); ++ } ++ ++ // Compute the minimum of the string lengths and save the difference in result. 
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); ++ ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); ++ ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. 
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } ++ ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); ++ ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} string_compare"); ++} ++ ++void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); ++ ++ BLOCK_COMMENT("arrays_equals {"); ++ ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); ++ ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); ++ ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same ++ ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); ++ ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); ++ ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); ++} ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void C2_MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ ++ BLOCK_COMMENT("string_equals {"); ++ ++ mv(result, false); ++ ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); ++ ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. 
++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } ++ } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} ++ ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} ++ ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
++void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } ++ ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); ++} ++ ++void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { ++ Label loop; ++ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt, sew, Assembler::m2); ++ vlex_v(vr1, a1, sew); ++ vlex_v(vr2, a2, sew); ++ vmsne_vv(vrs, vr1, vr2); ++ vfirst_m(tmp2, vrs); ++ bgez(tmp2, DONE); ++ sub(cnt, cnt, tmp1); ++ if (!islatin) { ++ slli(tmp1, tmp1, 1); // get byte counts ++ } ++ add(a1, a1, tmp1); ++ add(a2, a2, tmp1); ++ bnez(cnt, loop); ++ ++ mv(result, true); ++} ++ ++void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ BLOCK_COMMENT("string_equals_v {"); ++ ++ mv(result, false); ++ ++ if (elem_size == 2) { ++ srli(cnt, cnt, 1); ++ } ++ ++ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals_v"); ++} ++ ++// used by C2 ClearArray patterns. ++// base: Address of a buffer to be zeroed ++// cnt: Count in HeapWords ++// ++// base, cnt, v0, v1 and t0 are clobbered. 
++void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { ++ Label loop; ++ ++ // making zero words ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vxor_vv(v0, v0, v0); ++ ++ bind(loop); ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vse64_v(v0, base); ++ sub(cnt, cnt, t0); ++ shadd(base, t0, base, t0, 3); ++ bnez(cnt, loop); ++} ++ ++void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ ++ BLOCK_COMMENT("arrays_equals_v {"); ++ ++ // if (a1 == a2), return true ++ mv(result, true); ++ beq(a1, a2, DONE); ++ ++ mv(result, false); ++ // if a1 == null or a2 == null, return false ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ // if (a1.length != a2.length), return false ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt1, cnt2, DONE); ++ ++ la(a1, Address(a1, base_offset)); ++ la(a2, Address(a2, base_offset)); ++ ++ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} arrays_equals_v"); ++} ++ ++void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, ++ Register result, Register tmp1, Register tmp2, int encForm) { ++ Label DIFFERENCE, DONE, L, loop; ++ bool encLL = encForm == StrIntrinsicNode::LL; ++ bool encLU = encForm == StrIntrinsicNode::LU; ++ bool encUL = encForm == StrIntrinsicNode::UL; ++ ++ bool str1_isL = encLL || encLU; ++ bool str2_isL = encLL || encUL; ++ ++ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // for Lating strings, 1 byte for 1 character ++ // for UTF16 strings, 2 bytes for 1 character ++ if (!str1_isL) ++ sraiw(cnt1, cnt1, 1); ++ if (!str2_isL) ++ sraiw(cnt2, cnt2, 1); ++ ++ // if str1 == str2, return the difference ++ // save the minimum of the string lengths in cnt2. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ if (str1_isL == str2_isL) { // LL or UU ++ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); ++ j(DONE); ++ } else { // LU or UL ++ Register strL = encLU ? str1 : str2; ++ Register strU = encLU ? str2 : str1; ++ VectorRegister vstr1 = encLU ? v4 : v0; ++ VectorRegister vstr2 = encLU ? v0 : v4; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); ++ vle8_v(vstr1, strL); ++ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); ++ vzext_vf2(vstr2, vstr1); ++ vle16_v(vstr1, strU); ++ vmsne_vv(v0, vstr2, vstr1); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, DIFFERENCE); ++ sub(cnt2, cnt2, tmp1); ++ add(strL, strL, tmp1); ++ shadd(strU, tmp1, strU, tmp1, 1); ++ bnez(cnt2, loop); ++ j(DONE); ++ } ++ bind(DIFFERENCE); ++ slli(tmp1, tmp2, 1); ++ add(str1, str1, str1_isL ? tmp2 : tmp1); ++ add(str2, str2, str2_isL ? tmp2 : tmp1); ++ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); ++ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); ++ sub(result, tmp1, tmp2); ++ ++ bind(DONE); ++} ++ ++void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { ++ Label loop; ++ assert_different_registers(src, dst, len, tmp, t0); ++ ++ BLOCK_COMMENT("byte_array_inflate_v {"); ++ bind(loop); ++ vsetvli(tmp, len, Assembler::e8, Assembler::m2); ++ vle8_v(v2, src); ++ vsetvli(t0, len, Assembler::e16, Assembler::m4); ++ vzext_vf2(v0, v2); ++ vse16_v(v0, dst); ++ sub(len, len, tmp); ++ add(src, src, tmp); ++ shadd(dst, tmp, dst, tmp, 1); ++ bnez(len, loop); ++ BLOCK_COMMENT("} byte_array_inflate_v"); ++} ++ ++// Compress char[] array to byte[]. ++// result: the array length if every element in array can be encoded; 0, otherwise. ++void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label done; ++ encode_iso_array_v(src, dst, len, result, tmp); ++ beqz(len, done); ++ mv(result, zr); ++ bind(done); ++} ++ ++// result: the number of elements had been encoded. ++void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label loop, DIFFERENCE, DONE; ++ ++ BLOCK_COMMENT("encode_iso_array_v {"); ++ mv(result, 0); ++ ++ bind(loop); ++ mv(tmp, 0xff); ++ vsetvli(t0, len, Assembler::e16, Assembler::m2); ++ vle16_v(v2, src); ++ // if element > 0xff, stop ++ vmsgtu_vx(v1, v2, tmp); ++ vfirst_m(tmp, v1); ++ vmsbf_m(v0, v1); ++ // compress char to byte ++ vsetvli(t0, len, Assembler::e8); ++ vncvt_x_x_w(v1, v2, Assembler::v0_t); ++ vse8_v(v1, dst, Assembler::v0_t); ++ ++ bgez(tmp, DIFFERENCE); ++ add(result, result, t0); ++ add(dst, dst, t0); ++ sub(len, len, t0); ++ shadd(src, t0, src, t0, 1); ++ bnez(len, loop); ++ j(DONE); ++ ++ bind(DIFFERENCE); ++ add(result, result, tmp); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} encode_iso_array_v"); ++} ++ ++void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { ++ Label LOOP, SET_RESULT, DONE; ++ ++ BLOCK_COMMENT("count_positives_v {"); ++ mv(result, zr); ++ ++ bind(LOOP); ++ vsetvli(t0, len, Assembler::e8, Assembler::m4); ++ vle8_v(v0, ary); ++ vmslt_vx(v0, v0, zr); ++ vfirst_m(tmp, v0); ++ bgez(tmp, SET_RESULT); ++ // if tmp == -1, all bytes are positive ++ add(result, result, t0); ++ ++ sub(len, len, t0); ++ add(ary, ary, t0); ++ bnez(len, LOOP); ++ j(DONE); ++ ++ // add remaining positive bytes count ++ bind(SET_RESULT); ++ add(result, result, tmp); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} count_positives_v"); ++} ++ ++void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL) { ++ mv(result, zr); ++ ++ Label loop, MATCH, DONE; ++ Assembler::SEW sew = isL ? Assembler::e8 : Assembler::e16; ++ bind(loop); ++ vsetvli(tmp1, cnt1, sew, Assembler::m4); ++ vlex_v(v0, str1, sew); ++ vmseq_vx(v0, v0, ch); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, MATCH); // if equal, return index ++ ++ add(result, result, tmp1); ++ sub(cnt1, cnt1, tmp1); ++ if (!isL) slli(tmp1, tmp1, 1); ++ add(str1, str1, tmp1); ++ bnez(cnt1, loop); ++ ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ add(result, result, tmp2); ++ ++ bind(DONE); ++} ++ ++// Set dst to NaN if any NaN input. 
++void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ ++ is_min ? vfmin_vv(dst, src1, src2) ++ : vfmax_vv(dst, src1, src2); ++ ++ vmfne_vv(v0, src1, src1); ++ vfadd_vv(dst, src1, src1, Assembler::v0_t); ++ vmfne_vv(v0, src2, src2); ++ vfadd_vv(dst, src2, src2, Assembler::v0_t); ++} ++ ++// Set dst to NaN if any NaN input. ++void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min) { ++ assert_different_registers(src2, tmp1, tmp2); ++ ++ Label L_done, L_NaN; ++ vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); ++ vfmv_s_f(tmp2, src1); ++ ++ is_min ? vfredmin_vs(tmp1, src2, tmp2) ++ : vfredmax_vs(tmp1, src2, tmp2); ++ ++ fsflags(zr); ++ // Checking NaNs ++ vmflt_vf(tmp2, src2, src1); ++ frflags(t0); ++ bnez(t0, L_NaN); ++ j(L_done); ++ ++ bind(L_NaN); ++ vfmv_s_f(tmp2, src1); ++ vfredsum_vs(tmp1, src2, tmp2); ++ ++ bind(L_done); ++ vfmv_f_s(dst, tmp1); ++} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +new file mode 100644 +index 00000000000..c71df4c101b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -0,0 +1,193 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP ++ ++// C2_MacroAssembler contains high-level macros for C2 ++ ++ private: ++ void element_compare(Register r1, Register r2, ++ Register result, Register cnt, ++ Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, ++ VectorRegister vrs, ++ bool is_latin, Label& DONE); ++ public: ++ ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); ++ ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); ++ ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); ++ ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; ++ ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); ++ ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } ++ ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } ++ ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } ++ ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { ++ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); ++ unspill(v0, src_offset); ++ spill(v0, dst_offset); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ ++ // intrinsic methods implemented by rvv instructions ++ void string_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void arrays_equals_v(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_compare_v(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register result, ++ Register tmp1, Register tmp2, ++ int encForm); ++ ++ void clear_array_v(Register base, Register cnt); ++ ++ void byte_array_inflate_v(Register src, Register dst, ++ Register len, Register tmp); ++ ++ void char_array_compress_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); ++ ++ void encode_iso_array_v(Register src, Register dst, ++ Register len, Register result, ++ Register tmp); ++ ++ void count_positives_v(Register ary, Register len, ++ Register result, Register tmp); ++ ++ void string_indexof_char_v(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ bool isL); ++ ++ void minmax_FD_v(VectorRegister dst, ++ VectorRegister src1, VectorRegister src2, ++ bool is_double, bool is_min); ++ ++ void reduce_minmax_FD_v(FloatRegister dst, ++ FloatRegister src1, VectorRegister src2, ++ VectorRegister tmp1, VectorRegister tmp2, ++ bool is_double, bool is_min); ++ ++#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 000000000..bf4efa629 +index 00000000000..53a41665f4b --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,91 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11016,23 +13580,19 @@ index 000000000..bf4efa629 +// (see c2_globals.hpp). Alpha-sorted. 
+ +define_pd_global(bool, BackgroundCompilation, true); -+define_pd_global(bool, UseTLAB, true); -+define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); -+define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 64); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); -+define_pd_global(intx, INTPRESSURE, 24); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); @@ -11059,27 +13619,24 @@ index 000000000..bf4efa629 +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + -+// Heap related flags -+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); -+ +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + +#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp new file mode 100644 -index 000000000..3cb4a4995 +index 00000000000..cdbd69807be --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11115,14 +13672,67 @@ index 000000000..3cb4a4995 + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); + reg_mask_init(); +} +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +new file mode 100644 +index 00000000000..a90d9fdc160 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++#define __ masm. ++void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { ++ assert(SharedRuntime::polling_page_return_handler_blob() != NULL, ++ "polling page return stub not created yet"); ++ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); ++ RuntimeAddress callback_addr(stub); ++ ++ __ bind(entry->_stub_label); ++ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); ++ masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); ++ __ la(t0, safepoint_pc.target()); ++ __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ far_jump(callback_addr); ++} ++#undef __ diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp new file mode 100644 -index 000000000..881900892 +index 00000000000..14a68b45026 --- /dev/null +++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp @@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -11159,14 +13769,14 @@ index 000000000..881900892 +#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp new file mode 100644 -index 000000000..0354a93a0 +index 00000000000..75bc4be7840 --- /dev/null +++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,154 @@ +@@ -0,0 +1,149 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -11234,13 +13844,13 @@ index 000000000..0354a93a0 +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { -+ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr -+ return 12 * NativeInstruction::instruction_size; ++ // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size; +} + +int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count four instructions here (although -+ // there are only three) because we sometimes emit an alignment nop. ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. + // Trampoline stubs are always word aligned. + return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; +} @@ -11251,7 +13861,7 @@ index 000000000..0354a93a0 +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { -+ address stub = find_stub(false /* is_aot */); ++ address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { @@ -11263,16 +13873,11 @@ index 000000000..0354a93a0 + + // Creation also verifies the object. + NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); -+#ifndef PRODUCT ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); ++#ifdef ASSERT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + -+ // read the value once -+ volatile intptr_t data = method_holder->data(); -+ assert(data == 0 || data == (intptr_t)callee(), -+ "a) MT-unsafe modification of inline cache"); -+ assert(data == 0 || jump->jump_destination() == entry, -+ "b) MT-unsafe modification of inline cache"); ++ verify_mt_safe(callee, entry, method_holder, jump); +#endif + // Update stub. + method_holder->set_data((intptr_t)callee()); @@ -11283,14 +13888,16 @@ index 000000000..0354a93a0 +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { -+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); ++ assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + method_holder->set_data(0); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- @@ -11300,16 +13907,14 @@ index 000000000..0354a93a0 +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); -+ if (os::is_MP()) { -+ _call->verify_alignment(); -+ } ++ _call->verify_alignment(); + + // Verify stub. -+ address stub = find_stub(false /* is_aot */); ++ address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. 
@@ -11319,14 +13924,14 @@ index 000000000..0354a93a0 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp new file mode 100644 -index 000000000..011e965ad +index 00000000000..bceadcc5dcc --- /dev/null +++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,136 @@ +/* -+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11352,11 +13957,7 @@ index 000000000..011e965ad +#ifndef CPU_RISCV_COPY_RISCV_HPP +#define CPU_RISCV_COPY_RISCV_HPP + -+// Inline functions for memory copy and fill. -+ -+// Contains inline asm implementations -+#include OS_CPU_HEADER_INLINE(copy) -+ ++#include OS_CPU_HEADER(copy) + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; @@ -11382,94 +13983,93 @@ index 000000000..011e965ad + (void)memset(to, 0, count); +} + ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ shared_disjoint_words_atomic(from, to, count); ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void 
pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ +#endif // CPU_RISCV_COPY_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -new file mode 100644 -index 000000000..31cee7103 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP -+ -+// Nothing to do on riscv -+ -+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp new file mode 100644 -index 000000000..e97b89327 +index 00000000000..b0e5560c906 --- /dev/null +++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,37 @@ -+/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). 
-+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP -+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP -+ -+ static int pd_instruction_alignment() { -+ return 1; -+ } -+ -+ static const char* pd_cpu_opts() { -+ return ""; -+ } -+ -+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp -new file mode 100644 -index 000000000..be6f1a67f ---- /dev/null -+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,683 @@ +@@ -0,0 +1,58 @@ +/* + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. @@ -11496,10 +14096,164 @@ index 000000000..be6f1a67f + * + */ + ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++ ++static int pd_instruction_alignment() { ++ return 1; ++} ++ ++static const char* pd_cpu_opts() { ++ return ""; ++} ++ ++// Returns address of n-th instruction preceding addr, ++// NULL if no preceding instruction can be found. ++// On riscv, we assume a constant instruction length. ++// It might be beneficial to check "is_readable" as we do on ppc and s390. ++static address find_prev_instr(address addr, int n_instr) { ++ return addr - Assembler::instruction_size * n_instr; ++} ++ ++// special-case instruction decoding. ++// There may be cases where the binutils disassembler doesn't do ++// the perfect job. In those cases, decode_instruction0 may kick in ++// and do it right. ++// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" ++static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { ++ return here; ++} ++ ++// platform-specific instruction annotations (like value of loaded constants) ++static void annotate(address pc, outputStream* st) {} ++ ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +new file mode 100644 +index 00000000000..5c700be9c91 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ +#include "precompiled.hpp" ++#include "prims/foreign_globals.hpp" ++#include "utilities/debug.hpp" ++ ++// Stubbed out, implement later ++const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { ++ Unimplemented(); ++ return {}; ++} ++ ++const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { ++ ShouldNotCallThis(); ++ return {}; ++} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +new file mode 100644 +index 00000000000..3ac89752c27 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP ++#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP ++ ++class ABIDescriptor {}; ++class BufferLayout {}; ++ ++#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +new file mode 100644 +index 00000000000..6e38960598a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,697 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" -+#include "oops/markOop.hpp" ++#include "memory/universe.hpp" ++#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" @@ -11507,8 +14261,9 @@ index 000000000..be6f1a67f +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" -+#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stackWatermarkSet.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_riscv.inline.hpp" @@ -11531,16 +14286,8 @@ index 000000000..be6f1a67f + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers -+ static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ assert_cond(thread != NULL); -+ size_t usable_stack_size = thread->stack_size() - stack_guard_size; -+ + // sp must be within the usable part of the stack (not in guards) -+ bool sp_safe = (addr_sp < thread->stack_base()) && -+ (addr_sp >= thread->stack_base() - usable_stack_size); -+ -+ if (!sp_safe) { ++ if (!thread->is_in_usable_stack(addr_sp)) { + return false; + } + @@ -11557,16 +14304,14 @@ index 000000000..be6f1a67f + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + -+ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); -+ -+ if (!unextended_sp_safe) { ++ if (!thread->is_in_full_stack_checked(unextended_sp)) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && -+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && ++ thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); + + // We know sp/unextended_sp are safe only fp is questionable here + @@ -11627,12 +14372,13 @@ index 000000000..be6f1a67f + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? -+ if ((address)sender_sp >= thread->stack_base()) { ++ if (!thread->is_in_full_stack_checked((address)sender_sp)) { + return false; + } ++ + sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp + frame::return_addr_offset); -+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); + } + + @@ -11642,9 +14388,7 @@ index 000000000..be6f1a67f + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
-+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + @@ -11677,9 +14421,7 @@ index 000000000..be6f1a67f + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { -+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); -+ -+ if (!saved_fp_safe) { ++ if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { + return false; + } + @@ -11739,6 +14481,7 @@ index 000000000..be6f1a67f +} + +void frame::patch_pc(Thread* thread, address pc) { ++ assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", @@ -11748,7 +14491,6 @@ index 000000000..be6f1a67f + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; -+ _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); @@ -11831,6 +14573,21 @@ index 000000000..be6f1a67f + return fr; +} + ++OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool frame::optimized_entry_frame_is_first() const { ++ ShouldNotCallThis(); ++ return false; ++} ++ ++frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { ++ ShouldNotCallThis(); ++ return {}; ++} ++ +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// @@ -11949,8 +14706,8 @@ index 000000000..be6f1a67f +} + +//------------------------------------------------------------------------------ -+// frame::sender -+frame frame::sender(RegisterMap* map) const { ++// frame::sender_raw ++frame frame::sender_raw(RegisterMap* map) const { + // Default is we done have to follow them. 
The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); @@ -11975,6 +14732,16 @@ index 000000000..be6f1a67f + return frame(sender_sp(), link(), sender_pc()); +} + ++frame frame::sender(RegisterMap* map) const { ++ frame result = sender_raw(map); ++ ++ if (map->process_frames()) { ++ StackWatermarkSet::on_iteration(map->thread(), result); ++ } ++ ++ return result; ++} ++ +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks @@ -11996,13 +14763,12 @@ index 000000000..be6f1a67f + // do some validation of frame elements + + // first the method -+ + Method* m = *interpreter_frame_method_addr(); -+ + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) { + return false; + } ++ + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point @@ -12013,7 +14779,7 @@ index 000000000..be6f1a67f + } + + // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); ++ address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } @@ -12023,12 +14789,13 @@ index 000000000..be6f1a67f + if (MetaspaceObj::is_valid(cp) == false) { + return false; + } -+ // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); + ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); + if (locals > thread->stack_base() || locals < (address) fp()) { + return false; + } ++ + // We'd have to be pretty unlucky to be mislead at this point + return true; +} @@ -12059,7 +14826,7 @@ index 000000000..be6f1a67f + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? (oop)NULL : *obj_p; + } -+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ assert(Universe::is_in_heap_or_null(obj), "sanity check"); + *oop_result = obj; + break; + } @@ -12133,7 +14900,6 @@ index 000000000..be6f1a67f + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); +} + -+void frame::pd_ps() {} +#endif + +void JavaFrameAnchor::make_walkable(JavaThread* thread) { @@ -12155,14 +14921,13 @@ index 000000000..be6f1a67f +} diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 000000000..7acabcbba +index 00000000000..c06aaa9e391 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,200 @@ +@@ -0,0 +1,202 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12212,7 +14977,7 @@ index 000000000..7acabcbba +// [padding ] + +// [methodData ] = mdp() mdx_offset -+// [methodOop ] = method() method_offset ++// [Method ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset @@ -12293,7 +15058,7 @@ index 000000000..7acabcbba + // Entry frames + // n.b. 
these values are determined by the layout defined in + // stubGenerator for the Java call stub -+ entry_frame_after_call_words = 34, ++ entry_frame_after_call_words = 22, + entry_frame_call_wrapper_offset = -10, + + // we don't need a save area @@ -12354,19 +15119,22 @@ index 000000000..7acabcbba + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + ++ // returns the sending frame, without applying any barriers ++ frame sender_raw(RegisterMap* map) const; ++ +#endif // CPU_RISCV_FRAME_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 000000000..5bc6b430c +index 00000000000..5ac1bf57f57 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,257 @@ +@@ -0,0 +1,248 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -12504,11 +15272,6 @@ index 000000000..5bc6b430c +// frame. +inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+// Relationals on frames based -+ -+// Return true if the frame is younger (more recent activation) than the frame represented by id -+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() < id ; } +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } @@ -12602,35 +15365,31 @@ index 000000000..5bc6b430c + + +// Compiled frames ++PRAGMA_DIAG_PUSH ++PRAGMA_NONNULL_IGNORED +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ return (*result_adr); -+ } else { -+ ShouldNotReachHere(); -+ return NULL; -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ *result_adr = obj; -+ } else { -+ ShouldNotReachHere(); -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; +} ++PRAGMA_DIAG_POP + +#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..6f778956d +index 00000000000..1c46b3947d3 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,479 @@ +@@ -0,0 +1,484 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12675,6 +15434,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if (!dest_uninitialized) { + Label done; @@ -12717,6 +15477,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + __ push_reg(saved_regs, sp); + assert_different_registers(start, count, tmp); + assert_different_registers(c_rarg0, count); @@ -12736,7 +15497,8 @@ index 000000000..6f778956d + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ ++ ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; @@ -12810,6 +15572,7 @@ index 000000000..6f778956d + Register thread, + Register tmp, + Register tmp2) { ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + t0); @@ -12822,7 +15585,6 @@ index 000000000..6f778956d + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; @@ -12839,10 +15601,10 @@ index 000000000..6f778956d + + // storing region crossing non-NULL, is card already dirty? 
+ -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + -+ __ srli(card_addr, store_addr, CardTable::card_shift); ++ __ srli(card_addr, store_addr, CardTable::card_shift()); + + // get the address of the card + __ load_byte_map_base(tmp2); @@ -12875,7 +15637,7 @@ index 000000000..6f778956d + + __ bind(runtime); + // save the live input values -+ RegSet saved = RegSet::of(store_addr, new_val); ++ RegSet saved = RegSet::of(store_addr); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_reg(saved, sp); @@ -12885,7 +15647,8 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { -+ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; @@ -12907,16 +15670,19 @@ index 000000000..6f778956d +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + // flatten object address if needed + if (dst.offset() == 0) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + g1_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12924,7 +15690,7 @@ index 000000000..6f778956d + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + // G1 barrier needs uncompressed oop for region cross check. 
+ Register new_val = val; @@ -12932,9 +15698,9 @@ index 000000000..6f778956d + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + g1_write_barrier_post(masm, -+ tmp3 /* store_adr */, ++ x13 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12961,8 +15727,7 @@ index 000000000..6f778956d + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), -+ false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -13043,7 +15808,6 @@ index 000000000..6f778956d + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; @@ -13062,7 +15826,7 @@ index 000000000..6f778956d + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); -+ __ srli(card_offset, card_offset, CardTable::card_shift); ++ __ srli(card_offset, card_offset, CardTable::card_shift()); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr @@ -13109,13 +15873,13 @@ index 000000000..6f778956d +#endif // COMPILER1 diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..7f85e002d +index 00000000000..37bc183f39c --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13175,7 +15939,7 @@ index 000000000..7f85e002d + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 @@ -13191,15 +15955,52 @@ index 000000000..7f85e002d +}; + +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +new file mode 100644 +index 00000000000..8735fd014ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++ ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..203b82744 +index 00000000000..3c115a2ea02 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,226 @@ +@@ -0,0 +1,302 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13223,15 +16024,24 @@ index 000000000..203b82744 + */ + +#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" +#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ + // RA is live. It must be saved around calls. 
+ + bool in_heap = (decorators & IN_HEAP) != 0; @@ -13271,7 +16081,8 @@ index 000000000..203b82744 +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { @@ -13311,16 +16122,9 @@ index 000000000..203b82744 + +} + -+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { -+ __ beq(obj1, obj2, equal, is_far); -+} -+ -+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { -+ __ bne(obj1, obj2, nequal, is_far); -+} -+ +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andi(obj, obj, ~JNIHandles::weak_tag_mask); @@ -13335,6 +16139,7 @@ index 000000000..203b82744 + Register tmp2, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp2); + assert_different_registers(obj, var_size_in_bytes); + Register end = tmp2; @@ -13364,6 +16169,7 @@ index 000000000..203b82744 + Register tmp1, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, var_size_in_bytes, tmp1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ j(slow_case); @@ -13404,7 +16210,7 @@ index 000000000..203b82744 + // If heap_top hasn't been changed by some other thread, update it. + __ sc_d(t1, end, t0, Assembler::rl); + __ bnez(t1, retry); -+ ++ + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} @@ -13413,6 +16219,7 @@ index 000000000..203b82744 + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1) { ++ assert_cond(masm != NULL); + assert(tmp1->is_valid(), "need temp reg"); + + __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); @@ -13423,15 +16230,85 @@ index 000000000..203b82744 + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} ++ ++void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ ++ if (bs_nm == NULL) { ++ return; ++ } ++ ++ // RISCV atomic operations require that the memory address be naturally aligned. ++ __ align(4); ++ ++ Label skip, guard; ++ Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); ++ ++ __ lwu(t0, guard); ++ ++ // Subsequent loads of oops must occur after load of guard value. ++ // BarrierSetNMethod::disarm sets guard with release semantics. ++ __ membar(MacroAssembler::LoadLoad); ++ __ lwu(t1, thread_disarmed_addr); ++ __ beq(t0, t1, skip); ++ ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); ++ __ jalr(ra, t0, offset); ++ __ j(skip); ++ ++ __ bind(guard); ++ ++ assert(__ offset() % 4 == 0, "bad alignment"); ++ __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
++ ++ __ bind(skip); ++} ++ ++void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { ++ BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs == NULL) { ++ return; ++ } ++ ++ Label bad_call; ++ __ beqz(xmethod, bad_call); ++ ++ // Pointer chase to the method holder to find out if the method is concurrently unloading. ++ Label method_live; ++ __ load_method_holder_cld(t0, xmethod); ++ ++ // Is it a strong CLD? ++ __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); ++ __ bnez(t1, method_live); ++ ++ // Is it a weak but alive CLD? ++ __ push_reg(RegSet::of(x28, x29), sp); ++ ++ __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); ++ ++ // Uses x28 & x29, so we must pass new temporaries. ++ __ resolve_weak_handle(x28, x29); ++ __ mv(t0, x28); ++ ++ __ pop_reg(RegSet::of(x28, x29), sp); ++ ++ __ bnez(t0, method_live); ++ ++ __ bind(bad_call); ++ ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(method_live); ++} diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..964fc28be +index 00000000000..b85f7f5582b --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,75 @@ +@@ -0,0 +1,79 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13458,6 +16335,8 @@ index 000000000..964fc28be +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + @@ -13475,9 +16354,8 @@ index 000000000..964fc28be + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false); -+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false); ++ Address dst, Register val, Register tmp1, Register tmp2); ++ + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + @@ -13488,7 +16366,7 @@ index 000000000..964fc28be + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 ++ bool is_far = false + ); + + void eden_allocate(MacroAssembler* masm, @@ -13497,22 +16375,202 @@ index 000000000..964fc28be + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase 
could be more than 12KiB in C1 ++ bool is_far = false + ); + virtual void barrier_stubs_init() {} ++ ++ virtual void nmethod_entry_barrier(MacroAssembler* masm); ++ virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} +}; + +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +new file mode 100644 +index 00000000000..ae7ee4c5a44 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +@@ -0,0 +1,171 @@ ++/* ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "code/codeCache.hpp" ++#include "code/nativeInst.hpp" ++#include "gc/shared/barrierSetNMethod.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/registerMap.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/align.hpp" ++#include "utilities/debug.hpp" ++ ++class NativeNMethodBarrier: public NativeInstruction { ++ address instruction_address() const { return addr_at(0); } ++ ++ int *guard_addr() { ++ /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ ++ return reinterpret_cast(instruction_address() + 12 * 4); ++ } ++ ++public: ++ int get_value() { ++ return Atomic::load_acquire(guard_addr()); ++ } ++ ++ void set_value(int value) { ++ Atomic::release_store(guard_addr(), value); ++ } ++ ++ void verify() const; ++}; ++ ++// Store the instruction bitmask, bits and name for checking the barrier. 
++struct CheckInsn { ++ uint32_t mask; ++ uint32_t bits; ++ const char *name; ++}; ++ ++static const struct CheckInsn barrierInsn[] = { ++ { 0x00000fff, 0x00000297, "auipc t0, 0 "}, ++ { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, ++ { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, ++ { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, ++ { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, ++ { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, ++ { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, ++ { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, ++ { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, ++ { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, ++ { 0x00000fff, 0x0000006f, "j skip "} ++ /* guard: */ ++ /* 32bit nmethod guard value */ ++ /* skip: */ ++}; ++ ++// The encodings must match the instructions emitted by ++// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific ++// register numbers and immediate values in the encoding. ++void NativeNMethodBarrier::verify() const { ++ intptr_t addr = (intptr_t) instruction_address(); ++ for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { ++ uint32_t inst = *((uint32_t*) addr); ++ if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { ++ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); ++ fatal("not an %s instruction.", barrierInsn[i].name); ++ } ++ addr += 4; ++ } ++} ++ ++ ++/* We're called from an nmethod when we need to deoptimize it. We do ++ this by throwing away the nmethod's frame and jumping to the ++ ic_miss stub. This looks like there has been an IC miss at the ++ entry of the nmethod, so we resolve the call, which will fall back ++ to the interpreter if the nmethod has been unloaded. */ ++void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { ++ ++ typedef struct { ++ intptr_t *sp; intptr_t *fp; address ra; address pc; ++ } frame_pointers_t; ++ ++ frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); ++ ++ JavaThread *thread = JavaThread::current(); ++ RegisterMap reg_map(thread, false); ++ frame frame = thread->last_frame(); ++ ++ assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); ++ assert(frame.cb() == nm, "must be"); ++ frame = frame.sender(®_map); ++ ++ LogTarget(Trace, nmethod, barrier) out; ++ if (out.is_enabled()) { ++ ResourceMark mark; ++ log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", ++ nm->method()->name_and_sig_as_C_string(), ++ nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, ++ thread->name(), frame.sp(), nm->verified_entry_point()); ++ } ++ ++ new_frame->sp = frame.sp(); ++ new_frame->fp = frame.fp(); ++ new_frame->ra = frame.pc(); ++ new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); ++} ++ ++// This is the offset of the entry barrier from where the frame is completed. ++// If any code changes between the end of the verified entry where the entry ++// barrier resides, and the completion of the frame, then ++// NativeNMethodCmpBarrier::verify() will immediately complain when it does ++// not find the expected native instruction at this offset, which needs updating. ++// Note that this offset is invariant of PreserveFramePointer. 
++ ++// see BarrierSetAssembler::nmethod_entry_barrier ++// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 ++static const int entry_barrier_offset = -4 * 13; ++ ++static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { ++ address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; ++ NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); ++ debug_only(barrier->verify()); ++ return barrier; ++} ++ ++void BarrierSetNMethod::disarm(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return; ++ } ++ ++ // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ ++ barrier->set_value(disarmed_value()); ++} ++ ++bool BarrierSetNMethod::is_armed(nmethod* nm) { ++ if (!supports_entry_barrier(nm)) { ++ return false; ++ } ++ ++ NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); ++ return barrier->get_value() != disarmed_value(); ++} diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1720488fb +index 00000000000..a419f92b5f6 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,111 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13541,21 +16599,19 @@ index 000000000..1720488fb +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); -+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); -+ -+ __ srli(obj, obj, CardTable::card_shift); ++ __ srli(obj, obj, CardTable::card_shift()); + + assert(CardTable::dirty_card_val() == 0, "must be"); + @@ -13570,20 +16626,15 @@ index 000000000..1720488fb + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } + __ sb(zr, Address(tmp)); + } +} + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); -+ BarrierSet* bs = BarrierSet::barrier_set(); -+ CardTableBarrierSet* ctbs = barrier_set_cast(bs); -+ CardTable* ct = ctbs->card_table(); + + Label L_loop, L_done; + const Register end = count; @@ -13593,15 +16644,12 @@ index 000000000..1720488fb + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + -+ __ srli(start, start, CardTable::card_shift); -+ __ srli(end, end, CardTable::card_shift); ++ __ srli(start, start, CardTable::card_shift()); ++ __ srli(end, end, CardTable::card_shift()); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); -+ if (ct->scanned_concurrently()) { -+ __ membar(MacroAssembler::StoreStore); -+ } + + __ bind(L_loop); + __ add(tmp, start, count); @@ -13612,33 +16660,34 @@ index 000000000..1720488fb +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), tmp3); ++ store_check(masm, dst.base(), x13); + } else { -+ __ la(tmp3, dst); -+ store_check(masm, tmp3, t0); ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); + } + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp 
b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..a5b3f9fe8 +index 00000000000..686fe8fa478 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13674,20 +16723,19 @@ index 000000000..a5b3f9fe8 + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..b82275297 +index 00000000000..7aa2015f9ec --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,54 @@ +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13718,6 +16766,7 @@ index 000000000..b82275297 + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { ++ + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } @@ -13732,22 +16781,22 @@ index 000000000..b82275297 +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ if (type == T_OBJECT || type == T_ARRAY) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..df206cc87 +index 00000000000..00419c3163c --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13788,7 +16837,7 @@ index 000000000..df206cc87 + Register start, Register count, Register tmp, RegSet saved_regs) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, @@ -13796,18 +16845,18 @@ index 000000000..df206cc87 + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp new file mode 100644 -index 000000000..6657f1be0 +index 00000000000..cd568cc723f --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,124 @@ +@@ -0,0 +1,117 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13834,6 +16883,7 @@ index 000000000..6657f1be0 +#include "precompiled.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" @@ -13859,14 +16909,6 @@ index 000000000..6657f1be0 + + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, + /* release */ Assembler::rl, /* is_cae */ false, result); -+ if (UseBarriersForVolatile) { -+ // The membar here is necessary to prevent reordering between the -+ // release store in the CAS above and a subsequent volatile load. -+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before -+ // volatile loads which means we don't need an additional barrier -+ // here (see LIRGenerator::volatile_field_load()). 
-+ __ membar(MacroAssembler::AnyAny); -+ } +} + +#undef __ @@ -13918,7 +16960,7 @@ index 000000000..6657f1be0 + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { -+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; @@ -13932,13 +16974,13 @@ index 000000000..6657f1be0 +} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1bc01e454 +index 00000000000..d0ac6e52436 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,743 @@ +@@ -0,0 +1,712 @@ +/* + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13965,7 +17007,7 @@ index 000000000..1bc01e454 +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahForwarding.hpp" -+#include "gc/shenandoah/shenandoahHeap.hpp" ++#include "gc/shenandoah/shenandoahHeap.inline.hpp" +#include "gc/shenandoah/shenandoahHeapRegion.hpp" +#include "gc/shenandoah/shenandoahRuntime.hpp" +#include "gc/shenandoah/shenandoahThreadLocalData.hpp" @@ -13982,14 +17024,12 @@ index 000000000..1bc01e454 + +#define __ masm-> + -+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; -+ +void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || -+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ + Label done; + + // Avoid calling runtime if count == 0 @@ -14139,7 +17179,7 @@ index 000000000..1bc01e454 + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst -+ RegSet savedRegs = RegSet::of(t2); ++ RegSet saved_regs = RegSet::of(t2); + bool borrow_reg = (tmp == noreg); + if (borrow_reg) { + // No free registers available. Make one useful. 
@@ -14147,38 +17187,55 @@ index 000000000..1bc01e454 + if (tmp == dst) { + tmp = t1; + } -+ savedRegs += RegSet::of(tmp); ++ saved_regs += RegSet::of(tmp); + } + + assert_different_registers(tmp, dst, t2); -+ __ push_reg(savedRegs, sp); ++ __ push_reg(saved_regs, sp); + + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 -+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); ++ __ andi(t2, tmp, markWord::lock_mask_in_place); + __ bnez(t2, done); -+ __ ori(tmp, tmp, markOopDesc::marked_value); ++ __ ori(tmp, tmp, markWord::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + -+ __ pop_reg(savedRegs, sp); ++ __ pop_reg(saved_regs, sp); +} + -+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, Address load_addr) { ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, ++ Register dst, ++ Address load_addr, ++ DecoratorSet decorators) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t1, t2); ++ assert_different_registers(load_addr.base(), t0, t1); + -+ Label done; ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ bool is_narrow = UseCompressedOops && !is_native; ++ ++ Label heap_stable, not_cset; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability -+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); -+ __ beqz(t1, done); ++ if (is_strong) { ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, heap_stable); ++ } else { ++ Label lrb; ++ __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); ++ __ bnez(t0, lrb); ++ __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, heap_stable); ++ __ bind(lrb); ++ } + + // use x11 for load address + Register result_dst = dst; @@ -14188,34 +17245,58 @@ index 000000000..1bc01e454 + } + + // Save x10 and x11, unless it is an output register -+ RegSet to_save = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(to_save, sp); ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); + __ la(x11, load_addr); + __ mv(x10, dst); + -+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); ++ // Test for in-cset ++ if (is_strong) { ++ __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1)); ++ __ andi(t0, t1, 1); ++ __ beqz(t0, not_cset); ++ } + ++ __ push_call_clobbered_registers(); ++ if (is_strong) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } else if (is_weak) { ++ if (is_narrow) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ } else { ++ assert(is_phantom, "only 
remaining strength"); ++ assert(!is_narrow, "phantom access cannot be narrow"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ __ bind(not_cset); + __ mv(result_dst, x10); -+ __ pop_reg(to_save, sp); ++ __ pop_reg(saved_regs, sp); + -+ __ bind(done); ++ __ bind(heap_stable); + __ leave(); +} + +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { + if (ShenandoahIUBarrier) { + __ push_call_clobbered_registers(); -+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); -+ __ pop_call_clobbered_registers(); -+ } -+} + -+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { -+ if (ShenandoahLoadRefBarrier) { -+ Label is_null; -+ __ beqz(dst, is_null); -+ load_reference_barrier_not_null(masm, dst, load_addr); -+ __ bind(is_null); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ ++ __ pop_call_clobbered_registers(); + } +} + @@ -14249,30 +17330,28 @@ index 000000000..1bc01e454 + + // 2: load a reference from src location and apply LRB if needed + if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); -+ bool ist5 = (dst == src.base()); -+ if (ist5) { -+ __ push_reg(RegSet::of(x30), sp); -+ } + Register result_dst = dst; + + // Preserve src location for LRB ++ RegSet saved_regs; + if (dst == src.base()) { -+ dst = x30; ++ dst = (src.base() == x28) ? x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); + } + assert_different_registers(dst, src.base()); + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + -+ load_reference_barrier(masm, dst, src); ++ load_reference_barrier(masm, dst, src, decorators); + + if (dst != result_dst) { + __ mv(result_dst, dst); + dst = result_dst; + } + -+ if (ist5) { -+ __ pop_reg(RegSet::of(x30), sp); ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); + } + } else { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); @@ -14295,24 +17374,24 @@ index 000000000..1bc01e454 +} + +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool on_oop = is_reference_type(type); + if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + return; + } + + // flatten object address if needed + if (dst.offset() == 0) { -+ if (dst.base() != tmp3) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); + } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + shenandoah_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -14320,7 +17399,7 @@ index 000000000..1bc01e454 + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + iu_barrier(masm, val, tmp1); + // G1 barrier needs uncompressed oop for region 
cross check. @@ -14329,7 +17408,7 @@ index 000000000..1bc01e454 + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + } +} + @@ -14370,39 +17449,10 @@ index 000000000..1bc01e454 +// from-space, or it refers to the to-space version of an object that +// is being evacuated out of from-space. +// -+// By default, this operation implements sequential consistency and the -+// value held in the result register following execution of the -+// generated code sequence is 0 to indicate failure of CAS, non-zero -+// to indicate success. Arguments support variations on this theme: -+// -+// acquire: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If acquire, successful -+// execution has the side effect of assuring that memory -+// values updated by other threads and "released" will be -+// visible to any read operations perfomed by this thread -+// which follow this operation in program order. This is a -+// special optimization that should not be enabled by default. -+// release: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If release, successful -+// completion of this operation has the side effect of -+// assuring that all writes to memory performed by this -+// thread that precede this operation in program order are -+// visible to all other threads that subsequently "acquire" -+// before reading the respective memory values. This is a -+// special optimization that should not be enabled by default. -+// is_cae: This turns CAS (compare and swap) into CAE (compare and -+// exchange). This HotSpot convention is that CAE makes -+// available to the caller the "failure witness", which is -+// the value that was stored in memory which did not match -+// the expected value. If is_cae, the result is the value -+// most recently fetched from addr rather than a boolean -+// success indicator. ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. 
+// +// Clobbers t0, t1 +void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, @@ -14452,7 +17502,7 @@ index 000000000..1bc01e454 + if (is_cae) { + __ mv(result, expected); + } else { -+ __ mv(result, 1); ++ __ addi(result, zr, 1); + } + __ j(done); + @@ -14485,8 +17535,7 @@ index 000000000..1bc01e454 + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), -+ stub->info(), false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -14499,6 +17548,12 @@ index 000000000..1bc01e454 + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + ++ DecoratorSet decorators = stub->decorators(); ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); @@ -14512,32 +17567,30 @@ index 000000000..1bc01e454 + __ mv(res, obj); + } + -+ // Check for null. -+ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ if (is_strong) { ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(tmp2, tmp2, tmp1); ++ __ lbu(tmp2, Address(tmp2)); ++ __ beqz(tmp2, *stub->continuation(), true /* is_far */); ++ } + -+ // Check for object in cset. -+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t0, tmp2, tmp1); -+ __ lb(tmp2, Address(t0)); -+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); -+ -+ // Check if object is already forwarded. -+ Label slow_path; -+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); -+ __ xori(tmp1, tmp1, -1); -+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); -+ __ bnez(t0, slow_path); -+ -+ // Decode forwarded object. 
-+ __ ori(tmp1, tmp1, markOopDesc::marked_value); -+ __ xori(res, tmp1, -1); -+ __ j(*stub->continuation()); -+ -+ __ bind(slow_path); + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); -+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); ++ ++ if (is_strong) { ++ if (is_native) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); ++ } else { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); ++ } ++ } else if (is_weak) { ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); ++ } else { ++ assert(is_phantom, "only remaining strength"); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); ++ } + + __ j(*stub->continuation()); +} @@ -14591,17 +17644,40 @@ index 000000000..1bc01e454 + __ epilogue(); +} + -+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + + __ push_call_clobbered_registers(); + __ load_parameter(0, x10); + __ load_parameter(1, x11); -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ ++ bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); ++ bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); ++ bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); ++ bool is_native = ShenandoahBarrierSet::is_native_access(decorators); ++ if (is_strong) { ++ if (is_native) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } else { ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); ++ } ++ } ++ } else if (is_weak) { ++ assert(!is_native, "weak must not be called off-heap"); ++ if (UseCompressedOops) { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); ++ } else { ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); ++ } + } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ assert(is_phantom, "only remaining strength"); ++ assert(is_native, "phantom must only be called off-heap"); ++ __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); + } + __ jalr(ra); + __ mv(t0, x10); @@ -14614,80 +17690,15 @@ index 000000000..1bc01e454 +#undef __ + +#endif // COMPILER1 -+ -+address ShenandoahBarrierSetAssembler::shenandoah_lrb() { -+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); -+ return _shenandoah_lrb; -+} -+ -+#define __ cgen->assembler()-> -+ -+// Shenandoah load reference barrier. -+// -+// Input: -+// x10: OOP to evacuate. Not null. -+// x11: load address -+// -+// Output: -+// x10: Pointer to evacuated OOP. -+// -+// Trash t0 t1 Preserve everything else. 
-+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { -+ __ align(6); -+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); -+ address start = __ pc(); -+ -+ Label slow_path; -+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); -+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); -+ __ add(t1, t1, t0); -+ __ lbu(t1, Address(t1, 0)); -+ __ andi(t0, t1, 1); -+ __ bnez(t0, slow_path); -+ __ ret(); -+ -+ __ bind(slow_path); -+ __ enter(); // required for proper stackwalking of RuntimeStub frame -+ -+ __ push_call_clobbered_registers(); -+ -+ if (UseCompressedOops) { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); -+ } else { -+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); -+ } -+ __ jalr(ra); -+ __ mv(t0, x10); -+ __ pop_call_clobbered_registers(); -+ __ mv(x10, t0); -+ -+ __ leave(); // required for proper stackwalking of RuntimeStub frame -+ __ ret(); -+ -+ return start; -+} -+ -+#undef __ -+ -+void ShenandoahBarrierSetAssembler::barrier_stubs_init() { -+ if (ShenandoahLoadRefBarrier) { -+ int stub_code_size = 2048; -+ ResourceMark rm; -+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); -+ CodeBuffer buf(bb); -+ StubCodeGenerator cgen(&buf); -+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); -+ } -+} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..84bc55706 +index 00000000000..a705f497667 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,92 @@ +@@ -0,0 +1,88 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14715,6 +17726,7 @@ index 000000000..84bc55706 + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; +class ShenandoahPreBarrierStub; @@ -14724,35 +17736,8 @@ index 000000000..84bc55706 +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+public: -+ static address shenandoah_lrb(); -+ -+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); -+ -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -+#endif -+ -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); -+ +private: + -+ static address _shenandoah_lrb; -+ + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, @@ -14770,19 +17755,41 @@ index 000000000..84bc55706 + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); -+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); -+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); + -+ address generate_shenandoah_lrb(StubCodeGenerator* cgen); ++public: ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* 
masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); +}; + +#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad new file mode 100644 -index 000000000..6e310697d +index 00000000000..6c855f23c2a --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,188 @@ +@@ -0,0 +1,285 @@ +// +// Copyright (c) 2018, Red Hat, Inc. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -14856,7 +17863,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14878,7 +17885,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14903,9 +17910,11 @@ index 000000000..6e310697d + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14913,6 +17922,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14924,6 +17934,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14931,6 +17942,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14943,6 +17955,7 @@ index 000000000..6e310697d + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
@@ -14951,6 +17964,49 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ + ins_pipe(pipe_slow); +%} + @@ -14962,6 +18018,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14969,18 +18026,1118 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} ++ ++instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_reserved(n)); ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 00000000000..3d3f4d4d774 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,441 @@ ++/* ++ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" ++#include "gc/z/zBarrier.inline.hpp" ++#include "gc/z/zBarrierSet.hpp" ++#include "gc/z/zBarrierSetAssembler.hpp" ++#include "gc/z/zBarrierSetRuntime.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/z/c1/zBarrierSetC1.hpp" ++#endif // COMPILER1 ++#ifdef COMPILER2 ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#endif // COMPILER2 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ if (!ZBarrierSet::barrier_needed(decorators, type)) { ++ // Barrier not needed ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ assert_different_registers(t1, src.base()); ++ assert_different_registers(t0, t1, dst); ++ ++ Label done; ++ ++ // Load bad mask into temp register. ++ __ la(t0, src); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ ld(dst, Address(t0)); ++ ++ // Test reference against bad mask. If mask bad, then we need to fix it up. ++ __ andr(t1, dst, t1); ++ __ beqz(t1, done); ++ ++ __ enter(); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(dst)); ++ ++ if (c_rarg0 != dst) { ++ __ mv(c_rarg0, dst); ++ } ++ ++ __ mv(c_rarg1, t0); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ // Make sure dst has the return value. ++ if (dst != x10) { ++ __ mv(dst, x10); ++ } ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(dst)); ++ __ leave(); ++ ++ __ bind(done); ++} ++ ++#ifdef ASSERT ++ ++void ZBarrierSetAssembler::store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2) { ++ // Verify value ++ if (is_reference_type(type)) { ++ // Note that src could be noreg, which means we ++ // are storing null and can skip verification. ++ if (val != noreg) { ++ Label done; ++ ++ // tmp1 and tmp2 are often set to noreg. ++ RegSet savedRegs = RegSet::of(t0); ++ __ push_reg(savedRegs, sp); ++ ++ __ ld(t0, address_bad_mask_from_thread(xthread)); ++ __ andr(t0, val, t0); ++ __ beqz(t0, done); ++ __ stop("Verify oop store failed"); ++ __ should_not_reach_here(); ++ __ bind(done); ++ __ pop_reg(savedRegs, sp); ++ } ++ } ++ ++ // Store value ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++} ++ ++#endif // ASSERT ++ ++void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs) { ++ if (!is_oop) { ++ // Barrier not needed ++ return; ++ } ++ ++ BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); ++ ++ assert_different_registers(src, count, t0); ++ ++ __ push_reg(saved_regs, sp); ++ ++ if (count == c_rarg0 && src == c_rarg1) { ++ // exactly backwards!! 
++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ __ xorr(c_rarg1, c_rarg0, c_rarg1); ++ __ xorr(c_rarg0, c_rarg0, c_rarg1); ++ } else { ++ __ mv(c_rarg0, src); ++ __ mv(c_rarg1, count); ++ } ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); ++ ++ __ pop_reg(saved_regs, sp); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); ++} ++ ++void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath) { ++ BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); ++ ++ assert_different_registers(jni_env, robj, tmp); ++ ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); ++ ++ // Compute the offset of address bad mask from the field of jni_environment ++ long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - ++ in_bytes(JavaThread::jni_environment_offset())); ++ ++ // Load the address bad mask ++ __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); ++ ++ // Check address bad mask ++ __ andr(tmp, robj, tmp); ++ __ bnez(tmp, slowpath); ++ ++ BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); ++} ++ ++#ifdef COMPILER2 ++ ++OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { ++ if (!OptoReg::is_reg(opto_reg)) { ++ return OptoReg::Bad; ++ } ++ ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_FloatRegister()) { ++ return opto_reg & ~1; ++ } ++ ++ return opto_reg; ++} ++ ++#undef __ ++#define __ _masm-> ++ ++class ZSaveLiveRegisters { ++private: ++ MacroAssembler* const _masm; ++ RegSet _gp_regs; ++ FloatRegSet _fp_regs; ++ VectorRegSet _vp_regs; ++ ++public: ++ void initialize(ZLoadBarrierStubC2* stub) { ++ // Record registers that needs to be saved/restored ++ RegMaskIterator rmi(stub->live()); ++ while (rmi.has_next()) { ++ const OptoReg::Name opto_reg = rmi.next(); ++ if (OptoReg::is_reg(opto_reg)) { ++ const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); ++ if (vm_reg->is_Register()) { ++ _gp_regs += RegSet::of(vm_reg->as_Register()); ++ } else if (vm_reg->is_FloatRegister()) { ++ _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); ++ } else if (vm_reg->is_VectorRegister()) { ++ const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); ++ _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); ++ } else { ++ fatal("Unknown register type"); ++ } ++ } ++ } ++ ++ // Remove C-ABI SOE registers, tmp regs and _ref register that will be updated ++ _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); ++ } ++ ++ ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _gp_regs(), ++ _fp_regs(), ++ _vp_regs() { ++ // Figure out what registers to save/restore ++ initialize(stub); ++ ++ // Save registers ++ __ push_reg(_gp_regs, sp); ++ __ push_fp(_fp_regs, sp); ++ __ push_vp(_vp_regs, sp); ++ } ++ ++ ~ZSaveLiveRegisters() { ++ // Restore registers ++ __ pop_vp(_vp_regs, sp); ++ __ pop_fp(_fp_regs, sp); ++ __ pop_reg(_gp_regs, sp); ++ } ++}; ++ ++class ZSetupArguments { ++private: ++ MacroAssembler* const _masm; ++ const Register _ref; ++ const Address _ref_addr; ++ ++public: ++ ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : ++ _masm(masm), ++ _ref(stub->ref()), ++ _ref_addr(stub->ref_addr()) { ++ ++ // Setup 
arguments ++ if (_ref_addr.base() == noreg) { ++ // No self healing ++ if (_ref != c_rarg0) { ++ __ mv(c_rarg0, _ref); ++ } ++ __ mv(c_rarg1, zr); ++ } else { ++ // Self healing ++ if (_ref == c_rarg0) { ++ // _ref is already at correct place ++ __ la(c_rarg1, _ref_addr); ++ } else if (_ref != c_rarg1) { ++ // _ref is in wrong place, but not in c_rarg1, so fix it first ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, _ref); ++ } else if (_ref_addr.base() != c_rarg0) { ++ assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); ++ __ mv(c_rarg0, _ref); ++ __ la(c_rarg1, _ref_addr); ++ } else { ++ assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); ++ if (_ref_addr.base() == c_rarg0) { ++ __ mv(t1, c_rarg1); ++ __ la(c_rarg1, _ref_addr); ++ __ mv(c_rarg0, t1); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } ++ } ++ ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != x10) { ++ __ mv(_ref, x10); ++ } ++ } ++}; ++ ++#undef __ ++#define __ masm-> ++ ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); ++ ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ int32_t offset = 0; ++ __ la_patchable(t0, stub->slow_path(), offset); ++ __ jalr(x1, t0, offset); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); ++} ++ ++#undef __ ++ ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++#undef __ ++#define __ ce->masm()-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const { ++ assert_different_registers(xthread, ref->as_register(), t1); ++ __ ld(t1, address_bad_mask_from_thread(xthread)); ++ __ andr(t1, t1, ref->as_register()); ++} ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const { ++ // Stub entry ++ __ bind(*stub->entry()); ++ ++ Register ref = stub->ref()->as_register(); ++ Register ref_addr = noreg; ++ Register tmp = noreg; ++ ++ if (stub->tmp()->is_valid()) { ++ // Load address into tmp register ++ ce->leal(stub->ref_addr(), stub->tmp()); ++ ref_addr = tmp = stub->tmp()->as_pointer_register(); ++ } else { ++ // Address already in register ++ ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); ++ } ++ ++ assert_different_registers(ref, ref_addr, noreg); ++ ++ // Save x10 unless it is the result or tmp register ++ // Set up SP to accomodate parameters and maybe x10. 
++ if (ref != x10 && tmp != x10) { ++ __ sub(sp, sp, 32); ++ __ sd(x10, Address(sp, 16)); ++ } else { ++ __ sub(sp, sp, 16); ++ } ++ ++ // Setup arguments and call runtime stub ++ ce->store_parameter(ref_addr, 1); ++ ce->store_parameter(ref, 0); ++ ++ __ far_call(stub->runtime_stub()); ++ ++ // Verify result ++ __ verify_oop(x10, "Bad oop"); ++ ++ ++ // Move result into place ++ if (ref != x10) { ++ __ mv(ref, x10); ++ } ++ ++ // Restore x10 unless it is the result or tmp register ++ if (ref != x10 && tmp != x10) { ++ __ ld(x10, Address(sp, 16)); ++ __ add(sp, sp, 32); ++ } else { ++ __ add(sp, sp, 16); ++ } ++ ++ // Stub exit ++ __ j(*stub->continuation()); ++} ++ ++#undef __ ++#define __ sasm-> ++ ++void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const { ++ __ prologue("zgc_load_barrier stub", false); ++ ++ __ push_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ // Setup arguments ++ __ load_parameter(0, c_rarg0); ++ __ load_parameter(1, c_rarg1); ++ ++ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); ++ ++ __ pop_call_clobbered_registers_except(RegSet::of(x10)); ++ ++ __ epilogue(); ++} ++ ++#undef __ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 00000000000..dc07ab635fe +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 ++ ++#ifdef COMPILER1 ++class LIR_Assembler; ++class LIR_Opr; ++class StubAssembler; ++class ZLoadBarrierStubC1; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 ++ ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { ++public: ++ virtual void load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread); ++ ++#ifdef ASSERT ++ virtual void store_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Address dst, ++ Register val, ++ Register tmp1, ++ Register tmp2); ++#endif // ASSERT ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, ++ DecoratorSet decorators, ++ bool is_oop, ++ Register src, ++ Register dst, ++ Register count, ++ RegSet saved_regs); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, ++ Register jni_env, ++ Register robj, ++ Register tmp, ++ Label& slowpath); ++ ++#ifdef COMPILER1 ++ void generate_c1_load_barrier_test(LIR_Assembler* ce, ++ LIR_Opr ref) const; ++ ++ void generate_c1_load_barrier_stub(LIR_Assembler* ce, ++ ZLoadBarrierStubC1* stub) const; ++ ++ void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, ++ DecoratorSet decorators) const; ++#endif // COMPILER1 ++ ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); ++ ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 ++}; ++ ++#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +new file mode 100644 +index 00000000000..d14997790af +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +@@ -0,0 +1,212 @@ ++/* ++ * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "gc/shared/gcLogPrecious.hpp"
++#include "gc/shared/gc_globals.hpp"
++#include "gc/z/zGlobals.hpp"
++#include "runtime/globals.hpp"
++#include "runtime/os.hpp"
++#include "utilities/globalDefinitions.hpp"
++#include "utilities/powerOfTwo.hpp"
++
++#ifdef LINUX
++#include <sys/mman.h>
++#endif // LINUX
++
++//
++// The heap can have three different layouts, depending on the max heap size.
++//
++// Address Space & Pointer Layout 1
++// --------------------------------
++//
++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
++// . .
++// . .
++// . .
++// +--------------------------------+ 0x0000014000000000 (20TB)
++// | Remapped View |
++// +--------------------------------+ 0x0000010000000000 (16TB)
++// . .
++// +--------------------------------+ 0x00000c0000000000 (12TB)
++// | Marked1 View |
++// +--------------------------------+ 0x0000080000000000 (8TB)
++// | Marked0 View |
++// +--------------------------------+ 0x0000040000000000 (4TB)
++// . .
++// +--------------------------------+ 0x0000000000000000
++//
++// 6 4 4 4 4
++// 3 6 5 2 1 0
++// +--------------------+----+-----------------------------------------------+
++// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111|
++// +--------------------+----+-----------------------------------------------+
++// | | |
++// | | * 41-0 Object Offset (42-bits, 4TB address space)
++// | |
++// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB)
++// | 0010 = Marked1 (Address view 8-12TB)
++// | 0100 = Remapped (Address view 16-20TB)
++// | 1000 = Finalizable (Address view N/A)
++// |
++// * 63-46 Fixed (18-bits, always zero)
++//
++//
++// Address Space & Pointer Layout 2
++// --------------------------------
++//
++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
++// . .
++// . .
++// . .
++// +--------------------------------+ 0x0000280000000000 (40TB)
++// | Remapped View |
++// +--------------------------------+ 0x0000200000000000 (32TB)
++// . .
++// +--------------------------------+ 0x0000180000000000 (24TB)
++// | Marked1 View |
++// +--------------------------------+ 0x0000100000000000 (16TB)
++// | Marked0 View |
++// +--------------------------------+ 0x0000080000000000 (8TB)
++// . .
++// +--------------------------------+ 0x0000000000000000
++//
++// 6 4 4 4 4
++// 3 7 6 3 2 0
++// +------------------+-----+------------------------------------------------+
++// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111|
++// +-------------------+----+------------------------------------------------+
++// | | |
++// | | * 42-0 Object Offset (43-bits, 8TB address space)
++// | |
++// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB)
++// | 0010 = Marked1 (Address view 16-24TB)
++// | 0100 = Remapped (Address view 32-40TB)
++// | 1000 = Finalizable (Address view N/A)
++// |
++// * 63-47 Fixed (17-bits, always zero)
++//
++//
++// Address Space & Pointer Layout 3
++// --------------------------------
++//
++// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB)
++// . .
++// . .
++// . .
++// +--------------------------------+ 0x0000500000000000 (80TB)
++// | Remapped View |
++// +--------------------------------+ 0x0000400000000000 (64TB)
++// . .
++// +--------------------------------+ 0x0000300000000000 (48TB) ++// | Marked1 View | ++// +--------------------------------+ 0x0000200000000000 (32TB) ++// | Marked0 View | ++// +--------------------------------+ 0x0000100000000000 (16TB) ++// . . ++// +--------------------------------+ 0x0000000000000000 ++// ++// 6 4 4 4 4 ++// 3 8 7 4 3 0 ++// +------------------+----+-------------------------------------------------+ ++// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| ++// +------------------+----+-------------------------------------------------+ ++// | | | ++// | | * 43-0 Object Offset (44-bits, 16TB address space) ++// | | ++// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) ++// | 0010 = Marked1 (Address view 32-48TB) ++// | 0100 = Remapped (Address view 64-80TB) ++// | 1000 = Finalizable (Address view N/A) ++// | ++// * 63-48 Fixed (16-bits, always zero) ++// ++ ++// Default value if probing is not implemented for a certain platform: 128TB ++static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; ++// Minimum value returned, if probing fails: 64GB ++static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; ++ ++static size_t probe_valid_max_address_bit() { ++#ifdef LINUX ++ size_t max_address_bit = 0; ++ const size_t page_size = os::vm_page_size(); ++ for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { ++ const uintptr_t base_addr = ((uintptr_t) 1U) << i; ++ if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { ++ // msync suceeded, the address is valid, and maybe even already mapped. ++ max_address_bit = i; ++ break; ++ } ++ if (errno != ENOMEM) { ++ // Some error occured. This should never happen, but msync ++ // has some undefined behavior, hence ignore this bit. ++#ifdef ASSERT ++ fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#else // ASSERT ++ log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); ++#endif // ASSERT ++ continue; ++ } ++ // Since msync failed with ENOMEM, the page might not be mapped. ++ // Try to map it, to see if the address is valid. 
++ void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ munmap(result_addr, page_size); ++ } ++ if ((uintptr_t) result_addr == base_addr) { ++ // address is valid ++ max_address_bit = i; ++ break; ++ } ++ } ++ if (max_address_bit == 0) { ++ // probing failed, allocate a very high page and take that bit as the maximum ++ const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; ++ void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); ++ if (result_addr != MAP_FAILED) { ++ max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; ++ munmap(result_addr, page_size); ++ } ++ } ++ log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); ++ return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); ++#else // LINUX ++ return DEFAULT_MAX_ADDRESS_BIT; ++#endif // LINUX ++} ++ ++size_t ZPlatformAddressOffsetBits() { ++ const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; ++ const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; ++ const size_t min_address_offset_bits = max_address_offset_bits - 2; ++ const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); ++ const size_t address_offset_bits = log2i_exact(address_offset); ++ return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); ++} ++ ++size_t ZPlatformAddressMetadataShift() { ++ return ZPlatformAddressOffsetBits(); ++} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +new file mode 100644 +index 00000000000..f20ecd9b073 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP ++ ++const size_t ZPlatformGranuleSizeShift = 21; // 2MB ++const size_t ZPlatformHeapViews = 3; ++const size_t ZPlatformCacheLineSize = 64; ++ ++size_t ZPlatformAddressOffsetBits(); ++size_t ZPlatformAddressMetadataShift(); ++ ++#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +new file mode 100644 +index 00000000000..6b6f87814a5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +@@ -0,0 +1,233 @@ ++// ++// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++ ++source_hpp %{ ++ ++#include "gc/shared/gc_globals.hpp" ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { ++ if (barrier_data == ZLoadBarrierElided) { ++ return; ++ } ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); ++ __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ bnez(tmp, *stub->entry(), true /* far */); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); ++ __ j(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); ++ effect(TEMP dst); ++ ++ ins_cost(4 * DEFAULT_COST); ++ ++ format %{ "ld $dst, $mem, #@zLoadP" %} ++ ++ ins_encode %{ ++ const Address ref_addr (as_Register($mem$$base), $mem$$disp); ++ __ ld($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" ++ "mv $res, $res == $oldval" %} ++ ++ ins_encode %{ ++ Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" ++ "mv $res, $res == $oldval" %} ++ ++ ins_encode %{ ++ 
Label failed; ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ beqz($res$$Register, failed); ++ __ mv(t0, $oldval$$Register); ++ __ bind(failed); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); ++ __ andr(t1, t1, t0); ++ __ beqz(t1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result_as_bool */); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(t0, t0, $res$$Register); ++ __ beqz(t0, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} ++ ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} ++ ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); ++ %} ++ ins_pipe(pipe_serial); ++%} diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp new file mode 100644 -index 000000000..96068e637 +index 00000000000..2936837d951 --- /dev/null +++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,44 @@ +@@ -0,0 +1,52 @@ +/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15012,25 +19169,32 @@ index 000000000..96068e637 +// 32-bit integer argument values are extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + ++// RISCV has adopted a multicopy atomic model closely following ++// that of ARMv8. ++#define CPU_MULTI_COPY_ATOMIC ++ ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORTS_NATIVE_CX8 + +#define SUPPORT_RESERVED_STACK_AREA + -+#define THREAD_LOCAL_POLL ++#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false ++ ++#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY + +#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp new file mode 100644 -index 000000000..b46661a8f +index 00000000000..cbfc0583883 --- /dev/null +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,99 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15062,16 +19226,13 @@ index 000000000..b46661a8f +// Sets the default values for platform dependent flags used by the runtime system. 
+// (see globals.hpp) + -+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this -+ +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + -+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, OptoLoopAlignment, 16); -+define_pd_global(intx, InlineFrequencyCount, 100); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) @@ -15094,13 +19255,8 @@ index 000000000..b46661a8f +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + -+define_pd_global(bool, UseMembar, true); -+ +define_pd_global(bool, PreserveFramePointer, false); + -+// GC Ergo Flags -+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread -+ +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); @@ -15108,55 +19264,43 @@ index 000000000..b46661a8f +// Clear short arrays bigger than one word in an arch-specific way +define_pd_global(intx, InitArrayShortSize, BytesPerLong); + -+define_pd_global(bool, ThreadLocalHandshakes, true); -+ +define_pd_global(intx, InlineSmallCode, 1000); + +#define ARCH_FLAGS(develop, \ + product, \ -+ diagnostic, \ -+ experimental, \ + notproduct, \ + range, \ -+ constraint, \ -+ writeable) \ ++ constraint) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ -+ product(bool, UseBarriersForVolatile, false, \ -+ "Use memory barriers to implement volatile accesses") \ -+ product(bool, UseCRC32, false, \ -+ "Use CRC32 instructions for CRC32 computation") \ -+ product(bool, UseBlockZeroing, true, \ -+ "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + /* For now we're going to be safe and add the I/O bits to userspace fences. 
*/ \ + product(bool, UseConservativeFence, true, \ -+ "Extend i for r and o for w in the pred/succ flags of fence") \ ++ "Extend i for r and o for w in the pred/succ flags of fence;" \ ++ "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ -+ product(intx, EagerArrayCopyThreshold, 128, \ -+ "Threshod of array length by bytes to " \ -+ "trigger the eager array copy") \ -+ range(0, 65535) \ -+ experimental(bool, UseRVV, false, "Use RVV instructions") \ -+ experimental(bool, UseZba, false, "Use Zba instructions") \ -+ experimental(bool, UseZbb, false, "Use Zbb instructions") ++ product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ ++ product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ ++ product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ ++ product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ ++ "Use RVV instructions for left/right shift of BigInteger") + +#endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp new file mode 100644 -index 000000000..980b2a81b +index 00000000000..cc93103dc55 --- /dev/null +++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15191,7 +19335,7 @@ index 000000000..980b2a81b + +int InlineCacheBuffer::ic_stub_code_size() { + // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size ) ++ // 5: auipc + ld + j + address(2 * instruction_size) + return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + @@ -15234,13 +19378,13 @@ index 000000000..980b2a81b +} diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 000000000..ed8022784 +index 00000000000..922a80f9f3e --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,61 @@ +@@ -0,0 +1,51 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15264,28 +19408,18 @@ index 000000000..ed8022784 + */ + +#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" -+#include "macroAssembler_riscv.hpp" + +#define __ _masm-> + +static int icache_flush(address addr, int lines, int magic) { -+ // To make a store to instruction memory visible to all RISC-V harts, -+ // the writing hart has to execute a data FENCE before requesting that -+ // all remote RISC-V harts execute a FENCE.I -+ // -+ // No such-assurance is defined at the interface level of the builtin -+ // method, and so we should make sure it works. 
-+ __asm__ volatile("fence rw, rw" : : : "memory"); -+ -+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); ++ os::icache_flush((long int) addr, (long int) (addr + (lines << ICache::log2_line_size))); + return magic; +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { -+ + address start = (address)icache_flush; -+ + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first @@ -15301,12 +19435,12 @@ index 000000000..ed8022784 +#undef __ diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp new file mode 100644 -index 000000000..a503d3be3 +index 00000000000..5bf40ca8204 --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -15349,14 +19483,14 @@ index 000000000..a503d3be3 +#endif // CPU_RISCV_ICACHE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 000000000..91deb0ae2 +index 00000000000..d12dcb2af19 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1932 @@ +@@ -0,0 +1,1940 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15388,18 +19522,17 @@ index 000000000..91deb0ae2 +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" ++#include "oops/markWord.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" -+#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" -+ ++#include "utilities/powerOfTwo.hpp" + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type @@ -15618,7 +19751,8 @@ index 000000000..91deb0ae2 + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); + ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); @@ -15651,6 +19785,18 @@ index 000000000..91deb0ae2 + ld(klass, Address(klass, Array::base_offset_in_bytes())); +} + ++void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, ++ Register method, ++ Register cache) { ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == TemplateTable::f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ ++ ld(method, Address(cache, method_offset)); // get f1 Method* ++} ++ +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. +// @@ -15858,17 +20004,16 @@ index 000000000..91deb0ae2 + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); -+ bool needs_thread_local_poll = generate_poll && -+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ bool needs_thread_local_poll = generate_poll && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); ++ ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } + if (table == Interpreter::dispatch_table(state)) { -+ mv(t1, Interpreter::distance_from_dispatch_table(state)); ++ li(t1, Interpreter::distance_from_dispatch_table(state)); + add(t1, Rs, t1); + shadd(t1, t1, xdispatch, t1, 3); + } else { @@ -15914,6 +20059,7 @@ index 000000000..91deb0ae2 + +// remove activation +// ++// Apply stack watermark barrier. +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. @@ -15934,6 +20080,23 @@ index 000000000..91deb0ae2 + // result check if synchronized method + Label unlocked, unlock, no_unlock; + ++ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, ++ // that would normally not be safe to use. 
Such bad returns into unsafe territory of ++ // the stack, will call InterpreterRuntime::at_unwind. ++ Label slow_path; ++ Label fast_path; ++ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); ++ j(fast_path); ++ ++ bind(slow_path); ++ push(state); ++ set_last_Java_frame(esp, fp, (address)pc(), t0); ++ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); ++ reset_last_Java_frame(true); ++ pop(state); ++ ++ bind(fast_path); ++ + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); @@ -16082,6 +20245,7 @@ index 000000000..91deb0ae2 + + // restore sender esp + mv(esp, t1); ++ + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be @@ -16124,8 +20288,11 @@ index 000000000..91deb0ae2 + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + -+ if (UseBiasedLocking) { -+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp, obj_reg); ++ lwu(tmp, Address(tmp, Klass::access_flags_offset())); ++ andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); ++ bnez(tmp, slow_case); + } + + // Load (object->mark() | 1) into swap_reg @@ -16138,17 +20305,7 @@ index 000000000..91deb0ae2 + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + -+ if (PrintBiasedLockingStatistics) { -+ Label fail, fast; -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); -+ bind(fast); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ j(done); -+ bind(fail); -+ } else { -+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); -+ } ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and @@ -16160,17 +20317,11 @@ index 000000000..91deb0ae2 + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); -+ mv(t0, (int64_t)(7 - os::vm_page_size())); ++ li(t0, (int64_t)(7 - os::vm_page_size())); + andr(swap_reg, swap_reg, t0); + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); -+ -+ if (PrintBiasedLockingStatistics) { -+ bnez(swap_reg, slow_case); -+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), -+ t1, t0); -+ } + beqz(swap_reg, done); + + bind(slow_case); @@ -16201,9 +20352,7 @@ index 000000000..91deb0ae2 + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + + if (UseHeavyMonitors) { -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + @@ -16223,10 +20372,6 @@ index 000000000..91deb0ae2 + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + -+ if (UseBiasedLocking) { -+ biased_locking_exit(obj_reg, header_reg, done); -+ } -+ + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); @@ -16239,9 +20384,7 @@ index 000000000..91deb0ae2 + + // Call the runtime routine for slow case. 
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj -+ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), -+ lock_reg); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + + bind(done); + @@ -16261,7 +20404,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; -+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++ push_reg(0xc00, sp); // save x10, x11 + + // Test MDO to avoid the call if it is NULL. + ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); @@ -16274,7 +20417,7 @@ index 000000000..91deb0ae2 + add(x10, x11, x10); + sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + bind(set_mdp); -+ pop_reg(RegSet::of(x10, x11), sp); ++ pop_reg(0xc00, sp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { @@ -16414,7 +20557,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, constant); ++ addi(mdp_in, mdp_in, (unsigned)constant); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + @@ -17012,7 +21155,7 @@ index 000000000..91deb0ae2 + + ld(t0, mdo_addr); + beqz(t0, none); -+ mv(tmp, (u1)TypeEntries::null_seen); ++ li(tmp, (u1)TypeEntries::null_seen); + beq(t0, tmp, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -17047,10 +21190,10 @@ index 000000000..91deb0ae2 + + lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + if (is_virtual) { -+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); + bne(t0, tmp, profile_continue); + } else { -+ mv(tmp, (u1)DataLayout::call_type_data_tag); ++ li(tmp, (u1)DataLayout::call_type_data_tag); + bne(t0, tmp, profile_continue); + } + @@ -17080,7 +21223,7 @@ index 000000000..91deb0ae2 + mv(index, zr); // index < TypeProfileArgsLimit + bind(loop); + bgtz(index, profileReturnType); -+ mv(t0, (int)MethodData::profile_return()); ++ li(t0, (int)MethodData::profile_return()); + beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false + bind(profileReturnType); + // If return value type is profiled we may have no argument to profile @@ -17088,7 +21231,7 @@ index 000000000..91deb0ae2 + mv(t1, - TypeStackSlotEntries::per_arg_count()); + mul(t1, index, t1); + add(tmp, tmp, t1); -+ mv(t1, TypeStackSlotEntries::per_arg_count()); ++ li(t1, TypeStackSlotEntries::per_arg_count()); + add(t0, mdp, off_to_args); + blt(tmp, t1, done); + @@ -17099,8 +21242,8 @@ index 000000000..91deb0ae2 + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list -+ mv(t0, stack_slot_offset0); -+ mv(t1, slot_step); ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(t0, mdp, t0); @@ -17110,8 +21253,8 @@ index 000000000..91deb0ae2 + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + -+ mv(t0, argument_type_offset0); -+ mv(t1, type_step); ++ li(t0, argument_type_offset0); ++ li(t1, type_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(mdo_addr, mdp, t0); @@ -17123,7 +21266,7 @@ 
index 000000000..91deb0ae2 + + // increment index by 1 + addi(index, index, 1); -+ mv(t1, TypeProfileArgsLimit); ++ li(t1, TypeProfileArgsLimit); + blt(index, t1, loop); + bind(loopEnd); + @@ -17178,13 +21321,13 @@ index 000000000..91deb0ae2 + // length + Label do_profile; + lbu(t0, Address(xbcp, 0)); -+ mv(tmp, (u1)Bytecodes::_invokedynamic); ++ li(tmp, (u1)Bytecodes::_invokedynamic); + beq(t0, tmp, do_profile); -+ mv(tmp, (u1)Bytecodes::_invokehandle); ++ li(tmp, (u1)Bytecodes::_invokehandle); + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ mv(t1, vmIntrinsics::_compiledLambdaForm); ++ li(t1, static_cast(vmIntrinsics::_compiledLambdaForm)); + bne(t0, t1, profile_continue); + bind(do_profile); + } @@ -17227,7 +21370,6 @@ index 000000000..91deb0ae2 + add(t0, mdp, off_base); + add(t1, mdp, type_base); + -+ + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); + // load offset on the stack from the slot for this parameter + ld(tmp2, Address(tmp2, 0)); @@ -17287,12 +21429,12 @@ index 000000000..91deb0ae2 +#endif diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 000000000..042ee8280 +index 00000000000..4d8cb086f82 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp -@@ -0,0 +1,283 @@ +@@ -0,0 +1,285 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17415,6 +21557,8 @@ index 000000000..042ee8280 + // Load cpool->resolved_klass_at(index). + void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + ++ void load_resolved_method_at_index(int byte_no, Register method, Register cache); ++ + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); @@ -17439,7 +21583,7 @@ index 000000000..042ee8280 + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + -+// Load float value from 'address'. The value is loaded onto the FPU register v0. ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. + void load_float(Address src); + void load_double(Address src); + @@ -17576,14 +21720,14 @@ index 000000000..042ee8280 +#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp new file mode 100644 -index 000000000..777f326e3 +index 00000000000..d93530d8564 --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,296 @@ +@@ -0,0 +1,295 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17637,8 +21781,9 @@ index 000000000..777f326e3 +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; + } -+ return fnoreg; +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { @@ -17760,7 +21905,6 @@ index 000000000..777f326e3 + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; + -+ + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; @@ -17793,7 +21937,6 @@ index 000000000..777f326e3 + *_to++ = value; + } + -+ + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17801,7 +21944,6 @@ index 000000000..777f326e3 + } + } + -+ + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17826,7 +21968,7 @@ index 000000000..777f326e3 + } + } + -+ virtual void pass_double() { ++ virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { @@ -17844,12 +21986,13 @@ index 000000000..777f326e3 + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; ++ _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_reg_int_args = (method->is_static() ? 1 : 0); + _num_reg_fp_args = 0; + } ++ + ~SlowSignatureHandler() + { + _from = NULL; @@ -17861,29 +22004,29 @@ index 000000000..777f326e3 +}; + + -+IRT_ENTRY(address, -+ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++JRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* current, + Method* method, + intptr_t* from, + intptr_t* to)) -+ methodHandle m(thread, (Method*)method); ++ methodHandle m(current, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate((uint64_t)UCONST64(-1)); ++ ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); -+IRT_END ++JRT_END diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 000000000..06342869f +index 00000000000..05df63ba2ae --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17952,14 +22095,13 @@ index 000000000..06342869f +#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 000000000..a169b8c5f +index 00000000000..9a6084afa1d --- /dev/null +++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp -@@ -0,0 +1,89 @@ +@@ -0,0 +1,86 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. 
All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -17988,7 +22130,7 @@ index 000000000..a169b8c5f +private: + + // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore @@ -18038,23 +22180,21 @@ index 000000000..a169b8c5f + +public: + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } -+ // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + +#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 000000000..9bab8e78f +index 00000000000..814ed23e471 --- /dev/null +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,193 @@ +@@ -0,0 +1,214 @@ +/* -+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18084,6 +22224,7 @@ index 000000000..9bab8e78f +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> @@ -18135,12 +22276,30 @@ index 000000000..9bab8e78f + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); -+ __ xorr(robj, c_rarg1, rcounter); -+ __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. + ++ if (JvmtiExport::can_post_field_access()) { ++ // Using barrier to order wrt. JVMTI check and load of result. ++ __ membar(MacroAssembler::LoadLoad); + ++ // Check to see if a field access watch has been set before we ++ // take the fast path. ++ int32_t offset2; ++ __ la_patchable(result, ++ ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), ++ offset2); ++ __ lwu(result, Address(result, offset2)); ++ __ bnez(result, slow); ++ ++ __ mv(robj, c_rarg1); ++ } else { ++ // Using address dependency to order wrt. load of result. ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. ++ } ++ ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. 
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); @@ -18150,6 +22309,7 @@ index 000000000..9bab8e78f + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + __ add(roffset, robj, roffset); ++ + switch (type) { + case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; + case T_BYTE: __ lb(result, Address(roffset, 0)); break; @@ -18170,9 +22330,10 @@ index 000000000..9bab8e78f + default: ShouldNotReachHere(); + } + -+ // counter_addr is address dependent on result. -+ __ xorr(rcounter_addr, rcounter_addr, result); -+ __ xorr(rcounter_addr, rcounter_addr, result); ++ // Using acquire: Order JVMTI check and load of result wrt. succeeding check ++ // (LoadStore for volatile field). ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ + __ lw(t0, safepoint_counter_addr); + __ bne(rcounter, t0, slow); + @@ -18246,14 +22407,13 @@ index 000000000..9bab8e78f +} diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 000000000..96775e0db +index 00000000000..83ffcc55d83 --- /dev/null +++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,108 @@ +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18280,7 +22440,7 @@ index 000000000..96775e0db +#define CPU_RISCV_JNITYPES_RISCV_HPP + +#include "jni.h" -+#include "memory/allocation.hpp" ++#include "memory/allStatic.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni @@ -18319,9 +22479,8 @@ index 000000000..96775e0db + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } ++ static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } @@ -18360,14 +22519,14 @@ index 000000000..96775e0db +#endif // CPU_RISCV_JNITYPES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 000000000..5d6078bb3 +index 00000000000..86710295444 --- /dev/null +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,5861 @@ +@@ -0,0 +1,4016 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. 
All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18398,26 +22557,25 @@ index 000000000..5d6078bb3 +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" -+#include "runtime/biasedLocking.hpp" ++#include "oops/oop.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#endif ++#include "utilities/powerOfTwo.hpp" +#ifdef COMPILER2 -+#include "oops/oop.hpp" +#include "opto/compile.hpp" -+#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" +#endif + +#ifdef PRODUCT @@ -18429,30 +22587,35 @@ index 000000000..5d6078bb3 + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg3, arg); + } +} + -+void MacroAssembler::align(int modulus) { -+ while (offset() % modulus != 0) { nop(); } ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { @@ -18604,6 +22767,7 @@ index 000000000..5d6078bb3 + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); + } else { ++ InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } @@ -18693,6 +22857,35 @@ index 000000000..5d6078bb3 + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + ++void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { ++ assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); ++ assert_different_registers(klass, xthread, tmp); ++ ++ Label L_fallthrough, L_tmp; ++ if (L_fast_path == NULL) { ++ L_fast_path = &L_fallthrough; ++ } else if (L_slow_path == NULL) { ++ L_slow_path = &L_fallthrough; ++ } ++ ++ // Fast path check: class is fully initialized ++ lbu(tmp, Address(klass, 
InstanceKlass::init_state_offset())); ++ sub(tmp, tmp, InstanceKlass::fully_initialized); ++ beqz(tmp, *L_fast_path); ++ ++ // Fast path check: current thread is initializer thread ++ ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); ++ ++ if (L_slow_path == &L_fallthrough) { ++ beq(xthread, tmp, *L_fast_path); ++ bind(*L_slow_path); ++ } else if (L_fast_path == &L_fallthrough) { ++ bne(xthread, tmp, *L_slow_path); ++ bind(*L_fast_path); ++ } else { ++ Unimplemented(); ++ } ++} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } @@ -18710,11 +22903,7 @@ index 000000000..5d6078bb3 + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18749,11 +22938,8 @@ index 000000000..5d6078bb3 + } else { + ld(x10, addr); + } -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ ++ li(t0, (uintptr_t)(address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18803,51 +22989,47 @@ index 000000000..5d6078bb3 +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016" PRIX64, pc); ++ tty->print_cr(" pc = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif -+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); -+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); -+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); -+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); -+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); -+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); -+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); -+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); -+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); -+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); -+ tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); -+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); -+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); -+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); -+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); -+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); -+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); -+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); -+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); -+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); -+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); -+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); -+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); -+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); -+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); -+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); -+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); -+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); -+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); -+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); -+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 
0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); + BREAKPOINT; + } -+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -+ } else { -+ ttyLocker ttyl; -+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); -+ assert(false, "DEBUG MESSAGE: %s", msg); + } ++ fatal("DEBUG MESSAGE: %s", msg); +} + +void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { @@ -18873,13 +23055,9 @@ index 000000000..5d6078bb3 + +void MacroAssembler::stop(const char* msg) { + address ip = pc(); -+ push_reg(RegSet::range(x0, x31), sp); -+ if(msg != NULL && ip != NULL) { -+ mv(c_rarg0, (uintptr_t)(address)msg); -+ mv(c_rarg1, (uintptr_t)(address)ip); -+ } else { -+ ShouldNotReachHere(); -+ } ++ pusha(); ++ li(c_rarg0, (uintptr_t)(address)msg); ++ li(c_rarg1, (uintptr_t)(address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); @@ -18901,6 +23079,7 @@ index 000000000..5d6078bb3 + // CompiledDirectStaticCall::set_to_interpreted knows the + // exact layout of this stub. + ++ ifence(); + mov_metadata(xmethod, (Metadata*)NULL); + + // Jump to the entry point of the i2c stub. 
@@ -18912,14 +23091,24 @@ index 000000000..5d6078bb3 +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr) { ++ call_native_base(entry_point, retaddr); ++} ++ ++void MacroAssembler::call_native(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_native_base(entry_point); ++} ++ ++void MacroAssembler::call_native_base(address entry_point, Label *retaddr) { ++ Label E, L; + int32_t offset = 0; -+ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp ++ push_reg(0x80000040, sp); // push << t0 & xmethod >> to sp + movptr_with_offset(t0, entry_point, offset); + jalr(x1, t0, offset); + if (retaddr != NULL) { + bind(*retaddr); + } -+ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp ++ pop_reg(0x80000040, sp); // pop << t0 & xmethod >> from sp +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { @@ -19079,29 +23268,23 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::la(Register Rd, const Address &adr) { -+ code_section()->relocate(pc(), adr.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); + relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ switch(adr.getMode()) { ++ switch (adr.getMode()) { + case Address::literal: { + if (rtype == relocInfo::none) { -+ mv(Rd, (intptr_t)(adr.target())); ++ li(Rd, (intptr_t)(adr.target())); + } else { + movptr(Rd, adr.target()); + } + break; + } -+ case Address::base_plus_offset:{ -+ Register base = adr.base(); -+ int64_t offset = adr.offset(); -+ if (offset == 0 && Rd != base) { -+ mv(Rd, base); -+ } else if (offset != 0 && Rd != base) { -+ add(Rd, base, offset, Rd); -+ } else if (offset != 0 && Rd == base) { -+ Register tmp = (Rd == t0) ? 
t1 : t0; -+ add(base, base, offset, tmp); -+ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); + break; + } + default: @@ -19144,26 +23327,31 @@ index 000000000..5d6078bb3 + + INSN(beq, feq, bnez); + INSN(bne, feq, beqz); ++ +#undef INSN + + +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ @@ -19271,110 +23459,6 @@ index 000000000..5d6078bb3 + +#undef INSN + -+#ifdef COMPILER2 -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ (conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, -+ -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; -+ -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional 
branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); -+} -+ -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+#endif -+ +void MacroAssembler::push_reg(Register Rs) +{ + addi(esp, esp, 0 - wordSize); @@ -19390,7 +23474,7 @@ index 000000000..5d6078bb3 +int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { + int count = 0; + // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg --) { ++ for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } @@ -19403,6 +23487,7 @@ index 000000000..5d6078bb3 +// Return the number of words pushed +int MacroAssembler::push_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19424,6 +23509,7 @@ index 000000000..5d6078bb3 + +int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19443,13 +23529,98 @@ index 000000000..5d6078bb3 + return count; +} + -+RegSet MacroAssembler::call_clobbered_registers() { -+ // Push integer registers x7, x10-x17, x28-x31. -+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. 
++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); ++ ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; ++ } ++ ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; +} + -+void MacroAssembler::push_call_clobbered_registers() { -+ push_reg(call_clobbered_registers(), sp); ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; ++ } ++ ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } ++ ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; ++} ++ ++#ifdef COMPILER2 ++int MacroAssembler::push_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } ++ ++ for (int i = 0; i < count; i++) { ++ sub(stack, stack, vector_size_in_bytes); ++ vs1r_v(as_VectorRegister(regs[i]), stack); ++ } ++ ++ return count * vector_size_in_bytes / wordSize; ++} ++ ++int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ ++ // Scan bitset to accumulate register pairs ++ unsigned char regs[32]; ++ int count = 0; ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } ++ ++ for (int i = count - 1; i >= 0; i--) { ++ vl1r_v(as_VectorRegister(regs[i]), stack); ++ add(stack, stack, vector_size_in_bytes); ++ } ++ ++ return count * vector_size_in_bytes / wordSize; ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + + // Push float registers f0-f7, f10-f17, f28-f31. + addi(sp, sp, - wordSize * 20); @@ -19461,7 +23632,8 @@ index 000000000..5d6078bb3 + } +} + -+void MacroAssembler::pop_call_clobbered_registers() { ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); + int offset = 0; + for (int i = 0; i < 32; i++) { + if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -19470,12 +23642,25 @@ index 000000000..5d6078bb3 + } + addi(sp, sp, wordSize * 20); + -+ pop_reg(call_clobbered_registers(), sp); ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} ++ ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} ++ ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); +} + +void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(RegSet::range(x5, x31), sp); ++ push_reg(0xffffffe0, sp); + + // float registers + addi(sp, sp, - 32 * wordSize); @@ -19483,18 +23668,19 @@ index 000000000..5d6078bb3 + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } + -+ // vector registers -+ if (save_vectors) { ++ // vector registers ++ if (save_vectors) { + sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); + vsetvli(t0, x0, Assembler::e64, Assembler::m8); + for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); ++ add(t0, sp, vector_size_in_bytes * i); ++ vse64_v(as_VectorRegister(i), t0); + } + } +} + +void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++ CompressibleRegion cr(this); + // vector registers + if (restore_vectors) { + vsetvli(t0, x0, Assembler::e64, Assembler::m8); @@ -19511,7 +23697,7 @@ index 000000000..5d6078bb3 + addi(sp, sp, 32 * wordSize); + + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(RegSet::range(x5, x31), sp); ++ pop_reg(0xffffffe0, sp); +} + +static int patch_offset_in_jal(address branch, int64_t offset) { @@ -19541,12 +23727,12 @@ index 000000000..5d6078bb3 + +static int patch_addr_in_movptr(address branch, address target) { + const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load -+ int32_t lower = ((intptr_t)target << 35) >> 35; -+ int64_t upper = ((intptr_t)target - lower) >> 29; -+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] -+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] -+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] -+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] ++ int32_t lower = ((intptr_t)target << 36) >> 36; ++ int64_t upper = ((intptr_t)target - lower) >> 28; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] + return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; +} + @@ -19618,9 +23804,9 @@ index 000000000..5d6078bb3 + +static address get_target_of_movptr(address insn_addr) { + assert_cond(insn_addr != NULL); -+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. -+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. 
++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + return (address) target_address; +} @@ -19661,10 +23847,14 @@ index 000000000..5d6078bb3 + int64_t imm = (intptr_t)target; + return patch_imm_in_li32(branch, (int32_t)imm); + } else { -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif + ShouldNotReachHere(); ++ return -1; + } -+ return -1; +} + +address MacroAssembler::target_addr_for_insn(address insn_addr) { @@ -19694,7 +23884,7 @@ index 000000000..5d6078bb3 + // instruction. + if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP -+ narrowOop n = CompressedOops::encode((oop)o); ++ uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP @@ -19707,10 +23897,10 @@ index 000000000..5d6078bb3 +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { -+ mv(xheapbase, Universe::narrow_ptrs_base()); ++ mv(xheapbase, CompressedOops::ptrs_base()); + } else { + int32_t offset = 0; -+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); ++ la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } @@ -19721,6 +23911,13 @@ index 000000000..5d6078bb3 + code_section()->relocate(pc(), dest.rspec()); + movptr(Rd, dest.target()); +} ++ ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruciton ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ +void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); @@ -19795,26 +23992,10 @@ index 000000000..5d6078bb3 + } +} + -+// rotate right with imm bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseZbb) { -+ rori(dst, src, shift); -+ return; -+ } -+ -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} -+ +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 48); + return; @@ -19831,7 +24012,7 @@ index 000000000..5d6078bb3 +// reverse bytes in lower word and sign-extend +// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) +void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + srai(Rd, Rd, 32); + return; @@ -19848,7 +24029,7 @@ index 
000000000..5d6078bb3 +// reverse bytes in halfword in lower 16 bits and zero-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + srli(Rd, Rd, 48); + return; @@ -19865,11 +24046,11 @@ index 000000000..5d6078bb3 +// reverse bytes in halfwords in lower 32 bits and zero-extend +// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) +void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); ++ zext_w(Rd, Rd); + return; + } + assert_different_registers(Rs, tmp1, tmp2); @@ -19894,19 +24075,20 @@ index 000000000..5d6078bb3 + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} ++ +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { ++ if (UseRVB) { + assert_different_registers(Rs, tmp1); + assert_different_registers(Rd, tmp1); + rev8(Rd, Rs); -+ zero_extend(tmp1, Rd, 32); ++ zext_w(tmp1, Rd); + roriw(tmp1, tmp1, 16); + slli(tmp1, tmp1, 32); + srli(Rd, Rd, 32); + roriw(Rd, Rd, 16); -+ zero_extend(Rd, Rd, 32); ++ zext_w(Rd, Rd); + orr(Rd, Rd, tmp1); + return; + } @@ -19921,7 +24103,7 @@ index 000000000..5d6078bb3 +// reverse bytes in each word +// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] +void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + rori(Rd, Rd, 32); + return; @@ -19935,7 +24117,7 @@ index 000000000..5d6078bb3 +// reverse bytes in doubleword +// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] +void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { -+ if (UseZbb) { ++ if (UseRVB) { + rev8(Rd, Rs); + return; + } @@ -19954,12 +24136,28 @@ index 000000000..5d6078bb3 + orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseRVB) { ++ rori(dst, src, shift); ++ return; ++ } ++ ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} ++ +void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { + if (is_imm_in_range(imm, 12, 0)) { + and_imm12(Rd, Rn, imm); + } else { + assert_different_registers(Rn, tmp); -+ mv(tmp, imm); ++ li(tmp, imm); + andr(Rd, Rn, tmp); + } +} @@ -19969,11 +24167,11 @@ index 000000000..5d6078bb3 + if (src.is_register()) { + orr(tmp1, tmp1, src.as_register()); + } else { -+ if(is_imm_in_range(src.as_constant(), 12, 0)) { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { + ori(tmp1, tmp1, src.as_constant()); + } else { + assert_different_registers(tmp1, tmp2); -+ mv(tmp2, src.as_constant()); ++ li(tmp2, src.as_constant()); + orr(tmp1, tmp1, tmp2); + } + } @@ -19983,8 +24181,8 @@ index 000000000..5d6078bb3 +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); -+ if (Universe::narrow_klass_base() 
== NULL) { -+ slli(tmp, tmp, Universe::narrow_klass_shift()); ++ if (CompressedKlassPointers::base() == NULL) { ++ slli(tmp, tmp, CompressedKlassPointers::shift()); + beq(trial_klass, tmp, L); + return; + } @@ -19995,10 +24193,10 @@ index 000000000..5d6078bb3 + beq(trial_klass, tmp, L); +} + -+// Move an oop into a register. immediate is true if we want -+// immediate instrcutions, i.e. we are not going to patch this -+// instruction while the code is being executed by another thread. In -+// that case we can use move immediates rather than the constant pool. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions and nmethod entry barriers are not enabled. ++// i.e. we are not going to patch this instruction while the code is being ++// executed by another thread. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { @@ -20007,13 +24205,17 @@ index 000000000..5d6078bb3 +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ if (!immediate) { ++ ++ // nmethod entry barrier necessitate using the constant pool. They have to be ++ // ordered with respected to oop access. ++ // Using immediate literals would necessitate fence.i. ++ if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else @@ -20053,7 +24255,7 @@ index 000000000..5d6078bb3 + // was post-decremented.) Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. -+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); @@ -20062,6 +24264,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); + int32_t offset = 0; + _masm = masm; + _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); @@ -20070,6 +24273,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); + _masm->bind(_label); + _masm = NULL; +} @@ -20089,6 +24293,22 @@ index 000000000..5d6078bb3 + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); +} + ++// ((WeakHandle)result).resolve() ++void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { ++ assert_different_registers(result, tmp); ++ Label resolved; ++ ++ // A null weak handle resolves to null. ++ beqz(result, resolved); ++ ++ // Only 64 bit platforms support GCs that require a tmp register ++ // Only IN_HEAP loads require a thread_tmp register ++ // WeakHandle::resolve is an indirection like jweak. 
++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ result, Address(result), tmp, noreg /* tmp_thread */); ++ bind(resolved); ++} ++ +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { @@ -20116,23 +24336,23 @@ index 000000000..5d6078bb3 + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, -+ Register tmp1, Register tmp2, Register tmp3) { ++ Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +// Algorithm must match CompressedOops::encode. +void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); @@ -20143,9 +24363,9 @@ index 000000000..5d6078bb3 + bgez(d, notNull); + mv(d, zr); + bind(notNull); -+ if (Universe::narrow_oop_shift() != 0) { -+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); -+ srli(d, d, Universe::narrow_oop_shift()); ++ if (CompressedOops::shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ srli(d, d, CompressedOops::shift()); + } + } +} @@ -20184,9 +24404,9 @@ index 000000000..5d6078bb3 +void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); @@ -20200,16 +24420,17 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ li(xbase, (uintptr_t)CompressedKlassPointers::base()); ++ ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { + add(dst, xbase, src); + } -+ if (xbase == xheapbase) { reinit_heapbase(); } + ++ if (xbase == xheapbase) { reinit_heapbase(); } +} + +void 
MacroAssembler::encode_klass_not_null(Register r) { @@ -20219,9 +24440,9 @@ index 000000000..5d6078bb3 +void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + -+ if (Universe::narrow_klass_base() == NULL) { -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ if (CompressedKlassPointers::base() == NULL) { ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); @@ -20229,8 +24450,8 @@ index 000000000..5d6078bb3 + return; + } + -+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && -+ Universe::narrow_klass_shift() == 0) { ++ if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && ++ CompressedKlassPointers::shift() == 0) { + zero_extend(dst, src, 32); + return; + } @@ -20241,10 +24462,10 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (intptr_t)Universe::narrow_klass_base()); ++ li(xbase, (intptr_t)CompressedKlassPointers::base()); + sub(dst, src, xbase); -+ if (Universe::narrow_klass_shift() != 0) { -+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ if (CompressedKlassPointers::shift() != 0) { ++ assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { @@ -20262,22 +24483,22 @@ index 000000000..5d6078bb3 + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. -+ if (Universe::narrow_oop_shift() != 0) { -+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (CompressedOops::shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); + slli(dst, src, LogMinObjAlignmentInBytes); -+ if (Universe::narrow_oop_base() != NULL) { ++ if (CompressedOops::base() != NULL) { + add(dst, xheapbase, dst); + } + } else { -+ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ assert(CompressedOops::base() == NULL, "sanity"); + mv(dst, src); + } +} + +void MacroAssembler::decode_heap_oop(Register d, Register s) { -+ if (Universe::narrow_oop_base() == NULL) { -+ if (Universe::narrow_oop_shift() != 0 || d != s) { -+ slli(d, s, Universe::narrow_oop_shift()); ++ if (CompressedOops::base() == NULL) { ++ if (CompressedOops::shift() != 0 || d != s) { ++ slli(d, s, CompressedOops::shift()); + } + } else { + Label done; @@ -20290,8 +24511,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register tmp2, Register tmp3, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, @@ -20306,7 +24527,7 @@ index 000000000..5d6078bb3 + +// Used for storing NULLs. 
+void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, @@ -20394,7 +24615,7 @@ index 000000000..5d6078bb3 + if (itable_index.is_register()) { + slli(t0, itable_index.as_register(), 3); + } else { -+ mv(t0, itable_index.as_constant() << 3); ++ li(t0, itable_index.as_constant() << 3); + } + add(recv_klass, recv_klass, t0); + if (itentry_off) { @@ -20439,17 +24660,11 @@ index 000000000..5d6078bb3 + ld(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ Address addr = form_address(recv_klass, /* base */ -+ vtable_offset_in_bytes, /* offset */ -+ 12, /* expect offset bits */ -+ method_result); /* temp reg */ -+ ld(method_result, addr); ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } +} + +void MacroAssembler::membar(uint32_t order_constraint) { -+ if (!os::is_MP()) { return; } -+ + address prev = pc() - NativeMembar::instruction_size; + address last = code()->last_insn(); + @@ -20470,6 +24685,21 @@ index 000000000..5d6078bb3 + } +} + ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. ++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); ++ } ++ ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} ++ +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register tmp_reg, @@ -20480,56 +24710,16 @@ index 000000000..5d6078bb3 + bind(L_failure); +} + -+// Write serialization page so VM thread can do a pseudo remote membar. -+// We use the current thread pointer to calculate a thread specific -+// offset to write to within the page. This minimizes bus traffic -+// due to cache line collision. -+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { -+ srli(tmp2, thread, os::get_serialize_page_shift_count()); -+ -+ int mask = os::vm_page_size() - sizeof(int); -+ andi(tmp2, tmp2, mask, tmp1); -+ -+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); -+ membar(MacroAssembler::AnyAny); -+ sw(zr, Address(tmp1)); -+} -+ -+void MacroAssembler::safepoint_poll(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ int32_t offset = 0; -+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); -+ lwu(t0, Address(t0, offset)); -+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); -+ bnez(t0, slow_path); -+ } -+} -+ -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. 
not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. -+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); ++void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++ ld(t0, Address(xthread, JavaThread::polling_word_offset())); ++ if (acquire) { + membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); ++ } ++ if (at_return) { ++ bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); + } else { -+ safepoint_poll(slow_path); ++ andi(t0, t0, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path, true /* is_far */); + } +} + @@ -20540,17 +24730,16 @@ index 000000000..5d6078bb3 + // addr identifies memory word to compare against/update + Label retry_load, nope; + bind(retry_load); -+ // flush and load exclusive from the memory location -+ // and fail if it is not what we expect ++ // Load reserved from the memory location + lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect + bne(tmp, oldv, nope); -+ // if we store+flush with no intervening write tmp wil be zero ++ // If the store conditional succeeds, tmp will be zero + sc_d(tmp, newv, addr, Assembler::rl); + beqz(tmp, succeed); -+ // retry so we only ever return after a load fails to compare -+ // ensures we don't return a stale value after a failed write. ++ // Retry only when the store conditional failed + j(retry_load); -+ // if the memory word differs we return it in oldv and signal a fail ++ + bind(nope); + membar(AnyAny); + mv(oldv, tmp); @@ -20616,9 +24805,10 @@ index 000000000..5d6078bb3 + andi(aligned_addr, addr, ~3); + + if (size == int8) { -+ mv(mask, 0xff); ++ addi(mask, zr, 0xff); + } else { -+ mv(mask, -1); ++ // size == int16 case ++ addi(mask, zr, -1); + zero_extend(mask, mask, 16); + } + sll(mask, mask, shift); @@ -20658,7 +24848,7 @@ index 000000000..5d6078bb3 + bnez(tmp, retry); + + if (result_as_bool) { -+ mv(result, 1); ++ addi(result, zr, 1); + j(done); + + bind(fail); @@ -20670,16 +24860,16 @@ index 000000000..5d6078bb3 + + bind(fail); + srl(result, tmp, shift); -+ } + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else if (size == int16) { -+ sign_extend(result, result, 16); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } + } +} + -+// weak cmpxchg narrow value will kill t0, t1, expected, new_val and tmps. +// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement +// the weak CAS stuff. The major difference is that it just failed when store conditional +// failed. 
@@ -20693,7 +24883,7 @@ index 000000000..5d6078bb3 + assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); + cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ Label fail, done; ++ Label succ, fail, done; + + lr_w(old, aligned_addr, acquire); + andr(tmp, old, mask); @@ -20702,14 +24892,13 @@ index 000000000..5d6078bb3 + andr(tmp, old, not_mask); + orr(tmp, tmp, new_val); + sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, fail); ++ beqz(tmp, succ); + -+ // Success -+ mv(result, 1); ++ bind(fail); ++ addi(result, zr, 1); + j(done); + -+ // Fail -+ bind(fail); ++ bind(succ); + mv(result, zr); + + bind(done); @@ -20731,7 +24920,7 @@ index 000000000..5d6078bb3 + + // equal, succeed + if (result_as_bool) { -+ mv(result, 1); ++ li(result, 1); + } else { + mv(result, expected); + } @@ -20753,22 +24942,20 @@ index 000000000..5d6078bb3 + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { -+ assert(size != int8 && size != int16, "unsupported operand size"); -+ -+ Label fail, done; ++ Label fail, done, sc_done; + load_reserved(addr, size, acquire); + bne(t0, expected, fail); + store_conditional(addr, new_val, size, release); -+ bnez(t0, fail); ++ beqz(t0, sc_done); + -+ // Success -+ mv(result, 1); ++ // fail ++ bind(fail); ++ li(result, 1); + j(done); + -+ // Fail -+ bind(fail); -+ mv(result, zr); -+ ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); + bind(done); +} + @@ -20817,229 +25004,7 @@ index 000000000..5d6078bb3 + +#undef ATOMIC_XCHGU + -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. 
-+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) { -+ counters = BiasedLocking::counters(); -+ } -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); -+ bnez(t0, cas_label); // don't care flag unless jumping to done -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. 
Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ // don't care flag unless jumping to done -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. 
-+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::far_jump(Address entry, Register tmp) { ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21048,13 +25013,15 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. + la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x0, tmp, offset); + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + j(entry); + } +} + -+void MacroAssembler::far_call(Address entry, Register tmp) { ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21063,8 +25030,10 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. 
+ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x1, tmp, offset); // link + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jal(entry); // link + } +} @@ -21079,7 +25048,7 @@ index 000000000..5d6078bb3 + assert_different_registers(sub_klass, super_klass, tmp_reg); + bool must_load_sco = (super_check_offset == noreg); + if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { + assert_different_registers(sub_klass, super_klass, super_check_offset); + } @@ -21160,15 +25129,15 @@ index 000000000..5d6078bb3 + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); + if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } -+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg) ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + + Label L_fallthrough; + int label_nulls = 0; @@ -21177,7 +25146,7 @@ index 000000000..5d6078bb3 + + assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of useful fields in sub_klass: ++ // A couple of usefule fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); @@ -21228,14 +25197,14 @@ index 000000000..5d6078bb3 + add(x15, x15, Array::base_offset_in_bytes()); + + // Set t0 to an obvious invalid value, falling through by default -+ mv(t0, -1); ++ li(t0, -1); + // Scan X12 words at [X15] for an occurrence of X10. + repne_scan(x15, x10, x12, t0); + + // pop will restore x10, so we should use a temp register to keep its value + mv(t1, x10); + -+ // Unspill the temp. registers: ++ // Unspill the temp registers: + pop_reg(pushed_registers, sp); + + bne(t1, t0, *L_failure); @@ -21268,26 +25237,28 @@ index 000000000..5d6078bb3 +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, -+ Register tmp1, ++ Register tmp, + Label& slow_case, + bool is_far) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); +} + + +// get_thread() can be called anywhere inside generated code so we +// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code ++// by the call to Thread::current() or, indeed, the call setup code. 
+void MacroAssembler::get_thread(Register thread) { + // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::of(x10) + ra - thread; ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; + push_reg(saved_regs, sp); + -+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); -+ jalr(ra); -+ if (thread != c_rarg0) { -+ mv(thread, c_rarg0); ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); + } + + // restore pushed registers @@ -21295,8 +25266,9 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::load_byte_map_base(Register reg) { -+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ mv(reg, (uint64_t)byte_map_base); ++ CardTable::CardValue* byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { @@ -21310,11 +25282,12 @@ index 000000000..5d6078bb3 + assert(is_valid_riscv64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+ code_section()->relocate(pc(), dest.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); + // RISC-V doesn't compute a page-aligned address, in order to partially + // compensate for the use of *signed* offsets in its base+disp12 + // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2K)). ++ // [-(2G + 2K), 2G - 2k). + if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { + int64_t distance = dest.target() - pc(); + auipc(reg1, (int32_t)distance + 0x800); @@ -21325,15 +25298,18 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::build_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + sub(sp, sp, framesize); + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } ++ verify_cross_modify_fence_not_required(); +} + +void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + ld(fp, Address(sp, framesize - 2 * wordSize)); + ld(ra, Address(sp, framesize - wordSize)); + add(sp, sp, framesize); @@ -21365,28 +25341,22 @@ index 000000000..5d6078bb3 +} + +// Move the address of the polling page into dest. -+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ ld(dest, Address(xthread, Thread::polling_page_offset())); -+ } else { -+ uint64_t align = (uint64_t)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ la_patchable(dest, Address(page, rtype), offset); -+ } -+} -+ -+// Move the address of the polling page into dest. 
-+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { -+ int32_t offset = 0; -+ get_polling_page(dest, page, offset, rtype); -+ read_polling_page(dest, offset, rtype); ++void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { ++ ld(dest, Address(xthread, JavaThread::polling_page_offset())); +} + +// Read the polling page. The address of the polling page must +// already be in r. -+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { -+ code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(r, offset)); ++address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++ address mark; ++ { ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), rtype); ++ lwu(zr, Address(r, offset)); ++ mark = inst_mark(); ++ } ++ verify_cross_modify_fence_not_required(); ++ return mark; +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { @@ -21396,12 +25366,13 @@ index 000000000..5d6078bb3 + assert (UseCompressedOops, "should only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); -+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(pc(), rspec); ++ code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} @@ -21410,18 +25381,19 @@ index 000000000..5d6078bb3 + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); -+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); ++ assert(!Universe::heap()->is_in(k), "should not be an oop"); + ++ InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(pc(), rspec); -+ narrowKlass nk = Klass::encode_klass(k); ++ code_section()->relocate(inst_mark(), rspec); ++ narrowKlass nk = CompressedKlassPointers::encode(k); + li32(dst, nk); + zero_extend(dst, dst, 32); +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. 
-+address MacroAssembler::trampoline_call(Address entry) { ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type || + entry.rspec().type() == relocInfo::opt_virtual_call_type || @@ -21437,27 +25409,27 @@ index 000000000..5d6078bb3 + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && -+ Compile::current()->in_scratch_emit_size()); ++ Compile::current()->output()->in_scratch_emit_size()); +#endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { -+ postcond(pc() == badAddress); ++ postcond(pc() == badAddress); + return NULL; // CodeCache is full + } + } + } + -+ address call_pc = pc(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + relocate(entry.rspec()); + if (!far_branches()) { + jal(entry.target()); + } else { + jal(pc()); + } -+ ++ // just need to return a non-null address + postcond(pc() != badAddress); -+ return call_pc; ++ return pc(); +} + +address MacroAssembler::ic_call(address entry, jint method_index) { @@ -21480,8 +25452,8 @@ index 000000000..5d6078bb3 + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { -+ // Max stub size: alignment nop, TrampolineStub. -+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } @@ -21492,7 +25464,8 @@ index 000000000..5d6078bb3 + + // make sure 4 byte aligned here, so that the destination address would be + // 8 byte aligned after 3 intructions -+ while (offset() % wordSize == 0) { nop(); } ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); @@ -21507,6 +25480,7 @@ index 000000000..5d6078bb3 + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); + emit_int64((intptr_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); @@ -21522,54 +25496,26 @@ index 000000000..5d6078bb3 + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below. 
-+ return form_address(dst.base(), dst.offset(), 12, t1); ++ return form_address(t1, dst.base(), dst.offset()); + default: + la(t1, dst); + return Address(t1); + } +} + -+void MacroAssembler::increment(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); ++ assert_different_registers(adr.base(), t0); + ld(t0, adr); -+ add(t0, t0, value, t1); ++ addi(t0, t0, imm); + sd(t0, adr); +} + -+void MacroAssembler::incrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); ++ assert_different_registers(adr.base(), t0); + lwu(t0, adr); -+ addw(t0, t0, value, t1); -+ sw(t0, adr); -+} -+ -+void MacroAssembler::decrement(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ ld(t0, adr); -+ sub(t0, t0, value, t1); -+ sd(t0, adr); -+} -+ -+void MacroAssembler::decrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); -+ lwu(t0, adr); -+ subw(t0, t0, value, t1); ++ addiw(t0, t0, imm); + sw(t0, adr); +} + @@ -21581,1465 +25527,28 @@ index 000000000..5d6078bb3 + beq(src1, t0, equal); +} + -+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_equals(this, obj1, obj2, equal, is_far); ++void MacroAssembler::load_method_holder_cld(Register result, Register method) { ++ load_method_holder(result, method); ++ ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +} + -+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_nequals(this, obj1, obj2, nequal, is_far); -+} -+ -+#ifdef COMPILER2 -+// Set dst NaN if either source is NaN. 
-+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ Label Ldone; -+ fsflags(zr); -+ if (is_double) { -+ if (is_min) { -+ fmin_d(dst, src1, src2); -+ } else { -+ fmax_d(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_d(zr, src1, src2); -+ } else { -+ if (is_min) { -+ fmin_s(dst, src1, src2); -+ } else { -+ fmax_s(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_s(zr, src1, src2); -+ } -+ frflags(t0); -+ beqz(t0, Ldone); -+ -+ // Src1 or src2 must be NaN here. Set dst NaN. -+ if (is_double) { -+ fadd_d(dst, src1, src2); -+ } else { -+ fadd_s(dst, src1, src2); -+ } -+ bind(Ldone); -+} -+ -+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ mv(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ oop_equal(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); -+ -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same -+ -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); -+ -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); -+ -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); -+ -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); -+ -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); -+ -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); -+ -+ BLOCK_COMMENT("} array_equals"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+// Compare Strings -+ -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. 
-+ -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); -+ -+ BLOCK_COMMENT("string_equals {"); -+ -+ beqz(cnt1, SAME); -+ mv(result, false); -+ -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ blez(cnt1, SHORT); -+ -+ // Main 8 byte comparison loop. -+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); -+ -+ if (!AvoidUnalignedAccesses) { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ } -+ -+ bind(SHORT); -+ ld(tmp1, Address(a1)); -+ ld(tmp2, Address(a2)); -+ xorr(tmp1, tmp1, tmp2); -+ neg(cnt1, cnt1); -+ slli(cnt1, cnt1, LogBitsPerByte); -+ sll(tmp1, tmp1, cnt1); -+ bnez(tmp1, DONE); -+ -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); -+ -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); -+} -+ -+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) -+{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; -+ -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; -+ -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; -+ -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; -+ -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } -+ -+ // Compute the minimum of the string lengths and save the difference in result. 
-+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ // A very short string -+ mv(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); -+ -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // check if str1 and str2 are same string -+ beq(str1, str2, DONE); -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if(!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); -+ -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes when remaining bytes is -+ // less than 4(UL/LU) or 8 (LL/UU). -+ // Invalid bytes should be removed before comparison. -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ } else if (isLU) { // LU -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes -+ } -+ // remove invalid bytes -+ slli(t0, cnt2, LogBitsPerByte); -+ sll(tmp1, tmp1, t0); -+ sll(tmp2, tmp2, t0); -+ } else { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. 
-+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); -+ -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } -+ -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(SHORT_STRING); -+ // Is the minimum length zero? -+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); -+ -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} string_compare"); -+} -+ -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; -+ -+ BLOCK_COMMENT("string_indexof_char_short {"); -+ -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; -+ -+ mv(result, -1); -+ mv(index, zr); -+ -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); 
-+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); -+ -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); -+ -+ bind(LOOP4); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); -+ -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); -+ -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); -+ -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); -+ -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); -+ -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); -+ -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); -+ -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); -+ -+ bind(MATCH7); -+ addi(index, index, 7); -+ -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} -+ -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; -+ -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); -+ -+ addi(t0, cnt1, isL ? 
-32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); -+ -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); -+ if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } -+ -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); -+ } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); -+ -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } -+ -+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); -+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); -+ -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); -+ -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } -+ -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} -+ -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_indexof {"); -+ -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; -+ -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. -+ -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. 
-+ -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); -+ -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. -+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } -+ -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; -+ -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; -+ -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 
7 : 3; -+ -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) -+ -+ sub(sp, sp, ASIZE); -+ -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] -+ -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations -+ -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); -+ -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); -+ -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); -+ } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table -+ -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); -+ -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d -+ } -+ -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. 
Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); -+ } -+ -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); -+ -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); -+ -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); -+ } else { -+ mv(result_tmp, 1); -+ } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); -+ } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset -+ -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); -+ -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); -+ -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); -+ -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); -+ } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); -+} -+ -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) -+{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); -+ -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; -+ -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; -+ -+ Register first = tmp3; -+ -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; -+ -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); -+ -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); -+ -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); -+ -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); -+ -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } -+ } -+ -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, 
MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } -+ -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); -+ -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); -+ } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); -+ -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); -+ -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); -+ -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); -+ } -+ -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; -+ -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); -+ } -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); -+} -+ -+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts -+ } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); -+ -+ mv(result, true); -+} -+ -+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); -+ -+ mv(result, false); -+ -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); -+ } -+ -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} -+ -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. 
-+void MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; -+ -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); -+ -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); -+} -+ -+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ BLOCK_COMMENT("arrays_equals_v {"); -+ -+ // if (a1 == a2), return true -+ mv(result, true); -+ oop_equal(a1, a2, DONE); -+ -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); -+ -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); -+ -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); -+ -+ bind(DONE); -+ -+ BLOCK_COMMENT("} arrays_equals_v"); -+} -+ -+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; -+ -+ bool str1_isL = encLL || encLU; -+ bool str2_isL = encLL || encUL; -+ -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; -+ -+ BLOCK_COMMENT("string_compare {"); -+ -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); -+ -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); -+ -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); -+ } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); -+ -+ bind(DONE); -+} -+ -+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); -+ -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); -+} -+ -+// result: the number of elements had been encoded. -+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; -+ -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); -+ -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); -+ -+ bind(DIFFERENCE); -+ add(result, result, tmp); -+ -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); -+} -+ -+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { -+ Label loop, DONE; -+ -+ mv(result, true); -+ -+ bind(loop); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ // if element highest bit is set, return true -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, DONE); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, loop); -+ mv(result, false); -+ -+ bind(DONE); -+ postcond(pc() != badAddress); -+ return pc(); ++void MacroAssembler::load_method_holder(Register holder, Register method) { ++ ld(holder, Address(method, Method::const_offset())); // ConstMethod* ++ ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* ++ ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +} + +// string indexof +// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zero, ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, + Register match_mask, Register result, + Register ch2, Register tmp, + bool haystack_isL) +{ + int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zero); ++ srl(match_mask, match_mask, trailing_zeros); + srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zero, LogBitsPerByte); ++ srli(tmp, trailing_zeros, LogBitsPerByte); + if (!haystack_isL) andi(tmp, tmp, 0xE); + add(haystack, haystack, tmp); + ld(ch2, Address(haystack)); @@ -23050,9 +25559,11 @@ index 000000000..5d6078bb3 +// string indexof +// Find pattern element in src, compute match mask, +// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns would be like: ++// match mask patterns and corresponding indices would be like: +// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) +// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) +void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2) +{ @@ -23063,6 +25574,72 @@ index 000000000..5d6078bb3 + andr(match_mask, match_mask, src); +} + ++#ifdef COMPILER2 ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); ++ ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); ++ ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); ++ ++ bind(L_end); ++} ++ +// add two unsigned input and output carry +void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) +{ @@ -23097,88 +25674,6 @@ index 000000000..5d6078bb3 + add(final_dest_hi, dest_hi, carry); +} + -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2) { -+ Label L_loop_1, L_loop_2, L_end, L_not_zero; -+ bnez(len, L_not_zero); -+ mv(out, zr); -+ j(L_end); -+ bind(L_not_zero); -+ zero_extend(k, k, 32); -+ shadd(offset, offset, out, t0, LogBytesPerInt); -+ shadd(in, len, in, t0, LogBytesPerInt); -+ mv(out, zr); -+ -+ if (AvoidUnalignedAccesses) { -+ // if in and offset are both 8 bytes aligned. 
-+ orr(t0, in, offset); -+ andi(t0, t0, 0x7); -+ beqz(t0, L_loop_2); -+ } else { -+ j(L_loop_2); -+ } -+ -+ bind(L_loop_1); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ sub(len, len, 1); -+ beqz(len, L_end); -+ j(L_loop_1); -+ -+ -+ bind(L_loop_2); -+ Label L_one; -+ sub(len, len, 1); -+ bltz(len, L_end); -+ sub(len, len, 1); -+ bltz(len, L_one); -+ -+ sub(in, in, 8); -+ ld(tmp1, Address(in, 0)); -+ ror_imm(tmp1, tmp1, 32); // convert to little-endian -+ -+ const Register carry = out; -+ const Register src1_hi = t0; -+ const Register src1_lo = tmp2; -+ const Register src2 = t1; -+ -+ mulhu(src1_hi, k, tmp1); -+ mul(src1_lo, k, tmp1); -+ sub(offset, offset, 8); -+ ld(src2, Address(offset, 0)); -+ ror_imm(src2, src2, 32, tmp1); -+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); -+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian -+ sd(src1_lo, Address(offset, 0)); -+ j(L_loop_2); -+ -+ bind(L_one); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ -+ bind(L_end); -+} -+ +/** + * Multiply 32 bit by 32 bit first loop. + */ @@ -23187,33 +25682,31 @@ index 000000000..5d6078bb3 + Register carry, Register product, + Register idx, Register kdx) +{ -+ // long carry = 0; -+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { -+ // long product = (y[j] & LONG_MASK) * -+ // (x[xstart] & LONG_MASK) + carry; -+ // z[k] = (int)product; ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + + Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + + shadd(t0, xstart, x, t0, LogBytesPerInt); + lwu(x_xstart, Address(t0, 0)); + + bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ ++ subw(idx, idx, 1); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(y_idx, Address(t0, 0)); + mul(product, x_xstart, y_idx); + add(product, product, carry); + srli(carry, product, 32); -+ sub(kdx, kdx, 1); ++ subw(kdx, kdx, 1); + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(product, Address(t0, 0)); -+ j(L_first_loop); ++ bgtz(idx, L_first_loop); + + bind(L_first_loop_exit); +} @@ -23239,7 +25732,7 @@ index 000000000..5d6078bb3 + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + -+ sub(xstart, xstart, 1); ++ subw(xstart, xstart, 1); + bltz(xstart, L_one_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); @@ -23247,9 +25740,9 @@ index 000000000..5d6078bb3 + ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + + bind(L_first_loop); -+ sub(idx, idx, 1); ++ subw(idx, idx, 1); + bltz(idx, L_first_loop_exit); -+ sub(idx, idx, 1); ++ subw(idx, idx, 1); + bltz(idx, L_one_y); + + shadd(t0, idx, y, t0, LogBytesPerInt); @@ -23262,7 +25755,7 @@ index 000000000..5d6078bb3 + cad(product, product, carry, t1); + adc(carry, t0, zr, t1); + -+ sub(kdx, kdx, 2); ++ subw(kdx, kdx, 2); + ror_imm(product, product, 32); // back to big-endian + shadd(t0, kdx, z, t0, LogBytesPerInt); + sd(product, Address(t0, 0)); @@ -23281,7 +25774,7 @@ index 000000000..5d6078bb3 +} + +/** -+ * Multiply 128 bit by 128. 
Unrolled inner loop. ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. + * + */ +void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, @@ -23311,13 +25804,13 @@ index 000000000..5d6078bb3 + + Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ srli(jdx, idx, 2); ++ srliw(jdx, idx, 2); + + bind(L_third_loop); + -+ sub(jdx, jdx, 1); ++ subw(jdx, jdx, 1); + bltz(jdx, L_third_loop_exit); -+ sub(idx, idx, 4); ++ subw(idx, idx, 4); + + shadd(t0, idx, y, t0, LogBytesPerInt); + ld(yz_idx2, Address(t0, 0)); @@ -23361,7 +25854,7 @@ index 000000000..5d6078bb3 + beqz(idx, L_post_third_loop_done); + + Label L_check_1; -+ sub(idx, idx, 2); ++ subw(idx, idx, 2); + bltz(idx, L_check_1); + + shadd(t0, idx, y, t0, LogBytesPerInt); @@ -23383,7 +25876,7 @@ index 000000000..5d6078bb3 + bind(L_check_1); + + andi(idx, idx, 0x1); -+ sub(idx, idx, 1); ++ subw(idx, idx, 1); + bltz(idx, L_post_third_loop_done); + shadd(t0, idx, y, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); @@ -23393,10 +25886,11 @@ index 000000000..5d6078bb3 + shadd(t0, idx, z, t0, LogBytesPerInt); + lwu(tmp4, Address(t0, 0)); + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + + shadd(t0, idx, z, t0, LogBytesPerInt); + sw(tmp3, Address(t0, 0)); ++ + slli(t0, carry2, 32); + srli(carry, tmp3, 32); + orr(carry, carry, t0); @@ -23405,7 +25899,7 @@ index 000000000..5d6078bb3 +} + +/** -+ * Code for BigInteger::multiplyToLen() instrinsic. ++ * Code for BigInteger::multiplyToLen() intrinsic. + * + * x10: x + * x11: xlen @@ -23441,78 +25935,76 @@ index 000000000..5d6078bb3 + mv(kdx, zlen); // kdx = xlen+ylen; + mv(carry, zr); // carry = 0; + -+ Label L_multiply_64_or_128, L_done; ++ Label L_multiply_64_x_64_loop, L_done; + -+ sub(xstart, xlen, 1); ++ subw(xstart, xlen, 1); + bltz(xstart, L_done); + + const Register jdx = tmp1; + + if (AvoidUnalignedAccesses) { -+ // if x and y are both 8 bytes aligend. ++ // Check if x and y are both 8-byte aligned. 
+ orr(t0, xlen, ylen); + andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_or_128); -+ } else { -+ j(L_multiply_64_or_128); ++ beqz(t0, L_multiply_64_x_64_loop); ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ blez(jdx, L_third_loop_exit); ++ ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ j(L_second_loop_unaligned); + } + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ -+ Label L_second_loop_1; -+ bind(L_second_loop_1); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; -+ -+ bind(L_third_loop); -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ j(L_third_loop); -+ -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ -+ j(L_second_loop_1); -+ -+ bind(L_multiply_64_or_128); ++ bind(L_multiply_64_x_64_loop); + multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ Label L_second_loop_2; -+ beqz(kdx, L_second_loop_2); ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + + Label L_carry; -+ sub(kdx, kdx, 1); ++ subw(kdx, kdx, 1); + beqz(kdx, L_carry); + + shadd(t0, kdx, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + srli(carry, carry, 32); -+ sub(kdx, kdx, 1); ++ subw(kdx, kdx, 1); + + bind(L_carry); + shadd(t0, kdx, z, t0, LogBytesPerInt); @@ -23533,11 +26025,11 @@ index 000000000..5d6078bb3 + // + // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+ bind(L_second_loop_2); ++ bind(L_second_loop_aligned); + mv(carry, zr); // carry = 0; + mv(jdx, ylen); // j = ystart+1 + -+ sub(xstart, xstart, 1); // i = xstart-1; ++ subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_done); + + sub(sp, sp, 4 * wordSize); @@ -23546,7 +26038,7 @@ index 000000000..5d6078bb3 + Label L_last_x; + shadd(t0, xstart, z, t0, LogBytesPerInt); + addi(z, 
t0, 4); -+ sub(xstart, xstart, 1); // i = xstart-1; ++ subw(xstart, xstart, 1); // i = xstart-1; + bltz(xstart, L_last_x); + + shadd(t0, xstart, x, t0, LogBytesPerInt); @@ -23567,18 +26059,17 @@ index 000000000..5d6078bb3 + ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen + addi(sp, sp, 4 * wordSize); + -+ addi(tmp3, xlen, 1); ++ addiw(tmp3, xlen, 1); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); + -+ sub(tmp3, tmp3, 1); ++ subw(tmp3, tmp3, 1); + bltz(tmp3, L_done); + -+ // z[i] = (int) carry; + srli(carry, carry, 32); + shadd(t0, tmp3, z, t0, LogBytesPerInt); + sw(carry, Address(t0, 0)); -+ j(L_second_loop_2); ++ j(L_second_loop_aligned); + + // Next infrequent code is moved outside loops. + bind(L_last_x); @@ -23587,14 +26078,14 @@ index 000000000..5d6078bb3 + + bind(L_done); +} -+#endif // COMPILER2 ++#endif + +// Count bits of trailing zero chars from lsb to msb until first non-zero element. +// For LL case, one byte for one element, so shift 8 bits once, and for other case, +// shift 16 bits once. +void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) +{ -+ if (UseZbb) { ++ if (UseRVB) { + assert_different_registers(Rd, Rs, tmp1); + int step = isLL ? 8 : 16; + ctz(Rd, Rs); @@ -23605,7 +26096,7 @@ index 000000000..5d6078bb3 + assert_different_registers(Rd, Rs, tmp1, tmp2); + Label Loop; + int step = isLL ? 8 : 16; -+ mv(Rd, -step); ++ li(Rd, -step); + mv(tmp2, Rs); + + bind(Loop); @@ -23622,17 +26113,19 @@ index 000000000..5d6078bb3 +void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); -+ mv(tmp1, 0xFF000000); // first byte mask at lower word -+ andr(Rd, Rs, tmp1); -+ for (int i = 0; i < 2; i++) { -+ slli(Rd, Rd, wordSize); -+ srli(tmp1, tmp1, wordSize); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { + andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } + orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } -+ slli(Rd, Rd, wordSize); -+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word -+ orr(Rd, Rd, tmp2); +} + +// This instruction reads adjacent 4 bytes from the upper half of source register, @@ -23642,8 +26135,17 @@ index 000000000..5d6078bb3 +void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) +{ + assert_different_registers(Rd, Rs, tmp1, tmp2); -+ srli(Rs, Rs, 32); // only upper 32 bits are needed -+ inflate_lo32(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } ++ } +} + +// The size of the blocks erased by the zero_blocks stub. We must @@ -23675,7 +26177,7 @@ index 000000000..5d6078bb3 + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels1(around)); ++ DEBUG_ONLY(reset_labels(around)); + postcond(pc() == badAddress); + return NULL; + } @@ -23706,10 +26208,11 @@ index 000000000..5d6078bb3 + return pc(); +} + -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. +#define SmallArraySize (18 * BytesPerLong) -+void MacroAssembler::zero_words(Register base, uint64_t cnt) ++ ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. 
++void MacroAssembler::zero_words(Register base, u_int64_t cnt) +{ + assert_different_registers(base, t0, t1); + @@ -23721,7 +26224,7 @@ index 000000000..5d6078bb3 + } + } else { + const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; ++ int remainder = cnt % unroll; + for (int i = 0; i < remainder; i++) { + sd(zr, Address(base, i * wordSize)); + } @@ -23730,7 +26233,7 @@ index 000000000..5d6078bb3 + Register cnt_reg = t0; + Register loop_base = t1; + cnt = cnt - remainder; -+ mv(cnt_reg, cnt); ++ li(cnt_reg, cnt); + add(loop_base, base, remainder * wordSize); + bind(loop); + sub(cnt_reg, cnt_reg, unroll); @@ -23740,6 +26243,7 @@ index 000000000..5d6078bb3 + add(loop_base, loop_base, unroll * wordSize); + bnez(cnt_reg, loop); + } ++ + BLOCK_COMMENT("} zero_words"); +} + @@ -23858,8 +26362,8 @@ index 000000000..5d6078bb3 +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { -+ assert_different_registers(addr, len, tmp1, t0, t1); ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + +#ifdef ASSERT + { @@ -23904,8 +26408,8 @@ index 000000000..5d6078bb3 + srli(len, len, LogBytesPerWord); + andi(t0, len, unroll - 1); // t0 = cnt % unroll + sub(len, len, t0); // cnt -= unroll -+ // tmp1 always points to the end of the region we're about to zero -+ shadd(tmp1, t0, addr, t1, LogBytesPerWord); ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); + la(t1, entry); + slli(t0, t0, 2); + sub(t1, t1, t0); @@ -23913,17 +26417,17 @@ index 000000000..5d6078bb3 + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp1, i * wordSize)); ++ Assembler::sd(zr, Address(tmp, i * wordSize)); + } + bind(entry); -+ add(tmp1, tmp1, unroll * wordSize); ++ add(tmp, tmp, unroll * wordSize); + bnez(len, loop); +} + +// shift left by shamt and add +// Rd = (Rs1 << shamt) + Rs2 +void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseZba) { ++ if (UseRVB) { + if (shamt == 1) { + sh1add(Rd, Rs1, Rs2); + return; @@ -23945,14 +26449,14 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseZba && bits == 32) { -+ zext_w(dst, src); -+ return; -+ } -+ -+ if (UseZbb && bits == 16) { -+ zext_h(dst, src); -+ return; ++ if (UseRVB) { ++ if (bits == 16) { ++ zext_h(dst, src); ++ return; ++ } else if (bits == 32) { ++ zext_w(dst, src); ++ return; ++ } + } + + if (bits == 8) { @@ -23964,7 +26468,7 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseZbb) { ++ if (UseRVB) { + if (bits == 8) { + sext_b(dst, src); + return; @@ -24010,231 +26514,41 @@ index 000000000..5d6078bb3 + bind(done); +} + -+void MacroAssembler::load_constant_pool_cache(Register cpool, Register method) -+{ -+ ld(cpool, Address(method, Method::const_offset())); -+ ld(cpool, Address(cpool, ConstMethod::constants_offset())); -+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); ++void MacroAssembler::safepoint_ifence() { ++ ifence(); ++#ifndef PRODUCT ++ if (VerifyCrossModifyFence) { ++ // Clear the thread state. 
++ sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ } ++#endif +} + -+void MacroAssembler::load_max_stack(Register dst, Register method) -+{ -+ ld(dst, Address(xmethod, Method::const_offset())); -+ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); -+} -+ -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. -+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} -+ -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} -+ -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); -+ } -+ } -+} -+ -+// An oop arg. Must pass a handle not the oop itself -+void MacroAssembler::object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle -+ -+ if (src.first()->is_stack()) { -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } -+ -+ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ bnez(t0, notZero1); -+ mv(rHandle, zr); -+ bind(notZero1); -+ } else { -+ -+ // Oop is in a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL -+ -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } -+ -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; -+ -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } -+ -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ beqz(rOop, isZero); -+ la(rHandle, Address(sp, offset)); -+ bind(isZero); -+ } else { -+ Label notZero2; -+ la(rHandle, Address(sp, offset)); -+ bnez(rOop, notZero2); -+ mv(rHandle, zr); -+ bind(notZero2); -+ } -+ } -+ -+ // If arg is on the stack then place it otherwise it is already in correct reg. 
-+ if (dst.first()->is_stack()) { -+ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} -+ -+// A float arg may have to do float reg int reg conversion -+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ lwu(tmp, Address(fp, reg2offset_in(src.first()))); -+ sw(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+// A long move -+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} -+ -+// A double move -+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+void MacroAssembler::rt_call(address dest, Register tmp) { -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ la_patchable(tmp, RuntimeAddress(dest), offset); -+ jalr(x1, tmp, offset); ++#ifndef PRODUCT ++void MacroAssembler::verify_cross_modify_fence_not_required() { ++ if (VerifyCrossModifyFence) { ++ // Check if thread needs a cross modify fence. ++ lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); ++ Label fence_not_required; ++ beqz(t0, fence_not_required); ++ // If it does then fail. 
++ la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); ++ mv(c_rarg0, xthread); ++ jalr(t0); ++ bind(fence_not_required); + } +} ++#endif diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp new file mode 100644 -index 000000000..a4d5ce0e0 +index 00000000000..23e09475be1 --- /dev/null +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,975 @@ +@@ -0,0 +1,858 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -24260,8 +26574,11 @@ index 000000000..a4d5ce0e0 +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+#include "asm/assembler.inline.hpp" -+#include "code/vmreg.hpp" ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" ++#include "oops/compressedOops.hpp" ++#include "utilities/powerOfTwo.hpp" ++ +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending @@ -24274,15 +26591,17 @@ index 000000000..a4d5ce0e0 + } + virtual ~MacroAssembler() {} + -+ void safepoint_poll(Label& slow_path); -+ void safepoint_poll_acquire(Label& slow_path); ++ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++ ++ // Place a fence.i after code may have been modified due to a safepoint. ++ void safepoint_ifence(); + + // Alignment -+ void align(int modulus); ++ void align(int modulus, int extra_offset = 0); + + // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happend asychronously ++ // because signal based thread suspend/resume could happen asynchronously. 
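The note above is why enter() below reserves the frame before storing RA and FP, and why leave() reloads them before releasing the frame: a signal delivered between the two steps must never find saved registers below SP. A standalone model of the safe ordering, with a plain pointer standing in for the stack pointer (not the patch's code):

    #include <cstdint>

    // Reserve first, then store: saved data only ever lives at or above the
    // current stack pointer, mirroring enter() below.
    static void enter_frame(uint64_t*& sp, uint64_t ra, uint64_t fp) {
        sp -= 2;       // addi sp, sp, -2*wordSize
        sp[1] = ra;    // sd ra, wordSize(sp)
        sp[0] = fp;    // sd fp, 0(sp)
    }
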
+ void enter() { + addi(sp, sp, - 2 * wordSize); + sd(ra, Address(sp, wordSize)); @@ -24375,6 +26694,13 @@ index 000000000..a4d5ce0e0 + // thread in the default location (xthread) + void reset_last_Java_frame(bool clear_fp); + ++ void call_native(address entry_point, ++ Register arg_0); ++ void call_native_base( ++ address entry_point, // the entry point ++ Label* retaddr = NULL ++ ); ++ + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call @@ -24402,6 +26728,7 @@ index 000000000..a4d5ce0e0 + virtual void check_and_handle_earlyret(Register java_thread); + virtual void check_and_handle_popframe(Register java_thread); + ++ void resolve_weak_handle(Register result, Register tmp); + void resolve_oop_handle(Register result, Register tmp = x15); + void resolve_jobject(Register value, Register thread, Register tmp); + @@ -24415,7 +26742,7 @@ index 000000000..a4d5ce0e0 + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, + Address src, Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register tmp2, Register tmp3); ++ Register src, Register tmp1, Register thread_tmp); + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); @@ -24435,7 +26762,7 @@ index 000000000..a4d5ce0e0 + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + + void store_klass_gap(Register dst, Register src); + @@ -24459,6 +26786,7 @@ index 000000000..a4d5ce0e0 + + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register rs1, Register rs2, @@ -24481,15 +26809,13 @@ index 000000000..a4d5ce0e0 + RegisterOrConstant vtable_index, + Register method_result); + ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. 
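The form_address helper declared below hands back an addressing form for base + byte_offset; the scratch register is only needed when the offset cannot be encoded directly. A standalone sketch of that decision under the RV64 rule that load/store immediates are 12-bit signed values (this models the idea and is an assumption, not the patch's exact implementation):

    #include <cstdint>

    struct AddrForm { int base_reg; int64_t imm; };   // illustrative only

    static bool fits_simm12(int64_t off) { return off >= -2048 && off <= 2047; }

    // Small offsets become "imm(base)"; larger ones force base + offset to be
    // materialized into the scratch register, which then carries offset 0.
    static AddrForm form_address_sketch(int scratch_reg, int base_reg, int64_t byte_offset) {
        if (fits_simm12(byte_offset)) {
            return { base_reg, byte_offset };
        }
        return { scratch_reg, 0 };
    }
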
++ Address form_address(Register Rd, Register base, long byte_offset); ++ + // allocation -+ void eden_allocate( -+ Register obj, // result: pointer to object after successful allocation -+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise -+ int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false -+ ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise @@ -24500,6 +26826,15 @@ index 000000000..a4d5ce0e0 + bool is_far = false + ); + ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. @@ -24516,11 +26851,11 @@ index 000000000..a4d5ce0e0 + + // The reset of the type cehck; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure); @@ -24580,20 +26915,14 @@ index 000000000..a4d5ce0e0 + + void unimplemented(const char* what = ""); + -+ void should_not_reach_here() { stop("should not reach here"); } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ return RegisterOrConstant(tmp); -+ } ++ void should_not_reach_here() { stop("should not reach here"); } + + static address target_addr_for_insn(address insn_addr); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
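The patching helpers declared below rewrite a pc-relative branch in place once the label's target is known; the essential computation is the signed distance from the branch to the target and a check that it fits the instruction's reach. A standalone sketch for the jal case, whose 21-bit signed offset (bit 0 always zero) covers roughly +/-1 MiB:

    #include <cstdint>

    // True if `target` can be reached from `branch` with a single jal.
    static bool jal_reachable(const void* branch, const void* target) {
        int64_t off = (int64_t)((const char*)target - (const char*)branch);
        return (off & 1) == 0 && off >= -(1 << 20) && off < (1 << 20);
    }
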
-+ static int pd_patch_instruction_size(address branch, address target) ; -+ void pd_patch_instruction(address branch, address target) { ++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { @@ -24619,12 +26948,9 @@ index 000000000..a4d5ce0e0 + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: -+ // enum used for riscv--x86 linkage to define return type of x86 function -+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; -+ + // Standard pseudoinstruction + void nop(); -+ void mv(Register Rd, Register Rs) ; ++ void mv(Register Rd, Register Rs); + void notr(Register Rd, Register Rs); + void neg(Register Rd, Register Rs); + void negw(Register Rd, Register Rs); @@ -24671,11 +26997,11 @@ index 000000000..a4d5ce0e0 + void fsflagsi(unsigned imm); + + void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); + void blez(Register Rs, const address &dest); + void bgez(Register Rs, const address &dest); + void bltz(Register Rs, const address &dest); + void bgtz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); + void la(Register Rd, Label &label); + void la(Register Rd, const address &dest); + void la(Register Rd, const Address &adr); @@ -24705,9 +27031,29 @@ index 000000000..a4d5ce0e0 + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); -+ static RegSet call_clobbered_registers(); -+ void push_call_clobbered_registers(); -+ void pop_call_clobbered_registers(); ++ void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } ++ void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } ++#ifdef COMPILER2 ++ void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } ++ void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } ++#endif // COMPILER2 ++ ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. 
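The save/restore pair described above is essentially mask arithmetic: the call-clobbered registers form a bitset, the exclusion set is cleared out of it, and one word of stack is used per register that remains. A standalone model of that bookkeeping (the mask value here is illustrative, not the real RISC-V clobber set):

    #include <cstdint>

    static const uint32_t kCallClobberedMask = 0x3ffffcf0u;   // illustrative
    static const int      kWordSize          = 8;

    // Stack bytes needed once the excluded registers are dropped from the set.
    static int bytes_to_push(uint32_t exclude_mask) {
        uint32_t to_save = kCallClobberedMask & ~exclude_mask;
        int n = 0;
        for (; to_save != 0; to_save &= to_save - 1) {
            n++;                               // one word per saved register
        }
        return n * kWordSize;
    }
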
++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); ++ ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } ++ ++ void pusha(); ++ void popa(); + void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); + void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); + @@ -24721,18 +27067,15 @@ index 000000000..a4d5ce0e0 + } + + // mv -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ -+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } -+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } ++ template::value)> ++ inline void mv(Register Rd, T o) { ++ li(Rd, (int64_t)o); ++ } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address addr); + void mv(Register Rd, RegisterOrConstant src); + + // logic @@ -24740,26 +27083,6 @@ index 000000000..a4d5ce0e0 + void orrw(Register Rd, Register Rs1, Register Rs2); + void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); -+ -+ // support for argument shuffling -+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset); -+ -+ void rt_call(address dest, Register tmp = t0); -+ + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend @@ -24770,14 +27093,12 @@ index 000000000..a4d5ce0e0 + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); + void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+ // Support for serializing memory accesses between threads -+ void serialize_memory(Register thread, Register tmp1, Register tmp2); -+ + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; ++ void 
cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); + void cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, @@ -24817,39 +27138,14 @@ index 000000000..a4d5ce0e0 + void atomic_xchgwu(Register prev, Register newv, Register addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be t0 or t1 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); -+ + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + -+ //atomic -+ void atomic_incw(Register counter_addr, Register tmp1); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. -+ void far_call(Address entry, Register tmp = t0); -+ void far_jump(Address entry, Register tmp = t0); ++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + + static int far_branch_size() { + if (far_branches()) { @@ -24864,8 +27160,8 @@ index 000000000..a4d5ce0e0 + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); -+ sub(t1, sp, offset); -+ sd(zr, Address(t1)); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); + } + + void la_patchable(Register reg1, const Address &dest, int32_t &offset); @@ -24873,123 +27169,44 @@ index 000000000..a4d5ce0e0 + virtual void _call_Unimplemented(address call_site) { + mv(t1, call_site); + } ++ + #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + -+#ifdef COMPILER2 -+ void spill(Register Rx, bool is64, int offset) { -+ is64 ? sd(Rx, Address(sp, offset)) -+ : sw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? fsd(Rx, Address(sp, offset)) -+ : fsw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(Vx, t0); -+ } -+ -+ void unspill(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lw(Rx, Address(sp, offset)); -+ } -+ -+ void unspillu(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lwu(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? 
fld(Rx, Address(sp, offset)) -+ : flw(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(Vx, t0); -+ } -+ -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, -+ int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); -+ } -+ -+#endif // COMPILER2 -+ + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + void reserved_stack_check(); -+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); -+ void read_polling_page(Register r, address page, relocInfo::relocType rtype); -+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ // Return: the call PC -+ address trampoline_call(Address entry); ++ ++ void get_polling_page(Register dest, relocInfo::relocType rtype); ++ address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); -+ // Support for memory inc/dec -+ // n.b. increment/decrement calls with an Address destination will -+ // need to use a scratch register to load the value to be -+ // incremented. increment/decrement calls which add or subtract a -+ // constant value other than sign-extended 12-bit immediate will need -+ // to use a 2nd scratch register to hold the constant. so, an address -+ // increment/decrement may trash both t0 and t1. + -+ void increment(const Address dst, int64_t value = 1); -+ void incrementw(const Address dst, int32_t value = 1); ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+ void decrement(const Address dst, int64_t value = 1); -+ void decrementw(const Address dst, int32_t value = 1); + void cmpptr(Register src1, Address src2, Label& equal); -+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop -+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+#ifdef COMPILER2 -+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); + -+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++ void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); ++ void load_method_holder_cld(Register result, Register method); ++ void load_method_holder(Register holder, Register method); + -+ void string_equals(Register a1, Register a2, Register result, Register cnt1, -+ int elem_size); -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, int ae); -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register 
tmp5, Register tmp6, -+ Register result, int ae); -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ void compute_index(Register str1, Register trailing_zero, Register match_mask, ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); + void compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2); ++ ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry = t0); -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2); ++ Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, @@ -25008,49 +27225,21 @@ index 000000000..a4d5ce0e0 + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); -+#endif // COMPILER2 ++#endif ++ + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ ++ + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ void zero_words(Register base, uint64_t cnt); ++ ++ void zero_words(Register base, u_int64_t cnt); + address zero_words(Register ptr, Register cnt); + void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp1); ++ void zero_memory(Register addr, Register len, Register tmp); + -+ // shift left by shamt and add ++ // shift left by shamt and add + void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; -+ -+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); -+ -+ // cmp -+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false); -+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false); -+ -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ -+ // intrinsic methods implemented by vector instructions -+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void string_compare_v(Register str1, Register 
str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm); -+ -+ void clear_array_v(Register base, Register cnt); -+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp); -+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp); -+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp); -+ -+ address has_negatives_v(Register ary, Register len, Register result, Register tmp); -+#endif -+ + // Here the float instructions with safe deal with some exceptions. + // e.g. convert from NaN, +Inf, -Inf to int, float, double + // will trigger exception, we need to deal with these situations @@ -25138,28 +27327,37 @@ index 000000000..a4d5ce0e0 + // if [src1 < src2], dst = -1; + void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+ void load_constant_pool_cache(Register cpool, Register method); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+ void load_max_stack(Register dst, Register method); ++ int push_vp(unsigned int bitset, Register stack); ++ int pop_vp(unsigned int bitset, Register stack); ++ ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + +private: -+ void load_prototype_header(Register dst, Register src); -+ void repne_scan(Register addr, Register value, Register count, Register tmp); + +#ifdef ASSERT -+ // Macro short-hand support to clean-up after a failed call to trampoline ++ // Template short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). -+#define reset_labels1(L1) L1.reset() -+#define reset_labels2(L1, L2) L1.reset(); L2.reset() -+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) -+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) ++ template ++ void reset_labels(Label& lbl, More&... more) { ++ lbl.reset(); reset_labels(more...); ++ } ++ template ++ void reset_labels(Label& lbl) { ++ lbl.reset(); ++ } +#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space. + bool is_valid_riscv64_address(address addr) { -+ // sv48: must have bits 63-48 all equal to bit 47 -+ return ((uintptr_t)addr >> 47) == 0; ++ return ((uintptr_t)addr >> 48) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { @@ -25178,10 +27376,8 @@ index 000000000..a4d5ce0e0 + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+#ifdef COMPILER2 -+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); -+#endif // COMPILER2 ++ // Check the current thread doesn't need a cross modify fence. 
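The cross-modify-fence verification declared below (implemented in macroAssembler_riscv.cpp earlier in this patch) reduces to reading a per-thread byte flag and failing if it is still set, i.e. if the thread still owes an instruction fence (fence.i) after code was modified. A standalone model with an illustrative struct in place of JavaThread:

    #include <cassert>

    struct ThreadModel {
        bool requires_cross_modify_fence;   // stands in for the JavaThread flag
    };

    // Mirrors the lbu + beqz sequence: a set flag means verification fails.
    static void verify_no_fence_owed(const ThreadModel& t) {
        assert(!t.requires_cross_modify_fence && "cross-modify fence still required");
    }
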
++ void verify_cross_modify_fence_not_required() PRODUCT_RETURN; +}; + +#ifdef ASSERT @@ -25205,17 +27401,17 @@ index 000000000..a4d5ce0e0 + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; ++ +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp new file mode 100644 -index 000000000..fc2b191c0 +index 00000000000..ef968ccd96d --- /dev/null +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,30 @@ +@@ -0,0 +1,31 @@ +/* -+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25241,17 +27437,194 @@ index 000000000..fc2b191c0 +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + ++// Still empty. ++ +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +new file mode 100644 +index 00000000000..23a75d20502 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_MATCHER_RISCV_HPP ++#define CPU_RISCV_MATCHER_RISCV_HPP ++ ++ // Defined within class Matcher ++ ++ // false => size gets scaled to BytesPerLong, ok. ++ static const bool init_array_count_is_in_bytes = false; ++ ++ // Whether this platform implements the scalable vector feature ++ static const bool implements_scalable_vector = true; ++ ++ static const bool supports_scalable_vector() { ++ return UseRVV; ++ } ++ ++ // riscv supports misaligned vectors store/load. ++ static constexpr bool misaligned_vectors_ok() { ++ return true; ++ } ++ ++ // Whether code generation need accurate ConvI2L types. ++ static const bool convi2l_type_required = false; ++ ++ // Does the CPU require late expand (see block.cpp for description of late expand)? 
++ static const bool require_postalloc_expand = false; ++ ++ // Do we need to mask the count passed to shift instructions or does ++ // the cpu only look at the lower 5/6 bits anyway? ++ static const bool need_masked_shift_count = false; ++ ++ // No support for generic vector operands. ++ static const bool supports_generic_vector_operands = false; ++ ++ static constexpr bool isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++ } ++ ++ // Use conditional move (CMOVL) ++ static constexpr int long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++ } ++ ++ static constexpr int float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++ } ++ ++ // This affects two different things: ++ // - how Decode nodes are matched ++ // - how ImplicitNullCheck opportunities are recognized ++ // If true, the matcher will try to remove all Decodes and match them ++ // (as operands) into nodes. NullChecks are not prepared to deal with ++ // Decodes by final_graph_reshaping(). ++ // If false, final_graph_reshaping() forces the decode behind the Cmp ++ // for a NullCheck. The matcher matches the Decode node into a register. ++ // Implicit_null_check optimization moves the Decode along with the ++ // memory operation back up before the NullCheck. ++ static bool narrow_oop_use_complex_address() { ++ return CompressedOops::shift() == 0; ++ } ++ ++ static bool narrow_klass_use_complex_address() { ++ return false; ++ } ++ ++ static bool const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return CompressedOops::base() == NULL; ++ } ++ ++ static bool const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return CompressedKlassPointers::base() == NULL; ++ } ++ ++ // Is it better to copy float constants, or load them directly from ++ // memory? Intel can load a float constant from a direct address, ++ // requiring no extra registers. Most RISCs will have to materialize ++ // an address into a register first, so they would do better to copy ++ // the constant from stack. ++ static const bool rematerialize_float_constants = false; ++ ++ // If CPU can load and store mis-aligned doubles directly then no ++ // fixup is needed. Else we split the double into 2 integer pieces ++ // and move it piece-by-piece. Only happens when passing doubles into ++ // C code as the Java calling convention forces doubles to be aligned. ++ static const bool misaligned_doubles_ok = true; ++ ++ // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. ++ static const bool strict_fp_requires_explicit_rounding = false; ++ ++ // Are floats converted to double when stored to stack during ++ // deoptimization? ++ static constexpr bool float_in_double() { return false; } ++ ++ // Do ints take an entire long register or just half? ++ // The relevant question is how the int is callee-saved: ++ // the whole long is written but de-opt'ing will have to extract ++ // the relevant 32 bits. ++ static const bool int_in_long = true; ++ ++ // Does the CPU supports vector variable shift instructions? ++ static constexpr bool supports_vector_variable_shifts(void) { ++ return false; ++ } ++ ++ // Does the CPU supports vector variable rotate instructions? 
++ static constexpr bool supports_vector_variable_rotates(void) { ++ return false; ++ } ++ ++ // Does the CPU supports vector constant rotate instructions? ++ static constexpr bool supports_vector_constant_rotates(int shift) { ++ return false; ++ } ++ ++ // Does the CPU supports vector unsigned comparison instructions? ++ static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { ++ return false; ++ } ++ ++ // Some microarchitectures have mask registers used on vectors ++ static const bool has_predicated_vectors(void) { ++ return false; ++ } ++ ++ // true means we have fast l2f convers ++ // false means that conversion is done by runtime call ++ static constexpr bool convL2FSupported(void) { ++ return true; ++ } ++ ++ // Implements a variant of EncodeISOArrayNode that encode ASCII only ++ static const bool supports_encode_ascii_array = false; ++ ++ // Returns pre-selection estimated size of a vector operation. ++ static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { ++ return 0; ++ } ++ ++#endif // CPU_RISCV_MATCHER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp new file mode 100644 -index 000000000..d049193d4 +index 00000000000..1f7c0c87c21 --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,440 @@ +@@ -0,0 +1,461 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25277,12 +27650,15 @@ index 000000000..d049193d4 +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmClasses.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -25295,11 +27671,12 @@ index 000000000..d049193d4 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); + if (VerifyMethodHandles) { -+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + } -+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset())); +} + +#ifdef ASSERT @@ -25314,10 +27691,11 @@ index 000000000..d049193d4 + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, ++ Register obj, vmClassID klass_id, + const char* error_message) { -+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); -+ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ assert_cond(_masm != NULL); ++ InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); ++ Klass* klass = vmClasses::klass_at(klass_id); + Register temp = t1; + Register temp2 = t0; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; @@ -25338,12 +27716,13 @@ index 000000000..d049193d4 + BLOCK_COMMENT("} verify_klass"); +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(method == xmethod, "interpreter calling convention"); + Label L_no_such_method; + __ beqz(xmethod, L_no_such_method); @@ -25374,6 +27753,7 @@ index 000000000..d049193d4 + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
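The loads that follow walk a fixed chain of heap objects to find the invoker: MethodHandle.form -> LambdaForm.vmentry -> MemberName.method -> ResolvedMethodName.vmtarget, which finally yields the Method* to jump to. A standalone sketch of that chain with illustrative struct layouts (the real objects are Java heap objects, not C++ structs):

    struct Method_;                                         // stands in for Method*
    struct ResolvedMethodName_ { Method_* vmtarget; };
    struct MemberName_         { ResolvedMethodName_* method; };
    struct LambdaForm_         { MemberName_* vmentry; };
    struct MethodHandle_       { LambdaForm_* form; };

    // The invoker reached by the four heap loads below.
    static Method_* resolve_invoker(MethodHandle_* mh) {
        return mh->form->vmentry->method->vmtarget;
    }
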
@@ -25383,13 +27763,13 @@ index 000000000..d049193d4 + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); -+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); + __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); + __ verify_oop(method_temp); -+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); + __ verify_oop(method_temp); -+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack @@ -25399,7 +27779,7 @@ index 000000000..d049193d4 + sizeof(u2), /*is_signed*/ false); + Label L; + __ ld(t0, __ argument_address(temp2, -1)); -+ __ oop_equal(recv, t0, L); ++ __ beq(recv, t0, L); + __ ld(x10, __ argument_address(temp2, -1)); + __ ebreak(); + __ BIND(L); @@ -25412,6 +27792,7 @@ index 000000000..d049193d4 +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || @@ -25423,10 +27804,18 @@ index 000000000..d049193d4 + return NULL; + } + ++ // No need in interpreter entry for linkToNative for now. ++ // Interpreter calls compiled entry through i2c. ++ if (iid == vmIntrinsics::_linkToNative) { ++ __ ebreak(); ++ return NULL; ++ } ++ + // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) + // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away + Register argp = x13; // argument list ptr, live on error paths + Register mh = x11; // MH receiver; dies quickly and is recycled + @@ -25476,7 +27865,6 @@ index 000000000..d049193d4 + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ + } else { + // Adjust argument list by popping the trailing MemberName argument. 
+ Register recv = noreg; @@ -25499,6 +27887,7 @@ index 000000000..d049193d4 + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = x7; @@ -25514,21 +27903,24 @@ index 000000000..d049193d4 + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + -+ if (iid == vmIntrinsics::_invokeBasic) { ++ if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { ++ if (iid == vmIntrinsics::_linkToNative) { ++ assert(for_compiler_entry, "only compiler entry is supported"); ++ } + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) -+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + -+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); -+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); -+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); -+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { @@ -25592,7 +27984,8 @@ index 000000000..d049193d4 + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); + break; + -+ case vmIntrinsics::_linkToVirtual: { ++ case vmIntrinsics::_linkToVirtual: ++ { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + @@ -25619,7 +28012,8 @@ index 000000000..d049193d4 + break; + } + -+ case vmIntrinsics::_linkToInterface: { ++ case vmIntrinsics::_linkToInterface: ++ { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { @@ -25650,7 +28044,7 @@ index 000000000..d049193d4 + } + + default: -+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid)); + break; + } + @@ -25671,7 +28065,7 @@ index 000000000..d049193d4 + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, -+ oop mh, ++ oopDesc* mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + @@ -25690,13 +28084,12 @@ index 000000000..d049193d4 +#endif //PRODUCT diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp new file mode 100644 -index 000000000..8ed69efe8 +index 
00000000000..f73aba29d67 --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,58 @@ +@@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25733,11 +28126,11 @@ index 000000000..8ed69efe8 + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, -+ Register obj, SystemDictionary::WKID klass_id, ++ Register obj, vmClassID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { -+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + @@ -25754,13 +28147,13 @@ index 000000000..8ed69efe8 + bool for_compiler_entry); diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp new file mode 100644 -index 000000000..4b1573130 +index 00000000000..0a05c577860 --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,404 @@ +@@ -0,0 +1,429 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25786,10 +28179,12 @@ index 000000000..4b1573130 + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" @@ -25847,7 +28242,7 @@ index 000000000..4b1573130 + is_addi_at(instr + instruction_size) && // Addi + is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 + is_addi_at(instr + instruction_size * 3) && // Addi -+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 ++ is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 + (is_addi_at(instr + instruction_size * 5) || + is_jalr_at(instr + instruction_size * 5) || + is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load @@ -25904,7 +28299,8 @@ index 000000000..4b1573130 +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || -+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || ++ CompiledICLocker::is_safe(addr_at(0)), + "concurrent code patching"); + + ResourceMark rm; @@ -25981,24 +28377,20 @@ index 000000000..4b1573130 + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. 
+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ if(cb != NULL) { -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; + } + } -+ } else { -+ ShouldNotReachHere(); + } +} + @@ -26034,12 +28426,28 @@ index 000000000..4b1573130 + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about ++ // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) ++ // i.e. jump to 0 when we need leave space for a wide immediate ++ // load ++ ++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; ++ } + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? (address) -1 : dest; + return dest; +}; + ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); ++ ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} ++ +//------------------------------------------------------------------- + +address NativeGeneralJump::jump_destination() const { @@ -26048,9 +28456,14 @@ index 000000000..4b1573130 + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about ++ // As a special case we also use jump to 0 when first generating ++ // a general jump ++ ++ // return -1 if jump to self or to 0 ++ if ((dest == (address) this) || dest == 0) { ++ dest = (address) -1; ++ } + -+ // return -1 if jump to self -+ dest = (dest == (address) this) ? 
(address) -1 : dest; + return dest; +} + @@ -26061,6 +28474,7 @@ index 000000000..4b1573130 +} + +bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr @@ -26077,6 +28491,10 @@ index 000000000..4b1573130 + *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} ++ +//------------------------------------------------------------------- + +// MT-safe inserting of a jump over a jump or a nop (used by @@ -26164,14 +28582,14 @@ index 000000000..4b1573130 +} diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp new file mode 100644 -index 000000000..e8a4e0a46 +index 00000000000..718b2e3de6c --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,561 @@ +@@ -0,0 +1,572 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -26211,16 +28629,20 @@ index 000000000..e8a4e0a46 +// - - NativeIllegalInstruction +// - - NativeCallTrampolineStub +// - - NativeMembar ++// - - NativeFenceI + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
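All of these classes inspect raw instruction words through the extract_* helpers, which slice out the fixed fields of the RV64 base encoding: opcode in bits [6:0], rd in [11:7], funct3 in [14:12] and rs1 in [19:15]. A small standalone equivalent, plus the jal-with-ra test that corresponds to a direct call:

    #include <cstdint>

    static inline uint32_t bits(uint32_t insn, int hi, int lo) {
        return (insn >> lo) & ((1u << (hi - lo + 1)) - 1);
    }

    static inline uint32_t opcode_of(uint32_t insn) { return bits(insn, 6, 0);   }
    static inline uint32_t rd_of(uint32_t insn)     { return bits(insn, 11, 7);  }
    static inline uint32_t funct3_of(uint32_t insn) { return bits(insn, 14, 12); }
    static inline uint32_t rs1_of(uint32_t insn)    { return bits(insn, 19, 15); }

    // A jal whose destination register is x1 (ra) is a direct call, matching
    // the is_jal()/is_call() checks used by NativeCall.
    static inline bool is_jal_call(uint32_t insn) {
        return opcode_of(insn) == 0x6f && rd_of(insn) == 1;
    }
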
+ ++class NativeCall; ++ +class NativeInstruction { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { -+ instruction_size = 4 ++ instruction_size = 4, ++ compressed_instruction_size = 2, + }; + + juint encoding() const { @@ -26246,7 +28668,7 @@ index 000000000..e8a4e0a46 + static bool is_slli_shift_at(address instr, uint32_t shift) { + assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + @@ -26341,9 +28763,10 @@ index 000000000..e8a4e0a46 + // load + static bool check_load_pc_relative_data_dependency(address instr) { + address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ address load = auipc + instruction_size; + -+ return extract_rs1(last_instr) == extract_rd(auipc); ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + + static bool is_movptr_at(address instr); @@ -26364,6 +28787,7 @@ index 000000000..e8a4e0a46 + inline bool is_jump_or_nop(); + bool is_safepoint_poll(); + bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } @@ -26422,27 +28846,24 @@ index 000000000..e8a4e0a46 + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + -+ void set_destination(address dest) { -+ if (is_jal()) { -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "should be aligned"); -+ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ return; -+ } -+ ShouldNotReachHere(); ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); + } + -+ void verify_alignment() { ; } -+ void verify(); -+ void print(); ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address addr); @@ -26611,15 +29032,13 @@ index 000000000..e8a4e0a46 + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(instruction_size); } + address jump_destination() const; ++ 
void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + -+ // Unit testing stuff -+ static void test() {} -+ + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry @@ -26699,7 +29118,9 @@ index 000000000..e8a4e0a46 + // 3). check if the offset in ld[31:20] equals the data_offset + assert_cond(addr != NULL); + const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && @@ -26728,16 +29149,74 @@ index 000000000..e8a4e0a46 + return (NativeMembar*)addr; +} + ++class NativeFenceI : public NativeInstruction { ++public: ++ static inline int instruction_size() { ++ // 2 for fence.i + fence ++ return (UseConservativeFence ? 2 : 1) * NativeInstruction::instruction_size; ++ } ++}; ++ +#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +new file mode 100644 +index 00000000000..26c1edc36ff +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/registerMap.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { ++ if (base_reg->is_VectorRegister()) { ++ assert(base_reg->is_concrete(), "must pass base reg"); ++ int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register; ++ intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; ++ address base_location = location(base_reg); ++ if (base_location != NULL) { ++ return base_location + offset_in_bytes; ++ } else { ++ return NULL; ++ } ++ } else { ++ return location(base_reg->next(slot_idx)); ++ } ++} diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp new file mode 100644 -index 000000000..04a36c1c7 +index 00000000000..f34349811a9 --- /dev/null +++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,46 @@ +@@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -26770,10 +29249,8 @@ index 000000000..04a36c1c7 + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. -+ // Since there is none, we just return NULL. -+ // See registerMap_riscv.hpp for an example of grabbing registers -+ // from register save areas of a standard layout. -+ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg reg) const { return NULL; } ++ address pd_location(VMReg base_reg, int slot_idx) const; + + // no PD state to clear or copy: + void pd_clear() {} @@ -26781,215 +29258,15 @@ index 000000000..04a36c1c7 + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -new file mode 100644 -index 000000000..b30c1b107 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "asm/assembler.hpp" -+#include "asm/macroAssembler.inline.hpp" -+#include "asm/register.hpp" -+#include "interp_masm_riscv.hpp" -+#include "register_riscv.hpp" -+ -+REGISTER_DEFINITION(Register, noreg); -+ -+REGISTER_DEFINITION(Register, x0); -+REGISTER_DEFINITION(Register, x1); -+REGISTER_DEFINITION(Register, x2); -+REGISTER_DEFINITION(Register, x3); -+REGISTER_DEFINITION(Register, x4); -+REGISTER_DEFINITION(Register, x5); -+REGISTER_DEFINITION(Register, x6); -+REGISTER_DEFINITION(Register, x7); -+REGISTER_DEFINITION(Register, x8); -+REGISTER_DEFINITION(Register, x9); -+REGISTER_DEFINITION(Register, x10); -+REGISTER_DEFINITION(Register, x11); -+REGISTER_DEFINITION(Register, x12); -+REGISTER_DEFINITION(Register, x13); -+REGISTER_DEFINITION(Register, x14); -+REGISTER_DEFINITION(Register, x15); -+REGISTER_DEFINITION(Register, x16); -+REGISTER_DEFINITION(Register, x17); -+REGISTER_DEFINITION(Register, x18); -+REGISTER_DEFINITION(Register, x19); -+REGISTER_DEFINITION(Register, x20); -+REGISTER_DEFINITION(Register, x21); -+REGISTER_DEFINITION(Register, x22); -+REGISTER_DEFINITION(Register, x23); -+REGISTER_DEFINITION(Register, x24); -+REGISTER_DEFINITION(Register, x25); -+REGISTER_DEFINITION(Register, x26); -+REGISTER_DEFINITION(Register, x27); -+REGISTER_DEFINITION(Register, x28); -+REGISTER_DEFINITION(Register, x29); -+REGISTER_DEFINITION(Register, x30); -+REGISTER_DEFINITION(Register, x31); -+ -+REGISTER_DEFINITION(FloatRegister, fnoreg); -+ -+REGISTER_DEFINITION(FloatRegister, f0); -+REGISTER_DEFINITION(FloatRegister, f1); -+REGISTER_DEFINITION(FloatRegister, f2); -+REGISTER_DEFINITION(FloatRegister, f3); -+REGISTER_DEFINITION(FloatRegister, f4); -+REGISTER_DEFINITION(FloatRegister, f5); -+REGISTER_DEFINITION(FloatRegister, f6); -+REGISTER_DEFINITION(FloatRegister, f7); -+REGISTER_DEFINITION(FloatRegister, f8); -+REGISTER_DEFINITION(FloatRegister, f9); -+REGISTER_DEFINITION(FloatRegister, f10); -+REGISTER_DEFINITION(FloatRegister, f11); -+REGISTER_DEFINITION(FloatRegister, f12); -+REGISTER_DEFINITION(FloatRegister, f13); -+REGISTER_DEFINITION(FloatRegister, f14); -+REGISTER_DEFINITION(FloatRegister, f15); -+REGISTER_DEFINITION(FloatRegister, f16); -+REGISTER_DEFINITION(FloatRegister, f17); -+REGISTER_DEFINITION(FloatRegister, f18); -+REGISTER_DEFINITION(FloatRegister, f19); -+REGISTER_DEFINITION(FloatRegister, f20); -+REGISTER_DEFINITION(FloatRegister, f21); -+REGISTER_DEFINITION(FloatRegister, f22); -+REGISTER_DEFINITION(FloatRegister, f23); -+REGISTER_DEFINITION(FloatRegister, f24); -+REGISTER_DEFINITION(FloatRegister, f25); -+REGISTER_DEFINITION(FloatRegister, f26); -+REGISTER_DEFINITION(FloatRegister, f27); -+REGISTER_DEFINITION(FloatRegister, f28); -+REGISTER_DEFINITION(FloatRegister, f29); -+REGISTER_DEFINITION(FloatRegister, f30); -+REGISTER_DEFINITION(FloatRegister, f31); -+ -+REGISTER_DEFINITION(VectorRegister, vnoreg); -+ -+REGISTER_DEFINITION(VectorRegister, v0); -+REGISTER_DEFINITION(VectorRegister, v1); -+REGISTER_DEFINITION(VectorRegister, v2); -+REGISTER_DEFINITION(VectorRegister, v3); -+REGISTER_DEFINITION(VectorRegister, v4); -+REGISTER_DEFINITION(VectorRegister, v5); -+REGISTER_DEFINITION(VectorRegister, v6); -+REGISTER_DEFINITION(VectorRegister, v7); -+REGISTER_DEFINITION(VectorRegister, v8); -+REGISTER_DEFINITION(VectorRegister, v9); 
-+REGISTER_DEFINITION(VectorRegister, v10); -+REGISTER_DEFINITION(VectorRegister, v11); -+REGISTER_DEFINITION(VectorRegister, v12); -+REGISTER_DEFINITION(VectorRegister, v13); -+REGISTER_DEFINITION(VectorRegister, v14); -+REGISTER_DEFINITION(VectorRegister, v15); -+REGISTER_DEFINITION(VectorRegister, v16); -+REGISTER_DEFINITION(VectorRegister, v17); -+REGISTER_DEFINITION(VectorRegister, v18); -+REGISTER_DEFINITION(VectorRegister, v19); -+REGISTER_DEFINITION(VectorRegister, v20); -+REGISTER_DEFINITION(VectorRegister, v21); -+REGISTER_DEFINITION(VectorRegister, v22); -+REGISTER_DEFINITION(VectorRegister, v23); -+REGISTER_DEFINITION(VectorRegister, v24); -+REGISTER_DEFINITION(VectorRegister, v25); -+REGISTER_DEFINITION(VectorRegister, v26); -+REGISTER_DEFINITION(VectorRegister, v27); -+REGISTER_DEFINITION(VectorRegister, v28); -+REGISTER_DEFINITION(VectorRegister, v29); -+REGISTER_DEFINITION(VectorRegister, v30); -+REGISTER_DEFINITION(VectorRegister, v31); -+ -+REGISTER_DEFINITION(Register, c_rarg0); -+REGISTER_DEFINITION(Register, c_rarg1); -+REGISTER_DEFINITION(Register, c_rarg2); -+REGISTER_DEFINITION(Register, c_rarg3); -+REGISTER_DEFINITION(Register, c_rarg4); -+REGISTER_DEFINITION(Register, c_rarg5); -+REGISTER_DEFINITION(Register, c_rarg6); -+REGISTER_DEFINITION(Register, c_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, c_farg0); -+REGISTER_DEFINITION(FloatRegister, c_farg1); -+REGISTER_DEFINITION(FloatRegister, c_farg2); -+REGISTER_DEFINITION(FloatRegister, c_farg3); -+REGISTER_DEFINITION(FloatRegister, c_farg4); -+REGISTER_DEFINITION(FloatRegister, c_farg5); -+REGISTER_DEFINITION(FloatRegister, c_farg6); -+REGISTER_DEFINITION(FloatRegister, c_farg7); -+ -+REGISTER_DEFINITION(Register, j_rarg0); -+REGISTER_DEFINITION(Register, j_rarg1); -+REGISTER_DEFINITION(Register, j_rarg2); -+REGISTER_DEFINITION(Register, j_rarg3); -+REGISTER_DEFINITION(Register, j_rarg4); -+REGISTER_DEFINITION(Register, j_rarg5); -+REGISTER_DEFINITION(Register, j_rarg6); -+REGISTER_DEFINITION(Register, j_rarg7); -+ -+REGISTER_DEFINITION(FloatRegister, j_farg0); -+REGISTER_DEFINITION(FloatRegister, j_farg1); -+REGISTER_DEFINITION(FloatRegister, j_farg2); -+REGISTER_DEFINITION(FloatRegister, j_farg3); -+REGISTER_DEFINITION(FloatRegister, j_farg4); -+REGISTER_DEFINITION(FloatRegister, j_farg5); -+REGISTER_DEFINITION(FloatRegister, j_farg6); -+REGISTER_DEFINITION(FloatRegister, j_farg7); -+ -+REGISTER_DEFINITION(Register, zr); -+REGISTER_DEFINITION(Register, gp); -+REGISTER_DEFINITION(Register, tp); -+REGISTER_DEFINITION(Register, xmethod); -+REGISTER_DEFINITION(Register, ra); -+REGISTER_DEFINITION(Register, sp); -+REGISTER_DEFINITION(Register, fp); -+REGISTER_DEFINITION(Register, xheapbase); -+REGISTER_DEFINITION(Register, xcpool); -+REGISTER_DEFINITION(Register, xmonitors); -+REGISTER_DEFINITION(Register, xlocals); -+REGISTER_DEFINITION(Register, xthread); -+REGISTER_DEFINITION(Register, xbcp); -+REGISTER_DEFINITION(Register, xdispatch); -+REGISTER_DEFINITION(Register, esp); -+ -+REGISTER_DEFINITION(Register, t0); -+REGISTER_DEFINITION(Register, t1); -+REGISTER_DEFINITION(Register, t2); diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 000000000..76215ef2a +index 00000000000..f8116e9df8c --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,69 @@ +@@ -0,0 +1,73 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
-+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27015,8 +29292,13 @@ index 000000000..76215ef2a +#include "precompiled.hpp" +#include "register_riscv.hpp" + ++REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++ +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; ++ +const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; @@ -27027,8 +29309,8 @@ index 000000000..76215ef2a + + +const char* RegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", + "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", + "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", + "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" @@ -27037,7 +29319,7 @@ index 000000000..76215ef2a +} + +const char* FloatRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", @@ -27047,7 +29329,7 @@ index 000000000..76215ef2a +} + +const char* VectorRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -27057,13 +29339,12 @@ index 000000000..76215ef2a +} diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp new file mode 100644 -index 000000000..8beba6776 +index 00000000000..a9200cac647 --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,337 @@ +@@ -0,0 +1,324 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -27111,41 +29392,57 @@ index 000000000..8beba6776 + +// Use Register as shortcut +class RegisterImpl; -+typedef RegisterImpl* Register; ++typedef const RegisterImpl* Register; + -+inline Register as_Register(int encoding) { -+ return (Register)(intptr_t) encoding; -+} ++inline constexpr Register as_Register(int encoding); + +class RegisterImpl: public AbstractRegisterImpl { ++ static constexpr Register first(); ++ + public: + enum { + number_of_registers = 32, -+ number_of_byte_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // derived registers, offsets, and addresses -+ Register successor() const { return as_Register(encoding() + 1); } ++ const Register successor() const { return this + 1; } + + // construction -+ inline friend Register as_Register(int encoding); ++ inline friend constexpr Register as_Register(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; -+ int encoding_nocheck() const { return (intptr_t)this; } + -+ // Return the bit which represents this register. This is intended -+ // to be ORed into a bitmask: for usage see class RegSet below. -+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } +}; + -+// The integer registers of the riscv architecture ++REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++ ++// The integer registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + @@ -27184,36 +29481,58 @@ index 000000000..8beba6776 + +// Use FloatRegister as shortcut +class FloatRegisterImpl; -+typedef FloatRegisterImpl* FloatRegister; ++typedef const FloatRegisterImpl* FloatRegister; + -+inline FloatRegister as_FloatRegister(int encoding) { -+ return (FloatRegister)(intptr_t) encoding; -+} ++inline constexpr FloatRegister as_FloatRegister(int encoding); + +// The implementation of floating point registers for the architecture +class FloatRegisterImpl: public AbstractRegisterImpl { ++ static constexpr FloatRegister first(); ++ + public: + enum { + number_of_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // construction -+ inline friend FloatRegister as_FloatRegister(int encoding); ++ inline friend constexpr FloatRegister as_FloatRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ FloatRegister successor() const { ++ return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); ++ } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } +}; + ++REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); ++ +// The float registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); @@ -27253,14 +29572,14 @@ index 000000000..8beba6776 + +// Use VectorRegister as shortcut +class VectorRegisterImpl; -+typedef VectorRegisterImpl* VectorRegister; ++typedef const VectorRegisterImpl* VectorRegister; + -+inline VectorRegister as_VectorRegister(int encoding) { -+ return (VectorRegister)(intptr_t) encoding; -+} ++inline constexpr VectorRegister as_VectorRegister(int encoding); + -+// The implementation of vector registers for riscv-v ++// The implementation of vector registers for RVV +class VectorRegisterImpl: public AbstractRegisterImpl { ++ static constexpr VectorRegister first(); ++ + public: + enum { + number_of_registers = 32, @@ -27268,21 +29587,23 @@ index 000000000..8beba6776 + }; + + // construction -+ inline friend VectorRegister as_VectorRegister(int encoding); ++ inline friend constexpr VectorRegister as_VectorRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses -+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } ++ VectorRegister successor() const { return this + 1; } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return this - first(); } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +}; + ++REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); ++ +// The vector registers of RVV 
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + @@ -27341,73 +29662,21 @@ index 000000000..8beba6776 + static const int max_vpr; +}; + -+// A set of registers -+class RegSet { -+ uint32_t _bitset; -+ -+public: -+ RegSet(uint32_t bitset) : _bitset(bitset) { } -+ -+ RegSet() : _bitset(0) { } -+ -+ RegSet(Register r1) : _bitset(r1->bit()) { } -+ -+ ~RegSet() {} -+ -+ RegSet operator+(const RegSet aSet) const { -+ RegSet result(_bitset | aSet._bitset); -+ return result; -+ } -+ -+ RegSet operator-(const RegSet aSet) const { -+ RegSet result(_bitset & ~aSet._bitset); -+ return result; -+ } -+ -+ RegSet &operator+=(const RegSet aSet) { -+ *this = *this + aSet; -+ return *this; -+ } -+ -+ static RegSet of(Register r1) { -+ return RegSet(r1); -+ } -+ -+ static RegSet of(Register r1, Register r2) { -+ return of(r1) + r2; -+ } -+ -+ static RegSet of(Register r1, Register r2, Register r3) { -+ return of(r1, r2) + r3; -+ } -+ -+ static RegSet of(Register r1, Register r2, Register r3, Register r4) { -+ return of(r1, r2, r3) + r4; -+ } -+ -+ static RegSet range(Register start, Register end) { -+ uint32_t bits = ~0; -+ bits <<= start->encoding(); -+ bits <<= (31 - end->encoding()); -+ bits >>= (31 - end->encoding()); -+ -+ return RegSet(bits); -+ } -+ -+ uint32_t bits() const { return _bitset; } -+}; ++typedef AbstractRegSet RegSet; ++typedef AbstractRegSet FloatRegSet; ++typedef AbstractRegSet VectorRegSet; + +#endif // CPU_RISCV_REGISTER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp new file mode 100644 -index 000000000..f49fd6439 +index 00000000000..228a64eae2c --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp @@ -0,0 +1,113 @@ +/* -+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27444,10 +29713,10 @@ index 000000000..f49fd6439 + + int bytes; + -+ switch(type()) { ++ switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when immediate == false ++ // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); @@ -27519,13 +29788,12 @@ index 000000000..f49fd6439 +} diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 000000000..c30150e0a +index 00000000000..840ed935d88 --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,45 @@ +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -27557,8 +29825,8 @@ index 000000000..c30150e0a + enum { + // Relocations are byte-aligned. + offset_unit = 1, -+ // We don't use format(). -+ format_width = 0 ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 + }; + + public: @@ -27570,14 +29838,14 @@ index 000000000..c30150e0a +#endif // CPU_RISCV_RELOCINFO_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad new file mode 100644 -index 000000000..137e9b7c7 +index 00000000000..588887e1d96 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10685 @@ +@@ -0,0 +1,10611 @@ +// -+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it @@ -27644,8 +29912,8 @@ index 000000000..137e9b7c7 +// +// follow the C1 compiler in making registers +// -+// x7, x9-x17, x28-x31 volatile (caller save) -+// x0-x4, x8, x27 system (no save, no allocate) ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) +// x5-x6 non-allocatable (so we can use them as temporary regs) + +// @@ -27658,8 +29926,8 @@ index 000000000..137e9b7c7 + +reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr +reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp +reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); +reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp @@ -27812,8 +30080,6 @@ index 000000000..137e9b7c7 +// bits. The 'physical' RVV vector register length is detected during +// startup, so the register allocator is able to identify the correct +// number of bytes needed for an RVV spill/unspill. -+// for Java use vector registers v0-v31 are always save on call just -+// as the platform ABI treats v0-v31 as caller save. + +reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); +reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); @@ -28114,9 +30380,7 @@ index 000000000..137e9b7c7 +// Several register classes are automatically defined based upon information in +// this architecture description. 
+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) -+// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// Class for all 32 bit general purpose registers @@ -28323,7 +30587,7 @@ index 000000000..137e9b7c7 +); + +// Class for link register -+reg_class lr_reg( ++reg_class ra_reg( + R1, R1_H +); + @@ -28524,6 +30788,7 @@ index 000000000..137e9b7c7 + int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd + int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv + int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + + @@ -28535,7 +30800,6 @@ index 000000000..137e9b7c7 +source_hpp %{ + +#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" @@ -28584,85 +30848,77 @@ index 000000000..137e9b7c7 + } +}; + -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); ++class Node::PD { ++public: ++ enum NodeFlags { ++ _last_flag = Node::_last_flag ++ }; ++}; + +bool is_CAS(int opcode, bool maybe_volatile); + +// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_exclusive(const Node *load); ++bool needs_acquiring_load_reserved(const Node *load); + -+ -+// predicate using the temp register for decoding klass -+bool maybe_use_tmp_register_decoding_klass(); ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + +source %{ + -+ // Derived RegMask with conditionally allocatable registers ++// Derived RegMask with conditionally allocatable registers + -+ RegMask _ANY_REG32_mask; -+ RegMask _ANY_REG_mask; -+ RegMask _PTR_REG_mask; -+ RegMask _NO_SPECIAL_REG32_mask; -+ RegMask _NO_SPECIAL_REG_mask; -+ RegMask _NO_SPECIAL_PTR_REG_mask; ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+ void reg_mask_init() { ++void reg_mask_init() { + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ 
_NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } -+ -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); + } + -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) -+ // between an object put and the associated card mark when we are using -+ // CMS without conditional card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); + } ++} + -+ // a storestore is unnecesary in all other cases ++void PhaseOutput::pd_perform_mach_node_analysis() { ++} + -+ return true; ++int MachNode::pd_alignment_required() const { ++ return 1; ++} ++ ++int MachNode::compute_padding(int current_offset) const { ++ return 0; +} + +// is_CAS(int opcode, bool maybe_volatile) @@ -28671,12 +30927,14 @@ index 000000000..137e9b7c7 +// values otherwise false. 
+bool is_CAS(int opcode, bool maybe_volatile) +{ -+ switch(opcode) { ++ switch (opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: @@ -28685,10 +30943,6 @@ index 000000000..137e9b7c7 + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: -+#if INCLUDE_SHENANDOAHGC -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+#endif + return true; + case Op_CompareAndExchangeI: + case Op_CompareAndExchangeN: @@ -28698,10 +30952,14 @@ index 000000000..137e9b7c7 + case Op_CompareAndExchangeP: + case Op_WeakCompareAndSwapB: + case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapI: + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: ++ case Op_ShenandoahWeakCompareAndSwapP: ++ case Op_ShenandoahWeakCompareAndSwapN: ++ case Op_ShenandoahCompareAndExchangeP: ++ case Op_ShenandoahCompareAndExchangeN: + return maybe_volatile; + default: + return false; @@ -28711,12 +30969,9 @@ index 000000000..137e9b7c7 +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_exclusive(const Node *n) ++bool needs_acquiring_load_reserved(const Node *n) +{ + assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ if (UseBarriersForVolatile) { -+ return false; -+ } + + LoadStoreNode* ldst = n->as_LoadStore(); + if (n != NULL && is_CAS(n->Opcode(), false)) { @@ -28727,12 +30982,6 @@ index 000000000..137e9b7c7 + // so we can just return true here + return true; +} -+ -+bool maybe_use_tmp_register_decoding_klass() { -+ return !UseCompressedOops && -+ Universe::narrow_klass_base() != NULL && -+ Universe::narrow_klass_shift() != 0; -+} +#define __ _masm. 
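// ---------------------------------------------------------------------------
// Illustrative sketch (editorial aside, not emitted by this patch): the
// predicates above decide when a CompareAndSwap node must carry its acquire
// ordering inside the lr/sc sequence itself rather than in separate fences.
// The stand-alone helper below only sketches that ordering contract using the
// GCC/Clang __atomic builtins; the name cas_acq_rel is hypothetical, and
// HotSpot itself emits the equivalent sequence through MacroAssembler::cmpxchg.

#include <cstdint>

// A CAS with acquire+release ordering is typically lowered on RISC-V to an
// lr.w/sc.w retry loop with the aq/rl ordering bits set, i.e. the acquiring
// load is folded into the CAS itself -- the property that
// needs_acquiring_load_reserved reports for volatile CAS nodes.
inline bool cas_acq_rel(int32_t* addr, int32_t expected, int32_t desired) {
  return __atomic_compare_exchange_n(addr, &expected, desired,
                                     /*weak*/ false,
                                     __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
}
// ---------------------------------------------------------------------------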
+ +// advance declarations for helper functions to convert register @@ -28751,14 +31000,13 @@ index 000000000..137e9b7c7 + +int MachCallStaticJavaNode::ret_addr_offset() +{ -+ // call should be a simple jal -+ int off = 4; -+ return off; ++ // jal ++ return 1 * NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() +{ -+ return 28; // movptr, jal ++ return 7 * NativeInstruction::instruction_size; // movptr, jal +} + +int MachCallRuntimeNode::ret_addr_offset() { @@ -28766,14 +31014,13 @@ index 000000000..137e9b7c7 + // jal(addr) + // or with far branches + // jal(trampoline_stub) -+ // for real runtime callouts it will be five instructions ++ // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -+ // la(t0, RuntimeAddress(addr)) -+ // addi(sp, sp, -2 * wordSize) -+ // sd(zr, Address(sp)) -+ // sd(t1, Address(sp, wordSize)) -+ // jalr(t0) ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr + CodeBlob *cb = CodeCache::find_blob(_entry_point); + if (cb != NULL) { + return 1 * NativeInstruction::instruction_size; @@ -28782,18 +31029,37 @@ index 000000000..137e9b7c7 + } +} + -+// Indicate if the safepoint node needs the polling page as an input ++int MachCallNativeNode::ret_addr_offset() { ++ Unimplemented(); ++ return -1; ++} + -+// the shared code plants the oop data at the start of the generated -+// code for the safepoint node and that needs ot be at the load -+// instruction itself. so we cannot plant a mov of the safepoint poll -+// address followed by a load. setting this to true means the mov is -+// scheduled as a prior instruction. that's better for scheduling -+// anyway. ++// ++// Compute padding required for nodes which need alignment ++// + -+bool SafePointNode::needs_polling_address_input() ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ -+ return true; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} ++ ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. 
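++  // e.g. with RVC the instructions emitted so far may leave current_offset only
++  // 2-byte aligned, say 0x1a6; align_up(0x1a6, 4) == 0x1a8, so 2 bytes of
++  // padding are emitted before the jal.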
++ return align_up(current_offset, alignment_required()) - current_offset; +} + +//============================================================================= @@ -28806,7 +31072,8 @@ index 000000000..137e9b7c7 +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); +} + @@ -28823,20 +31090,21 @@ index 000000000..137e9b7c7 +#endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); + } + } + + uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * NativeInstruction::instruction_size; ++ return _count * (UseRVC ? NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); + } + +//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + -+int Compile::ConstantTable::calculate_table_base_offset() const { ++int ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + @@ -28865,50 +31133,77 @@ index 000000000..137e9b7c7 + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + -+ int framesize = C->frame_slots() << LogBytesPerInt; ++ int framesize = C->output()->frame_slots() << LogBytesPerInt; + -+ if (C->need_stack_bang(framesize)) { ++ if (C->output()->need_stack_bang(framesize)) { + st->print("# stack bang size=%d\n\t", framesize); + } + ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); -+ st->print("sd fp, [sp, #%d]", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]", - wordSize); -+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } ++ ++ if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { ++ st->print("ld t0, [guard]\n\t"); ++ st->print("membar LoadLoad\n\t"); ++ st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); ++ st->print("beq t0, t1, skip\n\t"); ++ st->print("jalr #nmethod_entry_barrier_stub\n\t"); ++ st->print("j skip\n\t"); ++ st->print("guard: int\n\t"); ++ st->print("skip:\n\t"); ++ } +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + + // n.b. 
frame size includes space for return pc and fp -+ const int framesize = C->frame_size_in_bytes(); -+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ const int framesize = C->output()->frame_size_in_bytes(); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + assert_cond(C != NULL); -+ int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize) && UseStackBanging) { ++ ++ if (C->clinit_barrier_on_entry()) { ++ assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); ++ ++ Label L_skip_barrier; ++ ++ __ mov_metadata(t1, C->method()->holder()->constant_encoding()); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ __ bind(L_skip_barrier); ++ } ++ ++ int bangsize = C->output()->bang_size_in_bytes(); ++ if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + __ build_frame(framesize); + ++ if (C->stub_function() == NULL) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->nmethod_entry_barrier(&_masm); ++ } ++ + if (VerifyStackAtCalls) { + Unimplemented(); + } + -+ C->set_frame_complete(cbuf.insts_size()); ++ C->output()->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. -+ Compile::ConstantTable& constant_table = C->constant_table(); ++ ConstantTable& constant_table = C->output()->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} @@ -28932,7 +31227,7 @@ index 000000000..137e9b7c7 + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); ++ int framesize = C->output()->frame_size_in_bytes(); + + st->print("# pop frame %d\n\t", framesize); + @@ -28947,9 +31242,9 @@ index 000000000..137e9b7c7 + } + + if (do_polling() && C->is_method_compilation()) { -+ st->print("# touch polling page\n\t"); -+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); ++ st->print("# test polling word\n\t"); ++ st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); ++ st->print("bgtu sp, t0, #slow_path"); + } +} +#endif @@ -28957,9 +31252,9 @@ index 000000000..137e9b7c7 +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); -+ int framesize = C->frame_size_in_bytes(); ++ int framesize = C->output()->frame_size_in_bytes(); + + __ remove_frame(framesize); + @@ -28968,7 +31263,13 @@ index 000000000..137e9b7c7 + } + + if (do_polling() && C->is_method_compilation()) { -+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ Label dummy_label; ++ Label* code_stub = &dummy_label; ++ if (!C->output()->in_scratch_emit_size()) { ++ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); ++ } ++ __ relocate(relocInfo::poll_return_type); ++ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); + } +} + @@ -28986,11 +31287,6 @@ index 000000000..137e9b7c7 + return MachNode::pipeline_class(); +} + -+int 
MachEpilogNode::safepoint_offset() const { -+ assert(do_polling(), "no return for this epilog node"); -+ return 4; -+} -+ +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float or @@ -29060,12 +31356,11 @@ index 000000000..137e9b7c7 + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + -+ if (bottom_type() == NULL) { -+ ShouldNotReachHere(); -+ } else if (bottom_type()->isa_vect() != NULL) { ++ if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + // stack to stack @@ -29085,7 +31380,8 @@ index 000000000..137e9b7c7 + } + } + } else if (cbuf != NULL) { -+ MacroAssembler _masm(cbuf); ++ C2_MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy @@ -29214,7 +31510,7 @@ index 000000000..137e9b7c7 +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); @@ -29249,21 +31545,23 @@ index 000000000..137e9b7c7 + assert_cond(st != NULL); + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { -+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); -+ if (Universe::narrow_klass_shift() != 0) { ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (CompressedKlassPointers::shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + } -+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); +} +#endif + +void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const +{ + // This is the unverified entry point. -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); @@ -29290,7 +31588,7 @@ index 000000000..137e9b7c7 + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); @@ -29308,7 +31606,7 @@ index 000000000..137e9b7c7 +{ + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
-+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); @@ -29334,27 +31632,40 @@ index 000000000..137e9b7c7 + } + + switch (opcode) { ++ case Op_CacheWB: // fall through ++ case Op_CacheWBPreSync: // fall through ++ case Op_CacheWBPostSync: ++ if (!VM_Version::supports_data_cache_line_flush()) { ++ return false; ++ } ++ break; ++ + case Op_StrCompressedCopy: // fall through + case Op_StrInflatedCopy: // fall through -+ case Op_HasNegatives: ++ case Op_CountPositives: + return UseRVV; ++ + case Op_EncodeISOArray: + return UseRVV && SpecialEncodeISOArray; ++ + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; ++ ++ case Op_RotateRight: ++ case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: -+ return UseZbb; ++ return UseRVB; + } + + return true; // Per default match rules are supported. +} + +// Identify extra cases that we might want to provide match rules for vector nodes and -+// other intrinsics guarded with vector length (vlen). ++// other intrinsics guarded with vector length (vlen) and element type (bt). +const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + return false; @@ -29363,19 +31674,26 @@ index 000000000..137e9b7c7 + return op_vec_supported(opcode); +} + -+const bool Matcher::has_predicated_vectors(void) { -+ return false; // not supported -+ ++const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++ return false; +} + -+const int Matcher::float_pressure(int default_pressure_threshold) { -+ return default_pressure_threshold; ++const RegMask* Matcher::predicate_reg_mask(void) { ++ return NULL; +} + -+int Matcher::regnum_to_fpu_offset(int regnum) -+{ ++const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { ++ return NULL; ++} ++ ++// Vector calling convention not yet implemented. ++const bool Matcher::supports_vector_calling_convention(void) { ++ return false; ++} ++ ++OptoRegPair Matcher::vector_return_value(uint ideal_reg) { + Unimplemented(); -+ return 0; ++ return OptoRegPair(0, 0); +} + +// Is this branch offset short enough that a short branch can be used? @@ -29401,17 +31719,6 @@ index 000000000..137e9b7c7 + return (-4096 <= offs && offs < 4096); +} + -+const bool Matcher::isSimpleConstant64(jlong value) { -+ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. -+ // Probably always true, even if a temp register is required. -+ return true; -+} -+ -+// true just means we have fast l2f conversion -+const bool Matcher::convL2FSupported(void) { -+ return true; -+} -+ +// Vector width in bytes. 
+const int Matcher::vector_width_in_bytes(BasicType bt) { + if (UseRVV) { @@ -29441,120 +31748,25 @@ index 000000000..137e9b7c7 + return 0; +} + -+const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ switch(size) { -+ case 8: return Op_VecD; -+ case 16: return Op_VecX; -+ default: -+ if (size == vector_width_in_bytes(T_BYTE)) { -+ return Op_VecA; -+ } -+ } -+ ShouldNotReachHere(); -+ return 0; -+} -+ -+const bool Matcher::supports_scalable_vector() { -+ return UseRVV; -+} -+ +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return Matcher::max_vector_size(bt); +} + -+// AES support not yet implemented -+const bool Matcher::pass_original_key_for_aes() { ++MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { ++ ShouldNotReachHere(); // generic vector operands not supported ++ return NULL; ++} ++ ++bool Matcher::is_reg2reg_move(MachNode* m) { ++ ShouldNotReachHere(); // generic vector operands not supported + return false; +} + -+// riscv supports misaligned vectors store/load. -+const bool Matcher::misaligned_vectors_ok() { -+ return true; -+} -+ -+// false => size gets scaled to BytesPerLong, ok. -+const bool Matcher::init_array_count_is_in_bytes = false; -+ -+// Use conditional move (CMOVL) -+const int Matcher::long_cmove_cost() { -+ // long cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+const int Matcher::float_cmove_cost() { -+ // float cmoves are no more expensive than int cmoves -+ return 0; -+} -+ -+// Does the CPU require late expand (see block.cpp for description of late expand)? -+const bool Matcher::require_postalloc_expand = false; -+ -+// Do we need to mask the count passed to shift instructions or does -+// the cpu only look at the lower 5/6 bits anyway? -+const bool Matcher::need_masked_shift_count = false; -+ -+// This affects two different things: -+// - how Decode nodes are matched -+// - how ImplicitNullCheck opportunities are recognized -+// If true, the matcher will try to remove all Decodes and match them -+// (as operands) into nodes. NullChecks are not prepared to deal with -+// Decodes by final_graph_reshaping(). -+// If false, final_graph_reshaping() forces the decode behind the Cmp -+// for a NullCheck. The matcher matches the Decode node into a register. -+// Implicit_null_check optimization moves the Decode along with the -+// memory operation back up before the NullCheck. -+bool Matcher::narrow_oop_use_complex_address() { -+ return Universe::narrow_oop_shift() == 0; -+} -+ -+bool Matcher::narrow_klass_use_complex_address() { -+// TODO -+// decide whether we need to set this to true ++bool Matcher::is_generic_vector(MachOper* opnd) { ++ ShouldNotReachHere(); // generic vector operands not supported + return false; +} + -+bool Matcher::const_oop_prefer_decode() { -+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. -+ return Universe::narrow_oop_base() == NULL; -+} -+ -+bool Matcher::const_klass_prefer_decode() { -+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. -+ return Universe::narrow_klass_base() == NULL; -+} -+ -+// Is it better to copy float constants, or load them directly from -+// memory? Intel can load a float constant from a direct address, -+// requiring no extra registers. Most RISCs will have to materialize -+// an address into a register first, so they would do better to copy -+// the constant from stack. 
-+const bool Matcher::rematerialize_float_constants = false; -+ -+// If CPU can load and store mis-aligned doubles directly then no -+// fixup is needed. Else we split the double into 2 integer pieces -+// and move it piece-by-piece. Only happens when passing doubles into -+// C code as the Java calling convention forces doubles to be aligned. -+const bool Matcher::misaligned_doubles_ok = true; -+ -+// No-op on amd64 -+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -+ Unimplemented(); -+} -+ -+// Advertise here if the CPU requires explicit rounding operations to -+// implement the UseStrictFP mode. -+const bool Matcher::strict_fp_requires_explicit_rounding = false; -+ -+// Are floats converted to double when stored to stack during -+// deoptimization? -+bool Matcher::float_in_double() { return false; } -+ -+// Do ints take an entire long register or just half? -+// The relevant question is how the int is callee-saved: -+// the whole long is written but de-opt'ing will have to extract -+// the relevant 32 bits. -+const bool Matcher::int_in_long = true; -+ +// Return whether or not this register is ever used as an argument. +// This function is used on startup to build the trampoline stubs in +// generateOptoStub. Registers not mentioned will be killed by the VM @@ -29586,6 +31798,33 @@ index 000000000..137e9b7c7 + return can_be_java_arg(reg); +} + ++uint Matcher::int_pressure_limit() ++{ ++ // A derived pointer is live at CallNode and then is flagged by RA ++ // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip ++ // derived pointers and lastly fail to spill after reaching maximum ++ // number of iterations. Lowering the default pressure threshold to ++ // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become ++ // a high register pressure area of the code so that split_DEF can ++ // generate DefinitionSpillCopy for the derived pointer. ++ uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; ++ if (!PreserveFramePointer) { ++ // When PreserveFramePointer is off, frame pointer is allocatable, ++ // but different from other SOC registers, it is excluded from ++ // fatproj's mask because its save type is No-Save. Decrease 1 to ++ // ensure high pressure at fatproj when PreserveFramePointer is off. ++ // See check_pressure_at_fatproj(). ++ default_int_pressure_threshold--; ++ } ++ return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; ++} ++ ++uint Matcher::float_pressure_limit() ++{ ++ // _FLOAT_REG_mask is generated by adlc from the float_reg register class. ++ return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; ++} ++ +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} @@ -29632,51 +31871,21 @@ index 000000000..137e9b7c7 + return true; +} + -+const bool Matcher::convi2l_type_required = false; -+ -+// Should the Matcher clone shifts on addressing modes, expecting them -+// to be subsumed into complex addressing expressions or compute them -+// into registers? -+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++// Should the Matcher clone input 'm' of node 'n'? 
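++// Cloning the constant shift count of a vector shift (ShiftV src (ShiftCntV con))
++// lets the constant be matched together with each vector shift use instead of
++// keeping a separate ShiftCntV result alive in a register.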
++bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + assert_cond(m != NULL); -+ if (clone_base_plus_offset_address(m, mstack, address_visited)) { -+ return true; -+ } -+ -+ Node *off = m->in(AddPNode::Offset); -+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && -+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(2), Visit); -+ Node *conv = off->in(1); -+ if (conv->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(conv)) { -+ address_visited.set(conv->_idx); // Flag as address_visited -+ mstack.push(conv->in(1), Pre_Visit); -+ } else { -+ mstack.push(conv, Pre_Visit); -+ } -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } else if (off != NULL && off->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(1), Pre_Visit); -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); ++ if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) ++ mstack.push(m, Visit); // m = ShiftCntV + return true; + } + return false; +} + -+void Compile::reshape_address(AddPNode* addp) { ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? 
++bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); +} + +%} @@ -29713,14 +31922,15 @@ index 000000000..137e9b7c7 + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, con); ++ __ li(dst_reg, con); + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { @@ -29733,35 +31943,25 @@ index 000000000..137e9b7c7 + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ mv(dst_reg, $src$$constant); ++ __ li(dst_reg, $src$$constant); + } + } + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, 1); -+ %} -+ -+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ -+ MacroAssembler _masm(&cbuf); -+ int32_t offset = 0; -+ address page = (address)$src$$constant; -+ unsigned long align = (unsigned long)page & 0xfff; -+ assert(align == 0, "polling page must be page aligned"); -+ Register dst_reg = as_Register($dst$$reg); -+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); -+ __ addi(dst_reg, dst_reg, offset); ++ __ li(dst_reg, 1); + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { @@ -29774,13 +31974,13 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { @@ -29793,42 +31993,42 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, 
iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); @@ -29837,15 +32037,15 @@ index 000000000..137e9b7c7 + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; -+ switch($cmp$$cmpcode) { ++ switch ($cmp$$cmpcode) { + case(BoolTest::ge): + __ j(*L); + break; @@ -29867,7 +32067,7 @@ index 000000000..137e9b7c7 + + Label miss; + Label done; -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { @@ -29879,21 +32079,21 @@ index 000000000..137e9b7c7 + + __ bind(miss); + if (!$primary) { -+ __ mv(cr_reg, 1); ++ __ li(cr_reg, 1); + } + + __ bind(done); + %} + + enc_class riscv_enc_java_static_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; + assert_cond(addr != NULL); + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; @@ -29902,23 +32102,23 @@ index 000000000..137e9b7c7 + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec)); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } ++ + // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } -+ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { @@ -29928,7 +32128,7 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_call_epilog() %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); @@ -29936,7 +32136,7 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they @@ -29964,160 +32164,165 @@ index 000000000..137e9b7c7 + %} + + // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+ // Load markOop from object into displaced_header. ++ // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ // ignore slow case here -+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ __ load_klass(flag, oop); ++ __ lwu(flag, Address(flag, Klass::access_flags_offset())); ++ __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); ++ __ bnez(flag, cont, true /* is_far */); + } + + // Check for existing monitor -+ if ((EmitSync & 0x02) == 0) { -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ ++ if (!UseHeavyMonitors) { ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markWord::unlocked_value); ++ ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. 
++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path + } + -+ // Set tmp to be (markOop of object | UNLOCK_VALUE). -+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); ++ __ j(cont); + -+ // Initialize the box. (Must happen before we update the object mark!) ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markWord::monitor_value so use markWord::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markWord::unused_mark().value()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ // Compare object markOop with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markOop. -+ // On failure disp_hdr contains the possibly locked markOop. -+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/disp_hdr); ++ __ beqz(flag, cont); // CAS success means locking succeeded ++ ++ __ bne(flag, xthread, cont); // Check for recursive locking ++ ++ // Recursive lock case + __ mv(flag, zr); -+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // If the compare-and-exchange succeeded, then we found an unlocked -+ // object, will have now locked it will continue at label cont -+ // We did not see an unlocked object so try the fast recursive case. 
-+ -+ // Check if the owner is self by comparing the value in the -+ // markOop of object (disp_hdr) with the stack pointer. -+ __ sub(disp_hdr, disp_hdr, sp); -+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); -+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, -+ // hence we can store 0 as the displaced header in the box, which indicates that it is a -+ // recursive lock. -+ __ andr(tmp/*==0?*/, disp_hdr, tmp); -+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ __ mv(flag, tmp); // we can use the value of tmp as the result here -+ -+ if ((EmitSync & 0x02) == 0) { -+ __ j(cont); -+ -+ // Handle existing monitor. -+ __ bind(object_has_monitor); -+ // The object's monitor m is unlocked iff m->owner == NULL, -+ // otherwise m->owner may contain a thread or a stack address. -+ // -+ // Try to CAS m->owner from NULL to current thread. -+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); -+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) -+ -+ // Store a non-null value into the box to avoid looking like a re-entrant -+ // lock. The fast-path monitor unlock code checks for -+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the -+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. -+ __ mv(tmp, (address)markOopDesc::unused_mark()); -+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ } ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, tmp, 1u); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); + + __ bind(cont); + %} + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ -+ MacroAssembler _masm(&cbuf); ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ ++ C2_MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; ++ if (!UseHeavyMonitors) { ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + } + -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); -+ + // Handle existing monitor. 
-+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ ++ if (!UseHeavyMonitors) { ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. ++ ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ } else { ++ __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path + } -+ -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markOop of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markWord::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } ++ Label notRecursive; ++ __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. ++ ++ // Recursive lock ++ __ addi(disp_hdr, disp_hdr, -1); ++ __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ mv(flag, zr); ++ __ j(cont); ++ ++ __ bind(notRecursive); ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned + + __ bind(cont); + %} @@ -30125,7 +32330,7 @@ index 000000000..137e9b7c7 + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -30133,7 +32338,7 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -30141,7 +32346,7 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -30149,7 +32354,7 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); @@ -30157,13 +32362,15 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 + // ret addr has been popped into ra @@ -30173,12 +32380,13 @@ index 000000000..137e9b7c7 + %} + + enc_class riscv_enc_rethrow() %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ -+ MacroAssembler _masm(&cbuf); ++ C2_MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} + @@ -30243,18 +32451,12 @@ index 000000000..137e9b7c7 +// SP meets the minimum alignment. + +frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); -+ + // These three registers define part of the calling convention + // between compiled code and the interpreter. + + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + -+ // Method Oop Register when calling interpreter. -+ interpreter_method_oop_reg(R31); -+ + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + @@ -30274,12 +32476,6 @@ index 000000000..137e9b7c7 + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. The PROLOG must add this many slots to the stack. 
The -+ // EPILOG must remove this many slots. -+ // RISCV needs two words for RA (return address) and FP (frame pointer). -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); @@ -30298,25 +32494,6 @@ index 000000000..137e9b7c7 + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ + // Location of compiled Java return values. Same as C for now. + return_value + %{ @@ -30465,16 +32642,6 @@ index 000000000..137e9b7c7 + interface(CONST_INTER); +%} + -+operand immI_32() -+%{ -+ predicate(n->get_int() == 32); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ +operand immI_63() +%{ + predicate(n->get_int() == 63); @@ -30485,20 +32652,10 @@ index 000000000..137e9b7c7 + interface(CONST_INTER); +%} + -+operand immI_64() -+%{ -+ predicate(n->get_int() == 64); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ +// 32 bit integer valid for add immediate +operand immIAdd() +%{ -+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); + match(ConI); + op_cost(0); + format %{ %} @@ -30508,7 +32665,7 @@ index 000000000..137e9b7c7 +// 32 bit integer valid for sub immediate +operand immISub() +%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); ++ predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); + match(ConI); + op_cost(0); + format %{ %} @@ -30593,23 +32750,13 @@ index 000000000..137e9b7c7 + interface(CONST_INTER); +%} + -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ +// Card Table Byte Map Base +operand immByteMapBase() +%{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ (CardTable::CardValue*)n->get_ptr() == ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); @@ -31199,7 +33346,7 @@ index 000000000..137e9b7c7 + +operand indirectN(iRegN reg) +%{ -+ predicate(Universe::narrow_oop_shift() == 0); ++ predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); @@ -31214,7 +33361,7 @@ index 000000000..137e9b7c7 + +operand indOffIN(iRegN reg, immIOffset off) +%{ -+ 
predicate(Universe::narrow_oop_shift() == 0); ++ predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); @@ -31229,7 +33376,7 @@ index 000000000..137e9b7c7 + +operand indOffLN(iRegN reg, immLOffset off) +%{ -+ predicate(Universe::narrow_oop_shift() == 0); ++ predicate(CompressedOops::shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); @@ -32224,6 +34371,7 @@ index 000000000..137e9b7c7 + format %{ "lw $dst, $mem\t# int, #@loadI" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32239,6 +34387,7 @@ index 000000000..137e9b7c7 + format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32269,6 +34418,7 @@ index 000000000..137e9b7c7 + format %{ "ld $dst, $mem\t# int, #@loadL" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32294,11 +34444,13 @@ index 000000000..137e9b7c7 +instruct loadP(iRegPNoSp dst, memory mem) +%{ + match(Set dst (LoadP mem)); ++ predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32329,6 +34481,7 @@ index 000000000..137e9b7c7 + format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32374,6 +34527,7 @@ index 000000000..137e9b7c7 + format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32445,19 +34599,6 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_imm); +%} + -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} -+ -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); -+ -+ ins_pipe(ialu_imm); -+%} -+ +// Load Byte Map Base Constant +instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) +%{ @@ -32574,7 +34715,6 @@ index 000000000..137e9b7c7 +instruct storeimmCM0(immI0 zero, memory mem) +%{ + match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" @@ -32672,6 +34812,7 @@ index 000000000..137e9b7c7 + format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32701,6 +34842,7 @@ index 000000000..137e9b7c7 + format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32731,6 +34873,7 @@ index 000000000..137e9b7c7 + format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), 
$mem$$disp)); + %} + @@ -32761,6 +34904,7 @@ index 000000000..137e9b7c7 + format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32770,8 +34914,6 @@ index 000000000..137e9b7c7 +instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ + match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); + + ins_cost(STORE_COST); + format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} @@ -32807,6 +34949,7 @@ index 000000000..137e9b7c7 + format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32822,6 +34965,7 @@ index 000000000..137e9b7c7 + format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + @@ -32873,7 +35017,7 @@ index 000000000..137e9b7c7 + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV. ++// implemented using sc_d on RISCV64. +instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); @@ -32892,8 +35036,6 @@ index 000000000..137e9b7c7 + ins_pipe(pipe_serial); +%} + -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. 
+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); @@ -32938,13 +35080,14 @@ index 000000000..137e9b7c7 + +// standard CompareAndSwapX when we are using barriers +// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" @@ -32960,13 +35103,14 @@ index 000000000..137e9b7c7 + ins_pipe(pipe_slow); +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" @@ -33016,6 +35160,8 @@ index 000000000..137e9b7c7 + +instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); ++ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); @@ -33050,7 +35196,7 @@ index 000000000..137e9b7c7 +instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + @@ -33075,7 +35221,7 @@ index 000000000..137e9b7c7 +instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + @@ -33099,7 +35245,7 @@ index 000000000..137e9b7c7 + +instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + @@ -33117,7 +35263,7 @@ index 000000000..137e9b7c7 + +instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapL mem 
(Binary oldval newval))); + @@ -33135,7 +35281,7 @@ index 000000000..137e9b7c7 + +instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + @@ -33153,7 +35299,7 @@ index 000000000..137e9b7c7 + +instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + @@ -33281,6 +35427,7 @@ index 000000000..137e9b7c7 + +instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); @@ -33302,7 +35449,7 @@ index 000000000..137e9b7c7 +instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + @@ -33326,7 +35473,7 @@ index 000000000..137e9b7c7 +instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + @@ -33349,7 +35496,7 @@ index 000000000..137e9b7c7 + +instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + @@ -33371,7 +35518,7 @@ index 000000000..137e9b7c7 + +instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + @@ -33393,7 +35540,7 @@ index 000000000..137e9b7c7 + +instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + @@ -33415,7 +35562,7 @@ index 000000000..137e9b7c7 + +instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + @@ -33446,13 +35593,14 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapB" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::relaxed, /*release*/ 
Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33469,13 +35617,14 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapS" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33489,12 +35638,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapI" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33508,12 +35658,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapL" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33527,12 +35678,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapN" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33540,18 +35692,20 @@ index 000000000..137e9b7c7 + +instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + + format %{ + "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapP" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapP" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33560,7 +35714,7 @@ index 000000000..137e9b7c7 +instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI 
tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + @@ -33570,13 +35724,14 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33585,7 +35740,7 @@ index 000000000..137e9b7c7 +instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, + iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + @@ -33595,13 +35750,14 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" + %} + + ins_encode %{ + __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33609,7 +35765,7 @@ index 000000000..137e9b7c7 + +instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + @@ -33617,12 +35773,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33630,7 +35787,7 @@ index 000000000..137e9b7c7 + +instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + @@ -33638,12 +35795,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); 
++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33651,7 +35809,7 @@ index 000000000..137e9b7c7 + +instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + @@ -33659,12 +35817,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33672,7 +35831,7 @@ index 000000000..137e9b7c7 + +instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + @@ -33680,12 +35839,13 @@ index 000000000..137e9b7c7 + + format %{ + "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + + ins_encode %{ + __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + + ins_pipe(pipe_slow); @@ -33738,6 +35898,7 @@ index 000000000..137e9b7c7 + +instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) +%{ ++ predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); @@ -33753,7 +35914,7 @@ index 000000000..137e9b7c7 + +instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetI mem newv)); + @@ -33770,7 +35931,7 @@ index 000000000..137e9b7c7 + +instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetL mem newv)); + @@ -33787,7 +35948,7 @@ index 000000000..137e9b7c7 + +instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetN mem newv)); + @@ -33804,7 +35965,7 @@ index 000000000..137e9b7c7 + +instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); + + match(Set prev (GetAndSetP mem newv)); + @@ -33949,7 +36110,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddL mem incr)); + @@ -33965,7 +36126,7 
@@ index 000000000..137e9b7c7 +%} + +instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddL mem incr)); + @@ -33982,7 +36143,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddL mem incr)); + @@ -33999,7 +36160,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddL mem incr)); + @@ -34016,7 +36177,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddI mem incr)); + @@ -34033,7 +36194,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddI mem incr)); + @@ -34050,7 +36211,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set newval (GetAndAddI mem incr)); + @@ -34067,7 +36228,7 @@ index 000000000..137e9b7c7 + +instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + + match(Set dummy (GetAndAddI mem incr)); + @@ -34100,6 +36261,7 @@ index 000000000..137e9b7c7 + format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -34115,6 +36277,7 @@ index 000000000..137e9b7c7 + format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + int32_t con = (int32_t)$src2$$constant; + __ addiw(as_Register($dst$$reg), + as_Register($src1$$reg), @@ -34131,6 +36294,7 @@ index 000000000..137e9b7c7 + format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ addiw(as_Register($dst$$reg), + as_Register($src1$$reg), + $src2$$constant); @@ -34147,6 +36311,7 @@ index 000000000..137e9b7c7 + format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -34162,6 +36327,7 @@ index 000000000..137e9b7c7 + format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant 
& 63); + %} + @@ -34177,6 +36343,7 @@ index 000000000..137e9b7c7 + format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), @@ -34193,6 +36360,7 @@ index 000000000..137e9b7c7 + format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -34208,6 +36376,7 @@ index 000000000..137e9b7c7 + format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ add(as_Register($dst$$reg), + as_Register($src1$$reg), @@ -34225,6 +36394,7 @@ index 000000000..137e9b7c7 + format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -34241,6 +36411,7 @@ index 000000000..137e9b7c7 + format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addiw + __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), @@ -34257,6 +36428,7 @@ index 000000000..137e9b7c7 + format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -34272,6 +36444,7 @@ index 000000000..137e9b7c7 + format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // src2 is imm, so actually call the addi + __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), @@ -34401,6 +36574,7 @@ index 000000000..137e9b7c7 + format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} + ins_pipe(ialu_reg_shift); @@ -34532,7 +36706,7 @@ index 000000000..137e9b7c7 +// Long Shifts + +// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount +instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (LShiftL src1 src2)); + @@ -34556,6 +36730,7 @@ index 000000000..137e9b7c7 + format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // the shift amount is encoded in the lower + // 6 bits of the I-immediate field for RV64I + __ slli(as_Register($dst$$reg), @@ -34567,7 +36742,7 @@ index 000000000..137e9b7c7 +%} + +// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount +instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (URShiftL src1 src2)); + @@ -34591,6 +36766,7 @@ index 000000000..137e9b7c7 + format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // the shift amount is encoded in the lower + // 6 bits of the I-immediate field for RV64I + __ srli(as_Register($dst$$reg), @@ -34609,6 
+36785,7 @@ index 000000000..137e9b7c7 + format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // the shift amount is encoded in the lower + // 6 bits of the I-immediate field for RV64I + __ srli(as_Register($dst$$reg), @@ -34620,7 +36797,7 @@ index 000000000..137e9b7c7 +%} + +// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount +instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ + match(Set dst (RShiftL src1 src2)); + @@ -34644,6 +36821,7 @@ index 000000000..137e9b7c7 + format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + // the shift amount is encoded in the lower + // 6 bits of the I-immediate field for RV64I + __ srai(as_Register($dst$$reg), @@ -34923,19 +37101,14 @@ index 000000000..137e9b7c7 +// Math.max(FF)F +instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ + match(Set dst (MaxF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++ effect(TEMP_DEF dst); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" -+ "fmax.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ "maxF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); + %} + + ins_pipe(fp_dop_reg_reg_s); @@ -34944,19 +37117,14 @@ index 000000000..137e9b7c7 +// Math.min(FF)F +instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ + match(Set dst (MinF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++ effect(TEMP_DEF dst); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minF_reg_reg\n\t" -+ "fmin.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ "minF $dst, $src1, $src2" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); + %} + + ins_pipe(fp_dop_reg_reg_s); @@ -34965,19 +37133,14 @@ index 000000000..137e9b7c7 +// Math.max(DD)D +instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ + match(Set dst (MaxD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++ effect(TEMP_DEF dst); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" -+ "fmax.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ format %{ "maxD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ __ 
minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); + %} + + ins_pipe(fp_dop_reg_reg_d); @@ -34986,19 +37149,14 @@ index 000000000..137e9b7c7 +// Math.min(DD)D +instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ + match(Set dst (MinD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++ effect(TEMP_DEF dst); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minD_reg_reg\n\t" -+ "fmin.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ format %{ "minD $dst, $src1, $src2" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); + %} + + ins_pipe(fp_dop_reg_reg_d); @@ -35066,31 +37224,16 @@ index 000000000..137e9b7c7 + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI_reg" %} -+ -+ ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ format %{ ++ "sraiw t0, $src, 0x1f\n\t" ++ "addw $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absI_reg" + %} + -+ ins_pipe(ialu_reg_reg); -+%} -+ -+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2L (AbsI src))); -+ -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI2L_reg" %} -+ + ins_encode %{ + __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); @@ -35100,14 +37243,16 @@ index 000000000..137e9b7c7 + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 3); -+ format %{ "srai t0, $src, 0x3f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "sub $dst, $dst, t0\t#absL_reg" %} ++ format %{ ++ "srai t0, $src, 0x3f\n\t" ++ "add $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absL_reg" ++ %} + + ins_encode %{ + __ srai(t0, as_Register($src$$reg), 0x3f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); + %} + + ins_pipe(ialu_reg_reg); @@ -35140,7 +37285,7 @@ index 000000000..137e9b7c7 +%} + +instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (SqrtF src)); ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} @@ -35178,6 +37323,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35194,6 +37340,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ 
andi(as_Register($dst$$reg), + as_Register($src1$$reg), + (int32_t)($src2$$constant)); @@ -35210,6 +37357,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35242,6 +37390,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ xorr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35274,6 +37423,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ andr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35290,6 +37440,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ andi(as_Register($dst$$reg), + as_Register($src1$$reg), + (int32_t)($src2$$constant)); @@ -35306,6 +37457,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ orr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35338,6 +37490,7 @@ index 000000000..137e9b7c7 + + ins_cost(ALU_COST); + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ xorr(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); @@ -35365,7 +37518,7 @@ index 000000000..137e9b7c7 +// ============================================================================ +// BSWAP Instructions + -+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ ++instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ + match(Set dst (ReverseBytesI src)); + effect(TEMP cr); + @@ -35379,7 +37532,7 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ ++instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ + match(Set dst (ReverseBytesL src)); + effect(TEMP cr); + @@ -35490,6 +37643,7 @@ index 000000000..137e9b7c7 + +instruct membar_storestore() %{ + match(MemBarStoreStore); ++ match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} @@ -35538,6 +37692,7 @@ index 000000000..137e9b7c7 + format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + if ($dst$$reg != $src$$reg) { + __ mv(as_Register($dst$$reg), as_Register($src$$reg)); + } @@ -35553,6 +37708,7 @@ index 000000000..137e9b7c7 + format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + if ($dst$$reg != $src$$reg) { + __ mv(as_Register($dst$$reg), as_Register($src$$reg)); + } @@ -35572,6 +37728,17 @@ index 000000000..137e9b7c7 + ins_pipe(pipe_class_empty); +%} + ++instruct castLL(iRegL dst) ++%{ ++ match(Set dst (CastLL dst)); ++ ++ size(0); ++ format %{ "# castLL of $dst, #@castLL" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} ++ +instruct castII(iRegI dst) +%{ + match(Set dst (CastII dst)); @@ -35594,6 +37761,39 @@ index 000000000..137e9b7c7 + ins_pipe(pipe_class_empty); +%} + ++instruct castFF(fRegF dst) ++%{ ++ match(Set dst (CastFF dst)); ++ ++ size(0); ++ format %{ "# castFF of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} ++ ++instruct castDD(fRegD dst) ++%{ ++ match(Set 
dst (CastDD dst)); ++ ++ size(0); ++ format %{ "# castDD of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} ++ ++instruct castVV(vReg dst) ++%{ ++ match(Set dst (CastVV dst)); ++ ++ size(0); ++ format %{ "# castVV of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} ++ +// ============================================================================ +// Convert Instructions + @@ -35663,6 +37863,7 @@ index 000000000..137e9b7c7 + format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); + %} + @@ -35817,6 +38018,7 @@ index 000000000..137e9b7c7 + format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ zero_extend($dst$$Register, $src$$Register, 32); + %} + @@ -35827,13 +38029,14 @@ index 000000000..137e9b7c7 +// in case of 32bit oops (heap < 4Gb). +instruct convN2I(iRegINoSp dst, iRegN src) +%{ -+ predicate(Universe::narrow_oop_shift() == 0); ++ predicate(CompressedOops::shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); + format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ mv($dst$$Register, $src$$Register); + %} + @@ -35899,26 +38102,7 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg); +%} + -+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(!maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); -+ -+ ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} -+ -+ ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? 
xheapbase : t0); -+ %} -+ -+ ins_pipe(ialu_reg); -+%} -+ -+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ -+ predicate(maybe_use_tmp_register_decoding_klass()); -+ ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ + match(Set dst (DecodeNKlass src)); + + effect(TEMP tmp); @@ -35949,6 +38133,7 @@ index 000000000..137e9b7c7 + format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + @@ -35985,6 +38170,7 @@ index 000000000..137e9b7c7 + format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + @@ -36003,6 +38189,7 @@ index 000000000..137e9b7c7 + format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + @@ -36039,6 +38226,7 @@ index 000000000..137e9b7c7 + format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + @@ -36057,6 +38245,7 @@ index 000000000..137e9b7c7 + format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + @@ -36075,6 +38264,7 @@ index 000000000..137e9b7c7 + format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + @@ -36398,7 +38588,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36418,7 +38608,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36477,7 +38667,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36496,7 +38686,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36517,7 +38707,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), 
*($lbl$$label)); + %} + @@ -36537,7 +38727,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36558,7 +38748,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36578,7 +38768,7 @@ index 000000000..137e9b7c7 + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + @@ -36595,7 +38785,7 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -36612,7 +38802,7 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); @@ -36630,10 +38820,10 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -36648,10 +38838,10 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + @@ -36923,7 +39113,7 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); @@ -36972,7 +39162,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), 
*($lbl$$label), /* is_far */ true); + %} + @@ -36987,7 +39177,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37030,7 +39220,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37045,7 +39235,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37063,7 +39253,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37081,7 +39271,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37099,7 +39289,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37117,7 +39307,7 @@ index 000000000..137e9b7c7 + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37132,7 +39322,7 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -37148,7 +39338,7 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), @@ -37165,10 +39355,10 @@ index 
000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37181,10 +39371,10 @@ index 000000000..137e9b7c7 + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + @@ -37482,28 +39672,11 @@ index 000000000..137e9b7c7 + match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} -+ -+ ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -37517,13 +39690,15 @@ index 000000000..137e9b7c7 +instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ + match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} @@ -37531,16 +39706,18 @@ index 000000000..137e9b7c7 + ins_pipe(pipe_slow); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} @@ 
-37552,10 +39729,11 @@ index 000000000..137e9b7c7 + match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + + ins_encode %{ + __ enc_cmove($cop$$cmpcode, @@ -37570,13 +39748,32 @@ index 000000000..137e9b7c7 + match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); + ins_cost(ALU_COST + BRANCH_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ ++ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ format %{ ++ "bneg$cop $op1, $op2\t#@cmovI_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} @@ -37589,7 +39786,8 @@ index 000000000..137e9b7c7 +// Procedure Call/Return Instructions + +// Call Java Static Instruction -+ ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) +%{ + match(CallStaticJava); @@ -37600,15 +39798,18 @@ index 000000000..137e9b7c7 + + format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} + -+ ins_encode( riscv_enc_java_static_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode(riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog); + + ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + +// TO HERE + +// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) +%{ + match(CallDynamicJava); @@ -37619,10 +39820,11 @@ index 000000000..137e9b7c7 + + format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} + -+ ins_encode( riscv_enc_java_dynamic_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode(riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog); + + ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + +// Call Runtime Instruction @@ -37637,7 +39839,7 @@ index 000000000..137e9b7c7 + + format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} @@ -37654,7 +39856,7 @@ index 000000000..137e9b7c7 + + format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} @@ -37671,7 +39873,7 @@ index 000000000..137e9b7c7 + + format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode(riscv_enc_java_to_runtime(meth)); + + ins_pipe(pipe_class_call); +%} @@ -37684,31 +39886,31 @@ index 000000000..137e9b7c7 +// gen_subtype_check()). Return zero for a hit. The encoding +// ALSO sets flags. + -+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) ++instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); -+ effect(KILL temp, KILL cr); ++ effect(KILL tmp, KILL cr); + + ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); + format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + + opcode(0x1); // Force zero of result reg on hit + + ins_pipe(pipe_class_memory); +%} + -+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, -+ immP0 zero, rFlagsReg cr) ++instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, ++ immP0 zero, rFlagsReg cr) +%{ + match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); -+ effect(KILL temp, KILL result); ++ effect(KILL tmp, KILL result); + + ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); + format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + + opcode(0x0); // Don't zero result reg on hit + @@ -37719,7 +39921,7 @@ index 000000000..137e9b7c7 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} @@ -37737,7 +39939,7 @@ index 000000000..137e9b7c7 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + 
predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} @@ -37754,7 +39956,7 @@ index 000000000..137e9b7c7 + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} @@ -37772,7 +39974,7 @@ index 000000000..137e9b7c7 + rFlagsReg cr) +%{ + predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} @@ -37786,15 +39988,15 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, @@ -37807,15 +40009,15 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ + __ string_indexof($str1$$Register, $str2$$Register, 
+ $cnt1$$Register, $cnt2$$Register, @@ -37828,13 +40030,13 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} + + ins_encode %{ @@ -37849,13 +40051,14 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ + format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} + + ins_encode %{ @@ -37870,15 +40073,15 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -37891,15 +40094,15 @@ index 000000000..137e9b7c7 +%} + +instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + ++ format 
%{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ + int icnt2 = (int)$int_cnt2$$constant; + __ string_indexof_linearscan($str1$$Register, $str2$$Register, @@ -37913,19 +40116,18 @@ index 000000000..137e9b7c7 + +instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} + ins_pipe(pipe_class_memory); +%} @@ -37933,15 +40135,14 @@ index 000000000..137e9b7c7 + +instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); ++ predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, @@ -37951,11 +40152,11 @@ index 000000000..137e9b7c7 +%} + +// clearing of an array -+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) +%{ + predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++ effect(USE_KILL cnt, USE_KILL base); + + ins_cost(4 * DEFAULT_COST); + format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} @@ -38022,42 +40223,34 @@ index 000000000..137e9b7c7 + +instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ + predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, 
$tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) +%{ + predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 2); + %} + ins_pipe(pipe_class_memory); +%} @@ -38100,29 +40293,29 @@ index 000000000..137e9b7c7 + +// inlined locking and unlocking +// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + match(Set cr (FastLock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ effect(TEMP tmp1, TEMP tmp2); + + ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); -+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} ++ format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %} + -+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); + + ins_pipe(pipe_serial); +%} + +// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + match(Set cr (FastUnlock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ effect(TEMP tmp1, TEMP tmp2); + + ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); -+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ format %{ "fastunlock $object,$box\t! 
kills $tmp1, $tmp2, #@cmpFastUnlock" %} + -+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); + + ins_pipe(pipe_serial); +%} @@ -38184,7 +40377,7 @@ index 000000000..137e9b7c7 + + format %{ "j rethrow_stub\t#@RethrowException" %} + -+ ins_encode( riscv_enc_rethrow() ); ++ ins_encode(riscv_enc_rethrow()); + + ins_pipe(pipe_class_call); +%} @@ -38212,6 +40405,7 @@ index 000000000..137e9b7c7 + format %{ "#@ShouldNotReachHere" %} + + ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); + if (is_reachable()) { + __ halt(); + } @@ -38261,10 +40455,10 @@ index 000000000..137e9b7c7 +// End: diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad new file mode 100644 -index 000000000..6f7055a39 +index 00000000000..4488c1c4031 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,605 @@ +@@ -0,0 +1,527 @@ +// +// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -38292,54 +40486,51 @@ index 000000000..6f7055a39 + +// RISCV Bit-Manipulation Extension Architecture Description File + -+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ -+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); -+ effect(DEF dst, USE src); -+ -+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} ++instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); ++ ++ format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); + %} + + ins_pipe(ialu_reg_shift); +%} + -+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ -+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); -+ effect(DEF dst, USE src); ++instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); + -+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++ format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); ++ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); + %} + + ins_pipe(ialu_reg_shift); +%} + -+// ror expander -+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src shift)); + -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} ++ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ + ins_pipe(ialu_reg_reg); +%} + -+// ror expander -+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateRight src 
shift)); + -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} ++ format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); @@ -38347,111 +40538,36 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + ++instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); + -+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); -+ -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); -+ -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); -+ -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); -+ -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} -+ -+// rol expander -+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); -+ -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} ++ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ + ins_pipe(ialu_reg_reg); +%} + -+// rol expander -+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ predicate(UseRVB); ++ match(Set dst (RotateLeft src shift)); + -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} ++ format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} + ins_cost(ALU_COST); + ins_encode %{ + __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); + %} -+ + ins_pipe(ialu_reg_reg); +%} + -+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); -+ -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); -+ -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); -+ -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} -+ -+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); -+ -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} -+ +// Convert oop into int for vectors alignment masking -+instruct convP2I_b(iRegINoSp dst, iRegP src) %{ -+ predicate(UseZba); ++instruct convP2I_rvb(iRegINoSp dst, 
iRegP src) %{ ++ predicate(UseRVB); + match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -38462,11 +40578,11 @@ index 000000000..6f7055a39 +%} + +// byte to int -+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseZbb); ++instruct convB2I_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -38477,11 +40593,11 @@ index 000000000..6f7055a39 +%} + +// int to short -+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseZbb); ++instruct convI2S_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseRVB); + match(Set dst (RShiftI (LShiftI src lshift) rshift)); + -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -38492,11 +40608,11 @@ index 000000000..6f7055a39 +%} + +// short to unsigned int -+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseZbb); ++instruct convS2UI_reg_reg_rvb(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseRVB); + match(Set dst (AndI src mask)); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -38507,11 +40623,11 @@ index 000000000..6f7055a39 +%} + +// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseZba); ++instruct convI2UL_reg_reg_rvb(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseRVB); + match(Set dst (AndL (ConvI2L src) mask)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_rvb" %} + + ins_cost(ALU_COST); + ins_encode %{ @@ -38522,12 +40638,12 @@ index 000000000..6f7055a39 +%} + +// BSWAP instructions -+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); ++instruct bytes_reverse_int_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); + match(Set dst (ReverseBytesI src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_rvb" %} + + ins_encode %{ + __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38536,12 +40652,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); ++instruct bytes_reverse_long_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); + match(Set dst (ReverseBytesL src)); + + ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_rvb" %} + + ins_encode %{ + __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38550,12 +40666,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ 
predicate(UseZbb); ++instruct bytes_reverse_unsigned_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); + match(Set dst (ReverseBytesUS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_rvb" %} + + ins_encode %{ + __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38564,12 +40680,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); ++instruct bytes_reverse_short_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); + match(Set dst (ReverseBytesS src)); + + ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_rvb" %} + + ins_encode %{ + __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38579,12 +40695,12 @@ index 000000000..6f7055a39 +%} + +// Shift Add Pointer -+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); ++instruct shaddP_reg_reg_rvb(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -38597,12 +40713,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); ++instruct shaddP_reg_reg_ext_rvb(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -38616,12 +40732,12 @@ index 000000000..6f7055a39 +%} + +// Shift Add Long -+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); ++instruct shaddL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL src2 imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -38634,12 +40750,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); ++instruct shaddL_reg_reg_ext_rvb(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseRVB); + match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + + ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_rvb" %} + + ins_encode %{ + __ shadd(as_Register($dst$$reg), @@ -38653,12 +40769,12 @@ index 000000000..6f7055a39 +%} + +// Zeros Count instructions -+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); ++instruct countLeadingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ 
predicate(UseRVB); + match(Set dst (CountLeadingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_rvb" %} + + ins_encode %{ + __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38667,12 +40783,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); ++instruct countLeadingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); + match(Set dst (CountLeadingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_rvb" %} + + ins_encode %{ + __ clz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38681,12 +40797,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); ++instruct countTrailingZerosI_rvb(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseRVB); + match(Set dst (CountTrailingZerosI src)); + + ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_rvb" %} + + ins_encode %{ + __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38695,12 +40811,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg); +%} + -+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); ++instruct countTrailingZerosL_rvb(iRegINoSp dst, iRegL src) %{ ++ predicate(UseRVB); + match(Set dst (CountTrailingZerosL src)); + + ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_rvb" %} + + ins_encode %{ + __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38710,12 +40826,12 @@ index 000000000..6f7055a39 +%} + +// Population Count instructions -+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ ++instruct popCountI_rvb(iRegINoSp dst, iRegIorL2I src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + + ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ format %{ "cpopw $dst, $src\t#@popCountI_rvb" %} + + ins_encode %{ + __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38725,12 +40841,12 @@ index 000000000..6f7055a39 +%} + +// Note: Long/bitCount(long) returns an int. 
-+instruct popCountL_b(iRegINoSp dst, iRegL src) %{ ++instruct popCountL_rvb(iRegINoSp dst, iRegL src) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + + ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ format %{ "cpop $dst, $src\t#@popCountL_rvb" %} + + ins_encode %{ + __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); @@ -38740,12 +40856,12 @@ index 000000000..6f7055a39 +%} + +// Max and Min -+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); ++instruct minI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); + match(Set dst (MinI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_rvb" %} + + ins_encode %{ + __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -38754,12 +40870,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); ++instruct maxI_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseRVB); + match(Set dst (MaxI src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_rvb" %} + + ins_encode %{ + __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); @@ -38769,14 +40885,14 @@ index 000000000..6f7055a39 +%} + +// Abs -+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ -+ predicate(UseZbb); ++instruct absI_reg_rvb(iRegINoSp dst, iRegI src) %{ ++ predicate(UseRVB); + match(Set dst (AbsI src)); + + ins_cost(ALU_COST * 2); + format %{ + "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_b" ++ "max $dst, $src, t0\t#@absI_reg_rvb" + %} + + ins_encode %{ @@ -38787,14 +40903,14 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); ++instruct absL_reg_rvb(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseRVB); + match(Set dst (AbsL src)); + + ins_cost(ALU_COST * 2); + format %{ + "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_b" ++ "max $dst, $src, t0\t#@absL_reg_rvb" + %} + + ins_encode %{ @@ -38802,16 +40918,16 @@ index 000000000..6f7055a39 + __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + +// And Not -+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); ++instruct andnI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); + match(Set dst (AndI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -38822,12 +40938,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); ++instruct andnL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); + match(Set dst (AndL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_rvb" %} + + ins_encode %{ + __ andn(as_Register($dst$$reg), @@ -38839,12 +40955,12 @@ index 000000000..6f7055a39 +%} + +// Or Not -+instruct ornI_reg_reg_b(iRegINoSp dst, 
iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); ++instruct ornI_reg_reg_rvb(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseRVB); + match(Set dst (OrI src1 (XorI src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -38855,12 +40971,12 @@ index 000000000..6f7055a39 + ins_pipe(ialu_reg_reg); +%} + -+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); ++instruct ornL_reg_reg_rvb(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseRVB); + match(Set dst (OrL src1 (XorL src2 m1))); + + ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_rvb" %} + + ins_encode %{ + __ orn(as_Register($dst$$reg), @@ -38870,12 +40986,13 @@ index 000000000..6f7055a39 + + ins_pipe(ialu_reg_reg); +%} +\ No newline at end of file diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad new file mode 100644 -index 000000000..905041890 +index 00000000000..3828e096b21 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,1723 @@ +@@ -0,0 +1,2065 @@ +// +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. @@ -38902,7 +41019,7 @@ index 000000000..905041890 +// +// + -+// RISCV VEC Architecture Description File ++// RISCV Vector Extension Architecture Description File + +opclass vmemA(indirect); + @@ -38912,19 +41029,7 @@ index 000000000..905041890 + +source %{ + -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } -+ -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } -+ -+ static void loadStore(MacroAssembler masm, bool is_store, ++ static void loadStore(C2_MacroAssembler masm, bool is_store, + VectorRegister reg, BasicType bt, Register base) { + Assembler::SEW sew = Assembler::elemtype_to_sew(bt); + masm.vsetvli(t0, x0, sew); @@ -38952,6 +41057,29 @@ index 000000000..905041890 + case Op_ExtractL: + case Op_ExtractS: + case Op_ExtractUB: ++ // Vector API specific ++ case Op_AndReductionV: ++ case Op_OrReductionV: ++ case Op_XorReductionV: ++ case Op_LoadVectorGather: ++ case Op_StoreVectorScatter: ++ case Op_VectorBlend: ++ case Op_VectorCast: ++ case Op_VectorCastB2X: ++ case Op_VectorCastD2X: ++ case Op_VectorCastF2X: ++ case Op_VectorCastI2X: ++ case Op_VectorCastL2X: ++ case Op_VectorCastS2X: ++ case Op_VectorInsert: ++ case Op_VectorLoadConst: ++ case Op_VectorLoadMask: ++ case Op_VectorLoadShuffle: ++ case Op_VectorMaskCmp: ++ case Op_VectorRearrange: ++ case Op_VectorReinterpret: ++ case Op_VectorStoreMask: ++ case Op_VectorTest: + return false; + default: + return UseRVV; @@ -38973,8 +41101,8 @@ index 000000000..905041890 + format %{ "vle $dst, $mem\t#@loadV" %} + ins_encode %{ + VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(MacroAssembler(&cbuf), false, dst_reg, -+ vector_element_basic_type(this), as_Register($mem$$base)); ++ loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, ++ Matcher::vector_element_basic_type(this), 
as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} @@ -38985,8 +41113,8 @@ index 000000000..905041890 + format %{ "vse $src, $mem\t#@storeV" %} + ins_encode %{ + VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(MacroAssembler(&cbuf), true, src_reg, -+ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ loadStore(C2_MacroAssembler(&cbuf), true, src_reg, ++ Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); + %} + ins_pipe(pipe_slow); +%} @@ -39224,6 +41352,98 @@ index 000000000..905041890 + ins_pipe(pipe_slow); +%} + ++// vector integer max/min ++ ++instruct vmax(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmax_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmin(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && ++ n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} ++ ins_encode %{ ++ BasicType bt = Matcher::vector_element_basic_type(this); ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ __ vsetvli(t0, x0, sew); ++ __ vmin_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector float-point max/min ++ ++instruct vmaxF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MaxV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vminF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminF $dst, $src1, $src2\t#@vminF" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vminD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinV src1 src2)); ++ effect(TEMP_DEF dst); ++ ins_cost(VEC_COST); ++ format %{ "vminD 
$dst, $src1, $src2\t#@vminD" %} ++ ins_encode %{ ++ __ minmax_FD_v(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ +// vector fmla + +// dst_src1 = dst_src1 + src2 * src3 @@ -39655,7 +41875,7 @@ index 000000000..905041890 + __ vsetvli(t0, x0, Assembler::e32); + __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); + __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); ++ as_VectorRegister($tmp$$reg)); + __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); + %} + ins_pipe(pipe_slow); @@ -39678,6 +41898,232 @@ index 000000000..905041890 + ins_pipe(pipe_slow); +%} + ++// vector integer max reduction ++instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector 
integer min reduction ++instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Label Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector float max reduction ++ ++instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 
++ match(Set dst (MaxReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, false /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector float min reduction ++ ++instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ false /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (MinReductionV src1 src2)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); ++ format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ reduce_minmax_FD_v($dst$$FloatRegister, ++ $src1$$FloatRegister, as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector Math.rint, floor, ceil ++ ++instruct vroundD(vReg dst, vReg src, immI rmode) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); ++ match(Set dst (RoundDoubleModeV src rmode)); ++ format %{ "vroundD $dst, $src, $rmode" %} ++ ins_encode %{ ++ switch ($rmode$$constant) { ++ case RoundDoubleModeNode::rmode_rint: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rne); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_floor: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ case RoundDoubleModeNode::rmode_ceil: ++ __ csrwi(CSR_FRM, C2_MacroAssembler::rup); ++ __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ +// vector replicate + +instruct replicateB(vReg dst, iRegIorL2I src) %{ @@ -39999,7 +42445,7 @@ index 000000000..905041890 +%} + +instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src shift)); ++ match(Set dst (RShiftVB src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} + ins_encode %{ @@ -40017,7 +42463,7 @@ index 000000000..905041890 +%} + +instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src shift)); ++ match(Set dst (RShiftVS src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} + ins_encode %{ @@ -40035,7 +42481,7 @@ index 000000000..905041890 +%} + +instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src shift)); ++ match(Set dst (RShiftVI src (RShiftCntV shift))); + ins_cost(VEC_COST); + format 
%{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} + ins_encode %{ @@ -40051,33 +42497,26 @@ index 000000000..905041890 + ins_pipe(pipe_slow); +%} + -+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (RShiftVL src shift)); ++instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (RShiftVL src (RShiftCntV shift))); + ins_cost(VEC_COST); -+ effect(TEMP tmp); + format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } -+ if (con < 32) { -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src shift)); ++ match(Set dst (URShiftVB src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} + ins_encode %{ @@ -40099,7 +42538,7 @@ index 000000000..905041890 +%} + +instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src shift)); ++ match(Set dst (URShiftVS src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} + ins_encode %{ @@ -40121,7 +42560,7 @@ index 000000000..905041890 +%} + +instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src shift)); ++ match(Set dst (URShiftVI src (RShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} + ins_encode %{ @@ -40137,33 +42576,26 @@ index 000000000..905041890 + ins_pipe(pipe_slow); +%} + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (URShiftVL src shift)); ++instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (URShiftVL src (RShiftCntV shift))); + ins_cost(VEC_COST); -+ effect(TEMP tmp); + format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); + if (con == 0) { + __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), + as_VectorRegister($src$$reg)); + return; + } -+ if (con < 32) { -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} + +instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src shift)); ++ match(Set dst (LShiftVB src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} + 
ins_encode %{ @@ -40180,7 +42612,7 @@ index 000000000..905041890 +%} + +instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src shift)); ++ match(Set dst (LShiftVS src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} + ins_encode %{ @@ -40197,7 +42629,7 @@ index 000000000..905041890 +%} + +instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src shift)); ++ match(Set dst (LShiftVI src (LShiftCntV shift))); + ins_cost(VEC_COST); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} + ins_encode %{ @@ -40208,22 +42640,15 @@ index 000000000..905041890 + ins_pipe(pipe_slow); +%} + -+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (LShiftVL src shift)); ++instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); ++ match(Set dst (LShiftVL src (LShiftCntV shift))); + ins_cost(VEC_COST); -+ effect(TEMP tmp); + format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; + __ vsetvli(t0, x0, Assembler::e64); -+ if (con < 32) { -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); + %} + ins_pipe(pipe_slow); +%} @@ -40377,11 +42802,11 @@ index 000000000..905041890 + +instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +%{ + predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ @@ -40394,11 +42819,11 @@ index 000000000..905041890 + +instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +%{ + predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ @@ -40410,11 +42835,11 @@ index 000000000..905041890 +%} + +instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +%{ + predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL 
ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ @@ -40425,11 +42850,11 @@ index 000000000..905041890 +%} + +instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +%{ + predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); + + format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ @@ -40445,7 +42870,7 @@ index 000000000..905041890 +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} @@ -40464,7 +42889,7 @@ index 000000000..905041890 +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} @@ -40483,7 +42908,7 @@ index 000000000..905041890 +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} @@ -40501,7 +42926,7 @@ index 000000000..905041890 +%{ + predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); + match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, + TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + + format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} @@ -40524,11 +42949,7 @@ index 000000000..905041890 + + format %{ "String Inflate $src,$dst" %} + ins_encode %{ -+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); + %} + ins_pipe(pipe_class_memory); +%} @@ -40567,20 +42988,58 @@ index 000000000..905041890 + ins_pipe( pipe_slow ); +%} + -+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, 
iRegI_R10 result, iRegL tmp) ++instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) +%{ + predicate(UseRVV); -+ match(Set result (HasNegatives ary1 len)); -+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); -+ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++ match(Set result (CountPositives ary len)); ++ effect(USE_KILL ary, USE_KILL len, TEMP tmp); ++ ++ format %{ "count positives byte[] $ary, $len -> $result" %} + ins_encode %{ -+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); + %} -+ ins_pipe( pipe_slow ); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ false /* isL */); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) ++%{ ++ predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); ++ ++ format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} ++ ++ ins_encode %{ ++ __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ true /* isL */); ++ %} ++ ++ ins_pipe(pipe_class_memory); +%} + +// clearing of an array @@ -40601,14 +43060,14 @@ index 000000000..905041890 +%} diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp new file mode 100644 -index 000000000..9922ff4cf +index 00000000000..f85d4b25a76 --- /dev/null +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2738 @@ +@@ -0,0 +1,2761 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -40637,15 +43096,24 @@ index 000000000..9922ff4cf +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" @@ -40723,7 +43191,7 @@ index 000000000..9922ff4cf + } + + int reg_offset_in_bytes(Register r) { -+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + @@ -40737,10 +43205,6 @@ index 000000000..9922ff4cf + RegisterImpl::max_slots_per_register * + BytesPerInt; + } -+ -+ // During deoptimization only the result registers need to be restored, -+ // all the other values have already been extracted. -+ void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { @@ -40753,6 +43217,7 @@ index 000000000..9922ff4cf + } +#endif + ++ assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; @@ -40807,6 +43272,7 @@ index 000000000..9922ff4cf +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++ assert_cond(masm != NULL); +#ifdef COMPILER2 + __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); +#else @@ -40815,36 +43281,24 @@ index 000000000..9922ff4cf + __ leave(); +} + -+void RegisterSaver::restore_result_registers(MacroAssembler* masm) { -+ // Just restore result register. Only used by deoptimization. By -+ // now any callee save register that needs to be restored to a c2 -+ // caller of the deoptee has been extracted into the vframeArray -+ // and will be stuffed into the c2i adapter we create for later -+ // restoration so only result registers need to be restored here. -+ // Restore fp result register -+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); -+ -+ // Pop all of the register save are off the stack -+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); -+} -+ +// Is vector's size (in bytes) bigger than a size saved by default? +// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. +bool SharedRuntime::is_wide_vector(int size) { + return UseRVV; +} + -+size_t SharedRuntime::trampoline_size() { -+ // Byte size of function generate_trampoline. 
movptr_with_offset: 5 instructions, jalr: 1 instrction -+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; +} + -+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli -+ __ jalr(x0, t0, offset); ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- @@ -40869,9 +43323,7 @@ index 000000000..9922ff4cf + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, -+ int total_args_passed, -+ int is_outgoing) { -+ assert_cond(sig_bt != NULL && regs != NULL); ++ int total_args_passed) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { @@ -40945,6 +43397,7 @@ index 000000000..9922ff4cf + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); + Label L; + __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, L); @@ -40966,6 +43419,11 @@ index 000000000..9922ff4cf + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); + __ jalr(x1, t0, offset); ++ ++ // Explicit fence.i required because fixup_callers_callsite may change the code ++ // stream. 
++ __ safepoint_ifence(); ++ + __ pop_CPU_state(); + // restore sp + __ leave(); @@ -41052,7 +43510,7 @@ index 000000000..9922ff4cf + __ sd(t0, Address(sp, next_off), /*temp register*/esp); +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaaaul); ++ __ li(t0, 0xdeadffffdeadaaaaul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + } else { @@ -41068,10 +43526,10 @@ index 000000000..9922ff4cf + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // jlong/double in gpr ++ // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaabul); ++ __ li(t0, 0xdeadffffdeadaaabul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ sd(r, Address(sp, next_off)); @@ -41087,7 +43545,7 @@ index 000000000..9922ff4cf + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaacul); ++ __ li(t0, 0xdeadffffdeadaaacul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); @@ -41254,10 +43712,39 @@ index 000000000..9922ff4cf + + address c2i_entry = __ pc(); + ++ // Class initialization barrier for static methods ++ address c2i_no_clinit_check_entry = NULL; ++ if (VM_Version::supports_fast_class_init_checks()) { ++ Label L_skip_barrier; ++ ++ { // Bypass the barrier for non-static methods ++ __ lwu(t0, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t1, t0, JVM_ACC_STATIC); ++ __ beqz(t1, L_skip_barrier); // non-static ++ } ++ ++ __ load_method_holder(t1, xmethod); ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ __ bind(L_skip_barrier); ++ c2i_no_clinit_check_entry = __ pc(); ++ } ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->c2i_entry_barrier(masm); ++ + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); -+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++} ++ ++int SharedRuntime::vector_calling_convention(VMRegPair *regs, ++ uint num_bits, ++ uint total_args_passed) { ++ Unimplemented(); ++ return 0; +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, @@ -41265,7 +43752,6 @@ index 000000000..9922ff4cf + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on riscv"); -+ assert_cond(sig_bt != NULL && regs != NULL); + + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. @@ -41343,7 +43829,190 @@ index 000000000..9922ff4cf + return stk_args; +} + ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. 
On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); ++ ++ // See if oop is NULL if it is we need no handle ++ ++ if (src.first()->is_stack()) { ++ ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { ++ ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } ++ ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } ++ } ++ ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41361,6 +44030,7 @@ index 000000000..9922ff4cf +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41378,6 +44048,7 @@ index 000000000..9922ff4cf +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if 
(args[i].first()->is_Register()) { @@ -41391,6 +44062,7 @@ index 000000000..9922ff4cf +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { @@ -41410,85 +44082,17 @@ index 000000000..9922ff4cf + } +} + -+// Check GCLocker::needs_gc and enter the runtime if it's true. This -+// keeps a new JNI critical region from starting until a GC has been -+// forced. Save down any oops in registers and describe them in an -+// OopMap. -+static void check_needs_gc_for_critical_native(MacroAssembler* masm, -+ int stack_slots, -+ int total_c_args, -+ int total_in_args, -+ int arg_save_area, -+ OopMapSet* oop_maps, -+ VMRegPair* in_regs, -+ BasicType* in_sig_bt) { Unimplemented(); } -+ -+// Unpack an array argument into a pointer to the body and the length -+// if the array is non-null, otherwise pass 0 for both. -+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } -+ -+class ComputeMoveOrder: public StackObj { -+ class MoveOperation: public ResourceObj { -+ friend class ComputeMoveOrder; -+ private: -+ VMRegPair _src; -+ VMRegPair _dst; -+ int _src_index; -+ int _dst_index; -+ bool _processed; -+ MoveOperation* _next; -+ MoveOperation* _prev; -+ -+ static int get_id(VMRegPair r) { Unimplemented(); return 0; } -+ -+ public: -+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): -+ _src(src) -+ , _dst(dst) -+ , _src_index(src_index) -+ , _dst_index(dst_index) -+ , _processed(false) -+ , _next(NULL) -+ , _prev(NULL) { Unimplemented(); } -+ -+ ~MoveOperation() { -+ _next = NULL; -+ _prev = NULL; -+ } -+ -+ VMRegPair src() const { Unimplemented(); return _src; } -+ int src_id() const { Unimplemented(); return 0; } -+ int src_index() const { Unimplemented(); return 0; } -+ VMRegPair dst() const { Unimplemented(); return _src; } -+ void set_dst(int i, VMRegPair dst) { Unimplemented(); } -+ int dst_index() const { Unimplemented(); return 0; } -+ int dst_id() const { Unimplemented(); return 0; } -+ MoveOperation* next() const { Unimplemented(); return 0; } -+ MoveOperation* prev() const { Unimplemented(); return 0; } -+ void set_processed() { Unimplemented(); } -+ bool is_processed() const { Unimplemented(); return 0; } -+ -+ // insert -+ void break_cycle(VMRegPair temp_register) { Unimplemented(); } -+ -+ void link(GrowableArray& killer) { Unimplemented(); } -+ }; -+ -+ private: -+ GrowableArray edges; -+ -+ public: -+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, -+ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } -+ -+ ~ComputeMoveOrder() {} -+ // Collected all the move operations -+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } -+ -+ // Walk the edges breaking cycles between moves. 
The result list -+ // can be walked in order to produce the proper set of loads -+ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -+}; ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, @@ -41529,10 +44133,10 @@ index 000000000..9922ff4cf + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = x9; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); -+ } else if (iid == vmIntrinsics::_invokeBasic) { ++ } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { + has_receiver = true; + } else { -+ fatal("unexpected intrinsic id %d", iid); ++ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); + } + + if (member_reg != noreg) { @@ -41604,8 +44208,7 @@ index 000000000..9922ff4cf + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, -+ BasicType ret_type, -+ address critical_entry) { ++ BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); @@ -41630,12 +44233,7 @@ index 000000000..9922ff4cf + in_ByteSize(-1), + (OopMapSet*)NULL); + } -+ bool is_critical_native = true; -+ address native_func = critical_entry; -+ if (native_func == NULL) { -+ native_func = method->native_function(); -+ is_critical_native = false; -+ } ++ address native_func = method->native_function(); + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) @@ -41650,70 +44248,20 @@ index 000000000..9922ff4cf + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args; -+ if (!is_critical_native) { -+ total_c_args += 1; -+ if (method->is_static()) { -+ total_c_args++; -+ } -+ } else { -+ for (int i = 0; i < total_in_args; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ total_c_args++; -+ } -+ } -+ } ++ int total_c_args = total_in_args + (method->is_static() ? 
2 : 1); + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ assert_cond(out_sig_bt != NULL && out_regs != NULL); + BasicType* in_elem_bt = NULL; + + int argc = 0; -+ if (!is_critical_native) { -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } -+ } else { -+ Thread* THREAD = Thread::current(); -+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); -+ assert_cond(in_elem_bt != NULL); -+ SignatureStream ss(method->signature()); -+ for (int i = 0; i < total_in_args ; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ // Arrays are passed as int, elem* pair -+ out_sig_bt[argc++] = T_INT; -+ out_sig_bt[argc++] = T_ADDRESS; -+ Symbol* atype = ss.as_symbol(CHECK_NULL); -+ const char* at = atype->as_C_string(); -+ if (strlen(at) == 2) { -+ assert(at[0] == '[', "must be"); -+ switch (at[1]) { -+ case 'B': in_elem_bt[i] = T_BYTE; break; -+ case 'C': in_elem_bt[i] = T_CHAR; break; -+ case 'D': in_elem_bt[i] = T_DOUBLE; break; -+ case 'F': in_elem_bt[i] = T_FLOAT; break; -+ case 'I': in_elem_bt[i] = T_INT; break; -+ case 'J': in_elem_bt[i] = T_LONG; break; -+ case 'S': in_elem_bt[i] = T_SHORT; break; -+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ } else { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ in_elem_bt[i] = T_VOID; -+ } -+ if (in_sig_bt[i] != T_VOID) { -+ assert(in_sig_bt[i] == ss.type(), "must match"); -+ ss.next(); -+ } -+ } ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; + } + + // Now figure out where the args must be stored and how much stack space @@ -41730,34 +44278,6 @@ index 000000000..9922ff4cf + + // Now the space for the inbound oop handle area + int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers -+ if (is_critical_native) { -+ // Critical natives may have to call out so they need a save area -+ // for register arguments. -+ int double_slots = 0; -+ int single_slots = 0; -+ for ( int i = 0; i < total_in_args; i++) { -+ if (in_regs[i].first()->is_Register()) { -+ const Register reg = in_regs[i].first()->as_Register(); -+ switch (in_sig_bt[i]) { -+ case T_BOOLEAN: -+ case T_BYTE: -+ case T_SHORT: -+ case T_CHAR: -+ case T_INT: single_slots++; break; -+ case T_ARRAY: // specific to LP64 (7145024) -+ case T_LONG: double_slots++; break; -+ default: ShouldNotReachHere(); -+ } -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ ShouldNotReachHere(); -+ } -+ } -+ total_save_slots = double_slots * 2 + single_slots; -+ // align the save area -+ if (double_slots != 0) { -+ stack_slots = align_up(stack_slots, 2); -+ } -+ } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; @@ -41848,18 +44368,27 @@ index 000000000..9922ff4cf + // first instruction with a jump. 
+ __ nop(); + -+ // Generate stack overflow check -+ if (UseStackBanging) { -+ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); -+ } else { -+ Unimplemented(); ++ if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { ++ Label L_skip_barrier; ++ __ mov_metadata(t1, method->method_holder()); // InstanceKlass* ++ __ clinit_barrier(t1, t0, &L_skip_barrier); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ __ bind(L_skip_barrier); + } + ++ // Generate stack overflow check ++ __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ + // Generate a new frame for the wrapper. + __ enter(); + // -2 because return address is already present and so is saved fp + __ sub(sp, sp, stack_size - 2 * wordSize); + ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->nmethod_entry_barrier(masm); ++ + // Frame is now completed as far as size and linkage. + int frame_complete = ((intptr_t)__ pc()) - start; + @@ -41868,11 +44397,6 @@ index 000000000..9922ff4cf + + const Register oop_handle_reg = x18; + -+ if (is_critical_native) { -+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, -+ oop_handle_offset, oop_maps, in_regs, in_sig_bt); -+ } -+ + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have @@ -41917,22 +44441,14 @@ index 000000000..9922ff4cf + +#endif /* ASSERT */ + -+ // This may iterate in two different directions depending on the -+ // kind of native it is. The reason is that for regular JNI natives -+ // the incoming and outgoing registers are offset upwards and for -+ // critical natives they are offset down. ++ // For JNI natives the incoming and outgoing registers are offset upwards. 
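++  // Each pair pushed into arg_order below is (incoming Java arg index,
++  // outgoing C arg index); the list is built from the last argument down to the first.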
+ GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(x9->as_VMReg()); + -+ if (!is_critical_native) { -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } -+ } else { -+ // Compute a valid move order, using tmp_vmreg to break any cycles -+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); + } + + int temploc = -1; @@ -41940,20 +44456,7 @@ index 000000000..9922ff4cf + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ if (c_arg == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // This arg needs to be moved to a temporary -+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); -+ in_regs[i] = tmp_vmreg; -+ temploc = i; -+ continue; -+ } else if (i == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // Read from the temporary location -+ assert(temploc != -1, "must be valid"); -+ i = temploc; -+ temploc = -1; -+ } ++ assert(c_arg != -1 && i != -1, "wrong order"); +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -41968,32 +44471,17 @@ index 000000000..9922ff4cf +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: -+ if (is_critical_native) { -+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); -+ c_arg++; -+#ifdef ASSERT -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif -+ int_args++; -+ break; -+ } -+ // no break + case T_OBJECT: -+ assert(!is_critical_native, "no oop arguments"); -+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); + int_args++; + break; + case T_VOID: + break; + + case T_FLOAT: -+ __ float_move(in_regs[i], out_regs[c_arg]); ++ float_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + @@ -42001,12 +44489,12 @@ index 000000000..9922ff4cf + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ __ double_move(in_regs[i], out_regs[c_arg]); ++ double_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_LONG : -+ __ long_move(in_regs[i], out_regs[c_arg]); ++ long_move(masm, in_regs[i], out_regs[c_arg]); + int_args++; + break; + @@ -42015,7 +44503,7 @@ index 000000000..9922ff4cf + break; + + default: -+ __ move32_64(in_regs[i], out_regs[c_arg]); ++ move32_64(masm, in_regs[i], out_regs[c_arg]); + int_args++; + } + } @@ -42025,7 +44513,7 @@ index 000000000..9922ff4cf + int c_arg = total_c_args - total_in_args; + + // Pre-load a static method's oop into c_rarg1. 
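++  // The class mirror is handlized here so it can be passed as the implicit
++  // jclass argument of a static native method.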
-+ if (method->is_static() && !is_critical_native) { ++ if (method->is_static()) { + + // load oop into a register + __ movoop(c_rarg1, @@ -42084,7 +44572,6 @@ index 000000000..9922ff4cf + Label lock_done; + + if (method->is_synchronized()) { -+ assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + @@ -42098,41 +44585,40 @@ index 000000000..9922ff4cf + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + -+ if (UseBiasedLocking) { -+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ if (!UseHeavyMonitors) { ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ } else { ++ __ j(slow_path_lock); + } + -+ // Load (object->mark() | 1) into swap_reg % x10 -+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ __ ori(swap_reg, t0, 1); -+ -+ // Save (object->mark() | 1) into BasicLock's displaced header -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ -+ // src -> dest if dest == x10 else x10 <- dest -+ { -+ Label here; -+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); -+ } -+ -+ // Test if the oopMark is an obvious stack pointer, i.e., -+ // 1) (mark & 3) == 0, and -+ // 2) sp <= mark < mark + os::pagesize() -+ // These 3 tests can be done by evaluating the following -+ // expression: ((mark - sp) & (3 - os::vm_page_size())), -+ // assuming both stack pointer and pagesize have their -+ // least significant 2 bits clear. 
-+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg -+ -+ __ sub(swap_reg, swap_reg, sp); -+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); -+ -+ // Save the test result, for recursive case, the result is zero -+ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); -+ __ bnez(swap_reg, slow_path_lock); -+ + // Slow path will re-enter here -+ + __ bind(lock_done); + } + @@ -42140,9 +44626,7 @@ index 000000000..9922ff4cf + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native -+ if (!is_critical_native) { -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); -+ } ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + + // Now set thread in native + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); @@ -42150,7 +44634,7 @@ index 000000000..9922ff4cf + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + -+ __ rt_call(native_func); ++ rt_call(masm, native_func); + + __ bind(native_return); + @@ -42158,10 +44642,13 @@ index 000000000..9922ff4cf + oop_maps->add_gc_map(return_pc - start, map); + + // Unpack native results. -+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { + __ cast_primitive_type(ret_type, x10); + } + ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; ++ + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -42171,38 +44658,28 @@ index 000000000..9922ff4cf + // didn't see any synchronization is progress, and escapes. + __ mv(t0, _thread_in_native_trans); + -+ if(os::is_MP()) { -+ if (UseMembar) { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); -+ -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, x12, t0); -+ } -+ } else { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests -+ Label safepoint_in_progress, safepoint_in_progress_done; + { -+ __ safepoint_poll_acquire(safepoint_in_progress); ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. 
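++  // safepoint_poll branches to safepoint_in_progress when the thread-local
++  // polling word is armed; pending suspend requests are checked right after.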
++ ++ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); + __ bind(safepoint_in_progress_done); + } + + // change thread state -+ Label after_transition; + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_Java); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); @@ -42212,7 +44689,7 @@ index 000000000..9922ff4cf + Label reguard; + Label reguard_done; + __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); -+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); + __ beq(t0, t1, reguard); + __ bind(reguard_done); + @@ -42228,30 +44705,31 @@ index 000000000..9922ff4cf + + Label done; + -+ if (UseBiasedLocking) { -+ __ biased_locking_exit(obj_reg, old_hdr, done); ++ if (!UseHeavyMonitors) { ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + } + -+ // Simple recursive lock? -+ -+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ __ beqz(t0, done); + + // Must save x10 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + ++ if (!UseHeavyMonitors) { ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + -+ // get address of the stack lock -+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); -+ // get old displaced header -+ __ ld(old_hdr, Address(x10, 0)); -+ -+ // Atomic swap old header if oop still contains the stack lock -+ Label succeed; -+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); -+ __ bind(succeed); ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); ++ } else { ++ __ j(slow_path_unlock); ++ } + + // slow path re-enters here + __ bind(unlock_done); @@ -42274,7 +44752,7 @@ index 000000000..9922ff4cf + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ if (is_reference_type(ret_type)) { + __ resolve_jobject(x10, xthread, t1); + } + @@ -42283,32 +44761,26 @@ index 000000000..9922ff4cf + __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + -+ if (!is_critical_native) { -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ } ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + + __ leave(); + -+ if (!is_critical_native) { -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); -+ } ++ // Any exception pending? 
++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + + // We're done + __ ret(); + + // Unexpected paths are out of line and go here + -+ if (!is_critical_native) { -+ // forward the exception -+ __ bind(exception_pending); ++ // forward the exception ++ __ bind(exception_pending); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ } ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // Slow path locking & unlocking + if (method->is_synchronized()) { @@ -42345,7 +44817,7 @@ index 000000000..9922ff4cf + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + save_native_result(masm, ret_type, stack_slots); + } + @@ -42358,7 +44830,7 @@ index 000000000..9922ff4cf + __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + +#ifdef ASSERT + { @@ -42372,7 +44844,7 @@ index 000000000..9922ff4cf + + __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + restore_native_result(masm, ret_type, stack_slots); + } + __ j(unlock_done); @@ -42385,7 +44857,7 @@ index 000000000..9922ff4cf + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ j(reguard_done); @@ -42404,21 +44876,12 @@ index 000000000..9922ff4cf + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + int32_t offset = 0; -+ if (!is_critical_native) { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); -+ } else { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); -+ } ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); + __ jalr(x1, t0, offset); ++ + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + -+ if (is_critical_native) { -+ // The call above performed the transition to thread_in_Java so -+ // skip the transition logic above. -+ __ j(after_transition); -+ } -+ + __ j(safepoint_in_progress_done); + __ block_comment("} safepoint"); + } @@ -42466,10 +44929,6 @@ index 000000000..9922ff4cf + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + assert(nm != NULL, "create native nmethod fail!"); -+ if (is_critical_native) { -+ nm->set_lazy_critical_native(true); -+ } -+ + return nm; +} + @@ -42590,7 +45049,7 @@ index 000000000..9922ff4cf + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. 
-+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread @@ -42651,7 +45110,7 @@ index 000000000..9922ff4cf + + __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + Label noException; -+ __ mv(t0, Deoptimization::Unpack_exception); ++ __ li(t0, Deoptimization::Unpack_exception); + __ bne(xcpool, t0, noException); // Was exception pending? + __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); + __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); @@ -42668,7 +45127,14 @@ index 000000000..9922ff4cf + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. -+ reg_saver.restore_result_registers(masm); ++ ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + + // All of the register save area has been popped of the stack. Only the + // return address remains. @@ -42697,10 +45163,8 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. -+ if (UseStackBanging) { -+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+ } ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); +#endif + // Load address of array of frame pcs into x12 + __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -42725,7 +45189,7 @@ index 000000000..9922ff4cf + __ sub(sp, sp, x9); + + // Push interpreter frames in a loop -+ __ mv(t0, (uint64_t)0xDEADDEAD); // Make a recognizable pattern ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern + __ mv(t1, t0); + Label loop; + __ bind(loop); @@ -42775,7 +45239,7 @@ index 000000000..9922ff4cf + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, -+ new OopMap( frame_size_in_words, 0 )); ++ new OopMap(frame_size_in_words, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); @@ -42798,6 +45262,14 @@ index 000000000..9922ff4cf + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + ++// Number of stack slots between incoming argument block and the start of ++// a new frame. The PROLOG must add this many slots to the stack. The ++// EPILOG must remove this many slots. ++// RISCV needs two words for RA (return address) and FP (frame pointer). ++uint SharedRuntime::in_preserve_stack_slots() { ++ return 2 * VMRegImpl::slots_per_word; ++} ++ +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} @@ -42901,12 +45373,10 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. 
-+ if (UseStackBanging) { -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+ } ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); +#endif + + // Load address of array of frame pcs into x12 (address*) @@ -43076,7 +45546,7 @@ index 000000000..9922ff4cf + __ bind(noException); + + Label no_adjust, bail; -+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ if (!cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); + __ bne(x18, t0, no_adjust); @@ -43128,7 +45598,7 @@ index 000000000..9922ff4cf +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; @@ -43210,6 +45680,14 @@ index 000000000..9922ff4cf +} + +#ifdef COMPILER2 ++RuntimeStub* SharedRuntime::make_native_invoker(address call_target, ++ int shadow_space_bytes, ++ const GrowableArray& input_registers, ++ const GrowableArray& output_registers) { ++ Unimplemented(); ++ return nullptr; ++} ++ +//------------------------------generate_exception_blob--------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. @@ -43290,6 +45768,10 @@ index 000000000..9922ff4cf + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); + __ jalr(x1, t0, offset); + ++ ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. ++ + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., @@ -43345,14 +45827,14 @@ index 000000000..9922ff4cf +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp new file mode 100644 -index 000000000..c5b3b094c +index 00000000000..b3fdd04db1b --- /dev/null +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp -@@ -0,0 +1,3743 @@ +@@ -0,0 +1,3864 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -43378,9 +45860,11 @@ index 000000000..c5b3b094c +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" @@ -43394,10 +45878,13 @@ index 000000000..c5b3b094c +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/align.hpp" ++#include "utilities/powerOfTwo.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif -+ ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure @@ -43465,28 +45952,16 @@ index 000000000..c5b3b094c + // we don't need to save x6-x7 and x28-x31 which both C and Java treat as + // volatile + // -+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary -+ // registers and C expects to be callee-save ++ // we save x18-x27 which Java uses as temporary registers and C ++ // expects to be callee-save + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... -+ // -34 [ argument word 1 ] -+ // -33 [ saved f27 ] <--- sp_after_call -+ // -32 [ saved f26 ] -+ // -31 [ saved f25 ] -+ // -30 [ saved f24 ] -+ // -29 [ saved f23 ] -+ // -28 [ saved f22 ] -+ // -27 [ saved f21 ] -+ // -26 [ saved f20 ] -+ // -25 [ saved f19 ] -+ // -24 [ saved f18 ] -+ // -23 [ saved f9 ] -+ // -22 [ saved f8 ] -+ // -21 [ saved x27 ] ++ // -22 [ argument word 1 ] ++ // -21 [ saved x27 ] <--- sp_after_call + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] @@ -43505,26 +45980,13 @@ index 000000000..c5b3b094c + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) + + // Call stub stack layout word offsets from fp + enum call_stub_layout { -+ sp_after_call_off = -33, -+ -+ f27_off = -33, -+ f26_off = -32, -+ f25_off = -31, -+ f24_off = -30, -+ f23_off = -29, -+ f22_off = -28, -+ f21_off = -27, -+ f20_off = -26, -+ f19_off = -25, -+ f18_off = -24, -+ f9_off = -23, -+ f8_off = -22, ++ sp_after_call_off = -21, + + x27_off = -21, + x26_off = -20, @@ -43539,15 +46001,15 @@ index 000000000..c5b3b094c + x9_off = -11, + + call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, + }; + + address generate_call_stub(address& return_address) { @@ -43570,19 +46032,6 @@ index 000000000..c5b3b094c + + const Address thread (fp, thread_off * wordSize); + -+ const Address f27_save (fp, f27_off * wordSize); -+ const Address f26_save (fp, f26_off * wordSize); -+ const Address f25_save (fp, f25_off * wordSize); -+ const Address f24_save (fp, f24_off * wordSize); -+ const Address f23_save (fp, 
f23_off * wordSize); -+ const Address f22_save (fp, f22_off * wordSize); -+ const Address f21_save (fp, f21_off * wordSize); -+ const Address f20_save (fp, f20_off * wordSize); -+ const Address f19_save (fp, f19_off * wordSize); -+ const Address f18_save (fp, f18_off * wordSize); -+ const Address f9_save (fp, f9_off * wordSize); -+ const Address f8_save (fp, f8_off * wordSize); -+ + const Address x27_save (fp, x27_off * wordSize); + const Address x26_save (fp, x26_off * wordSize); + const Address x25_save (fp, x25_off * wordSize); @@ -43629,19 +46078,6 @@ index 000000000..c5b3b094c + __ sd(x26, x26_save); + __ sd(x27, x27_save); + -+ __ fsd(f8, f8_save); -+ __ fsd(f9, f9_save); -+ __ fsd(f18, f18_save); -+ __ fsd(f19, f19_save); -+ __ fsd(f20, f20_save); -+ __ fsd(f21, f21_save); -+ __ fsd(f22, f22_save); -+ __ fsd(f23, f23_save); -+ __ fsd(f24, f24_save); -+ __ fsd(f25, f25_save); -+ __ fsd(f26, f26_save); -+ __ fsd(f27, f27_save); -+ + // install Java thread in global register now we have saved + // whatever value it held + __ mv(xthread, c_rarg7); @@ -43701,13 +46137,13 @@ index 000000000..c5b3b094c + __ ld(j_rarg2, result); + Label is_long, is_float, is_double, exit; + __ ld(j_rarg1, result_type); -+ __ mv(t0, (u1)T_OBJECT); ++ __ li(t0, (u1)T_OBJECT); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_LONG); ++ __ li(t0, (u1)T_LONG); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_FLOAT); ++ __ li(t0, (u1)T_FLOAT); + __ beq(j_rarg1, t0, is_float); -+ __ mv(t0, (u1)T_DOUBLE); ++ __ li(t0, (u1)T_DOUBLE); + __ beq(j_rarg1, t0, is_double); + + // handle T_INT case @@ -43733,19 +46169,6 @@ index 000000000..c5b3b094c +#endif + + // restore callee-save registers -+ __ fld(f27, f27_save); -+ __ fld(f26, f26_save); -+ __ fld(f25, f25_save); -+ __ fld(f24, f24_save); -+ __ fld(f23, f23_save); -+ __ fld(f22, f22_save); -+ __ fld(f21, f21_save); -+ __ fld(f20, f20_save); -+ __ fld(f19, f19_save); -+ __ fld(f18, f18_save); -+ __ fld(f9, f9_save); -+ __ fld(f8, f8_save); -+ + __ ld(x27, x27_save); + __ ld(x26, x26_save); + __ ld(x25, x25_save); @@ -43945,7 +46368,7 @@ index 000000000..c5b3b094c + + Label exit, error; + -+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + + __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ld(c_rarg3, Address(c_rarg2)); @@ -43956,12 +46379,22 @@ index 000000000..c5b3b094c + // make sure object is 'reasonable' + __ beqz(x10, exit); // if obj is NULL it is OK + ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if mask is good. ++ // verifies that ZAddressBadMask & x10 == 0 ++ __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ bnez(c_rarg2, error); ++ } ++#endif ++ + // Check if the oop is in the right area of memory + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, x10, c_rarg3); + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + -+ // Compare c_rarg2 and c_rarg3 ++ // Compare c_rarg2 and c_rarg3. + __ bne(c_rarg2, c_rarg3, error); + + // make sure klass is 'reasonable', which is not zero. 
@@ -43971,16 +46404,15 @@ index 000000000..c5b3b094c + // return if everything seems ok + __ bind(exit); + -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + __ ret(); + + // handle errors + __ bind(error); -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ push_reg(RegSet::range(x0, x31), sp); -+ // prepare parameters for debug64, c_rarg0: address of error message, -+ // c_rarg1: return address, c_rarg2: address of regs on stack ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) + __ mv(c_rarg0, t0); // pass address of error message + __ mv(c_rarg1, ra); // pass return address + __ mv(c_rarg2, sp); // pass address of regs on stack @@ -43991,6 +46423,7 @@ index 000000000..c5b3b094c + int32_t offset = 0; + __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); + __ jalr(x1, t0, offset); ++ __ ebreak(); + + return start; + } @@ -44036,42 +46469,276 @@ index 000000000..c5b3b094c + return start; + } + -+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; + -+ void copy_by_step(RegSet tmp_regs, Register src, Register dst, -+ unsigned unroll_factor, int unit) { -+ unsigned char regs[32]; -+ int offset = unit < 0 ? unit : 0; ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; + -+ // Scan bitset to get tmp regs -+ unsigned int regsSize = 0; -+ unsigned bitset = tmp_regs.bits(); ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; + -+ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); -+ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ const Register stride = x30; + -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[regsSize++] = reg; -+ } -+ bitset <<= 1; ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); ++ ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); ++ ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); + } + ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; ++ ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ 
ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); ++ ++ __ sub(count, count, 16); ++ __ bltz(count, drain); ++ ++ __ bind(again); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); ++ ++ __ sub(count, count, 8); ++ __ bgez(count, again); ++ ++ // Drain ++ __ bind(drain); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); ++ ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); ++ ++ __ bind(L1); ++ ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); ++ } ++ ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } ++ ++ __ ret(); ++ } ++ ++ Label copy_f, copy_b; ++ ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. If is_aligned is false, we align the source address. 
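++  // count is given in elements of |step| bytes; a negative step selects a
++  // backward copy with descending addresses.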
++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ ++ ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); ++ ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; ++ ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); ++ ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } ++ ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); ++ ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); ++ ++ if (is_backward) { ++ __ j(done); ++ ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } ++ ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); ++ } ++ ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; ++ ++ Label same_aligned; ++ Label copy8, copy_small, done; ++ + copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (abs(unit)) { ++ switch (granularity) { + case 1 : + ld_arr = (copy_insn)&MacroAssembler::lbu; + st_arr = (copy_insn)&MacroAssembler::sb; + break; -+ case BytesPerShort : ++ case 2 : + ld_arr = (copy_insn)&MacroAssembler::lhu; + st_arr = (copy_insn)&MacroAssembler::sh; + break; -+ case BytesPerInt : ++ case 4 : + ld_arr = (copy_insn)&MacroAssembler::lwu; + st_arr = (copy_insn)&MacroAssembler::sw; + break; -+ case BytesPerLong : ++ case 8 : + ld_arr = (copy_insn)&MacroAssembler::ld; + st_arr = (copy_insn)&MacroAssembler::sd; + break; @@ -44079,294 +46746,83 @@ index 000000000..c5b3b094c + ShouldNotReachHere(); + } + -+ for (unsigned i = 0; i < unroll_factor; i++) { -+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); -+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); -+ } -+ -+ __ addi(src, src, unroll_factor * unit); -+ __ addi(dst, dst, unroll_factor * unit); -+ } -+ -+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int ele_size, unsigned align_unit) { -+ bool is_backwards = ele_size < 0; -+ size_t granularity = uabs(ele_size); -+ for (unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { -+ int offset = is_backwards ? 
(int)(-unit) : unit; -+ Label exit; -+ __ andi(tmp, count_in_bytes, unit); -+ __ beqz(tmp, exit); -+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); -+ __ bind(exit); -+ } -+ } -+ -+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int step, Label *Lcopy_small, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ RegSet tmp_regs = RegSet::range(x13, x16); -+ assert_different_registers(src, dst, count_in_bytes, tmp); -+ -+ Label loop, copy2, copy1, finish; -+ if (loopsize == noreg) { -+ loopsize = t1; -+ __ mv(loopsize, 8 * granularity); -+ } -+ -+ // Cyclic copy with 8*step. -+ __ bind(loop); -+ { -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); -+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); -+ __ bge(count_in_bytes, loopsize, loop); -+ } -+ -+ if (Lcopy_small != NULL) { -+ __ bind(*Lcopy_small); -+ } -+ -+ // copy memory smaller than step * 8 bytes -+ __ andi(tmp, count_in_bytes, granularity << 2); -+ __ beqz(tmp, copy2); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); -+ -+ __ bind(copy2); -+ __ andi(tmp, count_in_bytes, granularity << 1); -+ __ beqz(tmp, copy1); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); -+ -+ __ bind(copy1); -+ __ andi(tmp, count_in_bytes, granularity); -+ __ beqz(tmp, finish); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); -+ -+ __ bind(finish); -+ } -+ -+ // Cyclic copy with one step. -+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ Label loop1; -+ if (loopsize == noreg) { -+ loopsize = t0; -+ __ mv(loopsize, granularity); -+ } -+ -+ __ bind(loop1); -+ { -+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); -+ __ sub(count_in_bytes, count_in_bytes, granularity); -+ __ bge(count_in_bytes, loopsize, loop1); -+ } -+ } -+ -+ void align_unit(Register src, Register dst, Register count_in_bytes, -+ unsigned unit, bool is_backwards) { -+ Label skip; -+ __ andi(t0, dst, unit); -+ __ beqz(t0, skip); -+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? -unit : unit); -+ __ sub(count_in_bytes, count_in_bytes, unit); -+ __ bind(skip); -+ } -+ -+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, -+ Register tmp, int ele_step) { -+ -+ bool is_backwards = ele_step < 0; -+ unsigned int granularity = uabs(ele_step); -+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; -+ const Register count_in_bytes = x31, src = x28, dst = x29; -+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); -+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); -+ __ add(src, s, is_backwards ? count_in_bytes : zr); -+ __ add(dst, d, is_backwards ? count_in_bytes : zr); -+ -+ // if count_in_elements < 8, copy_small -+ __ mv(t0, 8); -+ if (is_align && granularity < BytesPerLong) { -+ __ blt(count_in_bytes, t0, Lcopy_small); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); + } else { -+ __ blt(count_in_elements, t0, Lcopy_small); ++ __ mv(src, s); ++ __ mv(dst, d); + } + -+ if (granularity < BytesPerLong) { -+ Label Lcopy_aligned[3]; -+ Label Lalign8; -+ if (!is_align) { -+ Label Lalign_and_copy; -+ __ mv(t0, EagerArrayCopyThreshold); -+ __ blt(count_in_bytes, t0, Lalign_and_copy); -+ // Align dst to 8. 
-+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ -+ Register shr = x30, shl = x7, tmp1 = x13; -+ -+ __ andi(shr, src, 0x7); -+ __ beqz(shr, Lalign8); -+ { -+ // calculaute the shift for store doubleword -+ __ slli(shr, shr, 3); -+ __ sub(shl, shr, 64); -+ __ sub(shl, zr, shl); -+ -+ // alsrc: previous position of src octal alignment -+ Register alsrc = t1; -+ __ andi(alsrc, src, -8); -+ -+ // move src to tail -+ __ andi(t0, count_in_bytes, -8); -+ if (is_backwards) { -+ __ sub(src, src, t0); -+ } else { -+ __ add(src, src, t0); -+ } -+ -+ // prepare for copy_dstaligned_loop -+ __ ld(tmp1, alsrc, 0); -+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); -+ __ mv(x17, 8); -+ __ blt(count_in_bytes, x17, Lcopy_small); -+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); -+ __ j(Lcopy_small); -+ } -+ __ j(Ldone); -+ __ bind(Lalign_and_copy); -+ -+ // Check src and dst could be 8/4/2 algined at the same time. If could, align the -+ // memory and copy by 8/4/2. -+ __ xorr(t1, src, dst); -+ -+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { -+ Label skip; -+ unsigned int unit = alignment >> 1; -+ // Check src and dst could be aligned to checkbyte at the same time -+ // if copy from src to dst. If couldn't, jump to label not_aligned. -+ __ andi(t0, t1, alignment - 1); -+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); -+ // Align src and dst to unit. -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ } -+ __ bind(Lalign8); -+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { -+ // Copy memory by steps, which has been aligned to step_size. -+ Label loop8, Ltail; -+ int step = is_backwards ? (-step_size) : step_size; -+ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before -+ // Check whether the memory size is smaller than step_size. -+ __ mv(t0, step_size); -+ __ blt(count_in_bytes, t0, Ltail); -+ } -+ const Register eight_step = t1; -+ __ mv(eight_step, step_size * 8); -+ __ bge(count_in_bytes, eight_step, loop8); -+ // If memory is less than 8*step_size bytes, loop by step. -+ copy_loop1(src, dst, count_in_bytes, step, t0); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(loop8); -+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. -+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); -+ __ bind(Ltail); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); -+ } ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); + } -+ // If the ele_step is greater than 8, or the memory src and dst cannot -+ // be aligned with a number greater than the value of step. -+ // Cyclic copy with 8*ele_step. -+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); + -+ __ bind(Ldone); -+ } ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; -+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, -+ srl_reg1 = is_backwards ? tmp2 : tmp1, -+ sll_reg2 = is_backwards ? tmp2 : tmp3, -+ srl_reg2 = is_backwards ? 
tmp3 : tmp2, -+ sll_reg3 = is_backwards ? tmp3 : tmp4, -+ srl_reg3 = is_backwards ? tmp4 : tmp3, -+ sll_reg4 = is_backwards ? tmp4 : tmp1, -+ srl_reg4 = is_backwards ? tmp1 : tmp4; -+ assert_different_registers(t0, thirty_two, alsrc, shr, shl); -+ int unit = is_backwards ? -wordSize : wordSize; -+ int offset = is_backwards ? -wordSize : 0; -+ Label loop, done; ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ __ mv(thirty_two, 32); -+ __ blt(count_in_bytes, thirty_two, done); ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg1, shl); -+ __ srl(tmp1, srl_reg1, shr); -+ __ orr(tmp1, tmp1, t0); -+ __ sd(tmp1, dst, offset); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); ++ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); ++ } ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ __ ld(tmp3, alsrc, unit * 2); -+ __ sll(t0, sll_reg2, shl); -+ __ srl(tmp2, srl_reg2, shr); -+ __ orr(tmp2, tmp2, t0); -+ __ sd(tmp2, dst, unit + offset); ++ __ beqz(cnt, done); + -+ __ ld(tmp4, alsrc, unit * 3); -+ __ sll(t0, sll_reg3, shl); -+ __ srl(tmp3, srl_reg3, shr); -+ __ orr(tmp3, tmp3, t0); -+ __ sd(tmp3, dst, unit * 2 + offset); -+ -+ __ ld(tmp1, alsrc, unit * 4); -+ __ sll(t0, sll_reg4, shl); -+ __ srl(tmp4, srl_reg4, shr); -+ __ orr(tmp4, tmp4, t0); -+ __ sd(tmp4, dst, unit * 3 + offset); -+ -+ __ add(alsrc, alsrc, unit * 4); -+ __ add(dst, dst, unit * 4); -+ __ sub(count_in_bytes, count_in_bytes, 32); -+ __ bge(count_in_bytes, thirty_two, loop); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + + __ bind(done); + } + -+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, Register eight, -+ bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; -+ const Register sll_reg = is_backwards ? tmp1 : tmp2, -+ srl_reg = is_backwards ? tmp2 : tmp1; -+ assert_different_registers(t0, eight, alsrc, shr, shl); -+ Label loop; -+ int unit = is_backwards ? -wordSize : wordSize; -+ -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg, shl); -+ __ srl(tmp1, srl_reg, shr); -+ __ orr(t0, tmp1, t0); -+ __ sd(t0, dst, is_backwards ? unit : 0); -+ __ mv(tmp1, tmp2); -+ __ add(alsrc, alsrc, unit); -+ __ add(dst, dst, unit); -+ __ sub(count_in_bytes, count_in_bytes, 8); -+ __ bge(count_in_bytes, eight, loop); -+ } -+ + // Scan over array at a for count oops, verifying each one. + // Preserves a and count, clobbers t0 and t1. 
-+ void verify_oop_array(int size, Register a, Register count, Register temp) { ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { + Label loop, end; + __ mv(t1, zr); + __ slli(t0, count, exact_log2(size)); @@ -44374,7 +46830,7 @@ index 000000000..c5b3b094c + __ bgeu(t1, t0, end); + + __ add(temp, a, t1); -+ if (size == wordSize) { ++ if (size == (size_t)wordSize) { + __ ld(temp, Address(temp, 0)); + __ verify_oop(temp); + } else { @@ -44399,7 +46855,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44435,16 +46891,22 @@ index 000000000..c5b3b094c + // save regs before copy_memory + __ push_reg(RegSet::of(d, count), sp); + } -+ copy_memory(aligned, s, d, count, t0, checked_cast(size)); ++ ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, size); ++ } + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { -+ verify_oop_array(checked_cast(size), d, count, t2); ++ verify_oop_array(size, d, count, t2); + } + } + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + + __ leave(); + __ mv(x10, zr); // return 0 @@ -44465,9 +46927,9 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // -+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address* entry, const char* name, + bool dest_uninitialized = false) { + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; @@ -44503,14 +46965,20 @@ index 000000000..c5b3b094c + __ push_reg(RegSet::of(d, count), sp); + } + -+ copy_memory(aligned, s, d, count, t0, -size); ++ { ++ // UnsafeCopyMemory page error: continue after ucm ++ bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); ++ UnsafeCopyMemoryMark ucmm(this, add_entry, true); ++ copy_memory(aligned, s, d, count, t0, -size); ++ } ++ + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { + verify_oop_array(size, d, count, t2); + } + } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); + __ mv(x10, zr); // return 0 + __ ret(); @@ -44627,7 +47095,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. 
+ // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44651,7 +47119,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_copy(bool aligned, address nooverlap_target, + address* entry, const char* name, @@ -44715,7 +47183,7 @@ index 000000000..c5b3b094c + address generate_disjoint_oop_copy(bool aligned, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); + } + @@ -44733,7 +47201,7 @@ index 000000000..c5b3b094c + address nooverlap_target, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, + name, dest_uninitialized); + } @@ -44807,7 +47275,7 @@ index 000000000..c5b3b094c + + __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Caller of this entry point must set up the argument registers ++ // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + BLOCK_COMMENT("Entry:"); @@ -44859,7 +47327,7 @@ index 000000000..c5b3b094c + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop + __ add(to, to, UseCompressedOops ? 
4 : 8); + __ sub(count, count, 1); + __ beqz(count, L_do_card_marks); @@ -45015,13 +47483,13 @@ index 000000000..c5b3b094c + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; ++ + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; -+ + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -45265,7 +47733,7 @@ index 000000000..c5b3b094c + } + + __ BIND(L_failed); -+ __ mv(x10, -1); ++ __ li(x10, -1); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + @@ -45456,6 +47924,9 @@ index 000000000..c5b3b094c + address entry_jlong_arraycopy = NULL; + address entry_checkcast_arraycopy = NULL; + ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); ++ + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + + //*** jbyte @@ -45598,42 +48069,42 @@ index 000000000..c5b3b094c + __ ret(); + } + -+#ifdef COMPILER2 + // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; + __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp1, tmp2); + __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF1); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp5, cnt1); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF2); + } + + // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { -+ const Register tmp = x30; ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; + __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize); ++ __ addi(strL, strL, 8); + __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ addi(strU, strU, 8); + __ inflate_lo32(tmp, tmpL); + __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); + __ inflate_hi32(tmp, tmpL); + __ mv(t0, tmp); + __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ bnez(tmp, DIFF1); + } + + // x10 = result @@ -45648,7 +48119,8 @@ index 000000000..c5b3b094c + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); + address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, + tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; + RegSet spilled_regs = RegSet::of(tmp4, tmp5); @@ -45659,9 +48131,16 @@ index 000000000..c5b3b094c + __ mv(isLU ? tmp1 : tmp2, tmp3); + __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); + __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. + __ push_reg(spilled_regs, sp); + ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } + __ xorr(tmp3, tmp1, tmp2); + __ mv(tmp5, tmp2); + __ bnez(tmp3, CALCULATE_DIFFERENCE); @@ -45671,71 +48150,47 @@ index 000000000..c5b3b094c + tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison + tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison + -+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL -+ __ beqz(cnt2, DONE); // no characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, -wordSize / 2); ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ __ beqz(cnt2, DONE); // no character left -+ __ sub(cnt2, cnt2, wordSize * 2); ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, wordSize * 2); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, wordSize * 2); -+ __ beqz(t0, DONE); -+ __ bind(TAIL); // 1..15 characters left -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. 
-+ Label LOAD_LAST, WORD_CMP; -+ __ addi(t0, cnt2, wordSize); -+ __ bgtz(t0, LOAD_LAST); -+ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ addi(cnt2, cnt2, wordSize); -+ __ beqz(cnt2, DONE); // no character left -+ __ bind(LOAD_LAST); // 1..7 characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ addi(t0, cnt2, wordSize / 2); -+ __ blez(t0, WORD_CMP); -+ __ slli(t0, t0, 1); // now in bytes -+ __ slli(t0, t0, LogBitsPerByte); -+ __ sll(tmpL, tmpL, t0); -+ __ sll(tmpU, tmpU, t0); -+ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison -+ __ bind(WORD_CMP); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, wordSize / 2); -+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left -+ __ j(DONE); // no character left -+ } else { -+ // Unaligned accesses. Load from non-byte aligned address. -+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string -+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string -+ // last 16 characters -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ j(DONE); -+ } -+ __ bind(DIFF); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); + __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. ++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); ++ + // Find the first different characters in the longwords and + // compute their difference. 
+ __ bind(CALCULATE_DIFFERENCE); @@ -45751,6 +48206,50 @@ index 000000000..c5b3b094c + return entry; + } + ++ address generate_method_entry_barrier() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); ++ ++ Label deoptimize_label; ++ ++ address start = __ pc(); ++ ++ __ set_last_Java_frame(sp, fp, ra, t0); ++ ++ __ enter(); ++ __ add(t1, sp, wordSize); ++ ++ __ sub(sp, sp, 4 * wordSize); ++ ++ __ push_call_clobbered_registers(); ++ ++ __ mv(c_rarg0, t1); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ mv(t0, x10); ++ ++ __ pop_call_clobbered_registers(); ++ ++ __ bnez(t0, deoptimize_label); ++ ++ __ leave(); ++ __ ret(); ++ ++ __ BIND(deoptimize_label); ++ ++ __ ld(t0, Address(sp, 0)); ++ __ ld(fp, Address(sp, wordSize)); ++ __ ld(ra, Address(sp, wordSize * 2)); ++ __ ld(t1, Address(sp, wordSize * 3)); ++ ++ __ mv(sp, t0); ++ __ jr(t1); ++ ++ return start; ++ } ++ + // x10 = result + // x11 = str1 + // x12 = cnt1 @@ -45778,49 +48277,35 @@ index 000000000..c5b3b094c + __ add(str1, str1, wordSize); + __ add(str2, str2, wordSize); + // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ push_reg(spilled_regs, sp); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); + compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ bgez(cnt2, SMALL_LOOP); + __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); + __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ blez(cnt2, CHECK_LAST); + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ bind(CHECK_LAST); + if (!isLL) { + __ add(cnt2, cnt2, cnt2); // now in bytes + } + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ __ ld(tmp5, Address(str1)); -+ __ ld(cnt1, Address(str2)); -+ __ neg(cnt2, cnt2); -+ __ slli(cnt2, cnt2, LogBitsPerByte); -+ __ sll(tmp5, tmp5, cnt2); -+ __ sll(cnt1, cnt1, cnt2); -+ } else { -+ // Unaligned access. Load from non-byte aligned address. 
-+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ } -+ ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); + __ xorr(tmp4, tmp5, cnt1); + __ beqz(tmp4, LENGTH_DIFF); + // Find the first different characters in the longwords and @@ -45889,7 +48374,7 @@ index 000000000..c5b3b094c + // parameters + Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; + // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; + // redefinitions + Register ch1 = x28, ch2 = x29; + RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); @@ -45910,9 +48395,13 @@ index 000000000..c5b3b094c + + // first is needle[0] + __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); + __ mul(first, first, mask1); -+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? mask7f7f : mask7fff); + if (needle_isL != haystack_isL) { + __ mv(tmp, ch1); + } @@ -45920,7 +48409,7 @@ index 000000000..c5b3b094c + __ blez(haystack_len, L_SMALL); + + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + // xorr, sub, orr, notr, andr + // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] @@ -45957,7 +48446,7 @@ index 000000000..c5b3b094c + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + __ j(L_SMALL_PROCEED); + + __ align(OptoLoopAlignment); @@ -45965,42 +48454,42 @@ index 000000000..c5b3b094c + __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); + __ neg(haystack_len, haystack_len); + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + + __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. + __ notr(ch2, ch2); + __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check + __ beqz(match_mask, NOMATCH); + + __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ mv(ch2, wordSize / haystack_chr_size); + __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + + __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); -+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ beq(first, ch2, L_SMALL_CMP_LOOP); + + __ bind(L_SMALL_CMP_LOOP_NOMATCH); + __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ add(result, result, 1); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_SMALL_HAS_ZERO_LOOP); @@ -46012,14 +48501,14 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); + __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) + __ sub(result, result, 1); // array index from 0, so result -= 1 @@ -46029,26 +48518,26 @@ index 000000000..c5b3b094c + __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); + __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); + // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + + // compare one char + __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); + needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + haystack_isL ? 
__ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); // next char index ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index + __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); + __ beq(needle_len, ch2, L_CMP_LOOP); + + __ bind(L_CMP_LOOP_NOMATCH); + __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_HAS_ZERO_LOOP); + @@ -46059,7 +48548,7 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + __ j(DONE); @@ -46101,28 +48590,28 @@ index 000000000..c5b3b094c + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + ++#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; -+ const Register tmp1 = x28; -+ const Register tmp2 = x29; ++ const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp1, tmp2); ++ __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + -+ return start; ++ return entry; + } + + /** @@ -46132,7 +48621,7 @@ index 000000000..c5b3b094c + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address -+ * c_rarg3 - y lenth ++ * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ @@ -46140,7 +48629,7 @@ index 000000000..c5b3b094c + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46157,26 +48646,20 @@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(tmp1, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ push_reg(spilled_regs, sp); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + -+ return start; ++ return entry; + } + + address generate_squareToLen() + { -+ // squareToLen algorithm for sizes 1..127 described in java code works -+ // faster than multiply_to_len on some CPUs and slower on others, but -+ // multiply_to_len shows a bit better overall results + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46193,132 +48676,123 
@@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(y, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ push_reg(spilled_regs, sp); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); + __ ret(); + -+ return start; ++ return entry; + } -+#endif // COMPILER2 + -+ // Continuation point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerLeftShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); ++ address entry = __ pc(); + -+#undef __ -+#define __ masm-> ++ Label loop, exit; + -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; ++ Register newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register shiftCount = c_rarg3; ++ Register numIter = c_rarg4; + -+ const int insts_size = 512; -+ const int locs_size = 64; ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = t1; + -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); ++ __ beqz(numIter, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); + -+ address start = __ pc(); ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); + -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM ++ __ bind(loop); ++ __ addi(oldArrNext, oldArr, 4); ++ __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); ++ __ vle32_v(v0, oldArr); ++ __ vle32_v(v4, oldArrNext); ++ __ vsll_vx(v0, v0, shiftCount); ++ __ vsrl_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArr); ++ __ sub(numIter, numIter, t0); ++ __ shadd(oldArr, t0, oldArr, t1, 2); ++ __ shadd(newArr, t0, newArr, t1, 2); ++ __ bnez(numIter, loop); + -+ __ enter(); // Save FP and RA before call ++ __ bind(exit); ++ __ ret(); + -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); -+ -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog -+ -+ int frame_complete = __ pc() - start; -+ -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); -+ -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); -+ -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); -+ -+ oop_maps->add_gc_map(the_pc - start, map); -+ -+ __ reset_last_Java_frame(true); -+ -+ __ leave(); -+ -+ // check for pending exceptions -+#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ -+ -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); ++ return entry; + } + ++ // Arguments: ++ // ++ // Input: ++ // c_rarg0 - newArr address ++ // c_rarg1 - oldArr address ++ // c_rarg2 - newIdx ++ // c_rarg3 - shiftCount ++ // c_rarg4 - numIter ++ // ++ address generate_bigIntegerRightShift() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); ++ address entry = __ pc(); ++ ++ Label loop, exit; ++ ++ Register 
newArr = c_rarg0; ++ Register oldArr = c_rarg1; ++ Register newIdx = c_rarg2; ++ Register shiftCount = c_rarg3; ++ Register numIter = c_rarg4; ++ Register idx = numIter; ++ ++ Register shiftRevCount = c_rarg5; ++ Register oldArrNext = c_rarg6; ++ Register newArrCur = t0; ++ Register oldArrCur = t1; ++ ++ __ beqz(idx, exit); ++ __ shadd(newArr, newIdx, newArr, t0, 2); ++ ++ __ li(shiftRevCount, 32); ++ __ sub(shiftRevCount, shiftRevCount, shiftCount); ++ ++ __ bind(loop); ++ __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); ++ __ sub(idx, idx, t0); ++ __ shadd(oldArrNext, idx, oldArr, t1, 2); ++ __ shadd(newArrCur, idx, newArr, t1, 2); ++ __ addi(oldArrCur, oldArrNext, 4); ++ __ vle32_v(v0, oldArrCur); ++ __ vle32_v(v4, oldArrNext); ++ __ vsrl_vx(v0, v0, shiftCount); ++ __ vsll_vx(v4, v4, shiftRevCount); ++ __ vor_vv(v0, v0, v4); ++ __ vse32_v(v0, newArrCur); ++ __ bnez(idx, loop); ++ ++ __ bind(exit); ++ __ ret(); ++ ++ return entry; ++ } ++#endif ++ +#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { + @@ -46534,10 +49008,12 @@ index 000000000..c5b3b094c + // Rj == i-len + + // Ra as temp register -+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); -+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); -+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); -+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; @@ -46566,7 +49042,8 @@ index 000000000..c5b3b094c + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register -+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; @@ -46593,14 +49070,15 @@ index 000000000..c5b3b094c + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); -+ mv(t0, 1); // set carry flag, i.e. no borrow ++ li(t0, 1); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); -+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); @@ -46624,7 +49102,8 @@ index 000000000..c5b3b094c + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ shadd(s, len, s, tmp1, LogBytesPerWord); ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); @@ -46673,7 +49152,6 @@ index 000000000..c5b3b094c + ld(Rn, Address(Pn)); + } + -+ + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + @@ -46704,7 +49182,7 @@ index 000000000..c5b3b094c + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); ++ // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); @@ -46757,7 +49235,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. 
-+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46783,7 +49261,7 @@ index 000000000..c5b3b094c + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); -+ mv(t0, -1); ++ li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); @@ -46820,8 +49298,8 @@ index 000000000..c5b3b094c + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; -+ slli(Rj, Rlen, 1); // Rj as temp register -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); @@ -46835,13 +49313,12 @@ index 000000000..c5b3b094c + + post2(Ri, Rlen); + addw(Ri, Ri, 1); -+ slli(Rj, Rlen, 1); -+ blt(Ri, Rj, loop); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + -+ + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra @@ -46880,7 +49357,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. -+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46938,8 +49415,8 @@ index 000000000..c5b3b094c + mv(Ri, Rlen); { + Label loop, end; + bind(loop); -+ slli(Rj, Rlen, 1); -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + pre2(Ri, Rlen); + @@ -46985,6 +49462,117 @@ index 000000000..c5b3b094c + }; +#endif // COMPILER2 + ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ const int insts_size = 512; ++ const int locs_size = 64; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ ++ __ enter(); // Save FP and RA before call ++ ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); ++ ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); ++ } ++ + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points @@ -47012,6 +49600,13 @@ index 000000000..c5b3b094c + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. 
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); + } + + void generate_all() { @@ -47050,10 +49645,6 @@ index 000000000..c5b3b094c + StubRoutines::_squareToLen = generate_squareToLen(); + } + -+ generate_compare_long_strings(); -+ -+ generate_string_indexof_stubs(); -+ + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); @@ -47065,14 +49656,21 @@ index 000000000..c5b3b094c + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } -+#endif // COMPILER2 -+ // Safefetch stubs. -+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++ ++ if (UseRVVForBigIntegerShiftIntrinsics) { ++ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); ++ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); ++ } ++#endif ++ ++ generate_compare_long_strings(); ++ ++ generate_string_indexof_stubs(); ++ ++ BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); ++ if (bs_nm != NULL) { ++ StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); ++ } + + StubRoutines::riscv::set_completed(); + } @@ -47089,17 +49687,22 @@ index 000000000..c5b3b094c + ~StubGenerator() {} +}; // end class declaration + ++#define UCM_TABLE_MAX_ENTRIES 8 +void StubGenerator_generate(CodeBuffer* code, bool all) { ++ if (UnsafeCopyMemory::_table == NULL) { ++ UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); ++ } ++ + StubGenerator g(code, all); +} diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp new file mode 100644 -index 000000000..633108b95 +index 00000000000..395a2d338e4 --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,58 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47134,7 +49737,6 @@ index 000000000..633108b95 +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. 
+ -+address StubRoutines::riscv::_get_previous_fp_entry = NULL; +address StubRoutines::riscv::_get_previous_sp_entry = NULL; + +address StubRoutines::riscv::_f2i_fixup = NULL; @@ -47146,8 +49748,6 @@ index 000000000..633108b95 +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_has_negatives = NULL; -+address StubRoutines::riscv::_has_negatives_long = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address StubRoutines::riscv::_compare_long_string_LU = NULL; @@ -47156,16 +49756,17 @@ index 000000000..633108b95 +address StubRoutines::riscv::_string_indexof_linear_uu = NULL; +address StubRoutines::riscv::_string_indexof_linear_ul = NULL; +address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++address StubRoutines::riscv::_method_entry_barrier = NULL; + +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp new file mode 100644 -index 000000000..8aa81980e +index 00000000000..51f07819c33 --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,179 @@ +@@ -0,0 +1,161 @@ +/* -+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47197,20 +49798,19 @@ index 000000000..8aa81980e +// definition. See stubRoutines.hpp for a description on how to +// extend it. 
+ -+static bool returns_to_call_stub(address return_pc) { ++static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) +}; + +class riscv { + friend class StubGenerator; + + private: -+ static address _get_previous_fp_entry; + static address _get_previous_sp_entry; + + static address _f2i_fixup; @@ -47225,8 +49825,6 @@ index 000000000..8aa81980e + + static address _zero_blocks; + -+ static address _has_negatives; -+ static address _has_negatives_long; + static address _compare_long_string_LL; + static address _compare_long_string_LU; + static address _compare_long_string_UL; @@ -47235,57 +49833,46 @@ index 000000000..8aa81980e + static address _string_indexof_linear_uu; + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; ++ ++ static address _method_entry_barrier; ++ + static bool _completed; + + public: + -+ static address get_previous_fp_entry() -+ { -+ return _get_previous_fp_entry; -+ } -+ -+ static address get_previous_sp_entry() -+ { ++ static address get_previous_sp_entry() { + return _get_previous_sp_entry; + } + -+ static address f2i_fixup() -+ { ++ static address f2i_fixup() { + return _f2i_fixup; + } + -+ static address f2l_fixup() -+ { ++ static address f2l_fixup() { + return _f2l_fixup; + } + -+ static address d2i_fixup() -+ { ++ static address d2i_fixup() { + return _d2i_fixup; + } + -+ static address d2l_fixup() -+ { ++ static address d2l_fixup() { + return _d2l_fixup; + } + -+ static address float_sign_mask() -+ { ++ static address float_sign_mask() { + return _float_sign_mask; + } + -+ static address float_sign_flip() -+ { ++ static address float_sign_flip() { + return _float_sign_flip; + } + -+ static address double_sign_mask() -+ { ++ static address double_sign_mask() { + return _double_sign_mask; + } + -+ static address double_sign_flip() -+ { ++ static address double_sign_flip() { + return _double_sign_flip; + } + @@ -47293,14 +49880,6 @@ index 000000000..8aa81980e + return _zero_blocks; + } + -+ static address has_negatives() { -+ return _has_negatives; -+ } -+ -+ static address has_negatives_long() { -+ return _has_negatives_long; -+ } -+ + static address compare_long_string_LL() { + return _compare_long_string_LL; + } @@ -47333,6 +49912,10 @@ index 000000000..8aa81980e + return _large_byte_array_inflate; + } + ++ static address method_entry_barrier() { ++ return _method_entry_barrier; ++ } ++ + static bool complete() { + return _completed; + } @@ -47345,14 +49928,14 @@ index 000000000..8aa81980e +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 000000000..f5e212204 +index 00000000000..6537b2dbd94 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1841 @@ +@@ -0,0 +1,1794 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -47377,7 +49960,6 @@ index 000000000..f5e212204 + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" -+#include "classfile/javaClasses.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/bytecodeTracer.hpp" @@ -47396,13 +49978,14 @@ index 000000000..f5e212204 +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" -+#include "utilities/macros.hpp" ++#include "utilities/powerOfTwo.hpp" +#include + +#ifndef PRODUCT @@ -47453,7 +50036,7 @@ index 000000000..f5e212204 + // bcp (NULL) + // ... + -+ // Restore RA ++ // Restore ra + __ ld(ra, Address(sp, 0)); + __ addi(sp, sp , 2 * wordSize); + @@ -47892,7 +50475,7 @@ index 000000000..f5e212204 + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); -+ __ membar(MacroAssembler::AnyAny); ++ __ fence(0xf, 0xf); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} @@ -47908,80 +50491,31 @@ index 000000000..f5e212204 +// +// xmethod: method +// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. -+ if (TieredCompilation) { -+ int increment = InvocationCounter::count_increment; -+ Label no_mdo; -+ if (ProfileInterpreter) { -+ // Are we profiling? 
-+ __ ld(x10, Address(xmethod, Method::method_data_offset())); -+ __ beqz(x10, no_mdo); -+ // Increment counter in the MDO -+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); -+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); -+ __ j(done); -+ } -+ __ bind(no_mdo); -+ // Increment counter in MethodCounters -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ __ get_method_counters(xmethod, t1, done); -+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); -+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); -+ __ bind(done); -+ } else { // not TieredCompilation -+ const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); -+ const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); -+ -+ __ get_method_counters(xmethod, t1, done); -+ -+ if (ProfileInterpreter) { // %%% Merge this into MethodData* -+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ __ addw(x11, x11, 1); -+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); -+ } -+ // Update standard invocation counters -+ __ lwu(x11, invocation_counter); -+ __ lwu(x10, backedge_counter); -+ -+ __ addw(x11, x11, InvocationCounter::count_increment); -+ __ andi(x10, x10, InvocationCounter::count_mask_value); -+ -+ __ sw(x11, invocation_counter); -+ __ addw(x10, x10, x11); // add both counters -+ -+ // profile_method is non-null only for interpreted method so -+ // profile_method != NULL == !native_call -+ -+ if (ProfileInterpreter && profile_method != NULL) { -+ // Test to see if we should create a method data oop -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t1, *profile_method_continue); -+ -+ // if no method data exists, go to profile_method -+ __ test_method_data_pointer(t1, *profile_method); -+ } -+ -+ { -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); -+ __ bltu(x10, t1, done); -+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here -+ } -+ __ bind(done); ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); + } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { @@ -48165,19 +50699,13 @@ index 000000000..f5e212204 + __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* -+#if INCLUDE_SHENANDOAHGC -+ if (UseShenandoahGC) { -+ __ load_mirror(x28, xmethod); -+ __ sd(x28, Address(sp, 4 * wordSize)); -+ } else -+#endif -+ { -+ __ load_mirror(t0, xmethod); -+ __ sd(t0, Address(sp, 4 * wordSize)); -+ } ++ __ load_mirror(t2, xmethod); + __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); + -+ __ load_constant_pool_cache(xcpool, xmethod); ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); + __ sd(xcpool, Address(sp, 3 * wordSize)); + __ sd(xlocals, Address(sp, 2 * wordSize)); + @@ -48192,7 +50720,8 @@ index 000000000..f5e212204 + + // Move SP out of the way + if (!native_call) { -+ __ load_max_stack(t0, xmethod); ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ slli(t0, t0, 3); + __ sub(t0, sp, t0); @@ -48237,11 +50766,11 @@ index 000000000..f5e212204 + // xmethod: Method* + // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // RA is live. It must be saved around calls. ++ // ra is live. It must be saved around calls. + + address entry = __ pc(); + -+ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ const int referent_offset = java_lang_ref_Reference::referent_offset(); + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; @@ -48300,18 +50829,42 @@ index 000000000..f5e212204 +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { -+ // Bang each page in the shadow zone. We can't assume it's been done for -+ // an interpreter frame with greater than a page of locals, so each page -+ // needs to be checked. Only true for non-native. -+ if (UseStackBanging) { -+ const int n_shadow_pages = checked_cast(JavaThread::stack_shadow_zone_size()) / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; -+ const int page_size = os::vm_page_size(); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t1, sp, pages * page_size); -+ __ sd(zr, Address(t1)); -+ } ++ // See more discussion in stackOverflow.hpp. 
++ ++ const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = shadow_zone_size / page_size; ++ ++#ifdef ASSERT ++ Label L_good_limit; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bnez(t0, L_good_limit); ++ __ stop("shadow zone safe limit is not initialized"); ++ __ bind(L_good_limit); ++ ++ Label L_good_watermark; ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bnez(t0, L_good_watermark); ++ __ stop("shadow zone growth watermark is not initialized"); ++ __ bind(L_good_watermark); ++#endif ++ ++ Label L_done; ++ ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ __ bgtu(sp, t0, L_done); ++ ++ for (int p = 1; p <= n_shadow_pages; p++) { ++ __ bang_stack_with_offset(p * page_size); + } ++ ++ // Record the new watermark, but only if the update is above the safe limit. ++ // Otherwise, the next time around the check above would pass the safe limit. ++ __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); ++ __ bleu(sp, t0, L_done); ++ __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); ++ ++ __ bind(L_done); +} + +// Interpreter stub for calling a native method. (asm interpreter) @@ -48319,7 +50872,7 @@ index 000000000..f5e212204 +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // x11: Method* + // x30: sender sp @@ -48376,7 +50929,7 @@ index 000000000..f5e212204 + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; @@ -48535,23 +51088,22 @@ index 000000000..f5e212204 + __ mv(t0, _thread_in_native_trans); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ if (os::is_MP()) { -+ if (UseMembar) { -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, t0, t1); -+ } -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; -+ __ safepoint_poll_acquire(L); ++ ++ // We need an acquire here to ensure that any subsequent load of the ++ // global SafepointSynchronize::_state flag is ordered after this load ++ // of the thread-local polling word. We don't want this poll to ++ // return false (i.e. not safepointing) and a later poll of the global ++ // SafepointSynchronize::_state spuriously to return true. ++ // ++ // This is to avoid a race when we're in a native->Java transition ++ // racing the code which wakes up from a safepoint. 
++ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); @@ -48592,7 +51144,7 @@ index 000000000..f5e212204 + // and result handler will pick it up + + { -+ Label no_oop, not_weak, store_result; ++ Label no_oop; + __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ bne(t, result_handler, no_oop); + // Unbox oop result, e.g. JNIHandles::resolve result. @@ -48607,14 +51159,14 @@ index 000000000..f5e212204 + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + -+ __ push_call_clobbered_registers(); ++ __ pusha(); // only save smashed registers + __ mv(c_rarg0, xthread); + __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ jalr(t1); -+ __ pop_call_clobbered_registers(); ++ __ popa(); // only restore smashed registers + __ bind(no_reguard); + } + @@ -48798,15 +51350,8 @@ index 000000000..f5e212204 + + // increment invocation count & check for overflow + Label invocation_counter_overflow; -+ Label profile_method; -+ Label profile_method_continue; + if (inc_counter) { -+ generate_counter_incr(&invocation_counter_overflow, -+ &profile_method, -+ &profile_method_continue); -+ if (ProfileInterpreter) { -+ __ bind(profile_method_continue); -+ } ++ generate_counter_incr(&invocation_counter_overflow); + } + + Label continue_after_compile; @@ -48843,15 +51388,6 @@ index 000000000..f5e212204 + + // invocation counter overflow + if (inc_counter) { -+ if (ProfileInterpreter) { -+ // We have decided to profile this method in the interpreter -+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ set_method_data_pointer_for_bcp(); -+ // don't think we need this -+ __ get_method(x11); -+ __ jal(profile_method_continue); -+ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); @@ -49014,7 +51550,7 @@ index 000000000..f5e212204 + Label L_done; + + __ lbu(t0, Address(xbcp, 0)); -+ __ mv(t1, Bytecodes::_invokestatic); ++ __ li(t1, Bytecodes::_invokestatic); + __ bne(t1, t0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. 
@@ -49060,7 +51596,7 @@ index 000000000..f5e212204 + // ra: return address/pc that threw exception + // sp: expression stack of caller + // fp: fp of caller -+ // FIXME: There's no point saving RA here because VM calls don't trash it ++ // FIXME: There's no point saving ra here because VM calls don't trash it + __ sub(sp, sp, 2 * wordSize); + __ sd(x10, Address(sp, 0)); // save exception + __ sd(ra, Address(sp, wordSize)); // save return address @@ -49157,7 +51693,7 @@ index 000000000..f5e212204 + __ push_reg(t0); + __ push_reg(x10); + __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ amoadd_d(zr, x10, t0, Assembler::aqrl); + __ pop_reg(x10); + __ pop_reg(t0); @@ -49192,14 +51728,14 @@ index 000000000..f5e212204 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 000000000..8e6e7dee5 +index 00000000000..d2a301c6e74 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,4028 @@ +@@ -0,0 +1,3951 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -49225,6 +51761,8 @@ index 000000000..8e6e7dee5 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "gc/shared/tlab_globals.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" @@ -49234,20 +51772,16 @@ index 000000000..8e6e7dee5 +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" ++#include "utilities/powerOfTwo.hpp" + +#define __ _masm-> + -+// Platform-dependent initialization -+ -+void TemplateTable::pd_initialize() { -+ // No riscv specific initialization -+} -+ +// Address computation: local variables + +static inline Address iaddress(int n) { @@ -49270,12 +51804,15 @@ index 000000000..8e6e7dee5 + return iaddress(n); +} + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, 0); +} + -+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address laddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, Interpreter::local_offset_in_bytes(1));; +} @@ -49284,7 +51821,8 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + -+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline 
Address daddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { + return laddress(r, temp, _masm); +} + @@ -49292,6 +51830,10 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} ++ +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. +static inline Address at_tos () { @@ -49326,13 +51868,15 @@ index 000000000..8e6e7dee5 + Register val, + DecoratorSet decorators) { + assert(val == noreg || val == x10, "parameter is just for looks"); -+ __ store_heap_oop(dst, val, x29, x11, x13, decorators); ++ assert_cond(_masm != NULL); ++ __ store_heap_oop(dst, val, x29, x11, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators) { ++ assert_cond(_masm != NULL); + __ load_heap_oop(dst, src, x7, x11, decorators); +} + @@ -49426,13 +51970,13 @@ index 000000000..8e6e7dee5 +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::lconst(int value) +{ + transition(vtos, ltos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::fconst(int value) @@ -49595,6 +52139,7 @@ index 000000000..8e6e7dee5 + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); ++ __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); @@ -49641,7 +52186,6 @@ index 000000000..8e6e7dee5 + __ bind(notLong); + condy_helper(Done); + __ bind(Done); -+ +} + +void TemplateTable::condy_helper(Label& Done) @@ -49937,8 +52481,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); + __ addw(x10, x10, zr); // signed extended +} + @@ -49951,8 +52495,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::faload() @@ -49964,8 +52508,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::daload() @@ -49977,8 +52521,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} 
+ +void TemplateTable::aaload() @@ -49990,9 +52534,9 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + do_oop_load(_masm, -+ Address(t0), ++ Address(x10), + x10, + IS_ARRAY); +} @@ -50006,8 +52550,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(t0, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::caload() @@ -50019,8 +52563,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +// iload followed by caload frequent pair @@ -50036,8 +52580,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::saload() @@ -50049,8 +52593,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::iload(int n) @@ -50237,7 +52781,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::lastore() { @@ -50250,7 +52794,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::fastore() { @@ -50263,7 +52807,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); +} + +void 
TemplateTable::dastore() { @@ -50276,7 +52820,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); +} + +void TemplateTable::aastore() { @@ -50357,7 +52901,7 @@ index 000000000..8e6e7dee5 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + + __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + +void TemplateTable::castore() @@ -50371,7 +52915,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::sastore() @@ -50931,7 +53475,6 @@ index 000000000..8e6e7dee5 + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; -+ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches @@ -50956,73 +53499,28 @@ index 000000000..8e6e7dee5 + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + -+ if (TieredCompilation) { -+ Label no_mdo; -+ int increment = InvocationCounter::count_increment; -+ if (ProfileInterpreter) { -+ // Are we profiling? -+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); -+ __ beqz(x11, no_mdo); -+ // Increment the MDO backedge counter -+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); -+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); -+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, -+ x10, t0, false, -+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); -+ __ j(dispatch); -+ } -+ __ bind(no_mdo); -+ // Increment backedge counter in MethodCounters* -+ __ ld(t0, Address(xmethod, Method::method_counters_offset())); -+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); -+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, -+ x10, t1, false, ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, + UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); -+ } else { // not TieredCompilation -+ // increment counter -+ __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter -+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter -+ -+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter -+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits -+ __ addw(x10, x10, t0); // add both counters -+ -+ if (ProfileInterpreter) { -+ // Test to see if we should create a method data oop -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); -+ __ blt(x10, t0, dispatch); -+ -+ // if no method data exists, go to profile method -+ __ test_method_data_pointer(x10, profile_method); -+ -+ if (UseOnStackReplacement) { -+ // check for overflow against x11 which is the MDO taken count -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower -+ -+ // When ProfileInterpreter is on, the backedge_count comes -+ // from the MethodData*, which value does not get reset on -+ // the call to frequency_counter_overflow(). To avoid -+ // excessive calls to the overflow routine while the method is -+ // being compiled, add a second test to make sure the overflow -+ // function is called only once every overflow_frequency. -+ const int overflow_frequency = 1024; -+ __ andi(x11, x11, overflow_frequency - 1); -+ __ beqz(x11, backedge_counter_overflow); -+ -+ } -+ } else { -+ if (UseOnStackReplacement) { -+ // check for overflow against x10, which is the sum of the -+ // counters -+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); -+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual -+ } -+ } ++ __ j(dispatch); + } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } + @@ -51034,65 +53532,53 @@ index 000000000..8e6e7dee5 + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + -+ if (UseLoopCounter) { -+ if (ProfileInterpreter && !TieredCompilation) { -+ // Out-of-line code to allocate method data oop. 
-+ __ bind(profile_method); -+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode -+ __ set_method_data_pointer_for_bcp(); -+ __ j(dispatch); ++ if (UseLoopCounter && UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); + } ++ __ bnez(x12, dispatch); + -+ if (UseOnStackReplacement) { -+ // invocation counter overflow -+ __ bind(backedge_counter_overflow); -+ __ neg(x12, x12); -+ __ add(x12, x12, xbcp); // branch xbcp -+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) -+ __ call_VM(noreg, -+ CAST_FROM_FN_PTR(address, -+ InterpreterRuntime::frequency_counter_overflow), -+ x12); -+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. + -+ // x10: osr nmethod (osr ok) or NULL (osr not possible) -+ // w11: target bytecode -+ // x12: temporary -+ __ beqz(x10, dispatch); // test result -- no osr if null -+ // nmethod may have been invalidated (VM may block upon call_VM return) -+ __ lbu(x12, Address(x10, nmethod::state_offset())); -+ if (nmethod::in_use != 0) { -+ __ sub(x12, x12, nmethod::in_use); -+ } -+ __ bnez(x12, dispatch); ++ __ mv(x9, x10); // save the nmethod + -+ // We have the address of an on stack replacement routine in x10 -+ // We need to prepare to execute the OSR method. First we must -+ // migrate the locals and monitors off of the stack. 
++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + -+ __ mv(x9, x10); // save the nmethod ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + -+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + -+ // x10 is OSR buffer, move it to expected parameter location -+ __ mv(j_rarg0, x10); -+ -+ // remove activation -+ // get sender esp -+ __ ld(esp, -+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ // remove frame anchor -+ __ leave(); -+ // Ensure compiled code always sees stack at proper alignment -+ __ andi(sp, esp, -16); -+ -+ // and begin the OSR nmethod -+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); -+ __ jr(t0); -+ } ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); + } -+ +} + +void TemplateTable::if_0cmp(Condition cc) @@ -51188,9 +53674,9 @@ index 000000000..8e6e7dee5 + __ pop_ptr(x11); + + if (cc == equal) { -+ __ oop_nequal(x11, x10, not_taken); ++ __ bne(x11, x10, not_taken); + } else if (cc == not_equal) { -+ __ oop_equal(x11, x10, not_taken); ++ __ beq(x11, x10, not_taken); + } + branch(false, false); + __ bind(not_taken); @@ -51495,7 +53981,7 @@ index 000000000..8e6e7dee5 + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + -+ Label resolved; ++ Label resolved, clinit_barrier_slow; + + Bytecodes::Code code = bytecode(); + switch (code) { @@ -51509,6 +53995,10 @@ index 000000000..8e6e7dee5 + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + ++ // resolve first time through ++ // Class initialization barrier slow path lands here as well. ++ __ bind(clinit_barrier_slow); ++ + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); @@ -51518,6 +54008,13 @@ index 000000000..8e6e7dee5 + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); ++ ++ // Class initialization barrier for static methods ++ if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { ++ __ load_resolved_method_at_index(byte_no, temp, Rcache); ++ __ load_method_holder(temp, temp); ++ __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); ++ } +} + +// The Rcache and index registers must be set before call @@ -51648,14 +54145,6 @@ index 000000000..8e6e7dee5 + pop_and_check_object(obj); + } + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + __ add(off, obj, off); + const Address field(off); + @@ -51918,7 +54407,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); // off register as temparator register. 
-+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); + } @@ -51938,7 +54427,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); + } @@ -51979,7 +54468,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); + } @@ -51999,7 +54488,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); + } @@ -52019,7 +54508,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); + } @@ -52039,7 +54528,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); + } @@ -52059,7 +54548,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); + } @@ -52081,7 +54570,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); + } @@ -52218,28 +54707,28 @@ index 000000000..8e6e7dee5 + do_oop_store(_masm, field, x10, IN_HEAP); + break; + case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52298,14 +54787,6 @@ index 000000000..8e6e7dee5 + __ add(x11, x10, x11); + const Address field(x11, 0); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: @@ -52357,16 +54838,6 @@ index 000000000..8e6e7dee5 + __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi(xbcp, xbcp, 1); @@ -52383,8 +54854,8 @@ index 000000000..8e6e7dee5 + __ verify_oop(x10); + break; + case ftos: -+ __ add(t0, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52406,11 +54877,6 @@ index 000000000..8e6e7dee5 +//----------------------------------------------------------------------------- +// Calls + -+void 
TemplateTable::count_calls(Register method, Register temp) -+{ -+ __ call_Unimplemented(); -+} -+ +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. @@ -52432,8 +54898,8 @@ index 000000000..8e6e7dee5 + assert(recv == noreg || recv == x12, ""); + + // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; ++ if (recv == noreg) { ++ recv = x12; + } + if (flags == noreg) { + flags = x13; @@ -52455,7 +54921,6 @@ index 000000000..8e6e7dee5 + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); -+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) @@ -52495,7 +54960,7 @@ index 000000000..8e6e7dee5 + __ beqz(t0, notFinal); + + const Register method = index; // method must be xmethod -+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* @@ -52518,7 +54983,7 @@ index 000000000..8e6e7dee5 + // profile this call + __ profile_virtual_call(x10, xlocals, x13); + -+ // get target methodOop & entry point ++ // get target Method & entry point + __ lookup_virtual_method(x10, index, method); + __ profile_arguments_type(x13, method, x14, true); + __ jump_from_interpreted(method); @@ -52639,9 +55104,7 @@ index 000000000..8e6e7dee5 + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index -+ __ ld(x10, Address(xmethod, Method::const_offset())); -+ __ ld(x10, Address(x10, ConstMethod::constants_offset())); -+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); ++ __ load_method_holder(x10, xmethod); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); @@ -52654,7 +55117,7 @@ index 000000000..8e6e7dee5 + xmethod, x30, + no_such_interface); + -+ // xmethod: methodOop to call ++ // xmethod: Method to call + // x12: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error @@ -52666,7 +55129,7 @@ index 000000000..8e6e7dee5 + + // do the call + // x12: receiver -+ // xmethod,: methodOop ++ // xmethod: Method + __ jump_from_interpreted(xmethod); + __ should_not_reach_here(); + @@ -52830,11 +55293,7 @@ index 000000000..8e6e7dee5 + + // initialize object hader only. 
+ __ bind(initialize_header); -+ if (UseBiasedLocking) { -+ __ ld(t0, Address(x14, Klass::prototype_header_offset())); -+ } else { -+ __ mv(t0, (intptr_t)markOopDesc::prototype()); -+ } ++ __ mv(t0, (intptr_t)markWord::prototype().value()); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last @@ -52843,7 +55302,7 @@ index 000000000..8e6e7dee5 + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value -+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)), x10); + __ pop(atos); // restore the return value + } + __ j(done); @@ -52988,7 +55447,7 @@ index 000000000..8e6e7dee5 + __ j(done); + // Come here on success + __ bind(ok_is_subtype); -+ __ mv(x10, 1); ++ __ li(x10, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { @@ -53226,12 +55685,12 @@ index 000000000..8e6e7dee5 +} diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 000000000..b437c8f4c +index 00000000000..fcc86108d28 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -53272,15 +55731,101 @@ index 000000000..b437c8f4c +static void index_check(Register array, Register index); + +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +new file mode 100644 +index 00000000000..4f50adb05c3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalNativeInvoker.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} +diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +new file mode 100644 +index 00000000000..ce70da72f2e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "prims/universalUpcallHandler.hpp" ++#include "utilities/debug.hpp" ++ ++address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) { ++ Unimplemented(); ++ return nullptr; ++} ++ ++address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) { ++ ShouldNotCallThis(); ++ return nullptr; ++} ++ ++bool ProgrammableUpcallHandler::supports_optimized_upcalls() { ++ return false; ++} diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 000000000..03079aec0 +index 00000000000..6c89133de02 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53321,172 +55866,14 @@ index 000000000..03079aec0 +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -new file mode 100644 -index 000000000..dd4f5c9ae ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "memory/allocation.hpp" -+#include "memory/allocation.inline.hpp" -+#include "runtime/os.inline.hpp" -+#include "vm_version_ext_riscv.hpp" -+ -+// VM_Version_Ext statics -+int VM_Version_Ext::_no_of_threads = 0; -+int VM_Version_Ext::_no_of_cores = 0; -+int VM_Version_Ext::_no_of_sockets = 0; -+bool VM_Version_Ext::_initialized = false; -+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; -+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; -+ -+void VM_Version_Ext::initialize_cpu_information(void) { -+ // do nothing if cpu info has been initialized -+ if (_initialized) { -+ return; -+ } -+ -+ int core_id = -1; -+ int chip_id = -1; -+ int len = 0; -+ char* src_string = NULL; -+ -+ _no_of_cores = os::processor_count(); -+ _no_of_threads = _no_of_cores; -+ _no_of_sockets = _no_of_cores; -+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); -+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); -+ _initialized = true; -+} -+ -+int VM_Version_Ext::number_of_threads(void) { -+ initialize_cpu_information(); -+ return _no_of_threads; -+} -+ -+int VM_Version_Ext::number_of_cores(void) { -+ initialize_cpu_information(); -+ return _no_of_cores; -+} -+ -+int VM_Version_Ext::number_of_sockets(void) { -+ initialize_cpu_information(); -+ return _no_of_sockets; -+} -+ -+const char* VM_Version_Ext::cpu_name(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); -+ return tmp; -+} -+ -+const char* VM_Version_Ext::cpu_description(void) { -+ initialize_cpu_information(); -+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); -+ if (NULL == tmp) { -+ return NULL; -+ } -+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); -+ return tmp; -+} -diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -new file mode 100644 -index 000000000..0982b6668 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP -+ -+#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" -+ -+class VM_Version_Ext : public VM_Version { -+ private: -+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; -+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; -+ -+ static int _no_of_threads; -+ static int _no_of_cores; -+ static int _no_of_sockets; -+ static bool _initialized; -+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; -+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; -+ -+ public: -+ static int number_of_threads(void); -+ static int number_of_cores(void); -+ static int number_of_sockets(void); -+ -+ static const char* cpu_name(void); -+ static const char* cpu_description(void); -+ static void initialize_cpu_information(void); -+ -+}; -+ -+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 000000000..31d5bb5f4 +index 00000000000..768c7633ca6 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,190 @@ +@@ -0,0 +1,230 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -53512,9 +55899,10 @@ index 000000000..31d5bb5f4 + +#include "precompiled.hpp" +#include "runtime/java.hpp" ++#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" +#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + +#include OS_HEADER_INLINE(os) + @@ -53527,6 +55915,7 @@ index 000000000..31d5bb5f4 + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } ++ + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + } @@ -53567,16 +55956,26 @@ index 000000000..31d5bb5f4 + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + ++ if (UseSHA3Intrinsics) { ++ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); ++ } ++ + if (UseCRC32Intrinsics) { -+ warning("CRC32Intrinsics instructions are not available on this CPU."); ++ warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseCRC32CIntrinsics) { -+ warning("CRC32CIntrinsics instructions are not available on this CPU."); ++ warning("CRC32C intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + ++ if (UseMD5Intrinsics) { ++ warning("MD5 intrinsics are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseMD5Intrinsics, false); ++ } ++ + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); @@ -53587,11 +55986,21 @@ index 000000000..31d5bb5f4 + } + } + ++ if (UseRVB && !(_features & CPU_B)) { ++ warning("RVB is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVB, false); ++ } ++ ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); ++ } ++ + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } + -+ if (UseZbb) { ++ if (UseRVB) { + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } @@ -53610,16 +56019,16 @@ index 000000000..31d5bb5f4 + _features_string = os::strdup(buf); + +#ifdef COMPILER2 -+ initialize_c2(); ++ c2_initialize(); +#endif // COMPILER2 +} + +#ifdef COMPILER2 -+void VM_Version::initialize_c2() { -+ // lack of cmove in riscv ++void VM_Version::c2_initialize() { + if (UseCMoveUnconditionally) { + FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); + } ++ + if (ConditionalMoveLimit > 0) { + FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } @@ -53632,6 +56041,10 @@ index 000000000..31d5bb5f4 + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); ++ } ++ + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; @@ -53658,14 +56071,6 @@ index 000000000..31d5bb5f4 + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); -+ } -+ -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); -+ } -+ + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } @@ -53673,16 +56078,39 @@ index 000000000..31d5bb5f4 + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ } +} +#endif // COMPILER2 ++ ++void VM_Version::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 000000000..0178e6d75 +index 00000000000..8e35530359a --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,72 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53710,14 +56138,28 @@ index 000000000..0178e6d75 +#define CPU_RISCV_VM_VERSION_RISCV_HPP + +#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 ++ ++protected: ++ static const char* _uarch; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); ++ +public: + // Initialization + static void initialize(); + ++ constexpr static bool supports_stack_watermark_barrier() { return true; } ++ + enum Feature_Flag { +#define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ @@ -53726,36 +56168,27 @@ index 000000000..0178e6d75 + decl(F, "f", 5) \ + decl(D, "d", 3) \ + decl(C, "c", 2) \ -+ decl(V, "v", 21) ++ decl(V, "v", 21) \ ++ decl(B, "b", 1) + +#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), + CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) +#undef DECLARE_CPU_FEATURE_FLAG + }; + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); -+ -+#ifdef COMPILER2 -+private: -+ static void initialize_c2(); -+#endif // COMPILER2 ++ static void initialize_cpu_information(void); +}; + +#endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 000000000..6572d9334 +index 00000000000..aa7222dc64a --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,64 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53783,10 +56216,10 @@ index 000000000..6572d9334 +#include "code/vmreg.hpp" + +void VMRegImpl::set_regName() { -+ Register reg = ::as_Register(0); + int i = 0; ++ Register reg = ::as_Register(0); + for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { + regName[i++] = reg->name(); + } + reg = reg->successor(); @@ -53794,34 +56227,38 @@ index 000000000..6572d9334 + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = freg->name(); ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); + } + freg = freg->successor(); + } + + VectorRegister vreg = ::as_VectorRegister(0); + for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = vreg->name(); ++ for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); + } + vreg = vreg->successor(); + } + -+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { + regName[i] = "NON-GPR-FPR-VPR"; + } +} ++ ++VMReg VMRegImpl::vmStorageToVMReg(int type, int index) { ++ Unimplemented(); ++ return VMRegImpl::Bad(); ++} diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 000000000..ec76a1db1 +index 00000000000..9e611b1f671 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,64 @@ +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53860,38 +56297,42 @@ index 000000000..ec76a1db1 +} + +inline Register as_Register() { -+ assert( is_Register(), "must be"); ++ assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ assert(is_FloatRegister() && is_even(value()), "must be"); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / + FloatRegisterImpl::max_slots_per_register); +} + +inline VectorRegister as_VectorRegister() { -+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); ++ assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); + return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / + VectorRegisterImpl::max_slots_per_register); +} + +inline bool is_concrete() { + assert(is_reg(), "must be"); -+ return is_even(value()); ++ if (is_VectorRegister()) { ++ int base = value() - ConcreteRegisterImpl::max_fpr; ++ return (base % VectorRegisterImpl::max_slots_per_register) == 0; ++ } else { ++ return is_even(value()); ++ } +} + +#endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 000000000..9605e59f4 +index 00000000000..06b70020b4b --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,47 @@ +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -53917,19 +56358,19 @@ index 000000000..9605e59f4 +#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+inline VMReg RegisterImpl::as_VMReg() { -+ if( this == noreg ) { ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + -+inline VMReg FloatRegisterImpl::as_VMReg() { ++inline VMReg FloatRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + -+inline VMReg VectorRegisterImpl::as_VMReg() { ++inline VMReg VectorRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_fpr); +} @@ -53937,12 +56378,12 @@ index 000000000..9605e59f4 +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 000000000..b2aa87ab8 +index 00000000000..78b81138003 --- /dev/null +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ +/* -+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -54015,7 +56456,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(t2)); ++ __ add_memory_int64(Address(t2), 1); + } +#endif + @@ -54106,7 +56547,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(x18)); ++ __ add_memory_int64(Address(x18), 1); + } +#endif + @@ -54197,14 +56638,23 @@ index 000000000..b2aa87ab8 +} + +int VtableStub::pd_code_alignment() { -+ // riscv cache line size is 64 bytes, but we want to limit alignment loss. ++ // RISCV cache line size is not an architected constant. We just align on word size. + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209..3b836fe6b 100644 +index 897be2209e2..ee298f56653 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2016, 2019, SAP SE. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2019 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } @@ -54212,82 +56662,22 @@ index 897be2209..3b836fe6b 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on s390"); + Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; switch (condition) { case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index e335f473d..53ad912cb 100644 ---- a/src/hotspot/cpu/s390/s390.ad -+++ b/src/hotspot/cpu/s390/s390.ad -@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { - // BUT: make sure match rule is not disabled by a false predicate! - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. 
-- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - int Matcher::regnum_to_fpu_offset(int regnum) { -@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { - return Node::NotAMachineReg; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // z/Architecture does support misaligned store/load at minimal extra cost. - const bool Matcher::misaligned_vectors_ok() { - return true; -diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index 7a2798a51..7d9b17b44 100644 ---- a/src/hotspot/cpu/sparc/sparc.ad -+++ b/src/hotspot/cpu/sparc/sparc.ad -@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // SPARC doesn't support misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return false; diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4..d38c63600 100644 +index cee3140f4f7..82e9de5a06f 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { } } @@ -54295,299 +56685,64 @@ index cee3140f4..d38c63600 100644 -void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp oprs on x86"); + Assembler::Condition acond, ncond; switch (condition) { case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -index 82fd8522b..8016d328a 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register - bind(DONE_LABEL); - } // string_indexof_char - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { -+ ShortBranchVerifier sbv(this); -+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); -+ -+ int stride = 16; -+ -+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, -+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, -+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, -+ FOUND_SEQ_CHAR, DONE_LABEL; -+ -+ movptr(result, str1); -+ if (UseAVX >= 2) { -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); -+ cmpl(cnt1, stride*2); -+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); -+ vpxor(vec2, vec2); -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFE0); //vector count (in chars) -+ andl(cnt1,0x0000001F); //tail count (in chars) -+ -+ bind(SCAN_TO_32_CHAR_LOOP); -+ vmovdqu(vec3, Address(result, 0)); -+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); -+ vptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 32); -+ subl(tmp, stride*2); -+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); -+ jmp(SCAN_TO_16_CHAR); -+ -+ bind(SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ -+ bind(SCAN_TO_16_CHAR); -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left -+ if (UseAVX < 2) { -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) -+ andl(cnt1,0x0000000F); //tail count (in bytes) -+ -+ bind(SCAN_TO_16_CHAR_LOOP); -+ movdqu(vec3, Address(result, 0)); -+ pcmpeqb(vec3, vec1); -+ ptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 16); -+ subl(tmp, stride); -+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... 
-+ -+ bind(SCAN_TO_CHAR_INIT); -+ testl(cnt1, cnt1); -+ jcc(Assembler::zero, RET_NOT_FOUND); -+ bind(SCAN_TO_CHAR_LOOP); -+ load_unsigned_byte(tmp, Address(result, 0)); -+ cmpl(ch, tmp); -+ jccb(Assembler::equal, FOUND_SEQ_CHAR); -+ addptr(result, 1); -+ subl(cnt1, 1); -+ jccb(Assembler::zero, RET_NOT_FOUND); -+ jmp(SCAN_TO_CHAR_LOOP); -+ -+ bind(RET_NOT_FOUND); -+ movl(result, -1); -+ jmpb(DONE_LABEL); -+ -+ bind(FOUND_CHAR); -+ if (UseAVX >= 2) { -+ vpmovmskb(tmp, vec3); -+ } else { -+ pmovmskb(tmp, vec3); -+ } -+ bsfl(ch, tmp); -+ addptr(result, ch); -+ -+ bind(FOUND_SEQ_CHAR); -+ subptr(result, str1); -+ -+ bind(DONE_LABEL); -+} // stringL_indexof_char -+ - // helper function for string_compare - void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, - Address::ScaleFactor scale, Address::ScaleFactor scale1, -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -index 1bed0cce9..47a062c11 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -@@ -1659,6 +1659,8 @@ public: - #ifdef COMPILER2 - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, - XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); -+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. -diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index baa7cc774..238d8729b 100644 ---- a/src/hotspot/cpu/x86/x86.ad -+++ b/src/hotspot/cpu/x86/x86.ad -@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { - return ret_value; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); -+ if (!vector_size_supported(bt, vlen)) { -+ ret_value = false; -+ } - if (ret_value) { - switch (opcode) { - case Op_AbsVB: -@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { - return MIN2(size,max_size); - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad -index bc9947327..bbe49bd62 100644 ---- a/src/hotspot/cpu/x86/x86_32.ad -+++ b/src/hotspot/cpu/x86/x86_32.ad -@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast array equals - instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 7e6739ffe..53f887ea6 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -2975,7 +2975,7 @@ frame - RAX_H_num // Op_RegL - }; - // Excluded flags and vector registers. 
-- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); -+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); - return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); - %} - %} -@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) -+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) - %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) -+%{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast string equals - instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, - legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e..6c79d20a4 100644 +index 3799adf5dd9..6f75e623a9a 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp -@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { - {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, - {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, - {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, -- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, -+#ifdef _LP64 -+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, -+#else -+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, -+#endif - {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, - }; - -@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { +@@ -2845,6 +2845,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); +#elif defined(RISCV) -+ 
strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); ++ strncpy(cpuinfo, "RISCV64", length); #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) -@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -- SPARC_ONLY(4 * M); -+ SPARC_ONLY(4 * M) -+ RISCV64_ONLY(2 * M); - #endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +new file mode 100644 +index 00000000000..f2610af6cdd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// nothing required here diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 000000000..961fff011 +index 00000000000..761da5d743e --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,113 @@ +@@ -0,0 +1,134 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54614,32 +56769,32 @@ index 000000000..961fff011 +#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class atomic ++ +// Note that memory_order_conservative requires a full barrier after atomic stores. 
+// See https://patchwork.kernel.org/patch/3575821/ + -+#define FULL_MEM_BARRIER __sync_synchronize() -+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); -+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); -+ +template -+struct Atomic::PlatformAdd -+ : public Atomic::AddAndFetch > -+{ -+ template -+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++struct Atomic::PlatformAdd { ++ template ++ D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } ++ ++ template ++ D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { ++ return add_and_fetch(dest, add_value, order) - add_value; ++ } +}; + +template +template -+inline T Atomic::PlatformXchg::operator()(T exchange_value, -+ T volatile* dest, ++inline T Atomic::PlatformXchg::operator()(T volatile* dest, ++ T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); @@ -54647,12 +56802,12 @@ index 000000000..961fff011 + return res; +} + -+// No direct support for cmpxchg of bytes; emulate using int. ++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. +template +template -+inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest, ++inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), + T compare_value, ++ T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; @@ -54671,9 +56826,9 @@ index 000000000..961fff011 + +template<> +template -+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest, ++inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), + T compare_value, ++ T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order != memory_order_relaxed) { @@ -54698,15 +56853,36 @@ index 000000000..961fff011 + return rv; +} + ++template ++struct Atomic::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct Atomic::PlatformOrderedStore ++{ ++ template ++ void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct Atomic::PlatformOrderedStore ++{ ++ template ++ void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } ++}; ++ +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 000000000..44f04d1a9 +index 00000000000..28868c76406 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54727,20 +56903,21 @@ index 000000000..44f04d1a9 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { ++inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + -+inline u4 Bytes::swap_u4(u4 x) { ++inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + @@ -54748,15 +56925,52 @@ index 000000000..44f04d1a9 + return bswap_64(x); +} + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp new file mode 100644 -index 000000000..645b40a7c +index 00000000000..147cfdf3c10 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,116 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +@@ -0,0 +1,31 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP ++ ++// Empty for build system ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +new file mode 100644 +index 00000000000..1aa58f27871 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -54780,105 +56994,31 @@ index 000000000..645b40a7c + * + */ + -+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP + -+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ (void)memmove(to, from, count * HeapWordSize); -+} ++#include + -+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { -+ switch (count) { -+ case 8: to[7] = from[7]; // fall through -+ case 7: to[6] = from[6]; // fall through -+ case 6: to[5] = from[5]; // fall through -+ case 5: to[4] = from[4]; // fall through -+ case 4: to[3] = from[3]; // fall through -+ case 3: to[2] = from[2]; // fall through -+ case 2: to[1] = from[1]; // fall through -+ case 1: to[0] = from[0]; // fall through -+ case 0: break; -+ default: -+ if(is_atomic) { -+ while (count-- > 0) { *to++ = *from++; } -+ } else { -+ memcpy(to, from, count * HeapWordSize); -+ } -+ } -+} ++// ++// Support for building on older Linux systems ++// + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, false); -+} ++#ifndef SYS_memfd_create ++#define SYS_memfd_create 279 ++#endif ++#ifndef SYS_fallocate ++#define SYS_fallocate 47 ++#endif + -+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, true); -+} -+ -+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_conjoint_words(from, to, count); -+} -+ -+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words(from, to, count); -+} -+ -+static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -+ (void)memmove(to, from, count); -+} -+ -+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { -+ pd_conjoint_bytes(from, to, count); -+} -+ -+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { -+ _Copy_conjoint_jshorts_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -+ _Copy_conjoint_jints_atomic(from, to, count); -+} -+ -+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -+ _Copy_conjoint_jlongs_atomic(from, to, count); -+} -+ -+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); -+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -+} -+ -+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_bytes(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jshorts(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jints(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -+ assert(!UseCompressedOops, "foo!"); -+ assert(BytesPerLong == BytesPerOop, 
"jlongs and oops must be the same size"); -+ _Copy_arrayof_conjoint_jlongs(from, to, count); -+} -+ -+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 000000000..041cdf4ff +index 00000000000..297414bfcd5 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54922,12 +57062,12 @@ index 000000000..041cdf4ff +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp new file mode 100644 -index 000000000..842aa51e0 +index 00000000000..1c33dc1e87f --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,73 @@ +@@ -0,0 +1,63 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54956,7 +57096,7 @@ index 000000000..842aa51e0 + +// Included in orderAccess.hpp header file. + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class OrderAccess. + @@ -54965,6 +57105,10 @@ index 000000000..842aa51e0 +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } + ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} @@ -54977,37 +57121,23 @@ index 000000000..842aa51e0 + FULL_MEM_BARRIER; +} + -+template -+struct OrderAccess::PlatformOrderedLoad -+{ -+ template -+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } -+}; -+ -+template -+struct OrderAccess::PlatformOrderedStore -+{ -+ template -+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } -+}; ++inline void OrderAccess::cross_modify_fence_impl() { ++ asm volatile("fence.i" : : : "memory"); ++ if (UseConservativeFence) { ++ asm volatile("fence ir, ir" : : : "memory"); ++ } ++} + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 000000000..37947701b +index 00000000000..1f46bbab0a2 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,628 @@ +@@ -0,0 +1,466 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55032,8 +57162,6 @@ index 000000000..37947701b + +// no precompiled headers +#include "asm/macroAssembler.hpp" -+#include "classfile/classLoader.hpp" -+#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/codeCache.hpp" +#include "code/icBuffer.hpp" @@ -55046,23 +57174,25 @@ index 000000000..37947701b +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" -+#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" ++#include "signals_posix.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include ++# include +# include +# include +# include @@ -55093,11 +57223,11 @@ index 000000000..37947701b + return (char*) -1; +} + -+address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++address os::Posix::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; +} + -+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; +} + @@ -55109,31 +57239,13 @@ index 000000000..37947701b + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + -+// For Forte Analyzer AsyncGetCallTrace profiling support - thread -+// is currently interrupted by SIGPROF. -+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal -+// frames. Currently we don't do that on Linux, so it's the same as -+// os::fetch_frame_from_context(). 
-+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, -+ intptr_t** ret_sp, -+ intptr_t** ret_fp) { -+ -+ assert(thread != NULL, "just checking"); -+ assert(ret_sp != NULL, "just checking"); -+ assert(ret_fp != NULL, "just checking"); -+ -+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); -+} -+ -+ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { -+ -+ ExtendedPC epc; ++address os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ address epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { -+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ epc = os::Posix::ucontext_get_pc(uc); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } @@ -55141,8 +57253,7 @@ index 000000000..37947701b + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { -+ // construct empty ExtendedPC for return value checking -+ epc = ExtendedPC(NULL); ++ epc = NULL; + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } @@ -55154,51 +57265,23 @@ index 000000000..37947701b + return epc; +} + ++frame os::fetch_compiled_frame_from_context(const void* ucVoid) { ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); ++ address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ return frame(frame_sp, frame_fp, frame_pc); ++} ++ +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; -+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); -+ return frame(frame_sp, frame_fp, epc.pc()); -+} -+ -+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { -+ address pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (Interpreter::contains(pc)) { -+ // interpreter performs stack banging after the fixed frame header has -+ // been generated while the compilers perform it before. To maintain -+ // semantic consistency between interpreted and compiled frames, the -+ // method returns the Java sender of the current frame. -+ *fr = os::fetch_frame_from_context(uc); -+ if (!fr->is_first_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } else { -+ // more complex code with compiled code -+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); -+ CodeBlob* cb = CodeCache::find_blob(pc); -+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { -+ // Not sure where the pc points to, fallback to default -+ // stack overflow handling -+ return false; -+ } else { -+ // In compiled code, the stack banging is performed before RA -+ // has been saved in the frame. RA is live, and SP and FP -+ // belong to the caller. 
-+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - -+ NativeInstruction::instruction_size); -+ *fr = frame(frame_sp, frame_fp, frame_pc); -+ if (!fr->is_java_frame()) { -+ assert(fr->safe_for_sender(thread), "Safety check"); -+ assert(!fr->is_first_frame(), "Safety check"); -+ *fr = fr->java_sender(); -+ } -+ } -+ } -+ assert(fr->is_java_frame(), "Safety check"); -+ return true; ++ address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc); +} + +// By default, gcc always saves frame pointer rfp on this stack. This @@ -55209,7 +57292,7 @@ index 000000000..37947701b + +NOINLINE frame os::current_frame() { + intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if(sender_sp != NULL) { ++ if (sender_sp != NULL) { + frame myframe((intptr_t*)os::current_stack_pointer(), + sender_sp[frame::link_offset], + CAST_FROM_FN_PTR(address, os::current_frame)); @@ -55226,66 +57309,8 @@ index 000000000..37947701b +} + +// Utility functions -+extern "C" JNIEXPORT int -+JVM_handle_linux_signal(int sig, -+ siginfo_t* info, -+ void* ucVoid, -+ int abort_if_unrecognized) { -+ ucontext_t* uc = (ucontext_t*) ucVoid; -+ -+ Thread* t = Thread::current_or_null_safe(); -+ -+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away -+ // (no destructors can be run) -+ os::ThreadCrashProtection::check_crash_protection(sig, t); -+ -+ SignalHandlerMark shm(t); -+ -+ // Note: it's not uncommon that JNI code uses signal/sigset to install -+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, -+ // or have a SIGILL handler when detecting CPU type). When that happens, -+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To -+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals -+ // that do not require siginfo/ucontext first. 
-+ -+ if (sig == SIGPIPE || sig == SIGXFSZ) { -+ // allow chained handler to go first -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } else { -+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 -+ return true; -+ } -+ } -+ -+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT -+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { -+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { -+ return 1; -+ } -+ } -+#endif -+ -+ JavaThread* thread = NULL; -+ VMThread* vmthread = NULL; -+ if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ) { -+ if(t->is_Java_thread()) { -+ thread = (JavaThread*)t; -+ } else if(t->is_VM_thread()) { -+ vmthread = (VMThread *)t; -+ } -+ } -+ } -+ -+ // Handle SafeFetch faults -+ if (uc != NULL) { -+ address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (StubRoutines::is_safefetch_fault(pc)) { -+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); -+ return 1; -+ } -+ } ++bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, ++ ucontext_t* uc, JavaThread* thread) { + + // decide if this trap can be handled by a stub + address stub = NULL; @@ -55294,69 +57319,21 @@ index 000000000..37947701b + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { -+ pc = (address) os::Linux::ucontext_get_pc(uc); ++ pc = (address) os::Posix::ucontext_get_pc(uc); ++ ++ address addr = (address) info->si_addr; ++ ++ // Make sure the high order byte is sign extended, as it may be masked away by the hardware. ++ if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { ++ addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); ++ } + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { -+ address addr = (address) info->si_addr; -+ + // check if fault address is within thread stack -+ if (thread->on_local_stack(addr)) { -+ // stack overflow -+ if (thread->in_stack_yellow_reserved_zone(addr)) { -+ if (thread->thread_state() == _thread_in_Java) { -+ if (thread->in_stack_reserved_zone(addr)) { -+ frame fr; -+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { -+ assert(fr.is_java_frame(), "Must be a Java frame"); -+ frame activation = -+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); -+ if (activation.sp() != NULL) { -+ thread->disable_stack_reserved_zone(); -+ if (activation.is_interpreted_frame()) { -+ thread->set_reserved_stack_activation((address)( -+ activation.fp() + frame::interpreter_frame_initial_sp_offset)); -+ } else { -+ thread->set_reserved_stack_activation((address)activation.unextended_sp()); -+ } -+ return 1; -+ } -+ } -+ } -+ // Throw a stack overflow exception. Guard pages will be reenabled -+ // while unwinding the stack. -+ thread->disable_stack_yellow_reserved_zone(); -+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); -+ } else { -+ // Thread was in the vm or native code. Return and try to finish. -+ thread->disable_stack_yellow_reserved_zone(); -+ return 1; -+ } -+ } else if (thread->in_stack_red_zone(addr)) { -+ // Fatal red zone violation. Disable the guard pages and fall through -+ // to handle_unexpected_exception way down below. -+ thread->disable_stack_red_zone(); -+ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); -+ -+ // This is a likely cause, but hard to verify. Let's just print -+ // it as a hint. 
-+ tty->print_raw_cr("Please check if any of your loaded .so files has " -+ "enabled executable stack (see man page execstack(8))"); -+ } else { -+ // Accessing stack address below sp may cause SEGV if current -+ // thread has MAP_GROWSDOWN stack. This should only happen when -+ // current thread was created by user code with MAP_GROWSDOWN flag -+ // and then attached to VM. See notes in os_linux.cpp. -+ if (thread->osthread()->expanding_stack() == 0) { -+ thread->osthread()->set_expanding_stack(); -+ if (os::Linux::manually_expand_stack(thread, addr)) { -+ thread->osthread()->clear_expanding_stack(); -+ return 1; -+ } -+ thread->osthread()->clear_expanding_stack(); -+ } else { -+ fatal("recursive segv. expanding stack."); -+ } ++ if (thread->is_in_full_stack(addr)) { ++ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { ++ return true; // continue + } + } + } @@ -55372,7 +57349,7 @@ index 000000000..37947701b + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); -+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault @@ -55380,10 +57357,32 @@ index 000000000..37947701b + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; -+ if (nm != NULL && nm->has_unsafe_access()) { ++ bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); ++ if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + address next_pc = pc + NativeCall::instruction_size; ++ if (is_unsafe_arraycopy) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } ++ } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { ++ // Pull a pointer to the error message out of the instruction ++ // stream. ++ const uint64_t *detail_msg_ptr ++ = (uint64_t*)(pc + NativeInstruction::instruction_size); ++ const char *detail_msg = (const char *)*detail_msg_ptr; ++ const char *msg = "stop"; ++ if (TraceTraps) { ++ tty->print_cr("trap: %s: (SIGILL)", msg); ++ } ++ ++ // End life with a fatal error, message and detail message and the context. ++ // Note: no need to do any post-processing here (e.g. 
signal chaining) ++ va_list va_dummy; ++ VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); ++ va_end(va_dummy); ++ ++ ShouldNotReachHere(); + } else if (sig == SIGFPE && + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = @@ -55393,14 +57392,18 @@ index 000000000..37947701b + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ MacroAssembler::uses_implicit_null_check((void*)addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } -+ } else if (thread->thread_state() == _thread_in_vm && -+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ -+ thread->doing_unsafe_access()) { ++ } else if ((thread->thread_state() == _thread_in_vm || ++ thread->thread_state() == _thread_in_native) && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; ++ if (UnsafeCopyMemory::contains_pc(pc)) { ++ next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); ++ } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + @@ -55412,17 +57415,6 @@ index 000000000..37947701b + stub = addr_slow; + } + } -+ -+ // Check to see if we caught the safepoint code in the -+ // process of write protecting the memory serialization page. -+ // It write enables the page immediately after protecting it -+ // so we can just return to retry the write. -+ if ((sig == SIGSEGV) && -+ os::is_memory_serialize_page(thread, (address) info->si_addr)) { -+ // Block current thread until the memory serialize page permission restored. -+ os::block_on_serialize_page_trap(); -+ return true; -+ } + } + + if (stub != NULL) { @@ -55431,34 +57423,11 @@ index 000000000..37947701b + thread->set_saved_exception_pc(pc); + } + -+ os::Linux::ucontext_set_pc(uc, stub); ++ os::Posix::ucontext_set_pc(uc, stub); + return true; + } + -+ // signal-chaining -+ if (os::Linux::chained_handler(sig, info, ucVoid)) { -+ return true; -+ } -+ -+ if (!abort_if_unrecognized) { -+ // caller wants another chance, so give it to him -+ return false; -+ } -+ -+ if (pc == NULL && uc != NULL) { -+ pc = os::Linux::ucontext_get_pc(uc); -+ } -+ -+ // unmask current signal -+ sigset_t newset; -+ sigemptyset(&newset); -+ sigaddset(&newset, sig); -+ sigprocmask(SIG_UNBLOCK, &newset, NULL); -+ -+ VMError::report_and_die(t, sig, pc, info, ucVoid); -+ -+ ShouldNotReachHere(); -+ return true; // Mute compiler ++ return false; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { @@ -55471,7 +57440,6 @@ index 000000000..37947701b +void os::Linux::set_fpu_control_word(int fpu_control) { +} + -+ +//////////////////////////////////////////////////////////////////////////////// +// thread stack + @@ -55522,7 +57490,7 @@ index 000000000..37947701b + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
-+ address pc = os::Linux::ucontext_get_pc(uc); ++ address pc = os::Posix::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); +} @@ -55635,13 +57603,13 @@ index 000000000..37947701b +}; diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 000000000..eae1635b0 +index 00000000000..6d415630661 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp -@@ -0,0 +1,40 @@ +@@ -0,0 +1,59 @@ +/* -+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55678,15 +57646,34 @@ index 000000000..eae1635b0 + *(jlong *) dst = *(const jlong *) src; + } + ++ // SYSCALL_RISCV_FLUSH_ICACHE is used to flush instruction cache. The "fence.i" instruction ++ // only work on the current hart, so kernel provides the icache flush syscall to flush icache ++ // on each hart. You can pass a flag to determine a global or local icache flush. ++ static void icache_flush(long int start, long int end) ++ { ++ const int SYSCALL_RISCV_FLUSH_ICACHE = 259; ++ register long int __a7 asm ("a7") = SYSCALL_RISCV_FLUSH_ICACHE; ++ register long int __a0 asm ("a0") = start; ++ register long int __a1 asm ("a1") = end; ++ // the flush can be applied to either all threads or only the current. ++ // 0 means a global icache flush, and the icache flush will be applied ++ // to other harts concurrently executing. ++ register long int __a2 asm ("a2") = 0; ++ __asm__ volatile ("ecall\n\t" ++ : "+r" (__a0) ++ : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a7) ++ : "memory"); ++ } ++ +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp new file mode 100644 -index 000000000..82b9bb6fd +index 00000000000..a6432c84ec7 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55716,7 +57703,7 @@ index 000000000..82b9bb6fd +#include "runtime/prefetch.hpp" + + -+inline void Prefetch::read (void *loc, intx interval) { ++inline void Prefetch::read (const void *loc, intx interval) { +} + +inline void Prefetch::write(void *loc, intx interval) { @@ -55725,12 +57712,12 @@ index 000000000..82b9bb6fd +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 000000000..c78096931 +index 00000000000..3100572e9fd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,103 @@ +@@ -0,0 +1,92 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55755,7 +57742,6 @@ index 000000000..c78096931 + */ + +#include "precompiled.hpp" -+#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + @@ -55778,13 +57764,10 @@ index 000000000..c78096931 +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ assert(this->is_Java_thread(), "must be JavaThread"); -+ JavaThread* jt = (JavaThread *)this; -+ + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. -+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { -+ *fr_addr = jt->pd_last_frame(); ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); + return true; + } + @@ -55796,24 +57779,17 @@ index 000000000..c78096931 + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; -+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, -+ &ret_sp, &ret_fp); -+ if (addr.pc() == NULL || ret_sp == NULL ) { ++ address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); ++ if (addr == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + -+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { -+ // In the middle of a trampoline call. Bail out for safety. -+ // This happens rarely so shouldn't affect profiling. -+ return false; -+ } -+ -+ frame ret_frame(ret_sp, ret_fp, addr.pc()); -+ if (!ret_frame.safe_for_sender(jt)) { ++ frame ret_frame(ret_sp, ret_fp, addr); ++ if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 -+ frame ret_frame2(ret_sp, NULL, addr.pc()); -+ if (!ret_frame2.safe_for_sender(jt)) { ++ frame ret_frame2(ret_sp, NULL, addr); ++ if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + } @@ -55834,12 +57810,12 @@ index 000000000..c78096931 +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp new file mode 100644 -index 000000000..657b98984 +index 00000000000..61e2cf85b63 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp -@@ -0,0 +1,67 @@ +@@ -0,0 +1,48 @@ +/* -+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55874,23 +57850,10 @@ index 000000000..657b98984 + frame pd_last_frame(); + + public: -+ // Mutators are highly dangerous.... 
-+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } -+ -+ void set_base_of_stack_pointer(intptr_t* base_sp) { -+ } -+ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + -+ intptr_t* base_of_stack_pointer() { -+ return NULL; -+ } -+ void record_base_of_stack_pointer() { -+ } -+ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + @@ -55898,21 +57861,15 @@ index 000000000..657b98984 +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); + -+ // These routines are only used on cpu architectures that -+ // have separate register stacks (Itanium). -+ static bool register_stack_overflow() { return false; } -+ static void enable_register_stack_guard() {} -+ static void disable_register_stack_guard() {} -+ +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 000000000..8ee443b5d +index 00000000000..6cf7683a586 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55968,13 +57925,13 @@ index 000000000..8ee443b5d +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 000000000..ef9358aa0 +index 00000000000..4623dbfad42 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,116 @@ +@@ -0,0 +1,118 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -56034,6 +57991,10 @@ index 000000000..ef9358aa0 +#define HWCAP_ISA_V (1 << ('V' - 'A')) +#endif + ++#ifndef HWCAP_ISA_B ++#define HWCAP_ISA_B (1 << ('B' - 'A')) ++#endif ++ +#define read_csr(csr) \ +({ \ + register unsigned long __v; \ @@ -56053,13 +58014,23 @@ index 000000000..ef9358aa0 + + uint64_t auxv = getauxval(AT_HWCAP); + -+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); -+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); -+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); -+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); -+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); -+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); -+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); ++ static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); ++ static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); ++ static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); ++ static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); ++ static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); ++ static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); ++ static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V | ++ HWCAP_ISA_B); + + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; @@ -56075,59 +58046,18 @@ index 000000000..ef9358aa0 + } + fclose(f); + } -+ -+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. -+ // Availability for those extensions could not be queried from HWCAP. -+ // TODO: Add proper detection for those extensions. -+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V); +} -diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp -index ba61aa4c0..4ca0b050b 100644 ---- a/src/hotspot/share/adlc/archDesc.cpp -+++ b/src/hotspot/share/adlc/archDesc.cpp -@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - // Match Vector types. - if (strncmp(idealOp, "Vec",3)==0) { - switch(last_char) { -+ case 'A': return "TypeVect::VECTA"; - case 'S': return "TypeVect::VECTS"; - case 'D': return "TypeVect::VECTD"; - case 'X': return "TypeVect::VECTX"; -@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - } - } - -+ if (strncmp(idealOp, "RegVMask", 8) == 0) { -+ return "Type::BOTTOM"; -+ } -+ - // !!!!! 
- switch(last_char) { - case 'I': return "TypeInt::INT"; -diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index f810fde76..2cf9636d1 100644 ---- a/src/hotspot/share/adlc/formssel.cpp -+++ b/src/hotspot/share/adlc/formssel.cpp -@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { - strcmp(opType,"RegL")==0 || - strcmp(opType,"RegF")==0 || - strcmp(opType,"RegD")==0 || -+ strcmp(opType,"RegVMask")==0 || -+ strcmp(opType,"VecA")==0 || - strcmp(opType,"VecS")==0 || - strcmp(opType,"VecD")==0 || - strcmp(opType,"VecX")==0 || diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73..af54dddf3 100644 +index e30d39f73d1..733ee9e654c 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { void LIR_Op2::verify() const { #ifdef ASSERT @@ -56136,44 +58066,40 @@ index e30d39f73..af54dddf3 100644 case lir_xchg: break; -@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { +@@ -252,9 +251,7 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) - : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) - , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(NULL) - , _stub(NULL) { +@@ -262,9 +259,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : - LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) - , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(stub->entry()) -+ , _type(type) , _block(NULL) , _ublock(NULL) - , _stub(stub) { +@@ -272,9 +267,7 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) - : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) - , _cond(cond) - , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(ublock) - , _stub(NULL) -@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { +@@ -296,13 +289,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { } void LIR_OpBranch::negate_cond() { @@ -56194,7 +58120,7 @@ index e30d39f73..af54dddf3 100644 default: ShouldNotReachHere(); } } -@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -525,6 +518,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { 
assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; @@ -56208,7 +58134,7 @@ index e30d39f73..af54dddf3 100644 if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +618,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -615,17 +615,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // to the result operand, otherwise the backend fails case lir_cmove: { @@ -56221,7 +58147,7 @@ index e30d39f73..af54dddf3 100644 - op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); - assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - do_input(op2->_opr1); @@ -56237,7 +58163,7 @@ index e30d39f73..af54dddf3 100644 break; } -@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { +@@ -1048,6 +1050,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } @@ -56248,7 +58174,7 @@ index e30d39f73..af54dddf3 100644 void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { -@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) +@@ -1084,6 +1090,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) , _file(NULL) , _line(0) #endif @@ -56259,7 +58185,7 @@ index e30d39f73..af54dddf3 100644 { } -@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { +@@ -1101,6 +1111,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { } #endif @@ -56298,7 +58224,7 @@ index e30d39f73..af54dddf3 100644 void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { +@@ -1680,7 +1722,6 @@ const char * LIR_Op::name() const { case lir_cmp_l2i: s = "cmp_l2i"; break; case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; case lir_cmp_fd2i: s = "comp_fd2i"; break; @@ -56306,7 +58232,7 @@ index e30d39f73..af54dddf3 100644 case lir_add: s = "add"; break; case lir_sub: s = "sub"; break; case lir_mul: s = "mul"; break; -@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { +@@ -1705,6 +1746,8 @@ const char * LIR_Op::name() const { case lir_irem: s = "irem"; break; case lir_fmad: s = "fmad"; break; case lir_fmaf: s = "fmaf"; break; @@ -56315,7 +58241,7 @@ index e30d39f73..af54dddf3 100644 // LIR_OpJavaCall case lir_static_call: s = "static"; break; case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { +@@ -1841,6 +1884,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); @@ -56324,7 +58250,7 @@ index e30d39f73..af54dddf3 100644 if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { -@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { +@@ -1927,7 +1972,7 @@ void 
LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { @@ -56333,7 +58259,7 @@ index e30d39f73..af54dddf3 100644 print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1978,6 +2023,15 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } @@ -56350,23 +58276,25 @@ index e30d39f73..af54dddf3 100644 void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018..88cd3b24e 100644 +index 3234ca018b7..efff6bf7a30 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -864,9 +864,11 @@ class LIR_OpConvert; - class LIR_OpAllocObj; - class LIR_OpRoundFP; - class LIR_Op2; --class LIR_OpDelay; -+class LIR_OpBranch; -+class LIR_OpDelay; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,7 @@ class LIR_Op2; + class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; +class LIR_Op4; class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; -@@ -916,8 +918,6 @@ enum LIR_Code { +@@ -916,8 +917,6 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal @@ -56375,7 +58303,7 @@ index 3234ca018..88cd3b24e 100644 , lir_move , lir_convert , lir_alloc_object -@@ -929,11 +929,12 @@ enum LIR_Code { +@@ -929,11 +928,12 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 @@ -56389,7 +58317,7 @@ index 3234ca018..88cd3b24e 100644 , lir_add , lir_sub , lir_mul -@@ -964,6 +965,9 @@ enum LIR_Code { +@@ -964,6 +964,9 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 @@ -56399,7 +58327,19 @@ index 3234ca018..88cd3b24e 100644 , begin_opJavaCall , lir_static_call , lir_optvirtual_call -@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { +@@ -1001,6 +1004,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#ifdef INCLUDE_ZGC ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1142,7 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } @@ -56407,7 +58347,7 @@ index 3234ca018..88cd3b24e 100644 virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { +@@ -1410,51 +1419,6 @@ class LIR_OpRTCall: public LIR_OpCall { virtual void verify() const; }; @@ -56459,7 +58399,7 @@ index 3234ca018..88cd3b24e 100644 class ConversionStub; class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { +@@ -1614,19 +1578,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: @@ -56470,8 +58410,8 @@ index 3234ca018..88cd3b24e 100644 , _opr2(opr2) - , _type(T_ILLEGAL) - , _condition(condition) -+ , _type(type) , _fpu_stack_size(0) ++ , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , 
_tmp3(LIR_OprFact::illegalOpr) @@ -56484,15 +58424,7 @@ index 3234ca018..88cd3b24e 100644 } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(condition) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) -@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { +@@ -1651,14 +1615,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) @@ -56510,7 +58442,7 @@ index 3234ca018..88cd3b24e 100644 } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { +@@ -1667,14 +1631,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) @@ -56528,7 +58460,7 @@ index 3234ca018..88cd3b24e 100644 } LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { +@@ -1686,10 +1650,10 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { @@ -56541,7 +58473,7 @@ index 3234ca018..88cd3b24e 100644 } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { +@@ -1703,6 +1667,51 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; @@ -56549,7 +58481,6 @@ index 3234ca018..88cd3b24e 100644 + friend class LIR_OpVisitState; + + private: -+ BasicType _type; + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -56557,9 +58488,8 @@ index 3234ca018..88cd3b24e 100644 + + public: + LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) + , _label(lbl) -+ , _type(type) + , _block(NULL) + , _ublock(NULL) + , _stub(NULL) { } @@ -56595,7 +58525,7 @@ index 3234ca018..88cd3b24e 100644 class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; -@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { +@@ -1766,6 +1775,63 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; @@ -56623,12 +58553,12 @@ index 3234ca018..88cd3b24e 100644 + , _opr3(opr3) + , _opr4(opr4) + , _type(type) -+ , _condition(condition) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) { ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { + assert(code == lir_cmove, "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } @@ -56659,7 +58589,7 @@ index 3234ca018..88cd3b24e 100644 //-------------------------------- class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { +@@ -1988,6 +2054,10 @@ class LIR_List: public CompilationResourceObj { const char * _file; int _line; #endif @@ -56670,7 +58600,7 @@ index 3234ca018..88cd3b24e 100644 public: void append(LIR_Op* op) { -@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj 
{ +@@ -2000,6 +2070,12 @@ class LIR_List: public CompilationResourceObj { } #endif // PRODUCT @@ -56683,7 +58613,7 @@ index 3234ca018..88cd3b24e 100644 _operations.append(op); #ifdef ASSERT -@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { +@@ -2016,6 +2092,10 @@ class LIR_List: public CompilationResourceObj { void set_file_and_line(const char * file, int line); #endif @@ -56694,7 +58624,7 @@ index 3234ca018..88cd3b24e 100644 //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } -@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { +@@ -2149,8 +2229,9 @@ class LIR_List: public CompilationResourceObj { void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); @@ -56707,7 +58637,7 @@ index 3234ca018..88cd3b24e 100644 void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f..42a0350f7 100644 +index 160483d5f74..42a0350f7d9 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { @@ -56740,7 +58670,7 @@ index 160483d5f..42a0350f7 100644 void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe5..406a58d21 100644 +index 44a5bcbe542..c677bd346fc 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { @@ -56751,21 +58681,29 @@ index 44a5bcbe5..406a58d21 100644 void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -222,8 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); - void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd9..d00bfe91a 100644 +index c28055fd996..a4dfe8552ae 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1242,8 +1242,8 @@ void LinearScan::add_register_hints(LIR_Op* op) { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,11 +1242,11 @@ void LinearScan::add_register_hints(LIR_Op* op) { break; } case lir_cmove: { @@ -56775,7 +58713,11 @@ index c28055fd9..d00bfe91a 100644 + LIR_Op4* cmove = (LIR_Op4*)op; LIR_Opr move_from = cmove->in_opr1(); - LIR_Opr move_to = cmove->result_opr(); +- LIR_Opr move_to = cmove->result_opr(); ++ LIR_Opr move_to = cmove->result_opr(); + + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); @@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { } } @@ -56812,42 +58754,17 @@ index c28055fd9..d00bfe91a 100644 assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp -index 19fe196bc..d9cb8e999 100644 ---- a/src/hotspot/share/classfile/vmSymbols.cpp -+++ b/src/hotspot/share/classfile/vmSymbols.cpp -@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_compareToL: - case vmIntrinsics::_compareToU: - case vmIntrinsics::_compareToLU: -@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - if (!SpecialStringIndexOf) return true; - break; - case vmIntrinsics::_equalsL: -diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp -index cef3f530c..a31525003 100644 ---- a/src/hotspot/share/classfile/vmSymbols.hpp -+++ b/src/hotspot/share/classfile/vmSymbols.hpp -@@ -946,6 +946,7 @@ - do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ -+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ - do_name( indexOf_name, "indexOf") \ - do_name( indexOfChar_name, "indexOfChar") \ - do_name( indexOfUL_name, "indexOfLatin1") \ diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b86..295f82ccc 100644 +index 4771a8b8652..6d377fa005d 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ #include "utilities/defaultStream.hpp" @@ -56857,8 +58774,28 @@ index 4771a8b86..295f82ccc 100644 vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce742433..f36dd612eff 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,7 @@ class LIR_OpZLoadBarrierTest : public LIR_Op { + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : +- LIR_Op(), ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a5..ff16de0e7 100644 +index e01a242a57e..ff16de0e778 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { @@ -56870,1038 +58807,22 @@ index e01a242a5..ff16de0e7 100644 return false; #else #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp -index 7768615b7..ef006f087 100644 ---- a/src/hotspot/share/opto/c2compiler.cpp -+++ b/src/hotspot/share/opto/c2compiler.cpp -@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_toBytesStringU: - case vmIntrinsics::_getCharsStringU: - case vmIntrinsics::_getCharStringU: -diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp -index 500054218..fafbde78d 100644 ---- a/src/hotspot/share/opto/chaitin.cpp -+++ b/src/hotspot/share/opto/chaitin.cpp -@@ -77,6 +77,7 @@ void LRG::dump() const { - if( _is_oop ) tty->print("Oop "); - if( _is_float ) tty->print("Float "); - if( _is_vector ) tty->print("Vector "); -+ if( _is_scalable ) tty->print("Scalable "); - if( _was_spilled1 ) tty->print("Spilled "); - if( _was_spilled2 ) tty->print("Spilled2 "); - if( _direct_conflict ) tty->print("Direct_conflict "); -@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { - - // Merge multidefs if multiple defs representing the same value are used in a single block. - merge_multidefs(); -+ merge_debugdefs(); - - #ifdef ASSERT - // Veify the graph after RA. -@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { - // Live ranges record the highest register in their mask. - // We want the low register for the AD file writer's convenience. 
- OptoReg::Name hi = lrg.reg(); // Get hi register -- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo -+ int num_regs = lrg.num_regs(); -+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { -+ // For scalable vector registers, when they are allocated in physical -+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable -+ // vector. If they are allocated on stack, we need to get the actual -+ // num_regs, which reflects the physical length of scalable registers. -+ num_regs = lrg.scalable_reg_slots(); -+ } -+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. It is safe - // since for registers encoding only 'lo' is used. -@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - // Check for vector live range (only if vector register is used). - // On SPARC vector uses RegD which could be misaligned so it is not - // processes as vector in RA. -- if (RegMask::is_vector(ireg)) -+ if (RegMask::is_vector(ireg)) { - lrg._is_vector = 1; -+ if (ireg == Op_VecA) { -+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); -+ lrg._is_scalable = 1; -+ // For scalable vector, when it is allocated in physical register, -+ // num_regs is RegMask::SlotsPerVecA for reg mask, -+ // which may not be the actual physical register size. -+ // If it is allocated in stack, we need to get the actual -+ // physical length of scalable vector register. -+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); -+ } -+ } - assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, - "vector must be in vector registers"); - -@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - lrg.set_reg_pressure(1); - #endif - break; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); -+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); -+ lrg.set_num_regs(RegMask::SlotsPerVecA); -+ lrg.set_reg_pressure(1); -+ break; - case Op_VecS: - assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); - assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); -@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { - return false; - } - -+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { -+ int num_regs = lrg.num_regs(); -+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); -+ -+ if (lrg.is_scalable()) { -+ // a physical register is found -+ if (chunk == 0 && OptoReg::is_reg(assigned)) { -+ return assigned; -+ } -+ -+ // find available stack slots for scalable register -+ if (lrg._is_vector) { -+ num_regs = lrg.scalable_reg_slots(); -+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits -+ if (num_regs == RegMask::SlotsPerVecA) { -+ return assigned; -+ } -+ -+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it -+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits -+ // instead of SlotsPerVecA bits. 
-+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg -+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { -+ // Verify the found reg has scalable_reg_slots() bits set. -+ if (mask.is_valid_reg(assigned, num_regs)) { -+ return assigned; -+ } else { -+ // Remove more for each iteration -+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg -+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits -+ assigned = mask.find_first_set(lrg, num_regs); -+ } -+ } -+ return OptoReg::Bad; // will cause chunk change, and retry next chunk -+ } -+ } -+ -+ return assigned; -+} -+ -+ - // Choose a color using the biasing heuristic - OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - -@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - RegMask tempmask = lrg.mask(); - tempmask.AND(lrgs(copy_lrg).mask()); - tempmask.clear_to_sets(lrg.num_regs()); -- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); -+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); - if (OptoReg::is_valid(reg)) - return reg; - } -@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { - // Find an aligned set -- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); -+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); - } - - // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate -@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { - int n_regs = lrg->num_regs(); - assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); - if (n_regs == 1 || !lrg->_fat_proj) { -- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ if (Matcher::supports_scalable_vector()) { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); -+ } else { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ } - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - // For vectors and pairs, also insert the low bit of the pair -- for (int i = 1; i < n_regs; i++) -+ // We always choose the high bit, then mask the low bits by register size -+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack -+ n_regs = lrg->scalable_reg_slots(); -+ } -+ for (int i = 1; i < n_regs; i++) { - lrg->Insert(OptoReg::add(reg,-i)); -+ } - lrg->set_mask_size(n_regs); - } else { // Else fatproj - // mask must be equal to fatproj bits, by definition -diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp -index e5be5b966..b5d1b0604 100644 ---- a/src/hotspot/share/opto/chaitin.hpp -+++ b/src/hotspot/share/opto/chaitin.hpp -@@ -115,9 +115,11 @@ public: - _msize_valid=1; - if (_is_vector) { - assert(!_fat_proj, "sanity"); -- _mask.verify_sets(_num_regs); -+ if (!(_is_scalable && OptoReg::is_stack(_reg))) { -+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); -+ } - } else if (_num_regs == 2 && !_fat_proj) { -- _mask.verify_pairs(); -+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); - } - #endif - } -@@ -143,10 +145,34 @@ public: - private: - uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else - // except _num_regs is kill count for fat_proj -+ -+ // For scalable register, num_regs may not be the actual physical register size. 
-+ // We need to get the actual physical length of scalable register when scalable -+ // register is spilled. The size of one slot is 32-bit. -+ uint _scalable_reg_slots; // Actual scalable register length of slots. -+ // Meaningful only when _is_scalable is true. - public: - int num_regs() const { return _num_regs; } - void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } - -+ uint scalable_reg_slots() { return _scalable_reg_slots; } -+ void set_scalable_reg_slots(uint slots) { -+ assert(_is_scalable, "scalable register"); -+ assert(slots > 0, "slots of scalable register is not valid"); -+ _scalable_reg_slots = slots; -+ } -+ -+ bool is_scalable() { -+#ifdef ASSERT -+ if (_is_scalable) { -+ // Should only be a vector for now, but it could also be a RegVMask in future. -+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); -+ } -+#endif -+ return _is_scalable; -+ } -+ -+ - private: - // Number of physical registers this live range uses when it colors - // Architecture and register-set dependent -@@ -172,6 +198,7 @@ public: - uint _is_oop:1, // Live-range holds an oop - _is_float:1, // True if in float registers - _is_vector:1, // True if in vector registers -+ _is_scalable:1, // True if register size is scalable - _was_spilled1:1, // True if prior spilling on def - _was_spilled2:1, // True if twice prior spilling on def - _is_bound:1, // live range starts life with no -@@ -756,6 +783,7 @@ private: - - // Merge nodes that are a part of a multidef lrg and produce the same value within a block. - void merge_multidefs(); -+ void merge_debugdefs(); - - private: - -diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp -index c0dfe1b0c..2d9526a39 100644 ---- a/src/hotspot/share/opto/intrinsicnode.hpp -+++ b/src/hotspot/share/opto/intrinsicnode.hpp -@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { - // Base class for Ideal nodes used in String intrinsic code. - class StrIntrinsicNode: public Node { - public: -- // Possible encodings of the two parameters passed to the string intrinsic. -+ // Possible encodings of the parameters passed to the string intrinsic. - // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that - // the first string is Latin1 encoded and the second string is UTF16 encoded. -- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; -+ // 'L' means that the single string is Latin1 encoded -+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; - - protected: - // Encoding of strings. Used to select the right version of the intrinsic. 
-diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp -index 6b6aa9e9b..8719c5b12 100644 ---- a/src/hotspot/share/opto/library_call.cpp -+++ b/src/hotspot/share/opto/library_call.cpp -@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { - bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); - Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, - RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); -- bool inline_string_indexOfChar(); -+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); - bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); - bool inline_string_toBytesU(); - bool inline_string_getCharsU(); -@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { - case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); - case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); - case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); -- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); -+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); -+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); - - case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); - case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); -@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* - } - - //-----------------------------inline_string_indexOfChar----------------------- --bool LibraryCallKit::inline_string_indexOfChar() { -+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { - if (too_many_traps(Deoptimization::Reason_intrinsic)) { - return false; - } -@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { - - src = must_be_not_null(src, true); - -- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); -+ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); - Node* src_start = array_element_address(src, src_offset, T_BYTE); - Node* src_count = _gvn.transform(new SubINode(max, from_index)); - - // Range checks -- generate_string_range_check(src, src_offset, src_count, true); -+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); - if (stopped()) { - return true; - } -@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { - RegionNode* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::INT); - -- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); -+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); - C->set_has_split_ifs(true); // Has chance for split-if optimization - _gvn.transform(result); - -diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp -index 8d526b15d..92b4f7158 100644 ---- a/src/hotspot/share/opto/machnode.cpp -+++ b/src/hotspot/share/opto/machnode.cpp -@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { - return MachNode::emit_size(ra_); - } - --//------------------------------size------------------------------------------- -+//-------------------------emit_size------------------------------------------- - // Helper function that computes size by emitting code - uint MachNode::emit_size(PhaseRegAlloc *ra_) const { - // Emit into a trash buffer and count bytes emitted. -diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index a52325680..dad70565b 100644 ---- a/src/hotspot/share/opto/machnode.hpp -+++ b/src/hotspot/share/opto/machnode.hpp -@@ -334,6 +334,10 @@ public: - // Top-level ideal Opcode matched - virtual int ideal_Opcode() const { return Op_Node; } - -+ virtual bool is_Opcode_equal(Node* node) { -+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); -+ } -+ - // Adds the label for the case - virtual void add_case_label( int switch_val, Label* blockLabel); - -diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 9e9b3383f..97de5e314 100644 ---- a/src/hotspot/share/opto/matcher.cpp -+++ b/src/hotspot/share/opto/matcher.cpp -@@ -84,6 +84,7 @@ Matcher::Matcher() - idealreg2spillmask [Op_RegF] = NULL; - idealreg2spillmask [Op_RegD] = NULL; - idealreg2spillmask [Op_RegP] = NULL; -+ idealreg2spillmask [Op_VecA] = NULL; - idealreg2spillmask [Op_VecS] = NULL; - idealreg2spillmask [Op_VecD] = NULL; - idealreg2spillmask [Op_VecX] = NULL; -@@ -110,6 +111,7 @@ Matcher::Matcher() - idealreg2mhdebugmask[Op_RegF] = NULL; - idealreg2mhdebugmask[Op_RegD] = NULL; - idealreg2mhdebugmask[Op_RegP] = NULL; -+ idealreg2mhdebugmask[Op_VecA] = NULL; - idealreg2mhdebugmask[Op_VecS] = NULL; - idealreg2mhdebugmask[Op_VecD] = NULL; - idealreg2mhdebugmask[Op_VecX] = NULL; -@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { - void Matcher::init_first_stack_mask() { - - // Allocate storage for spill masks as masks for the appropriate load type. 
-- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); -+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); - - idealreg2spillmask [Op_RegN] = &rms[0]; - idealreg2spillmask [Op_RegI] = &rms[1]; -@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { - idealreg2mhdebugmask[Op_RegD] = &rms[16]; - idealreg2mhdebugmask[Op_RegP] = &rms[17]; - -- idealreg2spillmask [Op_VecS] = &rms[18]; -- idealreg2spillmask [Op_VecD] = &rms[19]; -- idealreg2spillmask [Op_VecX] = &rms[20]; -- idealreg2spillmask [Op_VecY] = &rms[21]; -- idealreg2spillmask [Op_VecZ] = &rms[22]; -+ idealreg2spillmask [Op_VecA] = &rms[18]; -+ idealreg2spillmask [Op_VecS] = &rms[19]; -+ idealreg2spillmask [Op_VecD] = &rms[20]; -+ idealreg2spillmask [Op_VecX] = &rms[21]; -+ idealreg2spillmask [Op_VecY] = &rms[22]; -+ idealreg2spillmask [Op_VecZ] = &rms[23]; - - OptoReg::Name i; - -@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { - // Keep spill masks aligned. - aligned_stack_mask.clear_to_pairs(); - assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); -+ RegMask scalable_stack_mask = aligned_stack_mask; - - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - #ifdef _LP64 -@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { - *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; - idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); - } -+ -+ if (Matcher::supports_scalable_vector()) { -+ int k = 1; -+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); -+ // Exclude last input arg stack slots to avoid spilling vector register there, -+ // otherwise vector spills could stomp over stack slots in caller frame. -+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { -+ scalable_stack_mask.Remove(in); -+ in = OptoReg::add(in, -1); -+ } -+ -+ // For VecA -+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); -+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); -+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; -+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); -+ } else { -+ *idealreg2spillmask[Op_VecA] = RegMask::Empty; -+ } -+ - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. -@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { - idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); - - // Vector regmasks. -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; -+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); -+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); -+ } - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE, 4); - MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); -diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index 244e3d1f8..9a8307102 100644 ---- a/src/hotspot/share/opto/matcher.hpp -+++ b/src/hotspot/share/opto/matcher.hpp -@@ -310,7 +310,7 @@ public: - - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- static const bool match_rule_supported_vector(int opcode, int vlen); -+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void); -@@ -333,6 +333,10 @@ public: - Matcher::min_vector_size(bt) <= size); - } - -+ static const bool supports_scalable_vector(); -+ // Actual max scalable vector register length. -+ static const int scalable_vector_reg_size(const BasicType bt); -+ - // Vector ideal reg - static const uint vector_ideal_reg(int len); - static const uint vector_shift_count_ideal_reg(int len); -diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index 02bb6bb16..99d51ba05 100644 ---- a/src/hotspot/share/opto/node.cpp -+++ b/src/hotspot/share/opto/node.cpp -@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { - return NULL; - } - -+//--------------------------is_similar----------------------------------- -+// True if a node has the same opcode and inputs as "this". -+bool Node::is_similar(Node* node) { -+ if (this == node) { -+ return true; -+ } else { -+ if (is_Opcode_equal(node) && (req() == node->req())) { -+ for (uint i = 0; i < node->req(); i++) { -+ if (in(i) != node->in(i)) { -+ return false; -+ } -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ -+bool Node::is_Opcode_equal(Node* node) { -+ return Opcode() == node->Opcode(); -+} - - //--------------------------unique_ctrl_out------------------------------ - // Return the unique control out if only one. Null if none or more than one. -diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 0c0b9bf69..e24456d85 100644 ---- a/src/hotspot/share/opto/node.hpp -+++ b/src/hotspot/share/opto/node.hpp -@@ -1030,6 +1030,11 @@ public: - // be found; Otherwise return NULL; - Node* find_similar(int opc); - -+ // True if a node has the same opcode and inputs as "this". -+ bool is_similar(Node* node); -+ -+ virtual bool is_Opcode_equal(Node* node); -+ - // Return the unique control out if only one. Null if none or more than one. 
- Node* unique_ctrl_out() const; - -diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp -index e31e8d847..aa0483c73 100644 ---- a/src/hotspot/share/opto/opcodes.cpp -+++ b/src/hotspot/share/opto/opcodes.cpp -@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { - "RegF", - "RegD", - "RegL", -- "RegFlags", -+ "VecA", - "VecS", - "VecD", - "VecX", - "VecY", - "VecZ", -+ "RegVMask", -+ "RegFlags", - "_last_machine_leaf", - #include "classes.hpp" - "_last_class_name", -diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp -index ae3d61ce0..0a77c3732 100644 ---- a/src/hotspot/share/opto/opcodes.hpp -+++ b/src/hotspot/share/opto/opcodes.hpp -@@ -37,11 +37,13 @@ enum Opcodes { - macro(RegF) // Machine float register - macro(RegD) // Machine double register - macro(RegL) // Machine long register -+ macro(VecA) // Machine vectora register - macro(VecS) // Machine vectors register - macro(VecD) // Machine vectord register - macro(VecX) // Machine vectorx register - macro(VecY) // Machine vectory register - macro(VecZ) // Machine vectorz register -+ macro(RegVMask) // Vector mask/predicate register - macro(RegFlags) // Machine flags register - _last_machine_leaf, // Split between regular opcodes and machine - #include "classes.hpp" -diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp -index 397a53713..89c7fc7c8 100644 ---- a/src/hotspot/share/opto/phase.cpp -+++ b/src/hotspot/share/opto/phase.cpp -@@ -113,6 +113,7 @@ void Phase::print_timers() { - tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); - tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); - tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); -+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); - tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); - tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); - tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); -@@ -130,6 +131,7 @@ void Phase::print_timers() { - timers[_t_regAllocSplit].seconds() + - timers[_t_postAllocCopyRemoval].seconds() + - timers[_t_mergeMultidefs].seconds() + -+ timers[_t_mergeDebugdefs].seconds() + - timers[_t_fixupSpills].seconds() + - timers[_t_chaitinCompact].seconds() + - timers[_t_chaitinCoalesce1].seconds() + -diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp -index 4b0c53ffc..b3302ec86 100644 ---- a/src/hotspot/share/opto/phase.hpp -+++ b/src/hotspot/share/opto/phase.hpp -@@ -91,6 +91,7 @@ public: - _t_regAllocSplit, - _t_postAllocCopyRemoval, - _t_mergeMultidefs, -+ _t_mergeDebugdefs, - _t_fixupSpills, - _t_chaitinCompact, - _t_chaitinCoalesce1, -diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp -index 46766b604..3f608bb40 100644 ---- a/src/hotspot/share/opto/postaloc.cpp -+++ b/src/hotspot/share/opto/postaloc.cpp -@@ -27,6 +27,7 @@ - #include "memory/resourceArea.hpp" - #include "opto/chaitin.hpp" - #include "opto/machnode.hpp" -+#include "opto/addnode.hpp" - - // See if this register (or pairs, or vector) already contains the value. - static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, -@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - Node *val = skip_copies(n->in(k)); - if (val == x) return blk_adjust; // No progress? 
- -- int n_regs = RegMask::num_registers(val->ideal_reg()); - uint val_idx = _lrg_map.live_range_id(val); - OptoReg::Name val_reg = lrgs(val_idx).reg(); -+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); - - // See if it happens to already be in the correct register! - // (either Phi's direct register, or the common case of the name -@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - } - - Node *vv = value[reg]; -+ // For scalable register, number of registers may be inconsistent between -+ // "val_reg" and "reg". For example, when "val" resides in register -+ // but "reg" is located in stack. -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ if (OptoReg::is_stack(reg)) { -+ n_regs = lrgs(val_idx).scalable_reg_slots(); -+ } else { -+ n_regs = RegMask::SlotsPerVecA; -+ } -+ } - if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set -- uint last = (n_regs-1); // Looking for the last part of a set -+ uint last; -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned -+ last = RegMask::SlotsPerVecA - 1; -+ } else { -+ last = (n_regs-1); // Looking for the last part of a set -+ } - if ((reg&last) != last) continue; // Wrong part of a set - if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value - } -@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { - } - } - -+void PhaseChaitin::merge_debugdefs() { -+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); -+ -+ ResourceMark rm; -+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { -+ Block* block = _cfg.get_block(i); -+ for (int j = 0; j < (int) block->number_of_nodes(); j++) { -+ Node* base = block->get_node(j); -+ if (base && base->is_Mach() && base->outcnt() == 1) { -+ Node* addp = base->unique_out(); -+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { -+ Node* derived = addp->in(AddPNode::Address); -+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { -+ base->subsume_by(derived, Compile::current()); -+ block->remove_node(j--); -+ } -+ } -+ } -+ } -+ } -+} -+ - int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { - int blk_adjust = 0; - -@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - uint k; - Node *phi = block->get_node(j); - uint pidx = _lrg_map.live_range_id(phi); -- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); -+ OptoReg::Name preg = lrgs(pidx).reg(); - - // Remove copies remaining on edges. Check for junk phi. 
- Node *u = NULL; -@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - if( pidx ) { - value.map(preg,phi); - regnd.map(preg,phi); -- int n_regs = RegMask::num_registers(phi->ideal_reg()); -+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name preg_lo = OptoReg::add(preg,-l); - value.map(preg_lo,phi); -@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - regnd.map(ureg, def); - // Record other half of doubles - uint def_ideal_reg = def->ideal_reg(); -- int n_regs = RegMask::num_registers(def_ideal_reg); -+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); - if (!value[ureg_lo] && -@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - } - - uint n_ideal_reg = n->ideal_reg(); -- int n_regs = RegMask::num_registers(n_ideal_reg); -+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); - if (n_regs == 1) { - // If Node 'n' does not change the value mapped by the register, - // then 'n' is a useless copy. Do not update the register->node -diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp -index 2e04c42eb..34a701e84 100644 ---- a/src/hotspot/share/opto/regmask.cpp -+++ b/src/hotspot/share/opto/regmask.cpp -@@ -24,6 +24,7 @@ - - #include "precompiled.hpp" - #include "opto/ad.hpp" -+#include "opto/chaitin.hpp" - #include "opto/compile.hpp" - #include "opto/matcher.hpp" - #include "opto/node.hpp" -@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( - - //============================================================================= - bool RegMask::is_vector(uint ireg) { -- return (ireg == Op_VecS || ireg == Op_VecD || -+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || - ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); - } - - int RegMask::num_registers(uint ireg) { - switch(ireg) { - case Op_VecZ: -- return 16; -+ return SlotsPerVecZ; - case Op_VecY: -- return 8; -+ return SlotsPerVecY; - case Op_VecX: -- return 4; -+ return SlotsPerVecX; - case Op_VecD: -+ return SlotsPerVecD; - case Op_RegD: - case Op_RegL: - #ifdef _LP64 - case Op_RegP: - #endif - return 2; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ return SlotsPerVecA; - } - // Op_VecS and the rest ideal registers. - return 1; - } - -+int RegMask::num_registers(uint ireg, LRG &lrg) { -+ int n_regs = num_registers(ireg); -+ -+ // assigned is OptoReg which is selected by register allocator -+ OptoReg::Name assigned = lrg.reg(); -+ assert(OptoReg::is_valid(assigned), "should be valid opto register"); -+ -+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { -+ n_regs = lrg.scalable_reg_slots(); -+ } -+ return n_regs; -+} -+ - //------------------------------find_first_pair-------------------------------- - // Find the lowest-numbered register pair in the mask. Return the - // HIGHEST register number in the pair, or BAD if no pairs. -@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { - return true; - } - -+// Check that whether given reg number with size is valid -+// for current regmask, where reg is the highest number. 
-+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { -+ for (int i = 0; i < size; i++) { -+ if (!Member(reg - i)) { -+ return false; -+ } -+ } -+ return true; -+} -+ - // only indicies of power 2 are accessed, so index 3 is only filled in for storage. - static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; - //------------------------------find_first_set--------------------------------- - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Works also for size 1. --OptoReg::Name RegMask::find_first_set(const int size) const { -- verify_sets(size); -+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { -+ if (lrg.is_scalable()) { -+ // For scalable vector register, regmask is SlotsPerVecA bits aligned. -+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); -+ } else { -+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); -+ } - for (int i = 0; i < RM_SIZE; i++) { - if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d08795..2688275be 100644 +index c64d0879592..bc856d4b617 100644 --- a/src/hotspot/share/opto/regmask.hpp +++ b/src/hotspot/share/opto/regmask.hpp -@@ -28,6 +28,8 @@ - #include "code/vmreg.hpp" - #include "opto/optoreg.hpp" - -+class LRG; -+ - // Some fun naming (textual) substitutions: - // - // RegMask::get_low_elem() ==> RegMask::find_first_elem() -@@ -95,6 +97,7 @@ public: - // requirement is internal to the allocator, and independent of any - // particular platform. - enum { SlotsPerLong = 2, -+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), - SlotsPerVecS = 1, - SlotsPerVecD = 2, - SlotsPerVecX = 4, -@@ -204,10 +207,14 @@ public: - return false; - } - -+ // Check that whether given reg number with size is valid -+ // for current regmask, where reg is the highest number. -+ bool is_valid_reg(OptoReg::Name reg, const int size) const; -+ - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Assert that the mask contains only bit sets. -- OptoReg::Name find_first_set(const int size) const; -+ OptoReg::Name find_first_set(LRG &lrg, const int size) const; - - // Clear out partial bits; leave only aligned adjacent bit sets of size. - void clear_to_sets(const int size); -@@ -226,6 +233,7 @@ public: - - static bool is_vector(uint ireg); - static int num_registers(uint ireg); -+ static int num_registers(uint ireg, LRG &lrg); - - // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { -diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index fed52e488..ee583236f 100644 ---- a/src/hotspot/share/opto/superword.cpp -+++ b/src/hotspot/share/opto/superword.cpp -@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz - //------------------------------transform_loop--------------------------- - void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { - assert(UseSuperWord, "should be"); -- // Do vectors exist on this architecture? -- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; -+ // SuperWord only works with power of two vector sizes. 
-+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); -+ if (vector_width < 2 || !is_power_of_2(vector_width)) { -+ return; -+ } - - assert(lpt->_head->is_CountedLoop(), "must be"); - CountedLoopNode *cl = lpt->_head->as_CountedLoop(); -diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 7d767c47c..c9948df5f 100644 ---- a/src/hotspot/share/opto/type.cpp -+++ b/src/hotspot/share/opto/type.cpp -@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { - { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY - { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ - #else // all other -+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA - { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS - { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX -@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { - // get_zero_type() should not happen for T_CONFLICT - _zero_type[T_CONFLICT]= NULL; - -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); -+ } -+ - // Vector predefined types, it needs initialized _const_basic_type[]. - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE,4); -@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { - if (Matcher::vector_size_supported(T_FLOAT,16)) { - TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); - } -+ mreg2type[Op_VecA] = TypeVect::VECTA; - mreg2type[Op_VecS] = TypeVect::VECTS; - mreg2type[Op_VecD] = TypeVect::VECTD; - mreg2type[Op_VecX] = TypeVect::VECTX; -@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { - - Bad, // Tuple - handled in v-call - Bad, // Array - handled in v-call -+ Bad, // VectorA - handled in v-call - Bad, // VectorS - handled in v-call - Bad, // VectorD - handled in v-call - Bad, // VectorX - handled in v-call -@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { - - //==============================TypeVect======================================= - // Convenience common pre-built types. 
-+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic - const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors - const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors - const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors -@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors - const TypeVect* TypeVect::make(const Type *elem, uint length) { - BasicType elem_bt = elem->array_element_basic_type(); - assert(is_java_primitive(elem_bt), "only primitive types in vector"); -- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); - assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); - int size = length * type2aelembytes(elem_bt); - switch (Matcher::vector_ideal_reg(size)) { -+ case Op_VecA: -+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); - case Op_VecS: - return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); - case Op_RegL: -@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { - default: // All else is a mistake - typerr(t); - -+ case VectorA: - case VectorS: - case VectorD: - case VectorX: -@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { - #ifndef PRODUCT - void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { - switch (base()) { -+ case VectorA: -+ st->print("vectora["); break; - case VectorS: - st->print("vectors["); break; - case VectorD: -diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp -index 27d042d94..82ee2dfcb 100644 ---- a/src/hotspot/share/opto/type.hpp -+++ b/src/hotspot/share/opto/type.hpp -@@ -53,6 +53,7 @@ class TypeNarrowKlass; - class TypeAry; - class TypeTuple; - class TypeVect; -+class TypeVectA; - class TypeVectS; - class TypeVectD; - class TypeVectX; -@@ -87,6 +88,7 @@ public: - - Tuple, // Method signature or object layout - Array, // Array types -+ VectorA, // (Scalable) Vector types for vector length agnostic - VectorS, // 32bit Vector types - VectorD, // 64bit Vector types - VectorX, // 128bit Vector types -@@ -769,6 +771,7 @@ public: - virtual const Type *xmeet( const Type *t) const; - virtual const Type *xdual() const; // Compute dual right now. - -+ static const TypeVect *VECTA; - static const TypeVect *VECTS; - static const TypeVect *VECTD; - static const TypeVect *VECTX; -@@ -780,6 +783,11 @@ public: - #endif - }; - -+class TypeVectA : public TypeVect { -+ friend class TypeVect; -+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} -+}; -+ - class TypeVectS : public TypeVect { - friend class TypeVect; - TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} -@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { - } - - inline const TypeVect *Type::is_vect() const { -- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); -+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); - return (TypeVect*)this; - } - - inline const TypeVect *Type::isa_vect() const { -- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; -+ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; - } - - inline const TypePtr *Type::is_ptr() const { -diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index de22591ba..b82d631f4 100644 ---- a/src/hotspot/share/opto/vectornode.cpp -+++ b/src/hotspot/share/opto/vectornode.cpp -@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, bt); -- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); -+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = ReductionNode::opcode(opc, bt); -- return vopc != opc && Matcher::match_rule_supported(vopc); -+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2b..ee769634f 100644 +index c46247f2bdb..b5e64b65ff7 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; - #ifdef ZERO - #define VMTYPE "Zero" - #else // ZERO -- #define VMTYPE COMPILER1_PRESENT("Client") \ -- COMPILER2_PRESENT("Server") -+ #ifdef COMPILER2 -+ #define VMTYPE "Server" -+ #elif defined(COMPILER1) -+ #define VMTYPE "Client" -+ #else -+ #define VMTYPE "Core" -+ #endif // COMPILER2 - #endif // ZERO - #endif // TIERED - #endif -@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { IA32_ONLY("x86") \ IA64_ONLY("ia64") \ S390_ONLY("s390") \ @@ -57911,8 +58832,19 @@ index c46247f2b..ee769634f 100644 #endif // !ZERO #endif // !CPU +diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp +index e7b32723e47..434826853ee 100644 +--- a/src/hotspot/share/runtime/synchronizer.cpp ++++ b/src/hotspot/share/runtime/synchronizer.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index 0a9c45f85..a96c2dd81 100644 +index aa914eccafc..a2f98e6a251 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -1234,7 +1234,7 @@ class JavaThread: public Thread { @@ -57925,32 +58857,27 @@ index 0a9c45f85..a96c2dd81 100644 void set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f7..aa71d7655 100644 +index dee8534f739..9af07aeb459 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { set_has_async_exception(); } -#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } -diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp -index 0b898dcc3..7f76486ae 100644 ---- a/src/hotspot/share/utilities/debug.cpp -+++ b/src/hotspot/share/utilities/debug.cpp -@@ -632,6 +632,7 @@ void help() { - tty->print_cr(" pns($sp, $rbp, $pc) on Linux/amd64 and Solaris/amd64 or"); - tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); - tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); -+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); - tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); - tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); - tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf8025386..e8ab3097a 100644 +index cf802538689..e8ab3097ac7 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -597,6 +597,32 @@ @@ -57986,26 +58913,17 @@ index cf8025386..e8ab3097a 100644 #ifdef VM_LITTLE_ENDIAN #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) -diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java -index 063a5ef3a..50e9cdb57 100644 ---- a/src/java.base/share/classes/java/lang/StringLatin1.java -+++ b/src/java.base/share/classes/java/lang/StringLatin1.java -@@ -209,6 +209,11 @@ final class StringLatin1 { - // Note: fromIndex might be near -1>>>1. 
- return -1; - } -+ return indexOfChar(value, ch, fromIndex, max); -+ } -+ -+ @HotSpotIntrinsicCandidate -+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - byte c = (byte)ch; - for (int i = fromIndex; i < max; i++) { - if (value[i] == c) { diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c..55a7b96f7 100644 +index 0d834302c57..45a927fb5ee 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif @@ -58022,71 +58940,76 @@ index 0d834302c..55a7b96f7 100644 } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { -@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #ifdef aarch64 - #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif -@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg + -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ 
regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; + -+ { -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ } +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e02..9d7fda8a6 100644 +index 8318e8e0213..ab092d4ee33 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -43,6 +43,8 @@ #elif defined(arm) #include @@ -58096,41 +59019,11 @@ index 8318e8e02..9d7fda8a6 100644 #endif // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d85..12eafc455 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) - #endif - -+// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case -+// like other platforms, so call ptrace with PTRACE_GETREGSET here. 
-+#ifndef riscv64 - #if defined(_LP64) && defined(PTRACE_GETREGS64) - #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 - #elif defined(PTRACE_GETREGS) -@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #elif defined(PT_GETREGS) - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif -+#endif - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c..82c083055 100644 +index 0f5f0119c73..9bff9ee9b15 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -1,6 +1,7 @@ - /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; @@ -58138,24 +59031,24 @@ index 0f5f0119c..82c083055 100644 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -592,6 +594,8 @@ public class HotSpotAgent { - machDesc = new MachineDescriptionPPC64(); - } else if (cpu.equals("aarch64")) { - machDesc = new MachineDescriptionAArch64(); +@@ -598,6 +599,8 @@ private void setupDebuggerLinux() { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); - } else if (cpu.equals("sparc")) { - if (LinuxDebuggerLocal.getAddressSize()==8) { - machDesc = new MachineDescriptionSPARC64Bit(); + } else { + try { + machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 000000000..4221937f1 +index 00000000000..a972516dee3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58194,18 +59087,24 @@ index 000000000..4221937f1 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb71..acd5844ca 100644 +index 5e5a6bb7141..dc0bcb3da94 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.x86.*; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; -@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; @@ -58213,7 +59112,7 @@ index 5e5a6bb71..acd5844ca 100644 import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { -@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { +@@ -116,7 +118,14 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); @@ -58231,7 +59130,7 @@ index 5e5a6bb71..acd5844ca 100644 return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 -index 000000000..eaef586b4 +index 00000000000..f06da24bd0e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ @@ -58327,7 +59226,7 @@ index 000000000..eaef586b4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 000000000..4789e664c +index 00000000000..fdb841ccf3d --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58379,39 +59278,9 @@ index 000000000..4789e664c + return debugger.newAddress(getRegister(index)); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -index 74e957d94..1f44d75ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -+++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; - import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.proc.amd64.*; - import sun.jvm.hotspot.debugger.proc.aarch64.*; -+import sun.jvm.hotspot.debugger.proc.riscv64.*; - import sun.jvm.hotspot.debugger.proc.sparc.*; - import sun.jvm.hotspot.debugger.proc.ppc64.*; - import sun.jvm.hotspot.debugger.proc.x86.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.utilities.*; -@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { - threadFactory = new ProcAARCH64ThreadFactory(this); - pcRegIndex = AARCH64ThreadContext.PC; - fpRegIndex = AARCH64ThreadContext.FP; -+ } else if (cpu.equals("riscv64")) { -+ threadFactory = new ProcRISCV64ThreadFactory(this); -+ pcRegIndex = RISCV64ThreadContext.PC; -+ fpRegIndex = RISCV64ThreadContext.FP; - } else if (cpu.equals("ppc64")) { - threadFactory = new ProcPPC64ThreadFactory(this); - pcRegIndex = PPC64ThreadContext.PC; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 -index 000000000..c1cf1fb0f +index 00000000000..96d5dee47ce --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ @@ -58505,7 +59374,7 @@ index 000000000..c1cf1fb0f +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java new file mode 100644 -index 000000000..498fa0dc6 +index 00000000000..f2aa845e665 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58559,7 +59428,7 @@ index 000000000..498fa0dc6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java new file mode 100644 -index 000000000..81afd8fdc +index 00000000000..19f64b8ce2d --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58611,7 +59480,7 @@ index 000000000..81afd8fdc +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 000000000..ab92e3e74 +index 00000000000..aecbda59023 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ @@ -58672,7 +59541,7 @@ index 000000000..ab92e3e74 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 -index 
000000000..1e8cd19b2 +index 00000000000..1d3da6be5af --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58726,7 +59595,7 @@ index 000000000..1e8cd19b2 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java new file mode 100644 -index 000000000..eecb6e029 +index 00000000000..725b94e25a3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58776,11 +59645,11 @@ index 000000000..eecb6e029 + return new RemoteRISCV64Thread(debugger, id); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java new file mode 100644 -index 000000000..426ff0580 +index 00000000000..fb60a70427a --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/risv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. @@ -58828,9 +59697,9 @@ index 000000000..426ff0580 + // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; ++ // union __riscv_fp_state sc_fpregs; + // }; -+ // ++ // + // struct user_regs_struct { + // unsigned long pc; + // unsigned long ra; @@ -58955,10 +59824,17 @@ index 000000000..426ff0580 + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785..74bd614d3 100644 +index 190062785a7..89d676fe3b9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; @@ -58966,7 +59842,7 @@ index 190062785..74bd614d3 100644 import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; -@@ -99,6 +100,8 @@ public class Threads { +@@ -99,6 +100,8 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxPPC64JavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); @@ -58977,10 +59853,10 @@ index 190062785..74bd614d3 100644 access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 -index 000000000..2df0837b6 +index 00000000000..f2e224f28ee --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java -@@ -0,0 +1,132 @@ +@@ -0,0 +1,134 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59017,6 +59893,8 @@ index 000000000..2df0837b6 +import sun.jvm.hotspot.runtime.riscv64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; + +public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; @@ -59115,7 +59993,7 @@ index 000000000..2df0837b6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 -index 000000000..a3bbf1ad1 +index 00000000000..34701c6922f --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ @@ -59344,14 +60222,14 @@ index 000000000..a3bbf1ad1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 -index 000000000..c04def5a1 +index 00000000000..df280005d72 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java -@@ -0,0 +1,554 @@ +@@ -0,0 +1,556 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -59384,6 +60262,8 @@ index 000000000..c04def5a1 +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; + +/** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. */ @@ -59735,11 +60615,11 @@ index 000000000..c04def5a1 + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. -+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. @@ -59904,10 +60784,10 @@ index 000000000..c04def5a1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java new file mode 100644 -index 000000000..4d79e3ee4 +index 00000000000..d0ad2b559a6 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,58 @@ +@@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59940,6 +60820,9 @@ index 000000000..4d79e3ee4 +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; ++import sun.jvm.hotspot.utilities.Observable; ++import sun.jvm.hotspot.utilities.Observer; + +public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; @@ -59968,7 +60851,7 @@ index 000000000..4d79e3ee4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java new file mode 100644 -index 000000000..d7187a5f8 +index 00000000000..4aeb1c6f557 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ @@ -60026,10 +60909,17 @@ index 000000000..d7187a5f8 + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107c..948eabcab 100644 +index 7d7a6107cab..6552ce255fc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -54,7 +54,7 @@ public class PlatformInfo { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -54,7 +54,7 @@ public static String getOS() throws UnsupportedPlatformException { public static boolean knownCPU(String cpu) { final String[] KNOWN = @@ -60038,42 +60928,18 @@ index 7d7a6107c..948eabcab 100644 for(String s : KNOWN) { if(s.equals(cpu)) -diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c -index d0a6f4ea8..a29c7bf8b 100644 ---- a/src/utils/hsdis/hsdis.c -+++ b/src/utils/hsdis/hsdis.c -@@ -28,9 +28,6 @@ - */ - - #include /* required by bfd.h */ --#include --#include --#include - - #include - #include -@@ -479,6 +476,9 @@ static const char* native_arch_name() { - #endif - #ifdef LIBARCH_s390x - res = "s390:64-bit"; -+#endif -+#ifdef LIBARCH_riscv64 -+ res = "riscv:rv64"; - #endif - if (res == NULL) - res = "architecture not set in Makefile!"; diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c2..a21307083 100644 +index 7805918c28a..823b9f39dbf 100644 --- a/test/hotspot/jtreg/compiler/c2/TestBit.java +++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; +@@ -34,7 +34,7 @@ * * @run driver compiler.c2.TestBit * @@ -60082,7 +60948,7 @@ index 7805918c2..a21307083 100644 * @requires vm.debug == true & vm.compiler2.enabled */ public class TestBit { -@@ -54,7 +55,8 @@ public class TestBit { +@@ -54,7 +54,8 @@ static void runTest(String testName) throws Exception { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : @@ -60093,25 +60959,25 @@ index 7805918c2..a21307083 100644 if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f..9d875e33f 100644 +index 558b4218f0b..55374b116e6 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), @@ -60121,25 +60987,25 @@ index 558b4218f..9d875e33f 100644 SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a..a7e277060 100644 +index 3ed72bf0a99..8fb82ee4531 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), @@ -60149,25 +61015,25 @@ index 3ed72bf0a..a7e277060 100644 SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309d..e714fcc59 100644 +index c05cf309dae..aca32137eda 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), @@ -60177,25 +61043,25 @@ index c05cf309d..e714fcc59 100644 SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366b..d52d81e26 100644 +index 58ce5366bae..8deac4f7895 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; - +@@ -41,6 +41,7 @@ import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + +@@ -53,6 +54,8 @@ public static void main(String args[]) throws Throwable { SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION), @@ -60205,17 +61071,17 @@ index 58ce5366b..d52d81e26 100644 SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae..50e549069 100644 +index faa9fdbae67..26635002040 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; +@@ -32,26 +32,27 @@ /** * Generic test case for SHA-related options targeted to any CPU except @@ -60243,19 +61109,19 @@ index faa9fdbae..50e549069 100644 String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of // SHA-related options will not cause any warnings. CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 000000000..d81b5b53f +index 00000000000..2ecfec07a4c --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,102 @@ +@@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60279,7 +61145,7 @@ index 000000000..d81b5b53f + +package compiler.intrinsics.sha.cli.testcases; + -+import compiler.intrinsics.sha.cli.SHAOptionsBase; ++import compiler.intrinsics.sha.cli.DigestOptionsBase; +import jdk.test.lib.process.ExitCode; +import jdk.test.lib.Platform; +import jdk.test.lib.cli.CommandLineOptionTest; @@ -60291,11 +61157,20 @@ index 000000000..d81b5b53f + * which don't support instruction required by the tested option. + */ +public class GenericTestCaseForUnsupportedRISCV64CPU extends -+ SHAOptionsBase.TestCase { ++ DigestOptionsBase.TestCase { ++ ++ final private boolean checkUseSHA; ++ + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, -+ new NotPredicate(SHAOptionsBase.getPredicateForOption( ++ new NotPredicate(DigestOptionsBase.getPredicateForOption( + optionName)))); ++ ++ this.checkUseSHA = checkUseSHA; + } + + @Override @@ -60304,27 +61179,29 @@ index 000000000..d81b5b53f + + "option '-XX:-%s' without any warnings", optionName); + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { -+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) ++ DigestOptionsBase.getWarningForUnsupportedCPU(optionName) + }, shouldPassMessage, shouldPassMessage, ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ SHAOptionsBase.USE_SHA_OPTION, optionName); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ DigestOptionsBase.USE_SHA_OPTION, optionName); + -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. 
++ if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } + } + } + @@ -60334,190 +61211,40 @@ index 000000000..d81b5b53f + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be disabled by default", + optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + -+ // Verify that option is disabled when +UseSHA was passed to JVM. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)); -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -new file mode 100644 -index 000000000..d3aafec8e ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -@@ -0,0 +1,153 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ */ -+ -+/* -+ * @test -+ * @bug 8173585 -+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that -+ * differing code paths are taken contingent upon the length of the input String. -+ * Hence we must test against differing string lengths in order to validate -+ * correct functionality. We also ensure the strings are long enough to trigger -+ * the looping conditions of the individual code paths. -+ * -+ * Run with varing levels of AVX and SSE support, also without the intrinsic at all -+ * -+ * @library /compiler/patches /test/lib -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ */ -+ -+package compiler.intrinsics.string; -+ -+import jdk.test.lib.Asserts; -+ -+public class TestStringLatin1IndexOfChar{ -+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions -+ -+ public static void main(String[] args) throws Exception { -+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... -+ findOneItem(); -+ withOffsetTest(); -+ testEmpty(); ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)), ++ DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ DigestOptionsBase.USE_SHA_OPTION, true)); + } + } -+ -+ private static void testEmpty(){ -+ Asserts.assertEQ("".indexOf('a'), -1); -+ } -+ -+ private final static char SEARCH_CHAR = 'z'; -+ private final static char INVERLEAVING_CHAR = 'a'; -+ private final static char MISSING_CHAR = 'd'; -+ -+ private static void findOneItem(){ -+ //test strings of varying length ensuring that for all lengths one instance of the -+ //search char can be found. 
We check what happens when the search character is in -+ //each position of the search string (including first and last positions) -+ for(int strLength : new int[]{1, 15, 31, 32, 79}){ -+ for(int searchPos = 0; searchPos < strLength; searchPos++){ -+ String totest = makeOneItemStringLatin1(strLength, searchPos); -+ -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ Asserts.assertEQ(intri, nonintri); -+ } -+ } -+ } -+ -+ private static String makeOneItemStringLatin1(int length, int searchPos){ -+ StringBuilder sb = new StringBuilder(length); -+ -+ for(int n =0; n < length; n++){ -+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); -+ } -+ -+ return sb.toString(); -+ } -+ -+ private static void withOffsetTest(){ -+ //progressivly move through string checking indexes and starting offset correctly processed -+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc -+ //we find n s.t. maxlength = (n*3) + 2 -+ int maxaInstances = (MAX_LENGTH-2)/3; -+ -+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ -+ String totest = makeWithOffsetStringLatin1(aInstances); -+ -+ int startoffset; -+ { -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ { -+ int intri = totest.indexOf(SEARCH_CHAR, startoffset); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string -+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); -+ } -+ } -+ -+ private static String makeWithOffsetStringLatin1(int aInstances){ -+ StringBuilder sb = new StringBuilder((aInstances*3) + 2); -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ return sb.toString(); -+ } -+ -+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { -+ //non intrinsic version of indexOfChar -+ byte c = (byte)ch; -+ for (int i = fromIndex; i < value.length(); i++) { -+ if (value.charAt(i) == c) { -+ return i; -+ } -+ } -+ return -1; -+ } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a..8093d6598 100644 +index 2e3e2717a65..7be8af6d035 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60528,9 +61255,16 @@ index 2e3e2717a..8093d6598 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e43..1ff9f36e1 100644 +index 0e06a9e4327..797927b42bf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60541,9 +61275,16 @@ index 0e06a9e43..1ff9f36e1 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf374..f3531ea74 100644 +index c3cdbf37464..be8f7d586c2 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60554,9 +61295,16 @@ index c3cdbf374..f3531ea74 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f..589209447 100644 +index d33bd411f16..d96d5e29c00 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60567,9 +61315,16 @@ index d33bd411f..589209447 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b51..907e21371 100644 +index 992fa4b5161..b09c873d05d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60580,9 +61335,16 @@ index 992fa4b51..907e21371 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528..c41c0b606 100644 +index 3e79b3528b7..fe40ed6f98d 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60593,9 +61355,16 @@ index 3e79b3528..c41c0b606 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224..b626da40d 100644 +index 6603dd224ef..51631910493 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 @@ -60606,9 +61375,16 @@ index 6603dd224..b626da40d 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c9880..92cd84a2f 100644 +index d9a0c988004..d999ae423cf 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60619,9 +61395,16 @@ index d9a0c9880..92cd84a2f 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95ae..e72345799 100644 +index 722db95aed3..65912a5c7fa 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60632,9 +61415,16 @@ index 722db95ae..e72345799 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb..f4f67cf52 100644 +index f58f21feb23..fffdc2f7565 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60644,36 +61434,17 @@ index f58f21feb..f4f67cf52 100644 * * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d..c5e38ba72 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167409 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs - */ - package compiler.runtime.criticalnatives.argumentcorruption; -diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f931..4437367b6 100644 ---- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -@@ -24,7 +24,7 @@ - - /* @test - * @bug 8167408 -- * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") - * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp - */ - package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb..284b51019 100644 +index 7774dabcb5f..7afe3560f30 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -61,15 +61,17 @@ public class IntrinsicPredicates { public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE @@ -60716,17 +61487,17 @@ index 7774dabcb..284b51019 100644 public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a..16c199e37 100644 +index 57256aa5a32..d4d43b01ae6 100644 --- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { +@@ -112,7 +112,7 @@ public static void main(String args[]) throws Exception { // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. @@ -60736,17 +61507,17 @@ index 57256aa5a..16c199e37 100644 } output.reportDiagnosticSummary(); diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abc..46be4dc98 100644 +index 127bb6abcd9..eab19273ad8 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +240,7 @@ public class ReservedStackTest { +@@ -239,7 +239,7 @@ private static boolean isAlwaysSupportedPlatform() { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || @@ -60755,95 +61526,56 @@ index 127bb6abc..46be4dc98 100644 Platform.isOSX() || Platform.isSolaris(); } -diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh -index 0c300d4fd..7f3698c47 100644 ---- a/test/hotspot/jtreg/test_env.sh -+++ b/test/hotspot/jtreg/test_env.sh -@@ -185,6 +185,11 @@ if [ $? = 0 ] - then - VM_CPU="arm" - fi -+grep "riscv64" vm_version.out > ${NULL} -+if [ $? = 0 ] -+then -+ VM_CPU="riscv64" -+fi - grep "ppc" vm_version.out > ${NULL} - if [ $? 
= 0 ] - then -diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b..73e92855d 100644 ---- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -1,5 +1,6 @@ +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index 54640b245f8..f0b7aed5ceb 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,5 +1,4 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -45,7 +46,7 @@ import java.util.Set; +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b76..d4bfe31dd7a 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -index cb3348a0f..bc0d1a743 100644 ---- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -@@ -63,13 +63,13 @@ public class thrinfo001 { - try { - t_a.join(); - } catch (InterruptedException e) {} -+ checkInfo(t_a, t_a.getThreadGroup(), 1); - - thrinfo001b t_b = new thrinfo001b(); - t_b.setPriority(Thread.MIN_PRIORITY); - t_b.setDaemon(true); - checkInfo(t_b, t_b.getThreadGroup(), 2); - t_b.start(); -- checkInfo(t_b, t_b.getThreadGroup(), 2); - try { - t_b.join(); - } catch (InterruptedException e) {} diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1..bb8c79cdd 100644 +index 7990c49a1f6..abeff80e5e8 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +55,8 @@ public class TestCPUInformation { +@@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c..a9cd63db9 100644 +index f4ee0546c70..635cdd18054 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -202,6 +203,10 @@ public class Platform { +@@ -202,6 +202,10 @@ public static boolean isARM() { return isArch("arm.*"); } @@ -60854,14 +61586,20424 @@ index f4ee0546c..a9cd63db9 100644 public static boolean isPPC() { return isArch("ppc.*"); } -diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java + +From c51e546566c937354842a27696bd2221087101ae Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 28 Mar 2023 16:30:04 +0800 +Subject: [PATCH 002/140] Drop zgc part + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 441 ------------------ + .../riscv/gc/z/zBarrierSetAssembler_riscv.hpp | 101 ---- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp | 212 --------- + src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp | 36 -- + src/hotspot/cpu/riscv/gc/z/z_riscv64.ad | 233 --------- + .../cpu/riscv/macroAssembler_riscv.cpp | 46 -- + .../cpu/riscv/macroAssembler_riscv.hpp | 9 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 10 - + 9 files changed, 1 insertion(+), 1093 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/gc/z/z_riscv64.ad + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 742c2126e60..bba3bd4709c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -871,11 +871,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } +- +- if (!UseZGC) { +- // Load barrier has not yet been applied, so ZGC can't verify the oop here +- __ verify_oop(dest->as_register()); +- } ++ __ verify_oop(dest->as_register()); + } + } + +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +deleted file mode 100644 +index 3d3f4d4d774..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++++ /dev/null +@@ -1,441 +0,0 @@ +-/* +- * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.inline.hpp" +-#include "code/codeBlob.hpp" +-#include "code/vmreg.inline.hpp" +-#include "gc/z/zBarrier.inline.hpp" +-#include "gc/z/zBarrierSet.hpp" +-#include "gc/z/zBarrierSetAssembler.hpp" +-#include "gc/z/zBarrierSetRuntime.hpp" +-#include "gc/z/zThreadLocalData.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "utilities/macros.hpp" +-#ifdef COMPILER1 +-#include "c1/c1_LIRAssembler.hpp" +-#include "c1/c1_MacroAssembler.hpp" +-#include "gc/z/c1/zBarrierSetC1.hpp" +-#endif // COMPILER1 +-#ifdef COMPILER2 +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#endif // COMPILER2 +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#else +-#define BLOCK_COMMENT(str) __ block_comment(str) +-#endif +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread) { +- if (!ZBarrierSet::barrier_needed(decorators, type)) { +- // Barrier not needed +- BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); +- return; +- } +- +- assert_different_registers(t1, src.base()); +- assert_different_registers(t0, t1, dst); +- +- Label done; +- +- // Load bad mask into temp register. +- __ la(t0, src); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ ld(dst, Address(t0)); +- +- // Test reference against bad mask. If mask bad, then we need to fix it up. +- __ andr(t1, dst, t1); +- __ beqz(t1, done); +- +- __ enter(); +- +- __ push_call_clobbered_registers_except(RegSet::of(dst)); +- +- if (c_rarg0 != dst) { +- __ mv(c_rarg0, dst); +- } +- +- __ mv(c_rarg1, t0); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- // Make sure dst has the return value. +- if (dst != x10) { +- __ mv(dst, x10); +- } +- +- __ pop_call_clobbered_registers_except(RegSet::of(dst)); +- __ leave(); +- +- __ bind(done); +-} +- +-#ifdef ASSERT +- +-void ZBarrierSetAssembler::store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2) { +- // Verify value +- if (is_reference_type(type)) { +- // Note that src could be noreg, which means we +- // are storing null and can skip verification. +- if (val != noreg) { +- Label done; +- +- // tmp1 and tmp2 are often set to noreg. 
+- RegSet savedRegs = RegSet::of(t0); +- __ push_reg(savedRegs, sp); +- +- __ ld(t0, address_bad_mask_from_thread(xthread)); +- __ andr(t0, val, t0); +- __ beqz(t0, done); +- __ stop("Verify oop store failed"); +- __ should_not_reach_here(); +- __ bind(done); +- __ pop_reg(savedRegs, sp); +- } +- } +- +- // Store value +- BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); +-} +- +-#endif // ASSERT +- +-void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs) { +- if (!is_oop) { +- // Barrier not needed +- return; +- } +- +- BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {"); +- +- assert_different_registers(src, count, t0); +- +- __ push_reg(saved_regs, sp); +- +- if (count == c_rarg0 && src == c_rarg1) { +- // exactly backwards!! +- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- __ xorr(c_rarg1, c_rarg0, c_rarg1); +- __ xorr(c_rarg0, c_rarg0, c_rarg1); +- } else { +- __ mv(c_rarg0, src); +- __ mv(c_rarg1, count); +- } +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), 2); +- +- __ pop_reg(saved_regs, sp); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue"); +-} +- +-void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath) { +- BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {"); +- +- assert_different_registers(jni_env, robj, tmp); +- +- // Resolve jobject +- BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, robj, tmp, slowpath); +- +- // Compute the offset of address bad mask from the field of jni_environment +- long int bad_mask_relative_offset = (long int) (in_bytes(ZThreadLocalData::address_bad_mask_offset()) - +- in_bytes(JavaThread::jni_environment_offset())); +- +- // Load the address bad mask +- __ ld(tmp, Address(jni_env, bad_mask_relative_offset)); +- +- // Check address bad mask +- __ andr(tmp, robj, tmp); +- __ bnez(tmp, slowpath); +- +- BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native"); +-} +- +-#ifdef COMPILER2 +- +-OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { +- if (!OptoReg::is_reg(opto_reg)) { +- return OptoReg::Bad; +- } +- +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_FloatRegister()) { +- return opto_reg & ~1; +- } +- +- return opto_reg; +-} +- +-#undef __ +-#define __ _masm-> +- +-class ZSaveLiveRegisters { +-private: +- MacroAssembler* const _masm; +- RegSet _gp_regs; +- FloatRegSet _fp_regs; +- VectorRegSet _vp_regs; +- +-public: +- void initialize(ZLoadBarrierStubC2* stub) { +- // Record registers that needs to be saved/restored +- RegMaskIterator rmi(stub->live()); +- while (rmi.has_next()) { +- const OptoReg::Name opto_reg = rmi.next(); +- if (OptoReg::is_reg(opto_reg)) { +- const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); +- if (vm_reg->is_Register()) { +- _gp_regs += RegSet::of(vm_reg->as_Register()); +- } else if (vm_reg->is_FloatRegister()) { +- _fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); +- } else if (vm_reg->is_VectorRegister()) { +- const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~(VectorRegisterImpl::max_slots_per_register - 1)); +- _vp_regs += VectorRegSet::of(vm_reg_base->as_VectorRegister()); +- } else { +- fatal("Unknown register type"); +- } +- } +- } +- +- // Remove C-ABI SOE registers, tmp regs and _ref 
register that will be updated +- _gp_regs -= RegSet::range(x18, x27) + RegSet::of(x2) + RegSet::of(x8, x9) + RegSet::of(x5, stub->ref()); +- } +- +- ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _gp_regs(), +- _fp_regs(), +- _vp_regs() { +- // Figure out what registers to save/restore +- initialize(stub); +- +- // Save registers +- __ push_reg(_gp_regs, sp); +- __ push_fp(_fp_regs, sp); +- __ push_vp(_vp_regs, sp); +- } +- +- ~ZSaveLiveRegisters() { +- // Restore registers +- __ pop_vp(_vp_regs, sp); +- __ pop_fp(_fp_regs, sp); +- __ pop_reg(_gp_regs, sp); +- } +-}; +- +-class ZSetupArguments { +-private: +- MacroAssembler* const _masm; +- const Register _ref; +- const Address _ref_addr; +- +-public: +- ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : +- _masm(masm), +- _ref(stub->ref()), +- _ref_addr(stub->ref_addr()) { +- +- // Setup arguments +- if (_ref_addr.base() == noreg) { +- // No self healing +- if (_ref != c_rarg0) { +- __ mv(c_rarg0, _ref); +- } +- __ mv(c_rarg1, zr); +- } else { +- // Self healing +- if (_ref == c_rarg0) { +- // _ref is already at correct place +- __ la(c_rarg1, _ref_addr); +- } else if (_ref != c_rarg1) { +- // _ref is in wrong place, but not in c_rarg1, so fix it first +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, _ref); +- } else if (_ref_addr.base() != c_rarg0) { +- assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); +- __ mv(c_rarg0, _ref); +- __ la(c_rarg1, _ref_addr); +- } else { +- assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); +- if (_ref_addr.base() == c_rarg0) { +- __ mv(t1, c_rarg1); +- __ la(c_rarg1, _ref_addr); +- __ mv(c_rarg0, t1); +- } else { +- ShouldNotReachHere(); +- } +- } +- } +- } +- +- ~ZSetupArguments() { +- // Transfer result +- if (_ref != x10) { +- __ mv(_ref, x10); +- } +- } +-}; +- +-#undef __ +-#define __ masm-> +- +-void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { +- BLOCK_COMMENT("ZLoadBarrierStubC2"); +- +- // Stub entry +- __ bind(*stub->entry()); +- +- { +- ZSaveLiveRegisters save_live_registers(masm, stub); +- ZSetupArguments setup_arguments(masm, stub); +- int32_t offset = 0; +- __ la_patchable(t0, stub->slow_path(), offset); +- __ jalr(x1, t0, offset); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +- +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-#undef __ +-#define __ ce->masm()-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { +- assert_different_registers(xthread, ref->as_register(), t1); +- __ ld(t1, address_bad_mask_from_thread(xthread)); +- __ andr(t1, t1, ref->as_register()); +-} +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const { +- // Stub entry +- __ bind(*stub->entry()); +- +- Register ref = stub->ref()->as_register(); +- Register ref_addr = noreg; +- Register tmp = noreg; +- +- if (stub->tmp()->is_valid()) { +- // Load address into tmp register +- ce->leal(stub->ref_addr(), stub->tmp()); +- ref_addr = tmp = stub->tmp()->as_pointer_register(); +- } else { +- // Address already in register +- ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register(); +- } +- +- assert_different_registers(ref, ref_addr, noreg); +- +- // Save x10 unless it is the result or tmp register +- // Set up SP to accomodate parameters and maybe x10. 
+- if (ref != x10 && tmp != x10) { +- __ sub(sp, sp, 32); +- __ sd(x10, Address(sp, 16)); +- } else { +- __ sub(sp, sp, 16); +- } +- +- // Setup arguments and call runtime stub +- ce->store_parameter(ref_addr, 1); +- ce->store_parameter(ref, 0); +- +- __ far_call(stub->runtime_stub()); +- +- // Verify result +- __ verify_oop(x10, "Bad oop"); +- +- +- // Move result into place +- if (ref != x10) { +- __ mv(ref, x10); +- } +- +- // Restore x10 unless it is the result or tmp register +- if (ref != x10 && tmp != x10) { +- __ ld(x10, Address(sp, 16)); +- __ add(sp, sp, 32); +- } else { +- __ add(sp, sp, 16); +- } +- +- // Stub exit +- __ j(*stub->continuation()); +-} +- +-#undef __ +-#define __ sasm-> +- +-void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const { +- __ prologue("zgc_load_barrier stub", false); +- +- __ push_call_clobbered_registers_except(RegSet::of(x10)); +- +- // Setup arguments +- __ load_parameter(0, c_rarg0); +- __ load_parameter(1, c_rarg1); +- +- __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); +- +- __ pop_call_clobbered_registers_except(RegSet::of(x10)); +- +- __ epilogue(); +-} +- +-#undef __ +-#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp +deleted file mode 100644 +index dc07ab635fe..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.hpp ++++ /dev/null +@@ -1,101 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +- +-#include "code/vmreg.hpp" +-#include "oops/accessDecorators.hpp" +-#ifdef COMPILER2 +-#include "opto/optoreg.hpp" +-#endif // COMPILER2 +- +-#ifdef COMPILER1 +-class LIR_Assembler; +-class LIR_Opr; +-class StubAssembler; +-class ZLoadBarrierStubC1; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +-class Node; +-class ZLoadBarrierStubC2; +-#endif // COMPILER2 +- +-class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +-public: +- virtual void load_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Register dst, +- Address src, +- Register tmp1, +- Register tmp_thread); +- +-#ifdef ASSERT +- virtual void store_at(MacroAssembler* masm, +- DecoratorSet decorators, +- BasicType type, +- Address dst, +- Register val, +- Register tmp1, +- Register tmp2); +-#endif // ASSERT +- +- virtual void arraycopy_prologue(MacroAssembler* masm, +- DecoratorSet decorators, +- bool is_oop, +- Register src, +- Register dst, +- Register count, +- RegSet saved_regs); +- +- virtual void try_resolve_jobject_in_native(MacroAssembler* masm, +- Register jni_env, +- Register robj, +- Register tmp, +- Label& slowpath); +- +-#ifdef COMPILER1 +- void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; +- +- void generate_c1_load_barrier_stub(LIR_Assembler* ce, +- ZLoadBarrierStubC1* stub) const; +- +- void generate_c1_load_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) const; +-#endif // COMPILER1 +- +-#ifdef COMPILER2 +- OptoReg::Name refine_register(const Node* node, +- OptoReg::Name opto_reg); +- +- void generate_c2_load_barrier_stub(MacroAssembler* masm, +- ZLoadBarrierStubC2* stub) const; +-#endif // COMPILER2 +-}; +- +-#endif // CPU_RISCV_GC_Z_ZBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp +deleted file mode 100644 +index d14997790af..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.cpp ++++ /dev/null +@@ -1,212 +0,0 @@ +-/* +- * Copyright (c) 2017, 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "gc/shared/gcLogPrecious.hpp" +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/zGlobals.hpp" +-#include "runtime/globals.hpp" +-#include "runtime/os.hpp" +-#include "utilities/globalDefinitions.hpp" +-#include "utilities/powerOfTwo.hpp" +- +-#ifdef LINUX +-#include +-#endif // LINUX +- +-// +-// The heap can have three different layouts, depending on the max heap size. +-// +-// Address Space & Pointer Layout 1 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000014000000000 (20TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000010000000000 (16TB) +-// . . +-// +--------------------------------+ 0x00000c0000000000 (12TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000040000000000 (4TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 6 5 2 1 0 +-// +--------------------+----+-----------------------------------------------+ +-// |00000000 00000000 00|1111|11 11111111 11111111 11111111 11111111 11111111| +-// +--------------------+----+-----------------------------------------------+ +-// | | | +-// | | * 41-0 Object Offset (42-bits, 4TB address space) +-// | | +-// | * 45-42 Metadata Bits (4-bits) 0001 = Marked0 (Address view 4-8TB) +-// | 0010 = Marked1 (Address view 8-12TB) +-// | 0100 = Remapped (Address view 16-20TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-46 Fixed (18-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 2 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000280000000000 (40TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// . . +-// +--------------------------------+ 0x0000180000000000 (24TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000080000000000 (8TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 7 6 3 2 0 +-// +------------------+-----+------------------------------------------------+ +-// |00000000 00000000 0|1111|111 11111111 11111111 11111111 11111111 11111111| +-// +-------------------+----+------------------------------------------------+ +-// | | | +-// | | * 42-0 Object Offset (43-bits, 8TB address space) +-// | | +-// | * 46-43 Metadata Bits (4-bits) 0001 = Marked0 (Address view 8-16TB) +-// | 0010 = Marked1 (Address view 16-24TB) +-// | 0100 = Remapped (Address view 32-40TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-47 Fixed (17-bits, always zero) +-// +-// +-// Address Space & Pointer Layout 3 +-// -------------------------------- +-// +-// +--------------------------------+ 0x00007FFFFFFFFFFF (127TB) +-// . . +-// . . +-// . . +-// +--------------------------------+ 0x0000500000000000 (80TB) +-// | Remapped View | +-// +--------------------------------+ 0x0000400000000000 (64TB) +-// . . 
+-// +--------------------------------+ 0x0000300000000000 (48TB) +-// | Marked1 View | +-// +--------------------------------+ 0x0000200000000000 (32TB) +-// | Marked0 View | +-// +--------------------------------+ 0x0000100000000000 (16TB) +-// . . +-// +--------------------------------+ 0x0000000000000000 +-// +-// 6 4 4 4 4 +-// 3 8 7 4 3 0 +-// +------------------+----+-------------------------------------------------+ +-// |00000000 00000000 |1111|1111 11111111 11111111 11111111 11111111 11111111| +-// +------------------+----+-------------------------------------------------+ +-// | | | +-// | | * 43-0 Object Offset (44-bits, 16TB address space) +-// | | +-// | * 47-44 Metadata Bits (4-bits) 0001 = Marked0 (Address view 16-32TB) +-// | 0010 = Marked1 (Address view 32-48TB) +-// | 0100 = Remapped (Address view 64-80TB) +-// | 1000 = Finalizable (Address view N/A) +-// | +-// * 63-48 Fixed (16-bits, always zero) +-// +- +-// Default value if probing is not implemented for a certain platform: 128TB +-static const size_t DEFAULT_MAX_ADDRESS_BIT = 47; +-// Minimum value returned, if probing fails: 64GB +-static const size_t MINIMUM_MAX_ADDRESS_BIT = 36; +- +-static size_t probe_valid_max_address_bit() { +-#ifdef LINUX +- size_t max_address_bit = 0; +- const size_t page_size = os::vm_page_size(); +- for (size_t i = DEFAULT_MAX_ADDRESS_BIT; i > MINIMUM_MAX_ADDRESS_BIT; --i) { +- const uintptr_t base_addr = ((uintptr_t) 1U) << i; +- if (msync((void*)base_addr, page_size, MS_ASYNC) == 0) { +- // msync suceeded, the address is valid, and maybe even already mapped. +- max_address_bit = i; +- break; +- } +- if (errno != ENOMEM) { +- // Some error occured. This should never happen, but msync +- // has some undefined behavior, hence ignore this bit. +-#ifdef ASSERT +- fatal("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#else // ASSERT +- log_warning_p(gc)("Received '%s' while probing the address space for the highest valid bit", os::errno_name(errno)); +-#endif // ASSERT +- continue; +- } +- // Since msync failed with ENOMEM, the page might not be mapped. +- // Try to map it, to see if the address is valid. 
+- void* const result_addr = mmap((void*) base_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- munmap(result_addr, page_size); +- } +- if ((uintptr_t) result_addr == base_addr) { +- // address is valid +- max_address_bit = i; +- break; +- } +- } +- if (max_address_bit == 0) { +- // probing failed, allocate a very high page and take that bit as the maximum +- const uintptr_t high_addr = ((uintptr_t) 1U) << DEFAULT_MAX_ADDRESS_BIT; +- void* const result_addr = mmap((void*) high_addr, page_size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); +- if (result_addr != MAP_FAILED) { +- max_address_bit = BitsPerSize_t - count_leading_zeros((size_t) result_addr) - 1; +- munmap(result_addr, page_size); +- } +- } +- log_info_p(gc, init)("Probing address space for the highest valid bit: " SIZE_FORMAT, max_address_bit); +- return MAX2(max_address_bit, MINIMUM_MAX_ADDRESS_BIT); +-#else // LINUX +- return DEFAULT_MAX_ADDRESS_BIT; +-#endif // LINUX +-} +- +-size_t ZPlatformAddressOffsetBits() { +- const static size_t valid_max_address_offset_bits = probe_valid_max_address_bit() + 1; +- const size_t max_address_offset_bits = valid_max_address_offset_bits - 3; +- const size_t min_address_offset_bits = max_address_offset_bits - 2; +- const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio); +- const size_t address_offset_bits = log2i_exact(address_offset); +- return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits); +-} +- +-size_t ZPlatformAddressMetadataShift() { +- return ZPlatformAddressOffsetBits(); +-} +diff --git a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp b/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp +deleted file mode 100644 +index f20ecd9b073..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/zGlobals_riscv.hpp ++++ /dev/null +@@ -1,36 +0,0 @@ +-/* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +-#define CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +- +-const size_t ZPlatformGranuleSizeShift = 21; // 2MB +-const size_t ZPlatformHeapViews = 3; +-const size_t ZPlatformCacheLineSize = 64; +- +-size_t ZPlatformAddressOffsetBits(); +-size_t ZPlatformAddressMetadataShift(); +- +-#endif // CPU_RISCV_GC_Z_ZGLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad b/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad +deleted file mode 100644 +index 6b6f87814a5..00000000000 +--- a/src/hotspot/cpu/riscv/gc/z/z_riscv64.ad ++++ /dev/null +@@ -1,233 +0,0 @@ +-// +-// Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +- +-source_hpp %{ +- +-#include "gc/shared/gc_globals.hpp" +-#include "gc/z/c2/zBarrierSetC2.hpp" +-#include "gc/z/zThreadLocalData.hpp" +- +-%} +- +-source %{ +- +-static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, int barrier_data) { +- if (barrier_data == ZLoadBarrierElided) { +- return; +- } +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, barrier_data); +- __ ld(tmp, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(tmp, tmp, ref); +- __ bnez(tmp, *stub->entry(), true /* far */); +- __ bind(*stub->continuation()); +-} +- +-static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { +- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, ZLoadBarrierStrong); +- __ j(*stub->entry()); +- __ bind(*stub->continuation()); +-} +- +-%} +- +-// Load Pointer +-instruct zLoadP(iRegPNoSp dst, memory mem) +-%{ +- match(Set dst (LoadP mem)); +- predicate(UseZGC && (n->as_Load()->barrier_data() != 0)); +- effect(TEMP dst); +- +- ins_cost(4 * DEFAULT_COST); +- +- format %{ "ld $dst, $mem, #@zLoadP" %} +- +- ins_encode %{ +- const Address ref_addr (as_Register($mem$$base), $mem$$disp); +- __ ld($dst$$Register, ref_addr); +- z_load_barrier(_masm, this, ref_addr, $dst$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(iload_reg_mem); +-%} +- +-instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapP\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ +- match(Set res (CompareAndSwapP mem (Binary oldval newval))); +- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); +- effect(KILL cr, TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $mem, $oldval, $newval, #@zCompareAndSwapPAcq\n\t" +- "mv $res, $res == $oldval" %} +- +- ins_encode %{ +- 
Label failed; +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ beqz($res$$Register, failed); +- __ mv(t0, $oldval$$Register); +- __ bind(failed); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t1, Address(xthread, ZThreadLocalData::address_bad_mask_offset()), t1 /* tmp */); +- __ andr(t1, t1, t0); +- __ beqz(t1, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), t0 /* ref */, t1 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, +- true /* result_as_bool */); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangeP" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) %{ +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); +- effect(TEMP_DEF res); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "cmpxchg $res = $mem, $oldval, $newval, #@zCompareAndExchangePAcq" %} +- +- ins_encode %{ +- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- if (barrier_data() != ZLoadBarrierElided) { +- Label good; +- __ ld(t0, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(t0, t0, $res$$Register); +- __ beqz(t0, good); +- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, t0 /* tmp */); +- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, +- Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register); +- __ bind(good); +- } +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && !needs_acquiring_load_reserved(n) && 
n->as_LoadStore()->barrier_data() != 0); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(2 * VOLATILE_REF_COST); +- +- format %{ "atomic_xchg $prev, $newv, [$mem], #@zGetAndSetP" %} +- +- ins_encode %{ +- __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- +- ins_pipe(pipe_serial); +-%} +- +-instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ +- match(Set prev (GetAndSetP mem newv)); +- predicate(UseZGC && needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() != 0)); +- effect(TEMP_DEF prev, KILL cr); +- +- ins_cost(VOLATILE_REF_COST); +- +- format %{ "atomic_xchg_acq $prev, $newv, [$mem], #@zGetAndSetPAcq" %} +- +- ins_encode %{ +- __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); +- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, t0 /* tmp */, barrier_data()); +- %} +- ins_pipe(pipe_serial); +-%} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 86710295444..9d2cc4cf89f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1046,52 +1046,6 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + return count; + } + +-#ifdef COMPILER2 +-int MacroAssembler::push_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = 0; i < count; i++) { +- sub(stack, stack, vector_size_in_bytes); +- vs1r_v(as_VectorRegister(regs[i]), stack); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +- +-int MacroAssembler::pop_vp(unsigned int bitset, Register stack) { +- CompressibleRegion cr(this); +- int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- +- // Scan bitset to accumulate register pairs +- unsigned char regs[32]; +- int count = 0; +- for (int reg = 31; reg >= 0; reg--) { +- if ((1U << 31) & bitset) { +- regs[count++] = reg; +- } +- bitset <<= 1; +- } +- +- for (int i = count - 1; i >= 0; i--) { +- vl1r_v(as_VectorRegister(regs[i]), stack); +- add(stack, stack, vector_size_in_bytes); +- } +- +- return count * vector_size_in_bytes / wordSize; +-} +-#endif // COMPILER2 +- + void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { + CompressibleRegion cr(this); + // Push integer registers x7, x10-x17, x28-x31. 
+diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 23e09475be1..b2f0455a1f1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -484,12 +484,6 @@ class MacroAssembler: public Assembler { + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); +- void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } +- void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } +-#ifdef COMPILER2 +- void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); } +- void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); } +-#endif // COMPILER2 + + // Push and pop everything that might be clobbered by a native + // runtime call except t0 and t1. (They are always +@@ -783,9 +777,6 @@ class MacroAssembler: public Assembler { + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + +- int push_vp(unsigned int bitset, Register stack); +- int pop_vp(unsigned int bitset, Register stack); +- + // vext + void vmnot_m(VectorRegister vd, VectorRegister vs); + void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b3fdd04db1b..b05edf7172c 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -546,16 +546,6 @@ class StubGenerator: public StubCodeGenerator { + // make sure object is 'reasonable' + __ beqz(x10, exit); // if obj is NULL it is OK + +-#if INCLUDE_ZGC +- if (UseZGC) { +- // Check if mask is good. 
+- // verifies that ZAddressBadMask & x10 == 0 +- __ ld(c_rarg3, Address(xthread, ZThreadLocalData::address_bad_mask_offset())); +- __ andr(c_rarg2, x10, c_rarg3); +- __ bnez(c_rarg2, error); +- } +-#endif +- + // Check if the oop is in the right area of memory + __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, x10, c_rarg3); + +From 7772140df96747b42b13007d0827fc21d2a8b926 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 15:43:39 +0800 +Subject: [PATCH 003/140] Drop the C2 Vector part + +--- + make/hotspot/gensrc/GensrcAdlc.gmk | 1 - + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 325 --- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 52 - + src/hotspot/cpu/riscv/globals_riscv.hpp | 8 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 +- + .../cpu/riscv/macroAssembler_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 44 +- + src/hotspot/cpu/riscv/register_riscv.cpp | 5 - + src/hotspot/cpu/riscv/register_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/riscv.ad | 476 +--- + src/hotspot/cpu/riscv/riscv_v.ad | 2065 ----------------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 61 +- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 110 - + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 4 - + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 10 +- + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 17 +- + 16 files changed, 41 insertions(+), 3167 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/riscv_v.ad + +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index 67f4c6f0574..51137b99db2 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -152,7 +152,6 @@ ifeq ($(call check-jvm-feature, compiler2), true) + + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ +- $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +index 27770dc17aa..73f84a724ca 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -1319,328 +1319,3 @@ void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRe + + bind(Done); + } +- +-void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { +- Label loop; +- Assembler::SEW sew = islatin ? 
Assembler::e8 : Assembler::e16; +- +- bind(loop); +- vsetvli(tmp1, cnt, sew, Assembler::m2); +- vlex_v(vr1, a1, sew); +- vlex_v(vr2, a2, sew); +- vmsne_vv(vrs, vr1, vr2); +- vfirst_m(tmp2, vrs); +- bgez(tmp2, DONE); +- sub(cnt, cnt, tmp1); +- if (!islatin) { +- slli(tmp1, tmp1, 1); // get byte counts +- } +- add(a1, a1, tmp1); +- add(a2, a2, tmp1); +- bnez(cnt, loop); +- +- mv(result, true); +-} +- +-void C2_MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- BLOCK_COMMENT("string_equals_v {"); +- +- mv(result, false); +- +- if (elem_size == 2) { +- srli(cnt, cnt, 1); +- } +- +- element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- BLOCK_COMMENT("} string_equals_v"); +-} +- +-// used by C2 ClearArray patterns. +-// base: Address of a buffer to be zeroed +-// cnt: Count in HeapWords +-// +-// base, cnt, v0, v1 and t0 are clobbered. +-void C2_MacroAssembler::clear_array_v(Register base, Register cnt) { +- Label loop; +- +- // making zero words +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vxor_vv(v0, v0, v0); +- +- bind(loop); +- vsetvli(t0, cnt, Assembler::e64, Assembler::m4); +- vse64_v(v0, base); +- sub(cnt, cnt, t0); +- shadd(base, t0, base, t0, 3); +- bnez(cnt, loop); +-} +- +-void C2_MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, +- Register cnt1, int elem_size) { +- Label DONE; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); +- +- BLOCK_COMMENT("arrays_equals_v {"); +- +- // if (a1 == a2), return true +- mv(result, true); +- beq(a1, a2, DONE); +- +- mv(result, false); +- // if a1 == null or a2 == null, return false +- beqz(a1, DONE); +- beqz(a2, DONE); +- // if (a1.length != a2.length), return false +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt1, cnt2, DONE); +- +- la(a1, Address(a1, base_offset)); +- la(a2, Address(a2, base_offset)); +- +- element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); +- +- bind(DONE); +- +- BLOCK_COMMENT("} arrays_equals_v"); +-} +- +-void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, +- Register result, Register tmp1, Register tmp2, int encForm) { +- Label DIFFERENCE, DONE, L, loop; +- bool encLL = encForm == StrIntrinsicNode::LL; +- bool encLU = encForm == StrIntrinsicNode::LU; +- bool encUL = encForm == StrIntrinsicNode::UL; +- +- bool str1_isL = encLL || encLU; +- bool str2_isL = encLL || encUL; +- +- int minCharsInWord = encLL ? wordSize : wordSize / 2; +- +- BLOCK_COMMENT("string_compare {"); +- +- // for Lating strings, 1 byte for 1 character +- // for UTF16 strings, 2 bytes for 1 character +- if (!str1_isL) +- sraiw(cnt1, cnt1, 1); +- if (!str2_isL) +- sraiw(cnt2, cnt2, 1); +- +- // if str1 == str2, return the difference +- // save the minimum of the string lengths in cnt2. +- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- if (str1_isL == str2_isL) { // LL or UU +- element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); +- j(DONE); +- } else { // LU or UL +- Register strL = encLU ? str1 : str2; +- Register strU = encLU ? str2 : str1; +- VectorRegister vstr1 = encLU ? 
v4 : v0; +- VectorRegister vstr2 = encLU ? v0 : v4; +- +- bind(loop); +- vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); +- vle8_v(vstr1, strL); +- vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); +- vzext_vf2(vstr2, vstr1); +- vle16_v(vstr1, strU); +- vmsne_vv(v0, vstr2, vstr1); +- vfirst_m(tmp2, v0); +- bgez(tmp2, DIFFERENCE); +- sub(cnt2, cnt2, tmp1); +- add(strL, strL, tmp1); +- shadd(strU, tmp1, strU, tmp1, 1); +- bnez(cnt2, loop); +- j(DONE); +- } +- bind(DIFFERENCE); +- slli(tmp1, tmp2, 1); +- add(str1, str1, str1_isL ? tmp2 : tmp1); +- add(str2, str2, str2_isL ? tmp2 : tmp1); +- str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); +- str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); +- sub(result, tmp1, tmp2); +- +- bind(DONE); +-} +- +-void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { +- Label loop; +- assert_different_registers(src, dst, len, tmp, t0); +- +- BLOCK_COMMENT("byte_array_inflate_v {"); +- bind(loop); +- vsetvli(tmp, len, Assembler::e8, Assembler::m2); +- vle8_v(v2, src); +- vsetvli(t0, len, Assembler::e16, Assembler::m4); +- vzext_vf2(v0, v2); +- vse16_v(v0, dst); +- sub(len, len, tmp); +- add(src, src, tmp); +- shadd(dst, tmp, dst, tmp, 1); +- bnez(len, loop); +- BLOCK_COMMENT("} byte_array_inflate_v"); +-} +- +-// Compress char[] array to byte[]. +-// result: the array length if every element in array can be encoded; 0, otherwise. +-void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label done; +- encode_iso_array_v(src, dst, len, result, tmp); +- beqz(len, done); +- mv(result, zr); +- bind(done); +-} +- +-// result: the number of elements had been encoded. +-void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { +- Label loop, DIFFERENCE, DONE; +- +- BLOCK_COMMENT("encode_iso_array_v {"); +- mv(result, 0); +- +- bind(loop); +- mv(tmp, 0xff); +- vsetvli(t0, len, Assembler::e16, Assembler::m2); +- vle16_v(v2, src); +- // if element > 0xff, stop +- vmsgtu_vx(v1, v2, tmp); +- vfirst_m(tmp, v1); +- vmsbf_m(v0, v1); +- // compress char to byte +- vsetvli(t0, len, Assembler::e8); +- vncvt_x_x_w(v1, v2, Assembler::v0_t); +- vse8_v(v1, dst, Assembler::v0_t); +- +- bgez(tmp, DIFFERENCE); +- add(result, result, t0); +- add(dst, dst, t0); +- sub(len, len, t0); +- shadd(src, t0, src, t0, 1); +- bnez(len, loop); +- j(DONE); +- +- bind(DIFFERENCE); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} encode_iso_array_v"); +-} +- +-void C2_MacroAssembler::count_positives_v(Register ary, Register len, Register result, Register tmp) { +- Label LOOP, SET_RESULT, DONE; +- +- BLOCK_COMMENT("count_positives_v {"); +- mv(result, zr); +- +- bind(LOOP); +- vsetvli(t0, len, Assembler::e8, Assembler::m4); +- vle8_v(v0, ary); +- vmslt_vx(v0, v0, zr); +- vfirst_m(tmp, v0); +- bgez(tmp, SET_RESULT); +- // if tmp == -1, all bytes are positive +- add(result, result, t0); +- +- sub(len, len, t0); +- add(ary, ary, t0); +- bnez(len, LOOP); +- j(DONE); +- +- // add remaining positive bytes count +- bind(SET_RESULT); +- add(result, result, tmp); +- +- bind(DONE); +- BLOCK_COMMENT("} count_positives_v"); +-} +- +-void C2_MacroAssembler::string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL) { +- mv(result, zr); +- +- Label loop, MATCH, DONE; +- Assembler::SEW sew = isL ? 
Assembler::e8 : Assembler::e16; +- bind(loop); +- vsetvli(tmp1, cnt1, sew, Assembler::m4); +- vlex_v(v0, str1, sew); +- vmseq_vx(v0, v0, ch); +- vfirst_m(tmp2, v0); +- bgez(tmp2, MATCH); // if equal, return index +- +- add(result, result, tmp1); +- sub(cnt1, cnt1, tmp1); +- if (!isL) slli(tmp1, tmp1, 1); +- add(str1, str1, tmp1); +- bnez(cnt1, loop); +- +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- add(result, result, tmp2); +- +- bind(DONE); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- +- is_min ? vfmin_vv(dst, src1, src2) +- : vfmax_vv(dst, src1, src2); +- +- vmfne_vv(v0, src1, src1); +- vfadd_vv(dst, src1, src1, Assembler::v0_t); +- vmfne_vv(v0, src2, src2); +- vfadd_vv(dst, src2, src2, Assembler::v0_t); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min) { +- assert_different_registers(src2, tmp1, tmp2); +- +- Label L_done, L_NaN; +- vsetvli(t0, x0, is_double ? Assembler::e64 : Assembler::e32); +- vfmv_s_f(tmp2, src1); +- +- is_min ? vfredmin_vs(tmp1, src2, tmp2) +- : vfredmax_vs(tmp1, src2, tmp2); +- +- fsflags(zr); +- // Checking NaNs +- vmflt_vf(tmp2, src2, src1); +- frflags(t0); +- bnez(t0, L_NaN); +- j(L_done); +- +- bind(L_NaN); +- vfmv_s_f(tmp2, src1); +- vfredsum_vs(tmp1, src2, tmp2); +- +- bind(L_done); +- vfmv_f_s(dst, tmp1); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +index c71df4c101b..90b6554af02 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -28,13 +28,6 @@ + + // C2_MacroAssembler contains high-level macros for C2 + +- private: +- void element_compare(Register r1, Register r2, +- Register result, Register cnt, +- Register tmp1, Register tmp2, +- VectorRegister vr1, VectorRegister vr2, +- VectorRegister vrs, +- bool is_latin, Label& DONE); + public: + + void string_compare(Register str1, Register str2, +@@ -145,49 +138,4 @@ + FloatRegister src1, FloatRegister src2, + bool is_double, bool is_min); + +- // intrinsic methods implemented by rvv instructions +- void string_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void arrays_equals_v(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- void string_compare_v(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register result, +- Register tmp1, Register tmp2, +- int encForm); +- +- void clear_array_v(Register base, Register cnt); +- +- void byte_array_inflate_v(Register src, Register dst, +- Register len, Register tmp); +- +- void char_array_compress_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void encode_iso_array_v(Register src, Register dst, +- Register len, Register result, +- Register tmp); +- +- void count_positives_v(Register ary, Register len, +- Register result, Register tmp); +- +- void string_indexof_char_v(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- bool isL); +- +- void minmax_FD_v(VectorRegister dst, +- VectorRegister src1, 
VectorRegister src2, +- bool is_double, bool is_min); +- +- void reduce_minmax_FD_v(FloatRegister dst, +- FloatRegister src1, VectorRegister src2, +- VectorRegister tmp1, VectorRegister tmp2, +- bool is_double, bool is_min); +- + #endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index cbfc0583883..845064d6cbc 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -90,10 +90,8 @@ define_pd_global(intx, InlineSmallCode, 1000); + "Extend fence.i to fence.i + fence.") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ +- product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ +- product(bool, UseRVB, false, EXPERIMENTAL, "Use RVB instructions") \ +- product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \ +- product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \ +- "Use RVV instructions for left/right shift of BigInteger") ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseRVB, false, "Use RVB instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + + #endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9d2cc4cf89f..8b8d126f6c9 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1086,7 +1086,7 @@ void MacroAssembler::popa() { + pop_reg(0xffffffe2, sp); + } + +-void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++void MacroAssembler::push_CPU_state() { + CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) + push_reg(0xffffffe0, sp); +@@ -1096,28 +1096,10 @@ void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } +- +- // vector registers +- if (save_vectors) { +- sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- add(t0, sp, vector_size_in_bytes * i); +- vse64_v(as_VectorRegister(i), t0); +- } +- } + } + +-void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++void MacroAssembler::pop_CPU_state() { + CompressibleRegion cr(this); +- // vector registers +- if (restore_vectors) { +- vsetvli(t0, x0, Assembler::e64, Assembler::m8); +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { +- vle64_v(as_VectorRegister(i), sp); +- add(sp, sp, vector_size_in_bytes * 8); +- } +- } + + // float registers + for (int i = 0; i < 32; i++) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b2f0455a1f1..b43131514c1 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -501,8 +501,8 @@ class MacroAssembler: public Assembler { + + void pusha(); + void popa(); +- void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); +- void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ void push_CPU_state(); ++ void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + 
void reinit_heapbase(); +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +index 23a75d20502..4c7fabd7240 100644 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp +@@ -31,16 +31,9 @@ + // false => size gets scaled to BytesPerLong, ok. + static const bool init_array_count_is_in_bytes = false; + +- // Whether this platform implements the scalable vector feature +- static const bool implements_scalable_vector = true; +- +- static const bool supports_scalable_vector() { +- return UseRVV; +- } +- +- // riscv supports misaligned vectors store/load. ++ // riscv doesn't support misaligned vectors store/load on JDK11. + static constexpr bool misaligned_vectors_ok() { +- return true; ++ return false; + } + + // Whether code generation need accurate ConvI2L types. +@@ -53,9 +46,6 @@ + // the cpu only look at the lower 5/6 bits anyway? + static const bool need_masked_shift_count = false; + +- // No support for generic vector operands. +- static const bool supports_generic_vector_operands = false; +- + static constexpr bool isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + // Probably always true, even if a temp register is required. +@@ -127,31 +117,6 @@ + // the relevant 32 bits. + static const bool int_in_long = true; + +- // Does the CPU supports vector variable shift instructions? +- static constexpr bool supports_vector_variable_shifts(void) { +- return false; +- } +- +- // Does the CPU supports vector variable rotate instructions? +- static constexpr bool supports_vector_variable_rotates(void) { +- return false; +- } +- +- // Does the CPU supports vector constant rotate instructions? +- static constexpr bool supports_vector_constant_rotates(int shift) { +- return false; +- } +- +- // Does the CPU supports vector unsigned comparison instructions? +- static const bool supports_vector_comparison_unsigned(int vlen, BasicType bt) { +- return false; +- } +- +- // Some microarchitectures have mask registers used on vectors +- static const bool has_predicated_vectors(void) { +- return false; +- } +- + // true means we have fast l2f convers + // false means that conversion is done by runtime call + static constexpr bool convL2FSupported(void) { +@@ -161,9 +126,4 @@ + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + +- // Returns pre-selection estimated size of a vector operation. 
+- static int vector_op_pre_select_sz_estimate(int vopc, BasicType ety, int vlen) { +- return 0; +- } +- + #endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index f8116e9df8c..96cf1996a83 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -37,11 +37,6 @@ const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + +-const int ConcreteRegisterImpl::max_vpr = +- ConcreteRegisterImpl::max_fpr + +- VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; +- +- + const char* RegisterImpl::name() const { + static const char *const names[number_of_registers] = { + "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index a9200cac647..d697751f55f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -307,14 +307,12 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + // it's optoregs. + + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + +- FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + +- VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; +- static const int max_vpr; + }; + + typedef AbstractRegSet RegSet; +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 588887e1d96..85593a942e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -226,177 +226,6 @@ reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); + reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); + reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +-// ---------------------------- +-// Vector Registers +-// ---------------------------- +- +-// For RVV vector registers, we simply extend vector register size to 4 +-// 'logical' slots. This is nominally 128 bits but it actually covers +-// all possible 'physical' RVV vector register lengths from 128 ~ 1024 +-// bits. The 'physical' RVV vector register length is detected during +-// startup, so the register allocator is able to identify the correct +-// number of bytes needed for an RVV spill/unspill. 
+- +-reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); +-reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); +-reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); +-reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); +- +-reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); +-reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); +-reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); +-reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); +- +-reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); +-reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); +-reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); +-reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); +- +-reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); +-reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); +-reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); +-reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); +- +-reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); +-reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); +-reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); +-reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); +- +-reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); +-reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); +-reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); +-reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); +- +-reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); +-reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); +-reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); +-reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); +- +-reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); +-reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); +-reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); +-reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); +- +-reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); +-reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); +-reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); +-reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); +- +-reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); +-reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); +-reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); +-reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); +- +-reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); +-reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); +-reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); +-reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); +- +-reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); +-reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); +-reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); +-reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); +- +-reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); +-reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); +-reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); +-reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); +- +-reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); +-reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); +-reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
+-reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); +- +-reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); +-reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); +-reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); +-reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); +- +-reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); +-reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); +-reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); +-reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); +- +-reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); +-reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); +-reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); +-reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); +- +-reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); +-reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); +-reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); +-reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); +- +-reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); +-reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); +-reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); +-reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); +- +-reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); +-reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); +-reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); +-reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); +- +-reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); +-reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); +-reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); +-reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); +- +-reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); +-reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); +-reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); +-reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); +- +-reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); +-reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); +-reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); +-reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); +- +-reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); +-reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); +-reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); +-reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); +- +-reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); +-reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); +-reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); +-reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); +- +-reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); +-reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); +-reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); +-reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); +- +-reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); +-reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); +-reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); +-reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); +- +-reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); +-reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); +-reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); +-reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); +- +-reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); +-reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); +-reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); +-reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); +- +-reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); +-reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); +-reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); +-reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); +- +-reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); +-reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); +-reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); +-reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); +- +-reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); +-reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); +-reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); +-reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); +- + // ---------------------------- + // Special Registers + // ---------------------------- +@@ -495,42 +324,7 @@ alloc_class chunk1( + F27, F27_H, + ); + +-alloc_class chunk2( +- V0, V0_H, V0_J, V0_K, +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K, +-); +- +-alloc_class chunk3(RFLAGS); ++alloc_class chunk2(RFLAGS); + + //----------Architecture Description Register Classes-------------------------- + // Several register classes are automatically defined based upon information in +@@ -826,41 +620,6 @@ reg_class double_reg( + F31, F31_H + ); + +-// Class for all RVV vector registers +-reg_class vectora_reg( +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, 
V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K +-); +- + // Class for 64 bit register f0 + reg_class f0_reg( + F0, F0_H +@@ -881,31 +640,6 @@ reg_class f3_reg( + F3, F3_H + ); + +-// class for vector register v1 +-reg_class v1_reg( +- V1, V1_H, V1_J, V1_K +-); +- +-// class for vector register v2 +-reg_class v2_reg( +- V2, V2_H, V2_J, V2_K +-); +- +-// class for vector register v3 +-reg_class v3_reg( +- V3, V3_H, V3_J, V3_K +-); +- +-// class for vector register v4 +-reg_class v4_reg( +- V4, V4_H, V4_J, V4_K +-); +- +-// class for vector register v5 +-reg_class v5_reg( +- V5, V5_H, V5_J, V5_K +-); +- + // class for condition codes + reg_class reg_flags(RFLAGS); + %} +@@ -1447,7 +1181,7 @@ const Pipeline * MachEpilogNode::pipeline() const { + + // Figure out which register class each belongs in: rc_int, rc_float or + // rc_stack. +-enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + + static enum RC rc_class(OptoReg::Name reg) { + +@@ -1468,13 +1202,7 @@ static enum RC rc_class(OptoReg::Name reg) { + return rc_float; + } + +- // we have 32 vector register * 4 halves +- int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; +- if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { +- return rc_vector; +- } +- +- // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. + assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; +@@ -1512,30 +1240,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + +- if (bottom_type()->isa_vect() != NULL) { +- uint ireg = ideal_reg(); +- if (ireg == Op_VecA && cbuf) { +- C2_MacroAssembler _masm(cbuf); +- Assembler::CompressibleRegion cr(&_masm); +- int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); +- if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { +- // stack to stack +- __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, +- vector_reg_size_in_bytes); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { +- // vpr to stack +- __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); +- } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { +- // stack to vpr +- __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); +- } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { +- // vpr to vpr +- __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); +- } else { +- ShouldNotReachHere(); +- } +- } +- } else if (cbuf != NULL) { ++ if (cbuf != NULL) { + C2_MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { +@@ -1619,17 +1324,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + } else { + st->print("%s", Matcher::regName[dst_lo]); + } +- if (bottom_type()->isa_vect() != NULL) { +- int vsize = 0; +- if (ideal_reg() == Op_VecA) { +- vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; +- } else { +- ShouldNotReachHere(); +- } +- st->print("\t# vector spill size = %d", vsize); +- } else { +- st->print("\t# spill size = %d", is64 ? 64 : 32); +- } ++ st->print("\t# spill size = %d", is64 ? 
64 : 32); + } + + return 0; +@@ -1796,14 +1491,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + break; + +- case Op_StrCompressedCopy: // fall through +- case Op_StrInflatedCopy: // fall through +- case Op_CountPositives: +- return UseRVV; +- +- case Op_EncodeISOArray: +- return UseRVV && SpecialEncodeISOArray; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; +@@ -1821,37 +1508,15 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + // Identify extra cases that we might want to provide match rules for vector nodes and +-// other intrinsics guarded with vector length (vlen) and element type (bt). +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { +- if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { +- return false; +- } +- +- return op_vec_supported(opcode); +-} +- +-const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + return false; + } + +-const RegMask* Matcher::predicate_reg_mask(void) { +- return NULL; +-} +- +-const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { +- return NULL; +-} +- +-// Vector calling convention not yet implemented. +-const bool Matcher::supports_vector_calling_convention(void) { ++const bool Matcher::has_predicated_vectors(void) { + return false; + } + +-OptoRegPair Matcher::vector_return_value(uint ideal_reg) { +- Unimplemented(); +- return OptoRegPair(0, 0); +-} +- + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then +@@ -1877,11 +1542,6 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { +- if (UseRVV) { +- // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. +- // MaxVectorSize == VM_Version::_initial_vector_length +- return MaxVectorSize; +- } + return 0; + } + +@@ -1895,34 +1555,10 @@ const int Matcher::min_vector_size(const BasicType bt) { + + // Vector ideal reg. + const uint Matcher::vector_ideal_reg(int len) { +- assert(MaxVectorSize >= len, ""); +- if (UseRVV) { +- return Op_VecA; +- } +- + ShouldNotReachHere(); + return 0; + } + +-const int Matcher::scalable_vector_reg_size(const BasicType bt) { +- return Matcher::max_vector_size(bt); +-} +- +-MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* original_opnd, uint ideal_reg, bool is_temp) { +- ShouldNotReachHere(); // generic vector operands not supported +- return NULL; +-} +- +-bool Matcher::is_reg2reg_move(MachNode* m) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- +-bool Matcher::is_generic_vector(MachOper* opnd) { +- ShouldNotReachHere(); // generic vector operands not supported +- return false; +-} +- + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -3384,67 +3020,6 @@ operand fRegD() + interface(REG_INTER); + %} + +-// Generic vector class. This will be used for +-// all vector operands. 
+-operand vReg() +-%{ +- constraint(ALLOC_IN_RC(vectora_reg)); +- match(VecA); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V1() +-%{ +- constraint(ALLOC_IN_RC(v1_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V2() +-%{ +- constraint(ALLOC_IN_RC(v2_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V3() +-%{ +- constraint(ALLOC_IN_RC(v3_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V4() +-%{ +- constraint(ALLOC_IN_RC(v4_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- +-operand vReg_V5() +-%{ +- constraint(ALLOC_IN_RC(v5_reg)); +- match(VecA); +- match(vReg); +- op_cost(0); +- format %{ %} +- interface(REG_INTER); +-%} +- + // Java Thread Register + operand javaThread_RegP(iRegP reg) + %{ +@@ -7939,17 +7514,6 @@ instruct castDD(fRegD dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castVV(vReg dst) +-%{ +- match(Set dst (CastVV dst)); +- +- size(0); +- format %{ "# castVV of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + +@@ -10076,7 +9640,7 @@ instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 su + instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10094,7 +9658,7 @@ instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10111,7 +9675,7 @@ instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R + instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10129,7 +9693,7 @@ instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_ + iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, + rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ predicate(((StrCompNode 
*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); + effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + +@@ -10275,7 +9839,7 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10294,7 +9858,7 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(!UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -10310,7 +9874,6 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ +- predicate(!UseRVV); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL cnt, USE_KILL base); + +@@ -10330,8 +9893,7 @@ instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + + instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) + %{ +- predicate(!UseRVV && (uint64_t)n->in(2)->get_long() +- < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); + match(Set dummy (ClearArray cnt base)); + effect(USE_KILL base, KILL cr); + +@@ -10348,7 +9910,7 @@ instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg + instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10364,7 +9926,7 @@ instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, + iRegI_R10 result, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + +@@ -10381,7 +9943,7 @@ instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +@@ -10398,7 +9960,7 @@ instruct array_equalsC(iRegP_R11 ary1, 
iRegP_R12 ary2, iRegI_R10 result, + iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, + iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) + %{ +- predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +deleted file mode 100644 +index 3828e096b21..00000000000 +--- a/src/hotspot/cpu/riscv/riscv_v.ad ++++ /dev/null +@@ -1,2065 +0,0 @@ +-// +-// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +-// Copyright (c) 2020, Arm Limited. All rights reserved. +-// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +-// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +-// +-// This code is free software; you can redistribute it and/or modify it +-// under the terms of the GNU General Public License version 2 only, as +-// published by the Free Software Foundation. +-// +-// This code is distributed in the hope that it will be useful, but WITHOUT +-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-// version 2 for more details (a copy is included in the LICENSE file that +-// accompanied this code). +-// +-// You should have received a copy of the GNU General Public License version +-// 2 along with this work; if not, write to the Free Software Foundation, +-// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +-// +-// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +-// or visit www.oracle.com if you need additional information or have any +-// questions. 
+-// +-// +- +-// RISCV Vector Extension Architecture Description File +- +-opclass vmemA(indirect); +- +-source_hpp %{ +- bool op_vec_supported(int opcode); +-%} +- +-source %{ +- +- static void loadStore(C2_MacroAssembler masm, bool is_store, +- VectorRegister reg, BasicType bt, Register base) { +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- masm.vsetvli(t0, x0, sew); +- if (is_store) { +- masm.vsex_v(reg, base, sew); +- } else { +- masm.vlex_v(reg, base, sew); +- } +- } +- +- bool op_vec_supported(int opcode) { +- switch (opcode) { +- // No multiply reduction instructions +- case Op_MulReductionVD: +- case Op_MulReductionVF: +- case Op_MulReductionVI: +- case Op_MulReductionVL: +- // Others +- case Op_Extract: +- case Op_ExtractB: +- case Op_ExtractC: +- case Op_ExtractD: +- case Op_ExtractF: +- case Op_ExtractI: +- case Op_ExtractL: +- case Op_ExtractS: +- case Op_ExtractUB: +- // Vector API specific +- case Op_AndReductionV: +- case Op_OrReductionV: +- case Op_XorReductionV: +- case Op_LoadVectorGather: +- case Op_StoreVectorScatter: +- case Op_VectorBlend: +- case Op_VectorCast: +- case Op_VectorCastB2X: +- case Op_VectorCastD2X: +- case Op_VectorCastF2X: +- case Op_VectorCastI2X: +- case Op_VectorCastL2X: +- case Op_VectorCastS2X: +- case Op_VectorInsert: +- case Op_VectorLoadConst: +- case Op_VectorLoadMask: +- case Op_VectorLoadShuffle: +- case Op_VectorMaskCmp: +- case Op_VectorRearrange: +- case Op_VectorReinterpret: +- case Op_VectorStoreMask: +- case Op_VectorTest: +- return false; +- default: +- return UseRVV; +- } +- } +- +-%} +- +-definitions %{ +- int_def VEC_COST (200, 200); +-%} +- +-// All VEC instructions +- +-// vector load/store +-instruct loadV(vReg dst, vmemA mem) %{ +- match(Set dst (LoadVector mem)); +- ins_cost(VEC_COST); +- format %{ "vle $dst, $mem\t#@loadV" %} +- ins_encode %{ +- VectorRegister dst_reg = as_VectorRegister($dst$$reg); +- loadStore(C2_MacroAssembler(&cbuf), false, dst_reg, +- Matcher::vector_element_basic_type(this), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct storeV(vReg src, vmemA mem) %{ +- match(Set mem (StoreVector mem src)); +- ins_cost(VEC_COST); +- format %{ "vse $src, $mem\t#@storeV" %} +- ins_encode %{ +- VectorRegister src_reg = as_VectorRegister($src$$reg); +- loadStore(C2_MacroAssembler(&cbuf), true, src_reg, +- Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector abs +- +-instruct vabsB(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVB src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsS(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVS src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsI(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVI src)); +- 
ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsL(vReg dst, vReg src, vReg tmp) %{ +- match(Set dst (AbsVL src)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" +- "vmax.vv $dst, $tmp, $src" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); +- __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsF(vReg dst, vReg src) %{ +- match(Set dst (AbsVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vabsD(vReg dst, vReg src) %{ +- match(Set dst (AbsVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add +- +-instruct vaddB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AddVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vaddD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst 
(AddVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfadd_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector and +- +-instruct vand(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (AndV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vand_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector or +- +-instruct vor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (OrV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector xor +- +-instruct vxor(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (XorV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vxor_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float div +- +-instruct vdivF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vdivD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (DivVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfdiv_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max/min +- +-instruct vmax(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmax_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmin(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT && +- n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %} +- ins_encode %{ +- BasicType bt = Matcher::vector_element_basic_type(this); +- Assembler::SEW sew = Assembler::elemtype_to_sew(bt); +- __ vsetvli(t0, x0, sew); +- __ vmin_vv(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float-point max/min +- +-instruct 
vmaxF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmaxD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminF(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminF $dst, $src1, $src2\t#@vminF" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vminD(vReg dst, vReg src1, vReg src2) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinV src1 src2)); +- effect(TEMP_DEF dst); +- ins_cost(VEC_COST); +- format %{ "vminD $dst, $src1, $src2\t#@vminD" %} +- ins_encode %{ +- __ minmax_FD_v(as_VectorRegister($dst$$reg), +- as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fmls +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + src2 * -src3 +-instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + -src2 * src3 +-// dst_src1 = dst_src1 + 
src2 * -src3 +-instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmla +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + -src2 * src3 +-// dst_src1 = -dst_src1 + src2 * -src3 +-instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); +- ins_cost(VEC_COST); +- format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fnmls +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = -dst_src1 + src2 * src3 +-instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ +- predicate(UseFMA); +- match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mla +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ 
vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 + src2 * src3 +-instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmacc_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mls +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// dst_src1 = dst_src1 - src2 * src3 +-instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ +- match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); +- ins_cost(VEC_COST); +- format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), +- as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector mul +- +-instruct vmulB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- 
as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vmulD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (MulVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector fneg +- +-instruct vnegF(vReg dst, vReg src) %{ +- match(Set dst (NegVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vnegD(vReg dst, vReg src) %{ +- match(Set dst (NegVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// popcount vector +- +-instruct vpopcountI(iRegINoSp dst, vReg src) %{ +- match(Set dst (PopCountVI src)); +- format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector add reduction +- +-instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, 
$src1\t#@reduce_addS\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (AddReductionVI src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (AddReductionVL src1 src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" +- "vredsum.vs $tmp, $src2, $tmp\n\t" +- "vmv.x.s $dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVF src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ +- match(Set src1_dst (AddReductionVD src1_dst src2)); +- effect(TEMP tmp); +- ins_cost(VEC_COST); +- format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" +- "vfredosum.vs $tmp, $src2, $tmp\n\t" +- "vfmv.f.s $src1_dst, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); +- __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp$$reg)); +- __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer max reduction +-instruct vreduce_maxB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), 
as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector integer min reduction +-instruct vreduce_minB(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minS(iRegINoSp dst, iRegI src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg)); +- __ 
vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- Label Ldone; +- __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone); +- __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); +- __ bind(Ldone); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP tmp); +- format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); +- __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg)); +- __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float max reduction +- +-instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MaxReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, false /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector float min reduction +- +-instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- false /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{ +- 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (MinReductionV src1 src2)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2); +- format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %} +- ins_encode %{ +- __ reduce_minmax_FD_v($dst$$FloatRegister, +- $src1$$FloatRegister, as_VectorRegister($src2$$reg), +- as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg), +- true /* is_double */, true /* is_min */); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector Math.rint, floor, ceil +- +-instruct vroundD(vReg dst, vReg src, immI rmode) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); +- match(Set dst (RoundDoubleModeV src rmode)); +- format %{ "vroundD $dst, $src, $rmode" %} +- ins_encode %{ +- switch ($rmode$$constant) { +- case RoundDoubleModeNode::rmode_rint: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rne); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_floor: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rdn); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- case RoundDoubleModeNode::rmode_ceil: +- __ csrwi(CSR_FRM, C2_MacroAssembler::rup); +- __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- break; +- default: +- ShouldNotReachHere(); +- break; +- } +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector replicate +- +-instruct replicateB(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateB src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateS src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI(vReg dst, iRegIorL2I src) %{ +- match(Set dst (ReplicateI src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL(vReg dst, iRegL src) %{ +- match(Set dst (ReplicateL src)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.x $dst, $src\t#@replicateL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateB_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateB con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateS_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateS con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateI_imm5(vReg dst, immI5 con) %{ +- match(Set dst (ReplicateI con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i 
$dst, $con\t#@replicateI_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateL_imm5(vReg dst, immL5 con) %{ +- match(Set dst (ReplicateL con)); +- ins_cost(VEC_COST); +- format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateF(vReg dst, fRegF src) %{ +- match(Set dst (ReplicateF src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct replicateD(vReg dst, fRegD src) %{ +- match(Set dst (ReplicateD src)); +- ins_cost(VEC_COST); +- format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector shift +- +-instruct vasrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" +- "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerByte - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" +- "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- BitsPerShort - 1, Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (RShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} 
+- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVB src shift)); +- ins_cost(VEC_COST); +- effect( TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (LShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVB src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, v0, v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- // if shift > BitsPerByte - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVS src shift)); +- ins_cost(VEC_COST); +- effect(TEMP_DEF dst); +- format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" +- "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" +- "vmnot.m v0, 
v0\n\t" +- "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- // if shift > BitsPerShort - 1, clear the element +- __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg), Assembler::v0_t); +- // otherwise, shift +- __ vmnot_m(v0, v0); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg), Assembler::v0_t); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrI(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVI src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +- +-instruct vlsrL(vReg dst, vReg src, vReg shift) %{ +- match(Set dst (URShiftVL src shift)); +- ins_cost(VEC_COST); +- format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($shift$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) con = BitsPerByte - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) con = BitsPerShort - 1; +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (RShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (RShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
+- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVB src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVS src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (URShiftVI src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (URShiftVL src (RShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- if (con == 0) { +- __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVB src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e8); +- if (con >= BitsPerByte) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVS src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} +- ins_encode %{ +- 
uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e16); +- if (con >= BitsPerShort) { +- __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), +- as_VectorRegister($src$$reg)); +- return; +- } +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ +- match(Set dst (LShiftVI src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ +- predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32); +- match(Set dst (LShiftVL src (LShiftCntV shift))); +- ins_cost(VEC_COST); +- format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} +- ins_encode %{ +- uint32_t con = (unsigned)$shift$$constant & 0x1f; +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || +- n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ +- predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); +- match(Set dst (LShiftCntV cnt)); +- match(Set dst (RShiftCntV cnt)); +- format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sqrt +- +-instruct vsqrtF(vReg dst, vReg src) %{ +- match(Set dst (SqrtVF src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsqrtD(vReg dst, vReg src) %{ +- match(Set dst (SqrtVD src)); +- ins_cost(VEC_COST); +- format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsqrt_v(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-// vector sub +- +-instruct vsubB(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVB src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e8); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubS(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVS src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e16); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubI(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVI src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubL(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVL src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubF(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVF src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e32); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vsubD(vReg dst, vReg src1, vReg src2) %{ +- match(Set dst (SubVD src1 src2)); +- ins_cost(VEC_COST); +- format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} +- ins_encode %{ +- __ vsetvli(t0, x0, Assembler::e64); +- __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), +- as_VectorRegister($src2$$reg)); +- %} +- ins_pipe(pipe_slow); +-%} +- +-instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. +- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, +- iRegI_R10 result, vReg_V1 v1, +- vReg_V2 v2, vReg_V3 v3, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (StrEquals (Binary str1 str2) cnt)); +- effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_equals_v($str1$$Register, $str2$$Register, +- $result$$Register, $cnt$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 1); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg cr) +-%{ +- predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result (AryEq ary1 ary2)); +- effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL cr); +- +- format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} +- ins_encode %{ +- __ arrays_equals_v($ary1$$Register, $ary2$$Register, +- $result$$Register, $tmp$$Register, 2); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} +- ins_encode %{ +- // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
+- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UU); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LL); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::UL); +- %} +- ins_pipe(pipe_class_memory); +-%} +-instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, +- iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, +- iRegP_R28 tmp1, iRegL_R29 tmp2) +-%{ +- predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); +- match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); +- effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, +- TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); +- +- format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} +- ins_encode %{ +- __ string_compare_v($str1$$Register, $str2$$Register, +- $cnt1$$Register, $cnt2$$Register, $result$$Register, +- $tmp1$$Register, $tmp2$$Register, +- StrIntrinsicNode::LU); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// fast byte[] to char[] inflation +-instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set dummy (StrInflatedCopy src (Binary dst len))); +- effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); +- +- format %{ "String Inflate $src,$dst" %} +- ins_encode %{ +- __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); +- %} +- ins_pipe(pipe_class_memory); +-%} +- +-// encode char[] to byte[] in ISO_8859_1 +-instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (EncodeISOArray src (Binary dst len))); +- 
effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "Encode array $src,$dst,$len -> $result" %} +- ins_encode %{ +- __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_class_memory ); +-%} +- +-// fast char[] to byte[] compression +-instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (StrCompressedCopy src (Binary dst len))); +- effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, +- TEMP v1, TEMP v2, TEMP v3, TEMP tmp); +- +- format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} +- ins_encode %{ +- __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, +- $result$$Register, $tmp$$Register); +- %} +- ins_pipe( pipe_slow ); +-%} +- +-instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result, iRegL tmp) +-%{ +- predicate(UseRVV); +- match(Set result (CountPositives ary len)); +- effect(USE_KILL ary, USE_KILL len, TEMP tmp); +- +- format %{ "count positives byte[] $ary, $len -> $result" %} +- ins_encode %{ +- __ count_positives_v($ary$$Register, $len$$Register, $result$$Register, $tmp$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- false /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- vReg_V1 v1, vReg_V2 v2, vReg_V3 v3) +-%{ +- predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, +- TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3); +- +- format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %} +- +- ins_encode %{ +- __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- true /* isL */); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +- +-// clearing of an array +-instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, +- vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) +-%{ +- predicate(UseRVV); +- match(Set dummy (ClearArray cnt base)); +- effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); +- +- format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} +- +- ins_encode %{ +- __ clear_array_v($base$$Register, $cnt$$Register); +- %} +- +- ins_pipe(pipe_class_memory); +-%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f85d4b25a76..4daed17df10 100644 +--- 
a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -80,9 +80,8 @@ class SimpleRuntimeFrame { + }; + + class RegisterSaver { +- const bool _save_vectors; + public: +- RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); +@@ -91,11 +90,7 @@ class RegisterSaver { + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) +- // |---v0---|<---SP +- // |---v1---|save vectors only in generate_handler_blob +- // |-- .. --| +- // |---v31--|----- +- // |---f0---| ++ // |---f0---|<---SP + // |---f1---| + // | .. | + // |---f31--| +@@ -106,16 +101,8 @@ class RegisterSaver { + // |---x31--| + // |---fp---| + // |---ra---| +- int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { +- int f0_offset = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * +- BytesPerInt; +- } +-#endif +- return f0_offset; ++ return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + +@@ -142,15 +129,6 @@ class RegisterSaver { + }; + + OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +- int vector_size_in_bytes = 0; +- int vector_size_in_slots = 0; +-#ifdef COMPILER2 +- if (_save_vectors) { +- vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); +- vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); +- } +-#endif +- + assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words +@@ -161,9 +139,9 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + +- // Save Integer, Float and Vector registers. ++ // Save Integer and Float registers. + __ enter(); +- __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. 
This +@@ -176,13 +154,6 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + int sp_offset_in_slots = 0; + int step_in_slots = 0; +- if (_save_vectors) { +- step_in_slots = vector_size_in_slots; +- for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +- VectorRegister r = as_VectorRegister(i); +- oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); +- } +- } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { +@@ -207,18 +178,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + assert_cond(masm != NULL); +-#ifdef COMPILER2 +- __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); +-#else +- __ pop_CPU_state(_save_vectors); +-#endif ++ __ pop_CPU_state(); + __ leave(); + } + + // Is vector's size (in bytes) bigger than a size saved by default? +-// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. + bool SharedRuntime::is_wide_vector(int size) { +- return UseRVV; ++ return false; + } + + // The java_calling_convention describes stack locations as ideal slots on +@@ -674,13 +640,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); + } + +-int SharedRuntime::vector_calling_convention(VMRegPair *regs, +- uint num_bits, +- uint total_args_passed) { +- Unimplemented(); +- return 0; +-} +- + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, +@@ -1891,7 +1850,7 @@ void SharedRuntime::generate_deopt_blob() { + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(masm != NULL && oop_maps != NULL); +- RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ RegisterSaver reg_saver; + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return +@@ -2423,7 +2382,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + address call_pc = NULL; + int frame_size_in_words = -1; + bool cause_return = (poll_type == POLL_AT_RETURN); +- RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ RegisterSaver reg_saver; + + // Save Integer and Float registers. 
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); +@@ -2542,7 +2501,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha + assert_cond(masm != NULL); + + int frame_size_in_words = -1; +- RegisterSaver reg_saver(false /* save_vectors */); ++ RegisterSaver reg_saver; + + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index b05edf7172c..39416441bdf 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2843,111 +2843,6 @@ class StubGenerator: public StubCodeGenerator { + + return entry; + } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerLeftShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = t1; +- +- __ beqz(numIter, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ addi(oldArrNext, oldArr, 4); +- __ vsetvli(t0, numIter, Assembler::e32, Assembler::m4); +- __ vle32_v(v0, oldArr); +- __ vle32_v(v4, oldArrNext); +- __ vsll_vx(v0, v0, shiftCount); +- __ vsrl_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArr); +- __ sub(numIter, numIter, t0); +- __ shadd(oldArr, t0, oldArr, t1, 2); +- __ shadd(newArr, t0, newArr, t1, 2); +- __ bnez(numIter, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } +- +- // Arguments: +- // +- // Input: +- // c_rarg0 - newArr address +- // c_rarg1 - oldArr address +- // c_rarg2 - newIdx +- // c_rarg3 - shiftCount +- // c_rarg4 - numIter +- // +- address generate_bigIntegerRightShift() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); +- address entry = __ pc(); +- +- Label loop, exit; +- +- Register newArr = c_rarg0; +- Register oldArr = c_rarg1; +- Register newIdx = c_rarg2; +- Register shiftCount = c_rarg3; +- Register numIter = c_rarg4; +- Register idx = numIter; +- +- Register shiftRevCount = c_rarg5; +- Register oldArrNext = c_rarg6; +- Register newArrCur = t0; +- Register oldArrCur = t1; +- +- __ beqz(idx, exit); +- __ shadd(newArr, newIdx, newArr, t0, 2); +- +- __ li(shiftRevCount, 32); +- __ sub(shiftRevCount, shiftRevCount, shiftCount); +- +- __ bind(loop); +- __ vsetvli(t0, idx, Assembler::e32, Assembler::m4); +- __ sub(idx, idx, t0); +- __ shadd(oldArrNext, idx, oldArr, t1, 2); +- __ shadd(newArrCur, idx, newArr, t1, 2); +- __ addi(oldArrCur, oldArrNext, 4); +- __ vle32_v(v0, oldArrCur); +- __ vle32_v(v4, oldArrNext); +- __ vsrl_vx(v0, v0, shiftCount); +- __ vsll_vx(v4, v4, shiftRevCount); +- __ vor_vv(v0, v0, v4); +- __ vse32_v(v0, newArrCur); +- __ bnez(idx, loop); +- +- __ bind(exit); +- __ ret(); +- +- return entry; +- } + #endif + + #ifdef COMPILER2 +@@ -3813,11 +3708,6 @@ class StubGenerator: public StubCodeGenerator { + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + 
StubRoutines::_montgomerySquare = g.generate_square(); + } +- +- if (UseRVVForBigIntegerShiftIntrinsics) { +- StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); +- StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); +- } + #endif + + generate_compare_long_strings(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 768c7633ca6..2c15a834542 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -167,10 +167,6 @@ void VM_Version::c2_initialize() { + FLAG_SET_DEFAULT(MaxVectorSize, 0); + } + +- if (!UseRVV) { +- FLAG_SET_DEFAULT(UseRVVForBigIntegerShiftIntrinsics, false); +- } +- + if (UseRVV) { + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = _initial_vector_length; +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index aa7222dc64a..1f6eff96cba 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -45,16 +45,8 @@ void VMRegImpl::set_regName() { + freg = freg->successor(); + } + +- VectorRegister vreg = ::as_VectorRegister(0); +- for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { +- for (int j = 0 ; j < VectorRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); +- } +- vreg = vreg->successor(); +- } +- + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { +- regName[i] = "NON-GPR-FPR-VPR"; ++ regName[i] = "NON-GPR-FPR"; + } + } + +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +index 9e611b1f671..6f613a8f11a 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -34,10 +34,6 @@ inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; + } + +-inline bool is_VectorRegister() { +- return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; +-} +- + inline Register as_Register() { + assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +@@ -49,20 +45,9 @@ inline FloatRegister as_FloatRegister() { + FloatRegisterImpl::max_slots_per_register); + } + +-inline VectorRegister as_VectorRegister() { +- assert(is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be"); +- return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register); +-} +- + inline bool is_concrete() { + assert(is_reg(), "must be"); +- if (is_VectorRegister()) { +- int base = value() - ConcreteRegisterImpl::max_fpr; +- return (base % VectorRegisterImpl::max_slots_per_register) == 0; +- } else { +- return is_even(value()); +- } ++ return is_even(value()); + } + + #endif // CPU_RISCV_VMREG_RISCV_HPP + +From b2011bad9b7404c1f6d0c1aa3176569d7f07d7a9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 27 Mar 2023 16:05:55 +0800 +Subject: [PATCH 004/140] Revert: JDK-8253180: ZGC: Implementation of JEP 376: + ZGC: Concurrent Thread-Stack Processing JDK-8220051: Remove global safepoint + code + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 14 ------ + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 14 +++--- + .../riscv/c2_safepointPollStubTable_riscv.cpp | 47 ------------------ + src/hotspot/cpu/riscv/frame_riscv.cpp | 9 +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 19 +------- + 
.../cpu/riscv/macroAssembler_riscv.cpp | 48 +++++++++++-------- + .../cpu/riscv/macroAssembler_riscv.hpp | 5 +- + src/hotspot/cpu/riscv/riscv.ad | 14 ++---- + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 2 - + 9 files changed, 45 insertions(+), 127 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index dcd0472c540..af7bd067f33 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -39,20 +39,6 @@ + + #define __ ce->masm()-> + +-void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { +- __ bind(_entry); +- InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); +- __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- +- __ far_jump(RuntimeAddress(stub)); +-} +- + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index bba3bd4709c..0e383a3c139 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -424,7 +424,7 @@ int LIR_Assembler::emit_deopt_handler() { + return offset; + } + +-void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { ++void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10"); + + // Pop the stack before the safepoint code +@@ -434,18 +434,20 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { + __ reserved_stack_check(); + } + +- code_stub->set_safepoint_offset(__ offset()); +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ address polling_page(os::get_polling_page()); ++ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type); + __ ret(); + } + + int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); +- __ get_polling_page(t0, relocInfo::poll_type); ++ assert(os::is_poll_address(polling_page), "should be"); ++ int32_t offset = 0; ++ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map +- __ read_polling_page(t0, 0, relocInfo::poll_type); ++ __ read_polling_page(t0, offset, relocInfo::poll_type); + return __ offset(); + } + +diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp +deleted file mode 100644 +index a90d9fdc160..00000000000 +--- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp ++++ /dev/null +@@ -1,47 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/macroAssembler.hpp" +-#include "opto/compile.hpp" +-#include "opto/node.hpp" +-#include "opto/output.hpp" +-#include "runtime/sharedRuntime.hpp" +- +-#define __ masm. +-void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { +- assert(SharedRuntime::polling_page_return_handler_blob() != NULL, +- "polling page return stub not created yet"); +- address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +- RuntimeAddress callback_addr(stub); +- +- __ bind(entry->_stub_label); +- InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); +- masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec()); +- __ la(t0, safepoint_pc.target()); +- __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); +- __ far_jump(callback_addr); +-} +-#undef __ +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 6e38960598a..41e52a4d491 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -39,7 +39,6 @@ + #include "runtime/monitorChunk.hpp" + #include "runtime/os.inline.hpp" + #include "runtime/signature.hpp" +-#include "runtime/stackWatermarkSet.hpp" + #include "runtime/stubCodeGenerator.hpp" + #include "runtime/stubRoutines.hpp" + #include "vmreg_riscv.inline.hpp" +@@ -509,13 +508,7 @@ frame frame::sender_raw(RegisterMap* map) const { + } + + frame frame::sender(RegisterMap* map) const { +- frame result = sender_raw(map); +- +- if (map->process_frames()) { +- StackWatermarkSet::on_iteration(map->thread(), result); +- } +- +- return result; ++ return sender_raw(map); + } + + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index d12dcb2af19..9090ad0c058 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -519,7 +519,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +- ld(t1, Address(xthread, JavaThread::polling_word_offset())); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } +@@ -591,23 +591,6 @@ void InterpreterMacroAssembler::remove_activation( + // result check if synchronized method + 
Label unlocked, unlock, no_unlock; + +- // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, +- // that would normally not be safe to use. Such bad returns into unsafe territory of +- // the stack, will call InterpreterRuntime::at_unwind. +- Label slow_path; +- Label fast_path; +- safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); +- j(fast_path); +- +- bind(slow_path); +- push(state); +- set_last_Java_frame(esp, fp, (address)pc(), t0); +- super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), xthread); +- reset_last_Java_frame(true); +- pop(state); +- +- bind(fast_path); +- + // get the value of _do_not_unlock_if_synchronized into x13 + const Address do_not_unlock_if_synchronized(xthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 8b8d126f6c9..4b6136ae36b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2122,15 +2122,16 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + } + + void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { +- ld(t0, Address(xthread, JavaThread::polling_word_offset())); +- if (acquire) { +- membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- } +- if (at_return) { +- bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); + } else { +- andi(t0, t0, SafepointMechanism::poll_bit()); +- bnez(t0, slow_path, true /* is_far */); ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); + } + } + +@@ -2752,22 +2753,29 @@ void MacroAssembler::reserved_stack_check() { + } + + // Move the address of the polling page into dest. +-void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) { +- ld(dest, Address(xthread, JavaThread::polling_page_offset())); ++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } + } + + // Read the polling page. The address of the polling page must + // already be in r. +-address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { +- address mark; +- { +- InstructionMark im(this); +- code_section()->relocate(inst_mark(), rtype); +- lwu(zr, Address(r, offset)); +- mark = inst_mark(); +- } +- verify_cross_modify_fence_not_required(); +- return mark; ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); ++} ++ ++// Read the polling page. The address of the polling page must ++// already be in r. 
++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(dest, offset)); + } + + void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b43131514c1..041c696add6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,8 +625,9 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + +- void get_polling_page(Register dest, relocInfo::relocType rtype); +- address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + + address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 85593a942e9..996fa1fb68f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1132,9 +1132,9 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + } + + if (do_polling() && C->is_method_compilation()) { +- st->print("# test polling word\n\t"); +- st->print("ld t0, [xthread,#%d]\n\t", in_bytes(JavaThread::polling_word_offset())); +- st->print("bgtu sp, t0, #slow_path"); ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); + } + } + #endif +@@ -1153,13 +1153,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + } + + if (do_polling() && C->is_method_compilation()) { +- Label dummy_label; +- Label* code_stub = &dummy_label; +- if (!C->output()->in_scratch_emit_size()) { +- code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); +- } +- __ relocate(relocInfo::poll_return_type); +- __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); + } + } + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index 8e35530359a..7586af01d99 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -48,8 +48,6 @@ class VM_Version : public Abstract_VM_Version { + // Initialization + static void initialize(); + +- constexpr static bool supports_stack_watermark_barrier() { return true; } +- + enum Feature_Flag { + #define CPU_FEATURE_FLAGS(decl) \ + decl(I, "i", 8) \ + +From a032c615883fe2bd557baf40f1439cbae55be206 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 1 May 2023 15:42:09 +0800 +Subject: [PATCH 005/140] Revert JDK-8221554: aarch64 cross-modifying code + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 22 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 -- + 2 files changed, 24 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 4b6136ae36b..269d76ba69e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2716,7 
+2716,6 @@ void MacroAssembler::build_frame(int framesize) { + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); + if (PreserveFramePointer) { add(fp, sp, framesize); } +- verify_cross_modify_fence_not_required(); + } + + void MacroAssembler::remove_frame(int framesize) { +@@ -3935,26 +3934,5 @@ void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Registe + + void MacroAssembler::safepoint_ifence() { + ifence(); +-#ifndef PRODUCT +- if (VerifyCrossModifyFence) { +- // Clear the thread state. +- sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- } +-#endif + } + +-#ifndef PRODUCT +-void MacroAssembler::verify_cross_modify_fence_not_required() { +- if (VerifyCrossModifyFence) { +- // Check if thread needs a cross modify fence. +- lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset()))); +- Label fence_not_required; +- beqz(t0, fence_not_required); +- // If it does then fail. +- la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure))); +- mv(c_rarg0, xthread); +- jalr(t0); +- bind(fence_not_required); +- } +-} +-#endif +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 041c696add6..b59bdadb8bf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -821,8 +821,6 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +- // Check the current thread doesn't need a cross modify fence. 
+- void verify_cross_modify_fence_not_required() PRODUCT_RETURN; + }; + + #ifdef ASSERT + +From fd89cf689015649a5cb850e1e24dcbb7bb59735a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:11:30 +0800 +Subject: [PATCH 006/140] Revert JDK-8242263: Diagnose synchronization on + primitive wrappers + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 7 ------- + src/hotspot/cpu/riscv/riscv.ad | 7 ------- + 3 files changed, 21 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 6f656c8c533..348546a9ea0 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -64,13 +64,6 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + + null_check_offset = offset(); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(hdr, obj); +- lwu(hdr, Address(hdr, Klass::access_flags_offset())); +- andi(t0, hdr, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(t0, slow_case, true /* is_far */); +- } +- + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 9090ad0c058..8adc7b1320d 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -782,13 +782,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(tmp, obj_reg); +- lwu(tmp, Address(tmp, Klass::access_flags_offset())); +- andi(tmp, tmp, JVM_ACC_IS_VALUE_BASED_CLASS); +- bnez(tmp, slow_case); +- } +- + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 996fa1fb68f..2eefc71dde0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1965,13 +1965,6 @@ encode %{ + // Load markWord from object into displaced_header. 
+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + +- if (DiagnoseSyncOnValueBasedClasses != 0) { +- __ load_klass(flag, oop); +- __ lwu(flag, Address(flag, Klass::access_flags_offset())); +- __ andi(flag, flag, JVM_ACC_IS_VALUE_BASED_CLASS, tmp /* tmp */); +- __ bnez(flag, cont, true /* is_far */); +- } +- + // Check for existing monitor + __ andi(t0, disp_hdr, markWord::monitor_value); + __ bnez(t0, object_has_monitor); + +From feea78c5a227c0a57e57d6d1d544a14682310053 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:24:12 +0800 +Subject: [PATCH 007/140] Revert JDK-8278104: C1 should support the compiler + directive 'BreakAtExecute' + +--- + src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 348546a9ea0..e5ed25616d6 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -311,7 +311,7 @@ void C1_MacroAssembler::remove_frame(int framesize) { + } + + +-void C1_MacroAssembler::verified_entry(bool breakAtEntry) { ++void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + +From 651009a5783f6f5150b3e75a50069dc841622d33 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:57:14 +0800 +Subject: [PATCH 008/140] Revert: JDK-8234562: Move + OrderAccess::release_store*/load_acquire to Atomic JDK-8234736: Harmonize + parameter order in Atomic - store JDK-8234737: Harmonize parameter order in + Atomic - add JDK-8234740: Harmonize parameter order in Atomic - cmpxchg + JDK-8234739: Harmonize parameter order in Atomic - xchg JDK-8236778: Add + Atomic::fetch_and_add + +--- + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 51 +++++++------------ + .../linux_riscv/orderAccess_linux_riscv.hpp | 31 +++++++---- + 2 files changed, 39 insertions(+), 43 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +index 761da5d743e..9b8b1a31774 100644 +--- a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp +@@ -33,25 +33,31 @@ + // Note that memory_order_conservative requires a full barrier after atomic stores. 
+ // See https://patchwork.kernel.org/patch/3575821/ + ++#define FULL_MEM_BARRIER __sync_synchronize() ++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); ++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); ++ + template +-struct Atomic::PlatformAdd { +- template +- D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const { ++struct Atomic::PlatformAdd ++ : Atomic::FetchAndAdd > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } + +- template +- D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const { +- return add_and_fetch(dest, add_value, order) - add_value; ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; + } + }; + + template + template +-inline T Atomic::PlatformXchg::operator()(T volatile* dest, +- T exchange_value, ++inline T Atomic::PlatformXchg::operator()(T exchange_value, ++ T volatile* dest, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); +@@ -62,9 +68,9 @@ inline T Atomic::PlatformXchg::operator()(T volatile* dest, + // __attribute__((unused)) on dest is to get rid of spurious GCC warnings. + template + template +-inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, +- T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; +@@ -83,9 +89,9 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest __attri + + template<> + template +-inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__((unused)), ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), + T compare_value, +- T exchange_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order != memory_order_relaxed) { +@@ -110,25 +116,4 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest __attribute__(( + return rv; + } + +-template +-struct Atomic::PlatformOrderedLoad +-{ +- template +- T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } +-}; +- +-template +-struct Atomic::PlatformOrderedStore +-{ +- template +- void operator()(volatile T* p, T v) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } +-}; +- +-template +-struct Atomic::PlatformOrderedStore +-{ +- template +- void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); } +-}; +- + #endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +index 1c33dc1e87f..5b5d35553f7 100644 +--- a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +@@ -37,10 +37,6 @@ inline void OrderAccess::storestore() { release(); } + inline void OrderAccess::loadstore() { acquire(); } + inline void OrderAccess::storeload() { fence(); } + +-#define FULL_MEM_BARRIER __sync_synchronize() +-#define 
READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +-#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); +- + inline void OrderAccess::acquire() { + READ_MEM_BARRIER; + } +@@ -53,11 +49,26 @@ inline void OrderAccess::fence() { + FULL_MEM_BARRIER; + } + +-inline void OrderAccess::cross_modify_fence_impl() { +- asm volatile("fence.i" : : : "memory"); +- if (UseConservativeFence) { +- asm volatile("fence ir, ir" : : : "memory"); +- } +-} ++ ++template ++struct OrderAccess::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } ++}; + + #endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP + +From b078a2ec01598fbcd99aea61af15d44f9c884aaa Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 21:07:42 +0800 +Subject: [PATCH 009/140] Revert JDK-8229258: Rework markOop and markOopDesc + into a simpler mark word value carrier + +--- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 ++-- + .../shenandoahBarrierSetAssembler_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/riscv.ad | 22 +++++++++---------- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +- + 4 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index e5ed25616d6..2d52343587e 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -67,7 +67,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // Load object header + ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked +- ori(hdr, hdr, markWord::unlocked_value); ++ ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the +@@ -141,7 +141,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); + // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index d0ac6e52436..84e1205bc25 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -216,9 +216,9 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 +- __ andi(t2, tmp, markWord::lock_mask_in_place); ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); + __ bnez(t2, done); +- __ ori(tmp, tmp, markWord::marked_value); ++ __ ori(tmp, tmp, markOopDesc::marked_value); + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2eefc71dde0..44ab44dece1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1966,12 +1966,12 @@ encode %{ + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Check for existing monitor +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { + // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markWord::unlocked_value); ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +@@ -1993,7 +1993,7 @@ encode %{ + // Check if the owner is self by comparing the value in the + // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. +@@ -2012,15 +2012,15 @@ encode %{ + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. 
+- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value)); ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for +- // markWord::monitor_value so use markWord::unused_mark which has the ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markWord::unused_mark().value()); ++ __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + __ beqz(flag, cont); // CAS success means locking succeeded +@@ -2029,9 +2029,9 @@ encode %{ + + // Recursive lock case + __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value)); ++ __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); + + __ bind(cont); + %} +@@ -2060,7 +2060,7 @@ encode %{ + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markWord::monitor_value); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + + if (!UseHeavyMonitors) { +@@ -2080,8 +2080,8 @@ encode %{ + + // Handle existing monitor. + __ bind(object_has_monitor); +- STATIC_ASSERT(markWord::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); + + Label notRecursive; +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index d2a301c6e74..4e388ac4eaa 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3559,7 +3559,7 @@ void TemplateTable::_new() { + + // initialize object hader only. 
+ __ bind(initialize_header); +- __ mv(t0, (intptr_t)markWord::prototype().value()); ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 4b27cd8d4cfa8fb5f0f78aecaebb17d19362f300 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 16:24:36 +0800 +Subject: [PATCH 010/140] Revert: JDK-8239895: assert(_stack_base != 0LL) + failed: Sanity check JDK-8238988: Rename thread "in stack" methods and add + in_stack_range JDK-8234372: Investigate use of Thread::stack_base() and + queries for "in stack" JDK-8203481: Incorrect constraint for unextended_sp in + frame:safe_for_sender + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 32 +++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 41e52a4d491..8e7babe2c61 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -56,13 +56,21 @@ void RegisterMap::check_location_valid() { + // Profiling/safepoint support + + bool frame::safe_for_sender(JavaThread *thread) { +- address addr_sp = (address)_sp; +- address addr_fp = (address)_fp; ++ address sp = (address)_sp; ++ address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ + // sp must be within the usable part of the stack (not in guards) +- if (!thread->is_in_usable_stack(addr_sp)) { ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { + return false; + } + +@@ -79,14 +87,15 @@ bool frame::safe_for_sender(JavaThread *thread) { + // So unextended sp must be within the stack but we need not to check + // that unextended sp >= sp + +- if (!thread->is_in_full_stack_checked(unextended_sp)) { ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 +- bool fp_safe = thread->is_in_stack_range_excl(addr_fp, addr_sp) && +- thread->is_in_full_stack_checked(addr_fp + (return_addr_offset * sizeof(void*))); ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + +@@ -147,7 +156,7 @@ bool frame::safe_for_sender(JavaThread *thread) { + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? +- if (!thread->is_in_full_stack_checked((address)sender_sp)) { ++ if ((address)sender_sp >= thread->stack_base()) { + return false; + } + +@@ -163,7 +172,10 @@ bool frame::safe_for_sender(JavaThread *thread) { + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
+- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + +@@ -196,7 +208,9 @@ bool frame::safe_for_sender(JavaThread *thread) { + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { +- if (!thread->is_in_stack_range_excl((address)saved_fp, (address)sender_sp)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { + return false; + } + + +From d1b463b6c00c75664a49719f75bef8e6408f12df Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 31 Mar 2023 17:10:33 +0800 +Subject: [PATCH 011/140] Revert JDK-8173585: Intrinsify + StringLatin1.indexOf(char) + +--- + src/hotspot/cpu/riscv/riscv.ad | 19 ------------------- + 1 file changed, 19 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 44ab44dece1..8c7a8ede815 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -9826,7 +9826,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, + TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + +@@ -9840,24 +9839,6 @@ instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, + %} + + +-instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, +- iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, +- iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) +-%{ +- match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); +- predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); +- effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, +- TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); +- +- format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} +- ins_encode %{ +- __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, +- $result$$Register, $tmp1$$Register, $tmp2$$Register, +- $tmp3$$Register, $tmp4$$Register, true /* isL */); +- %} +- ins_pipe(pipe_class_memory); +-%} +- + // clearing of an array + instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) + %{ + +From a0cdf8dfb05dbff34d2ca23104d08ae21b2d7f70 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 12:25:36 +0800 +Subject: [PATCH 012/140] Revert JDK-8281632: riscv: Improve interpreter stack + banging, and change the register t1->t0 + +--- + .../templateInterpreterGenerator_riscv.cpp | 42 ++++--------------- + 1 file changed, 8 insertions(+), 34 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 6537b2dbd94..76ae6f89e27 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -895,42 +895,16 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract + } + + void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { +- // See more discussion in stackOverflow.hpp. 
+- +- const int shadow_zone_size = checked_cast(StackOverflow::stack_shadow_zone_size()); ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); +- const int n_shadow_pages = shadow_zone_size / page_size; +- +-#ifdef ASSERT +- Label L_good_limit; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bnez(t0, L_good_limit); +- __ stop("shadow zone safe limit is not initialized"); +- __ bind(L_good_limit); +- +- Label L_good_watermark; +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bnez(t0, L_good_watermark); +- __ stop("shadow zone growth watermark is not initialized"); +- __ bind(L_good_watermark); +-#endif +- +- Label L_done; +- +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- __ bgtu(sp, t0, L_done); +- +- for (int p = 1; p <= n_shadow_pages; p++) { +- __ bang_stack_with_offset(p * page_size); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } +- +- // Record the new watermark, but only if the update is above the safe limit. +- // Otherwise, the next time around the check above would pass the safe limit. +- __ ld(t0, Address(xthread, JavaThread::shadow_zone_safe_limit())); +- __ bleu(sp, t0, L_done); +- __ sd(sp, Address(xthread, JavaThread::shadow_zone_growth_watermark())); +- +- __ bind(L_done); + } + + // Interpreter stub for calling a native method. (asm interpreter) + +From 8db4bf1400d92c80a0adef8a5ec12adbf595c03f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 14:56:25 +0800 +Subject: [PATCH 013/140] Port aarch64 style sig handler from + os_linux_aarch64.cpp + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 224 +++++++++++++----- + 1 file changed, 168 insertions(+), 56 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 1f46bbab0a2..db15f1946e2 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -48,7 +48,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" +-#include "signals_posix.hpp" + #include "utilities/debug.hpp" + #include "utilities/events.hpp" + #include "utilities/vmError.hpp" +@@ -172,31 +171,138 @@ NOINLINE frame os::current_frame() { + } + + // Utility functions +-bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, +- ucontext_t* uc, JavaThread* thread) { ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). 
When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } + + // decide if this trap can be handled by a stub + address stub = NULL; + +- address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { +- pc = (address) os::Posix::ucontext_get_pc(uc); +- +- address addr = (address) info->si_addr; +- +- // Make sure the high order byte is sign extended, as it may be masked away by the hardware. +- if ((uintptr_t(addr) & (uintptr_t(1) << 55)) != 0) { +- addr = address(uintptr_t(addr) | (uintptr_t(0xFF) << 56)); +- } ++ pc = (address) os::Linux::ucontext_get_pc(uc); + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ + // check if fault address is within thread stack +- if (thread->is_in_full_stack(addr)) { +- if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) { +- return true; // continue ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. 
Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } + } + } + } +@@ -212,7 +318,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); +- } else if (sig == SIGSEGV && SafepointMechanism::is_poll_address((address)info->si_addr)) { ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault +@@ -220,34 +326,12 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; +- bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); +- if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { ++ if (nm != NULL && nm->has_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (is_unsafe_arraycopy) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } +- } else if (sig == SIGILL && nativeInstruction_at(pc)->is_stop()) { +- // Pull a pointer to the error message out of the instruction +- // stream. +- const uint64_t *detail_msg_ptr +- = (uint64_t*)(pc + NativeInstruction::instruction_size); +- const char *detail_msg = (const char *)*detail_msg_ptr; +- const char *msg = "stop"; +- if (TraceTraps) { +- tty->print_cr("trap: %s: (SIGILL)", msg); +- } +- +- // End life with a fatal error, message and detail message and the context. +- // Note: no need to do any post-processing here (e.g. 
signal chaining) +- va_list va_dummy; +- VMError::report_and_die(thread, uc, NULL, 0, msg, detail_msg, va_dummy); +- va_end(va_dummy); +- +- ShouldNotReachHere(); + } else if (sig == SIGFPE && +- (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, +@@ -255,42 +339,70 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && +- MacroAssembler::uses_implicit_null_check((void*)addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } +- } else if ((thread->thread_state() == _thread_in_vm || +- thread->thread_state() == _thread_in_native) && +- sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ +- thread->doing_unsafe_access()) { ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { + address next_pc = pc + NativeCall::instruction_size; +- if (UnsafeCopyMemory::contains_pc(pc)) { +- next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); +- } + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +- address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); +- if (addr_slow != (address)-1) { +- stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. 
++ os::block_on_serialize_page_trap(); ++ return true; ++ } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it +- if (thread != NULL) { +- thread->set_saved_exception_pc(pc); +- } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + +- os::Posix::ucontext_set_pc(uc, stub); ++ os::Linux::ucontext_set_pc(uc, stub); + return true; + } + +- return false; // Mute compiler ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler + } + + void os::Linux::init_thread_fpu_state(void) { + +From fd3897410308e2fc54d84a9bd453b1b375e6aace Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:24:57 +0800 +Subject: [PATCH 014/140] Revert: JDK-8248240: Remove extendedPC.hpp and + fetch_frame_from_ucontext JDK-8253742: POSIX signal code cleanup + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 38 ++++++++++++++----- + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 9 +++-- + 2 files changed, 33 insertions(+), 14 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index db15f1946e2..4f1c84c60a0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -37,6 +37,7 @@ + #include "prims/jniFastGetField.hpp" + #include "prims/jvm_misc.hpp" + #include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/java.hpp" +@@ -85,11 +86,11 @@ char* os::non_memory_address_word() { + return (char*) -1; + } + +-address os::Posix::ucontext_get_pc(const ucontext_t * uc) { ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.__gregs[REG_PC]; + } + +-void os::Posix::ucontext_set_pc(ucontext_t * uc, address pc) { ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; + } + +@@ -101,13 +102,29 @@ intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; + } + +-address os::fetch_frame_from_context(const void* ucVoid, +- intptr_t** ret_sp, intptr_t** ret_fp) { +- address epc; ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { +- epc = os::Posix::ucontext_get_pc(uc); ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp != NULL) { + *ret_sp = os::Linux::ucontext_get_sp(uc); + } +@@ -115,7 +132,8 @@ address os::fetch_frame_from_context(const void* ucVoid, + *ret_fp = os::Linux::ucontext_get_fp(uc); + } + } else { +- epc = NULL; ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); + if (ret_sp != NULL) { + *ret_sp = (intptr_t *)NULL; + } +@@ -142,8 +160,8 @@ frame os::fetch_compiled_frame_from_context(const void* ucVoid) { + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +- address epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); +- return frame(frame_sp, frame_fp, epc); ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); + } + + // By default, gcc always saves frame pointer rfp on this stack. This +@@ -465,7 +483,7 @@ void os::print_context(outputStream *st, const void *context) { + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+- address pc = os::Posix::ucontext_get_pc(uc); ++ address pc = os::Linux::ucontext_get_pc(uc); + print_instructions(st, pc, sizeof(char)); + st->cr(); + } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 3100572e9fd..e46efc420b0 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -61,16 +61,17 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + + intptr_t* ret_fp = NULL; + intptr_t* ret_sp = NULL; +- address addr = os::fetch_frame_from_context(uc, &ret_sp, &ret_fp); +- if (addr == NULL || ret_sp == NULL ) { ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + +- frame ret_frame(ret_sp, ret_fp, addr); ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 +- frame ret_frame2(ret_sp, NULL, addr); ++ frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + +From 892b40a435ae3f7e85659100ef68db1aeda7ef23 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:33:50 +0800 +Subject: [PATCH 015/140] Revert JDK-8263002: Remove CDS MiscCode region + +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 ++++++++++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 6 ++++++ + 2 files changed, 16 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 4daed17df10..21aa3b58c09 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -187,6 +187,16 @@ bool SharedRuntime::is_wide_vector(int size) { + return false; + } + ++size_t SharedRuntime::trampoline_size() { ++ return 6 * NativeInstruction::instruction_size; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); ++ __ jalr(x0, t0, offset); ++} ++ + // The java_calling_convention describes stack locations as ideal slots on + // a frame with no abi restrictions. Since we must observe abi restrictions + // (like the placement of the register window) the slots must be biased by +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index e46efc420b0..31d9254d8ad 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -68,6 +68,12 @@ bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) + return false; + } + ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. 
++ return false; ++ } ++ + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(this)) { + #ifdef COMPILER2 + +From 945a317797bc96efe3f0717ca7258f081b96b14d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 15:52:43 +0800 +Subject: [PATCH 016/140] Revert JDK-8254158: Consolidate per-platform stack + overflow handling code + +--- + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 52 ++++++++++++++----- + 1 file changed, 40 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +index 4f1c84c60a0..8b772892b4b 100644 +--- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -145,18 +145,6 @@ ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + return epc; + } + +-frame os::fetch_compiled_frame_from_context(const void* ucVoid) { +- const ucontext_t* uc = (const ucontext_t*)ucVoid; +- // In compiled code, the stack banging is performed before RA +- // has been saved in the frame. RA is live, and SP and FP +- // belong to the caller. +- intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); +- intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); +- address frame_pc = (address)(uc->uc_mcontext.__gregs[REG_LR] +- - NativeInstruction::instruction_size); +- return frame(frame_sp, frame_fp, frame_pc); +-} +- + frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* frame_sp = NULL; + intptr_t* frame_fp = NULL; +@@ -164,6 +152,46 @@ frame os::fetch_frame_from_context(const void* ucVoid) { + return frame(frame_sp, frame_fp, epc.pc()); + } + ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ + // By default, gcc always saves frame pointer rfp on this stack. This + // may get turned off by -fomit-frame-pointer. 
+ frame os::get_sender_for_C_frame(frame* fr) { + +From c1a03e0a376cc2c8748d83d66b576b66ee2e6962 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:14:19 +0800 +Subject: [PATCH 017/140] Revert JDK-8202579: Revisit VM_Version and + VM_Version_ext for overlap and consolidation + +--- + .../cpu/riscv/vm_version_ext_riscv.cpp | 87 +++++++++++++++++++ + .../cpu/riscv/vm_version_ext_riscv.hpp | 55 ++++++++++++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 14 --- + 3 files changed, 142 insertions(+), 14 deletions(-) + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp new file mode 100644 -index 000000000..6852c0540 +index 00000000000..6bdce51506e --- /dev/null -+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -@@ -0,0 +1,221 @@ ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,87 @@ +/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +new file mode 100644 +index 00000000000..711e4aeaf68 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++ ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 2c15a834542..dd65f32277f 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -210,17 +210,3 @@ void VM_Version::c2_initialize() { + } + } + #endif // COMPILER2 +- +-void VM_Version::initialize_cpu_information(void) { +- // do nothing if cpu info has been initialized +- if (_initialized) { +- return; +- } +- +- _no_of_cores = os::processor_count(); +- _no_of_threads = _no_of_cores; +- _no_of_sockets = _no_of_cores; +- snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); +- snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); +- _initialized = true; +-} + +From 0cfdbd8595c710b71be008bb531b59acf9c4b016 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 17:16:05 +0800 +Subject: [PATCH 018/140] Revert JDK-8191278: MappedByteBuffer bulk access + memory failures are not handled gracefully + +--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 39416441bdf..8392b768847 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -1049,12 +1049,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, size); +- } ++ copy_memory(aligned, s, d, count, t0, size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -1122,12 +1117,7 @@ class StubGenerator: public StubCodeGenerator { + __ push_reg(RegSet::of(d, count), sp); + } + +- { +- // UnsafeCopyMemory page error: continue after ucm +- bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); +- UnsafeCopyMemoryMark ucmm(this, add_entry, true); +- copy_memory(aligned, s, d, count, t0, -size); +- } ++ copy_memory(aligned, s, d, count, t0, -size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); +@@ -3734,11 +3724,6 @@ class StubGenerator: public StubCodeGenerator { + ~StubGenerator() {} + }; // end class declaration + +-#define UCM_TABLE_MAX_ENTRIES 8 + void StubGenerator_generate(CodeBuffer* code, bool all) { +- if (UnsafeCopyMemory::_table == NULL) { +- UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); +- } +- + 
StubGenerator g(code, all); + } + +From dd6a7c520a5adeef5b6686c161554adcba61113f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 15:55:09 +0800 +Subject: [PATCH 019/140] Revert JDK-8282085: The REGISTER_DEFINITION macro is + useless after JDK-8269122 + +--- + .../cpu/riscv/register_definitions_riscv.cpp | 192 ++++++++++++++++++ + 1 file changed, 192 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +new file mode 100644 +index 00000000000..583f67573ca +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp +@@ -0,0 +1,192 @@ ++/* ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" ++ ++REGISTER_DEFINITION(Register, noreg); ++ ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++ ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); ++ ++REGISTER_DEFINITION(VectorRegister, vnoreg); ++ ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); ++REGISTER_DEFINITION(VectorRegister, v14); 
++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); ++ ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); ++ ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); ++ ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); + +From 561261b051d88ddb0053733f03cbefc75dedcea8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:03 +0800 +Subject: [PATCH 020/140] Revert JDK-7175279: Don't use x87 FPU on x86-64 + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 0e383a3c139..977563fe5f4 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -2019,6 +2019,18 @@ address LIR_Assembler::int_constant(jlong n) { + } + 
} + ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ + void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, + Assembler::rl /* release */, t0, true /* result as bool */); + +From ff4e1443fd000208714b506d52c0fab1c91e4ac8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:41:15 +0800 +Subject: [PATCH 021/140] Revert JDK-8255909: Remove unused delayed_value + methods + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 7 +++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 16 ++++++++++++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 4 ++++ + 3 files changed, 27 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 4923962a496..44e8d4b4ff1 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3027,6 +3027,13 @@ enum Nf { + Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ + // Stack overflow checking + virtual void bang_stack_with_offset(int offset) { Unimplemented(); } + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 269d76ba69e..878957cbede 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -191,6 +191,22 @@ void MacroAssembler::call_VM(Register oop_result, + void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} + void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ + // Calls to C land + // + // When entering C land, the fp, & esp of the last Java frame have to be recorded +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b59bdadb8bf..f23f7e7d1e6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -625,6 +625,10 @@ class MacroAssembler: public Assembler { + + void reserved_stack_check(); + ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); + +From afe35a3fdc705645bfe2a2e797a95ce1d5203872 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:51:39 
+0800 +Subject: [PATCH 022/140] Revert JDK-8263679: C1: Remove vtable call + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 977563fe5f4..a0ecc63d851 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1382,6 +1382,11 @@ void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + add_call_info(code_offset(), op->info()); + } + ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + assert((__ offset() % 4) == 0, "bad alignment"); + +From 655b34c00ec5ff6fa7e82de96a78a0c58ba91985 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 16:55:57 +0800 +Subject: [PATCH 023/140] Revert JDK-8264063: Outer Safepoint poll load should + not reference the head of inner strip mined loop. + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 8c7a8ede815..fcddf752564 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -952,6 +952,20 @@ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const + return align_up(current_offset, alignment_required()) - current_offset; + } + ++// Indicate if the safepoint node needs the polling page as an input ++ ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. that's better for scheduling ++// anyway. ++ ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} ++ + //============================================================================= + + #ifndef PRODUCT + +From 4a6f7dafdb4e0cf054b7867de60f789d4ca1d9f3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:26:29 +0800 +Subject: [PATCH 024/140] Revert: JDK-8266810: Move trivial Matcher code to + cpu-specific header files JDK-8254966: Remove unused code from Matcher + +--- + src/hotspot/cpu/riscv/matcher_riscv.hpp | 129 ------------------------ + src/hotspot/cpu/riscv/riscv.ad | 108 +++++++++++++++++++- + 2 files changed, 107 insertions(+), 130 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/matcher_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp +deleted file mode 100644 +index 4c7fabd7240..00000000000 +--- a/src/hotspot/cpu/riscv/matcher_riscv.hpp ++++ /dev/null +@@ -1,129 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. 
+- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_MATCHER_RISCV_HPP +-#define CPU_RISCV_MATCHER_RISCV_HPP +- +- // Defined within class Matcher +- +- // false => size gets scaled to BytesPerLong, ok. +- static const bool init_array_count_is_in_bytes = false; +- +- // riscv doesn't support misaligned vectors store/load on JDK11. +- static constexpr bool misaligned_vectors_ok() { +- return false; +- } +- +- // Whether code generation need accurate ConvI2L types. +- static const bool convi2l_type_required = false; +- +- // Does the CPU require late expand (see block.cpp for description of late expand)? +- static const bool require_postalloc_expand = false; +- +- // Do we need to mask the count passed to shift instructions or does +- // the cpu only look at the lower 5/6 bits anyway? +- static const bool need_masked_shift_count = false; +- +- static constexpr bool isSimpleConstant64(jlong value) { +- // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. +- // Probably always true, even if a temp register is required. +- return true; +- } +- +- // Use conditional move (CMOVL) +- static constexpr int long_cmove_cost() { +- // long cmoves are no more expensive than int cmoves +- return 0; +- } +- +- static constexpr int float_cmove_cost() { +- // float cmoves are no more expensive than int cmoves +- return 0; +- } +- +- // This affects two different things: +- // - how Decode nodes are matched +- // - how ImplicitNullCheck opportunities are recognized +- // If true, the matcher will try to remove all Decodes and match them +- // (as operands) into nodes. NullChecks are not prepared to deal with +- // Decodes by final_graph_reshaping(). +- // If false, final_graph_reshaping() forces the decode behind the Cmp +- // for a NullCheck. The matcher matches the Decode node into a register. +- // Implicit_null_check optimization moves the Decode along with the +- // memory operation back up before the NullCheck. +- static bool narrow_oop_use_complex_address() { +- return CompressedOops::shift() == 0; +- } +- +- static bool narrow_klass_use_complex_address() { +- return false; +- } +- +- static bool const_oop_prefer_decode() { +- // Prefer ConN+DecodeN over ConP in simple compressed oops mode. +- return CompressedOops::base() == NULL; +- } +- +- static bool const_klass_prefer_decode() { +- // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. +- return CompressedKlassPointers::base() == NULL; +- } +- +- // Is it better to copy float constants, or load them directly from +- // memory? Intel can load a float constant from a direct address, +- // requiring no extra registers. Most RISCs will have to materialize +- // an address into a register first, so they would do better to copy +- // the constant from stack. 
+- static const bool rematerialize_float_constants = false; +- +- // If CPU can load and store mis-aligned doubles directly then no +- // fixup is needed. Else we split the double into 2 integer pieces +- // and move it piece-by-piece. Only happens when passing doubles into +- // C code as the Java calling convention forces doubles to be aligned. +- static const bool misaligned_doubles_ok = true; +- +- // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. +- static const bool strict_fp_requires_explicit_rounding = false; +- +- // Are floats converted to double when stored to stack during +- // deoptimization? +- static constexpr bool float_in_double() { return false; } +- +- // Do ints take an entire long register or just half? +- // The relevant question is how the int is callee-saved: +- // the whole long is written but de-opt'ing will have to extract +- // the relevant 32 bits. +- static const bool int_in_long = true; +- +- // true means we have fast l2f convers +- // false means that conversion is done by runtime call +- static constexpr bool convL2FSupported(void) { +- return true; +- } +- +- // Implements a variant of EncodeISOArrayNode that encode ASCII only +- static const bool supports_encode_ascii_array = false; +- +-#endif // CPU_RISCV_MATCHER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fcddf752564..a9e5f2e6841 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -330,7 +330,9 @@ alloc_class chunk2(RFLAGS); + // Several register classes are automatically defined based upon information in + // this architecture description. + // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) +-// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) + // + + // Class for all 32 bit general purpose registers +@@ -1548,6 +1550,17 @@ bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + return (-4096 <= offs && offs < 4096); + } + ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} ++ ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ + // Vector width in bytes. + const int Matcher::vector_width_in_bytes(BasicType bt) { + return 0; +@@ -1567,6 +1580,94 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// RISC-V supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} ++ ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? 
++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} ++ ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++ ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = true; ++ ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} ++ ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } ++ ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; ++ + // Return whether or not this register is ever used as an argument. + // This function is used on startup to build the trampoline stubs in + // generateOptoStub. Registers not mentioned will be killed by the VM +@@ -1671,6 +1772,8 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + return true; + } + ++const bool Matcher::convi2l_type_required = false; ++ + // Should the Matcher clone input 'm' of node 'n'? + bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + assert_cond(m != NULL); +@@ -2250,6 +2353,9 @@ frame %{ + // Inline Cache Register or methodOop for I2C. + inline_cache_reg(R31); + ++ // Method Oop Register when calling interpreter. 
++ interpreter_method_oop_reg(R31); ++ + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + +From 4b0f20882cd9b5e5da92d61c2fa02e0cbea0ef0c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:30:42 +0800 +Subject: [PATCH 025/140] Revert JDK-8256238: Remove + Matcher::pass_original_key_for_aes + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a9e5f2e6841..0d1afd5584a 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ + // RISC-V supports misaligned vectors store/load. + const bool Matcher::misaligned_vectors_ok() { + return true; + +From 36d7ecedbcd95911d1b355bbab3e8fdf81b36e7d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:42:37 +0800 +Subject: [PATCH 026/140] Revert JDK-8242492: C2: Remove + Matcher::vector_shift_count_ideal_reg() + +--- + src/hotspot/cpu/riscv/riscv.ad | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0d1afd5584a..c10e91633a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1580,6 +1580,11 @@ const uint Matcher::vector_ideal_reg(int len) { + return 0; + } + ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ + // AES support not yet implemented + const bool Matcher::pass_original_key_for_aes() { + return false; + +From b78e448a460fcdc66553e66342e93e5ac87c0c61 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:47:13 +0800 +Subject: [PATCH 027/140] Revert JDK-8266937: Remove Compile::reshape_address + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c10e91633a5..2c5ec0451b8 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1801,6 +1801,9 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, + return clone_base_plus_offset_address(m, mstack, address_visited); + } + ++void Compile::reshape_address(AddPNode* addp) { ++} ++ + %} + + + +From cd34a5ce5d120cdac939217976d1e7b7e98bf654 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:49:09 +0800 +Subject: [PATCH 028/140] Revert JDK-8272771: frame::pd_ps() is not implemented + on any platform + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e7babe2c61..8e4f20fe561 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -683,6 +683,7 @@ frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { + init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); + } + ++void frame::pd_ps() {} + #endif + + void JavaFrameAnchor::make_walkable(JavaThread* thread) { + +From bdb16daf6d809d0c38256be99ecbe922d24b889b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:56:27 +0800 +Subject: [PATCH 029/140] Revert JDK-8268858: Determine register pressure + 
automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/riscv.ad | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2c5ec0451b8..a6aa52de29e 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1527,6 +1527,10 @@ const bool Matcher::has_predicated_vectors(void) { + return false; + } + ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From bbaa7a97b5d8110ead9dc44f31e2c5fe3bcd83d5 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 17:58:16 +0800 +Subject: [PATCH 030/140] Revert JDK-8253040: Remove unused + Matcher::regnum_to_fpu_offset() + +--- + src/hotspot/cpu/riscv/riscv.ad | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6aa52de29e..2d847cb6454 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1531,6 +1531,12 @@ const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; + } + ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} ++ + // Is this branch offset short enough that a short branch can be used? + // + // NOTE: If the platform does not provide any short branch variants, then + +From ce9ad0af72e405153534369bff1b1725697f3e40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 18:03:23 +0800 +Subject: [PATCH 031/140] Revert JDK-8254084: Remove + TemplateTable::pd_initialize + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 4e388ac4eaa..c9d399ccdaf 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -48,6 +48,12 @@ + + #define __ _masm-> + ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No RISC-V specific initialization ++} ++ + // Address computation: local variables + + static inline Address iaddress(int n) { + +From 49429187846e6f2b00ab2853e27097eae274a947 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:17:07 +0800 +Subject: [PATCH 032/140] Revert JDK-8224815: 8224815: Remove non-GC uses of + CollectedHeap::is_in_reserved() + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 878957cbede..cf01d7d74bb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1632,7 +1632,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + #ifdef ASSERT + { + ThreadInVMfromUnknown tiv; +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + oop_index = oop_recorder()->find_index(obj); +@@ -2800,7 +2800,7 @@ void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should 
only be used for compressed oops"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); +- assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } + #endif + int oop_index = oop_recorder()->find_index(obj); +@@ -2815,7 +2815,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(k); +- assert(!Universe::heap()->is_in(k), "should not be an oop"); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + +From a71fabb1ff05db9955557a888be6cd1b5f87deea Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 21:14:30 +0800 +Subject: [PATCH 033/140] Revert JDK-8253540: InterpreterRuntime::monitorexit + should be a JRT_LEAF function + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 8adc7b1320d..48957803fdc 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -839,7 +839,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + + if (UseHeavyMonitors) { +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + } else { + Label done; + +@@ -871,7 +873,9 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + + // Call the runtime routine for slow case. 
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj +- call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); + + bind(done); + + +From a0b18eea3c83ef8f1de2c1b3cd55452f0f6b9af2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Fri, 7 Apr 2023 12:51:33 +0800 +Subject: [PATCH 034/140] Revert JDK-8278387: riscv: Implement UseHeavyMonitors + consistently && JDK-8279826: riscv: Preserve result in native wrapper with + +UseHeavyMonitors + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +- + src/hotspot/cpu/riscv/riscv.ad | 92 +++++++++---------- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 85 ++++++++--------- + 3 files changed, 80 insertions(+), 105 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index a0ecc63d851..dd657963438 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -367,11 +367,7 @@ int LIR_Assembler::emit_unwind_handler() { + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r10_opr); + stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); +- if (UseHeavyMonitors) { +- __ j(*stub->entry()); +- } else { +- __ unlock_object(x15, x14, x10, *stub->entry()); +- } ++ __ unlock_object(x15, x14, x10, *stub->entry()); + __ bind(*stub->continuation()); + } + +@@ -1512,7 +1508,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); +- if (UseHeavyMonitors) { ++ if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2d847cb6454..29027d594a0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2109,40 +2109,36 @@ encode %{ + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Set tmp to be (markWord of object | UNLOCK_VALUE). +- __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +- +- // Initialize the box. (Must happen before we update the object mark!) +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- +- // Compare object markWord with an unlocked value (tmp) and if +- // equal exchange the stack address of our box with object markWord. +- // On failure disp_hdr contains the possibly locked markWord. +- __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/disp_hdr); +- __ mv(flag, zr); +- __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas +- +- assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +- +- // If the compare-and-exchange succeeded, then we found an unlocked +- // object, will have now locked it will continue at label cont +- // We did not see an unlocked object so try the fast recursive case. +- +- // Check if the owner is self by comparing the value in the +- // markWord of object (disp_hdr) with the stack pointer. 
+- __ sub(disp_hdr, disp_hdr, sp); +- __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); +- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, +- // hence we can store 0 as the displaced header in the box, which indicates that it is a +- // recursive lock. +- __ andr(tmp/*==0?*/, disp_hdr, tmp); +- __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- __ mv(flag, tmp); // we can use the value of tmp as the result here +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path +- } ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); ++ ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markWord of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here + + __ j(cont); + +@@ -2189,31 +2185,25 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + +- if (!UseHeavyMonitors) { +- // Find the lock address and load the displaced header from the stack. +- __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- // If the displaced header is 0, we have a recursive unlock. +- __ mv(flag, disp_hdr); +- __ beqz(disp_hdr, cont); +- } ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + + // Handle existing monitor. + __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); + __ andi(t0, disp_hdr, markOopDesc::monitor_value); + __ bnez(t0, object_has_monitor); + +- if (!UseHeavyMonitors) { +- // Check if it is still a light weight lock, this is true if we +- // see the stack address of the basicLock in the markWord of the +- // object. ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. 
+ +- __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, +- Assembler::rl, /*result*/tmp); +- __ xorr(flag, box, tmp); // box == tmp if cas succeeds +- } else { +- __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path +- } ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 21aa3b58c09..5203200b068 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1488,39 +1488,35 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + +- if (!UseHeavyMonitors) { +- // Load (object->mark() | 1) into swap_reg % x10 +- __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); +- __ ori(swap_reg, t0, 1); +- +- // Save (object->mark() | 1) into BasicLock's displaced header +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- +- // src -> dest if dest == x10 else x10 <- dest +- { +- Label here; +- __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); +- } ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); + +- // Test if the oopMark is an obvious stack pointer, i.e., +- // 1) (mark & 3) == 0, and +- // 2) sp <= mark < mark + os::pagesize() +- // These 3 tests can be done by evaluating the following +- // expression: ((mark - sp) & (3 - os::vm_page_size())), +- // assuming both stack pointer and pagesize have their +- // least significant 2 bits clear. +- // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg +- +- __ sub(swap_reg, swap_reg, sp); +- __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); +- +- // Save the test result, for recursive case, the result is zero +- __ sd(swap_reg, Address(lock_reg, mark_word_offset)); +- __ bnez(swap_reg, slow_path_lock); +- } else { +- __ j(slow_path_lock); ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); + } + ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ + // Slow path will re-enter here + __ bind(lock_done); + } +@@ -1608,31 +1604,24 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + +- if (!UseHeavyMonitors) { +- // Simple recursive lock? +- __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- __ beqz(t0, done); +- } +- ++ // Simple recursive lock? ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); + + // Must save x10 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + +- if (!UseHeavyMonitors) { +- // get address of the stack lock +- __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); +- // get old displaced header +- __ ld(old_hdr, Address(x10, 0)); ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); + +- // Atomic swap old header if oop still contains the stack lock +- Label succeed; +- __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); +- __ bind(succeed); +- } else { +- __ j(slow_path_unlock); +- } ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + +From 1e844b8019cb3516c0843826de2bd3fcd2222f41 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 4 Apr 2023 16:49:19 +0800 +Subject: [PATCH 035/140] Revert JDK-8258192: Obsolete the CriticalJNINatives + flag. CriticalJNINatives is unimplemented() even on AArch64. See + https://bugs.openjdk.org/browse/JDK-8254694. 
+ +Also following up 8191129: AARCH64: Invalid value passed to critical JNI function +--- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 3 ++- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + .../criticalnatives/argumentcorruption/CheckLongArgs.java | 2 +- + .../jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java | 2 +- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5203200b068..f8585afbdc2 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1111,7 +1111,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, +- BasicType ret_type) { ++ BasicType ret_type, ++ address critical_entry) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index dd65f32277f..c0491d23fa6 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -147,6 +147,8 @@ void VM_Version::initialize() { + #ifdef COMPILER2 + c2_initialize(); + #endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); + } + + #ifdef COMPILER2 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d25..2c866f26f08 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f93113..1da369fde23 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; + +From 58ad930e78501c6fad024e7ef05066ec19eb6219 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:04 +0800 +Subject: [PATCH 036/140] 8202976: Add C1 lea patching support for x86 (RISC-V + part) + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index dd657963438..46a20a64194 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1818,6 
+1818,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C + return; + } + ++ assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + +From 2074b8ec0ea3562f3999b4f4010b3f5b57dbe502 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 12:15:44 +0800 +Subject: [PATCH 037/140] Revert 8232365: Implementation for JEP 363: Remove + the Concurrent Mark Sweep (CMS) Garbage Collector + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 3 +++ + src/hotspot/cpu/riscv/riscv.ad | 27 +++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 845064d6cbc..50bbb6a77b8 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -63,6 +63,9 @@ define_pd_global(bool, RewriteFrequentPairs, true); + + define_pd_global(bool, PreserveFramePointer, false); + ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ + define_pd_global(uintx, TypeProfileLevel, 111); + + define_pd_global(bool, CompactStrings, true); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 29027d594a0..386ef731696 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -752,6 +752,9 @@ bool is_CAS(int opcode, bool maybe_volatile); + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_reserved(const Node *load); + ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); ++ + // predicate controlling addressing modes + bool size_fits_all_mem_uses(AddPNode* addp, int shift); + %} +@@ -874,6 +877,29 @@ bool needs_acquiring_load_reserved(const Node *n) + // so we can just return true here + return true; + } ++ ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; ++} ++ + #define __ _masm. 
+ + // advance declarations for helper functions to convert register +@@ -4566,6 +4592,7 @@ instruct loadConD0(fRegD dst, immD0 con) %{ + instruct storeimmCM0(immI0 zero, memory mem) + %{ + match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + + ins_cost(STORE_COST); + format %{ "storestore (elided)\n\t" + +From f838cf41b48c6bc17d052531ab5594de236b1302 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:06:58 +0800 +Subject: [PATCH 038/140] Revert 8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 3 +- + .../cpu/riscv/macroAssembler_riscv.cpp | 26 ++++++++++- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/riscv.ad | 43 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 +- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 6 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 48957803fdc..74dded77d19 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -515,7 +515,8 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); +- bool needs_thread_local_poll = generate_poll && table != safepoint_table; ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index cf01d7d74bb..73629e3dba3 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -264,6 +264,30 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, + } + } + ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. 
++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ + void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); +@@ -2137,7 +2161,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass, + bind(L_failure); + } + +-void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { ++void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(t1, Address(xthread, Thread::polling_page_offset())); + andi(t0, t1, SafepointMechanism::poll_bit()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index f23f7e7d1e6..8a2c6e07d88 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -44,7 +44,8 @@ class MacroAssembler: public Assembler { + } + virtual ~MacroAssembler() {} + +- void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); + + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 386ef731696..2dde4453dac 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1213,6 +1213,14 @@ const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); + } + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? 
++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; ++} ++ + //============================================================================= + + // Figure out which register class each belongs in: rc_int, rc_float or +@@ -1907,6 +1915,17 @@ encode %{ + __ li(dst_reg, 1); + %} + ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} ++ + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ + C2_MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); +@@ -2688,6 +2707,17 @@ operand immP_1() + interface(CONST_INTER); + %} + ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ + // Card Table Byte Map Base + operand immByteMapBase() + %{ +@@ -4476,6 +4506,19 @@ instruct loadConP1(iRegPNoSp dst, immP_1 con) + ins_pipe(ialu_imm); + %} + ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++ ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ + // Load Byte Map Base Constant + instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index f8585afbdc2..c501c8f7bac 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1573,7 +1573,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + +- __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); + __ bind(safepoint_in_progress_done); +@@ -2439,7 +2439,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + __ bind(noException); + + Label no_adjust, bail; +- if (!cause_return) { ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + // If our stashed return pc was modified by the runtime we avoid touching it + __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); + __ bne(x18, t0, no_adjust); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 76ae6f89e27..2d4baab2ab7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1143,7 +1143,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. 
+- __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); ++ __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + __ bind(L); + +From 13faeae35312c59a1366d4f9c84da7157f06efc7 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 22:15:14 +0800 +Subject: [PATCH 039/140] Revert 8253180: ZGC: Implementation of JEP 376: ZGC: + Concurrent Thread-Stack Processing + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 8 ++------ + src/hotspot/cpu/riscv/frame_riscv.hpp | 3 --- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 -------- + .../cpu/riscv/templateInterpreterGenerator_riscv.cpp | 9 --------- + 5 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 8e4f20fe561..b056eb2488a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -495,8 +495,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { + } + + //------------------------------------------------------------------------------ +-// frame::sender_raw +-frame frame::sender_raw(RegisterMap* map) const { ++// frame::sender ++frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + assert(map != NULL, "map must be set"); +@@ -521,10 +521,6 @@ frame frame::sender_raw(RegisterMap* map) const { + return frame(sender_sp(), link(), sender_pc()); + } + +-frame frame::sender(RegisterMap* map) const { +- return sender_raw(map); +-} +- + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index c06aaa9e391..3b88f6d5a1a 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -196,7 +196,4 @@ + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +- // returns the sending frame, without applying any barriers +- frame sender_raw(RegisterMap* map) const; +- + #endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 74dded77d19..4e642af87c4 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -571,7 +571,6 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + + // remove activation + // +-// Apply stack watermark barrier. + // Unlock the receiver if this is a synchronized method. + // Unlock any Java monitors from syncronized blocks. + // Remove the activation from the stack. +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index c501c8f7bac..d740c99c979 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1565,14 +1565,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + // check for safepoint operation in progress and/or pending suspend requests + { +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. 
We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. +- + __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); + __ bnez(t0, safepoint_in_progress); +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 2d4baab2ab7..a07dea35b73 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1134,15 +1134,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; +- +- // We need an acquire here to ensure that any subsequent load of the +- // global SafepointSynchronize::_state flag is ordered after this load +- // of the thread-local polling word. We don't want this poll to +- // return false (i.e. not safepointing) and a later poll of the global +- // SafepointSynchronize::_state spuriously to return true. +- // +- // This is to avoid a race when we're in a native->Java transition +- // racing the code which wakes up from a safepoint. + __ safepoint_poll_acquire(L); + __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); + __ beqz(t1, Continue); + +From 99ca43f1e7e74f161b40466f49fc61aa734d334d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 12:35:33 +0800 +Subject: [PATCH 040/140] JDK-8243155: AArch64: Add support for SqrtVF + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2dde4453dac..9da8a76c190 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7206,7 +7206,7 @@ instruct absD_reg(fRegD dst, fRegD src) %{ + %} + + instruct sqrtF_reg(fRegF dst, fRegF src) %{ +- match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ match(Set dst (SqrtF src)); + + ins_cost(FSQRT_COST); + format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} + +From 4bbd814dfbc33d3f1277dbb64f19a18f9f8c1a81 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 12 Apr 2023 15:11:49 +0800 +Subject: [PATCH 041/140] Revert JDK-8267098: AArch64: C1 StubFrames end + confusingly + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 52 ++++++++++----------- + 1 file changed, 24 insertions(+), 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index f523c9ed50a..1f58bde4df5 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -167,19 +167,14 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres + return call_RT(oop_result, metadata_result, entry, arg_num); + } + +-enum return_state_t { +- does_not_return, requires_return +-}; +- + // Implementation of StubFrame + + class StubFrame: public StackObj { + private: + StubAssembler* _sasm; +- bool _return_state; + + public: +- StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return); ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int 
offset_in_words, Register reg); + + ~StubFrame(); +@@ -197,9 +192,8 @@ void StubAssembler::epilogue() { + + #define __ _sasm-> + +-StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) { ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; +- _return_state = return_state; + __ prologue(name, must_gc_arguments); + } + +@@ -211,11 +205,7 @@ void StubFrame::load_argument(int offset_in_words, Register reg) { + + + StubFrame::~StubFrame() { +- if (_return_state == requires_return) { +- __ epilogue(); +- } else { +- __ should_not_reach_here(); +- } ++ __ epilogue(); + _sasm = NULL; + } + +@@ -378,6 +368,7 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe + assert_cond(oop_maps != NULL); + oop_maps->add_gc_map(call_offset, oop_map); + ++ __ should_not_reach_here(); + return oop_maps; + } + +@@ -425,7 +416,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + sasm->set_frame_size(frame_size); + break; + } +- default: ShouldNotReachHere(); ++ default: ++ __ should_not_reach_here(); ++ break; + } + + // verify that only x10 and x13 are valid at this time +@@ -481,6 +474,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler + break; + default: ShouldNotReachHere(); + } +@@ -641,13 +637,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_div0_exception_id: + { +- StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: +- { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments, does_not_return); ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; +@@ -926,14 +922,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_class_cast_exception_id: + { +- StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { +- StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, + CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } +@@ -1027,7 +1023,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case deoptimize_id: + { +- StubFrame f(sasm, "deoptimize", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + assert_cond(oop_map != NULL); + f.load_argument(0, c_rarg1); +@@ -1046,7 +1042,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + 
+ case throw_range_check_failed_id: + { +- StubFrame f(sasm, "range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; +@@ -1062,7 +1058,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case access_field_patching_id: + { +- StubFrame f(sasm, "access_field_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } +@@ -1070,7 +1066,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_klass_patching_id: + { +- StubFrame f(sasm, "load_klass_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } +@@ -1078,7 +1074,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_mirror_patching_id: + { +- StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } +@@ -1086,7 +1082,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case load_appendix_patching_id: + { +- StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } +@@ -1109,14 +1105,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case throw_index_exception_id: + { +- StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { +- StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); +@@ -1125,7 +1121,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + case predicate_failed_trap_id: + { +- StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + assert_cond(map != NULL); +@@ -1156,7 +1152,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + default: + { +- StubFrame f(sasm, "unimplemented entry", dont_gc_arguments, does_not_return); ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + +From eb37cfd42e7801c5ce64666c3cd25d40cfb22e76 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: 
Wed, 12 Apr 2023 18:06:40 +0800 +Subject: [PATCH 042/140] Revert JDK-8247691: [aarch64] Incorrect handling of + VM exceptions in C1 deopt stub/traps + +--- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 87 +++++++++++++++------ + 1 file changed, 65 insertions(+), 22 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f58bde4df5..1f45fba9de0 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -581,37 +581,80 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + #endif + __ reset_last_Java_frame(true); + +-#ifdef ASSERT +- // Check that fields in JavaThread for exception oop and issuing pc are empty +- Label oop_empty; +- __ ld(t0, Address(xthread, Thread::pending_exception_offset())); +- __ beqz(t0, oop_empty); +- __ stop("exception oop must be empty"); +- __ bind(oop_empty); ++ // check for pending exceptions ++ { Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler + +- Label pc_empty; +- __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); +- __ beqz(t0, pc_empty); +- __ stop("exception pc must be empty"); +- __ bind(pc_empty); ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. ++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ ++#ifdef ASSERT ++ // Check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); + #endif + +- // Runtime will return true if the nmethod has been deoptimized, this is the +- // expected scenario and anything else is an error. Note that we maintain a +- // check on the result purely as a defensive measure. +- Label no_deopt; +- __ beqz(x10, no_deopt); // Have we deoptimized? ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); + +- // Perform a re-execute. The proper return address is already on the stack, +- // we just need to restore registers, pop all of our frames but the return +- // address and jump to the deopt blob. ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. 
++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob + + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + +- __ bind(no_deopt); +- __ stop("deopt not performed"); ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); + + return oop_maps; + } + +From 3fa279b459fffd1bd1ce158a7fdaa9d8704450a8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:29:27 +0800 +Subject: [PATCH 043/140] Revert JDK-8212681: Refactor IC locking to use a fine + grained CompiledICLocker + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 3 +-- + 2 files changed, 2 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 75bc4be7840..4d1687301fc 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -113,10 +113,10 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + } + + void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); +- assert(CompiledICLocker::is_safe(stub), "mt unsafe call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 0a05c577860..459683735e9 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -146,8 +146,7 @@ address NativeCall::destination() const { + // during code generation, where no patching lock is needed. + void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || +- (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) || +- CompiledICLocker::is_safe(addr_at(0)), ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + +From 727f1a8f9b4a6dfbb0cf2002f12b86b5d5f23362 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:36:11 +0800 +Subject: [PATCH 044/140] Revert JDK-8225681: + vmTestbase/nsk/jvmti/RedefineClasses/StressRedefine fails due a) MT-unsafe + modification of inline cache + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 4d1687301fc..0b13e44c8d6 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -99,10 +99,15 @@ void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, ad + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); +-#ifdef ASSERT ++#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + +- verify_mt_safe(callee, entry, method_holder, jump); ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); + #endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + +From 26e37551ecc41db0cf8eeb775a5501b4f45b4ffa Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:39:52 +0800 +Subject: [PATCH 045/140] Revert JDK-8232046: AArch64 build failure after + JDK-8225681 + +--- + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 2 -- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 19 ++++--------------- + 2 files changed, 4 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 0b13e44c8d6..1cfc92b28fa 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -126,8 +126,6 @@ void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_ + NativeMovConstReg* method_holder + = nativeMovConstReg_at(stub + NativeFenceI::instruction_size()); + method_holder->set_data(0); +- NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +- jump->set_jump_destination((address)-1); + } + + //----------------------------------------------------------------------------- +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index 459683735e9..bfe84fa4e30 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -272,15 +272,9 @@ address NativeJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use sequence movptr_with_offset(r,0), jalr(r,0) +- // i.e. jump to 0 when we need leave space for a wide immediate +- // load +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; + return dest; + }; + +@@ -302,14 +296,9 @@ address NativeGeneralJump::jump_destination() const { + + // We use jump to self as the unresolved address which the inline + // cache code (and relocs) know about +- // As a special case we also use jump to 0 when first generating +- // a general jump +- +- // return -1 if jump to self or to 0 +- if ((dest == (address) this) || dest == 0) { +- dest = (address) -1; +- } + ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? 
(address) -1 : dest; + return dest; + } + + +From 4fc68bc3cd13e623276965947d6c8cb14da15873 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 18:47:08 +0800 +Subject: [PATCH 046/140] Revert JDK-8213084: Rework and enhance + Print[Opto]Assembly output + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 8 -------- + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 20 -------------------- + 2 files changed, 28 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 44e8d4b4ff1..b4e7287ce08 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -268,14 +268,6 @@ class Assembler : public AbstractAssembler { + + enum { instruction_size = 4 }; + +- //---< calculate length of instruction >--- +- // We just use the values set above. +- // instruction must start at passed address +- static unsigned int instr_len(unsigned char *instr) { return instruction_size; } +- +- //---< longest instructions >--- +- static unsigned int instr_maxlen() { return instruction_size; } +- + enum RoundingMode { + rne = 0b000, // round to Nearest, ties to Even + rtz = 0b001, // round towards Zero +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +index b0e5560c906..06bca5298cd 100644 +--- a/src/hotspot/cpu/riscv/disassembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -35,24 +35,4 @@ static const char* pd_cpu_opts() { + return ""; + } + +-// Returns address of n-th instruction preceding addr, +-// NULL if no preceding instruction can be found. +-// On riscv, we assume a constant instruction length. +-// It might be beneficial to check "is_readable" as we do on ppc and s390. +-static address find_prev_instr(address addr, int n_instr) { +- return addr - Assembler::instruction_size * n_instr; +-} +- +-// special-case instruction decoding. +-// There may be cases where the binutils disassembler doesn't do +-// the perfect job. In those cases, decode_instruction0 may kick in +-// and do it right. +-// If nothing had to be done, just return "here", otherwise return "here + instr_len(here)" +-static address decode_instruction0(address here, outputStream* st, address virtual_begin = NULL) { +- return here; +-} +- +-// platform-specific instruction annotations (like value of loaded constants) +-static void annotate(address pc, outputStream* st) {} +- + #endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP + +From f660c594eccb174c9779ebdc9ba40fe579aa50cc Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 13 Apr 2023 19:44:28 +0800 +Subject: [PATCH 047/140] Revert JDK-8241909: Remove useless code cache lookup + in frame::patch_pc + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index b056eb2488a..d03adc0bff4 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -270,7 +270,6 @@ bool frame::safe_for_sender(JavaThread *thread) { + } + + void frame::patch_pc(Thread* thread, address pc) { +- assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", +@@ -280,6 +279,7 @@ void frame::patch_pc(Thread* thread, address pc) { + // patch in the same address that's already there. 
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + +From 0d1ed436d9b70c9244c5de42fb492bbfa5e785e8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:10:06 +0800 +Subject: [PATCH 048/140] Revert JDK-8277411: C2 fast_unlock intrinsic on + AArch64 has unnecessary ownership check & JDK-8277180: Intrinsify recursive + ObjectMonitor locking for C2 x64 and A64 + +--- + src/hotspot/cpu/riscv/riscv.ad | 24 ++++-------------------- + 1 file changed, 4 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 9da8a76c190..c0fbda4f3f9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2204,16 +2204,6 @@ encode %{ + __ mv(tmp, (address)markOopDesc::unused_mark()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- __ beqz(flag, cont); // CAS success means locking succeeded +- +- __ bne(flag, xthread, cont); // Check for recursive locking +- +- // Recursive lock case +- __ mv(flag, zr); +- __ ld(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- __ add(tmp, tmp, 1u); +- __ sd(tmp, Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markOopDesc::monitor_value)); +- + __ bind(cont); + %} + +@@ -2257,18 +2247,12 @@ encode %{ + __ bind(object_has_monitor); + STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); + __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + +- Label notRecursive; +- __ beqz(disp_hdr, notRecursive); // Will be 0 if not recursive. +- +- // Recursive lock +- __ addi(disp_hdr, disp_hdr, -1); +- __ sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ mv(flag, zr); +- __ j(cont); +- +- __ bind(notRecursive); + __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); + __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); + __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. + +From cac7117dfc03023a81030e274944921df07bbead Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 21:13:21 +0800 +Subject: [PATCH 049/140] Revert JDK-8210381: Obsolete EmitSync + +--- + src/hotspot/cpu/riscv/riscv.ad | 100 ++++++++++++++++++++------------- + 1 file changed, 60 insertions(+), 40 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c0fbda4f3f9..c3ef648b21d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2150,9 +2150,17 @@ encode %{ + // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + ++ // Always do locking in runtime. 
++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Check for existing monitor +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); +@@ -2185,24 +2193,26 @@ encode %{ + __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); + __ mv(flag, tmp); // we can use the value of tmp as the result here + +- __ j(cont); +- +- // Handle existing monitor. +- __ bind(object_has_monitor); +- // The object's monitor m is unlocked iff m->owner == NULL, +- // otherwise m->owner may contain a thread or a stack address. +- // +- // Try to CAS m->owner from NULL to current thread. +- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); +- __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) +- +- // Store a non-null value into the box to avoid looking like a re-entrant +- // lock. The fast-path monitor unlock code checks for +- // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the +- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- __ mv(tmp, (address)markOopDesc::unused_mark()); +- __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); ++ ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } + + __ bind(cont); + %} +@@ -2220,6 +2230,12 @@ encode %{ + + assert_different_registers(oop, box, tmp, disp_hdr, flag); + ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -2228,9 +2244,11 @@ encode %{ + __ beqz(disp_hdr, cont); + + // Handle existing monitor. 
+- __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); +- __ andi(t0, disp_hdr, markOopDesc::monitor_value); +- __ bnez(t0, object_has_monitor); ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + + // Check if it is still a light weight lock, this is true if we + // see the stack address of the basicLock in the markWord of the +@@ -2244,23 +2262,25 @@ encode %{ + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // Handle existing monitor. +- __ bind(object_has_monitor); +- STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); +- __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor +- __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); +- __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. +- __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions +- __ bnez(flag, cont); +- +- __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); +- __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); +- __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. +- __ bnez(flag, cont); +- // need a release store here +- __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); +- __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); +- __ sd(zr, Address(tmp)); // set unowned ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); ++ ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. 
++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + + __ bind(cont); + %} + +From ca7ab86ee886233651e1a79faff631fd7e226d57 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 16 Apr 2023 22:07:21 +0800 +Subject: [PATCH 050/140] Revert JDK-8256425: Obsolete Biased Locking in JDK 18 + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 2 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 6 +- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 +- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 35 ++- + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 3 +- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 27 ++- + .../cpu/riscv/macroAssembler_riscv.cpp | 217 ++++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 28 +++ + src/hotspot/cpu/riscv/riscv.ad | 12 + + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 8 + + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 8 +- + 11 files changed, 341 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index b4e7287ce08..51aa052a0c7 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -3043,4 +3043,6 @@ enum Nf { + virtual ~Assembler() {} + }; + ++class BiasedLockingCounters; ++ + #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 46a20a64194..6a961ee2307 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1511,9 +1511,13 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + if (!UseFastLocking) { + __ j(*op->stub()->entry()); + } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible +- int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index e126f148cdf..c45a75b2301 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -277,6 +277,11 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { +@@ -285,7 +290,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); +- monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, ++ monitor_enter(obj.result(), lock, 
syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); + } + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 2d52343587e..e486f41948e 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -35,6 +35,7 @@ + #include "oops/arrayOop.hpp" + #include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +@@ -50,7 +51,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + } + } + +-int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); +@@ -62,7 +63,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // save object being locked into the BasicObjectLock + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + +- null_check_offset = offset(); ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } + + // Load object header + ld(hdr, Address(obj, hdr_offset)); +@@ -98,6 +104,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ add_memory_int32(Address(t1, 0), 1); ++ } + return null_check_offset; + } + +@@ -107,13 +117,21 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); +- // load object +- ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to +@@ -140,8 +158,13 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i + + void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { + assert_different_registers(obj, klass, len); +- // This assumes that all prototype bits fitr in an int32_t +- mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking && !len->is_valid()) { ++ 
assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fitr in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +index dfd3c17d7c7..1950cee5dd5 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +@@ -59,8 +59,9 @@ using MacroAssembler::null_check; + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information +- int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 4e642af87c4..f0c249f0d26 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -39,6 +39,7 @@ + #include "prims/jvmtiExport.hpp" + #include "prims/jvmtiThreadState.hpp" + #include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -782,6 +783,10 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + // Load object pointer into obj_reg c_rarg3 + ld(obj_reg, Address(lock_reg, obj_offset)); + ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); ++ } ++ + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); +@@ -792,7 +797,17 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + +- cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and +@@ -809,6 +824,12 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); ++ ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } + beqz(swap_reg, done); + + bind(slow_case); +@@ -861,6 +882,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + // Free entry + sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + ++ if (UseBiasedLocking) { ++ 
biased_locking_exit(obj_reg, header_reg, done); ++ } ++ + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 73629e3dba3..e557a134b5b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -41,6 +41,7 @@ + #include "oops/compressedOops.inline.hpp" + #include "oops/klass.inline.hpp" + #include "oops/oop.hpp" ++#include "runtime/biasedLocking.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/jniHandles.inline.hpp" + #include "runtime/sharedRuntime.hpp" +@@ -2791,6 +2792,222 @@ void MacroAssembler::reserved_stack_check() { + bind(no_reserved_zone_enabling); + } + ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. 
We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. 
++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ + // Move the address of the polling page into dest. + void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 8a2c6e07d88..c1ffa120774 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -47,6 +47,32 @@ class MacroAssembler: public Assembler { + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). 
If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ + // Place a fence.i after code may have been modified due to a safepoint. + void safepoint_ifence(); + +@@ -225,6 +251,8 @@ class MacroAssembler: public Assembler { + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); ++ + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (linke NULL) into a Register by giving + // the compiler two choices it can't resolve +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c3ef648b21d..c2a0be140e9 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2156,6 +2156,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } ++ + // Check for existing monitor + if ((EmitSync & 0x02) == 0) { + __ andi(t0, disp_hdr, markOopDesc::monitor_value); +@@ -2236,6 +2240,10 @@ encode %{ + return; + } + ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } ++ + // Find the lock address and load the displaced header from the stack. + __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +@@ -4961,6 +4969,10 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla + ins_pipe(pipe_serial); + %} + ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. 
+ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) + %{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index d740c99c979..eaefcc2b595 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1489,6 +1489,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // Load the oop from the handle + __ ld(obj_reg, Address(oop_handle_reg, 0)); + ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ } ++ + // Load (object->mark() | 1) into swap_reg % x10 + __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ ori(swap_reg, t0, 1); +@@ -1597,6 +1601,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + + Label done; + ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } ++ + // Simple recursive lock? + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index c9d399ccdaf..1e23fb4dc09 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3563,9 +3563,13 @@ void TemplateTable::_new() { + __ bnez(x13, loop); + } + +- // initialize object hader only. ++ // initialize object header only. + __ bind(initialize_header); +- __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + +From 864e551505bb816f3dc8a3bd1b065328ba7b5d65 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 17 Apr 2023 19:52:44 +0800 +Subject: [PATCH 051/140] Revert JDK-8227680: FastJNIAccessors: Check for JVMTI + field access event requests at runtime + +--- + .../cpu/riscv/jniFastGetField_riscv.cpp | 32 ++++--------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +index 814ed23e471..f6e7351c4fc 100644 +--- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -83,28 +83,10 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + // An even value means there are no ongoing safepoint operations + __ andi(t0, rcounter, 1); + __ bnez(t0, slow); +- +- if (JvmtiExport::can_post_field_access()) { +- // Using barrier to order wrt. JVMTI check and load of result. +- __ membar(MacroAssembler::LoadLoad); +- +- // Check to see if a field access watch has been set before we +- // take the fast path. +- int32_t offset2; +- __ la_patchable(result, +- ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), +- offset2); +- __ lwu(result, Address(result, offset2)); +- __ bnez(result, slow); +- +- __ mv(robj, c_rarg1); +- } else { +- // Using address dependency to order wrt. load of result. 
+- __ xorr(robj, c_rarg1, rcounter); +- __ xorr(robj, robj, rcounter); // obj, since +- // robj ^ rcounter ^ rcounter == robj +- // robj is address dependent on rcounter. +- } ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + + // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +@@ -137,10 +119,8 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + default: ShouldNotReachHere(); + } + +- // Using acquire: Order JVMTI check and load of result wrt. succeeding check +- // (LoadStore for volatile field). +- __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); +- ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); + __ lw(t0, safepoint_counter_addr); + __ bne(rcounter, t0, slow); + + +From b822b64cb6be38cb7806fda3d56675674557c163 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:34:32 +0800 +Subject: [PATCH 052/140] Revert JDK-8249768: Move static oops and + NullPointerException oops from Universe into OopStorage + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1e23fb4dc09..fbcdcf60d9c 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -411,7 +411,6 @@ void TemplateTable::fast_aldc(bool wide) + int32_t offset = 0; + __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); + __ ld(tmp, Address(rarg, offset)); +- __ resolve_oop_handle(tmp); + __ bne(result, tmp, notNull); + __ mv(result, zr); // NULL object reference + __ bind(notNull); + +From c82c482aa065ffd39eab6b87a0ad6c6cbca1e3af Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 18 Apr 2023 16:58:23 +0800 +Subject: [PATCH 053/140] Revert JDK-8217998: Remove method_type field + associated with the appendix field of an indy or method handle call + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index fbcdcf60d9c..158294f7436 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -3192,6 +3192,7 @@ void TemplateTable::prepare_invoke(int byte_no, + // since the parameter_size includes it. + __ push_reg(x9); + __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, x9); + __ pop_reg(x9); + __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
+ +From 3e50d62dd06c3f8bc586e3ab2b00f2f587d950bf Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:04:31 +0800 +Subject: [PATCH 054/140] Revert JDK-8277372: Add getters for BOT and card + table members + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 4 ++-- + .../riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 6 +++--- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 1c46b3947d3..6b75bf63781 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -215,7 +215,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + ExternalAddress cardtable((address) ct->byte_map_base()); + const Register card_addr = tmp; + +- __ srli(card_addr, store_addr, CardTable::card_shift()); ++ __ srli(card_addr, store_addr, CardTable::card_shift); + + // get the address of the card + __ load_byte_map_base(tmp2); +@@ -437,7 +437,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + assert_different_registers(card_offset, byte_map_base, t0); + + __ load_parameter(0, card_offset); +- __ srli(card_offset, card_offset, CardTable::card_shift()); ++ __ srli(card_offset, card_offset, CardTable::card_shift); + __ load_byte_map_base(byte_map_base); + + // Convert card offset into an address in card_addr +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a419f92b5f6..868d022ac74 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,7 +41,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + +- __ srli(obj, obj, CardTable::card_shift()); ++ __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); + +@@ -74,8 +74,8 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + __ shadd(end, count, start, count, LogBytesPerHeapOop); + __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive + +- __ srli(start, start, CardTable::card_shift()); +- __ srli(end, end, CardTable::card_shift()); ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); + __ sub(count, end, start); // number of bytes to copy + + __ load_byte_map_base(tmp); + +From 6a81a820e6c08cfdd8e29a835e953dabffdca98a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:30:58 +0800 +Subject: [PATCH 055/140] Revert JDK-8260941: Remove the conc_scan parameter + for CardTable + +--- + .../shared/cardTableBarrierSetAssembler_riscv.cpp | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index 868d022ac74..a476e5ec84d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -41,6 +41,9 @@ void 
CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); + ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + __ srli(obj, obj, CardTable::card_shift); + + assert(CardTable::dirty_card_val() == 0, "must be"); +@@ -56,6 +59,9 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + __ sb(zr, Address(tmp)); + __ bind(L_already_dirty); + } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + __ sb(zr, Address(tmp)); + } + } +@@ -66,6 +72,10 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); + ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ + Label L_loop, L_done; + const Register end = count; + +@@ -80,6 +90,9 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl + + __ load_byte_map_base(tmp); + __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } + + __ bind(L_loop); + __ add(tmp, start, count); + +From 24688cb665b16331b491bed2566dc97582a3d73c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:32:54 +0800 +Subject: [PATCH 056/140] Revert JDK-8220301: Remove jbyte use in CardTable + +Note: An assertion in `CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier` is removed. See the jdk11u backport for AArch64: https://mail.openjdk.org/pipermail/jdk-updates-dev/2019-August/001746.html +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 3 +++ + .../cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp | 1 + + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 3 +-- + 4 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index 6b75bf63781..b6786c6b327 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -196,6 +196,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +@@ -213,6 +214,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base()); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + __ srli(card_addr, store_addr, CardTable::card_shift); +@@ -419,6 +421,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +index a476e5ec84d..81d47d61d4c 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp +@@ -43,6 +43,7 @@ void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register ob + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + __ srli(obj, obj, CardTable::card_shift); + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e557a134b5b..6e4d22db40f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -2719,7 +2719,7 @@ void MacroAssembler::get_thread(Register thread) { + } + + void MacroAssembler::load_byte_map_base(Register reg) { +- CardTable::CardValue* byte_map_base = ++ jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + li(reg, (uint64_t)byte_map_base); + } +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c2a0be140e9..ca6a232e1e0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2735,8 +2735,7 @@ operand immByteMapBase() + %{ + // Get base of card map + predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && +- (CardTable::CardValue*)n->get_ptr() == +- ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); + match(ConP); + + op_cost(0); + +From 6ee27261d406342a5378d4a404319866a9bae804 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 19 Apr 2023 11:51:20 +0800 +Subject: [PATCH 057/140] Revert JDK-8230486: + G1BarrierSetAssembler::g1_write_barrier_post unnecessarily pushes/pops + new_val + +--- + src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index b6786c6b327..d724876ec3a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -250,7 +250,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + + __ bind(runtime); + // save the live input values +- RegSet saved = RegSet::of(store_addr); ++ RegSet saved = RegSet::of(store_addr, new_val); + __ push_reg(saved, sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ 
pop_reg(saved, sp);
+
+From 57067a358ffc1b54edfb305549bd460b0fca47f0 Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Fri, 21 Apr 2023 12:10:22 +0800
+Subject: [PATCH 058/140] Revert JDK-8242449: AArch64: r27 can be allocated in
+ CompressedOops mode
+
+---
+ src/hotspot/cpu/riscv/riscv.ad | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+index ca6a232e1e0..e3f976faa0d 100644
+--- a/src/hotspot/cpu/riscv/riscv.ad
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -4846,6 +4846,8 @@ instruct storeN(iRegN src, memory mem)
+ instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+ %{
+ match(Set mem (StoreN mem zero));
++ predicate(Universe::narrow_oop_base() == NULL &&
++ Universe::narrow_klass_base() == NULL);
+
+ ins_cost(STORE_COST);
+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
+
+From 0db520768d4d268a9dc641e301df45653c52f6eb Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl"
+Date: Sun, 23 Apr 2023 14:59:09 +0800
+Subject: [PATCH 059/140] A fix for interpreter frame verification code,
+ skipping the locals check if there are no locals. See one of the additional
+ commits in JDK-8286301, the RISC-V loom port.
+
+---
+ src/hotspot/cpu/riscv/frame_riscv.cpp | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
+index d03adc0bff4..13c482b610a 100644
+--- a/src/hotspot/cpu/riscv/frame_riscv.cpp
++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
+@@ -571,7 +571,16 @@ bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+
+ // validate locals
+ address locals = (address) *interpreter_frame_locals_addr();
+- if (locals > thread->stack_base() || locals < (address) fp()) {
++ if (locals > thread->stack_base()) {
++ return false;
++ }
++
++ if (m->max_locals() > 0 && locals < (address) fp()) {
++ // fp in interpreter frame on RISC-V is higher than that on AArch64,
++ // pointing to sender_sp and sender_sp-2 respectively.
++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp,
++ // pointing to sender_sp-1 (with one padding slot).
++ // So we verify the 'locals' pointer only if max_locals > 0.
+ return false; + } + + +From 795da5afe59658b4d89cd8501b4f4ec56471b14c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 11 Apr 2023 11:45:40 +0800 +Subject: [PATCH 060/140] ShenandoahGC adaptations on JDK11 for RISC-V backend + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 4 +- + .../c1/shenandoahBarrierSetC1_riscv.cpp | 2 +- + .../shenandoahBarrierSetAssembler_riscv.cpp | 229 +++++++++--------- + .../shenandoahBarrierSetAssembler_riscv.hpp | 15 +- + .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 88 ------- + src/hotspot/cpu/riscv/riscv.ad | 6 +- + .../templateInterpreterGenerator_riscv.cpp | 15 +- + 7 files changed, 146 insertions(+), 213 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 6a961ee2307..90c4af5d3b0 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1817,10 +1817,12 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { + + + void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { +- if (patch_code != lir_patch_none) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } ++#endif + + assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +index cd568cc723f..d19f5b859ce 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -103,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt + __ xchg(access.resolved_addr(), value_opr, result, tmp); + + if (access.is_oop()) { +- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), access.decorators()); ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); + LIR_Opr tmp_opr = gen->new_register(type); + __ move(result, tmp_opr); + result = tmp_opr; +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +index 84e1205bc25..b8534c52e77 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -27,7 +27,7 @@ + #include "gc/shenandoah/shenandoahBarrierSet.hpp" + #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" + #include "gc/shenandoah/shenandoahForwarding.hpp" +-#include "gc/shenandoah/shenandoahHeap.inline.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" + #include "gc/shenandoah/shenandoahHeapRegion.hpp" + #include "gc/shenandoah/shenandoahRuntime.hpp" + #include "gc/shenandoah/shenandoahThreadLocalData.hpp" +@@ -44,6 +44,8 @@ + + #define __ masm-> + ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; ++ + void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { +@@ -116,10 +118,10 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, + Address buffer(thread, 
in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? +- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { +- assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); +@@ -225,37 +227,21 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb + __ pop_reg(saved_regs, sp); + } + +-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, +- Register dst, +- Address load_addr, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, ++ Address load_addr) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); + assert_different_registers(load_addr.base(), t0, t1); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- bool is_narrow = UseCompressedOops && !is_native; +- +- Label heap_stable, not_cset; ++ Label done; + __ enter(); + Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + __ lbu(t1, gc_state); + + // Check for heap stability +- if (is_strong) { +- __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t1, heap_stable); +- } else { +- Label lrb; +- __ andi(t0, t1, ShenandoahHeap::WEAK_ROOTS); +- __ bnez(t0, lrb); +- __ andi(t0, t1, ShenandoahHeap::HAS_FORWARDED); +- __ beqz(t0, heap_stable); +- __ bind(lrb); +- } ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); + + // use x11 for load address + Register result_dst = dst; +@@ -270,43 +256,12 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, + __ la(x11, load_addr); + __ mv(x10, dst); + +- // Test for in-cset +- if (is_strong) { +- __ li(t1, (uint64_t)ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(t1, t1, t0); +- __ lbu(t1, Address(t1)); +- __ andi(t0, t1, 1); +- __ beqz(t0, not_cset); +- } ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + +- __ push_call_clobbered_registers(); +- if (is_strong) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } else if (is_weak) { +- if (is_narrow) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- } else { +- assert(is_phantom, "only remaining strength"); +- assert(!is_narrow, "phantom access cannot be narrow"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } +- __ jalr(ra); +- __ mv(t0, x10); +- __ pop_call_clobbered_registers(); +- __ mv(x10, t0); +- __ bind(not_cset); + __ mv(result_dst, x10); + __ pop_reg(saved_regs, sp); + +- __ bind(heap_stable); ++ __ bind(done); + __ 
leave(); + } + +@@ -320,6 +275,15 @@ void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register ds + } + } + ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} ++ + // + // Arguments: + // +@@ -363,7 +327,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, + + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + +- load_reference_barrier(masm, dst, src, decorators); ++ load_reference_barrier(masm, dst, src); + + if (dst != result_dst) { + __ mv(result_dst, dst); +@@ -555,7 +519,7 @@ void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, Shen + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); +@@ -568,12 +532,6 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + +- DecoratorSet decorators = stub->decorators(); +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- + Register obj = stub->obj()->as_register(); + Register res = stub->result()->as_register(); + Register addr = stub->addr()->as_pointer_register(); +@@ -587,30 +545,32 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble + __ mv(res, obj); + } + +- if (is_strong) { +- // Check for object in cset. +- __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); +- __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); +- __ add(tmp2, tmp2, tmp1); +- __ lbu(tmp2, Address(tmp2)); +- __ beqz(tmp2, *stub->continuation(), true /* is_far */); +- } ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); ++ ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); ++ ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); + ++ __ bind(slow_path); + ce->store_parameter(res, 0); + ce->store_parameter(addr, 1); +- +- if (is_strong) { +- if (is_native) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin())); +- } else { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin())); +- } +- } else if (is_weak) { +- __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); +- } else { +- assert(is_phantom, "only remaining strength"); +- __ far_call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin())); +- } ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + + __ j(*stub->continuation()); + } +@@ -664,8 +624,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss + __ epilogue(); + } + +-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, +- DecoratorSet decorators) { ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("shenandoah_load_reference_barrier", false); + // arg0 : object to be resolved + +@@ -673,31 +632,10 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + __ load_parameter(0, x10); + __ load_parameter(1, x11); + +- bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators); +- bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators); +- bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); +- bool is_native = ShenandoahBarrierSet::is_native_access(decorators); +- if (is_strong) { +- if (is_native) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } else { +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_strong); +- } +- } +- } else if (is_weak) { +- assert(!is_native, "weak must not be called off-heap"); +- if (UseCompressedOops) { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak_narrow); +- } else { +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_weak); +- } ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { +- assert(is_phantom, "only remaining strength"); +- assert(is_native, "phantom must only be called off-heap"); +- __ li(ra, (int64_t)(uintptr_t)ShenandoahRuntime::load_reference_barrier_phantom); ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + } + __ jalr(ra); + __ mv(t0, x10); +@@ -710,3 +648,68 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s + #undef __ + + #endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; ++} ++ ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +index a705f497667..5d75035e9d4 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -40,6 +40,8 @@ class StubCodeGenerator; + class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + private: + ++ static address _shenandoah_lrb; ++ + void satb_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, +@@ -57,17 +59,22 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); +- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, DecoratorSet decorators); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); + + public: + ++ static address shenandoah_lrb(); ++ + void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); + + #ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); + void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); +- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); + #endif + + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, +@@ -81,8 +88,10 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& 
slowpath); + +- void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, + Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); + }; + + #endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +index 6c855f23c2a..bab407a8b76 100644 +--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -176,48 +176,6 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP_DEF res, TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq_shenandoah" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- true /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- + instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ + match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); +@@ -237,49 +195,3 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva + + ins_pipe(pipe_slow); + %} +- +-instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapNAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 
1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +- +-instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +- predicate(needs_acquiring_load_reserved(n)); +- match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); +- ins_cost(10 * DEFAULT_COST); +- +- effect(TEMP tmp, KILL cr); +- format %{ +- "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapPAcq_shenandoah" +- "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" +- %} +- +- ins_encode %{ +- Register tmp = $tmp$$Register; +- __ mv(tmp, $oldval$$Register); // Must not clobber oldval. +- // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop +- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, +- Assembler::aq /* acquire */, Assembler::rl /* release */, +- false /* is_cae */, $res$$Register); +- %} +- +- ins_pipe(pipe_slow); +-%} +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e3f976faa0d..a6061de7a33 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -828,8 +828,10 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC + case Op_ShenandoahCompareAndSwapP: + case Op_ShenandoahCompareAndSwapN: ++#endif + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: +@@ -851,10 +853,6 @@ bool is_CAS(int opcode, bool maybe_volatile) + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: +- case Op_ShenandoahWeakCompareAndSwapP: +- case Op_ShenandoahWeakCompareAndSwapN: +- case Op_ShenandoahCompareAndExchangeP: +- case Op_ShenandoahCompareAndExchangeN: + return maybe_volatile; + default: + return false; +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a07dea35b73..5a87c687cf7 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -765,9 +765,18 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); + + // Get mirror and store it in the frame as GC root for this Method* +- __ load_mirror(t2, xmethod); +- __ sd(zr, Address(sp, 5 * wordSize)); +- __ sd(t2, Address(sp, 4 * wordSize)); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); ++ } + + __ ld(xcpool, Address(xmethod, Method::const_offset())); + __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); + +From d8b14fd5e6455b47cfcb02d13c0c24c74e824570 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 14:42:07 +0800 +Subject: [PATCH 061/140] Revert JDK-8248404: AArch64: Remove uses of long and + unsigned long + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 19 +++++++++++++------ + .../cpu/riscv/macroAssembler_riscv.cpp | 6 ------ + .../cpu/riscv/macroAssembler_riscv.hpp | 13 ++++++++----- + 3 files changed, 21 insertions(+), 17 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 51aa052a0c7..31aeeb9b425 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -183,13 +183,20 @@ class Address { + : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) + : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } +- +- template::value)> +- Address(Register r, T o) +- : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) {} +- ++ Address(Register r, int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned int o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ Address(Register r, unsigned long long o) ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) {} ++ : Address(r, in_bytes(disp)) { } + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 6e4d22db40f..b95f69cfcda 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1364,12 +1364,6 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + +-void MacroAssembler::mv(Register Rd, address addr) { +- // Here in case of use with relocation, use fix length instruciton +- // movptr instead of li +- movptr(Rd, addr); +-} +- + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c1ffa120774..76b2716659b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ 
b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -543,15 +543,18 @@ class MacroAssembler: public Assembler { + } + + // mv +- template::value)> +- inline void mv(Register Rd, T o) { +- li(Rd, (int64_t)o); +- } ++ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } ++ ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } + + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); +- void mv(Register Rd, address addr); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 94c1c9c01e61d0cb7c32596ef19b347c32406546 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 16:54:36 +0800 +Subject: [PATCH 062/140] Revert JDK-8280503: Use allStatic.hpp instead of + allocation.hpp where possible + +--- + src/hotspot/cpu/riscv/bytes_riscv.hpp | 2 -- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 1 - + 2 files changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp +index 23d982f9abd..f60e0e38ae8 100644 +--- a/src/hotspot/cpu/riscv/bytes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp +@@ -27,8 +27,6 @@ + #ifndef CPU_RISCV_BYTES_RISCV_HPP + #define CPU_RISCV_BYTES_RISCV_HPP + +-#include "memory/allStatic.hpp" +- + class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index 83ffcc55d83..bc4e5758256 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -27,7 +27,6 @@ + #define CPU_RISCV_JNITYPES_RISCV_HPP + + #include "jni.h" +-#include "memory/allStatic.hpp" + #include "oops/oop.hpp" + + // This file holds platform-dependent routines used to write primitive jni + +From 49e6399009b51edafa6904164528e1d051aeae6c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:07:31 +0800 +Subject: [PATCH 063/140] Revert JDK-8276453: Undefined behavior in C1 + LIR_OprDesc causes SEGV in fastdebug build + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 1 + + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index af7bd067f33..6057d43296b 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -58,7 +58,7 @@ RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + } + + RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) +- : _index(index), _array(), _throw_index_out_of_bounds_exception(true) { ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); + } +@@ -83,7 +83,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) { + if (_throw_index_out_of_bounds_exception) { + stub_id = 
Runtime1::throw_index_exception_id; + } else { +- assert(_array != LIR_Opr::nullOpr(), "sanity"); ++ assert(_array != NULL, "sanity"); + __ mv(t1, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 172031941b2..1f8b2b55100 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -156,8 +156,8 @@ LIR_Opr FrameMap::long11_opr; + LIR_Opr FrameMap::fpu10_float_opr; + LIR_Opr FrameMap::fpu10_double_opr; + +-LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; +-LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + + //-------------------------------------------------------- + // FrameMap +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index c45a75b2301..227e7664225 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -206,6 +206,7 @@ LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + break; + default: + ShouldNotReachHere(); ++ r = NULL; + } + return r; + } + +From b94bda9d1a2c12fa379f8fe813460c498344f543 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:19:19 +0800 +Subject: [PATCH 064/140] Revert JDK-8256205: Simplify compiler calling + convention handling + +--- + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 25 +++++++++++++++++++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 11 ++------ + 4 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +index 6057d43296b..12980c12de6 100644 +--- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -290,7 +290,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; +- SharedRuntime::java_calling_convention(signature, args, args_num); ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); + + // push parameters + Register r[args_num]; +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +index 1f8b2b55100..682ebe82627 100644 +--- a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -314,7 +314,7 @@ void FrameMap::initialize() { + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; +- SharedRuntime::java_calling_convention(&sig_bt, ®s, 1); ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index a6061de7a33..1667994699f 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2443,6 +2443,12 @@ frame %{ + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. 
The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); +@@ -2461,6 +2467,25 @@ frame %{ + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} ++ + // Location of compiled Java return values. Same as C for now. + return_value + %{ +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index eaefcc2b595..411bddd2ace 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -233,7 +233,8 @@ static int reg2offset_out(VMReg r) { + + int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, +- int total_args_passed) { ++ int total_args_passed, ++ int is_outgoing) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { +@@ -2155,14 +2156,6 @@ void SharedRuntime::generate_deopt_blob() { + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); + } + +-// Number of stack slots between incoming argument block and the start of +-// a new frame. The PROLOG must add this many slots to the stack. The +-// EPILOG must remove this many slots. +-// RISCV needs two words for RA (return address) and FP (frame pointer). 
+-uint SharedRuntime::in_preserve_stack_slots() { +- return 2 * VMRegImpl::slots_per_word; +-} +- + uint SharedRuntime::out_preserve_stack_slots() { + return 0; + } + +From 3fc948472c4a0918b967646b45c8886103b839d2 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:27:57 +0800 +Subject: [PATCH 065/140] Revert JDK-8183574: Unify the is_power_of_2 functions + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 3 +-- + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1 - + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 - + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 1 - + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 10 files changed, 3 insertions(+), 12 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 4c1c13dc290..65d0eda62ef 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -190,7 +190,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); +@@ -208,7 +208,7 @@ void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + } + break; + case lir_rem: +- assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 90c4af5d3b0..9de89a3b026 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -41,7 +41,6 @@ + #include "oops/objArrayKlass.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/sharedRuntime.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 227e7664225..a9345158749 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -38,7 +38,6 @@ + #include "ci/ciTypeArrayKlass.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + #ifdef ASSERT +@@ -383,7 +382,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant +- if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +index 1f45fba9de0..fc88d5c180e 100644 +--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -46,7 +46,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/vframe.hpp" + #include "runtime/vframeArray.hpp" +-#include "utilities/powerOfTwo.hpp" + #include "vmreg_riscv.inline.hpp" + + +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index f0c249f0d26..2fc0b00e2cb 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/thread.inline.hpp" +-#include "utilities/powerOfTwo.hpp" + + void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b95f69cfcda..41a415ef2cf 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -47,7 +47,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" + #include "opto/node.hpp" +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 76b2716659b..dd39f67d507 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -30,7 +30,6 @@ + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" + #include "oops/compressedOops.hpp" +-#include "utilities/powerOfTwo.hpp" + + // MacroAssembler extends Assembler by frequently used macros. 
+ // +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 8392b768847..0c5b0e001ee 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -45,7 +45,6 @@ + #include "runtime/stubRoutines.hpp" + #include "runtime/thread.inline.hpp" + #include "utilities/align.hpp" +-#include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 5a87c687cf7..a10677bf650 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -51,7 +51,6 @@ + #include "runtime/timer.hpp" + #include "runtime/vframeArray.hpp" + #include "utilities/debug.hpp" +-#include "utilities/powerOfTwo.hpp" + #include + + #ifndef PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 158294f7436..2a92fb9dd49 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -44,7 +44,6 @@ + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" + #include "runtime/synchronizer.hpp" +-#include "utilities/powerOfTwo.hpp" + + #define __ _masm-> + + +From 31b18aa6a29b83e2cae7ea76c5d4759b2596eca0 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:34:39 +0800 +Subject: [PATCH 066/140] Revert JDK-8276976: Rename LIR_OprDesc to LIR_Opr + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 14 +++++++------- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 9de89a3b026..70ee6295bfb 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -1261,7 +1261,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); +- assert(addr_ptr->index() == LIR_Opr::illegalOpr(), "need 0 index"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); +diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +index 5f1c394ab3d..0317ed9003e 100644 +--- a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +@@ -27,22 +27,22 @@ + #include "asm/register.hpp" + #include "c1/c1_LIR.hpp" + +-FloatRegister LIR_Opr::as_float_reg() const { ++FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); + } + +-FloatRegister LIR_Opr::as_double_reg() const { ++FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); + } + + // Reg2 unused. 
+ LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); +- return (LIR_Opr)(intptr_t)((reg1 << LIR_Opr::reg1_shift) | +- (reg1 << LIR_Opr::reg2_shift) | +- LIR_Opr::double_type | +- LIR_Opr::fpu_register | +- LIR_Opr::double_size); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); + } + + #ifndef PRODUCT + +From 2e64fa47eddc271d32b136ace4f062cfb9648b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:39:16 +0800 +Subject: [PATCH 067/140] Revert JDK-8269672: C1: Remove unaligned move on all + architectures + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp | 8 +++++--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 2 +- + 2 files changed, 6 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 70ee6295bfb..e29c0df5f8b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -673,7 +673,7 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po + } + } + +-void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; +@@ -795,7 +795,7 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + reg2stack(temp, dest, dest->type(), false); + } + +-void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + +@@ -910,11 +910,13 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + Label done; + move_op(opr2, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ j(done); + __ bind(label); + move_op(opr1, result, type, lir_patch_none, NULL, + false, // pop_fpu_stack ++ false, // unaligned + false); // wide + __ bind(done); + } +@@ -1866,7 +1868,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg + + void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { +- move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index d724876ec3a..bc847388f68 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ 
b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -340,7 +340,7 @@ void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrier + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { +- ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); + +From 5f15abe61c700cbf59805530c52e8e558354d552 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:54:05 +0800 +Subject: [PATCH 068/140] Revert JDK-8264805: Remove the experimental + Ahead-of-Time Compiler + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp | 1 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +index 051328c3a8a..5c81f1c704c 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +@@ -73,6 +73,7 @@ friend class ArrayCopyStub; + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, + // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +index 1cfc92b28fa..a29e5be9dbb 100644 +--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -86,7 +86,7 @@ int CompiledStaticCall::reloc_to_interp_stub() { + } + + void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { +@@ -138,7 +138,7 @@ void CompiledDirectStaticCall::verify() { + _call->verify_alignment(); + + // Verify stub. +- address stub = find_stub(); ++ address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder + +From 4cfd20c7d163188a1a4e63ffaa19708e15be9d96 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 17:59:45 +0800 +Subject: [PATCH 069/140] Revert JDK-8277417: C1 LIR instruction for load-klass + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 30 ++++++++----------- + 1 file changed, 12 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index e29c0df5f8b..49653d04d81 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -840,7 +840,14 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: +- __ ld(dest->as_register(), as_Address(from_addr)); ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } + break; + case T_INT: + __ lw(dest->as_register(), as_Address(from_addr)); +@@ -869,6 +876,10 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + __ decode_heap_oop(dest->as_register()); + } + __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } + } + } + +@@ -1531,23 +1542,6 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { + __ bind(*op->stub()->continuation()); + } + +-void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { +- Register obj = op->obj()->as_pointer_register(); +- Register result = op->result_opr()->as_pointer_register(); +- +- CodeEmitInfo* info = op->info(); +- if (info != NULL) { +- add_debug_info_for_null_check_here(info); +- } +- +- if (UseCompressedClassPointers) { +- __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- __ decode_klass_not_null(result); +- } else { +- __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); +- } +-} +- + void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + +From eb4de6fc8f9b6192d16343382ebbe4035ce71702 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:09:31 +0800 +Subject: [PATCH 070/140] Revert JDK-8245957: Remove unused LIR_OpBranch::type + after SPARC port removal + +--- + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index a9345158749..2aba4f4974f 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -393,7 +393,7 @@ void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); +@@ -467,7 +467,7 @@ void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + if 
(need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); +- __ branch(lir_cond_equal, new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; +@@ -1055,9 +1055,9 @@ void LIRGenerator::do_If(If* x) { + profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { +- __ branch(lir_cond(cond), x->tsux(), x->usux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { +- __ branch(lir_cond(cond), x->tsux()); ++ __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); + +From d34f25c618982d3ac79e6ab2a47b3a199434d01b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:14:10 +0800 +Subject: [PATCH 071/140] Revert JDK-8266950: Remove vestigial support for + non-strict floating-point execution + +--- + src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 4 ++++ + src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +index 65d0eda62ef..2a99d49c94b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -238,7 +238,9 @@ void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); +@@ -251,7 +253,9 @@ void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr rig + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index 2aba4f4974f..21ae066e9ab 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -360,7 +360,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + right.load_item(); + + LIR_Opr reg = rlock(x); +- arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); + } + +From 
02c0a84d52417d4aeddbdd10c07df446ee45c5de Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl" 
+Date: Tue, 25 Apr 2023 18:19:51 +0800
+Subject: [PATCH 072/140] Revert JDK-8276217: Harmonize StrictMath intrinsics
+ handling
+
+---
+ src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
+index 21ae066e9ab..f9242251491 100644
+--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
+@@ -651,16 +651,14 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+ do_LibmIntrinsic(x);
+ break;
+ case vmIntrinsics::_dabs: // fall through
+- case vmIntrinsics::_dsqrt: // fall through
+- case vmIntrinsics::_dsqrt_strict: {
++ case vmIntrinsics::_dsqrt: {
+ assert(x->number_of_arguments() == 1, "wrong type");
+ LIRItem value(x->argument_at(0), this);
+ value.load_item();
+ LIR_Opr dst = rlock_result(x);
+
+ switch (x->id()) {
+- case vmIntrinsics::_dsqrt: // fall through
+- case vmIntrinsics::_dsqrt_strict: {
++ case vmIntrinsics::_dsqrt: {
+ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+ break;
+ }
+
+From 8dbace163d42cbb41ff49463b34f8971437fe82f Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl" 
+Date: Tue, 25 Apr 2023 18:35:08 +0800
+Subject: [PATCH 073/140] Revert JDK-8276209: Some call sites doesn't pass the
+ parameter 'size' to SharedRuntime::dtrace_object_alloc(_base)
+
+---
+ src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 2 +-
+ src/hotspot/cpu/riscv/templateTable_riscv.cpp | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
+index fc88d5c180e..329df2e1ca7 100644
+--- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
+@@ -1186,7 +1186,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
+ save_live_registers(sasm);
+
+- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), c_rarg0);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0);
+
+ restore_live_registers(sasm);
+ }
+diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
+index 2a92fb9dd49..ddc9498dddc 100644
+--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp
++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
+@@ -3577,7 +3577,7 @@ void TemplateTable::_new() {
+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
+ // Trigger dtrace event for fastpath
+ __ push(atos); // save the return value
+- __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), x10);
++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10);
+ __ pop(atos); // restore the return value
+ }
+ __ j(done);
+
+From 8930b6049a5b6e31ec9409c167b0e58d24cf6821 Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl" 
+Date: Tue, 25 Apr 2023 18:38:51 +0800
+Subject: [PATCH 074/140] Revert JDK-8229838: Rename markOop files to markWord
+
+---
+ src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp | 1 -
+ src/hotspot/cpu/riscv/frame_riscv.cpp | 1 -
+ src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1 -
+ 3 files changed, 3 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp 
b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index e486f41948e..44ceccd8bd1 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -33,7 +33,6 @@ + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interpreter.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "runtime/basicLock.hpp" + #include "runtime/biasedLocking.hpp" + #include "runtime/os.hpp" +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 13c482b610a..050595389e9 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -29,7 +29,6 @@ + #include "interpreter/interpreter.hpp" + #include "memory/resourceArea.hpp" + #include "memory/universe.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/oop.inline.hpp" + #include "prims/methodHandles.hpp" +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 2fc0b00e2cb..006fe49b155 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -33,7 +33,6 @@ + #include "interpreter/interpreterRuntime.hpp" + #include "logging/log.hpp" + #include "oops/arrayOop.hpp" +-#include "oops/markWord.hpp" + #include "oops/method.hpp" + #include "oops/methodData.hpp" + #include "prims/jvmtiExport.hpp" + +From f11c5a2beca94c8248c30899fef90947d478e10c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:42:33 +0800 +Subject: [PATCH 075/140] Revert JDK-8235673: [C1, C2] Split inlining control + flags + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fe46f7b21c8..fd25f8f9afd 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -42,6 +42,7 @@ define_pd_global(bool, TieredCompilation, false); + define_pd_global(intx, CompileThreshold, 1500 ); + + define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); + define_pd_global(intx, NewSizeThreadIncrease, 4*K ); + define_pd_global(intx, InitialCodeCacheSize, 160*K); + define_pd_global(intx, ReservedCodeCacheSize, 32*M ); + +From 6908dc58f2c66ca6a5adf4444a7ec2a91a80b9c8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:45:00 +0800 +Subject: [PATCH 076/140] Revert JDK-8262074: Consolidate the default value of + MetaspaceSize + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index fd25f8f9afd..1c55a23eecf 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -53,6 +53,7 @@ define_pd_global(bool, ProfileInterpreter, false); + define_pd_global(intx, CodeCacheExpansionSize, 32*K ); + define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp 
b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 53a41665f4b..d9e5fcc1bb0 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -75,6 +75,9 @@ define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); + define_pd_global(uintx, CodeCacheMinBlockLength, 6); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ + // Ergonomics related flags + define_pd_global(bool, NeverActAsServerClassMachine, false); + + +From a3e991b37781d90c822471b54ace915622bee0da Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:48:15 +0800 +Subject: [PATCH 077/140] Revert JDK-8246023: Obsolete LIRFillDelaySlot + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 1c55a23eecf..bd8d039de03 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -60,6 +60,7 @@ define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); + ++define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + define_pd_global(bool, CSEArrayLength, false); + define_pd_global(bool, TwoOperandLIRForm, false); + +From 9f6082ae9810e6a26c6803cb37cce62297d15a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:50:27 +0800 +Subject: [PATCH 078/140] Revert JDK-8136414: Large performance penalty + declaring a method strictfp on strict-only platforms + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index bd8d039de03..16a87b7aced 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -59,6 +59,7 @@ define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !COMPILER2 + define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); + + define_pd_global(bool, LIRFillDelaySlots, false); + define_pd_global(bool, OptimizeSinglePrecision, true ); + +From fbf03fc61be068f7f7c8ca1ab3854cc05519c5a3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Tue, 25 Apr 2023 18:58:36 +0800 +Subject: [PATCH 079/140] Revert JDK-8251462: Simplify compilation policy + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 4 +- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 +- + .../templateInterpreterGenerator_riscv.cpp | 114 +++++++++--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 176 ++++++++++++------ + 5 files changed, 210 insertions(+), 88 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 16a87b7aced..8f2f4e0e81d 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -32,7 +32,7 @@ + // Sets the default values for platform dependent flags used by the client compiler. 
+ // (see c1_globals.hpp) + +-#ifndef COMPILER2 ++#ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); +@@ -57,7 +57,7 @@ define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); + define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); +-#endif // !COMPILER2 ++#endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); + define_pd_global(bool, RoundFPResults, true ); + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index d9e5fcc1bb0..6c301cdae04 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, true); + define_pd_global(bool, UseOnStackReplacement, true); + define_pd_global(bool, ProfileInterpreter, true); +-define_pd_global(bool, TieredCompilation, COMPILER1_PRESENT(true) NOT_COMPILER1(false)); ++define_pd_global(bool, TieredCompilation, trueInTiered); + define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 50bbb6a77b8..b78f258a764 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -36,7 +36,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +-define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. + define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index a10677bf650..8aea4eca048 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -556,31 +556,81 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, + // + // xmethod: method + // +-void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. +- int increment = InvocationCounter::count_increment; +- Label no_mdo; +- if (ProfileInterpreter) { +- // Are we profiling? 
+- __ ld(x10, Address(xmethod, Method::method_data_offset())); +- __ beqz(x10, no_mdo); +- // Increment counter in the MDO +- const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); +- __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); +- __ j(done); ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); ++ } ++ __ bind(done); + } +- __ bind(no_mdo); +- // Increment counter in MethodCounters +- const Address invocation_counter(t1, +- MethodCounters::invocation_counter_offset() + +- InvocationCounter::counter_offset()); +- __ get_method_counters(xmethod, t1, done); +- const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); +- __ 
increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); +- __ bind(done); + } + + void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { +@@ -977,7 +1027,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; +@@ -1389,8 +1439,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // increment invocation count & check for overflow + Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; + if (inc_counter) { +- generate_counter_incr(&invocation_counter_overflow); ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } + } + + Label continue_after_compile; +@@ -1427,6 +1484,15 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + + // invocation counter overflow + if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ j(profile_method_continue); ++ } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index ddc9498dddc..bb20f228447 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -1745,6 +1745,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; ++ Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches +@@ -1769,31 +1770,75 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + +- Label no_mdo; +- int increment = InvocationCounter::count_increment; +- if (ProfileInterpreter) { +- // Are we profiling? +- __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); +- __ beqz(x11, no_mdo); +- // Increment the MDO backedge counter +- const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + +- in_bytes(InvocationCounter::counter_offset())); +- const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); +- __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, +- x10, t0, false, ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, + UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); +- __ j(dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } + } +- __ bind(no_mdo); +- // Increment backedge counter in MethodCounters* +- __ ld(t0, Address(xmethod, Method::method_counters_offset())); +- const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); +- __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, +- x10, t1, false, +- UseOnStackReplacement ? 
&backedge_counter_overflow : &dispatch); + __ bind(dispatch); + } +- + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + +@@ -1802,52 +1847,63 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) + // xbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + +- if (UseLoopCounter && UseOnStackReplacement) { +- // invocation counter overflow +- __ bind(backedge_counter_overflow); +- __ neg(x12, x12); +- __ add(x12, x12, xbcp); // branch xbcp +- // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) +- __ call_VM(noreg, +- CAST_FROM_FN_PTR(address, +- InterpreterRuntime::frequency_counter_overflow), +- x12); +- __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode +- +- // x10: osr nmethod (osr ok) or NULL (osr not possible) +- // w11: target bytecode +- // x12: temporary +- __ beqz(x10, dispatch); // test result -- no osr if null +- // nmethod may have been invalidated (VM may block upon call_VM return) +- __ lbu(x12, Address(x10, nmethod::state_offset())); +- if (nmethod::in_use != 0) { +- __ sub(x12, x12, nmethod::in_use); ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); + } +- __ bnez(x12, dispatch); + +- // We have the address of an on stack replacement routine in x10 +- // We need to prepare to execute the OSR method. First we must +- // migrate the locals and monitors off of the stack. ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. 
+ +- __ mv(x9, x10); // save the nmethod ++ __ mv(x9, x10); // save the nmethod + +- call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + +- // x10 is OSR buffer, move it to expected parameter location +- __ mv(j_rarg0, x10); ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); + +- // remove activation +- // get sender esp +- __ ld(esp, +- Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); +- // remove frame anchor +- __ leave(); +- // Ensure compiled code always sees stack at proper alignment +- __ andi(sp, esp, -16); ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); + +- // and begin the OSR nmethod +- __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); +- __ jr(t0); ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } + } + } + + +From b1f3fd0510681324d70028443a3532d6084be504 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 11:37:05 +0800 +Subject: [PATCH 080/140] Revert JDK-8250902: Implement MD5 Intrinsics on x86 + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ---- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 30 +++++++++---------- + 2 files changed, 15 insertions(+), 20 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index c0491d23fa6..d4b79162d84 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -97,11 +97,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + +- if (UseMD5Intrinsics) { +- warning("MD5 intrinsics are not available on this CPU."); +- FLAG_SET_DEFAULT(UseMD5Intrinsics, false); +- } +- + if (UseRVV) { + if (!(_features & CPU_V)) { + warning("RVV is not supported on this CPU"); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +index 2ecfec07a4c..8566d57c391 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -24,7 +24,7 @@ + + package compiler.intrinsics.sha.cli.testcases; + +-import compiler.intrinsics.sha.cli.DigestOptionsBase; ++import compiler.intrinsics.sha.cli.SHAOptionsBase; + import jdk.test.lib.process.ExitCode; + import jdk.test.lib.Platform; + import jdk.test.lib.cli.CommandLineOptionTest; +@@ -36,7 +36,7 @@ + * which don't support instruction required by the tested option. 
+ */ + public class GenericTestCaseForUnsupportedRISCV64CPU extends +- DigestOptionsBase.TestCase { ++ SHAOptionsBase.TestCase { + + final private boolean checkUseSHA; + +@@ -46,7 +46,7 @@ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { + + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, +- new NotPredicate(DigestOptionsBase.getPredicateForOption( ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); + + this.checkUseSHA = checkUseSHA; +@@ -58,27 +58,27 @@ protected void verifyWarnings() throws Throwable { + + "option '-XX:-%s' without any warnings", optionName); + //Verify that option could be disabled without any warnings. + CommandLineOptionTest.verifySameJVMStartup(null, new String[] { +- DigestOptionsBase.getWarningForUnsupportedCPU(optionName) ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) + }, shouldPassMessage, shouldPassMessage, ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + + if (checkUseSHA) { + shouldPassMessage = String.format("If JVM is started with '-XX:-" + + "%s' '-XX:+%s', output should contain warning.", +- DigestOptionsBase.USE_SHA_OPTION, optionName); ++ SHAOptionsBase.USE_SHA_OPTION, optionName); + + // Verify that when the tested option is enabled, then + // a warning will occur in VM output if UseSHA is disabled. +- if (!optionName.equals(DigestOptionsBase.USE_SHA_OPTION)) { ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { + CommandLineOptionTest.verifySameJVMStartup( +- new String[] { DigestOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, + null, + shouldPassMessage, + shouldPassMessage, + ExitCode.OK, +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, +- CommandLineOptionTest.prepareBooleanFlag(DigestOptionsBase.USE_SHA_OPTION, false), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + } + } +@@ -90,7 +90,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be disabled by default", + optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + + if (checkUseSHA) { + // Verify that option is disabled even if it was explicitly enabled +@@ -98,7 +98,7 @@ protected void verifyOptionValues() throws Throwable { + CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if set to true directly", optionName), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + + // Verify that option is disabled when +UseSHA was passed to JVM. 
+@@ -106,10 +106,10 @@ protected void verifyOptionValues() throws Throwable { + String.format("Option '%s' should be off on unsupported " + + "RISCV64CPU even if %s flag set to JVM", + optionName, CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)), +- DigestOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag( +- DigestOptionsBase.USE_SHA_OPTION, true)); ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } + } + } + +From b5e96cb7663b2def3a064b9aede7209fb0c5eeda Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 27 Apr 2023 15:41:48 +0800 +Subject: [PATCH 081/140] Revert JDK-8253555: Make ByteSize and WordSize typed + scoped enums + +--- + src/hotspot/cpu/riscv/assembler_riscv.hpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 31aeeb9b425..9959ac1d02c 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -195,8 +195,10 @@ class Address { + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long long o) + : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++#ifdef ASSERT + Address(Register r, ByteSize disp) +- : Address(r, in_bytes(disp)) { } ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } ++#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), + _index(noreg), + +From 592afab705a4d4c8b2773a0808e47efc2a14517d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:18:12 +0800 +Subject: [PATCH 082/140] Revert JDK-8253457: Remove unimplemented register + stack functions + +--- + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 61e2cf85b63..313a7b932c3 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,15 +34,31 @@ + frame pd_last_frame(); + + public: ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); + private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). 
++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} + + #endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP + +From 28238cf776bd25c9805d9dd686c08fe8d3a1500b Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:22:30 +0800 +Subject: [PATCH 083/140] Revert JDK-8253539: Remove unused JavaThread + functions for set_last_Java_fp/pc + +--- + src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp | 3 +++ + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +index 9a6084afa1d..5a0c9b812fc 100644 +--- a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -83,4 +83,7 @@ + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ + #endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +index 313a7b932c3..4b91fa855ae 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -34,6 +34,9 @@ + frame pd_last_frame(); + + public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + +From f9322bb6235b603eac825c6e6751093ada1e6cfe Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 15:45:56 +0800 +Subject: [PATCH 084/140] Revert JDK-8269853: Prefetch::read should accept + pointer to const + +--- + src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +index a6432c84ec7..2bd48e09c34 100644 +--- a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -29,7 +29,7 @@ + #include "runtime/prefetch.hpp" + + +-inline void Prefetch::read (const void *loc, intx interval) { ++inline void Prefetch::read (void *loc, intx interval) { + } + + inline void Prefetch::write(void *loc, intx interval) { + +From aa6f7320d8d849b8e47b6e77a20257e3d99fd14f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 16:14:55 +0800 +Subject: [PATCH 085/140] Revert: JDK-8254231: Implementation of Foreign Linker + API (Incubator) JDK-8264774: Implementation of Foreign Function and Memory + API (Incubator) + +--- + .../cpu/riscv/foreign_globals_riscv.cpp | 44 ------------------- + .../cpu/riscv/foreign_globals_riscv.hpp | 32 -------------- + src/hotspot/cpu/riscv/frame_riscv.cpp | 15 ------- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 12 +---- + src/hotspot/cpu/riscv/riscv.ad | 5 --- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 10 +---- + .../riscv/universalNativeInvoker_riscv.cpp | 33 -------------- + .../cpu/riscv/universalUpcallHandle_riscv.cpp | 42 ------------------ + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 5 --- + 9 files changed, 2 insertions(+), 196 deletions(-) + delete mode 
100644 src/hotspot/cpu/riscv/foreign_globals_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/foreign_globals_riscv.hpp + delete mode 100644 src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp +deleted file mode 100644 +index 5c700be9c91..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.cpp ++++ /dev/null +@@ -1,44 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "prims/foreign_globals.hpp" +-#include "utilities/debug.hpp" +- +-// Stubbed out, implement later +-const ABIDescriptor ForeignGlobals::parse_abi_descriptor_impl(jobject jabi) const { +- Unimplemented(); +- return {}; +-} +- +-const BufferLayout ForeignGlobals::parse_buffer_layout_impl(jobject jlayout) const { +- Unimplemented(); +- return {}; +-} +- +-const CallRegs ForeignGlobals::parse_call_regs_impl(jobject jconv) const { +- ShouldNotCallThis(); +- return {}; +-} +diff --git a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp b/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp +deleted file mode 100644 +index 3ac89752c27..00000000000 +--- a/src/hotspot/cpu/riscv/foreign_globals_riscv.hpp ++++ /dev/null +@@ -1,32 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +-#define CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +- +-class ABIDescriptor {}; +-class BufferLayout {}; +- +-#endif // CPU_RISCV_FOREIGN_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 050595389e9..40ec584b994 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -361,21 +361,6 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { + return fr; + } + +-OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const { +- ShouldNotCallThis(); +- return nullptr; +-} +- +-bool frame::optimized_entry_frame_is_first() const { +- ShouldNotCallThis(); +- return false; +-} +- +-frame frame::sender_for_optimized_entry_frame(RegisterMap* map) const { +- ShouldNotCallThis(); +- return {}; +-} +- + //------------------------------------------------------------------------------ + // frame::verify_deopt_original_pc + // +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 1f7c0c87c21..3bf5cfb16c3 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -181,13 +181,6 @@ address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* + return NULL; + } + +- // No need in interpreter entry for linkToNative for now. +- // Interpreter calls compiled entry through i2c. +- if (iid == vmIntrinsics::_linkToNative) { +- __ ebreak(); +- return NULL; +- } +- + // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) +@@ -280,10 +273,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + +- if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) { +- if (iid == vmIntrinsics::_linkToNative) { +- assert(for_compiler_entry, "only compiler entry is supported"); +- } ++ if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); + } else { +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1667994699f..7ec76e72ff0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -945,11 +945,6 @@ int MachCallRuntimeNode::ret_addr_offset() { + } + } + +-int MachCallNativeNode::ret_addr_offset() { +- Unimplemented(); +- return -1; +-} +- + // + // Compute padding required for nodes which need alignment + // +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 411bddd2ace..897dafcc99c 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1037,7 +1037,7 @@ static void gen_special_dispatch(MacroAssembler* masm, + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = x9; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); +- } else if (iid == 
vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
++ } else if (iid == vmIntrinsics::_invokeBasic) {
+ has_receiver = true;
+ } else {
+ fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
+@@ -2566,14 +2566,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
+ }
+
+ #ifdef COMPILER2
+-RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
+- int shadow_space_bytes,
+- const GrowableArray<VMReg>& input_registers,
+- const GrowableArray<VMReg>& output_registers) {
+- Unimplemented();
+- return nullptr;
+-}
+-
+ //------------------------------generate_exception_blob---------------------------
+ // creates exception blob at the end
+ // Using exception blob, this code is jumped from a compiled method.
+diff --git a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp b/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp
+deleted file mode 100644
+index 4f50adb05c3..00000000000
+--- a/src/hotspot/cpu/riscv/universalNativeInvoker_riscv.cpp
++++ /dev/null
+@@ -1,33 +0,0 @@
+-/*
+- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+- *
+- * This code is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License version 2 only, as
+- * published by the Free Software Foundation.
+- *
+- * This code is distributed in the hope that it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+- * version 2 for more details (a copy is included in the LICENSE file that
+- * accompanied this code).
+- *
+- * You should have received a copy of the GNU General Public License version
+- * 2 along with this work; if not, write to the Free Software Foundation,
+- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+- *
+- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+- * or visit www.oracle.com if you need additional information or have any
+- * questions.
+- *
+- */
+-
+-#include "precompiled.hpp"
+-#include "prims/universalNativeInvoker.hpp"
+-#include "utilities/debug.hpp"
+-
+-address ProgrammableInvoker::generate_adapter(jobject jabi, jobject jlayout) {
+- Unimplemented();
+- return nullptr;
+-}
+diff --git a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp b/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp
+deleted file mode 100644
+index ce70da72f2e..00000000000
+--- a/src/hotspot/cpu/riscv/universalUpcallHandle_riscv.cpp
++++ /dev/null
+@@ -1,42 +0,0 @@
+-/*
+- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+- *
+- * This code is free software; you can redistribute it and/or modify it
+- * under the terms of the GNU General Public License version 2 only, as
+- * published by the Free Software Foundation.
+- *
+- * This code is distributed in the hope that it will be useful, but WITHOUT
+- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+- * version 2 for more details (a copy is included in the LICENSE file that
+- * accompanied this code). 
+- *
+- * You should have received a copy of the GNU General Public License version
+- * 2 along with this work; if not, write to the Free Software Foundation,
+- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+- *
+- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+- * or visit www.oracle.com if you need additional information or have any
+- * questions.
+- *
+- */
+-
+-#include "precompiled.hpp"
+-#include "prims/universalUpcallHandler.hpp"
+-#include "utilities/debug.hpp"
+-
+-address ProgrammableUpcallHandler::generate_upcall_stub(jobject jrec, jobject jabi, jobject jlayout) {
+- Unimplemented();
+- return nullptr;
+-}
+-
+-address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv) {
+- ShouldNotCallThis();
+- return nullptr;
+-}
+-
+-bool ProgrammableUpcallHandler::supports_optimized_upcalls() {
+- return false;
+-}
+diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
+index 1f6eff96cba..5d1187c2a27 100644
+--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp
++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
+@@ -49,8 +49,3 @@ void VMRegImpl::set_regName() {
+ regName[i] = "NON-GPR-FPR";
+ }
+ }
+-
+-VMReg VMRegImpl::vmStorageToVMReg(int type, int index) {
+- Unimplemented();
+- return VMRegImpl::Bad();
+-}
+
+From a5889735a97f3712bb649c454dee192d75457f96 Mon Sep 17 00:00:00 2001
+From: "yunyao.zxl" 
+Date: Sat, 29 Apr 2023 17:35:20 +0800
+Subject: [PATCH 086/140] Revert JDK-8256254: Convert vmIntrinsics::ID to enum
+ class
+
+---
+ src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 2 +-
+ src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 2 +-
+ src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+index 006fe49b155..1133e80a210 100644
+--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+@@ -1841,7 +1841,7 @@ void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret,
+ beq(t0, tmp, do_profile);
+ get_method(tmp);
+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
+- li(t1, static_cast<int>(vmIntrinsics::_compiledLambdaForm));
++ li(t1, vmIntrinsics::_compiledLambdaForm);
+ bne(t0, t1, profile_continue);
+ bind(do_profile);
+ }
+diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
+index 3bf5cfb16c3..4442b5991b1 100644
+--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
+@@ -411,7 +411,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+ }
+
+ default:
+- fatal("unexpected intrinsic %d: %s", vmIntrinsics::as_int(iid), vmIntrinsics::name_at(iid));
++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
+ break;
+ }
+
+diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+index 897dafcc99c..5b934b04e8e 100644
+--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+@@ -1040,7 +1040,7 @@ static void gen_special_dispatch(MacroAssembler* masm,
+ } else if (iid == vmIntrinsics::_invokeBasic) {
+ has_receiver = true;
+ } else {
+- fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
++ fatal("unexpected intrinsic id %d", iid);
+ }
+
+ if (member_reg != noreg) {
+
+From 
245d01e2cae27e41b875450f5f92751e4f36a095 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Thu, 6 Apr 2023 20:27:58 +0800 +Subject: [PATCH 087/140] Revert JDK-8216557: Aarch64: Add support for + Concurrent Class Unloading + +--- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 4 - + .../gc/shared/barrierSetAssembler_riscv.cpp | 71 -------- + .../gc/shared/barrierSetAssembler_riscv.hpp | 3 - + .../gc/shared/barrierSetNMethod_riscv.cpp | 171 ------------------ + .../cpu/riscv/macroAssembler_riscv.cpp | 35 +--- + .../cpu/riscv/macroAssembler_riscv.hpp | 2 - + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 16 -- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 7 - + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 49 ----- + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 1 - + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 6 - + 12 files changed, 5 insertions(+), 361 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 44ceccd8bd1..a6d1b1470f9 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -322,10 +322,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +- +- // Insert nmethod entry barrier into frame. +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(this); + } + + void C1_MacroAssembler::remove_frame(int framesize) { +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +index 3c115a2ea02..2b556b95d71 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -27,7 +27,6 @@ + #include "classfile/classLoaderData.hpp" + #include "gc/shared/barrierSet.hpp" + #include "gc/shared/barrierSetAssembler.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "gc/shared/collectedHeap.hpp" + #include "interpreter/interp_masm.hpp" + #include "memory/universe.hpp" +@@ -230,73 +229,3 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + } + __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); + } +- +-void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- +- if (bs_nm == NULL) { +- return; +- } +- +- // RISCV atomic operations require that the memory address be naturally aligned. +- __ align(4); +- +- Label skip, guard; +- Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); +- +- __ lwu(t0, guard); +- +- // Subsequent loads of oops must occur after load of guard value. +- // BarrierSetNMethod::disarm sets guard with release semantics. +- __ membar(MacroAssembler::LoadLoad); +- __ lwu(t1, thread_disarmed_addr); +- __ beq(t0, t1, skip); +- +- int32_t offset = 0; +- __ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset); +- __ jalr(ra, t0, offset); +- __ j(skip); +- +- __ bind(guard); +- +- assert(__ offset() % 4 == 0, "bad alignment"); +- __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
+- +- __ bind(skip); +-} +- +-void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { +- BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs == NULL) { +- return; +- } +- +- Label bad_call; +- __ beqz(xmethod, bad_call); +- +- // Pointer chase to the method holder to find out if the method is concurrently unloading. +- Label method_live; +- __ load_method_holder_cld(t0, xmethod); +- +- // Is it a strong CLD? +- __ lwu(t1, Address(t0, ClassLoaderData::keep_alive_offset())); +- __ bnez(t1, method_live); +- +- // Is it a weak but alive CLD? +- __ push_reg(RegSet::of(x28, x29), sp); +- +- __ ld(x28, Address(t0, ClassLoaderData::holder_offset())); +- +- // Uses x28 & x29, so we must pass new temporaries. +- __ resolve_weak_handle(x28, x29); +- __ mv(t0, x28); +- +- __ pop_reg(RegSet::of(x28, x29), sp); +- +- __ bnez(t0, method_live); +- +- __ bind(bad_call); +- +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(method_live); +-} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +index b85f7f5582b..984d94f4c3d 100644 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -28,7 +28,6 @@ + + #include "asm/macroAssembler.hpp" + #include "gc/shared/barrierSet.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" + #include "memory/allocation.hpp" + #include "oops/access.hpp" + +@@ -71,8 +70,6 @@ class BarrierSetAssembler: public CHeapObj { + ); + virtual void barrier_stubs_init() {} + +- virtual void nmethod_entry_barrier(MacroAssembler* masm); +- virtual void c2i_entry_barrier(MacroAssembler* masm); + virtual ~BarrierSetAssembler() {} + }; + +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +deleted file mode 100644 +index ae7ee4c5a44..00000000000 +--- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp ++++ /dev/null +@@ -1,171 +0,0 @@ +-/* +- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#include "precompiled.hpp" +-#include "code/codeCache.hpp" +-#include "code/nativeInst.hpp" +-#include "gc/shared/barrierSetNMethod.hpp" +-#include "logging/log.hpp" +-#include "memory/resourceArea.hpp" +-#include "runtime/sharedRuntime.hpp" +-#include "runtime/registerMap.hpp" +-#include "runtime/thread.hpp" +-#include "utilities/align.hpp" +-#include "utilities/debug.hpp" +- +-class NativeNMethodBarrier: public NativeInstruction { +- address instruction_address() const { return addr_at(0); } +- +- int *guard_addr() { +- /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ +- return reinterpret_cast(instruction_address() + 12 * 4); +- } +- +-public: +- int get_value() { +- return Atomic::load_acquire(guard_addr()); +- } +- +- void set_value(int value) { +- Atomic::release_store(guard_addr(), value); +- } +- +- void verify() const; +-}; +- +-// Store the instruction bitmask, bits and name for checking the barrier. +-struct CheckInsn { +- uint32_t mask; +- uint32_t bits; +- const char *name; +-}; +- +-static const struct CheckInsn barrierInsn[] = { +- { 0x00000fff, 0x00000297, "auipc t0, 0 "}, +- { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, +- { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, +- { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, +- { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, +- { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, +- { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, +- { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, +- { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, +- { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, +- { 0x00000fff, 0x0000006f, "j skip "} +- /* guard: */ +- /* 32bit nmethod guard value */ +- /* skip: */ +-}; +- +-// The encodings must match the instructions emitted by +-// BarrierSetAssembler::nmethod_entry_barrier. The matching ignores the specific +-// register numbers and immediate values in the encoding. +-void NativeNMethodBarrier::verify() const { +- intptr_t addr = (intptr_t) instruction_address(); +- for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { +- uint32_t inst = *((uint32_t*) addr); +- if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { +- tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); +- fatal("not an %s instruction.", barrierInsn[i].name); +- } +- addr += 4; +- } +-} +- +- +-/* We're called from an nmethod when we need to deoptimize it. We do +- this by throwing away the nmethod's frame and jumping to the +- ic_miss stub. This looks like there has been an IC miss at the +- entry of the nmethod, so we resolve the call, which will fall back +- to the interpreter if the nmethod has been unloaded. 
*/ +-void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { +- +- typedef struct { +- intptr_t *sp; intptr_t *fp; address ra; address pc; +- } frame_pointers_t; +- +- frame_pointers_t *new_frame = (frame_pointers_t *)(return_address_ptr - 5); +- +- JavaThread *thread = JavaThread::current(); +- RegisterMap reg_map(thread, false); +- frame frame = thread->last_frame(); +- +- assert(frame.is_compiled_frame() || frame.is_native_frame(), "must be"); +- assert(frame.cb() == nm, "must be"); +- frame = frame.sender(®_map); +- +- LogTarget(Trace, nmethod, barrier) out; +- if (out.is_enabled()) { +- ResourceMark mark; +- log_trace(nmethod, barrier)("deoptimize(nmethod: %s(%p), return_addr: %p, osr: %d, thread: %p(%s), making rsp: %p) -> %p", +- nm->method()->name_and_sig_as_C_string(), +- nm, *(address *) return_address_ptr, nm->is_osr_method(), thread, +- thread->name(), frame.sp(), nm->verified_entry_point()); +- } +- +- new_frame->sp = frame.sp(); +- new_frame->fp = frame.fp(); +- new_frame->ra = frame.pc(); +- new_frame->pc = SharedRuntime::get_handle_wrong_method_stub(); +-} +- +-// This is the offset of the entry barrier from where the frame is completed. +-// If any code changes between the end of the verified entry where the entry +-// barrier resides, and the completion of the frame, then +-// NativeNMethodCmpBarrier::verify() will immediately complain when it does +-// not find the expected native instruction at this offset, which needs updating. +-// Note that this offset is invariant of PreserveFramePointer. +- +-// see BarrierSetAssembler::nmethod_entry_barrier +-// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 +-static const int entry_barrier_offset = -4 * 13; +- +-static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { +- address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; +- NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); +- debug_only(barrier->verify()); +- return barrier; +-} +- +-void BarrierSetNMethod::disarm(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return; +- } +- +- // Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier. +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- +- barrier->set_value(disarmed_value()); +-} +- +-bool BarrierSetNMethod::is_armed(nmethod* nm) { +- if (!supports_entry_barrier(nm)) { +- return false; +- } +- +- NativeNMethodBarrier* barrier = native_nmethod_barrier(nm); +- return barrier->get_value() != disarmed_value(); +-} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 41a415ef2cf..a75bd9dfa89 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1638,10 +1638,10 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, + beq(trial_klass, tmp, L); + } + +-// Move an oop into a register. immediate is true if we want +-// immediate instructions and nmethod entry barriers are not enabled. +-// i.e. we are not going to patch this instruction while the code is being +-// executed by another thread. ++// Move an oop into a register. immediate is true if we want ++// immediate instructions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
+ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { +@@ -1656,11 +1656,7 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = oop_Relocation::spec(oop_index); +- +- // nmethod entry barrier necessitate using the constant pool. They have to be +- // ordered with respected to oop access. +- // Using immediate literals would necessitate fence.i. +- if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) { ++ if (!immediate) { + address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address + ld_constant(dst, Address(dummy, rspec)); + } else +@@ -1738,22 +1734,6 @@ void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); + } + +-// ((WeakHandle)result).resolve() +-void MacroAssembler::resolve_weak_handle(Register result, Register tmp) { +- assert_different_registers(result, tmp); +- Label resolved; +- +- // A null weak handle resolves to null. +- beqz(result, resolved); +- +- // Only 64 bit platforms support GCs that require a tmp register +- // Only IN_HEAP loads require a thread_tmp register +- // WeakHandle::resolve is an indirection like jweak. +- access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, +- result, Address(result), tmp, noreg /* tmp_thread */); +- bind(resolved); +-} +- + void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { +@@ -3195,11 +3175,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder_cld(Register result, Register method) { +- load_method_holder(result, method); +- ld(result, Address(result, InstanceKlass::class_loader_data_offset())); +-} +- + void MacroAssembler::load_method_holder(Register holder, Register method) { + ld(holder, Address(method, Method::const_offset())); // ConstMethod* + ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index dd39f67d507..b16fe904888 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -207,7 +207,6 @@ class MacroAssembler: public Assembler { + virtual void check_and_handle_earlyret(Register java_thread); + virtual void check_and_handle_popframe(Register java_thread); + +- void resolve_weak_handle(Register result, Register tmp); + void resolve_oop_handle(Register result, Register tmp = x15); + void resolve_jobject(Register value, Register thread, Register tmp); + +@@ -673,7 +672,6 @@ class MacroAssembler: public Assembler { + void cmpptr(Register src1, Address src2, Label& equal); + + void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void load_method_holder_cld(Register result, Register method); + void load_method_holder(Register holder, Register method); + + void compute_index(Register str1, Register trailing_zeros, Register match_mask, +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +index 228a64eae2c..047ea2276ca 100644 +--- a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -41,7 +41,6 @@ 
void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; +- // in movoop when BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7ec76e72ff0..0a1838695e1 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1068,17 +1068,6 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + st->print("sd ra, [sp, #%d]\n\t", - wordSize); + if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); +- +- if (C->stub_function() == NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) { +- st->print("ld t0, [guard]\n\t"); +- st->print("membar LoadLoad\n\t"); +- st->print("ld t1, [xthread, #thread_disarmed_offset]\n\t"); +- st->print("beq t0, t1, skip\n\t"); +- st->print("jalr #nmethod_entry_barrier_stub\n\t"); +- st->print("j skip\n\t"); +- st->print("guard: int\n\t"); +- st->print("skip:\n\t"); +- } + } + #endif + +@@ -1114,11 +1103,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + __ build_frame(framesize); + +- if (C->stub_function() == NULL) { +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->nmethod_entry_barrier(&_masm); +- } +- + if (VerifyStackAtCalls) { + Unimplemented(); + } +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 5b934b04e8e..326ba62fcb0 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -642,9 +642,6 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + c2i_no_clinit_check_entry = __ pc(); + } + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- bs->c2i_entry_barrier(masm); +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +@@ -1290,10 +1287,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // -2 because return address is already present and so is saved fp + __ sub(sp, sp, stack_size - 2 * wordSize); + +- BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); +- assert_cond(bs != NULL); +- bs->nmethod_entry_barrier(masm); +- + // Frame is now completed as far as size and linkage. 
+ int frame_complete = ((intptr_t)__ pc()) - start; + +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 0c5b0e001ee..74c38c3d044 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -2352,50 +2352,6 @@ class StubGenerator: public StubCodeGenerator { + return entry; + } + +- address generate_method_entry_barrier() { +- __ align(CodeEntryAlignment); +- StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); +- +- Label deoptimize_label; +- +- address start = __ pc(); +- +- __ set_last_Java_frame(sp, fp, ra, t0); +- +- __ enter(); +- __ add(t1, sp, wordSize); +- +- __ sub(sp, sp, 4 * wordSize); +- +- __ push_call_clobbered_registers(); +- +- __ mv(c_rarg0, t1); +- __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier), 1); +- +- __ reset_last_Java_frame(true); +- +- __ mv(t0, x10); +- +- __ pop_call_clobbered_registers(); +- +- __ bnez(t0, deoptimize_label); +- +- __ leave(); +- __ ret(); +- +- __ BIND(deoptimize_label); +- +- __ ld(t0, Address(sp, 0)); +- __ ld(fp, Address(sp, wordSize)); +- __ ld(ra, Address(sp, wordSize * 2)); +- __ ld(t1, Address(sp, wordSize * 3)); +- +- __ mv(sp, t0); +- __ jr(t1); +- +- return start; +- } +- + // x10 = result + // x11 = str1 + // x12 = cnt1 +@@ -3703,11 +3659,6 @@ class StubGenerator: public StubCodeGenerator { + + generate_string_indexof_stubs(); + +- BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); +- if (bs_nm != NULL) { +- StubRoutines::riscv::_method_entry_barrier = generate_method_entry_barrier(); +- } +- + StubRoutines::riscv::set_completed(); + } + +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +index 395a2d338e4..9202d9ec4b0 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -53,6 +53,5 @@ address StubRoutines::riscv::_string_indexof_linear_ll = NULL; + address StubRoutines::riscv::_string_indexof_linear_uu = NULL; + address StubRoutines::riscv::_string_indexof_linear_ul = NULL; + address StubRoutines::riscv::_large_byte_array_inflate = NULL; +-address StubRoutines::riscv::_method_entry_barrier = NULL; + + bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +index 51f07819c33..0c9445e18a7 100644 +--- a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -67,8 +67,6 @@ class riscv { + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; + +- static address _method_entry_barrier; +- + static bool _completed; + + public: +@@ -145,10 +143,6 @@ class riscv { + return _large_byte_array_inflate; + } + +- static address method_entry_barrier() { +- return _method_entry_barrier; +- } +- + static bool complete() { + return _completed; + } + +From aee31440dde84c54449b5c0dbdfb43b4d3826f5a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 17:59:40 +0800 +Subject: [PATCH 088/140] Revert JDK-8223173: Implement fast class + initialization checks on AARCH64 && JDK-8227260: JNI upcalls should bypass + class initialization barrier in c2i adapter + +--- + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 12 ------- + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 12 +++---- + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 12 ------- + 
src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 2 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 36 ------------------- + .../cpu/riscv/macroAssembler_riscv.hpp | 3 -- + src/hotspot/cpu/riscv/riscv.ad | 11 ------ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 30 +--------------- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 17 +++------ + 9 files changed, 11 insertions(+), 124 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +index 49653d04d81..1e482d7cc2b 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -90,18 +90,6 @@ static void select_different_registers(Register preserve, + + bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +-void LIR_Assembler::clinit_barrier(ciMethod* method) { +- assert(VM_Version::supports_fast_class_init_checks(), "sanity"); +- assert(!method->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, method->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier /* L_fast_path */); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +-} +- + LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; + } +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index a6d1b1470f9..99d981f97f4 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -317,6 +317,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, L + } + + void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. +@@ -330,12 +336,6 @@ void C1_MacroAssembler::remove_frame(int framesize) { + + + void C1_MacroAssembler::verified_entry() { +- // If we have to make this method not-entrant we'll overwrite its +- // first instruction with a jump. For this action to be legal we +- // must ensure that this first instruction is a J, JAL or NOP. +- // Make it a NOP. +- +- nop(); + } + + void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 1133e80a210..b50be7e726c 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -295,18 +295,6 @@ void InterpreterMacroAssembler::load_resolved_klass_at_offset( + ld(klass, Address(klass, Array::base_offset_in_bytes())); + } + +-void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, +- Register method, +- Register cache) { +- const int method_offset = in_bytes( +- ConstantPoolCache::base_offset() + +- ((byte_no == TemplateTable::f2_byte) +- ? 
ConstantPoolCacheEntry::f2_offset() +- : ConstantPoolCacheEntry::f1_offset())); +- +- ld(method, Address(cache, method_offset)); // get f1 Method* +-} +- + // Generate a subtype check: branch to ok_is_subtype if sub_klass is a + // subtype of super_klass. + // +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +index 4d8cb086f82..4126e8ee70f 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -122,8 +122,6 @@ class InterpreterMacroAssembler: public MacroAssembler { + // Load cpool->resolved_klass_at(index). + void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + +- void load_resolved_method_at_index(int byte_no, Register method, Register cache); +- + void pop_ptr(Register r = x10); + void pop_i(Register r = x10); + void pop_l(Register r = x10); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index a75bd9dfa89..304b6f2b06c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -372,36 +372,6 @@ void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thr + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); + } + +-void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) { +- assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); +- assert_different_registers(klass, xthread, tmp); +- +- Label L_fallthrough, L_tmp; +- if (L_fast_path == NULL) { +- L_fast_path = &L_fallthrough; +- } else if (L_slow_path == NULL) { +- L_slow_path = &L_fallthrough; +- } +- +- // Fast path check: class is fully initialized +- lbu(tmp, Address(klass, InstanceKlass::init_state_offset())); +- sub(tmp, tmp, InstanceKlass::fully_initialized); +- beqz(tmp, *L_fast_path); +- +- // Fast path check: current thread is initializer thread +- ld(tmp, Address(klass, InstanceKlass::init_thread_offset())); +- +- if (L_slow_path == &L_fallthrough) { +- beq(xthread, tmp, *L_fast_path); +- bind(*L_slow_path); +- } else if (L_fast_path == &L_fallthrough) { +- bne(xthread, tmp, *L_slow_path); +- bind(*L_fast_path); +- } else { +- Unimplemented(); +- } +-} +- + void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + +@@ -3175,12 +3145,6 @@ void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { + beq(src1, t0, equal); + } + +-void MacroAssembler::load_method_holder(Register holder, Register method) { +- ld(holder, Address(method, Method::const_offset())); // ConstMethod* +- ld(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* +- ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +-} +- + // string indexof + // compute index by trailing zeros + void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index b16fe904888..c6b71bdbc3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -671,9 +671,6 @@ class MacroAssembler: public Assembler { + + void cmpptr(Register src1, Address src2, Label& equal); + +- void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL); +- void 
load_method_holder(Register holder, Register method); +- + void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 0a1838695e1..13546ab328b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1085,17 +1085,6 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- if (C->clinit_barrier_on_entry()) { +- assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); +- +- Label L_skip_barrier; +- +- __ mov_metadata(t1, C->method()->holder()->constant_encoding()); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- __ bind(L_skip_barrier); +- } +- + int bangsize = C->output()->bang_size_in_bytes(); + if (C->output()->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index 326ba62fcb0..ae414224c5b 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -623,29 +623,10 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm + + address c2i_entry = __ pc(); + +- // Class initialization barrier for static methods +- address c2i_no_clinit_check_entry = NULL; +- if (VM_Version::supports_fast_class_init_checks()) { +- Label L_skip_barrier; +- +- { // Bypass the barrier for non-static methods +- __ lwu(t0, Address(xmethod, Method::access_flags_offset())); +- __ andi(t1, t0, JVM_ACC_STATIC); +- __ beqz(t1, L_skip_barrier); // non-static +- } +- +- __ load_method_holder(t1, xmethod); +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- c2i_no_clinit_check_entry = __ pc(); +- } +- + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); +- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); + } + + int SharedRuntime::c_calling_convention(const BasicType *sig_bt, +@@ -1270,15 +1251,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + // first instruction with a jump. 
+ __ nop(); + +- if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { +- Label L_skip_barrier; +- __ mov_metadata(t1, method->method_holder()); // InstanceKlass* +- __ clinit_barrier(t1, t0, &L_skip_barrier); +- __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); +- +- __ bind(L_skip_barrier); +- } +- + // Generate stack overflow check + __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index bb20f228447..1f4409a9c9a 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -2307,7 +2307,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + const Register temp = x9; + assert_different_registers(Rcache, index, temp); + +- Label resolved, clinit_barrier_slow; ++ Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { +@@ -2321,10 +2321,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + __ mv(t0, (int) code); + __ beq(temp, t0, resolved); + +- // resolve first time through +- // Class initialization barrier slow path lands here as well. +- __ bind(clinit_barrier_slow); +- + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mv(temp, (int) code); + __ call_VM(noreg, entry, temp); +@@ -2334,13 +2330,6 @@ void TemplateTable::resolve_cache_and_index(int byte_no, + // n.b. unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); +- +- // Class initialization barrier for static methods +- if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { +- __ load_resolved_method_at_index(byte_no, temp, Rcache); +- __ load_method_holder(temp, temp); +- __ clinit_barrier(temp, t0, NULL, &clinit_barrier_slow); +- } + } + + // The Rcache and index registers must be set before call +@@ -3431,7 +3420,9 @@ void TemplateTable::invokeinterface(int byte_no) { + __ profile_virtual_call(x13, x30, x9); + + // Get declaring interface class from method, and itable index +- __ load_method_holder(x10, xmethod); ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); + __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); + __ subw(xmethod, xmethod, Method::itable_index_max); + __ negw(xmethod, xmethod); + +From c259a42eac0a11e080d28dabe7f745ee79a53663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 18:36:13 +0800 +Subject: [PATCH 089/140] Revert JDK-8268119: Rename copy_os_cpu.inline.hpp + files to copy_os_cpu.hpp && JDK-8142362: Lots of code duplication in Copy + class + +--- + src/hotspot/cpu/riscv/copy_riscv.hpp | 85 +----------- + .../os_cpu/linux_riscv/copy_linux_riscv.hpp | 31 ----- + .../linux_riscv/copy_linux_riscv.inline.hpp | 124 ++++++++++++++++++ + 3 files changed, 128 insertions(+), 112 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +index bceadcc5dcc..05da242e354 100644 +--- a/src/hotspot/cpu/riscv/copy_riscv.hpp ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -27,7 +27,10 @@ + #ifndef 
CPU_RISCV_COPY_RISCV_HPP + #define CPU_RISCV_COPY_RISCV_HPP + +-#include OS_CPU_HEADER(copy) ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) + + static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; +@@ -53,84 +56,4 @@ static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); + } + +-static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- (void)memmove(to, from, count * HeapWordSize); +-} +- +-static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- switch (count) { +- case 8: to[7] = from[7]; // fall through +- case 7: to[6] = from[6]; // fall through +- case 6: to[5] = from[5]; // fall through +- case 5: to[4] = from[4]; // fall through +- case 4: to[3] = from[3]; // fall through +- case 3: to[2] = from[2]; // fall through +- case 2: to[1] = from[1]; // fall through +- case 1: to[0] = from[0]; // fall through +- case 0: break; +- default: +- memcpy(to, from, count * HeapWordSize); +- break; +- } +-} +- +-static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { +- shared_disjoint_words_atomic(from, to, count); +-} +- +-static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_conjoint_words(from, to, count); +-} +- +-static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { +- pd_disjoint_words(from, to, count); +-} +- +-static void pd_conjoint_bytes(const void* from, void* to, size_t count) { +- (void)memmove(to, from, count); +-} +- +-static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { +- pd_conjoint_bytes(from, to, count); +-} +- +-static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { +- _Copy_conjoint_jshorts_atomic(from, to, count); +-} +- +-static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { +- _Copy_conjoint_jints_atomic(from, to, count); +-} +- +-static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { +- _Copy_conjoint_jlongs_atomic(from, to, count); +-} +- +-static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); +- _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); +-} +- +-static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_bytes(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jshorts(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jints(from, to, count); +-} +- +-static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- +-static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { +- assert(!UseCompressedOops, "foo!"); +- assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); +- _Copy_arrayof_conjoint_jlongs(from, to, count); +-} +- + #endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp +deleted file mode 100644 +index 
147cfdf3c10..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.hpp ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +- +-// Empty for build system +- +-#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +new file mode 100644 +index 00000000000..bdf36d6b4c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ _Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP + +From 6033e30ebd94f2315bf809a42ef00c85bdbc780e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 29 Apr 2023 19:33:21 +0800 +Subject: [PATCH 090/140] 
Revert JDK-8241436: C2: Factor out C2-specific code + from MacroAssembler + +--- + .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 1321 ----------------- + .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 141 -- + .../cpu/riscv/macroAssembler_riscv.cpp | 1282 ++++++++++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 103 ++ + src/hotspot/cpu/riscv/riscv.ad | 124 +- + 5 files changed, 1447 insertions(+), 1524 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp + delete mode 100644 src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +deleted file mode 100644 +index 73f84a724ca..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ /dev/null +@@ -1,1321 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "asm/assembler.hpp" +-#include "asm/assembler.inline.hpp" +-#include "opto/c2_MacroAssembler.hpp" +-#include "opto/intrinsicnode.hpp" +-#include "opto/subnode.hpp" +-#include "runtime/stubRoutines.hpp" +- +-#ifdef PRODUCT +-#define BLOCK_COMMENT(str) /* nothing */ +-#define STOP(error) stop(error) +-#else +-#define BLOCK_COMMENT(str) block_comment(str) +-#define STOP(error) block_comment(error); stop(error) +-#endif +- +-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +- +-// short string +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL) +-{ +- Register ch1 = t0; +- Register index = t1; +- +- BLOCK_COMMENT("string_indexof_char_short {"); +- +- Label LOOP, LOOP1, LOOP4, LOOP8; +- Label MATCH, MATCH1, MATCH2, MATCH3, +- MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; +- +- mv(result, -1); +- mv(index, zr); +- +- bind(LOOP); +- addi(t0, index, 8); +- ble(t0, cnt1, LOOP8); +- addi(t0, index, 4); +- ble(t0, cnt1, LOOP4); +- j(LOOP1); +- +- bind(LOOP8); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- isL ? 
lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); +- beq(ch, ch1, MATCH4); +- isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); +- beq(ch, ch1, MATCH5); +- isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); +- beq(ch, ch1, MATCH6); +- isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); +- beq(ch, ch1, MATCH7); +- addi(index, index, 8); +- addi(str1, str1, isL ? 8 : 16); +- blt(index, cnt1, LOOP); +- j(NOMATCH); +- +- bind(LOOP4); +- isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); +- beq(ch, ch1, MATCH); +- isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); +- beq(ch, ch1, MATCH1); +- isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); +- beq(ch, ch1, MATCH2); +- isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); +- beq(ch, ch1, MATCH3); +- addi(index, index, 4); +- addi(str1, str1, isL ? 4 : 8); +- bge(index, cnt1, NOMATCH); +- +- bind(LOOP1); +- isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); +- beq(ch, ch1, MATCH); +- addi(index, index, 1); +- addi(str1, str1, isL ? 1 : 2); +- blt(index, cnt1, LOOP1); +- j(NOMATCH); +- +- bind(MATCH1); +- addi(index, index, 1); +- j(MATCH); +- +- bind(MATCH2); +- addi(index, index, 2); +- j(MATCH); +- +- bind(MATCH3); +- addi(index, index, 3); +- j(MATCH); +- +- bind(MATCH4); +- addi(index, index, 4); +- j(MATCH); +- +- bind(MATCH5); +- addi(index, index, 5); +- j(MATCH); +- +- bind(MATCH6); +- addi(index, index, 6); +- j(MATCH); +- +- bind(MATCH7); +- addi(index, index, 7); +- +- bind(MATCH); +- mv(result, index); +- bind(NOMATCH); +- BLOCK_COMMENT("} string_indexof_char_short"); +-} +- +-// StringUTF16.indexOfChar +-// StringLatin1.indexOfChar +-void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL) +-{ +- Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; +- Register ch1 = t0; +- Register orig_cnt = t1; +- Register mask1 = tmp3; +- Register mask2 = tmp2; +- Register match_mask = tmp1; +- Register trailing_char = tmp4; +- Register unaligned_elems = tmp4; +- +- BLOCK_COMMENT("string_indexof_char {"); +- beqz(cnt1, NOMATCH); +- +- addi(t0, cnt1, isL ? -32 : -16); +- bgtz(t0, DO_LONG); +- string_indexof_char_short(str1, cnt1, ch, result, isL); +- j(DONE); +- +- bind(DO_LONG); +- mv(orig_cnt, cnt1); +- if (AvoidUnalignedAccesses) { +- Label ALIGNED; +- andi(unaligned_elems, str1, 0x7); +- beqz(unaligned_elems, ALIGNED); +- sub(unaligned_elems, unaligned_elems, 8); +- neg(unaligned_elems, unaligned_elems); +- if (!isL) { +- srli(unaligned_elems, unaligned_elems, 1); +- } +- // do unaligned part per element +- string_indexof_char_short(str1, unaligned_elems, ch, result, isL); +- bgez(result, DONE); +- mv(orig_cnt, cnt1); +- sub(cnt1, cnt1, unaligned_elems); +- bind(ALIGNED); +- } +- +- // duplicate ch +- if (isL) { +- slli(ch1, ch, 8); +- orr(ch, ch1, ch); +- } +- slli(ch1, ch, 16); +- orr(ch, ch1, ch); +- slli(ch1, ch, 32); +- orr(ch, ch1, ch); +- +- if (!isL) { +- slli(cnt1, cnt1, 1); +- } +- +- uint64_t mask0101 = UCONST64(0x0101010101010101); +- uint64_t mask0001 = UCONST64(0x0001000100010001); +- mv(mask1, isL ? mask0101 : mask0001); +- uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); +- uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); +- mv(mask2, isL ? 
mask7f7f : mask7fff); +- +- bind(CH1_LOOP); +- ld(ch1, Address(str1)); +- addi(str1, str1, 8); +- addi(cnt1, cnt1, -8); +- compute_match_mask(ch1, ch, match_mask, mask1, mask2); +- bnez(match_mask, HIT); +- bgtz(cnt1, CH1_LOOP); +- j(NOMATCH); +- +- bind(HIT); +- ctzc_bit(trailing_char, match_mask, isL, ch1, result); +- srli(trailing_char, trailing_char, 3); +- addi(cnt1, cnt1, 8); +- ble(cnt1, trailing_char, NOMATCH); +- // match case +- if (!isL) { +- srli(cnt1, cnt1, 1); +- srli(trailing_char, trailing_char, 1); +- } +- +- sub(result, orig_cnt, cnt1); +- add(result, result, trailing_char); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof_char"); +-} +- +-typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); +- +-// Search for needle in haystack and return index or -1 +-// x10: result +-// x11: haystack +-// x12: haystack_len +-// x13: needle +-// x14: needle_len +-void C2_MacroAssembler::string_indexof(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae) +-{ +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register nlen_tmp = tmp1; // needle len tmp +- Register hlen_tmp = tmp2; // haystack len tmp +- Register result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_indexof {"); +- +- // Note, inline_string_indexOf() generates checks: +- // if (pattern.count > src.count) return -1; +- // if (pattern.count == 0) return 0; +- +- // We have two strings, a source string in haystack, haystack_len and a pattern string +- // in needle, needle_len. Find the first occurence of pattern in source or return -1. +- +- // For larger pattern and source we use a simplified Boyer Moore algorithm. +- // With a small pattern and source we use linear scan. +- +- // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. +- sub(result_tmp, haystack_len, needle_len); +- // needle_len < 8, use linear scan +- sub(t0, needle_len, 8); +- bltz(t0, LINEARSEARCH); +- // needle_len >= 256, use linear scan +- sub(t0, needle_len, 256); +- bgez(t0, LINEARSTUB); +- // needle_len >= haystack_len/4, use linear scan +- srli(t0, haystack_len, 2); +- bge(needle_len, t0, LINEARSTUB); +- +- // Boyer-Moore-Horspool introduction: +- // The Boyer Moore alogorithm is based on the description here:- +- // +- // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm +- // +- // This describes and algorithm with 2 shift rules. The 'Bad Character' rule +- // and the 'Good Suffix' rule. 
+- // +- // These rules are essentially heuristics for how far we can shift the +- // pattern along the search string. +- // +- // The implementation here uses the 'Bad Character' rule only because of the +- // complexity of initialisation for the 'Good Suffix' rule. +- // +- // This is also known as the Boyer-Moore-Horspool algorithm: +- // +- // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm +- // +- // #define ASIZE 256 +- // +- // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { +- // int i, j; +- // unsigned c; +- // unsigned char bc[ASIZE]; +- // +- // /* Preprocessing */ +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- // +- // /* Searching */ +- // j = 0; +- // while (j <= n - m) { +- // c = src[i+j]; +- // if (pattern[m-1] == c) +- // int k; +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // if (k < 0) return j; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 +- // // LL case: (c< 256) always true. Remove branch +- // j += bc[pattern[j+m-1]]; +- // #endif +- // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF +- // // UU case: need if (c if not. +- // if (c < ASIZE) +- // j += bc[pattern[j+m-1]]; +- // else +- // j += m +- // #endif +- // } +- // return -1; +- // } +- +- // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result +- Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, +- BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; +- +- Register haystack_end = haystack_len; +- Register skipch = tmp2; +- +- // pattern length is >=8, so, we can read at least 1 register for cases when +- // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for +- // UL case. We'll re-read last character in inner pre-loop code to have +- // single outer pre-loop load +- const int firstStep = isLL ? 
7 : 3; +- +- const int ASIZE = 256; +- const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) +- +- sub(sp, sp, ASIZE); +- +- // init BC offset table with default value: needle_len +- slli(t0, needle_len, 8); +- orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] +- slli(tmp1, t0, 16); +- orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] +- slli(tmp1, t0, 32); +- orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] +- +- mv(ch1, sp); // ch1 is t0 +- mv(tmp6, ASIZE / STORE_BYTES); // loop iterations +- +- bind(BM_INIT_LOOP); +- // for (i = 0; i < ASIZE; ++i) +- // bc[i] = m; +- for (int i = 0; i < 4; i++) { +- sd(tmp5, Address(ch1, i * wordSize)); +- } +- add(ch1, ch1, 32); +- sub(tmp6, tmp6, 4); +- bgtz(tmp6, BM_INIT_LOOP); +- +- sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern +- Register orig_haystack = tmp5; +- mv(orig_haystack, haystack); +- // result_tmp = tmp4 +- shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); +- sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 +- mv(tmp3, needle); +- +- // for (i = 0; i < m - 1; ) { +- // c = pattern[i]; +- // ++i; +- // // c < 256 for Latin1 string, so, no need for branch +- // #ifdef PATTERN_STRING_IS_LATIN1 +- // bc[c] = m - i; +- // #else +- // if (c < ASIZE) bc[c] = m - i; +- // #endif +- // } +- bind(BCLOOP); +- (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); +- add(tmp3, tmp3, needle_chr_size); +- if (!needle_isL) { +- // ae == StrIntrinsicNode::UU +- mv(tmp6, ASIZE); +- bgeu(ch1, tmp6, BCSKIP); +- } +- add(tmp4, sp, ch1); +- sb(ch2, Address(tmp4)); // store skip offset to BC offset table +- +- bind(BCSKIP); +- sub(ch2, ch2, 1); // for next pattern element, skip distance -1 +- bgtz(ch2, BCLOOP); +- +- // tmp6: pattern end, address after needle +- shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); +- if (needle_isL == haystack_isL) { +- // load last 8 bytes (8LL/4UU symbols) +- ld(tmp6, Address(tmp6, -wordSize)); +- } else { +- // UL: from UTF-16(source) search Latin1(pattern) +- lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) +- // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d +- // We'll have to wait until load completed, but it's still faster than per-character loads+checks +- srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a +- slli(ch2, tmp6, XLEN - 24); +- srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b +- slli(ch1, tmp6, XLEN - 16); +- srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c +- andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d +- slli(ch2, ch2, 16); +- orr(ch2, ch2, ch1); // 0x00000b0c +- slli(result, tmp3, 48); // use result as temp register +- orr(tmp6, tmp6, result); // 0x0a00000d +- slli(result, ch2, 16); +- orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d +- } +- +- // i = m - 1; +- // skipch = j + i; +- // if (skipch == pattern[m - 1] +- // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); +- // else +- // move j with bad char offset table +- bind(BMLOOPSTR2); +- // compare pattern to source string backward +- shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); +- (this->*haystack_load_1chr)(skipch, Address(result), noreg); +- sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 +- if (needle_isL == haystack_isL) { +- // re-init tmp3. It's for free because it's executed in parallel with +- // load above. 
Alternative is to initialize it before loop, but it'll +- // affect performance on in-order systems with 2 or more ld/st pipelines +- srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] +- } +- if (!isLL) { // UU/UL case +- slli(ch2, nlen_tmp, 1); // offsets in bytes +- } +- bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char +- add(result, haystack, isLL ? nlen_tmp : ch2); +- ld(ch2, Address(result)); // load 8 bytes from source string +- mv(ch1, tmp6); +- if (isLL) { +- j(BMLOOPSTR1_AFTER_LOAD); +- } else { +- sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 +- j(BMLOOPSTR1_CMP); +- } +- +- bind(BMLOOPSTR1); +- shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- +- bind(BMLOOPSTR1_AFTER_LOAD); +- sub(nlen_tmp, nlen_tmp, 1); +- bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); +- +- bind(BMLOOPSTR1_CMP); +- beq(ch1, ch2, BMLOOPSTR1); +- +- bind(BMSKIP); +- if (!isLL) { +- // if we've met UTF symbol while searching Latin1 pattern, then we can +- // skip needle_len symbols +- if (needle_isL != haystack_isL) { +- mv(result_tmp, needle_len); +- } else { +- mv(result_tmp, 1); +- } +- mv(t0, ASIZE); +- bgeu(skipch, t0, BMADV); +- } +- add(result_tmp, sp, skipch); +- lbu(result_tmp, Address(result_tmp)); // load skip offset +- +- bind(BMADV); +- sub(nlen_tmp, needle_len, 1); +- // move haystack after bad char skip offset +- shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); +- ble(haystack, haystack_end, BMLOOPSTR2); +- add(sp, sp, ASIZE); +- j(NOMATCH); +- +- bind(BMLOOPSTR1_LASTCMP); +- bne(ch1, ch2, BMSKIP); +- +- bind(BMMATCH); +- sub(result, haystack, orig_haystack); +- if (!haystack_isL) { +- srli(result, result, 1); +- } +- add(sp, sp, ASIZE); +- j(DONE); +- +- bind(LINEARSTUB); +- sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm +- bltz(t0, LINEARSEARCH); +- mv(result, zr); +- RuntimeAddress stub = NULL; +- if (isLL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); +- assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); +- } else if (needle_isL) { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); +- assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); +- } else { +- stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); +- assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); +- } +- trampoline_call(stub); +- j(DONE); +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(LINEARSEARCH); +- string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); +- +- bind(DONE); +- BLOCK_COMMENT("} string_indexof"); +-} +- +-// string_indexof +-// result: x10 +-// src: x11 +-// src_count: x12 +-// pattern: x13 +-// pattern_count: x14 or 1/2/3/4 +-void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae) +-{ +- // Note: +- // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant +- // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 +- assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); +- assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); +- +- Register ch1 = t0; +- Register ch2 = t1; +- Register hlen_neg = haystack_len, nlen_neg = needle_len; +- Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; +- +- bool isLL = ae == StrIntrinsicNode::LL; +- +- bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; +- bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; +- int needle_chr_shift = needle_isL ? 0 : 1; +- int haystack_chr_shift = haystack_isL ? 0 : 1; +- int needle_chr_size = needle_isL ? 1 : 2; +- int haystack_chr_size = haystack_isL ? 1 : 2; +- +- load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : +- (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; +- load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; +- +- Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; +- +- Register first = tmp3; +- +- if (needle_con_cnt == -1) { +- Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; +- +- sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); +- bltz(t0, DOSHORT); +- +- (this->*needle_load_1chr)(first, Address(needle), noreg); +- slli(t0, needle_len, needle_chr_shift); +- add(needle, needle, t0); +- neg(nlen_neg, t0); +- slli(t0, result_tmp, haystack_chr_shift); +- add(haystack, haystack, t0); +- neg(hlen_neg, t0); +- +- bind(FIRST_LOOP); +- add(t0, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(t0), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(nlen_tmp, nlen_neg, needle_chr_size); +- add(hlen_tmp, hlen_neg, haystack_chr_size); +- bgez(nlen_tmp, MATCH); +- +- bind(STR1_NEXT); +- add(ch1, needle, nlen_tmp); +- (this->*needle_load_1chr)(ch1, Address(ch1), noreg); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- add(nlen_tmp, nlen_tmp, needle_chr_size); +- add(hlen_tmp, hlen_tmp, haystack_chr_size); +- bltz(nlen_tmp, STR1_NEXT); +- j(MATCH); +- +- bind(DOSHORT); +- if (needle_isL == haystack_isL) { +- sub(t0, needle_len, 2); +- bltz(t0, DO1); +- bgtz(t0, DO3); +- } +- } +- +- if (needle_con_cnt == 4) { +- Label CH1_LOOP; +- (this->*load_4chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 4); +- slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_4chr)(ch2, Address(ch2), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { +- Label CH1_LOOP; +- BLOCK_COMMENT("string_indexof DO2 {"); +- bind(DO2); +- (this->*load_2chr)(ch1, Address(needle), noreg); +- if (needle_con_cnt == 2) { +- sub(result_tmp, haystack_len, 2); +- } +- slli(tmp3, result_tmp, haystack_chr_shift); +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(CH1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, 
MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, CH1_LOOP); +- j(NOMATCH); +- BLOCK_COMMENT("} string_indexof DO2"); +- } +- +- if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { +- Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; +- BLOCK_COMMENT("string_indexof DO3 {"); +- +- bind(DO3); +- (this->*load_2chr)(first, Address(needle), noreg); +- (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); +- if (needle_con_cnt == 3) { +- sub(result_tmp, haystack_len, 3); +- } +- slli(hlen_tmp, result_tmp, haystack_chr_shift); +- add(haystack, haystack, hlen_tmp); +- neg(hlen_neg, hlen_tmp); +- +- bind(FIRST_LOOP); +- add(ch2, haystack, hlen_neg); +- (this->*load_2chr)(ch2, Address(ch2), noreg); +- beq(first, ch2, STR1_LOOP); +- +- bind(STR2_NEXT); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, FIRST_LOOP); +- j(NOMATCH); +- +- bind(STR1_LOOP); +- add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); +- add(ch2, haystack, hlen_tmp); +- (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); +- bne(ch1, ch2, STR2_NEXT); +- j(MATCH); +- BLOCK_COMMENT("} string_indexof DO3"); +- } +- +- if (needle_con_cnt == -1 || needle_con_cnt == 1) { +- Label DO1_LOOP; +- +- BLOCK_COMMENT("string_indexof DO1 {"); +- bind(DO1); +- (this->*needle_load_1chr)(ch1, Address(needle), noreg); +- sub(result_tmp, haystack_len, 1); +- mv(tmp3, result_tmp); +- if (haystack_chr_shift) { +- slli(tmp3, result_tmp, haystack_chr_shift); +- } +- add(haystack, haystack, tmp3); +- neg(hlen_neg, tmp3); +- +- bind(DO1_LOOP); +- add(tmp3, haystack, hlen_neg); +- (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); +- beq(ch1, ch2, MATCH); +- add(hlen_neg, hlen_neg, haystack_chr_size); +- blez(hlen_neg, DO1_LOOP); +- BLOCK_COMMENT("} string_indexof DO1"); +- } +- +- bind(NOMATCH); +- mv(result, -1); +- j(DONE); +- +- bind(MATCH); +- srai(t0, hlen_neg, haystack_chr_shift); +- add(result, result_tmp, t0); +- +- bind(DONE); +-} +- +-// Compare strings. +-void C2_MacroAssembler::string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, +- Register tmp3, int ae) +-{ +- Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, +- DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, +- SHORT_LOOP_START, TAIL_CHECK, L; +- +- const int STUB_THRESHOLD = 64 + 8; +- bool isLL = ae == StrIntrinsicNode::LL; +- bool isLU = ae == StrIntrinsicNode::LU; +- bool isUL = ae == StrIntrinsicNode::UL; +- +- bool str1_isL = isLL || isLU; +- bool str2_isL = isLL || isUL; +- +- // for L strings, 1 byte for 1 character +- // for U strings, 2 bytes for 1 character +- int str1_chr_size = str1_isL ? 1 : 2; +- int str2_chr_size = str2_isL ? 1 : 2; +- int minCharsInWord = isLL ? wordSize : wordSize / 2; +- +- load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; +- +- BLOCK_COMMENT("string_compare {"); +- +- // Bizzarely, the counts are passed in bytes, regardless of whether they +- // are L or U strings, however the result is always in characters. +- if (!str1_isL) { +- sraiw(cnt1, cnt1, 1); +- } +- if (!str2_isL) { +- sraiw(cnt2, cnt2, 1); +- } +- +- // Compute the minimum of the string lengths and save the difference in result. 
+- sub(result, cnt1, cnt2); +- bgt(cnt1, cnt2, L); +- mv(cnt2, cnt1); +- bind(L); +- +- // A very short string +- li(t0, minCharsInWord); +- ble(cnt2, t0, SHORT_STRING); +- +- // Compare longwords +- // load first parts of strings and finish initialization while loading +- { +- if (str1_isL == str2_isL) { // LL or UU +- // load 8 bytes once to compare +- ld(tmp1, Address(str1)); +- beq(str1, str2, DONE); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- sub(cnt2, cnt2, minCharsInWord); +- beqz(cnt2, TAIL_CHECK); +- // convert cnt2 from characters to bytes +- if (!str1_isL) { +- slli(cnt2, cnt2, 1); +- } +- add(str2, str2, cnt2); +- add(str1, str1, cnt2); +- sub(cnt2, zr, cnt2); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- add(str1, str1, cnt2); +- sub(cnt1, zr, cnt2); +- slli(cnt2, cnt2, 1); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 4); +- } else { // UL case +- ld(tmp1, Address(str1)); +- lwu(tmp2, Address(str2)); +- li(t0, STUB_THRESHOLD); +- bge(cnt2, t0, STUB); +- addi(cnt2, cnt2, -4); +- slli(t0, cnt2, 1); +- sub(cnt1, zr, t0); +- add(str1, str1, t0); +- add(str2, str2, cnt2); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- sub(cnt2, zr, cnt2); +- addi(cnt1, cnt1, 8); +- } +- addi(cnt2, cnt2, isUL ? 4 : 8); +- bgez(cnt2, TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- +- // main loop +- bind(NEXT_WORD); +- if (str1_isL == str2_isL) { // LL or UU +- add(t0, str1, cnt2); +- ld(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt2, cnt2, 8); +- } else if (isLU) { // LU case +- add(t0, str1, cnt1); +- lwu(tmp1, Address(t0)); +- add(t0, str2, cnt2); +- ld(tmp2, Address(t0)); +- addi(cnt1, cnt1, 4); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- addi(cnt2, cnt2, 8); +- } else { // UL case +- add(t0, str2, cnt2); +- lwu(tmp2, Address(t0)); +- add(t0, str1, cnt1); +- ld(tmp1, Address(t0)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- addi(cnt1, cnt1, 8); +- addi(cnt2, cnt2, 4); +- } +- bgez(cnt2, TAIL); +- +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, NEXT_WORD); +- j(DIFFERENCE); +- bind(TAIL); +- xorr(tmp3, tmp1, tmp2); +- bnez(tmp3, DIFFERENCE); +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- if (str1_isL == str2_isL) { // LL or UU +- ld(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- } else if (isLU) { // LU case +- lwu(tmp1, Address(str1)); +- ld(tmp2, Address(str2)); +- inflate_lo32(tmp3, tmp1); +- mv(tmp1, tmp3); +- } else { // UL case +- lwu(tmp2, Address(str2)); +- ld(tmp1, Address(str1)); +- inflate_lo32(tmp3, tmp2); +- mv(tmp2, tmp3); +- } +- bind(TAIL_CHECK); +- xorr(tmp3, tmp1, tmp2); +- beqz(tmp3, DONE); +- +- // Find the first different characters in the longwords and +- // compute their difference. 
+- bind(DIFFERENCE); +- ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb +- srl(tmp1, tmp1, result); +- srl(tmp2, tmp2, result); +- if (isLL) { +- andi(tmp1, tmp1, 0xFF); +- andi(tmp2, tmp2, 0xFF); +- } else { +- andi(tmp1, tmp1, 0xFFFF); +- andi(tmp2, tmp2, 0xFFFF); +- } +- sub(result, tmp1, tmp2); +- j(DONE); +- } +- +- bind(STUB); +- RuntimeAddress stub = NULL; +- switch (ae) { +- case StrIntrinsicNode::LL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); +- break; +- case StrIntrinsicNode::UU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); +- break; +- case StrIntrinsicNode::LU: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); +- break; +- case StrIntrinsicNode::UL: +- stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); +- break; +- default: +- ShouldNotReachHere(); +- } +- assert(stub.target() != NULL, "compare_long_string stub has not been generated"); +- trampoline_call(stub); +- j(DONE); +- +- bind(SHORT_STRING); +- // Is the minimum length zero? +- beqz(cnt2, DONE); +- // arrange code to do most branches while loading and loading next characters +- // while comparing previous +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- j(SHORT_LOOP_START); +- bind(SHORT_LOOP); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST); +- bind(SHORT_LOOP_START); +- (this->*str1_load_chr)(tmp2, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(t0, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bne(tmp1, cnt1, SHORT_LOOP_TAIL); +- addi(cnt2, cnt2, -1); +- beqz(cnt2, SHORT_LAST2); +- (this->*str1_load_chr)(tmp1, Address(str1), t0); +- addi(str1, str1, str1_chr_size); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- beq(tmp2, t0, SHORT_LOOP); +- sub(result, tmp2, t0); +- j(DONE); +- bind(SHORT_LOOP_TAIL); +- sub(result, tmp1, cnt1); +- j(DONE); +- bind(SHORT_LAST2); +- beq(tmp2, t0, DONE); +- sub(result, tmp2, t0); +- +- j(DONE); +- bind(SHORT_LAST_INIT); +- (this->*str2_load_chr)(cnt1, Address(str2), t0); +- addi(str2, str2, str2_chr_size); +- bind(SHORT_LAST); +- beq(tmp1, cnt1, DONE); +- sub(result, tmp1, cnt1); +- +- bind(DONE); +- +- BLOCK_COMMENT("} string_compare"); +-} +- +-void C2_MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, +- Register tmp4, Register tmp5, Register tmp6, Register result, +- Register cnt1, int elem_size) { +- Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; +- Register tmp1 = t0; +- Register tmp2 = t1; +- Register cnt2 = tmp2; // cnt2 only used in array length compare +- Register elem_per_word = tmp6; +- int log_elem_size = exact_log2(elem_size); +- int length_offset = arrayOopDesc::length_offset_in_bytes(); +- int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); +- +- assert(elem_size == 1 || elem_size == 2, "must be char or byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); +- li(elem_per_word, wordSize / elem_size); +- +- BLOCK_COMMENT("arrays_equals {"); +- +- // if (a1 == a2), return true +- beq(a1, a2, SAME); +- +- mv(result, false); +- beqz(a1, DONE); +- beqz(a2, DONE); +- lwu(cnt1, Address(a1, length_offset)); +- lwu(cnt2, Address(a2, length_offset)); +- bne(cnt2, cnt1, DONE); +- beqz(cnt1, SAME); +- +- slli(tmp5, cnt1, 3 + log_elem_size); +- sub(tmp5, zr, tmp5); +- add(a1, a1, base_offset); +- add(a2, a2, base_offset); +- ld(tmp3, Address(a1, 0)); +- ld(tmp4, Address(a2, 0)); +- ble(cnt1, elem_per_word, SHORT); // short or same +- +- // Main 16 byte comparison loop with 2 exits +- bind(NEXT_DWORD); { +- ld(tmp1, Address(a1, wordSize)); +- ld(tmp2, Address(a2, wordSize)); +- sub(cnt1, cnt1, 2 * wordSize / elem_size); +- blez(cnt1, TAIL); +- bne(tmp3, tmp4, DONE); +- ld(tmp3, Address(a1, 2 * wordSize)); +- ld(tmp4, Address(a2, 2 * wordSize)); +- add(a1, a1, 2 * wordSize); +- add(a2, a2, 2 * wordSize); +- ble(cnt1, elem_per_word, TAIL2); +- } beq(tmp1, tmp2, NEXT_DWORD); +- j(DONE); +- +- bind(TAIL); +- xorr(tmp4, tmp3, tmp4); +- xorr(tmp2, tmp1, tmp2); +- sll(tmp2, tmp2, tmp5); +- orr(tmp5, tmp4, tmp2); +- j(IS_TMP5_ZR); +- +- bind(TAIL2); +- bne(tmp1, tmp2, DONE); +- +- bind(SHORT); +- xorr(tmp4, tmp3, tmp4); +- sll(tmp5, tmp4, tmp5); +- +- bind(IS_TMP5_ZR); +- bnez(tmp5, DONE); +- +- bind(SAME); +- mv(result, true); +- // That's it. +- bind(DONE); +- +- BLOCK_COMMENT("} array_equals"); +-} +- +-// Compare Strings +- +-// For Strings we're passed the address of the first characters in a1 +-// and a2 and the length in cnt1. +-// elem_size is the element size in bytes: either 1 or 2. +-// There are two implementations. For arrays >= 8 bytes, all +-// comparisons (including the final one, which may overlap) are +-// performed 8 bytes at a time. For strings < 8 bytes, we compare a +-// halfword, then a short, and then a byte. +- +-void C2_MacroAssembler::string_equals(Register a1, Register a2, +- Register result, Register cnt1, int elem_size) +-{ +- Label SAME, DONE, SHORT, NEXT_WORD; +- Register tmp1 = t0; +- Register tmp2 = t1; +- +- assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); +- assert_different_registers(a1, a2, result, cnt1, t0, t1); +- +- BLOCK_COMMENT("string_equals {"); +- +- mv(result, false); +- +- // Check for short strings, i.e. smaller than wordSize. +- sub(cnt1, cnt1, wordSize); +- bltz(cnt1, SHORT); +- +- // Main 8 byte comparison loop. +- bind(NEXT_WORD); { +- ld(tmp1, Address(a1, 0)); +- add(a1, a1, wordSize); +- ld(tmp2, Address(a2, 0)); +- add(a2, a2, wordSize); +- sub(cnt1, cnt1, wordSize); +- bne(tmp1, tmp2, DONE); +- } bgtz(cnt1, NEXT_WORD); +- +- // Last longword. In the case where length == 4 we compare the +- // same longword twice, but that's still faster than another +- // conditional branch. +- // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when +- // length == 4. +- add(tmp1, a1, cnt1); +- ld(tmp1, Address(tmp1, 0)); +- add(tmp2, a2, cnt1); +- ld(tmp2, Address(tmp2, 0)); +- bne(tmp1, tmp2, DONE); +- j(SAME); +- +- bind(SHORT); +- Label TAIL03, TAIL01; +- +- // 0-7 bytes left. +- andi(t0, cnt1, 4); +- beqz(t0, TAIL03); +- { +- lwu(tmp1, Address(a1, 0)); +- add(a1, a1, 4); +- lwu(tmp2, Address(a2, 0)); +- add(a2, a2, 4); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL03); +- // 0-3 bytes left. 
+- andi(t0, cnt1, 2); +- beqz(t0, TAIL01); +- { +- lhu(tmp1, Address(a1, 0)); +- add(a1, a1, 2); +- lhu(tmp2, Address(a2, 0)); +- add(a2, a2, 2); +- bne(tmp1, tmp2, DONE); +- } +- +- bind(TAIL01); +- if (elem_size == 1) { // Only needed when comparing 1-byte elements +- // 0-1 bytes left. +- andi(t0, cnt1, 1); +- beqz(t0, SAME); +- { +- lbu(tmp1, a1, 0); +- lbu(tmp2, a2, 0); +- bne(tmp1, tmp2, DONE); +- } +- } +- +- // Arrays are equal. +- bind(SAME); +- mv(result, true); +- +- // That's it. +- bind(DONE); +- BLOCK_COMMENT("} string_equals"); +-} +- +-typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); +-typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, +- bool is_far, bool is_unordered); +- +-static conditional_branch_insn conditional_branches[] = +-{ +- /* SHORT branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgt, +- NULL, // BoolTest::overflow +- (conditional_branch_insn)&Assembler::blt, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::ble, +- NULL, // BoolTest::no_overflow +- (conditional_branch_insn)&Assembler::bge, +- +- /* UNSIGNED branches */ +- (conditional_branch_insn)&Assembler::beq, +- (conditional_branch_insn)&Assembler::bgtu, +- NULL, +- (conditional_branch_insn)&Assembler::bltu, +- (conditional_branch_insn)&Assembler::bne, +- (conditional_branch_insn)&Assembler::bleu, +- NULL, +- (conditional_branch_insn)&Assembler::bgeu +-}; +- +-static float_conditional_branch_insn float_conditional_branches[] = +-{ +- /* FLOAT SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::float_beq, +- (float_conditional_branch_insn)&MacroAssembler::float_bgt, +- NULL, // BoolTest::overflow +- (float_conditional_branch_insn)&MacroAssembler::float_blt, +- (float_conditional_branch_insn)&MacroAssembler::float_bne, +- (float_conditional_branch_insn)&MacroAssembler::float_ble, +- NULL, // BoolTest::no_overflow +- (float_conditional_branch_insn)&MacroAssembler::float_bge, +- +- /* DOUBLE SHORT branches */ +- (float_conditional_branch_insn)&MacroAssembler::double_beq, +- (float_conditional_branch_insn)&MacroAssembler::double_bgt, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_blt, +- (float_conditional_branch_insn)&MacroAssembler::double_bne, +- (float_conditional_branch_insn)&MacroAssembler::double_ble, +- NULL, +- (float_conditional_branch_insn)&MacroAssembler::double_bge +-}; +- +-void C2_MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), +- "invalid conditional branch index"); +- (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); +-} +- +-// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use +-// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
+-void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { +- assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), +- "invalid float conditional branch index"); +- int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask); +- (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, +- (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); +-} +- +-void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- case BoolTest::le: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- case BoolTest::gt: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { +- switch (cmpFlag) { +- case BoolTest::eq: +- beqz(op1, L, is_far); +- break; +- case BoolTest::ne: +- bnez(op1, L, is_far); +- break; +- default: +- ShouldNotReachHere(); +- } +-} +- +-void C2_MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { +- Label L; +- cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); +- mv(dst, src); +- bind(L); +-} +- +-// Set dst to NaN if any NaN input. +-void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min) { +- assert_different_registers(dst, src1, src2); +- +- Label Done; +- fsflags(zr); +- if (is_double) { +- is_min ? fmin_d(dst, src1, src2) +- : fmax_d(dst, src1, src2); +- // Checking NaNs +- flt_d(zr, src1, src2); +- } else { +- is_min ? fmin_s(dst, src1, src2) +- : fmax_s(dst, src1, src2); +- // Checking NaNs +- flt_s(zr, src1, src2); +- } +- +- frflags(t0); +- beqz(t0, Done); +- +- // In case of NaNs +- is_double ? fadd_d(dst, src1, src2) +- : fadd_s(dst, src1, src2); +- +- bind(Done); +-} +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +deleted file mode 100644 +index 90b6554af02..00000000000 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ /dev/null +@@ -1,141 +0,0 @@ +-/* +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. 
+- * +- */ +- +-#ifndef CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +-#define CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +- +-// C2_MacroAssembler contains high-level macros for C2 +- +- public: +- +- void string_compare(Register str1, Register str2, +- Register cnt1, Register cnt2, Register result, +- Register tmp1, Register tmp2, Register tmp3, +- int ae); +- +- void string_indexof_char_short(Register str1, Register cnt1, +- Register ch, Register result, +- bool isL); +- +- void string_indexof_char(Register str1, Register cnt1, +- Register ch, Register result, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- bool isL); +- +- void string_indexof(Register str1, Register str2, +- Register cnt1, Register cnt2, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, int ae); +- +- void string_indexof_linearscan(Register haystack, Register needle, +- Register haystack_len, Register needle_len, +- Register tmp1, Register tmp2, +- Register tmp3, Register tmp4, +- int needle_con_cnt, Register result, int ae); +- +- void arrays_equals(Register r1, Register r2, +- Register tmp3, Register tmp4, +- Register tmp5, Register tmp6, +- Register result, Register cnt1, +- int elem_size); +- +- void string_equals(Register r1, Register r2, +- Register result, Register cnt1, +- int elem_size); +- +- // refer to conditional_branches and float_conditional_branches +- static const int bool_test_bits = 3; +- static const int neg_cond_bits = 2; +- static const int unsigned_branch_mask = 1 << bool_test_bits; +- static const int double_branch_mask = 1 << bool_test_bits; +- +- // cmp +- void cmp_branch(int cmpFlag, +- Register op1, Register op2, +- Label& label, bool is_far = false); +- +- void float_cmp_branch(int cmpFlag, +- FloatRegister op1, FloatRegister op2, +- Label& label, bool is_far = false); +- +- void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, +- Label& L, bool is_far = false); +- +- void enc_cmove(int cmpFlag, +- Register op1, Register op2, +- Register dst, Register src); +- +- void spill(Register r, bool is64, int offset) { +- is64 ? sd(r, Address(sp, offset)) +- : sw(r, Address(sp, offset)); +- } +- +- void spill(FloatRegister f, bool is64, int offset) { +- is64 ? fsd(f, Address(sp, offset)) +- : fsw(f, Address(sp, offset)); +- } +- +- void spill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vs1r_v(v, t0); +- } +- +- void unspill(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lw(r, Address(sp, offset)); +- } +- +- void unspillu(Register r, bool is64, int offset) { +- is64 ? ld(r, Address(sp, offset)) +- : lwu(r, Address(sp, offset)); +- } +- +- void unspill(FloatRegister f, bool is64, int offset) { +- is64 ? 
fld(f, Address(sp, offset)) +- : flw(f, Address(sp, offset)); +- } +- +- void unspill(VectorRegister v, int offset) { +- add(t0, sp, offset); +- vl1r_v(v, t0); +- } +- +- void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, int vec_reg_size_in_bytes) { +- assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); +- unspill(v0, src_offset); +- spill(v0, dst_offset); +- } +- +- void minmax_FD(FloatRegister dst, +- FloatRegister src1, FloatRegister src2, +- bool is_double, bool is_min); +- +-#endif // CPU_RISCV_C2_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 304b6f2b06c..d175a62aeeb 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -4125,3 +4125,1285 @@ void MacroAssembler::safepoint_ifence() { + ifence(); + } + ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; ++ ++ BLOCK_COMMENT("string_indexof_char_short {"); ++ ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ ++ mv(result, -1); ++ mv(index, zr); ++ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); ++ ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); ++ ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); ++ ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); ++ ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); ++ ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); ++ ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); ++ ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); ++ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); ++ ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); ++} ++ ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; ++ ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } ++ ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); ++ ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } ++ ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? 
mask7f7f : mask7fff); ++ ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); ++ ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } ++ ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); ++} ++ ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); ++ ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_indexof {"); ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. ++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. 
++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 
7 : 3; ++ ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ ++ sub(sp, sp, ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); ++ ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. 
Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} ++ ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ Register first = tmp3; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); ++ ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); ++ ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); ++ ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, 
MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); ++ ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; ++ ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); ++} ++ ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; ++ ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); ++ } ++ ++ // Compute the minimum of the string lengths and save the difference in result. 
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); ++ ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); ++ ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. 
++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } ++ ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); ++ ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} string_compare"); ++} ++ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? 
T_CHAR : T_BYTE); ++ ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); ++ ++ BLOCK_COMMENT("arrays_equals {"); ++ ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); ++ ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); ++ ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same ++ ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); ++ ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); ++ ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); ++} ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ ++ BLOCK_COMMENT("string_equals {"); ++ ++ mv(result, false); ++ ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); ++ ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); ++ { ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. 
++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); ++ { ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); ++ } ++ ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); ++ } ++ } ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} ++ ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} ++ ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). 
++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} ++ ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } ++ ++ frflags(t0); ++ beqz(t0, Done); ++ ++ // In case of NaNs ++ is_double ? 
fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); ++ ++ bind(Done); ++} ++ ++#endif // COMPILER2 ++ +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c6b71bdbc3c..2ef28771e2e 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -851,6 +851,109 @@ class MacroAssembler: public Assembler { + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); ++ ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); ++ ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); ++ ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; ++ ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); ++ ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } ++ ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } ++ ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } ++ ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ + }; + + #ifdef ASSERT +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 13546ab328b..2e7eed8fb52 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -997,7 +997,7 @@ void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); + } +@@ -1015,7 +1015,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + #endif + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); +@@ -1074,7 +1074,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // n.b. frame size includes space for return pc and fp + const int framesize = C->output()->frame_size_in_bytes(); +@@ -1150,7 +1150,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + assert_cond(ra_ != NULL); + Compile* C = ra_->C; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); + int framesize = C->output()->frame_size_in_bytes(); + +@@ -1251,7 +1251,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + int dst_offset = ra_->reg2offset(dst_lo); + + if (cbuf != NULL) { +- C2_MacroAssembler _masm(cbuf); ++ MacroAssembler _masm(cbuf); + Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: +@@ -1371,7 +1371,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + #endif + + void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + assert_cond(ra_ != NULL); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); +@@ -1422,7 +1422,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const + { + // This is the unverified entry point. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + Label skip; + __ cmp_klass(j_rarg0, t1, t0, skip); +@@ -1449,7 +1449,7 @@ int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) + // j #exception_blob_entry_point + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. 
+- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1467,7 +1467,7 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) + { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); +@@ -1848,7 +1848,7 @@ encode %{ + // BEGIN Non-volatile memory access + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); +@@ -1856,7 +1856,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL || con == (address)1) { +@@ -1875,7 +1875,7 @@ encode %{ + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); + __ li(dst_reg, 1); +@@ -1893,12 +1893,12 @@ encode %{ + %} + + enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ load_byte_map_base($dst$$Register); + %} + + enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1911,13 +1911,13 @@ encode %{ + %} + + enc_class riscv_enc_mov_zero(iRegNorP dst) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + __ mv(dst_reg, zr); + %} + + enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + address con = (address)$src$$constant; + if (con == NULL) { +@@ -1930,42 +1930,42 @@ encode %{ + %} + + enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + 
%} + + enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + + enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); +@@ -1974,13 +1974,13 @@ encode %{ + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + __ j(*L); + %} + + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; + switch ($cmp$$cmpcode) { + case(BoolTest::ge): +@@ -2004,7 +2004,7 @@ encode %{ + + Label miss; + Label done; +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, + NULL, &miss); + if ($primary) { +@@ -2023,7 +2023,7 @@ encode %{ + %} + + enc_class riscv_enc_java_static_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + address addr = (address)$meth$$method; + address call = NULL; +@@ -2055,7 +2055,7 @@ encode %{ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + int method_index = resolved_method_index(cbuf); + address call = __ ic_call((address)$meth$$method, method_index); + if (call == NULL) { +@@ -2065,7 +2065,7 @@ encode %{ + %} + + enc_class riscv_enc_call_epilog() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + __ call_Unimplemented(); +@@ -2073,7 +2073,7 @@ encode %{ + %} + + enc_class riscv_enc_java_to_runtime(method meth) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + + // some calls to generated routines (arraycopy code) are scheduled + // by C2 as runtime calls. if so we can call them using a jr (they +@@ -2102,7 +2102,7 @@ encode %{ + + // using the cr register as the bool result: 0 for success; others failed. + enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2189,7 +2189,7 @@ encode %{ + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. 
+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp1, iRegP tmp2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); +@@ -2262,7 +2262,7 @@ encode %{ + // arithmetic encodings + + enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2270,7 +2270,7 @@ encode %{ + %} + + enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2278,7 +2278,7 @@ encode %{ + %} + + enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2286,7 +2286,7 @@ encode %{ + %} + + enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); +@@ -2294,14 +2294,14 @@ encode %{ + %} + + enc_class riscv_enc_tail_call(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + __ jr(target_reg); + %} + + enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + Register target_reg = as_Register($jump_target$$reg); + // exception oop should be in x10 +@@ -2312,12 +2312,12 @@ encode %{ + %} + + enc_class riscv_enc_rethrow() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); + %} + + enc_class riscv_enc_ret() %{ +- C2_MacroAssembler _masm(&cbuf); ++ MacroAssembler _masm(&cbuf); + Assembler::CompressibleRegion cr(&_masm); + __ ret(); + %} +@@ -8506,7 +8506,7 @@ instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8526,7 +8526,7 @@ instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8585,7 +8585,7 @@ instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | 
MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8604,7 +8604,7 @@ instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8625,7 +8625,7 @@ instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8645,7 +8645,7 @@ instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8666,7 +8666,7 @@ instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8686,7 +8686,7 @@ instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label)); + %} + +@@ -8741,7 +8741,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -8759,7 +8759,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + +@@ -9080,7 +9080,7 @@ instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9095,7 +9095,7 @@ instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, 
$lbl\t#@far_cmpU_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9138,7 +9138,7 @@ instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9153,7 +9153,7 @@ instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9171,7 +9171,7 @@ instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9189,7 +9189,7 @@ instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9207,7 +9207,7 @@ instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9225,7 +9225,7 @@ instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) + format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + + ins_encode %{ +- __ cmp_branch($cmp$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), + as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9276,7 +9276,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} + + ins_encode %{ +- __ float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9292,7 +9292,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} + + ins_encode %{ +- __ 
float_cmp_branch($cmp$$cmpcode | C2_MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), + as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + +@@ -9616,7 +9616,7 @@ instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9673,7 +9673,7 @@ instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} +@@ -9691,7 +9691,7 @@ instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) + %} + + ins_encode %{ +- __ enc_cmove($cop$$cmpcode | C2_MacroAssembler::unsigned_branch_mask, ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, + as_Register($op1$$reg), as_Register($op2$$reg), + as_Register($dst$$reg), as_Register($src$$reg)); + %} + +From 115cd21290080b157d0ca8b7080e66ebd814fbdb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:15:18 +0800 +Subject: [PATCH 091/140] Revert JDK-8222297: IRT_ENTRY/IRT_LEAF etc are the + same as JRT && JDK-8263709: Cleanup THREAD/TRAPS/CHECK usage in JRT_ENTRY + routines + +--- + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +index d93530d8564..776b0787238 100644 +--- a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -278,12 +278,12 @@ class SlowSignatureHandler + }; + + +-JRT_ENTRY(address, +- InterpreterRuntime::slow_signature_handler(JavaThread* current, ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) +- methodHandle m(current, (Method*)method); ++ methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments +@@ -292,4 +292,4 @@ JRT_ENTRY(address, + + // return result handler + return Interpreter::result_handler(m->result_type()); +-JRT_END ++IRT_END + +From 6cbf43d5f095aef93ef0bf595f51019a03cc1989 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:20:06 +0800 +Subject: [PATCH 092/140] Revert JDK-8245289: Clean up offset code in + JavaClasses + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 18 +++++++++--------- + .../templateInterpreterGenerator_riscv.cpp | 2 +- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index 4442b5991b1..e070b8096a6 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -53,7 +53,7 @@ void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_ + verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), + "MH argument is a Class"); + } +- __ ld(klass_reg, Address(klass_reg, 
java_lang_Class::klass_offset())); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); + } + + #ifdef ASSERT +@@ -140,13 +140,13 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); +- __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset())), temp2); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); + __ verify_oop(method_temp); +- __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())), noreg, noreg); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack +@@ -284,10 +284,10 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + "MemberName required for invokeVirtual etc."); + } + +- Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset())); +- Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset())); +- Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset())); +- Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset())); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index 8aea4eca048..ce6166030b4 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -894,7 +894,7 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + + address entry = __ pc(); + +- const int referent_offset = java_lang_ref_Reference::referent_offset(); ++ const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; + +From 8c9b9f4246f4ede3c31f59749f9d4bc625f106b3 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:30:35 +0800 +Subject: [PATCH 093/140] Revert JDK-8242629: Remove references to deprecated + java.util.Observer and Observable + +--- + 
.../runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java | 2 -- + .../classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java | 2 -- + .../sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java | 2 -- + 3 files changed, 6 deletions(-) + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +index f2e224f28ee..5c2b6e0e3ea 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.riscv64.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +index df280005d72..e372bc5f7be 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -34,8 +34,6 @@ + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + /** Specialization of and implementation of abstract methods of the + Frame class for the riscv64 family of CPUs. 
*/ +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +index d0ad2b559a6..850758a7ed4 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -31,8 +31,6 @@ + import sun.jvm.hotspot.types.*; + import sun.jvm.hotspot.runtime.*; + import sun.jvm.hotspot.utilities.*; +-import sun.jvm.hotspot.utilities.Observable; +-import sun.jvm.hotspot.utilities.Observer; + + public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + +From 43f2a4fec6b4922fa8c187deda310ad636aeed2e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:33:56 +0800 +Subject: [PATCH 094/140] Revert JDK-8256155: Allow multiple large page sizes + to be used on Linux + +--- + src/hotspot/os/linux/os_linux.cpp | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 6f75e623a9a..7fc9588301b 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -4078,7 +4078,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) +- SPARC_ONLY(4 * M); ++ SPARC_ONLY(4 * M) ++ RISCV64_ONLY(2 * M); + #endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); + +From a93191be0155882a0f4d92bba4de9fdf4f508a4a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:38:53 +0800 +Subject: [PATCH 095/140] Revert JDK-8252204: AArch64: Implement SHA3 + accelerator/intrinsic + +--- + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index d4b79162d84..50ee7edb708 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -82,11 +82,6 @@ void VM_Version::initialize() { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + +- if (UseSHA3Intrinsics) { +- warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); +- FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); +- } +- + if (UseCRC32Intrinsics) { + warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + +From 29acd4f1bb99e856418f7d9d3da4f205812b1663 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:43:23 +0800 +Subject: [PATCH 096/140] Revert JDK-8253717: Relocate stack overflow code out + of thread.hpp/cpp && JDK-8255766: Fix linux+arm64 build after 8254072 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 4 ++-- + src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d175a62aeeb..d94074b4a3c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1666,7 +1666,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { + // was post-decremented.) 
Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. +- for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + sub(tmp, tmp, os::vm_page_size()); +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index ae414224c5b..dc3ac548d73 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1252,7 +1252,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ nop(); + + // Generate stack overflow check +- __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); +@@ -1551,7 +1551,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + Label reguard; + Label reguard_done; + __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); +- __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); + __ beq(t0, t1, reguard); + __ bind(reguard_done); + +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +index ce6166030b4..e639fa7e12f 100644 +--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -1248,7 +1248,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); +- __ addi(t1, zr, (u1)StackOverflow::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + + __ pusha(); // only save smashed registers + +From 6fa17c662dd2488108809e77dcff921bb475813c Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:50:51 +0800 +Subject: [PATCH 097/140] Revert JDK-8258459: Decouple gc_globals.hpp from + globals.hpp + +--- + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +index 1f4409a9c9a..84b1afc7dc6 100644 +--- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -28,7 +28,6 @@ + #include "asm/macroAssembler.inline.hpp" + #include "gc/shared/barrierSetAssembler.hpp" + #include "gc/shared/collectedHeap.hpp" +-#include "gc/shared/tlab_globals.hpp" + #include "interpreter/interp_masm.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + +From bcc26e749ccc20db5a4ba51c2cf8740a908a8a74 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 16:56:58 +0800 +Subject: [PATCH 098/140] Revert JDK-8223136: Move compressed oops functions to + CompressedOops class + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 64 +++++++++---------- + .../cpu/riscv/macroAssembler_riscv.hpp | 1 - + src/hotspot/cpu/riscv/riscv.ad | 10 +-- + 3 
files changed, 37 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index d94074b4a3c..becc1656358 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1318,10 +1318,10 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + if (Universe::is_fully_initialized()) { +- mv(xheapbase, CompressedOops::ptrs_base()); ++ mv(xheapbase, Universe::narrow_ptrs_base()); + } else { + int32_t offset = 0; +- la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); + ld(xheapbase, Address(xheapbase, offset)); + } + } +@@ -1596,8 +1596,8 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R + void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { + if (UseCompressedClassPointers) { + lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); +- if (CompressedKlassPointers::base() == NULL) { +- slli(tmp, tmp, CompressedKlassPointers::shift()); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); + beq(trial_klass, tmp, L); + return; + } +@@ -1745,9 +1745,9 @@ void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + // Algorithm must match CompressedOops::encode. + void MacroAssembler::encode_heap_oop(Register d, Register s) { + verify_oop(s, "broken oop in encode_heap_oop"); +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli(d, s, LogMinObjAlignmentInBytes); + } else { + mv(d, s); +@@ -1758,9 +1758,9 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { + bgez(d, notNull); + mv(d, zr); + bind(notNull); +- if (CompressedOops::shift() != 0) { +- assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); +- srli(d, d, CompressedOops::shift()); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); + } + } + } +@@ -1799,9 +1799,9 @@ void MacroAssembler::decode_klass_not_null(Register r) { + void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + slli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1815,10 +1815,10 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (uintptr_t)CompressedKlassPointers::base()); ++ li(xbase, 
(uintptr_t)Universe::narrow_klass_base()); + +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); + shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); + } else { +@@ -1835,9 +1835,9 @@ void MacroAssembler::encode_klass_not_null(Register r) { + void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + +- if (CompressedKlassPointers::base() == NULL) { +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, src, LogKlassAlignmentInBytes); + } else { + mv(dst, src); +@@ -1845,8 +1845,8 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + return; + } + +- if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 && +- CompressedKlassPointers::shift() == 0) { ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { + zero_extend(dst, src, 32); + return; + } +@@ -1857,10 +1857,10 @@ void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register + } + + assert_different_registers(src, xbase); +- li(xbase, (intptr_t)CompressedKlassPointers::base()); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); + sub(dst, src, xbase); +- if (CompressedKlassPointers::shift() != 0) { +- assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong"); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli(dst, dst, LogKlassAlignmentInBytes); + } + if (xbase == xheapbase) { +@@ -1878,22 +1878,22 @@ void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+- if (CompressedOops::shift() != 0) { +- assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong"); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli(dst, src, LogMinObjAlignmentInBytes); +- if (CompressedOops::base() != NULL) { ++ if (Universe::narrow_oop_base() != NULL) { + add(dst, xheapbase, dst); + } + } else { +- assert(CompressedOops::base() == NULL, "sanity"); ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); + mv(dst, src); + } + } + + void MacroAssembler::decode_heap_oop(Register d, Register s) { +- if (CompressedOops::base() == NULL) { +- if (CompressedOops::shift() != 0 || d != s) { +- slli(d, s, CompressedOops::shift()); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); + } + } else { + Label done; +@@ -3004,7 +3004,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); + code_section()->relocate(inst_mark(), rspec); +- narrowKlass nk = CompressedKlassPointers::encode(k); ++ narrowKlass nk = Klass::encode_klass(k); + li32(dst, nk); + zero_extend(dst, dst, 32); + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 2ef28771e2e..953bca3cbd8 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -29,7 +29,6 @@ + + #include "asm/assembler.hpp" + #include "metaprogramming/enableIf.hpp" +-#include "oops/compressedOops.hpp" + + // MacroAssembler extends Assembler by frequently used macros. + // +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 2e7eed8fb52..24214964243 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1407,7 +1407,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { + st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); +- if (CompressedKlassPointers::shift() != 0) { ++ if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { +@@ -3255,7 +3255,7 @@ operand indOffL(iRegP reg, immLOffset off) + + operand indirectN(iRegN reg) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(DecodeN reg); + op_cost(0); +@@ -3270,7 +3270,7 @@ operand indirectN(iRegN reg) + + operand indOffIN(iRegN reg, immIOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -3285,7 +3285,7 @@ operand indOffIN(iRegN reg, immIOffset off) + + operand indOffLN(iRegN reg, immLOffset off) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP (DecodeN reg) off); + op_cost(0); +@@ -7947,7 +7947,7 @@ instruct convP2I(iRegINoSp dst, iRegP src) %{ + // in case of 32bit oops (heap < 4Gb). 
+ instruct convN2I(iRegINoSp dst, iRegN src) + %{ +- predicate(CompressedOops::shift() == 0); ++ predicate(Universe::narrow_oop_shift() == 0); + match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + + ins_cost(ALU_COST); + +From 81d8ea9077484f1dd20033390cbd3c1844b1b966 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:11:20 +0800 +Subject: [PATCH 099/140] Revert JDK-8247912: Make narrowOop a scoped enum + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index becc1656358..e2841c28c37 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1305,7 +1305,7 @@ int MacroAssembler::patch_oop(address insn_addr, address o) { + // instruction. + if (NativeInstruction::is_li32_at(insn_addr)) { + // Move narrow OOP +- uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o)); ++ narrowOop n = CompressedOops::encode((oop)o); + return patch_imm_in_li32(insn_addr, (int32_t)n); + } else if (NativeInstruction::is_movptr_at(insn_addr)) { + // Move wide OOP + +From f980e03cb17804ff72958dd13505058048c04da8 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:20:05 +0800 +Subject: [PATCH 100/140] Revert JDK-8260467: Move well-known classes from + systemDictionary.hpp to vmClasses.hpp + +--- + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 11 +++++------ + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 4 ++-- + 2 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +index e070b8096a6..fd907f77afb 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -27,7 +27,6 @@ + #include "precompiled.hpp" + #include "asm/macroAssembler.hpp" + #include "classfile/javaClasses.inline.hpp" +-#include "classfile/vmClasses.hpp" + #include "interpreter/interpreter.hpp" + #include "interpreter/interpreterRuntime.hpp" + #include "memory/allocation.inline.hpp" +@@ -50,7 +49,7 @@ + void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + assert_cond(_masm != NULL); + if (VerifyMethodHandles) { +- verify_klass(_masm, klass_reg, VM_CLASS_ID(java_lang_Class), ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + } + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +@@ -68,11 +67,11 @@ static int check_nonzero(const char* xname, int x) { + + #ifdef ASSERT + void MethodHandles::verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { + assert_cond(_masm != NULL); +- InstanceKlass** klass_addr = vmClasses::klass_addr_at(klass_id); +- Klass* klass = vmClasses::klass_at(klass_id); ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = t1; + Register temp2 = t0; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; +@@ -280,7 +279,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + // The method is a member invoker used by direct method handles. 
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) +- verify_klass(_masm, member_reg, VM_CLASS_ID(java_lang_invoke_MemberName), ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +index f73aba29d67..65493eba764 100644 +--- a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -36,11 +36,11 @@ enum /* platform_dependent_constants */ { + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, +- Register obj, vmClassID klass_id, ++ Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { +- verify_klass(_masm, mh_reg, VM_CLASS_ID(java_lang_invoke_MethodHandle), ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + +From 2c68b064100b5abaca80926e213280ea82ff161a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:32:15 +0800 +Subject: [PATCH 101/140] Revert JDK-8268858: Determine register pressure + automatically by the number of available registers for allocation + +--- + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/riscv.ad | 27 ---------------------- + 2 files changed, 2 insertions(+), 27 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 6c301cdae04..33d78fb2f6f 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -44,8 +44,10 @@ define_pd_global(intx, CompileThreshold, 10000); + + define_pd_global(intx, OnStackReplacePercentage, 140); + define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 32); + define_pd_global(intx, FreqInlineSize, 325); + define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); + define_pd_global(intx, InteriorEntryAlignment, 16); + define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); + define_pd_global(intx, LoopUnrollLimit, 60); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 24214964243..c5e0ae23029 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1719,33 +1719,6 @@ bool Matcher::is_spillable_arg(int reg) + return can_be_java_arg(reg); + } + +-uint Matcher::int_pressure_limit() +-{ +- // A derived pointer is live at CallNode and then is flagged by RA +- // as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip +- // derived pointers and lastly fail to spill after reaching maximum +- // number of iterations. Lowering the default pressure threshold to +- // (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become +- // a high register pressure area of the code so that split_DEF can +- // generate DefinitionSpillCopy for the derived pointer. 
+- uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1; +- if (!PreserveFramePointer) { +- // When PreserveFramePointer is off, frame pointer is allocatable, +- // but different from other SOC registers, it is excluded from +- // fatproj's mask because its save type is No-Save. Decrease 1 to +- // ensure high pressure at fatproj when PreserveFramePointer is off. +- // See check_pressure_at_fatproj(). +- default_int_pressure_threshold--; +- } +- return (INTPRESSURE == -1) ? default_int_pressure_threshold : INTPRESSURE; +-} +- +-uint Matcher::float_pressure_limit() +-{ +- // _FLOAT_REG_mask is generated by adlc from the float_reg register class. +- return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE; +-} +- + bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; + } + +From 932ebd6238ea7703dc3164e4506af332f6847592 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 17:51:12 +0800 +Subject: [PATCH 102/140] Revert JDK-8276563: Undefined Behaviour in class + Assembler && 8257882: Implement linkToNative intrinsic on AArch64 (the + register part) + +--- + .../cpu/riscv/globalDefinitions_riscv.hpp | 2 - + src/hotspot/cpu/riscv/register_riscv.cpp | 4 - + src/hotspot/cpu/riscv/register_riscv.hpp | 123 +++++++++++++----- + 3 files changed, 91 insertions(+), 38 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 2936837d951..ffd420da024 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -47,6 +47,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false + +-#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +index 96cf1996a83..ef60cb3bb05 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.cpp ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -26,10 +26,6 @@ + #include "precompiled.hpp" + #include "register_riscv.hpp" + +-REGISTER_IMPL_DEFINITION(Register, RegisterImpl, RegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +-REGISTER_IMPL_DEFINITION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; + +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +index d697751f55f..f64a06eb89f 100644 +--- a/src/hotspot/cpu/riscv/register_riscv.hpp ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -47,13 +47,13 @@ typedef VMRegImpl* VMReg; + + // Use Register as shortcut + class RegisterImpl; +-typedef const RegisterImpl* Register; ++typedef RegisterImpl* Register; + +-inline constexpr Register as_Register(int encoding); ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} + + class RegisterImpl: public AbstractRegisterImpl { +- static constexpr Register first(); +- + public: + enum { + number_of_registers = 32, +@@ -66,16 +66,16 @@ class RegisterImpl: public AbstractRegisterImpl { + }; + + // derived registers, offsets, and addresses +- const Register successor() const { return this + 1; } ++ const Register successor() const { return 
as_Register(encoding() + 1); } + + // construction +- inline friend constexpr Register as_Register(int encoding); ++ inline friend Register as_Register(int encoding); + + VMReg as_VMReg() const; + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -93,9 +93,11 @@ class RegisterImpl: public AbstractRegisterImpl { + return encoding_nocheck() >= compressed_register_base && + encoding_nocheck() <= compressed_register_top; + } +-}; + +-REGISTER_IMPL_DECLARATION(Register, RegisterImpl, RegisterImpl::number_of_registers); ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; + + // The integer registers of the RISCV architecture + +@@ -136,14 +138,14 @@ CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); + + // Use FloatRegister as shortcut + class FloatRegisterImpl; +-typedef const FloatRegisterImpl* FloatRegister; ++typedef FloatRegisterImpl* FloatRegister; + +-inline constexpr FloatRegister as_FloatRegister(int encoding); ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} + + // The implementation of floating point registers for the architecture + class FloatRegisterImpl: public AbstractRegisterImpl { +- static constexpr FloatRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -155,18 +157,16 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr FloatRegister as_FloatRegister(int encoding); ++ inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- FloatRegister successor() const { +- return as_FloatRegister((encoding() + 1) % (unsigned)number_of_registers); +- } ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +@@ -186,8 +186,6 @@ class FloatRegisterImpl: public AbstractRegisterImpl { + } + }; + +-REGISTER_IMPL_DECLARATION(FloatRegister, FloatRegisterImpl, FloatRegisterImpl::number_of_registers); +- + // The float registers of the RISCV architecture + + CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); +@@ -227,14 +225,14 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + + // Use VectorRegister as shortcut + class VectorRegisterImpl; +-typedef const VectorRegisterImpl* VectorRegister; ++typedef VectorRegisterImpl* VectorRegister; + +-inline constexpr VectorRegister as_VectorRegister(int encoding); ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; ++} + + // The implementation of vector registers for RVV + class VectorRegisterImpl: public AbstractRegisterImpl { +- static constexpr VectorRegister first(); +- + public: + enum { + number_of_registers = 32, +@@ -242,23 
+240,21 @@ class VectorRegisterImpl: public AbstractRegisterImpl { + }; + + // construction +- inline friend constexpr VectorRegister as_VectorRegister(int encoding); ++ inline friend VectorRegister as_VectorRegister(int encoding); + + VMReg as_VMReg() const; + + // derived registers, offsets, and addresses +- VectorRegister successor() const { return this + 1; } ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } +- int encoding_nocheck() const { return this - first(); } ++ int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + + }; + +-REGISTER_IMPL_DECLARATION(VectorRegister, VectorRegisterImpl, VectorRegisterImpl::number_of_registers); +- + // The vector registers of RVV + CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); + +@@ -315,8 +311,71 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + static const int max_fpr; + }; + +-typedef AbstractRegSet RegSet; +-typedef AbstractRegSet FloatRegSet; +-typedef AbstractRegSet VectorRegSet; ++// A set of registers ++class RegSet { ++ uint32_t _bitset; ++ ++ RegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ RegSet() : _bitset(0) { } ++ ++ RegSet(Register r1) : _bitset(r1->bit()) { } ++ ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } ++ ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return RegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++ ++private: ++ ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? 
as_Register(exact_log2(first)) : noreg; ++ } ++}; + + #endif // CPU_RISCV_REGISTER_RISCV_HPP + +From 9c85aa8d3387d795f9c2f4795ffc7f9d7f814d92 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:24:49 +0800 +Subject: [PATCH 103/140] Revert JDK-8240363: Refactor Compile::Output() to its + own Phase + +--- + .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 20 +++++++++---------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e2841c28c37..656334f326b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3027,7 +3027,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && +- Compile::current()->output()->in_scratch_emit_size()); ++ Compile::current()->in_scratch_emit_size()); + #endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c5e0ae23029..d736750d02d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1029,7 +1029,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + //============================================================================= + const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; + +-int ConstantTable::calculate_table_base_offset() const { ++int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset + } + +@@ -1058,9 +1058,9 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + +- int framesize = C->output()->frame_slots() << LogBytesPerInt; ++ int framesize = C->frame_slots() << LogBytesPerInt; + +- if (C->output()->need_stack_bang(framesize)) { ++ if (C->need_stack_bang(framesize)) { + st->print("# stack bang size=%d\n\t", framesize); + } + +@@ -1077,7 +1077,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + + // n.b. frame size includes space for return pc and fp +- const int framesize = C->output()->frame_size_in_bytes(); ++ const int framesize = C->frame_size_in_bytes(); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later +@@ -1085,8 +1085,8 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + + assert_cond(C != NULL); + +- int bangsize = C->output()->bang_size_in_bytes(); +- if (C->output()->need_stack_bang(bangsize)) { ++ int bangsize = C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + +@@ -1096,12 +1096,12 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Unimplemented(); + } + +- C->output()->set_frame_complete(cbuf.insts_size()); ++ C->set_frame_complete(cbuf.insts_size()); + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
+- ConstantTable& constant_table = C->output()->constant_table(); ++ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } + } +@@ -1125,7 +1125,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { + assert_cond(st != NULL && ra_ != NULL); + Compile* C = ra_->C; + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + st->print("# pop frame %d\n\t", framesize); + +@@ -1152,7 +1152,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + assert_cond(C != NULL); +- int framesize = C->output()->frame_size_in_bytes(); ++ int framesize = C->frame_size_in_bytes(); + + __ remove_frame(framesize); + + +From 3a58114310a56ebca04ba44b4883d205096eb844 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:36:09 +0800 +Subject: [PATCH 104/140] Revert RotateLeft && RotateRight matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 2 - + src/hotspot/cpu/riscv/riscv_b.ad | 76 -------------------------------- + 2 files changed, 78 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index d736750d02d..1e6495692da 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1505,8 +1505,6 @@ const bool Matcher::match_rule_supported(int opcode) { + case Op_PopCountL: + return UsePopCountInstruction; + +- case Op_RotateRight: +- case Op_RotateLeft: + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +index 4488c1c4031..b9e04c432e1 100644 +--- a/src/hotspot/cpu/riscv/riscv_b.ad ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -25,82 +25,6 @@ + + // RISCV Bit-Manipulation Extension Architecture Description File + +-instruct rorI_imm_rvb(iRegINoSp dst, iRegI src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "roriw $dst, $src, ($shift & 0x1f)\t#@rorI_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x1f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorL_imm_rvb(iRegLNoSp dst, iRegL src, immI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rori $dst, $src, ($shift & 0x3f)\t#@rorL_imm_rvb" %} +- +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rori(as_Register($dst$$reg), as_Register($src$$reg), $shift$$constant & 0x3f); +- %} +- +- ins_pipe(ialu_reg_shift); +-%} +- +-instruct rorI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "rorw $dst, $src, $shift\t#@rorI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rorL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateRight src shift)); +- +- format %{ "ror $dst, $src, $shift\t#@rorL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolI_reg_rvb(iRegINoSp dst, iRegI src, iRegI shift) %{ +- predicate(UseRVB); +- 
match(Set dst (RotateLeft src shift)); +- +- format %{ "rolw $dst, $src, $shift\t#@rolI_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- +-instruct rolL_reg_rvb(iRegLNoSp dst, iRegL src, iRegI shift) %{ +- predicate(UseRVB); +- match(Set dst (RotateLeft src shift)); +- +- format %{ "rol $dst, $src, $shift\t#@rolL_reg_rvb" %} +- ins_cost(ALU_COST); +- ins_encode %{ +- __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); +- %} +- ins_pipe(ialu_reg_reg); +-%} +- + // Convert oop into int for vectors alignment masking + instruct convP2I_rvb(iRegINoSp dst, iRegP src) %{ + predicate(UseRVB); + +From 21577388eda0218eeb4b28bc71ecf5737d40639e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:49:28 +0800 +Subject: [PATCH 105/140] Revert JDK-8230565: ZGC: Redesign C2 load barrier to + expand on the MachNode level + +--- + src/hotspot/cpu/riscv/riscv.ad | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 1e6495692da..533eaf843e3 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -4324,7 +4324,6 @@ instruct loadRange(iRegINoSp dst, memory mem) + instruct loadP(iRegPNoSp dst, memory mem) + %{ + match(Set dst (LoadP mem)); +- predicate(n->as_Load()->barrier_data() == 0); + + ins_cost(LOAD_COST); + format %{ "ld $dst, $mem\t# ptr, #@loadP" %} +@@ -5060,8 +5059,6 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS + + instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); +- + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); +@@ -5181,7 +5178,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL + + instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + +@@ -5327,7 +5324,6 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +@@ -5462,7 +5458,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN + + instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +@@ -5592,7 +5588,6 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne + + instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + + ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); 
+@@ -5731,7 +5726,7 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN + + instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + +@@ -5798,7 +5793,6 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetP mem newv)); + + ins_cost(ALU_COST); +@@ -5865,7 +5859,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) + + instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) + %{ +- predicate(needs_acquiring_load_reserved(n) && (n->as_LoadStore()->barrier_data() == 0)); ++ predicate(needs_acquiring_load_reserved(n)); + + match(Set prev (GetAndSetP mem newv)); + + +From 4673921af60f4779d4322256f92bb60a850cb035 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:51:09 +0800 +Subject: [PATCH 106/140] Revert JDK-8252990: Intrinsify Unsafe.storeStoreFence + +--- + src/hotspot/cpu/riscv/riscv.ad | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 533eaf843e3..5fa3b85c001 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7537,7 +7537,6 @@ instruct membar_release() %{ + + instruct membar_storestore() %{ + match(MemBarStoreStore); +- match(StoreStoreFence); + ins_cost(ALU_COST); + + format %{ "MEMBAR-store-store\t#@membar_storestore" %} + +From e254a03e87ffc6d8f563dbd7db1b607a95657263 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 19:54:02 +0800 +Subject: [PATCH 107/140] Revert JDK-8255150: Add utility methods to check long + indexes and ranges && JDK-8252372: Check if cloning is required to move loads + out of loops in PhaseIdealLoop::split_if_with_blocks_post() + +--- + src/hotspot/cpu/riscv/riscv.ad | 33 --------------------------------- + 1 file changed, 33 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 5fa3b85c001..388e65f623d 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -7621,17 +7621,6 @@ instruct castPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castLL(iRegL dst) +-%{ +- match(Set dst (CastLL dst)); +- +- size(0); +- format %{ "# castLL of $dst, #@castLL" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + instruct castII(iRegI dst) + %{ + match(Set dst (CastII dst)); +@@ -7654,28 +7643,6 @@ instruct checkCastPP(iRegPNoSp dst) + ins_pipe(pipe_class_empty); + %} + +-instruct castFF(fRegF dst) +-%{ +- match(Set dst (CastFF dst)); +- +- size(0); +- format %{ "# castFF of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- +-instruct castDD(fRegD dst) +-%{ +- match(Set dst (CastDD dst)); +- +- size(0); +- format %{ "# castDD of $dst" %} +- ins_encode(/* empty encoding */); +- ins_cost(0); +- ins_pipe(pipe_class_empty); +-%} +- + // ============================================================================ + // Convert Instructions + + +From 2c1820363992d09ef0cd2ed2553c04e0f7afd91f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:02:14 +0800 
+Subject: [PATCH 108/140] Revert reset_label part of JDK-8248411: [aarch64] + Insufficient error handling when CodeBuffer is exhausted + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 2 +- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 14 +++++--------- + 2 files changed, 6 insertions(+), 10 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 656334f326b..37ccf132986 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3784,7 +3784,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) + if (StubRoutines::riscv::complete()) { + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { +- DEBUG_ONLY(reset_labels(around)); ++ DEBUG_ONLY(reset_labels1(around)); + postcond(pc() == badAddress); + return NULL; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 953bca3cbd8..45ffc663963 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -815,17 +815,13 @@ class MacroAssembler: public Assembler { + private: + + #ifdef ASSERT +- // Template short-hand support to clean-up after a failed call to trampoline ++ // Macro short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). +- template +- void reset_labels(Label& lbl, More&... more) { +- lbl.reset(); reset_labels(more...); +- } +- template +- void reset_labels(Label& lbl) { +- lbl.reset(); +- } ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) + #endif + void repne_scan(Register addr, Register value, Register count, Register tmp); + + +From 014972a0778b8c5568fae9e92d286b634cb44674 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:30:49 +0800 +Subject: [PATCH 109/140] Revert JDK-8242289: C2: Support platform-specific + node cloning in Matcher + +--- + src/hotspot/cpu/riscv/riscv.ad | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 388e65f623d..7cd6c2995ba 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1765,20 +1765,10 @@ bool size_fits_all_mem_uses(AddPNode* addp, int shift) { + + const bool Matcher::convi2l_type_required = false; + +-// Should the Matcher clone input 'm' of node 'n'? +-bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { +- assert_cond(m != NULL); +- if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) +- mstack.push(m, Visit); // m = ShiftCntV +- return true; +- } +- return false; +-} +- + // Should the Matcher clone shifts on addressing modes, expecting them + // to be subsumed into complex addressing expressions or compute them + // into registers? 
+-bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); + } + + +From d15e155e9b84f4789cfbb1cf75382be859b0a8ca Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:40:00 +0800 +Subject: [PATCH 110/140] Revert JDK-8255782: Turn UseTLAB and ResizeTLAB from + product_pd to product, defaulting to "true" + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 8f2f4e0e81d..25e00bea901 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -34,6 +34,8 @@ + + #ifndef TIERED + define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); + define_pd_global(bool, InlineIntrinsics, true ); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + define_pd_global(bool, ProfileTraps, false); +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +index 33d78fb2f6f..3da1f1c6d86 100644 +--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -33,6 +33,8 @@ + // (see c2_globals.hpp). Alpha-sorted. + + define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); + define_pd_global(bool, CICompileOSR, true); + define_pd_global(bool, InlineIntrinsics, true); + define_pd_global(bool, PreferInterpreterNativeStubs, false); + +From f3fa0cfa987743b4ee83332ddf71add421561908 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:49:57 +0800 +Subject: [PATCH 111/140] Revert JDK-8265245: depChecker_ don't have any + functionalities + +--- + src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 ++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp + +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +new file mode 100644 +index 00000000000..e9ff307b647 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++ ++// Nothing to do on riscv ++ ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP + +From 97a3d4d3b98a450aa316eaa94103cf8473d12d50 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 20:58:34 +0800 +Subject: [PATCH 112/140] Revert JDK-8241438: Move IntelJccErratum mitigation + code to platform-specific code + +--- + src/hotspot/cpu/riscv/riscv.ad | 18 ------------------ + 1 file changed, 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index 7cd6c2995ba..fc6823daf8b 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -740,13 +740,6 @@ class HandlerImpl { + } + }; + +-class Node::PD { +-public: +- enum NodeFlags { +- _last_flag = Node::_last_flag +- }; +-}; +- + bool is_CAS(int opcode, bool maybe_volatile); + + // predicate controlling translation of CompareAndSwapX +@@ -805,17 +798,6 @@ void reg_mask_init() { + } + } + +-void PhaseOutput::pd_perform_mach_node_analysis() { +-} +- +-int MachNode::pd_alignment_required() const { +- return 1; +-} +- +-int MachNode::compute_padding(int current_offset) const { +- return 0; +-} +- + // is_CAS(int opcode, bool maybe_volatile) + // + // return true if opcode is one of the possible CompareAndSwapX + +From 8a3e7b81b79918a4f2feb4d9226ab8be6c43c28a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:03:47 +0800 +Subject: [PATCH 113/140] Revert JDK-8260355: AArch64: deoptimization stub + should save vector registers + +--- + src/hotspot/cpu/riscv/registerMap_riscv.cpp | 45 --------------------- + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 1 - + 2 files changed, 46 deletions(-) + delete mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.cpp + +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.cpp b/src/hotspot/cpu/riscv/registerMap_riscv.cpp +deleted file mode 100644 +index 26c1edc36ff..00000000000 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.cpp ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +- * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. +- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#include "precompiled.hpp" +-#include "runtime/registerMap.hpp" +-#include "vmreg_riscv.inline.hpp" +- +-address RegisterMap::pd_location(VMReg base_reg, int slot_idx) const { +- if (base_reg->is_VectorRegister()) { +- assert(base_reg->is_concrete(), "must pass base reg"); +- int base_reg_enc = (base_reg->value() - ConcreteRegisterImpl::max_fpr) / +- VectorRegisterImpl::max_slots_per_register; +- intptr_t offset_in_bytes = slot_idx * VMRegImpl::stack_slot_size; +- address base_location = location(base_reg); +- if (base_location != NULL) { +- return base_location + offset_in_bytes; +- } else { +- return NULL; +- } +- } else { +- return location(base_reg->next(slot_idx)); +- } +-} +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +index f34349811a9..fef8ca9b64e 100644 +--- a/src/hotspot/cpu/riscv/registerMap_riscv.hpp ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -33,7 +33,6 @@ + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + address pd_location(VMReg reg) const { return NULL; } +- address pd_location(VMReg base_reg, int slot_idx) const; + + // no PD state to clear or copy: + void pd_clear() {} + +From 5fc20f93a312f9189b55c5236c15a55b3da10cf9 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:05:37 +0800 +Subject: [PATCH 114/140] Revert JDK-8250914: Matcher::stack_direction() is + unused + +--- + src/hotspot/cpu/riscv/riscv.ad | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index fc6823daf8b..c21508b6e7c 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2326,6 +2326,9 @@ encode %{ + // SP meets the minimum alignment. + + frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); ++ + // These three registers define part of the calling convention + // between compiled code and the interpreter. 
+ + +From aab3322fd2507a3aeae39c69ba871400dd342834 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:15:45 +0800 +Subject: [PATCH 115/140] Revert CacheWB*Node matching rules + +--- + src/hotspot/cpu/riscv/riscv.ad | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index c21508b6e7c..e410bd06aa6 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -1475,14 +1475,6 @@ const bool Matcher::match_rule_supported(int opcode) { + } + + switch (opcode) { +- case Op_CacheWB: // fall through +- case Op_CacheWBPreSync: // fall through +- case Op_CacheWBPostSync: +- if (!VM_Version::supports_data_cache_line_flush()) { +- return false; +- } +- break; +- + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; + +From 705981aaff19b442b55df8a038aab9c61133bc3a Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:21:10 +0800 +Subject: [PATCH 116/140] Revert JDK-8263595: Remove oop type punning in + JavaCallArguments + +--- + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +index bc4e5758256..df3c0267eea 100644 +--- a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -65,8 +65,9 @@ class JNITypes : private AllStatic { + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. +- static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } +- static inline void put_obj(jobject from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle; } ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + +From bba22725b9f1386d8899941ccee3e8dc7f9a4a6f Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:33:01 +0800 +Subject: [PATCH 117/140] Revert JDK-8260012: Reduce inclusion of + collectedHeap.hpp and heapInspection.hpp + +--- + src/hotspot/cpu/riscv/frame_riscv.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +index 40ec584b994..d4fcbdcbbde 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.cpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -598,7 +598,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } +- assert(Universe::is_in_heap_or_null(obj), "sanity check"); ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + +From 49000a43408aba29d3dc9ee4e03219e6f85be602 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:35:21 +0800 +Subject: [PATCH 118/140] Revert JDK-8271869: AArch64: build errors with GCC11 + in frame::saved_oop_result + +--- + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +index 5ac1bf57f57..abd5bda7e49 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -230,8 +230,6 @@ inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + + + // Compiled frames +-PRAGMA_DIAG_PUSH +-PRAGMA_NONNULL_IGNORED + inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); +@@ -243,6 +241,5 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + guarantee(result_adr != NULL, "bad register save location"); + *result_adr = obj; + } +-PRAGMA_DIAG_POP + + #endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP + +From 14a46a85e65f6fec09ac566d49a6232216881adb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:40:43 +0800 +Subject: [PATCH 119/140] Revert JDK-8230392: Define AArch64 as + MULTI_COPY_ATOMIC + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index ffd420da024..606f0fa0da3 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -33,10 +33,6 @@ const int StackAlignmentInBytes = 16; + // 32-bit integer argument values are extended to 64 bits. + const bool CCallingConventionRequiresIntsAsLongs = false; + +-// RISCV has adopted a multicopy atomic model closely following +-// that of ARMv8. +-#define CPU_MULTI_COPY_ATOMIC +- + // To be safe, we deoptimize when we come across an access that needs + // patching. This is similar to what is done on aarch64. 
+ #define DEOPTIMIZE_WHEN_PATCHING + +From 8740928267a831c62f1deb20c910e3c27716bc40 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:42:20 +0800 +Subject: [PATCH 120/140] Revert: JDK-8246689: Enable independent compressed + oops/class ptrs on Aarch64 JDK-8241825: Make compressed oops and compressed + class pointers independent (x86_64, PPC, S390) + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index 606f0fa0da3..acdf75d324e 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,6 +41,4 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + +-#define COMPRESSED_CLASS_POINTERS_DEPENDS_ON_COMPRESSED_OOPS false +- + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 94b40f4efccc19c8ac66eda6c57381a222b02d2d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:50:49 +0800 +Subject: [PATCH 121/140] Revert JDK-8222637: Obsolete NeedsDeoptSuspend + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b78f258a764..a838a377829 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -32,6 +32,8 @@ + // Sets the default values for platform dependent flags used by the runtime system. + // (see globals.hpp) + ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ + define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks + define_pd_global(bool, TrapBasedNullChecks, false); + define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +From 09968c9fc102fd32bc628d3e6fd9d9adcbec4373 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 21:52:44 +0800 +Subject: [PATCH 122/140] Revert JDK-8220051: Remove global safepoint code + +--- + src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +index acdf75d324e..d6ce8da07b8 100644 +--- a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -41,4 +41,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false; + + #define SUPPORT_RESERVED_STACK_AREA + ++#define THREAD_LOCAL_POLL ++ + #endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP + +From 2f4fb2b5ac420d456421592dc09b81244636ba4d Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:00:52 +0800 +Subject: [PATCH 123/140] Revert JDK-8272873: C2: Inlining should not depend on + absolute call site counts + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index a838a377829..b4f71c45ec1 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -41,6 +41,7 @@ define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs + define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
+ define_pd_global(intx, CodeEntryAlignment, 64); + define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); + + #define DEFAULT_STACK_YELLOW_PAGES (2) + #define DEFAULT_STACK_RED_PAGES (1) + +From 2df3625eea16fc0d45c0e4cf12c9433f0ec070fd Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:02:13 +0800 +Subject: [PATCH 124/140] Revert JDK-8220049: Obsolete ThreadLocalHandshakes + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b4f71c45ec1..b7d85373c4a 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -76,6 +76,8 @@ define_pd_global(bool, CompactStrings, true); + // Clear short arrays bigger than one word in an arch-specific way + define_pd_global(intx, InitArrayShortSize, BytesPerLong); + ++define_pd_global(bool, ThreadLocalHandshakes, true); ++ + define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + +From a875c4caa423dd727cea1c891b17f4ded97e57d1 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sun, 30 Apr 2023 22:04:32 +0800 +Subject: [PATCH 125/140] Revert: JDK-8243208: Clean up JVMFlag implementation + JDK-8236625: Remove writeable macro from JVM flags declaration + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index b7d85373c4a..0becd9efd35 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -82,9 +82,12 @@ define_pd_global(intx, InlineSmallCode, 1000); + + #define ARCH_FLAGS(develop, \ + product, \ ++ diagnostic, \ ++ experimental, \ + notproduct, \ + range, \ +- constraint) \ ++ constraint, \ ++ writeable) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + +From 19a9e6e8c3dba77cf8be0f25b1aec394aeca0b25 Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Wed, 10 May 2023 09:44:12 +0800 +Subject: [PATCH 126/140] Revert JDK-8213436: Obsolete UseMembar && + JDK-8188764: Obsolete AssumeMP and then remove all support for non-MP builds, + always enabled + +--- + src/hotspot/cpu/riscv/globals_riscv.hpp | 2 ++ + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 0becd9efd35..e820898d87f 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -64,6 +64,8 @@ define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + define_pd_global(bool, RewriteBytecodes, true); + define_pd_global(bool, RewriteFrequentPairs, true); + ++define_pd_global(bool, UseMembar, true); ++ + define_pd_global(bool, PreserveFramePointer, false); + + // GC Ergo Flags +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +index 50ee7edb708..f13e4269b77 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -139,6 +139,8 @@ void VM_Version::initialize() { + #endif // COMPILER2 + + UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); + } + + #ifdef COMPILER2 + +From 0c4a9d1b6b3b3b31a1c105ff311414ae542764bb Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Mon, 
1 May 2023 16:04:15 +0800 +Subject: [PATCH 127/140] Misc adaptations to jdk11u + +--- + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 2 +- + .../linux_riscv/vm_version_linux_riscv.cpp | 16 ++++++++-------- + 2 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +index 25e00bea901..9316d4be02e 100644 +--- a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -57,7 +57,7 @@ define_pd_global(uintx, CodeCacheMinBlockLength, 1); + define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + define_pd_global(uintx, MetaspaceSize, 12*M ); + define_pd_global(bool, NeverActAsServerClassMachine, true ); +-define_pd_global(uint64_t, MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); + define_pd_global(bool, CICompileOSR, true ); + #endif // !TIERED + define_pd_global(bool, UseTypeProfile, false); +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +index 4623dbfad42..60260854db6 100644 +--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -83,14 +83,14 @@ void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); + +- static_assert(CPU_I == HWCAP_ISA_I, "Flag CPU_I must follow Linux HWCAP"); +- static_assert(CPU_M == HWCAP_ISA_M, "Flag CPU_M must follow Linux HWCAP"); +- static_assert(CPU_A == HWCAP_ISA_A, "Flag CPU_A must follow Linux HWCAP"); +- static_assert(CPU_F == HWCAP_ISA_F, "Flag CPU_F must follow Linux HWCAP"); +- static_assert(CPU_D == HWCAP_ISA_D, "Flag CPU_D must follow Linux HWCAP"); +- static_assert(CPU_C == HWCAP_ISA_C, "Flag CPU_C must follow Linux HWCAP"); +- static_assert(CPU_V == HWCAP_ISA_V, "Flag CPU_V must follow Linux HWCAP"); +- static_assert(CPU_B == HWCAP_ISA_B, "Flag CPU_B must follow Linux HWCAP"); ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ STATIC_ASSERT(CPU_B == HWCAP_ISA_B); + _features = auxv & ( + HWCAP_ISA_I | + HWCAP_ISA_M | + +From 4ce5e05526029360ad15eb9639c9c05fac77ac8e Mon Sep 17 00:00:00 2001 +From: "yunyao.zxl" +Date: Sat, 20 May 2023 17:51:52 +0800 +Subject: [PATCH 128/140] Save all call-clobbered registers for spark tests may + crash + +--- + .../cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +index bc847388f68..e191cbcee2a 100644 +--- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -157,21 +157,15 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + __ j(done); + + __ bind(runtime); +- // save the live input values +- RegSet saved = RegSet::of(pre_val); +- if (tosca_live) { saved += RegSet::of(x10); } +- if (obj != noreg) { saved += RegSet::of(obj); } +- +- __ push_reg(saved, sp); + ++ __ push_call_clobbered_registers(); + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), 
pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } +- +- __ pop_reg(saved, sp); ++ __ pop_call_clobbered_registers(); + + __ bind(done); + + +From 1b8778b0831571e9ac688bbd22afca4cf8f62407 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 22 Aug 2023 16:17:31 +0800 +Subject: [PATCH 129/140] Build with gcc 13 + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 1 + + src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 37ccf132986..fd18bb77058 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -49,6 +49,7 @@ + #include "runtime/thread.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" + #include "opto/node.hpp" + #include "opto/output.hpp" + #endif +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +index 31d9254d8ad..ccceed643ed 100644 +--- a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -24,6 +24,7 @@ + */ + + #include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" + #include "runtime/frame.inline.hpp" + #include "runtime/thread.inline.hpp" + + +From 4c23be6665aec94462e82e3b4adcf7abb5b23981 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 5 Sep 2023 15:37:43 +0800 +Subject: [PATCH 130/140] Fix copyright information + +--- + make/autoconf/build-aux/config.guess | 2 +- + .../MyPackage/HeapMonitorEventsForTwoThreadsTest.java | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess +index 15111d827ab..a88a9adec3f 100644 +--- a/make/autoconf/build-aux/config.guess ++++ b/make/autoconf/build-aux/config.guess +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. + # Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # +diff --git a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +index f0b7aed5ceb..54640b245f8 100644 +--- a/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java ++++ b/test/hotspot/jtreg/serviceability/jvmti/HeapMonitor/MyPackage/HeapMonitorEventsForTwoThreadsTest.java +@@ -1,4 +1,5 @@ + /* ++ * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + +From 70a060f73c3617e58f881bcee19f1a3ce43f54ff Mon Sep 17 00:00:00 2001 +From: Chris Plummer +Date: Thu, 2 Jul 2020 13:13:10 -0700 +Subject: [PATCH 131/140] 8247533: SA stack walking sometimes fails with + sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a lwp + +Reviewed-by: sspitsyn, ysuenaga, dtitov +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 ++++++- + .../linux/native/libsaproc/ps_proc.c | 3 ++- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 ++++++++++++------- + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++++--- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++++--- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++++++---- + .../windows/native/libsaproc/sawindbg.cpp | 14 ++++++++--- + 9 files changed, 61 insertions(+), 27 deletions(-) + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 45a927fb5ee..6f1887f8113 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,7 +413,13 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); ++ // This is not considered fatal and does happen on occassion, usually with an ++ // ESRCH error. The root cause is not fully understood, but by ignoring this error ++ // and returning NULL, stacking walking code will get null registers and fallback ++ // to using the "last java frame" if setup. ++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); ++ fflush(stdout); ++ return NULL; + } + + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d859e..691c3f6684a 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,7 +144,8 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); ++ print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp(%d) errno(%d) \"%s\"\n", pid, ++ errno, strerror(errno)); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index 18b8b4282fe..e46370a1f18 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadRegisterSet0 called\n"); ++ print_debug("getThreadIntegerRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,7 +705,13 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); ++ // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a ++ // failure to get thread registers, but if it were to fail the response should ++ // be the same. By ignoring this error and returning NULL, stacking walking code ++ // will get null registers and fallback to using the "last java frame" if setup. ++ fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", ++ result, tid); ++ fflush(stdout); + return NULL; + } + +@@ -808,25 +814,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index 655b450c3fc..d0557a7d254 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index 0d637f30f14..c52d3a51d54 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -67,8 +67,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index cb6712b58ee..6a0648f508a 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException); ++ throw new DebuggerException(lastException.getMessage(), lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 52307b9cdcf..3fe795d34bc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -73,8 +73,12 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. 
++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index ec5aea35e8c..377650a0a1c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -30,9 +30,9 @@ + + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; ++ private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id + private boolean gotID; +- private long id; ++ private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,8 +50,12 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); ++ // null means we failed to get the register set for some reason. The caller ++ // is responsible for dealing with the set of null registers in that case. ++ if (data != null) { ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); ++ } + } + return context; + } +@@ -86,6 +90,7 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); ++ gotID = true; + } + + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index 314cf69c957..e3b218b4dae 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,6 +45,7 @@ + + #include + #include ++#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -765,9 +766,16 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), +- "Windbg Error: GetThreadIdBySystemId failed!", 0); +- ++ HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); ++ if (hr != S_OK) { ++ // This is not considered fatal and does happen on occassion, usually with an ++ // 0x80004002 "No such interface supported". The root cause is not fully understood, ++ // but by ignoring this error and returning NULL, stacking walking code will get ++ // null registers and fallback to using the "last java frame" if setup. 
++ printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", ++ hr, sysId); ++ return -1; ++ } + return (jlong) id; + } + + +From 2cadd133d25e05be6ab9b16024a37bed79af1f15 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Wed, 30 Mar 2022 09:04:55 +0000 +Subject: [PATCH 132/140] 8283737: riscv: MacroAssembler::stop() should emit + fixed-length instruction sequence + +Reviewed-by: fyang, shade +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index fd18bb77058..b72a553da2f 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -542,8 +542,11 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t + void MacroAssembler::stop(const char* msg) { + address ip = pc(); + pusha(); +- li(c_rarg0, (uintptr_t)(address)msg); +- li(c_rarg1, (uintptr_t)(address)ip); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); + +From 729e0db14cb320aedf1f12051e667513bddbb8e8 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Sun, 24 Apr 2022 02:17:03 +0000 +Subject: [PATCH 133/140] 8285437: riscv: Fix MachNode size mismatch for + MacroAssembler::verify_oops* + +Reviewed-by: shade, fyang +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index b72a553da2f..9f80f7e2650 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -389,7 +389,10 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; +@@ -425,7 +428,10 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + ld(x10, addr); + } + +- li(t0, (uintptr_t)(address)b); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. 
++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; + +From 5cab06c6f09f4b62d54d8d291b1a23f796a085c1 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Mon, 30 May 2022 07:45:50 +0000 +Subject: [PATCH 134/140] 8287418: riscv: Fix correctness issue of + MacroAssembler::movptr + +Reviewed-by: fjiang, yadongwang, fyang +--- + src/hotspot/cpu/riscv/assembler_riscv.cpp | 14 +++++++------- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 18 +++++++++--------- + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 ++- + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 2 +- + 4 files changed, 19 insertions(+), 18 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp +index f15ef5304c5..a5f688cda1f 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp +@@ -282,9 +282,9 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + } + #endif + assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), +- "48-bit overflow in address constant"); +- // Load upper 32 bits +- int32_t imm = imm64 >> 16; ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; + int64_t upper = imm, lower = imm; + lower = (lower << 52) >> 52; + upper -= lower; +@@ -292,13 +292,13 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { + lui(Rd, upper); + addi(Rd, Rd, lower); + +- // Load the rest 16 bits. ++ // Load the rest 17 bits. + slli(Rd, Rd, 11); +- addi(Rd, Rd, (imm64 >> 5) & 0x7ff); +- slli(Rd, Rd, 5); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); + + // This offset will be used by following jalr/ld. +- offset = imm64 & 0x1f; ++ offset = imm64 & 0x3f; + } + + void Assembler::movptr(Register Rd, uintptr_t imm64) { +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 9f80f7e2650..f592d7585da 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1158,12 +1158,12 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { + + static int patch_addr_in_movptr(address branch, address target) { + const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load +- int32_t lower = ((intptr_t)target << 36) >> 36; +- int64_t upper = ((intptr_t)target - lower) >> 28; +- Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] +- Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] +- Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] +- Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. 
target[ 5: 0] ==> branch[31:20] + return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + } + +@@ -1235,9 +1235,9 @@ static long get_offset_of_pc_relative(address insn_addr) { + + static address get_target_of_movptr(address insn_addr) { + assert_cond(insn_addr != NULL); +- intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. +- target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + return (address) target_address; + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 45ffc663963..792c1fc2103 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -827,7 +827,8 @@ class MacroAssembler: public Assembler { + + // Return true if an address is within the 48-bit RISCV64 address space. + bool is_valid_riscv64_address(address addr) { +- return ((uintptr_t)addr >> 48) == 0; ++ // sv48: must have bits 63–48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; + } + + void ld_constant(Register dest, const Address &const_addr) { +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +index bfe84fa4e30..27011ad1283 100644 +--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -89,7 +89,7 @@ bool NativeInstruction::is_movptr_at(address instr) { + is_addi_at(instr + instruction_size) && // Addi + is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 + is_addi_at(instr + instruction_size * 3) && // Addi +- is_slli_shift_at(instr + instruction_size * 4, 5) && // Slli Rd, Rs, 5 ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 + (is_addi_at(instr + instruction_size * 5) || + is_jalr_at(instr + instruction_size * 5) || + is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load + +From 41d73298bf28473b3ba2483e61a39c188eddfde3 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Fri, 22 Sep 2023 16:57:56 +0800 +Subject: [PATCH 135/140] Fix: Fixed-length mv() mistakenly redirected to li() + during reshaping + +--- + src/hotspot/cpu/riscv/macroAssembler_riscv.cpp | 6 ++++++ + src/hotspot/cpu/riscv/macroAssembler_riscv.hpp | 3 +-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index f592d7585da..f851cc1e413 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -1343,6 +1343,12 @@ void MacroAssembler::mv(Register Rd, Address dest) { + movptr(Rd, dest.target()); + } + ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ + void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if 
(src.is_register()) { + mv(Rd, src.as_register()); +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 792c1fc2103..65f91532661 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -540,8 +540,6 @@ class MacroAssembler: public Assembler { + } + + // mv +- void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } +- + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } +@@ -552,6 +550,7 @@ class MacroAssembler: public Assembler { + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); + void mv(Register Rd, RegisterOrConstant src); + + // logic + +From 26f4b26a98507ec03a2329bfcbaab393247fe83f Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 2 Sep 2022 07:01:02 +0000 +Subject: [PATCH 136/140] 8293100: RISC-V: Need to save and restore + callee-saved FloatRegisters in StubGenerator::generate_call_stub + +Reviewed-by: yadongwang, fjiang, shade, vkempik +--- + src/hotspot/cpu/riscv/frame_riscv.hpp | 2 +- + src/hotspot/cpu/riscv/riscv.ad | 18 ++--- + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 74 +++++++++++++++++-- + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 2 +- + 4 files changed, 80 insertions(+), 16 deletions(-) + +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +index 3b88f6d5a1a..18e021dcb94 100644 +--- a/src/hotspot/cpu/riscv/frame_riscv.hpp ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -131,7 +131,7 @@ + // Entry frames + // n.b. 
these values are determined by the layout defined in + // stubGenerator for the Java call stub +- entry_frame_after_call_words = 22, ++ entry_frame_after_call_words = 34, + entry_frame_call_wrapper_offset = -10, + + // we don't need a save area +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index e410bd06aa6..69696b272a5 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -8601,7 +8601,7 @@ instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2 \t#@cmpF_branch"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8618,7 +8618,7 @@ instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "float_b$cmp $op1, $op2\t#@cmpF_loop"%} ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); +@@ -8636,7 +8636,7 @@ instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_branch"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8654,7 +8654,7 @@ instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST); +- format %{ "double_b$cmp $op1, $op2\t#@cmpD_loop"%} ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +@@ -8929,7 +8929,7 @@ instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ + effect(USE lbl); + + ins_cost(BRANCH_COST); +- format %{ "far_b$cmp $cr, zr, L\t#@far_cmpFlag_branch"%} ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ + __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); +@@ -9138,7 +9138,7 @@ instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_branch"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9154,7 +9154,7 @@ instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_float_b$cmp $op1, $op2\t#@far_cmpF_loop"%} ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), +@@ -9171,7 +9171,7 @@ instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_branch"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, 
as_FloatRegister($op1$$reg), +@@ -9187,7 +9187,7 @@ instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) + effect(USE lbl); + + ins_cost(XFER_COST + BRANCH_COST * 2); +- format %{ "far_double_b$cmp $op1, $op2\t#@far_cmpD_loop"%} ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ + __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +index 74c38c3d044..9970229c5c5 100644 +--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -118,16 +118,28 @@ class StubGenerator: public StubCodeGenerator { + // we don't need to save x6-x7 and x28-x31 which both C and Java treat as + // volatile + // +- // we save x18-x27 which Java uses as temporary registers and C +- // expects to be callee-save ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save + // + // so the stub frame looks like this when we enter Java code + // + // [ return_from_Java ] <--- sp + // [ argument word n ] + // ... +- // -22 [ argument word 1 ] +- // -21 [ saved x27 ] <--- sp_after_call ++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] + // -20 [ saved x26 ] + // -19 [ saved x25 ] + // -18 [ saved x24 ] +@@ -152,7 +164,20 @@ class StubGenerator: public StubCodeGenerator { + + // Call stub stack layout word offsets from fp + enum call_stub_layout { +- sp_after_call_off = -21, ++ sp_after_call_off = -33, ++ ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, + + x27_off = -21, + x26_off = -20, +@@ -198,6 +223,19 @@ class StubGenerator: public StubCodeGenerator { + + const Address thread (fp, thread_off * wordSize); + ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); ++ + const Address x27_save (fp, x27_off * wordSize); + const Address x26_save (fp, x26_off * wordSize); + const Address x25_save (fp, x25_off * wordSize); +@@ -244,6 +282,19 @@ class StubGenerator: public StubCodeGenerator { + __ sd(x26, x26_save); + __ sd(x27, x27_save); + ++ __ fsd(f8, f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); ++ + // install Java thread in global register now we 
have saved + // whatever value it held + __ mv(xthread, c_rarg7); +@@ -335,6 +386,19 @@ class StubGenerator: public StubCodeGenerator { + #endif + + // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); ++ + __ ld(x27, x27_save); + __ ld(x26, x26_save); + __ ld(x25, x25_save); +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +index 5d1187c2a27..c4338715f95 100644 +--- a/src/hotspot/cpu/riscv/vmreg_riscv.cpp ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -40,7 +40,7 @@ void VMRegImpl::set_regName() { + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { +- regName[i++] = reg->name(); ++ regName[i++] = freg->name(); + } + freg = freg->successor(); + } + +From 69ea557c320ad7b2f35fc0e986af9b485f95addf Mon Sep 17 00:00:00 2001 +From: Xiaolin Zheng +Date: Fri, 28 Oct 2022 11:56:21 +0000 +Subject: [PATCH 137/140] 8295926: RISC-V: C1: Fix + LIRGenerator::do_LibmIntrinsic + +Reviewed-by: yadongwang, fyang +--- + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 21 +++-- + .../floatingpoint/TestLibmIntrinsics.java | 80 +++++++++++++++++++ + 2 files changed, 96 insertions(+), 5 deletions(-) + create mode 100644 test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java + +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +index f9242251491..c41819fc2ae 100644 +--- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -679,19 +679,30 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; +- BasicTypeList signature(1); +- signature.append(T_DOUBLE); +- if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } +- cc = frame_map()->c_calling_convention(&signature); +- value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +new file mode 100644 +index 00000000000..5c711efddea +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60882,205 +82024,423 @@ index 000000000..6852c0540 + * or visit www.oracle.com if you need additional information or have any + * questions. + */ -+package org.openjdk.bench.java.lang; + -+import java.util.Random; -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.State; -+ -+import java.util.concurrent.TimeUnit; -+ -+/** -+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of -+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. -+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build jdk.test.whitebox.WhiteBox ++ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics + */ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@State(Scope.Thread) -+public class IndexOfBenchmark { -+ private static final int loops = 100000; -+ private static final Random rng = new Random(1999); -+ private static final int pathCnt = 1000; -+ private static final String [] latn1_short = new String[pathCnt]; -+ private static final String [] latn1_sse4 = new String[pathCnt]; -+ private static final String [] latn1_avx2 = new String[pathCnt]; -+ private static final String [] latn1_mixedLength = new String[pathCnt]; -+ private static final String [] utf16_short = new String[pathCnt]; -+ private static final String [] utf16_sse4 = new String[pathCnt]; -+ private static final String [] utf16_avx2 = new String[pathCnt]; -+ private static final String [] utf16_mixedLength = new String[pathCnt]; -+ static { -+ for (int i = 0; i < pathCnt; i++) { -+ latn1_short[i] = makeRndString(false, 15); -+ latn1_sse4[i] = makeRndString(false, 16); -+ latn1_avx2[i] = makeRndString(false, 32); -+ utf16_short[i] = makeRndString(true, 7); -+ utf16_sse4[i] = makeRndString(true, 8); -+ utf16_avx2[i] = makeRndString(true, 16); -+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); -+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); -+ } ++ ++package compiler.floatingpoint; ++ ++import compiler.whitebox.CompilerWhiteBoxTest; ++import jdk.test.whitebox.WhiteBox; ++ ++import java.lang.reflect.Method; ++ ++public class TestLibmIntrinsics { ++ ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++ ++ private static final double pi = 3.1415926; ++ ++ private static final double expected = 2.5355263553695413; ++ ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); + } + -+ private static String makeRndString(boolean isUtf16, int length) { -+ StringBuilder sb = new StringBuilder(length); -+ if(length > 0){ -+ sb.append(isUtf16?'☺':'b'); ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = 
compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); + -+ for (int i = 1; i < length-1; i++) { -+ sb.append((char)('b' + rng.nextInt(26))); -+ } ++ double interpreter_result = m(); + -+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ ++ double c1_result = m(); ++ ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); + } -+ return sb.toString(); -+ } -+ -+ -+ @Benchmark -+ public static void latin1_mixed_char() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_char() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_mixed_String() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_String() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ ////////// more detailed code path dependent tests ////////// -+ -+ @Benchmark -+ public static void latin1_Short_char() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_SSE4_char() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_AVX2_char() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static int utf16_Short_char() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_char() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_char() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_Short_String() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_SSE4_String() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_AVX2_String() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_Short_String() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_String() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_String() { -+ int ret = 0; -+ for (String what : 
utf16_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; + } +} --- -2.40.0.windows.1 +From ec57f23aa4001315a030cacd55aa5ef7c3269fbb Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Mon, 9 Oct 2023 11:07:34 +0800 +Subject: [PATCH 138/140] Fix test error after port 8295926 + +--- + .../jtreg/compiler/floatingpoint/TestLibmIntrinsics.java | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +index 5c711efddea..5a1b659bbe0 100644 +--- a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -27,8 +27,8 @@ + * @summary Test libm intrinsics + * @library /test/lib / + * +- * @build jdk.test.whitebox.WhiteBox +- * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement + * compiler.floatingpoint.TestLibmIntrinsics +@@ -37,7 +37,7 @@ + package compiler.floatingpoint; + + import compiler.whitebox.CompilerWhiteBoxTest; +-import jdk.test.whitebox.WhiteBox; ++import sun.hotspot.WhiteBox; + + import java.lang.reflect.Method; + + +From b115ec4381ad3ad8cbe9ca3d225cb438538916ac Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Tue, 17 Oct 2023 14:22:49 +0800 +Subject: [PATCH 139/140] Revert JDK-8247533: SA stack walking sometimes fails + with sun.jvm.hotspot.debugger.DebuggerException: get_thread_regs failed for a + lwp + +--- + .../native/libsaproc/LinuxDebuggerLocal.c | 8 +------ + .../linux/native/libsaproc/ps_proc.c | 3 +-- + .../native/libsaproc/MacosxDebuggerLocal.m | 24 +++++++------------ + .../debugger/bsd/BsdDebuggerLocal.java | 2 +- + .../jvm/hotspot/debugger/bsd/BsdThread.java | 10 +++----- + .../debugger/linux/LinuxDebuggerLocal.java | 2 +- + .../hotspot/debugger/linux/LinuxThread.java | 10 +++----- + .../windbg/amd64/WindbgAMD64Thread.java | 15 ++++-------- + .../windows/native/libsaproc/sawindbg.cpp | 14 +++-------- + 9 files changed, 27 insertions(+), 61 deletions(-) + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 6f1887f8113..45a927fb5ee 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -413,13 +413,7 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (get_lwp_regs(ph, lwp_id, &gregs) != true) { +- // This is not considered fatal and does happen on occassion, usually with an +- // ESRCH error. The root cause is not fully understood, but by ignoring this error +- // and returning NULL, stacking walking code will get null registers and fallback +- // to using the "last java frame" if setup. 
+- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: get_lwp_regs failed for lwp (%d)\n", lwp_id); +- fflush(stdout); +- return NULL; ++ THROW_NEW_DEBUGGER_EXCEPTION_("get_thread_regs failed for a lwp", 0); + } + + #undef NPRGREG +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index 691c3f6684a..de5254d859e 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -144,8 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +- print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp(%d) errno(%d) \"%s\"\n", pid, +- errno, strerror(errno)); ++ print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); + return false; + } + return true; +diff --git a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +index e46370a1f18..18b8b4282fe 100644 +--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m ++++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +@@ -685,7 +685,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + JNIEnv *env, jobject this_obj, + jlong thread_id) + { +- print_debug("getThreadIntegerRegisterSet0 called\n"); ++ print_debug("getThreadRegisterSet0 called\n"); + + struct ps_prochandle* ph = get_proc_handle(env, this_obj); + if (ph != NULL && ph->core != NULL) { +@@ -705,13 +705,7 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + result = thread_get_state(tid, HSDB_THREAD_STATE, (thread_state_t)&state, &count); + + if (result != KERN_SUCCESS) { +- // This is not considered fatal. Unlike on Linux and Windows, we haven't seen a +- // failure to get thread registers, but if it were to fail the response should +- // be the same. By ignoring this error and returning NULL, stacking walking code +- // will get null registers and fallback to using the "last java frame" if setup. 
+- fprintf(stdout, "WARNING: getThreadIntegerRegisterSet0: thread_get_state failed (%d) for thread (%d)\n", +- result, tid); +- fflush(stdout); ++ print_error("getregs: thread_get_state(%d) failed (%d)\n", tid, result); + return NULL; + } + +@@ -814,25 +808,25 @@ jlongArray getThreadIntegerRegisterSetFromCore(JNIEnv *env, jobject this_obj, lo + */ + JNIEXPORT jint JNICALL + Java_sun_jvm_hotspot_debugger_macosx_MacOSXDebuggerLocal_translateTID0( +- JNIEnv *env, jobject this_obj, jint tid) ++ JNIEnv *env, jobject this_obj, jint tid) + { + print_debug("translateTID0 called on tid = 0x%x\n", (int)tid); + + kern_return_t result; + thread_t foreign_tid, usable_tid; + mach_msg_type_name_t type; +- ++ + foreign_tid = tid; +- ++ + task_t gTask = getTask(env, this_obj); +- result = mach_port_extract_right(gTask, foreign_tid, +- MACH_MSG_TYPE_COPY_SEND, ++ result = mach_port_extract_right(gTask, foreign_tid, ++ MACH_MSG_TYPE_COPY_SEND, + &usable_tid, &type); + if (result != KERN_SUCCESS) + return -1; +- ++ + print_debug("translateTID0: 0x%x -> 0x%x\n", foreign_tid, usable_tid); +- ++ + return (jint) usable_tid; + } + +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +index d0557a7d254..655b450c3fc 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdDebuggerLocal.java +@@ -166,7 +166,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +index c52d3a51d54..0d637f30f14 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/bsd/BsdThread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -67,12 +67,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(unique_thread_id); + ThreadContext context = BsdThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. 
+- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +index 6a0648f508a..cb6712b58ee 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxDebuggerLocal.java +@@ -173,7 +173,7 @@ public WorkerThreadTask execute(WorkerThreadTask task) throws DebuggerException + } catch (InterruptedException x) {} + } + if (lastException != null) { +- throw new DebuggerException(lastException.getMessage(), lastException); ++ throw new DebuggerException(lastException); + } else { + return task; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +index 3fe795d34bc..52307b9cdcf 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -73,12 +73,8 @@ public String toString() { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(lwp_id); + ThreadContext context = LinuxThreadContextFactory.createThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); + } + return context; + } +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +index 377650a0a1c..ec5aea35e8c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/windbg/amd64/WindbgAMD64Thread.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -30,9 +30,9 @@ + + class WindbgAMD64Thread implements ThreadProxy { + private WindbgDebugger debugger; +- private long sysId; // SystemID for Windows thread, stored in OSThread::_thread_id ++ private long sysId; + private boolean gotID; +- private long id; // ThreadID for Windows thread, returned by GetThreadIdBySystemId ++ private long id; + + // The address argument must be the address of the OSThread::_thread_id + WindbgAMD64Thread(WindbgDebugger debugger, Address addr) { +@@ -50,12 +50,8 @@ class WindbgAMD64Thread implements ThreadProxy { + public ThreadContext getContext() throws IllegalThreadStateException { + long[] data = debugger.getThreadIntegerRegisterSet(getThreadID()); + WindbgAMD64ThreadContext context = new WindbgAMD64ThreadContext(debugger); +- // null means we failed to get the register set for some reason. The caller +- // is responsible for dealing with the set of null registers in that case. +- if (data != null) { +- for (int i = 0; i < data.length; i++) { +- context.setRegister(i, data[i]); +- } ++ for (int i = 0; i < data.length; i++) { ++ context.setRegister(i, data[i]); + } + return context; + } +@@ -90,7 +86,6 @@ public String toString() { + private long getThreadID() { + if (!gotID) { + id = debugger.getThreadIdFromSysId(sysId); +- gotID = true; + } + + return id; +diff --git a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +index e3b218b4dae..314cf69c957 100644 +--- a/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp ++++ b/src/jdk.hotspot.agent/windows/native/libsaproc/sawindbg.cpp +@@ -45,7 +45,6 @@ + + #include + #include +-#include + + #define DEBUG_NO_IMPLEMENTATION + #include +@@ -766,16 +765,9 @@ JNIEXPORT jlong JNICALL Java_sun_jvm_hotspot_debugger_windbg_WindbgDebuggerLocal + CHECK_EXCEPTION_(0); + + ULONG id = 0; +- HRESULT hr = ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id); +- if (hr != S_OK) { +- // This is not considered fatal and does happen on occassion, usually with an +- // 0x80004002 "No such interface supported". The root cause is not fully understood, +- // but by ignoring this error and returning NULL, stacking walking code will get +- // null registers and fallback to using the "last java frame" if setup. +- printf("WARNING: GetThreadIdBySystemId failed with 0x%x for sysId (%" PRIu64 ")\n", +- hr, sysId); +- return -1; +- } ++ COM_VERIFY_OK_(ptrIDebugSystemObjects->GetThreadIdBySystemId((ULONG)sysId, &id), ++ "Windbg Error: GetThreadIdBySystemId failed!", 0); ++ + return (jlong) id; + } + + +From 4b01e13731fc330ca3d57a5cd532c91bc66579c8 Mon Sep 17 00:00:00 2001 +From: Kuai Wei +Date: Wed, 31 Jan 2024 17:26:31 +0800 +Subject: [PATCH 140/140] Remove unused zSyscall_linux_riscv.hpp + +--- + .../linux_riscv/gc/z/zSyscall_linux_riscv.hpp | 42 ------------------- + 1 file changed, 42 deletions(-) + delete mode 100644 src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp + +diff --git a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp +deleted file mode 100644 +index 1aa58f27871..00000000000 +--- a/src/hotspot/os_cpu/linux_riscv/gc/z/zSyscall_linux_riscv.hpp ++++ /dev/null +@@ -1,42 +0,0 @@ +-/* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +- * +- * This code is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License version 2 only, as +- * published by the Free Software Foundation. +- * +- * This code is distributed in the hope that it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * version 2 for more details (a copy is included in the LICENSE file that +- * accompanied this code). +- * +- * You should have received a copy of the GNU General Public License version +- * 2 along with this work; if not, write to the Free Software Foundation, +- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +- * +- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +- * or visit www.oracle.com if you need additional information or have any +- * questions. +- * +- */ +- +-#ifndef OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +-#define OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP +- +-#include +- +-// +-// Support for building on older Linux systems +-// +- +-#ifndef SYS_memfd_create +-#define SYS_memfd_create 279 +-#endif +-#ifndef SYS_fallocate +-#define SYS_fallocate 47 +-#endif +- +-#endif // OS_CPU_LINUX_RISCV_GC_Z_ZSYSCALL_LINUX_RISCV_HPP diff --git a/openjdk-11.spec b/openjdk-11.spec index 26a6ccf..4a05f17 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -753,7 +753,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 1 +Release: 2 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -906,7 +906,7 @@ Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch # riscv64 specific patches # ############################################ -Patch2000: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +Patch2000: Add-riscv64-support.patch BuildRequires: elfutils-extra BuildRequires: autoconf @@ -1721,6 +1721,9 @@ cjc.mainProgram(arg) %changelog +* Mon Feb 26 2024 misaka00251 - 1:11.0.22.7-2 +- Fix build on riscv64 + * Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 - init support of LoongArch64