From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001
From: misaka00251 <liuxin@iscas.ac.cn>
Date: Wed, 9 Aug 2023 02:24:23 +0800
Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch)

---
 make/autoconf/build-aux/config.sub | 7 +
 make/autoconf/hotspot.m4 | 3 +-
 make/autoconf/libraries.m4 | 4 +-
 make/autoconf/platform.m4 | 10 +-
 make/hotspot/gensrc/GensrcAdlc.gmk | 16 +-
 src/hotspot/cpu/aarch64/aarch64.ad | 40 +-
 .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +-
 .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 +
 .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 +
 src/hotspot/cpu/arm/arm.ad | 10 +-
 src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +-
 src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +-
 src/hotspot/cpu/ppc/ppc.ad | 16 +-
 .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 +
 src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 +
 src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 +++
 .../cpu/riscv/assembler_riscv.inline.hpp | 47 +
 src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 +
 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 +
 src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 +
 .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 +
 .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 +
 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 +
 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 +
 .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 +
 .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 +
 .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 +
 .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 +
 .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++
 .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 +
 .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++
 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 +
 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 +
 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 +
 .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 +
 .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 +
 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++
 src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 +
 src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 +
 src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 +
 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 +
 src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 +
 src/hotspot/cpu/riscv/copy_riscv.hpp | 60 +
 src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 +
 src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 +
 src/hotspot/cpu/riscv/frame_riscv.cpp | 683 +
 src/hotspot/cpu/riscv/frame_riscv.hpp | 200 +
 src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 +
 .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 +
 .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 +
 .../gc/shared/barrierSetAssembler_riscv.cpp | 226 +
 .../gc/shared/barrierSetAssembler_riscv.hpp | 75 +
 .../cardTableBarrierSetAssembler_riscv.cpp | 120 +
 .../cardTableBarrierSetAssembler_riscv.hpp | 43 +
 .../modRefBarrierSetAssembler_riscv.cpp | 54 +
 .../modRefBarrierSetAssembler_riscv.hpp | 55 +
 .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 +
 .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++
 .../shenandoahBarrierSetAssembler_riscv.hpp | 92 +
 .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 +
 .../cpu/riscv/globalDefinitions_riscv.hpp | 44 +
 src/hotspot/cpu/riscv/globals_riscv.hpp | 120 +
 src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 +
 src/hotspot/cpu/riscv/icache_riscv.cpp | 61 +
 src/hotspot/cpu/riscv/icache_riscv.hpp | 42 +
 src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++
 src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 +
 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 +
 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 +
 .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 +
 .../cpu/riscv/jniFastGetField_riscv.cpp | 193 +
 src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 +
 .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++
 .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++
 .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 +
 src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 +
 src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 +
 src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 +
 src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 +
 src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 +
 .../cpu/riscv/register_definitions_riscv.cpp | 193 +
 src/hotspot/cpu/riscv/register_riscv.cpp | 69 +
 src/hotspot/cpu/riscv/register_riscv.hpp | 337 +
 src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 +
 src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 +
 src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++
 src/hotspot/cpu/riscv/riscv_b.ad | 605 +
 src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++
 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++
 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 +
 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 +
 .../templateInterpreterGenerator_riscv.cpp | 1841 +++
 src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++
 src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 +
 src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 +
 .../cpu/riscv/vm_version_ext_riscv.cpp | 91 +
 .../cpu/riscv/vm_version_ext_riscv.hpp | 55 +
 src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 +
 src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 +
 src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 +
 src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 +
 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 +
 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 +
 src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +-
 src/hotspot/cpu/s390/s390.ad | 16 +-
 src/hotspot/cpu/sparc/sparc.ad | 10 +-
 src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +-
 src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 +
 src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 +
 src/hotspot/cpu/x86/x86.ad | 14 +-
 src/hotspot/cpu/x86/x86_32.ad | 19 +-
 src/hotspot/cpu/x86/x86_64.ad | 24 +-
 src/hotspot/os/linux/os_linux.cpp | 11 +-
 .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 +
 .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 +
 .../linux_riscv/copy_linux_riscv.inline.hpp | 116 +
 .../linux_riscv/globals_linux_riscv.hpp | 43 +
 .../linux_riscv/orderAccess_linux_riscv.hpp | 73 +
 .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 +
 .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 +
 .../prefetch_linux_riscv.inline.hpp | 38 +
 .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 +
 .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 +
 .../linux_riscv/vmStructs_linux_riscv.hpp | 55 +
 .../linux_riscv/vm_version_linux_riscv.cpp | 116 +
 src/hotspot/share/adlc/archDesc.cpp | 5 +
 src/hotspot/share/adlc/formssel.cpp | 2 +
 src/hotspot/share/c1/c1_LIR.cpp | 113 +-
 src/hotspot/share/c1/c1_LIR.hpp | 208 +-
 src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +-
 src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +-
 src/hotspot/share/c1/c1_LinearScan.cpp | 14 +-
 src/hotspot/share/classfile/vmSymbols.cpp | 2 +
 src/hotspot/share/classfile/vmSymbols.hpp | 1 +
 .../gc/shenandoah/shenandoahArguments.cpp | 2 +-
 .../share/jfr/utilities/jfrBigEndian.hpp | 2 +-
 src/hotspot/share/opto/c2compiler.cpp | 1 +
 src/hotspot/share/opto/chaitin.cpp | 90 +-
 src/hotspot/share/opto/chaitin.hpp | 32 +-
 src/hotspot/share/opto/intrinsicnode.hpp | 5 +-
 src/hotspot/share/opto/library_call.cpp | 13 +-
 src/hotspot/share/opto/machnode.cpp | 2 +-
 src/hotspot/share/opto/machnode.hpp | 4 +
 src/hotspot/share/opto/matcher.cpp | 41 +-
 src/hotspot/share/opto/matcher.hpp | 6 +-
 src/hotspot/share/opto/node.cpp | 21 +
 src/hotspot/share/opto/node.hpp | 5 +
 src/hotspot/share/opto/opcodes.cpp | 4 +-
 src/hotspot/share/opto/opcodes.hpp | 2 +
 src/hotspot/share/opto/phase.cpp | 2 +
 src/hotspot/share/opto/phase.hpp | 1 +
 src/hotspot/share/opto/postaloc.cpp | 53 +-
 src/hotspot/share/opto/regmask.cpp | 46 +-
 src/hotspot/share/opto/regmask.hpp | 10 +-
 src/hotspot/share/opto/superword.cpp | 7 +-
 src/hotspot/share/opto/type.cpp | 14 +-
 src/hotspot/share/opto/type.hpp | 12 +-
 src/hotspot/share/opto/vectornode.cpp | 4 +-
 .../share/runtime/abstract_vm_version.cpp | 12 +-
 src/hotspot/share/runtime/thread.hpp | 2 +-
 src/hotspot/share/runtime/thread.inline.hpp | 2 +-
 src/hotspot/share/utilities/debug.cpp | 1 +
 src/hotspot/share/utilities/macros.hpp | 26 +
 .../share/classes/java/lang/StringLatin1.java | 5 +
 .../native/libsaproc/LinuxDebuggerLocal.c | 49 +-
 .../linux/native/libsaproc/libproc.h | 2 +
 .../linux/native/libsaproc/ps_proc.c | 4 +
 .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 +
 .../debugger/MachineDescriptionRISCV64.java | 40 +
 .../debugger/linux/LinuxCDebugger.java | 11 +-
 .../linux/riscv64/LinuxRISCV64CFrame.java | 90 +
 .../riscv64/LinuxRISCV64ThreadContext.java | 48 +
 .../debugger/proc/ProcDebuggerLocal.java | 6 +
 .../proc/riscv64/ProcRISCV64Thread.java | 88 +
 .../riscv64/ProcRISCV64ThreadContext.java | 48 +
 .../riscv64/ProcRISCV64ThreadFactory.java | 46 +
 .../remote/riscv64/RemoteRISCV64Thread.java | 55 +
 .../riscv64/RemoteRISCV64ThreadContext.java | 48 +
 .../riscv64/RemoteRISCV64ThreadFactory.java | 46 +
 .../riscv64/RISCV64ThreadContext.java | 172 +
 .../sun/jvm/hotspot/runtime/Threads.java | 3 +
 .../LinuxRISCV64JavaThreadPDAccess.java | 132 +
 .../riscv64/RISCV64CurrentFrameGuess.java | 223 +
 .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 +
 .../riscv64/RISCV64JavaCallWrapper.java | 58 +
 .../runtime/riscv64/RISCV64RegisterMap.java | 53 +
 .../jvm/hotspot/utilities/PlatformInfo.java | 2 +-
 src/utils/hsdis/hsdis.c | 6 +-
 test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +-
 ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 +
 ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 +
 ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 +
 .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 +
 .../testcases/GenericTestCaseForOtherCPU.java | 10 +-
 ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 +
 .../string/TestStringLatin1IndexOfChar.java | 153 +
 .../loopopts/superword/ProdRed_Double.java | 2 +-
 .../loopopts/superword/ProdRed_Float.java | 2 +-
 .../loopopts/superword/ProdRed_Int.java | 2 +-
 .../loopopts/superword/ReductionPerf.java | 2 +-
 .../superword/SumRedAbsNeg_Double.java | 2 +-
 .../superword/SumRedAbsNeg_Float.java | 2 +-
 .../loopopts/superword/SumRedSqrt_Double.java | 2 +-
 .../loopopts/superword/SumRed_Double.java | 2 +-
 .../loopopts/superword/SumRed_Float.java | 2 +-
 .../loopopts/superword/SumRed_Int.java | 2 +-
 .../argumentcorruption/CheckLongArgs.java | 2 +-
 .../criticalnatives/lookup/LookUp.java | 2 +-
 .../sha/predicate/IntrinsicPredicates.java | 9 +-
 .../NMT/CheckForProperDetailStackTrace.java | 3 +-
 .../ReservedStack/ReservedStackTest.java | 3 +-
 test/hotspot/jtreg/test_env.sh | 5 +
 ...stMutuallyExclusivePlatformPredicates.java | 3 +-
 .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +-
 .../jdk/jfr/event/os/TestCPUInformation.java | 5 +-
 test/lib/jdk/test/lib/Platform.java | 5 +
 .../bench/java/lang/StringIndexOfChar.java | 221 +
 218 files changed, 57653 insertions(+), 221 deletions(-)
create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
 create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_Defs_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
 create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
 create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/riscv.ad
 create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad
 create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad
 create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp
 create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp
 create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
 create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
 create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java
 create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java
 create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java
 create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java
 create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java

diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub
|
|
index 3c280ac7c..eda408e01 100644
|
|
--- a/make/autoconf/build-aux/config.sub
|
|
+++ b/make/autoconf/build-aux/config.sub
|
|
@@ -48,6 +48,13 @@ if ! echo $* | grep '^aarch64-' >/dev/null ; then
|
|
exit
|
|
fi
|
|
|
|
+# Canonicalize for riscv which autoconf-config.sub doesn't handle
|
|
+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then
|
|
+ result=`echo $@ | sed 's/linux/unknown-linux/'`
|
|
+ echo $result
|
|
+ exit
|
|
+fi
|
|
+
|
|
while test $# -gt 0 ; do
|
|
case $1 in
|
|
-- ) # Stop option processing
|
|
diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4
|
|
index a3e1e00b2..01ef26c10 100644
|
|
--- a/make/autoconf/hotspot.m4
|
|
+++ b/make/autoconf/hotspot.m4
|
|
@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
|
|
AC_MSG_CHECKING([if shenandoah can be built])
|
|
if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then
|
|
if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \
|
|
- test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
|
|
+ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \
|
|
+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then
|
|
AC_MSG_RESULT([yes])
|
|
else
|
|
DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc"
|
|
diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4
|
|
index 16e906bdc..c01fdbcce 100644
|
|
--- a/make/autoconf/libraries.m4
|
|
+++ b/make/autoconf/libraries.m4
|
|
@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
|
|
GLOBAL_LIBS=""
|
|
fi
|
|
|
|
- BASIC_JDKLIB_LIBS=""
|
|
+ BASIC_JDKLIB_LIBS="-latomic"
|
|
if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then
|
|
BASIC_JDKLIB_LIBS="-ljava -ljvm"
|
|
fi
|
|
@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
|
|
wsock32.lib winmm.lib version.lib psapi.lib"
|
|
fi
|
|
|
|
+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic"
|
|
+
|
|
JDKLIB_LIBS="$BASIC_JDKLIB_LIBS"
|
|
JDKEXE_LIBS=""
|
|
JVM_LIBS="$BASIC_JVM_LIBS"
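The two -latomic additions above are worth a note: on riscv64, GCC lowers atomic operations that the hardware cannot perform inline (for example a 16-byte compare-and-exchange) into calls such as __atomic_compare_exchange_16 from libatomic, so binaries that may contain them have to link against it. A minimal stand-alone illustration of the kind of C++ that triggers this (not part of the patch; the struct and function names are made up):

    // Compile as: riscv64-linux-gnu-g++ -std=c++17 -c wide_atomic.cpp
    // The resulting object references __atomic_* symbols provided by libatomic.
    #include <atomic>

    struct Wide { long lo; long hi; };   // 16 bytes: wider than a single LR/SC word

    std::atomic<Wide> g_word{};

    bool publish(Wide expected, Wide desired) {
      // Not lock-free on riscv64, so the compiler emits a libatomic call here.
      return g_word.compare_exchange_strong(expected, desired);
    }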
|
|
diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4
|
|
index f89b22f5f..48d615992 100644
|
|
--- a/make/autoconf/platform.m4
|
|
+++ b/make/autoconf/platform.m4
|
|
@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU],
|
|
VAR_CPU_BITS=64
|
|
VAR_CPU_ENDIAN=little
|
|
;;
|
|
+ riscv32)
|
|
+ VAR_CPU=riscv32
|
|
+ VAR_CPU_ARCH=riscv
|
|
+ VAR_CPU_BITS=32
|
|
+ VAR_CPU_ENDIAN=little
|
|
+ ;;
|
|
riscv64)
|
|
VAR_CPU=riscv64
|
|
VAR_CPU_ARCH=riscv
|
|
@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
|
|
HOTSPOT_$1_CPU_DEFINE=S390
|
|
elif test "x$OPENJDK_$1_CPU" = xs390x; then
|
|
HOTSPOT_$1_CPU_DEFINE=S390
|
|
+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then
|
|
+ HOTSPOT_$1_CPU_DEFINE=RISCV32
|
|
elif test "x$OPENJDK_$1_CPU" = xriscv64; then
|
|
- HOTSPOT_$1_CPU_DEFINE=RISCV
|
|
+ HOTSPOT_$1_CPU_DEFINE=RISCV64
|
|
elif test "x$OPENJDK_$1_CPU" != x; then
|
|
HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z)
|
|
fi
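Splitting the old RISCV define into RISCV32 and RISCV64 matters because HotSpot's C++ sources key off these per-CPU macros (the macros.hpp change listed in the diffstat presumably maps them onto the usual helper macros). A purely illustrative sketch of the pattern, not code taken from the patch:

    // Illustrative only: how a per-CPU define produced by platform.m4 is
    // typically consumed once the build passes it as a preprocessor symbol.
    #if defined(RISCV64)
    static const char* vm_cpu_name() { return "riscv64"; }
    #elif defined(RISCV32)
    static const char* vm_cpu_name() { return "riscv32"; }
    #else
    static const char* vm_cpu_name() { return "other"; }
    #endif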
|
|
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
|
|
index c5a3ac572..9de6f663c 100644
|
|
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
|
|
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
|
|
@@ -1,5 +1,5 @@
|
|
#
|
|
-# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved.
|
|
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
#
|
|
# This code is free software; you can redistribute it and/or modify it
|
|
@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true)
|
|
$d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \
|
|
)))
|
|
|
|
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
|
|
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
|
|
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
|
|
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
|
|
+ )))
|
|
+ endif
|
|
+
|
|
+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv)
|
|
+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
|
|
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \
|
|
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \
|
|
+ )))
|
|
+ endif
|
|
+
|
|
ifeq ($(call check-jvm-feature, shenandoahgc), true)
|
|
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
|
|
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
|
|
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
|
|
index 1e4ee33a9..ac5d56f0f 100644
|
|
--- a/src/hotspot/cpu/aarch64/aarch64.ad
|
|
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
|
|
@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
return true; // Per default match rules are supported.
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
// TODO
|
|
// identify extra cases that we might want to provide match rules for
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
- bool ret_value = match_rule_supported(opcode);
|
|
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
|
|
+ return false;
|
|
+ }
|
|
// Add rules here.
|
|
|
|
- return ret_value; // Per default match rules are supported.
|
|
+ return true; // Per default match rules are supported.
|
|
}
|
|
|
|
const bool Matcher::has_predicated_vectors(void) {
|
|
@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
return size;
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
// Vector ideal reg.
|
|
const uint Matcher::vector_ideal_reg(int len) {
|
|
switch(len) {
|
|
@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
|
|
ins_pipe(pipe_class_memory);
|
|
%}
|
|
|
|
-instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
|
+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
|
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
iRegINoSp tmp3, rFlagsReg cr)
|
|
%{
|
|
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U);
|
|
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
|
|
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
|
|
|
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}
|
|
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
|
|
|
|
ins_encode %{
|
|
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
|
|
@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
|
ins_pipe(pipe_class_memory);
|
|
%}
|
|
|
|
+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
|
|
+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
|
|
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
|
|
+
|
|
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
|
|
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
|
|
iRegI_R0 result, rFlagsReg cr)
|
|
%{
|
|
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
|
|
index fdd2c0ca3..1a35be210 100644
|
|
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
|
|
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
|
|
@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
|
|
}
|
|
|
|
|
|
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64");
|
|
|
|
Assembler::Condition acond, ncond;
|
|
switch (condition) {
|
|
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
|
index 5753cc9a6..21c6fdf19 100644
|
|
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
|
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
|
|
@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
|
BIND(DONE);
|
|
}
|
|
|
|
+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ Register tmp1, Register tmp2, Register tmp3)
|
|
+{
|
|
+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
|
|
+ Register cnt1_neg = cnt1;
|
|
+ Register ch1 = rscratch1;
|
|
+ Register result_tmp = rscratch2;
|
|
+
|
|
+ cbz(cnt1, NOMATCH);
|
|
+
|
|
+ cmp(cnt1, (u1)8);
|
|
+ br(LT, DO1_SHORT);
|
|
+
|
|
+ orr(ch, ch, ch, LSL, 8);
|
|
+ orr(ch, ch, ch, LSL, 16);
|
|
+ orr(ch, ch, ch, LSL, 32);
|
|
+
|
|
+ sub(cnt1, cnt1, 8);
|
|
+ mov(result_tmp, cnt1);
|
|
+ lea(str1, Address(str1, cnt1));
|
|
+ sub(cnt1_neg, zr, cnt1);
|
|
+
|
|
+ mov(tmp3, 0x0101010101010101);
|
|
+
|
|
+ BIND(CH1_LOOP);
|
|
+ ldr(ch1, Address(str1, cnt1_neg));
|
|
+ eor(ch1, ch, ch1);
|
|
+ sub(tmp1, ch1, tmp3);
|
|
+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
|
|
+ bics(tmp1, tmp1, tmp2);
|
|
+ br(NE, HAS_ZERO);
|
|
+ adds(cnt1_neg, cnt1_neg, 8);
|
|
+ br(LT, CH1_LOOP);
|
|
+
|
|
+ cmp(cnt1_neg, (u1)8);
|
|
+ mov(cnt1_neg, 0);
|
|
+ br(LT, CH1_LOOP);
|
|
+ b(NOMATCH);
|
|
+
|
|
+ BIND(HAS_ZERO);
|
|
+ rev(tmp1, tmp1);
|
|
+ clz(tmp1, tmp1);
|
|
+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
|
|
+ b(MATCH);
|
|
+
|
|
+ BIND(DO1_SHORT);
|
|
+ mov(result_tmp, cnt1);
|
|
+ lea(str1, Address(str1, cnt1));
|
|
+ sub(cnt1_neg, zr, cnt1);
|
|
+ BIND(DO1_LOOP);
|
|
+ ldrb(ch1, Address(str1, cnt1_neg));
|
|
+ cmp(ch, ch1);
|
|
+ br(EQ, MATCH);
|
|
+ adds(cnt1_neg, cnt1_neg, 1);
|
|
+ br(LT, DO1_LOOP);
|
|
+ BIND(NOMATCH);
|
|
+ mov(result, -1);
|
|
+ b(DONE);
|
|
+ BIND(MATCH);
|
|
+ add(result, result_tmp, cnt1_neg);
|
|
+ BIND(DONE);
|
|
+}
|
|
+
|
|
// Compare strings.
|
|
void MacroAssembler::string_compare(Register str1, Register str2,
|
|
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
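The 0x0101010101010101 and 0x7f7f7f7f7f7f7f7f constants in stringL_indexof_char above implement the classic SWAR zero-byte test: after XORing each 8-byte block with the broadcast search character, a matching byte becomes zero and can be detected without a per-byte loop. A stand-alone C++ sketch of the same idea (illustrative only; it uses the textbook formulation rather than the exact orr/bics sequence emitted above):

    #include <cstdint>
    #include <cstring>

    // Returns the index of the first occurrence of ch in s[0..n), or -1.
    long index_of_byte(const uint8_t* s, size_t n, uint8_t ch) {
      const uint64_t ones  = 0x0101010101010101ULL;
      const uint64_t highs = 0x8080808080808080ULL;
      const uint64_t pattern = ones * ch;            // broadcast ch into every byte
      size_t i = 0;
      for (; i + 8 <= n; i += 8) {
        uint64_t v;
        std::memcpy(&v, s + i, 8);                   // little-endian load, as on aarch64/riscv64
        uint64_t x = v ^ pattern;                    // matching bytes become 0x00
        uint64_t zero = (x - ones) & ~x & highs;     // high bit set for each zero byte
        if (zero != 0) {
          return i + (__builtin_ctzll(zero) >> 3);   // position of the first match in the block
        }
      }
      for (; i < n; i++) {                           // scalar tail
        if (s[i] == ch) return (long)i;
      }
      return -1;
    }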
|
|
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
|
index 7e23c16a4..c3d472a9a 100644
|
|
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
|
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
|
|
@@ -1260,6 +1260,9 @@ public:
|
|
void string_indexof_char(Register str1, Register cnt1,
|
|
Register ch, Register result,
|
|
Register tmp1, Register tmp2, Register tmp3);
|
|
+ void stringL_indexof_char(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ Register tmp1, Register tmp2, Register tmp3);
|
|
void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
|
|
FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
|
|
FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
|
|
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
|
|
index 51f2d9ce7..71f83521e 100644
|
|
--- a/src/hotspot/cpu/arm/arm.ad
|
|
+++ b/src/hotspot/cpu/arm/arm.ad
|
|
@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
return true; // Per default match rules are supported.
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
// TODO
|
|
// identify extra cases that we might want to provide match rules for
|
|
@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
|
|
return MaxVectorSize;
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
// Vector ideal reg corresponding to specified size in bytes
|
|
const uint Matcher::vector_ideal_reg(int size) {
|
|
assert(MaxVectorSize >= size, "");
|
|
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
|
|
index f0a7229aa..2d06d3d58 100644
|
|
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
|
|
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
|
|
@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
|
|
}
|
|
|
|
|
|
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm");
|
|
+
|
|
AsmCondition acond = al;
|
|
AsmCondition ncond = nv;
|
|
if (opr1 != opr2) {
|
|
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
|
|
index 847f7d61d..d081116be 100644
|
|
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
|
|
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
|
|
@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) {
|
|
}
|
|
|
|
|
|
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc");
|
|
+
|
|
if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) {
|
|
load_to_reg(this, opr1, result); // Condition doesn't matter.
|
|
return;
|
|
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
|
|
index ebbe80a26..df66a46dc 100644
|
|
--- a/src/hotspot/cpu/ppc/ppc.ad
|
|
+++ b/src/hotspot/cpu/ppc/ppc.ad
|
|
@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
return true; // Per default match rules are supported.
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
// TODO
|
|
// identify extra cases that we might want to provide match rules for
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
- bool ret_value = match_rule_supported(opcode);
|
|
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
|
|
+ return false;
|
|
+ }
|
|
// Add rules here.
|
|
|
|
- return ret_value; // Per default match rules are supported.
|
|
+ return true; // Per default match rules are supported.
|
|
}
|
|
|
|
const bool Matcher::has_predicated_vectors(void) {
|
|
@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
return max_vector_size(bt); // Same as max.
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
// PPC implementation uses VSX load/store instructions (if
|
|
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
|
|
const bool Matcher::misaligned_vectors_ok() {
|
|
diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..5661b7425
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp
|
|
@@ -0,0 +1,185 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "oops/constMethod.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "utilities/align.hpp"
|
|
+#include "utilities/debug.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+
|
|
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
|
|
+ int i = 0;
|
|
+ switch (type) {
|
|
+ case T_BOOLEAN: i = 0; break;
|
|
+ case T_CHAR : i = 1; break;
|
|
+ case T_BYTE : i = 2; break;
|
|
+ case T_SHORT : i = 3; break;
|
|
+ case T_INT : i = 4; break;
|
|
+ case T_LONG : i = 5; break;
|
|
+ case T_VOID : i = 6; break;
|
|
+ case T_FLOAT : i = 7; break;
|
|
+ case T_DOUBLE : i = 8; break;
|
|
+ case T_OBJECT : i = 9; break;
|
|
+ case T_ARRAY : i = 9; break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
|
|
+ "index out of bounds");
|
|
+ return i;
|
|
+}
|
|
+
|
|
+// How much stack a method activation needs in words.
|
|
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size();
|
|
+
|
|
+ // total overhead size: entry_size + (saved fp thru expr stack
|
|
+ // bottom). be sure to change this if you add/subtract anything
|
|
+ // to/from the overhead area
|
|
+ const int overhead_size =
|
|
+ -(frame::interpreter_frame_initial_sp_offset) + entry_size;
|
|
+
|
|
+ const int stub_code = frame::entry_frame_after_call_words;
|
|
+ assert_cond(method != NULL);
|
|
+ const int method_stack = (method->max_locals() + method->max_stack()) *
|
|
+ Interpreter::stackElementWords;
|
|
+ return (overhead_size + method_stack + stub_code);
|
|
+}
|
|
+
|
|
+// asm based interpreter deoptimization helpers
|
|
+int AbstractInterpreter::size_activation(int max_stack,
|
|
+ int temps,
|
|
+ int extra_args,
|
|
+ int monitors,
|
|
+ int callee_params,
|
|
+ int callee_locals,
|
|
+ bool is_top_frame) {
|
|
+ // Note: This calculation must exactly parallel the frame setup
|
|
+ // in TemplateInterpreterGenerator::generate_method_entry.
|
|
+
|
|
+ // fixed size of an interpreter frame:
|
|
+ int overhead = frame::sender_sp_offset -
|
|
+ frame::interpreter_frame_initial_sp_offset;
|
|
+ // Our locals were accounted for by the caller (or last_frame_adjust
|
|
+ // on the transition). Since the callee parameters already account
|
|
+ // for the callee's params we only need to account for the extra
|
|
+ // locals.
|
|
+ int size = overhead +
|
|
+ (callee_locals - callee_params) +
|
|
+ monitors * frame::interpreter_frame_monitor_size() +
|
|
+ // On the top frame, at all times SP <= ESP, and SP is
|
|
+ // 16-aligned. We ensure this by adjusting SP on method
|
|
+ // entry and re-entry to allow room for the maximum size of
|
|
+ // the expression stack. When we call another method we bump
|
|
+ // SP so that no stack space is wasted. So, only on the top
|
|
+ // frame do we need to allow max_stack words.
|
|
+ (is_top_frame ? max_stack : temps + extra_args);
|
|
+
|
|
+ // On riscv we always keep the stack pointer 16-aligned, so we
|
|
+ // must round up here.
|
|
+ size = align_up(size, 2);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+
|
|
+void AbstractInterpreter::layout_activation(Method* method,
|
|
+ int tempcount,
|
|
+ int popframe_extra_args,
|
|
+ int moncount,
|
|
+ int caller_actual_parameters,
|
|
+ int callee_param_count,
|
|
+ int callee_locals,
|
|
+ frame* caller,
|
|
+ frame* interpreter_frame,
|
|
+ bool is_top_frame,
|
|
+ bool is_bottom_frame) {
|
|
+ // The frame interpreter_frame is guaranteed to be the right size,
|
|
+ // as determined by a previous call to the size_activation() method.
|
|
+ // It is also guaranteed to be walkable even though it is in a
|
|
+ // skeletal state
|
|
+
|
|
+ assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL);
|
|
+ int max_locals = method->max_locals() * Interpreter::stackElementWords;
|
|
+ int extra_locals = (method->max_locals() - method->size_of_parameters()) *
|
|
+ Interpreter::stackElementWords;
|
|
+
|
|
+#ifdef ASSERT
|
|
+ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable");
|
|
+#endif
|
|
+
|
|
+ interpreter_frame->interpreter_frame_set_method(method);
|
|
+ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp
|
|
+ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
|
|
+ // and sender_sp is fp
|
|
+ //
|
|
+ // The interpreted method entry on riscv aligns SP to 16 bytes
|
|
+ // before generating the fixed part of the activation frame. So there
|
|
+ // may be a gap between the locals block and the saved sender SP. For
|
|
+ // an interpreted caller we need to recreate this gap and exactly
|
|
+ // align the incoming parameters with the caller's temporary
|
|
+ // expression stack. For other types of caller frame it doesn't
|
|
+ // matter.
|
|
+ intptr_t* locals = NULL;
|
|
+ if (caller->is_interpreted_frame()) {
|
|
+ locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1;
|
|
+ } else {
|
|
+ locals = interpreter_frame->sender_sp() + max_locals - 1;
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ if (caller->is_interpreted_frame()) {
|
|
+ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ interpreter_frame->interpreter_frame_set_locals(locals);
|
|
+ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
|
|
+ BasicObjectLock* monbot = montop - moncount;
|
|
+ interpreter_frame->interpreter_frame_set_monitor_end(monbot);
|
|
+
|
|
+ // Set last_sp
|
|
+ intptr_t* last_sp = (intptr_t*) monbot -
|
|
+ tempcount*Interpreter::stackElementWords -
|
|
+ popframe_extra_args;
|
|
+ interpreter_frame->interpreter_frame_set_last_sp(last_sp);
|
|
+
|
|
+ // All frames but the initial (oldest) interpreter frame we fill in have
|
|
+ // a value for sender_sp that allows walking the stack but isn't
|
|
+ // truly correct. Correct the value here.
|
|
+ if (extra_locals != 0 &&
|
|
+ interpreter_frame->sender_sp() ==
|
|
+ interpreter_frame->interpreter_frame_sender_sp()) {
|
|
+ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() +
|
|
+ extra_locals);
|
|
+ }
|
|
+ *interpreter_frame->interpreter_frame_cache_addr() =
|
|
+ method->constants()->cache();
|
|
+ *interpreter_frame->interpreter_frame_mirror_addr() =
|
|
+ method->method_holder()->java_mirror();
|
|
+}
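A quick way to see what the align_up(size, 2) in size_activation() above buys: frame sizes are counted in 8-byte words, so rounding the word count up to an even number keeps the stack pointer 16-byte aligned, as the comment requires. A tiny illustrative check (not patch code):

    #include <cassert>

    // Words are 8 bytes on riscv64; rounding the word count up to a multiple of 2
    // makes the frame size a multiple of 16 bytes.
    static int align_up_words(int words, int alignment) {
      return (words + alignment - 1) & -alignment;   // alignment must be a power of two
    }

    int main() {
      assert(align_up_words(11, 2) == 12);
      assert(align_up_words(12, 2) == 12);
      assert(align_up_words(11, 2) * 8 % 16 == 0);   // resulting byte size is 16-aligned
      return 0;
    }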
|
|
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..40ecf1a6c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp
|
|
@@ -0,0 +1,365 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+
|
|
+#include <stdio.h>
|
|
+#include <sys/types.h>
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "asm/assembler.inline.hpp"
|
|
+#include "compiler/disassembler.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "runtime/interfaceSupport.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+
|
|
+#define __ _masm.
|
|
+
|
|
+int AbstractAssembler::code_fill_byte() {
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void Assembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {
|
|
+ if (is_imm_in_range(increment, 12, 0)) {
|
|
+ addi(Rd, Rn, increment);
|
|
+ } else {
|
|
+ assert_different_registers(Rn, temp);
|
|
+ li(temp, increment);
|
|
+ add(Rd, Rn, temp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) {
|
|
+ if (is_imm_in_range(increment, 12, 0)) {
|
|
+ addiw(Rd, Rn, increment);
|
|
+ } else {
|
|
+ assert_different_registers(Rn, temp);
|
|
+ li(temp, increment);
|
|
+ addw(Rd, Rn, temp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {
|
|
+ if (is_imm_in_range(-decrement, 12, 0)) {
|
|
+ addi(Rd, Rn, -decrement);
|
|
+ } else {
|
|
+ assert_different_registers(Rn, temp);
|
|
+ li(temp, decrement);
|
|
+ sub(Rd, Rn, temp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {
|
|
+ if (is_imm_in_range(-decrement, 12, 0)) {
|
|
+ addiw(Rd, Rn, -decrement);
|
|
+ } else {
|
|
+ assert_different_registers(Rn, temp);
|
|
+ li(temp, decrement);
|
|
+ subw(Rd, Rn, temp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::zext_w(Register Rd, Register Rs) {
|
|
+ add_uw(Rd, Rs, zr);
|
|
+}
|
|
+
|
|
+void Assembler::li(Register Rd, int64_t imm) {
|
|
+ // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
|
|
+ int shift = 12;
|
|
+ int64_t upper = imm, lower = imm;
|
|
+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm.
|
|
+ lower = ((int32_t)imm << 20) >> 20;
|
|
+ upper -= lower;
|
|
+
|
|
+ // Test whether imm is a 32-bit integer.
|
|
+ if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
|
|
+ (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
|
|
+ while (((upper >> shift) & 1) == 0) { shift++; }
|
|
+ upper >>= shift;
|
|
+ li(Rd, upper);
|
|
+ slli(Rd, Rd, shift);
|
|
+ if (lower != 0) {
|
|
+ addi(Rd, Rd, lower);
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ // 32-bit integer
|
|
+ Register hi_Rd = zr;
|
|
+ if (upper != 0) {
|
|
+ lui(Rd, (int32_t)upper);
|
|
+ hi_Rd = Rd;
|
|
+ }
|
|
+ if (lower != 0 || hi_Rd == zr) {
|
|
+ addiw(Rd, hi_Rd, lower);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::li64(Register Rd, int64_t imm) {
|
|
+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1),
|
|
+ // upper = imm[63:32] + 1.
|
|
+ int64_t lower = imm & 0xffffffff;
|
|
+ lower -= ((lower << 44) >> 44);
|
|
+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower;
|
|
+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
|
|
+
|
|
+ // Load upper 32 bits
|
|
+ int64_t up = upper, lo = upper;
|
|
+ lo = (lo << 52) >> 52;
|
|
+ up -= lo;
|
|
+ up = (int32_t)up;
|
|
+ lui(Rd, up);
|
|
+ addi(Rd, Rd, lo);
|
|
+
|
|
+ // Load the remaining 32 bits.
|
|
+ slli(Rd, Rd, 12);
|
|
+ addi(Rd, Rd, (int32_t)lower >> 20);
|
|
+ slli(Rd, Rd, 12);
|
|
+ lower = ((int32_t)imm << 12) >> 20;
|
|
+ addi(Rd, Rd, lower);
|
|
+ slli(Rd, Rd, 8);
|
|
+ lower = imm & 0xff;
|
|
+ addi(Rd, Rd, lower);
|
|
+}
|
|
+
|
|
+void Assembler::li32(Register Rd, int32_t imm) {
|
|
+ // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
|
|
+ int64_t upper = imm, lower = imm;
|
|
+ lower = (imm << 20) >> 20;
|
|
+ upper -= lower;
|
|
+ upper = (int32_t)upper;
|
|
+ // lui Rd, imm[31:12] + imm[11]
|
|
+ lui(Rd, upper);
|
|
+ // use addiw to distinguish li32 from li64
|
|
+ addiw(Rd, Rd, lower);
|
|
+}
|
|
+
|
|
+#define INSN(NAME, REGISTER) \
|
|
+ void Assembler::NAME(const address &dest, Register temp) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t distance = dest - pc(); \
|
|
+ if (is_imm_in_range(distance, 20, 1)) { \
|
|
+ jal(REGISTER, distance); \
|
|
+ } else { \
|
|
+ assert(temp != noreg, "temp must not be empty register!"); \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(temp, dest, offset); \
|
|
+ jalr(REGISTER, temp, offset); \
|
|
+ } \
|
|
+ } \
|
|
+ void Assembler::NAME(Label &l, Register temp) { \
|
|
+ jal(REGISTER, l, temp); \
|
|
+ } \
|
|
+
|
|
+ INSN(j, x0);
|
|
+ INSN(jal, x1);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, REGISTER) \
|
|
+ void Assembler::NAME(Register Rs) { \
|
|
+ jalr(REGISTER, Rs, 0); \
|
|
+ }
|
|
+
|
|
+ INSN(jr, x0);
|
|
+ INSN(jalr, x1);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+void Assembler::ret() {
|
|
+ jalr(x0, x1, 0);
|
|
+}
|
|
+
|
|
+#define INSN(NAME, REGISTER) \
|
|
+ void Assembler::NAME(const address &dest, Register temp) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ assert(temp != noreg, "temp must not be empty register!"); \
|
|
+ int64_t distance = dest - pc(); \
|
|
+ if (is_offset_in_range(distance, 32)) { \
|
|
+ auipc(temp, distance + 0x800); \
|
|
+ jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \
|
|
+ } else { \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(temp, dest, offset); \
|
|
+ jalr(REGISTER, temp, offset); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(call, x1);
|
|
+ INSN(tail, x0);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, REGISTER) \
|
|
+ void Assembler::NAME(const Address &adr, Register temp) { \
|
|
+ switch(adr.getMode()) { \
|
|
+ case Address::literal: { \
|
|
+ code_section()->relocate(pc(), adr.rspec()); \
|
|
+ NAME(adr.target(), temp); \
|
|
+ break; \
|
|
+ } \
|
|
+ case Address::base_plus_offset: { \
|
|
+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \
|
|
+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \
|
|
+ break; \
|
|
+ } \
|
|
+ default: \
|
|
+ ShouldNotReachHere(); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(j, x0);
|
|
+ INSN(jal, x1);
|
|
+ INSN(call, x1);
|
|
+ INSN(tail, x0);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn,
|
|
+ compare_and_branch_label_insn neg_insn, bool is_far) {
|
|
+ if (is_far) {
|
|
+ Label done;
|
|
+ (this->*neg_insn)(r1, r2, done, /* is_far */ false);
|
|
+ j(L);
|
|
+ bind(done);
|
|
+ } else {
|
|
+ if (L.is_bound()) {
|
|
+ (this->*insn)(r1, r2, target(L));
|
|
+ } else {
|
|
+ L.add_patch_at(code(), locator());
|
|
+ (this->*insn)(r1, r2, pc());
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {
|
|
+ if (L.is_bound()) {
|
|
+ (this->*insn)(Rt, target(L), tmp);
|
|
+ } else {
|
|
+ L.add_patch_at(code(), locator());
|
|
+ (this->*insn)(Rt, pc(), tmp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {
|
|
+ if (L.is_bound()) {
|
|
+ (this->*insn)(Rt, target(L));
|
|
+ } else {
|
|
+ L.add_patch_at(code(), locator());
|
|
+ (this->*insn)(Rt, pc());
|
|
+ }
|
|
+}
|
|
+
|
|
+void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) {
|
|
+ uintptr_t imm64 = (uintptr_t)addr;
|
|
+#ifndef PRODUCT
|
|
+ {
|
|
+ char buffer[64];
|
|
+ snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64);
|
|
+ block_comment(buffer);
|
|
+ }
|
|
+#endif
|
|
+ assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1),
|
|
+ "bit 47 overflows in address constant");
|
|
+ // Load upper 31 bits
|
|
+ int32_t imm = imm64 >> 17;
|
|
+ int64_t upper = imm, lower = imm;
|
|
+ lower = (lower << 52) >> 52;
|
|
+ upper -= lower;
|
|
+ upper = (int32_t)upper;
|
|
+ lui(Rd, upper);
|
|
+ addi(Rd, Rd, lower);
|
|
+
|
|
+ // Load the remaining 17 bits.
|
|
+ slli(Rd, Rd, 11);
|
|
+ addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
|
|
+ slli(Rd, Rd, 6);
|
|
+
|
|
+ // Here, omit the final addi instruction and return the offset directly. This offset will be used by the following jalr/ld.
|
|
+ offset = imm64 & 0x3f;
|
|
+}
|
|
+
|
|
+void Assembler::movptr(Register Rd, uintptr_t imm64) {
|
|
+ movptr(Rd, (address)imm64);
|
|
+}
|
|
+
|
|
+void Assembler::movptr(Register Rd, address addr) {
|
|
+ int offset = 0;
|
|
+ movptr_with_offset(Rd, addr, offset);
|
|
+ addi(Rd, Rd, offset);
|
|
+}
|
|
+
|
|
+#define INSN(NAME, NEG_INSN) \
|
|
+ void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \
|
|
+ NEG_INSN(Rt, Rs, dest); \
|
|
+ } \
|
|
+ void Assembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \
|
|
+ NEG_INSN(Rt, Rs, l, is_far); \
|
|
+ }
|
|
+
|
|
+ INSN(bgt, blt);
|
|
+ INSN(ble, bge);
|
|
+ INSN(bgtu, bltu);
|
|
+ INSN(bleu, bgeu);
|
|
+#undef INSN
|
|
+
|
|
+#undef __
|
|
+
|
|
+Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _offset(0), _mode(literal) {
|
|
+ _target = target;
|
|
+ switch (rtype) {
|
|
+ case relocInfo::oop_type:
|
|
+ case relocInfo::metadata_type:
|
|
+ // Oops are a special case. Normally they would be their own section
|
|
+ // but in cases like icBuffer they are literals in the code stream that
|
|
+ // we don't have a section for. We use none so that we get a literal address
|
|
+ // which is always patchable.
|
|
+ break;
|
|
+ case relocInfo::external_word_type:
|
|
+ _rspec = external_word_Relocation::spec(target);
|
|
+ break;
|
|
+ case relocInfo::internal_word_type:
|
|
+ _rspec = internal_word_Relocation::spec(target);
|
|
+ break;
|
|
+ case relocInfo::opt_virtual_call_type:
|
|
+ _rspec = opt_virtual_call_Relocation::spec();
|
|
+ break;
|
|
+ case relocInfo::static_call_type:
|
|
+ _rspec = static_call_Relocation::spec();
|
|
+ break;
|
|
+ case relocInfo::runtime_call_type:
|
|
+ _rspec = runtime_call_Relocation::spec();
|
|
+ break;
|
|
+ case relocInfo::poll_type:
|
|
+ case relocInfo::poll_return_type:
|
|
+ _rspec = Relocation::spec_simple(rtype);
|
|
+ break;
|
|
+ case relocInfo::none:
|
|
+ _rspec = RelocationHolder::none;
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..d4da30ed6
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
|
|
@@ -0,0 +1,2004 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_ASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/register.hpp"
|
|
+#include "assembler_riscv.inline.hpp"
|
|
+
|
|
+#define XLEN 64
|
|
+
|
|
+// definitions of various symbolic names for machine registers
|
|
+
|
|
+// Calls between C and Java use 8 general-purpose registers
|
|
+// and 8 floating-point registers
|
|
+
|
|
+class Argument {
|
|
+ public:
|
|
+ enum {
|
|
+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
|
|
+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
|
|
+
|
|
+    n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
|
|
+ n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...)
|
|
+ };
|
|
+};
|
|
+
|
|
+// function arguments (caller-saved registers)
|
|
+REGISTER_DECLARATION(Register, c_rarg0, x10);
|
|
+REGISTER_DECLARATION(Register, c_rarg1, x11);
|
|
+REGISTER_DECLARATION(Register, c_rarg2, x12);
|
|
+REGISTER_DECLARATION(Register, c_rarg3, x13);
|
|
+REGISTER_DECLARATION(Register, c_rarg4, x14);
|
|
+REGISTER_DECLARATION(Register, c_rarg5, x15);
|
|
+REGISTER_DECLARATION(Register, c_rarg6, x16);
|
|
+REGISTER_DECLARATION(Register, c_rarg7, x17);
|
|
+
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg0, f10);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg1, f11);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg2, f12);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg3, f13);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg4, f14);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg5, f15);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg6, f16);
|
|
+REGISTER_DECLARATION(FloatRegister, c_farg7, f17);
|
|
+
|
|
+// Java function registers (caller-saved registers)
|
|
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
|
|
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
|
|
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
|
|
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
|
|
+REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
|
|
+REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
|
|
+REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
|
|
+REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);
|
|
+
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg0, f10);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg1, f11);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg2, f12);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg3, f13);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg4, f14);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg5, f15);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg6, f16);
|
|
+REGISTER_DECLARATION(FloatRegister, j_farg7, f17);
|
|
+
|
|
+// zero register
|
|
+REGISTER_DECLARATION(Register, zr, x0);
|
|
+// global pointer
|
|
+REGISTER_DECLARATION(Register, gp, x3);
|
|
+// thread pointer
|
|
+REGISTER_DECLARATION(Register, tp, x4);
|
|
+
|
|
+// volatile (caller-save) registers
|
|
+
|
|
+// current method -- must be in a call-clobbered register
|
|
+REGISTER_DECLARATION(Register, xmethod, x31);
|
|
+// return address
|
|
+REGISTER_DECLARATION(Register, ra, x1);
|
|
+
|
|
+// non-volatile (callee-save) registers
|
|
+
|
|
+// stack pointer
|
|
+REGISTER_DECLARATION(Register, sp, x2);
|
|
+// frame pointer
|
|
+REGISTER_DECLARATION(Register, fp, x8);
|
|
+// base of heap
|
|
+REGISTER_DECLARATION(Register, xheapbase, x27);
|
|
+// constant pool cache
|
|
+REGISTER_DECLARATION(Register, xcpool, x26);
|
|
+// monitors allocated on stack
|
|
+REGISTER_DECLARATION(Register, xmonitors, x25);
|
|
+// locals on stack
|
|
+REGISTER_DECLARATION(Register, xlocals, x24);
|
|
+
|
|
+/* x4 (tp) is already the C/C++ thread pointer per the ABI; reusing it as the
+ * Java thread pointer would clash with native code, so a callee-saved
+ * register is used instead.
+ */
|
|
+// java thread pointer
|
|
+REGISTER_DECLARATION(Register, xthread, x23);
|
|
+// bytecode pointer
|
|
+REGISTER_DECLARATION(Register, xbcp, x22);
|
|
+// Dispatch table base
|
|
+REGISTER_DECLARATION(Register, xdispatch, x21);
|
|
+// Java stack pointer
|
|
+REGISTER_DECLARATION(Register, esp, x20);
|
|
+
|
|
+// temporary registers (caller-saved registers)
|
|
+REGISTER_DECLARATION(Register, t0, x5);
|
|
+REGISTER_DECLARATION(Register, t1, x6);
|
|
+REGISTER_DECLARATION(Register, t2, x7);
|
|
+
|
|
+const Register g_INTArgReg[Argument::n_int_register_parameters_c] = {
|
|
+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
|
|
+};
|
|
+
|
|
+const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = {
|
|
+ c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7
|
|
+};
|
|
+
|
|
+#define assert_cond(ARG1) assert(ARG1, #ARG1)
|
|
+
|
|
+// Addressing modes
|
|
+class Address {
|
|
+ public:
|
|
+
|
|
+ enum mode { no_mode, base_plus_offset, pcrel, literal };
|
|
+
|
|
+ private:
|
|
+ Register _base;
|
|
+ int64_t _offset;
|
|
+ enum mode _mode;
|
|
+
|
|
+ RelocationHolder _rspec;
|
|
+
|
|
+ // If the target is far we'll need to load the ea of this to a
|
|
+ // register to reach it. Otherwise if near we can do PC-relative
|
|
+ // addressing.
|
|
+ address _target;
|
|
+
|
|
+ public:
|
|
+ Address()
|
|
+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { }
|
|
+ Address(Register r)
|
|
+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, int o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, long o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, long long o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, unsigned int o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, unsigned long o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+ Address(Register r, unsigned long long o)
|
|
+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
|
|
+#ifdef ASSERT
|
|
+ Address(Register r, ByteSize disp)
|
|
+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { }
|
|
+#endif
|
|
+ Address(address target, RelocationHolder const& rspec)
|
|
+ : _base(noreg),
|
|
+ _offset(0),
|
|
+ _mode(literal),
|
|
+ _rspec(rspec),
|
|
+ _target(target) { }
|
|
+ Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
|
|
+
|
|
+ const Register base() const {
|
|
+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode");
|
|
+ return _base;
|
|
+ }
|
|
+ long offset() const {
|
|
+ return _offset;
|
|
+ }
|
|
+
|
|
+ mode getMode() const {
|
|
+ return _mode;
|
|
+ }
|
|
+
|
|
+ bool uses(Register reg) const { return _base == reg;}
|
|
+ const address target() const { return _target; }
|
|
+ const RelocationHolder& rspec() const { return _rspec; }
|
|
+ ~Address() {
|
|
+ _target = NULL;
|
|
+ _base = NULL;
|
|
+ }
|
|
+};
|
|
+
|
|
+// Convenience classes
|
|
+class RuntimeAddress: public Address {
|
|
+
|
|
+ public:
|
|
+
|
|
+ RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
|
|
+ ~RuntimeAddress() {}
|
|
+};
|
|
+
|
|
+class OopAddress: public Address {
|
|
+
|
|
+ public:
|
|
+
|
|
+ OopAddress(address target) : Address(target, relocInfo::oop_type) {}
|
|
+ ~OopAddress() {}
|
|
+};
|
|
+
|
|
+class ExternalAddress: public Address {
|
|
+ private:
|
|
+ static relocInfo::relocType reloc_for_target(address target) {
|
|
+ // Sometimes ExternalAddress is used for values which aren't
|
|
+ // exactly addresses, like the card table base.
|
|
+ // external_word_type can't be used for values in the first page
|
|
+ // so just skip the reloc in that case.
|
|
+ return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
|
|
+ }
|
|
+
|
|
+ public:
|
|
+
|
|
+ ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
|
|
+ ~ExternalAddress() {}
|
|
+};
|
|
+
|
|
+class InternalAddress: public Address {
|
|
+
|
|
+ public:
|
|
+
|
|
+ InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
|
|
+ ~InternalAddress() {}
|
|
+};
|
|
+
|
|
+class Assembler : public AbstractAssembler {
|
|
+public:
|
|
+
|
|
+ enum { instruction_size = 4 };
|
|
+
|
|
+ enum RoundingMode {
|
|
+ rne = 0b000, // round to Nearest, ties to Even
|
|
+ rtz = 0b001, // round towards Zero
|
|
+    rdn = 0b010, // round Down (towards negative infinity)
|
|
+ rup = 0b011, // round Up (towards infinity)
|
|
+ rmm = 0b100, // round to Nearest, ties to Max Magnitude
|
|
+    rdy = 0b111, // in an instruction's rm field, selects the dynamic rounding mode; invalid when written to the Rounding Mode (frm) register.
|
|
+ };
|
|
+
|
|
+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) {
|
|
+ assert_different_registers(noreg, temp, base);
|
|
+ int64_t upper = offset, lower = offset;
|
|
+
|
|
+ int8_t shift = 64 - expect_offbits;
|
|
+ lower = (offset << shift) >> shift;
|
|
+ upper -= lower;
|
|
+
|
|
+ li(temp, upper);
|
|
+ add(temp, temp, base);
|
|
+ return Address(temp, lower);
|
|
+ }
|
|
+
|
|
+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) {
|
|
+ if (is_offset_in_range(offset, expect_offbits)) {
|
|
+ return Address(base, offset);
|
|
+ }
|
|
+ return form_address_complex(base, offset, expect_offbits, temp);
|
|
+ }
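// A small standalone sketch (illustrative names, not HotSpot code) of how
// form_address_complex() above splits an offset that does not fit a 12-bit
// immediate: the sign-extended low 12 bits stay in the addressing mode and
// the remainder is added to the base register via li/add.
#include <cassert>
#include <cstdint>

static void split_offset(int64_t offset, int64_t &upper, int64_t &lower) {
  lower = (int64_t)((uint64_t)offset << 52) >> 52;  // sign-extended low 12 bits
  upper = offset - lower;                           // materialized with li, then add
}

int main() {
  int64_t upper = 0, lower = 0;
  split_offset(0x1fff, upper, lower);       // low 12 bits have their sign bit set
  assert(lower == -1 && upper == 0x2000);   // 0x2000 + (-1) == 0x1fff
  assert(lower >= -2048 && lower <= 2047);  // fits a 12-bit signed immediate
  return 0;
}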
|
|
+
|
|
+ void li(Register Rd, int64_t imm); // optimized load immediate
|
|
+ void li32(Register Rd, int32_t imm);
|
|
+ void li64(Register Rd, int64_t imm);
|
|
+ void movptr(Register Rd, address addr);
|
|
+ void movptr_with_offset(Register Rd, address addr, int32_t &offset);
|
|
+ void movptr(Register Rd, uintptr_t imm64);
|
|
+ void j(const address &dest, Register temp = t0);
|
|
+ void j(const Address &adr, Register temp = t0) ;
|
|
+ void j(Label &l, Register temp = t0);
|
|
+ void jal(Label &l, Register temp = t0);
|
|
+ void jal(const address &dest, Register temp = t0);
|
|
+ void jal(const Address &adr, Register temp = t0);
|
|
+ void jr(Register Rs);
|
|
+ void jalr(Register Rs);
|
|
+ void ret();
|
|
+ void call(const address &dest, Register temp = t0);
|
|
+ void call(const Address &adr, Register temp = t0);
|
|
+ void tail(const address &dest, Register temp = t0);
|
|
+ void tail(const Address &adr, Register temp = t0);
|
|
+ void call(Label &l, Register temp) {
|
|
+ call(target(l), temp);
|
|
+ }
|
|
+ void tail(Label &l, Register temp) {
|
|
+ tail(target(l), temp);
|
|
+ }
|
|
+
|
|
+ static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) {
|
|
+ assert_cond(msb >= lsb && msb <= 31);
|
|
+ unsigned nbits = msb - lsb + 1;
|
|
+ uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
|
|
+ uint32_t result = val >> lsb;
|
|
+ result &= mask;
|
|
+ return result;
|
|
+ }
|
|
+
|
|
+ static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) {
|
|
+ assert_cond(msb >= lsb && msb <= 31);
|
|
+ int32_t result = val << (31 - msb);
|
|
+ result >>= (31 - msb + lsb);
|
|
+ return result;
|
|
+ }
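// A standalone illustration (not HotSpot code) of the extract/sextract helpers
// above: pulling a bit field (narrower than 32 bits) out of an instruction
// word, unsigned versus sign-extended.
#include <cassert>
#include <cstdint>

static uint32_t extract_bits(uint32_t v, unsigned msb, unsigned lsb) {
  return (v >> lsb) & ((1u << (msb - lsb + 1)) - 1);
}

static int32_t sextract_bits(uint32_t v, unsigned msb, unsigned lsb) {
  int32_t r = (int32_t)(v << (31 - msb));  // discard bits above msb
  return r >> (31 - msb + lsb);            // arithmetic shift sign-extends
}

int main() {
  const uint32_t insn = 0xfff00313;              // addi x6, x0, -1
  assert(extract_bits(insn, 31, 20) == 0xfffu);  // raw 12-bit immediate field
  assert(sextract_bits(insn, 31, 20) == -1);     // the same field, sign-extended
  return 0;
}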
|
|
+
|
|
+ static void patch(address a, unsigned msb, unsigned lsb, unsigned val) {
|
|
+ assert_cond(a != NULL);
|
|
+ assert_cond(msb >= lsb && msb <= 31);
|
|
+ unsigned nbits = msb - lsb + 1;
|
|
+ guarantee(val < (1ULL << nbits), "Field too big for insn");
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
|
|
+ val <<= lsb;
|
|
+ mask <<= lsb;
|
|
+ unsigned target = *(unsigned *)a;
|
|
+ target &= ~mask;
|
|
+ target |= val;
|
|
+ *(unsigned *)a = target;
|
|
+ }
|
|
+
|
|
+ static void patch(address a, unsigned bit, unsigned val) {
|
|
+ patch(a, bit, bit, val);
|
|
+ }
|
|
+
|
|
+ static void patch_reg(address a, unsigned lsb, Register reg) {
|
|
+ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
|
|
+ }
|
|
+
|
|
+ static void patch_reg(address a, unsigned lsb, FloatRegister reg) {
|
|
+ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
|
|
+ }
|
|
+
|
|
+ static void patch_reg(address a, unsigned lsb, VectorRegister reg) {
|
|
+ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
|
|
+ }
|
|
+
|
|
+ void emit(unsigned insn) {
|
|
+ emit_int32((jint)insn);
|
|
+ }
|
|
+
|
|
+ void halt() {
|
|
+ emit_int32(0);
|
|
+ }
|
|
+
|
|
+// Register Instructions
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, Register Rs2) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(add, 0b0110011, 0b000, 0b0000000);
|
|
+ INSN(sub, 0b0110011, 0b000, 0b0100000);
|
|
+ INSN(andr, 0b0110011, 0b111, 0b0000000);
|
|
+ INSN(orr, 0b0110011, 0b110, 0b0000000);
|
|
+ INSN(xorr, 0b0110011, 0b100, 0b0000000);
|
|
+ INSN(sll, 0b0110011, 0b001, 0b0000000);
|
|
+ INSN(sra, 0b0110011, 0b101, 0b0100000);
|
|
+ INSN(srl, 0b0110011, 0b101, 0b0000000);
|
|
+ INSN(slt, 0b0110011, 0b010, 0b0000000);
|
|
+ INSN(sltu, 0b0110011, 0b011, 0b0000000);
|
|
+ INSN(addw, 0b0111011, 0b000, 0b0000000);
|
|
+ INSN(subw, 0b0111011, 0b000, 0b0100000);
|
|
+ INSN(sllw, 0b0111011, 0b001, 0b0000000);
|
|
+ INSN(sraw, 0b0111011, 0b101, 0b0100000);
|
|
+ INSN(srlw, 0b0111011, 0b101, 0b0000000);
|
|
+ INSN(mul, 0b0110011, 0b000, 0b0000001);
|
|
+ INSN(mulh, 0b0110011, 0b001, 0b0000001);
|
|
+ INSN(mulhsu,0b0110011, 0b010, 0b0000001);
|
|
+ INSN(mulhu, 0b0110011, 0b011, 0b0000001);
|
|
+ INSN(mulw, 0b0111011, 0b000, 0b0000001);
|
|
+ INSN(div, 0b0110011, 0b100, 0b0000001);
|
|
+ INSN(divu, 0b0110011, 0b101, 0b0000001);
|
|
+ INSN(divw, 0b0111011, 0b100, 0b0000001);
|
|
+ INSN(divuw, 0b0111011, 0b101, 0b0000001);
|
|
+ INSN(rem, 0b0110011, 0b110, 0b0000001);
|
|
+ INSN(remu, 0b0110011, 0b111, 0b0000001);
|
|
+ INSN(remw, 0b0111011, 0b110, 0b0000001);
|
|
+ INSN(remuw, 0b0111011, 0b111, 0b0000001);
|
|
+
|
|
+ // Vector Configuration Instruction
|
|
+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN_ENTRY_RELOC(result_type, header) \
|
|
+ result_type header { \
|
|
+ guarantee(rtype == relocInfo::internal_word_type, \
|
|
+ "only internal_word_type relocs make sense here"); \
|
|
+ code_section()->relocate(pc(), InternalAddress(dest).rspec());
|
|
+
|
|
+ // Load/store register (all modes)
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, Register Rs, const int32_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \
|
|
+ int32_t val = offset & 0xfff; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 15, Rs); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch((address)&insn, 31, 20, val); \
|
|
+ emit(insn); \
|
|
+ } \
|
|
+ void NAME(Register Rd, address dest) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t distance = (dest - pc()); \
|
|
+ if (is_offset_in_range(distance, 32)) { \
|
|
+ auipc(Rd, (int32_t)distance + 0x800); \
|
|
+ NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \
|
|
+ } else { \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(Rd, dest, offset); \
|
|
+ NAME(Rd, Rd, offset); \
|
|
+ } \
|
|
+ } \
|
|
+ INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \
|
|
+ NAME(Rd, dest); \
|
|
+ } \
|
|
+ void NAME(Register Rd, const Address &adr, Register temp = t0) { \
|
|
+ switch(adr.getMode()) { \
|
|
+ case Address::literal: { \
|
|
+ code_section()->relocate(pc(), adr.rspec()); \
|
|
+ NAME(Rd, adr.target()); \
|
|
+ break; \
|
|
+ } \
|
|
+ case Address::base_plus_offset: { \
|
|
+ if (is_offset_in_range(adr.offset(), 12)) { \
|
|
+ NAME(Rd, adr.base(), adr.offset()); \
|
|
+ } else { \
|
|
+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? temp : Rd)); \
|
|
+ } \
|
|
+ break; \
|
|
+ } \
|
|
+ default: \
|
|
+ ShouldNotReachHere(); \
|
|
+ } \
|
|
+ } \
|
|
+ void NAME(Register Rd, Label &L) { \
|
|
+ wrap_label(Rd, L, &Assembler::NAME); \
|
|
+ }
|
|
+
|
|
+ INSN(lb, 0b0000011, 0b000);
|
|
+ INSN(lbu, 0b0000011, 0b100);
|
|
+ INSN(ld, 0b0000011, 0b011);
|
|
+ INSN(lh, 0b0000011, 0b001);
|
|
+ INSN(lhu, 0b0000011, 0b101);
|
|
+ INSN(lw, 0b0000011, 0b010);
|
|
+ INSN(lwu, 0b0000011, 0b110);
|
|
+
|
|
+#undef INSN
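// The load forms above emit auipc(Rd, (int32_t)distance + 0x800) followed by a
// 12-bit load offset.  A standalone check (illustrative names) of why the 0x800
// bias works: auipc contributes the upper 20 bits of (distance + 0x800), which
// exactly cancels the sign extension of the low 12 bits supplied by the load.
#include <cassert>
#include <cstdint>

static bool auipc_split_ok(int32_t distance) {
  int32_t hi = (int32_t)((uint32_t)(distance + 0x800) & ~0xfffu);  // auipc part
  int32_t lo = (int32_t)((uint32_t)distance << 20) >> 20;          // low 12 bits, sign-extended
  return hi + lo == distance;
}

int main() {
  assert(auipc_split_ok(0x12345678));
  assert(auipc_split_ok(-0x1234));
  assert(auipc_split_ok(0xfff));   // low 12 bits become negative after sign extension
  return 0;
}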
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \
|
|
+ uint32_t val = offset & 0xfff; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 15, Rs); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch((address)&insn, 31, 20, val); \
|
|
+ emit(insn); \
|
|
+ } \
|
|
+ void NAME(FloatRegister Rd, address dest, Register temp = t0) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t distance = (dest - pc()); \
|
|
+ if (is_offset_in_range(distance, 32)) { \
|
|
+ auipc(temp, (int32_t)distance + 0x800); \
|
|
+ NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \
|
|
+ } else { \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(temp, dest, offset); \
|
|
+ NAME(Rd, temp, offset); \
|
|
+ } \
|
|
+ } \
|
|
+ INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \
|
|
+ NAME(Rd, dest, temp); \
|
|
+ } \
|
|
+ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \
|
|
+ switch(adr.getMode()) { \
|
|
+ case Address::literal: { \
|
|
+ code_section()->relocate(pc(), adr.rspec()); \
|
|
+ NAME(Rd, adr.target(), temp); \
|
|
+ break; \
|
|
+ } \
|
|
+ case Address::base_plus_offset: { \
|
|
+ if (is_offset_in_range(adr.offset(), 12)) { \
|
|
+ NAME(Rd, adr.base(), adr.offset()); \
|
|
+ } else { \
|
|
+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \
|
|
+ } \
|
|
+ break; \
|
|
+ } \
|
|
+ default: \
|
|
+ ShouldNotReachHere(); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(flw, 0b0000111, 0b010);
|
|
+ INSN(fld, 0b0000111, 0b011);
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \
|
|
+ uint32_t val = offset & 0x1fff; \
|
|
+ uint32_t val11 = (val >> 11) & 0x1; \
|
|
+ uint32_t val12 = (val >> 12) & 0x1; \
|
|
+ uint32_t low = (val >> 1) & 0xf; \
|
|
+ uint32_t high = (val >> 5) & 0x3f; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ patch((address)&insn, 7, val11); \
|
|
+ patch((address)&insn, 11, 8, low); \
|
|
+ patch((address)&insn, 30, 25, high); \
|
|
+ patch((address)&insn, 31, val12); \
|
|
+ emit(insn); \
|
|
+ } \
|
|
+ void NAME(Register Rs1, Register Rs2, const address dest) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t offset = (dest - pc()); \
|
|
+ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \
|
|
+ NAME(Rs1, Rs2, offset); \
|
|
+ } \
|
|
+ INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
|
|
+ NAME(Rs1, Rs2, dest); \
|
|
+ }
|
|
+
|
|
+ INSN(beq, 0b1100011, 0b000);
|
|
+ INSN(bge, 0b1100011, 0b101);
|
|
+ INSN(bgeu, 0b1100011, 0b111);
|
|
+ INSN(blt, 0b1100011, 0b100);
|
|
+ INSN(bltu, 0b1100011, 0b110);
|
|
+ INSN(bne, 0b1100011, 0b001);
|
|
+
|
|
+#undef INSN
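// A standalone round-trip check (illustrative names, not HotSpot code) of the
// B-type immediate scatter used by the branch encoder above: the 13-bit offset
// lands in insn[31], insn[30:25], insn[11:8] and insn[7].
#include <cassert>
#include <cstdint>

static uint32_t scatter_branch_offset(int64_t offset) {
  uint32_t val = (uint32_t)(offset & 0x1fff);
  uint32_t insn = 0;
  insn |= ((val >> 12) & 0x1)  << 31;   // imm[12]
  insn |= ((val >> 5)  & 0x3f) << 25;   // imm[10:5]
  insn |= ((val >> 1)  & 0xf)  << 8;    // imm[4:1]
  insn |= ((val >> 11) & 0x1)  << 7;    // imm[11]
  return insn;
}

static int64_t gather_branch_offset(uint32_t insn) {
  uint32_t val = ((insn >> 31) & 0x1)  << 12 |
                 ((insn >> 25) & 0x3f) << 5  |
                 ((insn >> 8)  & 0xf)  << 1  |
                 ((insn >> 7)  & 0x1)  << 11;
  return (int64_t)((uint64_t)val << 51) >> 51;  // sign-extend the 13-bit value
}

int main() {
  for (int64_t off = -4096; off <= 4094; off += 2) {
    assert(gather_branch_offset(scatter_branch_offset(off)) == off);
  }
  return 0;
}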
|
|
+
|
|
+#define INSN(NAME, NEG_INSN) \
|
|
+ void NAME(Register Rs1, Register Rs2, Label &L, bool is_far = false) { \
|
|
+ wrap_label(Rs1, Rs2, L, &Assembler::NAME, &Assembler::NEG_INSN, is_far); \
|
|
+ }
|
|
+
|
|
+ INSN(beq, bne);
|
|
+ INSN(bne, beq);
|
|
+ INSN(blt, bge);
|
|
+ INSN(bge, blt);
|
|
+ INSN(bltu, bgeu);
|
|
+ INSN(bgeu, bltu);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, REGISTER, op, funct3) \
|
|
+ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \
|
|
+ uint32_t val = offset & 0xfff; \
|
|
+ uint32_t low = val & 0x1f; \
|
|
+ uint32_t high = (val >> 5) & 0x7f; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 15, Rs2); \
|
|
+ patch_reg((address)&insn, 20, Rs1); \
|
|
+ patch((address)&insn, 11, 7, low); \
|
|
+ patch((address)&insn, 31, 25, high); \
|
|
+ emit(insn); \
|
|
+ } \
|
|
+ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \
|
|
+ NAME(Rs, dest, temp); \
|
|
+ }
|
|
+
|
|
+ INSN(sb, Register, 0b0100011, 0b000);
|
|
+ INSN(sh, Register, 0b0100011, 0b001);
|
|
+ INSN(sw, Register, 0b0100011, 0b010);
|
|
+ INSN(sd, Register, 0b0100011, 0b011);
|
|
+ INSN(fsw, FloatRegister, 0b0100111, 0b010);
|
|
+ INSN(fsd, FloatRegister, 0b0100111, 0b011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME) \
|
|
+ void NAME(Register Rs, address dest, Register temp = t0) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ assert_different_registers(Rs, temp); \
|
|
+ int64_t distance = (dest - pc()); \
|
|
+ if (is_offset_in_range(distance, 32)) { \
|
|
+ auipc(temp, (int32_t)distance + 0x800); \
|
|
+ NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
|
|
+ } else { \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(temp, dest, offset); \
|
|
+ NAME(Rs, temp, offset); \
|
|
+ } \
|
|
+ } \
|
|
+ void NAME(Register Rs, const Address &adr, Register temp = t0) { \
|
|
+ switch(adr.getMode()) { \
|
|
+ case Address::literal: { \
|
|
+ assert_different_registers(Rs, temp); \
|
|
+ code_section()->relocate(pc(), adr.rspec()); \
|
|
+ NAME(Rs, adr.target(), temp); \
|
|
+ break; \
|
|
+ } \
|
|
+ case Address::base_plus_offset: { \
|
|
+ if (is_offset_in_range(adr.offset(), 12)) { \
|
|
+ NAME(Rs, adr.base(), adr.offset()); \
|
|
+ } else { \
|
|
+ assert_different_registers(Rs, temp); \
|
|
+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \
|
|
+ } \
|
|
+ break; \
|
|
+ } \
|
|
+ default: \
|
|
+ ShouldNotReachHere(); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(sb);
|
|
+ INSN(sh);
|
|
+ INSN(sw);
|
|
+ INSN(sd);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME) \
|
|
+ void NAME(FloatRegister Rs, address dest, Register temp = t0) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t distance = (dest - pc()); \
|
|
+ if (is_offset_in_range(distance, 32)) { \
|
|
+ auipc(temp, (int32_t)distance + 0x800); \
|
|
+ NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
|
|
+ } else { \
|
|
+ int32_t offset = 0; \
|
|
+ movptr_with_offset(temp, dest, offset); \
|
|
+ NAME(Rs, temp, offset); \
|
|
+ } \
|
|
+ } \
|
|
+ void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \
|
|
+ switch(adr.getMode()) { \
|
|
+ case Address::literal: { \
|
|
+ code_section()->relocate(pc(), adr.rspec()); \
|
|
+ NAME(Rs, adr.target(), temp); \
|
|
+ break; \
|
|
+ } \
|
|
+ case Address::base_plus_offset: { \
|
|
+ if (is_offset_in_range(adr.offset(), 12)) { \
|
|
+ NAME(Rs, adr.base(), adr.offset()); \
|
|
+ } else { \
|
|
+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \
|
|
+ } \
|
|
+ break; \
|
|
+ } \
|
|
+ default: \
|
|
+ ShouldNotReachHere(); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(fsw);
|
|
+ INSN(fsd);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, const uint32_t csr, Register Rs1) { \
|
|
+ guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch((address)&insn, 31, 20, csr); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(csrrw, 0b1110011, 0b001);
|
|
+ INSN(csrrs, 0b1110011, 0b010);
|
|
+ INSN(csrrc, 0b1110011, 0b011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) { \
|
|
+ guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \
|
|
+ guarantee(is_unsigned_imm_in_range(uimm, 5, 0), "uimm is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ uint32_t val = uimm & 0x1f; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch((address)&insn, 19, 15, val); \
|
|
+ patch((address)&insn, 31, 20, csr); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(csrrwi, 0b1110011, 0b101);
|
|
+ INSN(csrrsi, 0b1110011, 0b110);
|
|
+ INSN(csrrci, 0b1110011, 0b111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op) \
|
|
+ void NAME(Register Rd, const int32_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \
|
|
+ patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1)); \
|
|
+ patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \
|
|
+ patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \
|
|
+ emit(insn); \
|
|
+ } \
|
|
+ void NAME(Register Rd, const address dest, Register temp = t0) { \
|
|
+ assert_cond(dest != NULL); \
|
|
+ int64_t offset = dest - pc(); \
|
|
+ if (is_imm_in_range(offset, 20, 1)) { \
|
|
+ NAME(Rd, offset); \
|
|
+ } else { \
|
|
+ assert_different_registers(Rd, temp); \
|
|
+ int32_t off = 0; \
|
|
+ movptr_with_offset(temp, dest, off); \
|
|
+ jalr(Rd, temp, off); \
|
|
+ } \
|
|
+ } \
|
|
+ void NAME(Register Rd, Label &L, Register temp = t0) { \
|
|
+ assert_different_registers(Rd, temp); \
|
|
+ wrap_label(Rd, L, temp, &Assembler::NAME); \
|
|
+ }
|
|
+
|
|
+ INSN(jal, 0b1101111);
|
|
+
|
|
+#undef INSN
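// A standalone sketch (illustrative names) of the J-type immediate scatter used
// by jal above, checked against the well-known encoding of "jal x0, +8"
// (i.e. "j .+8"), which is 0x0080006f.
#include <cassert>
#include <cstdint>

static uint32_t encode_jal(uint32_t rd, int32_t offset) {
  uint32_t insn = 0x6f;                               // opcode 0b1101111
  insn |= (rd & 0x1f) << 7;
  insn |= (uint32_t)((offset >> 12) & 0xff)  << 12;   // imm[19:12]
  insn |= (uint32_t)((offset >> 11) & 0x1)   << 20;   // imm[11]
  insn |= (uint32_t)((offset >> 1)  & 0x3ff) << 21;   // imm[10:1]
  insn |= (uint32_t)((offset >> 20) & 0x1)   << 31;   // imm[20]
  return insn;
}

int main() {
  assert(encode_jal(/* rd = x0 */ 0, /* offset */ 8) == 0x0080006fu);
  return 0;
}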
|
|
+
|
|
+#undef INSN_ENTRY_RELOC
|
|
+
|
|
+#define INSN(NAME, op, funct) \
|
|
+ void NAME(Register Rd, Register Rs, const int32_t offset) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch((address)&insn, 14, 12, funct); \
|
|
+ patch_reg((address)&insn, 15, Rs); \
|
|
+ int32_t val = offset & 0xfff; \
|
|
+ patch((address)&insn, 31, 20, val); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(jalr, 0b1100111, 0b000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+ enum barrier {
|
|
+ i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001,
|
|
+ ir = i | r, ow = o | w, iorw = i | o | r | w
|
|
+ };
|
|
+
|
|
+ void fence(const uint32_t predecessor, const uint32_t successor) {
|
|
+ unsigned insn = 0;
|
|
+ guarantee(predecessor < 16, "predecessor is invalid");
|
|
+ guarantee(successor < 16, "successor is invalid");
|
|
+ patch((address)&insn, 6, 0, 0b001111);
|
|
+ patch((address)&insn, 11, 7, 0b00000);
|
|
+ patch((address)&insn, 14, 12, 0b000);
|
|
+ patch((address)&insn, 19, 15, 0b00000);
|
|
+ patch((address)&insn, 23, 20, successor);
|
|
+ patch((address)&insn, 27, 24, predecessor);
|
|
+ patch((address)&insn, 31, 28, 0b0000);
|
|
+ emit(insn);
|
|
+ }
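// The predecessor/successor arguments of fence() above are 4-bit i/o/r/w masks
// placed at insn[27:24] and insn[23:20].  A standalone check (illustrative
// names): a full "fence iorw, iorw" is the canonical encoding 0x0ff0000f.
#include <cassert>
#include <cstdint>

static uint32_t encode_fence(uint32_t pred, uint32_t succ) {
  return ((pred & 0xf) << 24) | ((succ & 0xf) << 20) | 0x0f;  // MISC-MEM opcode
}

int main() {
  const uint32_t iorw = 0b1111;  // i | o | r | w, as in the barrier enum above
  assert(encode_fence(iorw, iorw) == 0x0ff0000fu);
  return 0;
}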
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME() { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 11, 7, 0b00000); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 19, 15, 0b00000); \
|
|
+ patch((address)&insn, 31, 20, funct7); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(ecall, 0b1110011, 0b000, 0b000000000000);
|
|
+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001);
|
|
+#undef INSN
|
|
+
|
|
+enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11};
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ patch((address)&insn, 31, 27, funct7); \
|
|
+ patch((address)&insn, 26, 25, memory_order); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(amoswap_w, 0b0101111, 0b010, 0b00001);
|
|
+ INSN(amoadd_w, 0b0101111, 0b010, 0b00000);
|
|
+ INSN(amoxor_w, 0b0101111, 0b010, 0b00100);
|
|
+ INSN(amoand_w, 0b0101111, 0b010, 0b01100);
|
|
+ INSN(amoor_w, 0b0101111, 0b010, 0b01000);
|
|
+ INSN(amomin_w, 0b0101111, 0b010, 0b10000);
|
|
+ INSN(amomax_w, 0b0101111, 0b010, 0b10100);
|
|
+ INSN(amominu_w, 0b0101111, 0b010, 0b11000);
|
|
+ INSN(amomaxu_w, 0b0101111, 0b010, 0b11100);
|
|
+ INSN(amoswap_d, 0b0101111, 0b011, 0b00001);
|
|
+ INSN(amoadd_d, 0b0101111, 0b011, 0b00000);
|
|
+ INSN(amoxor_d, 0b0101111, 0b011, 0b00100);
|
|
+ INSN(amoand_d, 0b0101111, 0b011, 0b01100);
|
|
+ INSN(amoor_d, 0b0101111, 0b011, 0b01000);
|
|
+ INSN(amomin_d, 0b0101111, 0b011, 0b10000);
|
|
+ INSN(amomax_d , 0b0101111, 0b011, 0b10100);
|
|
+ INSN(amominu_d, 0b0101111, 0b011, 0b11000);
|
|
+ INSN(amomaxu_d, 0b0101111, 0b011, 0b11100);
|
|
+#undef INSN
|
|
+
|
|
+enum operand_size { int8, int16, int32, uint32, int64 };
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \
|
|
+ unsigned insn = 0; \
|
|
+ uint32_t val = memory_order & 0x3; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch((address)&insn, 25, 20, 0b00000); \
|
|
+ patch((address)&insn, 31, 27, funct7); \
|
|
+ patch((address)&insn, 26, 25, val); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(lr_w, 0b0101111, 0b010, 0b00010);
|
|
+ INSN(lr_d, 0b0101111, 0b011, 0b00010);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \
|
|
+ unsigned insn = 0; \
|
|
+ uint32_t val = memory_order & 0x3; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs2); \
|
|
+ patch_reg((address)&insn, 20, Rs1); \
|
|
+ patch((address)&insn, 31, 27, funct7); \
|
|
+ patch((address)&insn, 26, 25, val); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(sc_w, 0b0101111, 0b010, 0b00011);
|
|
+ INSN(sc_d, 0b0101111, 0b011, 0b00011);
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct5, funct7) \
|
|
+ void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, rm); \
|
|
+ patch((address)&insn, 24, 20, funct5); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fsqrt_s, 0b1010011, 0b00000, 0b0101100);
|
|
+ INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101);
|
|
+ INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000);
|
|
+ INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001);
|
|
+#undef INSN
|
|
+
|
|
+// Immediate Instruction
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, Register Rs1, int32_t imm) { \
|
|
+    guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of range");      \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 20, imm & 0x00000fff); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(addi, 0b0010011, 0b000);
|
|
+ INSN(slti, 0b0010011, 0b010);
|
|
+ INSN(addiw, 0b0011011, 0b000);
|
|
+ INSN(and_imm12, 0b0010011, 0b111);
|
|
+ INSN(ori, 0b0010011, 0b110);
|
|
+ INSN(xori, 0b0010011, 0b100);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, Register Rs1, uint32_t imm) { \
|
|
+    guarantee(is_unsigned_imm_in_range(imm, 12, 0), "Immediate is out of range"); \
|
|
+ unsigned insn = 0; \
|
|
+    patch((address)&insn, 6, 0, op);                                          \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 20, imm & 0x00000fff); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(sltiu, 0b0010011, 0b011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Shift Immediate Instruction
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(Register Rd, Register Rs1, unsigned shamt) { \
|
|
+ guarantee(shamt <= 0x3f, "Shamt is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 25, 20, shamt); \
|
|
+ patch((address)&insn, 31, 26, funct6); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(slli, 0b0010011, 0b001, 0b000000);
|
|
+ INSN(srai, 0b0010011, 0b101, 0b010000);
|
|
+ INSN(srli, 0b0010011, 0b101, 0b000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Shift Word Immediate Instruction
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, unsigned shamt) { \
|
|
+ guarantee(shamt <= 0x1f, "Shamt is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 24, 20, shamt); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(slliw, 0b0011011, 0b001, 0b0000000);
|
|
+ INSN(sraiw, 0b0011011, 0b101, 0b0100000);
|
|
+ INSN(srliw, 0b0011011, 0b101, 0b0000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Upper Immediate Instruction
|
|
+#define INSN(NAME, op) \
|
|
+ void NAME(Register Rd, int32_t imm) { \
|
|
+ int32_t upperImm = imm >> 12; \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ upperImm &= 0x000fffff; \
|
|
+ patch((address)&insn, 31, 12, upperImm); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(lui, 0b0110111);
|
|
+ INSN(auipc, 0b0010111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Register Instructions
|
|
+#define INSN(NAME, op, funct2) \
|
|
+ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, rm); \
|
|
+ patch((address)&insn, 26, 25, funct2); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ patch_reg((address)&insn, 27, Rs3); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fmadd_s, 0b1000011, 0b00);
|
|
+ INSN(fmsub_s, 0b1000111, 0b00);
|
|
+ INSN(fnmsub_s, 0b1001011, 0b00);
|
|
+ INSN(fnmadd_s, 0b1001111, 0b00);
|
|
+ INSN(fmadd_d, 0b1000011, 0b01);
|
|
+ INSN(fmsub_d, 0b1000111, 0b01);
|
|
+ INSN(fnmsub_d, 0b1001011, 0b01);
|
|
+ INSN(fnmadd_d, 0b1001111, 0b01);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Register Instructions
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fsgnj_s, 0b1010011, 0b000, 0b0010000);
|
|
+ INSN(fsgnjn_s, 0b1010011, 0b001, 0b0010000);
|
|
+ INSN(fsgnjx_s, 0b1010011, 0b010, 0b0010000);
|
|
+ INSN(fmin_s, 0b1010011, 0b000, 0b0010100);
|
|
+ INSN(fmax_s, 0b1010011, 0b001, 0b0010100);
|
|
+ INSN(fsgnj_d, 0b1010011, 0b000, 0b0010001);
|
|
+ INSN(fsgnjn_d, 0b1010011, 0b001, 0b0010001);
|
|
+ INSN(fsgnjx_d, 0b1010011, 0b010, 0b0010001);
|
|
+ INSN(fmin_d, 0b1010011, 0b000, 0b0010101);
|
|
+ INSN(fmax_d, 0b1010011, 0b001, 0b0010101);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Register Compare Instructions
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, FloatRegister Rs1, FloatRegister Rs2) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(feq_s, 0b1010011, 0b010, 0b1010000);
|
|
+ INSN(flt_s, 0b1010011, 0b001, 0b1010000);
|
|
+ INSN(fle_s, 0b1010011, 0b000, 0b1010000);
|
|
+ INSN(feq_d, 0b1010011, 0b010, 0b1010001);
|
|
+ INSN(fle_d, 0b1010011, 0b000, 0b1010001);
|
|
+ INSN(flt_d, 0b1010011, 0b001, 0b1010001);
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Arith Instruction
|
|
+#define INSN(NAME, op, funct7) \
|
|
+ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, rm); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fadd_s, 0b1010011, 0b0000000);
|
|
+ INSN(fsub_s, 0b1010011, 0b0000100);
|
|
+ INSN(fmul_s, 0b1010011, 0b0001000);
|
|
+ INSN(fdiv_s, 0b1010011, 0b0001100);
|
|
+ INSN(fadd_d, 0b1010011, 0b0000001);
|
|
+ INSN(fsub_d, 0b1010011, 0b0000101);
|
|
+ INSN(fmul_d, 0b1010011, 0b0001001);
|
|
+ INSN(fdiv_d, 0b1010011, 0b0001101);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Whole Float and Double Conversion Instruction
|
|
+#define INSN(NAME, op, funct5, funct7) \
|
|
+ void NAME(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, rm); \
|
|
+ patch((address)&insn, 24, 20, funct5); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fcvt_s_w, 0b1010011, 0b00000, 0b1101000);
|
|
+ INSN(fcvt_s_wu, 0b1010011, 0b00001, 0b1101000);
|
|
+ INSN(fcvt_s_l, 0b1010011, 0b00010, 0b1101000);
|
|
+ INSN(fcvt_s_lu, 0b1010011, 0b00011, 0b1101000);
|
|
+ INSN(fcvt_d_w, 0b1010011, 0b00000, 0b1101001);
|
|
+ INSN(fcvt_d_wu, 0b1010011, 0b00001, 0b1101001);
|
|
+ INSN(fcvt_d_l, 0b1010011, 0b00010, 0b1101001);
|
|
+ INSN(fcvt_d_lu, 0b1010011, 0b00011, 0b1101001);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Conversion Instruction
|
|
+#define INSN(NAME, op, funct5, funct7) \
|
|
+ void NAME(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, rm); \
|
|
+ patch((address)&insn, 24, 20, funct5); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fcvt_w_s, 0b1010011, 0b00000, 0b1100000);
|
|
+ INSN(fcvt_l_s, 0b1010011, 0b00010, 0b1100000);
|
|
+ INSN(fcvt_wu_s, 0b1010011, 0b00001, 0b1100000);
|
|
+ INSN(fcvt_lu_s, 0b1010011, 0b00011, 0b1100000);
|
|
+ INSN(fcvt_w_d, 0b1010011, 0b00000, 0b1100001);
|
|
+ INSN(fcvt_wu_d, 0b1010011, 0b00001, 0b1100001);
|
|
+ INSN(fcvt_l_d, 0b1010011, 0b00010, 0b1100001);
|
|
+ INSN(fcvt_lu_d, 0b1010011, 0b00011, 0b1100001);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Move Instruction
|
|
+#define INSN(NAME, op, funct3, funct5, funct7) \
|
|
+ void NAME(FloatRegister Rd, Register Rs1) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 20, funct5); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000);
|
|
+ INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float and Double Conversion Instruction
|
|
+#define INSN(NAME, op, funct3, funct5, funct7) \
|
|
+ void NAME(Register Rd, FloatRegister Rs1) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 20, funct5); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000);
|
|
+ INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001);
|
|
+ INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000);
|
|
+ INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// ==========================
|
|
+// RISC-V Vector Extension
|
|
+// ==========================
|
|
+enum SEW {
|
|
+ e8,
|
|
+ e16,
|
|
+ e32,
|
|
+ e64,
|
|
+ RESERVED,
|
|
+};
|
|
+
|
|
+enum LMUL {
|
|
+ mf8 = 0b101,
|
|
+ mf4 = 0b110,
|
|
+ mf2 = 0b111,
|
|
+ m1 = 0b000,
|
|
+ m2 = 0b001,
|
|
+ m4 = 0b010,
|
|
+ m8 = 0b011,
|
|
+};
|
|
+
|
|
+enum VMA {
|
|
+ mu, // undisturbed
|
|
+ ma, // agnostic
|
|
+};
|
|
+
|
|
+enum VTA {
|
|
+ tu, // undisturbed
|
|
+ ta, // agnostic
|
|
+};
|
|
+
|
|
+static Assembler::SEW elembytes_to_sew(int ebytes) {
|
|
+ assert(ebytes > 0 && ebytes <= 8, "unsupported element size");
|
|
+ return (Assembler::SEW) exact_log2(ebytes);
|
|
+}
|
|
+
|
|
+static Assembler::SEW elemtype_to_sew(BasicType etype) {
|
|
+ return Assembler::elembytes_to_sew(type2aelembytes(etype));
|
|
+}
|
|
+
|
|
+#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \
|
|
+ if (vill == 1) { \
|
|
+    guarantee(((vlmul | vsew | vta | vma) == 0), \
|
|
+ "the other bits in vtype shall be zero"); \
|
|
+ } \
|
|
+ patch((address)&insn, lsb + 2, lsb, vlmul); \
|
|
+ patch((address)&insn, lsb + 5, lsb + 3, vsew); \
|
|
+ patch((address)&insn, lsb + 6, vta); \
|
|
+ patch((address)&insn, lsb + 7, vma); \
|
|
+ patch((address)&insn, hsb - 1, lsb + 8, 0); \
|
|
+ patch((address)&insn, hsb, vill)
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1, \
|
|
+ VMA vma = mu, VTA vta = tu, bool vill = false) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch_vtype(30, 20, lmul, sew, vta, vma, vill); \
|
|
+ patch((address)&insn, 31, 0); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(vsetvli, 0b1010111, 0b111);
|
|
+
|
|
+#undef INSN
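// A standalone sketch (illustrative names) of the vtype layout written by
// patch_vtype above: vlmul in bits [2:0], vsew in [5:3], vta in bit 6 and vma
// in bit 7, with the remaining bits zero when vill is not set.
#include <cassert>
#include <cstdint>

static uint32_t vtype_bits(uint32_t vlmul, uint32_t vsew, bool vta, bool vma) {
  return (vma ? 1u << 7 : 0u) | (vta ? 1u << 6 : 0u) | ((vsew & 0x7) << 3) | (vlmul & 0x7);
}

int main() {
  // SEW::e32 encodes as 0b010 and LMUL::m1 as 0b000 in the enums above, so
  // "e32, m1, ta, ma" should come out as 0xd0.
  assert(vtype_bits(/* vlmul = m1 */ 0b000, /* vsew = e32 */ 0b010,
                    /* vta */ true, /* vma */ true) == 0xd0);
  return 0;
}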
|
|
+
|
|
+#define INSN(NAME, op, funct3) \
|
|
+ void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \
|
|
+ VMA vma = mu, VTA vta = tu, bool vill = false) { \
|
|
+ unsigned insn = 0; \
|
|
+ guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 19, 15, imm); \
|
|
+ patch_vtype(29, 20, lmul, sew, vta, vma, vill); \
|
|
+ patch((address)&insn, 31, 30, 0b11); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(vsetivli, 0b1010111, 0b111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#undef patch_vtype
|
|
+
|
|
+enum VectorMask {
|
|
+ v0_t = 0b0,
|
|
+ unmasked = 0b1
|
|
+};
|
|
+
|
|
+#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6) \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 19, 15, Reg_or_Imm5); \
|
|
+ patch((address)&insn, 25, vm); \
|
|
+ patch((address)&insn, 31, 26, funct6); \
|
|
+ patch_reg((address)&insn, 7, Reg); \
|
|
+ patch_reg((address)&insn, 20, Vs2); \
|
|
+ emit(insn)
|
|
+
|
|
+// r2_vm
|
|
+#define INSN(NAME, op, funct3, Vs1, funct6) \
|
|
+ void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Mask
|
|
+ INSN(vpopc_m, 0b1010111, 0b010, 0b10000, 0b010000);
|
|
+ INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000);
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs1, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Integer Extension
|
|
+ INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010);
|
|
+ INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010);
|
|
+ INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010);
|
|
+ INSN(vsext_vf2, 0b1010111, 0b010, 0b00111, 0b010010);
|
|
+ INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010);
|
|
+ INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010);
|
|
+
|
|
+ // Vector Mask
|
|
+ INSN(vmsbf_m, 0b1010111, 0b010, 0b00001, 0b010100);
|
|
+ INSN(vmsif_m, 0b1010111, 0b010, 0b00011, 0b010100);
|
|
+ INSN(vmsof_m, 0b1010111, 0b010, 0b00010, 0b010100);
|
|
+ INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
|
|
+
|
|
+ // Vector Single-Width Floating-Point/Integer Type-Convert Instructions
|
|
+ INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010);
|
|
+ INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010);
|
|
+ INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010);
|
|
+ INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010);
|
|
+ INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010);
|
|
+ INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010);
|
|
+
|
|
+ // Vector Floating-Point Instruction
|
|
+ INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011);
|
|
+ INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// r2rd
|
|
+#define INSN(NAME, op, funct3, simm5, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2) { \
|
|
+ patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Whole Vector Register Move
|
|
+ INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111);
|
|
+ INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111);
|
|
+ INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111);
|
|
+ INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs1, vm, funct6) \
|
|
+ void NAME(FloatRegister Rd, VectorRegister Vs2) { \
|
|
+ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Floating-Point Move Instruction
|
|
+ INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs1, vm, funct6) \
|
|
+ void NAME(Register Rd, VectorRegister Vs2) { \
|
|
+ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Integer Scalar Move Instructions
|
|
+ INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 0b010000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// r_vm
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \
|
|
+ guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \
|
|
+ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Single-Width Bit Shift Instructions
|
|
+ INSN(vsra_vi, 0b1010111, 0b011, 0b101001);
|
|
+ INSN(vsrl_vi, 0b1010111, 0b011, 0b101000);
|
|
+ INSN(vsll_vi, 0b1010111, 0b011, 0b100101);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
|
|
+ INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011);
|
|
+ INSN(vfmsub_vv, 0b1010111, 0b001, 0b101010);
|
|
+ INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001);
|
|
+ INSN(vfmadd_vv, 0b1010111, 0b001, 0b101000);
|
|
+ INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111);
|
|
+ INSN(vfmsac_vv, 0b1010111, 0b001, 0b101110);
|
|
+ INSN(vfmacc_vv, 0b1010111, 0b001, 0b101100);
|
|
+ INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101);
|
|
+
|
|
+ // Vector Single-Width Integer Multiply-Add Instructions
|
|
+ INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011);
|
|
+ INSN(vmadd_vv, 0b1010111, 0b010, 0b101001);
|
|
+ INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111);
|
|
+ INSN(vmacc_vv, 0b1010111, 0b010, 0b101101);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Single-Width Integer Multiply-Add Instructions
|
|
+ INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011);
|
|
+ INSN(vmadd_vx, 0b1010111, 0b110, 0b101001);
|
|
+ INSN(vnmsac_vx, 0b1010111, 0b110, 0b101111);
|
|
+ INSN(vmacc_vx, 0b1010111, 0b110, 0b101101);
|
|
+
|
|
+ INSN(vrsub_vx, 0b1010111, 0b100, 0b000011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
|
|
+ INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011);
|
|
+ INSN(vfmsub_vf, 0b1010111, 0b101, 0b101010);
|
|
+ INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001);
|
|
+ INSN(vfmadd_vf, 0b1010111, 0b101, 0b101000);
|
|
+ INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111);
|
|
+ INSN(vfmsac_vf, 0b1010111, 0b101, 0b101110);
|
|
+ INSN(vfmacc_vf, 0b1010111, 0b101, 0b101100);
|
|
+ INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Single-Width Floating-Point Reduction Instructions
|
|
+ INSN(vfredsum_vs, 0b1010111, 0b001, 0b000001);
|
|
+ INSN(vfredosum_vs, 0b1010111, 0b001, 0b000011);
|
|
+ INSN(vfredmin_vs, 0b1010111, 0b001, 0b000101);
|
|
+ INSN(vfredmax_vs, 0b1010111, 0b001, 0b000111);
|
|
+
|
|
+ // Vector Single-Width Integer Reduction Instructions
|
|
+ INSN(vredsum_vs, 0b1010111, 0b010, 0b000000);
|
|
+ INSN(vredand_vs, 0b1010111, 0b010, 0b000001);
|
|
+ INSN(vredor_vs, 0b1010111, 0b010, 0b000010);
|
|
+ INSN(vredxor_vs, 0b1010111, 0b010, 0b000011);
|
|
+ INSN(vredminu_vs, 0b1010111, 0b010, 0b000100);
|
|
+ INSN(vredmin_vs, 0b1010111, 0b010, 0b000101);
|
|
+ INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110);
|
|
+ INSN(vredmax_vs, 0b1010111, 0b010, 0b000111);
|
|
+
|
|
+ // Vector Floating-Point Compare Instructions
|
|
+ INSN(vmfle_vv, 0b1010111, 0b001, 0b011001);
|
|
+ INSN(vmflt_vv, 0b1010111, 0b001, 0b011011);
|
|
+ INSN(vmfne_vv, 0b1010111, 0b001, 0b011100);
|
|
+ INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000);
|
|
+
|
|
+ // Vector Floating-Point Sign-Injection Instructions
|
|
+ INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010);
|
|
+ INSN(vfsgnjn_vv, 0b1010111, 0b001, 0b001001);
|
|
+ INSN(vfsgnj_vv, 0b1010111, 0b001, 0b001000);
|
|
+
|
|
+ // Vector Floating-Point MIN/MAX Instructions
|
|
+ INSN(vfmax_vv, 0b1010111, 0b001, 0b000110);
|
|
+ INSN(vfmin_vv, 0b1010111, 0b001, 0b000100);
|
|
+
|
|
+ // Vector Single-Width Floating-Point Multiply/Divide Instructions
|
|
+ INSN(vfdiv_vv, 0b1010111, 0b001, 0b100000);
|
|
+ INSN(vfmul_vv, 0b1010111, 0b001, 0b100100);
|
|
+
|
|
+ // Vector Single-Width Floating-Point Add/Subtract Instructions
|
|
+ INSN(vfsub_vv, 0b1010111, 0b001, 0b000010);
|
|
+ INSN(vfadd_vv, 0b1010111, 0b001, 0b000000);
|
|
+
|
|
+ // Vector Single-Width Fractional Multiply with Rounding and Saturation
|
|
+ INSN(vsmul_vv, 0b1010111, 0b000, 0b100111);
|
|
+
|
|
+ // Vector Integer Divide Instructions
|
|
+ INSN(vrem_vv, 0b1010111, 0b010, 0b100011);
|
|
+ INSN(vremu_vv, 0b1010111, 0b010, 0b100010);
|
|
+ INSN(vdiv_vv, 0b1010111, 0b010, 0b100001);
|
|
+ INSN(vdivu_vv, 0b1010111, 0b010, 0b100000);
|
|
+
|
|
+ // Vector Single-Width Integer Multiply Instructions
|
|
+ INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110);
|
|
+ INSN(vmulhu_vv, 0b1010111, 0b010, 0b100100);
|
|
+ INSN(vmulh_vv, 0b1010111, 0b010, 0b100111);
|
|
+ INSN(vmul_vv, 0b1010111, 0b010, 0b100101);
|
|
+
|
|
+ // Vector Integer Min/Max Instructions
|
|
+ INSN(vmax_vv, 0b1010111, 0b000, 0b000111);
|
|
+ INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110);
|
|
+ INSN(vmin_vv, 0b1010111, 0b000, 0b000101);
|
|
+ INSN(vminu_vv, 0b1010111, 0b000, 0b000100);
|
|
+
|
|
+ // Vector Integer Comparison Instructions
|
|
+ INSN(vmsle_vv, 0b1010111, 0b000, 0b011101);
|
|
+ INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100);
|
|
+ INSN(vmslt_vv, 0b1010111, 0b000, 0b011011);
|
|
+ INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010);
|
|
+ INSN(vmsne_vv, 0b1010111, 0b000, 0b011001);
|
|
+ INSN(vmseq_vv, 0b1010111, 0b000, 0b011000);
|
|
+
|
|
+ // Vector Single-Width Bit Shift Instructions
|
|
+ INSN(vsra_vv, 0b1010111, 0b000, 0b101001);
|
|
+ INSN(vsrl_vv, 0b1010111, 0b000, 0b101000);
|
|
+ INSN(vsll_vv, 0b1010111, 0b000, 0b100101);
|
|
+
|
|
+ // Vector Bitwise Logical Instructions
|
|
+ INSN(vxor_vv, 0b1010111, 0b000, 0b001011);
|
|
+ INSN(vor_vv, 0b1010111, 0b000, 0b001010);
|
|
+ INSN(vand_vv, 0b1010111, 0b000, 0b001001);
|
|
+
|
|
+ // Vector Single-Width Integer Add and Subtract
|
|
+ INSN(vsub_vv, 0b1010111, 0b000, 0b000010);
|
|
+ INSN(vadd_vv, 0b1010111, 0b000, 0b000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Integer Divide Instructions
|
|
+ INSN(vrem_vx, 0b1010111, 0b110, 0b100011);
|
|
+ INSN(vremu_vx, 0b1010111, 0b110, 0b100010);
|
|
+ INSN(vdiv_vx, 0b1010111, 0b110, 0b100001);
|
|
+ INSN(vdivu_vx, 0b1010111, 0b110, 0b100000);
|
|
+
|
|
+ // Vector Single-Width Integer Multiply Instructions
|
|
+ INSN(vmulhsu_vx, 0b1010111, 0b110, 0b100110);
|
|
+ INSN(vmulhu_vx, 0b1010111, 0b110, 0b100100);
|
|
+ INSN(vmulh_vx, 0b1010111, 0b110, 0b100111);
|
|
+ INSN(vmul_vx, 0b1010111, 0b110, 0b100101);
|
|
+
|
|
+ // Vector Integer Min/Max Instructions
|
|
+ INSN(vmax_vx, 0b1010111, 0b100, 0b000111);
|
|
+ INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110);
|
|
+ INSN(vmin_vx, 0b1010111, 0b100, 0b000101);
|
|
+ INSN(vminu_vx, 0b1010111, 0b100, 0b000100);
|
|
+
|
|
+ // Vector Integer Comparison Instructions
|
|
+ INSN(vmsgt_vx, 0b1010111, 0b100, 0b011111);
|
|
+ INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110);
|
|
+ INSN(vmsle_vx, 0b1010111, 0b100, 0b011101);
|
|
+ INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100);
|
|
+ INSN(vmslt_vx, 0b1010111, 0b100, 0b011011);
|
|
+ INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010);
|
|
+ INSN(vmsne_vx, 0b1010111, 0b100, 0b011001);
|
|
+ INSN(vmseq_vx, 0b1010111, 0b100, 0b011000);
|
|
+
|
|
+ // Vector Narrowing Integer Right Shift Instructions
|
|
+ INSN(vnsra_wx, 0b1010111, 0b100, 0b101101);
|
|
+ INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100);
|
|
+
|
|
+ // Vector Single-Width Bit Shift Instructions
|
|
+ INSN(vsra_vx, 0b1010111, 0b100, 0b101001);
|
|
+ INSN(vsrl_vx, 0b1010111, 0b100, 0b101000);
|
|
+ INSN(vsll_vx, 0b1010111, 0b100, 0b100101);
|
|
+
|
|
+ // Vector Bitwise Logical Instructions
|
|
+ INSN(vxor_vx, 0b1010111, 0b100, 0b001011);
|
|
+ INSN(vor_vx, 0b1010111, 0b100, 0b001010);
|
|
+ INSN(vand_vx, 0b1010111, 0b100, 0b001001);
|
|
+
|
|
+ // Vector Single-Width Integer Add and Subtract
|
|
+ INSN(vsub_vx, 0b1010111, 0b100, 0b000010);
|
|
+ INSN(vadd_vx, 0b1010111, 0b100, 0b000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Floating-Point Compare Instructions
|
|
+ INSN(vmfge_vf, 0b1010111, 0b101, 0b011111);
|
|
+ INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101);
|
|
+ INSN(vmfle_vf, 0b1010111, 0b101, 0b011001);
|
|
+ INSN(vmflt_vf, 0b1010111, 0b101, 0b011011);
|
|
+ INSN(vmfne_vf, 0b1010111, 0b101, 0b011100);
|
|
+ INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000);
|
|
+
|
|
+ // Vector Floating-Point Sign-Injection Instructions
|
|
+ INSN(vfsgnjx_vf, 0b1010111, 0b101, 0b001010);
|
|
+ INSN(vfsgnjn_vf, 0b1010111, 0b101, 0b001001);
|
|
+ INSN(vfsgnj_vf, 0b1010111, 0b101, 0b001000);
|
|
+
|
|
+ // Vector Floating-Point MIN/MAX Instructions
|
|
+ INSN(vfmax_vf, 0b1010111, 0b101, 0b000110);
|
|
+ INSN(vfmin_vf, 0b1010111, 0b101, 0b000100);
|
|
+
|
|
+ // Vector Single-Width Floating-Point Multiply/Divide Instructions
|
|
+ INSN(vfdiv_vf, 0b1010111, 0b101, 0b100000);
|
|
+ INSN(vfmul_vf, 0b1010111, 0b101, 0b100100);
|
|
+ INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001);
|
|
+
|
|
+ // Vector Single-Width Floating-Point Add/Subtract Instructions
|
|
+ INSN(vfsub_vf, 0b1010111, 0b101, 0b000010);
|
|
+ INSN(vfadd_vf, 0b1010111, 0b101, 0b000000);
|
|
+ INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { \
|
|
+ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \
|
|
+ patch_VArith(op, Vd, funct3, (uint32_t)imm & 0x1f, Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111);
|
|
+ INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110);
|
|
+ INSN(vmsle_vi, 0b1010111, 0b011, 0b011101);
|
|
+ INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100);
|
|
+ INSN(vmsne_vi, 0b1010111, 0b011, 0b011001);
|
|
+ INSN(vmseq_vi, 0b1010111, 0b011, 0b011000);
|
|
+ INSN(vxor_vi, 0b1010111, 0b011, 0b001011);
|
|
+ INSN(vor_vi, 0b1010111, 0b011, 0b001010);
|
|
+ INSN(vand_vi, 0b1010111, 0b011, 0b001001);
|
|
+ INSN(vadd_vi, 0b1010111, 0b011, 0b000000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(VectorRegister Vd, int32_t imm, VectorRegister Vs2, VectorMask vm = unmasked) { \
|
|
+ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \
|
|
+ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ INSN(vrsub_vi, 0b1010111, 0b011, 0b000011);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \
|
|
+ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Compress Instruction
|
|
+ INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111);
|
|
+
|
|
+ // Vector Mask-Register Logical Instructions
|
|
+ INSN(vmxnor_mm, 0b1010111, 0b010, 0b1, 0b011111);
|
|
+ INSN(vmornot_mm, 0b1010111, 0b010, 0b1, 0b011100);
|
|
+ INSN(vmnor_mm, 0b1010111, 0b010, 0b1, 0b011110);
|
|
+ INSN(vmor_mm, 0b1010111, 0b010, 0b1, 0b011010);
|
|
+ INSN(vmxor_mm, 0b1010111, 0b010, 0b1, 0b011011);
|
|
+ INSN(vmandnot_mm, 0b1010111, 0b010, 0b1, 0b011000);
|
|
+ INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101);
|
|
+ INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, int32_t imm) { \
|
|
+ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \
|
|
+ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Integer Move Instructions
|
|
+ INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, FloatRegister Rs1) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Floating-Point Scalar Move Instructions
|
|
+ INSN(vfmv_s_f, 0b1010111, 0b101, v0, 0b1, 0b010000);
|
|
+ // Vector Floating-Point Move Instruction
|
|
+ INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorRegister Vs1) { \
|
|
+ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Vector Integer Move Instructions
|
|
+ INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, Vs2, vm, funct6) \
|
|
+ void NAME(VectorRegister Vd, Register Rs1) { \
|
|
+ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
|
|
+ }
|
|
+
|
|
+ // Integer Scalar Move Instructions
|
|
+ INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000);
|
|
+
|
|
+ // Vector Integer Move Instructions
|
|
+ INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111);
|
|
+
|
|
+#undef INSN
|
|
+#undef patch_VArith
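+
+  // Illustrative usage of the vector-arithmetic emitters generated above (__ stands for
+  // an assembler pointer, as elsewhere in this patch). Operand order is destination
+  // first, then vs2, then the vs1/rs1/immediate operand, with an optional VectorMask
+  // argument that defaults to unmasked:
+  //   __ vadd_vv(v1, v2, v3);    // vadd.vv v1, v2, v3   => v1 = v2 + v3
+  //   __ vadd_vx(v1, v2, x10);   // vadd.vx v1, v2, x10  => v1 = v2 + x10
+  //   __ vadd_vi(v1, v2, 7);     // vadd.vi v1, v2, 7    => v1 = v2 + 7 (5-bit signed imm)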
|
|
+
|
|
+#define INSN(NAME, op, funct13, funct6) \
|
|
+ void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 24, 12, funct13); \
|
|
+ patch((address)&insn, 25, vm); \
|
|
+ patch((address)&insn, 31, 26, funct6); \
|
|
+ patch_reg((address)&insn, 7, Vd); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ // Vector Element Index Instruction
|
|
+ INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+enum Nf {
|
|
+ g1 = 0b000,
|
|
+ g2 = 0b001,
|
|
+ g3 = 0b010,
|
|
+ g4 = 0b011,
|
|
+ g5 = 0b100,
|
|
+ g6 = 0b101,
|
|
+ g7 = 0b110,
|
|
+ g8 = 0b111
|
|
+};
|
|
+
|
|
+#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, width); \
|
|
+ patch((address)&insn, 24, 20, Reg_or_umop); \
|
|
+ patch((address)&insn, 25, vm); \
|
|
+ patch((address)&insn, 27, 26, mop); \
|
|
+ patch((address)&insn, 28, mew); \
|
|
+ patch((address)&insn, 31, 29, nf); \
|
|
+ patch_reg((address)&insn, 7, VReg); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn)
|
|
+
|
|
+#define INSN(NAME, op, lumop, vm, mop, nf) \
|
|
+ void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 0, bool mew = false) { \
|
|
+ guarantee(is_unsigned_imm_in_range(width, 3, 0), "width is invalid"); \
|
|
+ patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector Load/Store Instructions
|
|
+ INSN(vl1r_v, 0b0000111, 0b01000, 0b1, 0b00, g1);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, width, sumop, vm, mop, mew, nf) \
|
|
+ void NAME(VectorRegister Vs3, Register Rs1) { \
|
|
+ patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector Load/Store Instructions
|
|
+ INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// r2_nfvm
|
|
+#define INSN(NAME, op, width, umop, mop, mew) \
|
|
+ void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) { \
|
|
+ patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector Unit-Stride Instructions
|
|
+ INSN(vle1_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0);
|
|
+ INSN(vse1_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, width, umop, mop, mew) \
|
|
+ void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \
|
|
+ patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector Unit-Stride Instructions
|
|
+ INSN(vle8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0);
|
|
+ INSN(vle16_v, 0b0000111, 0b101, 0b00000, 0b00, 0b0);
|
|
+ INSN(vle32_v, 0b0000111, 0b110, 0b00000, 0b00, 0b0);
|
|
+ INSN(vle64_v, 0b0000111, 0b111, 0b00000, 0b00, 0b0);
|
|
+
|
|
+ // Vector unit-stride fault-only-first Instructions
|
|
+ INSN(vle8ff_v, 0b0000111, 0b000, 0b10000, 0b00, 0b0);
|
|
+ INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0);
|
|
+ INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0);
|
|
+ INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0);
|
|
+
|
|
+ INSN(vse8_v, 0b0100111, 0b000, 0b00000, 0b00, 0b0);
|
|
+ INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0);
|
|
+ INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0);
|
|
+ INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, width, mop, mew) \
|
|
+ void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \
|
|
+ patch_VLdSt(op, Vd, width, Rs1, Vs2->encoding_nocheck(), vm, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector unordered indexed load instructions
|
|
+ INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0);
|
|
+ INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0);
|
|
+ INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0);
|
|
+ INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0);
|
|
+
|
|
+ // Vector ordered indexed load instructions
|
|
+ INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0);
|
|
+ INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0);
|
|
+ INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0);
|
|
+ INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0);
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, width, mop, mew) \
|
|
+ void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) { \
|
|
+ patch_VLdSt(op, Vd, width, Rs1, Rs2->encoding_nocheck(), vm, mop, mew, nf); \
|
|
+ }
|
|
+
|
|
+ // Vector Strided Instructions
|
|
+ INSN(vlse8_v, 0b0000111, 0b000, 0b10, 0b0);
|
|
+ INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0);
|
|
+ INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0);
|
|
+ INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0);
|
|
+
|
|
+#undef INSN
|
|
+#undef patch_VLdSt
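+
+  // Example usage of the vector load/store emitters above (illustrative only):
+  //   __ vle32_v(v1, x10);        // unit-stride load of 32-bit elements from (x10)
+  //   __ vse32_v(v1, x10);        // unit-stride store of 32-bit elements to (x10)
+  //   __ vlse64_v(v2, x10, x11);  // strided load of 64-bit elements, byte stride in x11
+  //   __ vl1r_v(v3, x10);         // load one whole vector register from (x10)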
|
|
+
|
|
+// ====================================
|
|
+// RISC-V Bit-Manipulation Extension
|
|
+// Currently only Zba and Zbb are supported.
|
|
+// ====================================
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, Register Rs2) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ patch_reg((address)&insn, 20, Rs2); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(add_uw, 0b0111011, 0b000, 0b0000100);
|
|
+ INSN(rol, 0b0110011, 0b001, 0b0110000);
|
|
+ INSN(rolw, 0b0111011, 0b001, 0b0110000);
|
|
+ INSN(ror, 0b0110011, 0b101, 0b0110000);
|
|
+ INSN(rorw, 0b0111011, 0b101, 0b0110000);
|
|
+ INSN(sh1add, 0b0110011, 0b010, 0b0010000);
|
|
+ INSN(sh2add, 0b0110011, 0b100, 0b0010000);
|
|
+ INSN(sh3add, 0b0110011, 0b110, 0b0010000);
|
|
+ INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000);
|
|
+ INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000);
|
|
+ INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000);
|
|
+ INSN(andn, 0b0110011, 0b111, 0b0100000);
|
|
+ INSN(orn, 0b0110011, 0b110, 0b0100000);
|
|
+ INSN(xnor, 0b0110011, 0b100, 0b0100000);
|
|
+ INSN(max, 0b0110011, 0b110, 0b0000101);
|
|
+ INSN(maxu, 0b0110011, 0b111, 0b0000101);
|
|
+ INSN(min, 0b0110011, 0b100, 0b0000101);
|
|
+ INSN(minu, 0b0110011, 0b101, 0b0000101);
|
|
+
|
|
+#undef INSN
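+
+  // Example usage of the Zba/Zbb forms above (illustrative only):
+  //   __ sh3add(x10, x11, x12);   // x10 = (x11 << 3) + x12 (scaled-index addressing)
+  //   __ andn(x10, x11, x12);     // x10 = x11 & ~x12
+  //   __ minu(x10, x11, x12);     // x10 = unsigned minimum of x11 and x12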
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct12) \
|
|
+ void NAME(Register Rd, Register Rs1) { \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 31, 20, funct12); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(rev8, 0b0010011, 0b101, 0b011010111000);
|
|
+ INSN(sext_b, 0b0010011, 0b001, 0b011000000100);
|
|
+ INSN(sext_h, 0b0010011, 0b001, 0b011000000101);
|
|
+ INSN(zext_h, 0b0111011, 0b100, 0b000010000000);
|
|
+ INSN(clz, 0b0010011, 0b001, 0b011000000000);
|
|
+ INSN(clzw, 0b0011011, 0b001, 0b011000000000);
|
|
+ INSN(ctz, 0b0010011, 0b001, 0b011000000001);
|
|
+ INSN(ctzw, 0b0011011, 0b001, 0b011000000001);
|
|
+ INSN(cpop, 0b0010011, 0b001, 0b011000000010);
|
|
+ INSN(cpopw, 0b0011011, 0b001, 0b011000000010);
|
|
+ INSN(orc_b, 0b0010011, 0b101, 0b001010000111);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct6) \
|
|
+ void NAME(Register Rd, Register Rs1, unsigned shamt) {\
|
|
+ guarantee(shamt <= 0x3f, "Shamt is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 25, 20, shamt); \
|
|
+ patch((address)&insn, 31, 26, funct6); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(rori, 0b0010011, 0b101, 0b011000);
|
|
+ INSN(slli_uw, 0b0011011, 0b001, 0b000010);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, op, funct3, funct7) \
|
|
+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \
|
|
+ guarantee(shamt <= 0x1f, "Shamt is invalid"); \
|
|
+ unsigned insn = 0; \
|
|
+ patch((address)&insn, 6, 0, op); \
|
|
+ patch((address)&insn, 14, 12, funct3); \
|
|
+ patch((address)&insn, 24, 20, shamt); \
|
|
+ patch((address)&insn, 31, 25, funct7); \
|
|
+ patch_reg((address)&insn, 7, Rd); \
|
|
+ patch_reg((address)&insn, 15, Rs1); \
|
|
+ emit(insn); \
|
|
+ }
|
|
+
|
|
+ INSN(roriw, 0b0011011, 0b101, 0b0110000);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+ void bgt(Register Rs, Register Rt, const address &dest);
|
|
+ void ble(Register Rs, Register Rt, const address &dest);
|
|
+ void bgtu(Register Rs, Register Rt, const address &dest);
|
|
+ void bleu(Register Rs, Register Rt, const address &dest);
|
|
+ void bgt(Register Rs, Register Rt, Label &l, bool is_far = false);
|
|
+ void ble(Register Rs, Register Rt, Label &l, bool is_far = false);
|
|
+ void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
|
|
+ void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
|
|
+
|
|
+ typedef void (Assembler::* jal_jalr_insn)(Register Rt, address dest);
|
|
+ typedef void (Assembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
|
|
+ typedef void (Assembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
|
|
+ typedef void (Assembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
|
|
+
|
|
+ void wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn,
|
|
+ compare_and_branch_label_insn neg_insn, bool is_far);
|
|
+ void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
|
|
+ void wrap_label(Register r, Label &L, jal_jalr_insn insn);
|
|
+
|
|
+ // Computational pseudo instructions
|
|
+ void add(Register Rd, Register Rn, int64_t increment, Register temp = t0);
|
|
+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
|
|
+
|
|
+ void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0);
|
|
+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);
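+  // (temp serves as a scratch register for materializing an increment/decrement
+  //  that does not fit the 12-bit immediate form.)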
|
|
+
|
|
+ // RVB pseudo instructions
|
|
+ // zero extend word
|
|
+ void zext_w(Register Rd, Register Rs);
|
|
+
|
|
+ Assembler(CodeBuffer* code) : AbstractAssembler(code) {
|
|
+ }
|
|
+
|
|
+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
|
|
+ Register tmp,
|
|
+ int offset) {
|
|
+ ShouldNotCallThis();
|
|
+ return RegisterOrConstant();
|
|
+ }
|
|
+
|
|
+ // Stack overflow checking
|
|
+ virtual void bang_stack_with_offset(int offset) { Unimplemented(); }
|
|
+
|
|
+ static bool operand_valid_for_add_immediate(long imm) {
|
|
+ return is_imm_in_range(imm, 12, 0);
|
|
+ }
|
|
+
|
|
+ // The maximum range of a branch is fixed for the riscv
|
|
+ // architecture.
|
|
+ static const unsigned long branch_range = 1 * M;
|
|
+
|
|
+ static bool reachable_from_branch_at(address branch, address target) {
|
|
+ return uabs(target - branch) < branch_range;
|
|
+ }
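+
+  // For example, a direct jal at address pc can only reach targets within
+  // (pc - 1M, pc + 1M); anything farther is reached via auipc+jalr sequences
+  // (far_call/far_jump in the MacroAssembler).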
|
|
+
|
|
+ static Assembler::SEW elemBytes_to_sew(int esize) {
|
|
+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size");
|
|
+ return (Assembler::SEW) exact_log2(esize);
|
|
+ }
|
|
+
|
|
+ virtual ~Assembler() {}
|
|
+
|
|
+};
|
|
+
|
|
+class BiasedLockingCounters;
|
|
+
|
|
+#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..82b825db7
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
|
|
@@ -0,0 +1,47 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
|
|
+#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
|
|
+
|
|
+#include "asm/assembler.inline.hpp"
|
|
+#include "asm/codeBuffer.hpp"
|
|
+#include "code/codeCache.hpp"
|
|
+
|
|
+inline bool is_imm_in_range(long value, unsigned bits, unsigned align_bits) {
|
|
+ intx sign_bits = (value >> (bits + align_bits - 1));
|
|
+ return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1));
|
|
+}
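+
+// For example, a 12-bit signed immediate (I-type) is checked with bits = 12, align_bits = 0:
+//   is_imm_in_range(2047, 12, 0)  -> true
+//   is_imm_in_range(2048, 12, 0)  -> false
+//   is_imm_in_range(-2048, 12, 0) -> true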
|
|
+
|
|
+inline bool is_unsigned_imm_in_range(intx value, unsigned bits, unsigned align_bits) {
|
|
+ return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0);
|
|
+}
|
|
+
|
|
+inline bool is_offset_in_range(intx offset, unsigned bits) {
|
|
+ return is_imm_in_range(offset, bits, 0);
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..d0ac7ef46
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp
|
|
@@ -0,0 +1,169 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_BYTES_RISCV_HPP
|
|
+#define CPU_RISCV_BYTES_RISCV_HPP
|
|
+
|
|
+#include "memory/allocation.hpp"
|
|
+
|
|
+class Bytes: AllStatic {
|
|
+ public:
|
|
+ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
|
|
+ // RISCV needs to check for alignment.
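+  // (Misaligned memory accesses may trap or be emulated slowly on RISC-V hardware, so
+  //  the accessors below split an unaligned access into smaller naturally-aligned ones.)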
|
|
+
|
|
+ // Forward declarations of the compiler-dependent implementation
|
|
+ static inline u2 swap_u2(u2 x);
|
|
+ static inline u4 swap_u4(u4 x);
|
|
+ static inline u8 swap_u8(u8 x);
|
|
+
|
|
+ static inline u2 get_native_u2(address p) {
|
|
+ if ((intptr_t(p) & 1) == 0) {
|
|
+ return *(u2*)p;
|
|
+ } else {
|
|
+ return ((u2)(p[1]) << 8) |
|
|
+ ((u2)(p[0]));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static inline u4 get_native_u4(address p) {
|
|
+ switch (intptr_t(p) & 3) {
|
|
+ case 0:
|
|
+ return *(u4*)p;
|
|
+
|
|
+ case 2:
|
|
+ return ((u4)(((u2*)p)[1]) << 16) |
|
|
+ ((u4)(((u2*)p)[0]));
|
|
+
|
|
+ default:
|
|
+ return ((u4)(p[3]) << 24) |
|
|
+ ((u4)(p[2]) << 16) |
|
|
+ ((u4)(p[1]) << 8) |
|
|
+ ((u4)(p[0]));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static inline u8 get_native_u8(address p) {
|
|
+ switch (intptr_t(p) & 7) {
|
|
+ case 0:
|
|
+ return *(u8*)p;
|
|
+
|
|
+ case 4:
|
|
+ return ((u8)(((u4*)p)[1]) << 32) |
|
|
+ ((u8)(((u4*)p)[0]));
|
|
+
|
|
+ case 2:
|
|
+ case 6:
|
|
+ return ((u8)(((u2*)p)[3]) << 48) |
|
|
+ ((u8)(((u2*)p)[2]) << 32) |
|
|
+ ((u8)(((u2*)p)[1]) << 16) |
|
|
+ ((u8)(((u2*)p)[0]));
|
|
+
|
|
+ default:
|
|
+ return ((u8)(p[7]) << 56) |
|
|
+ ((u8)(p[6]) << 48) |
|
|
+ ((u8)(p[5]) << 40) |
|
|
+ ((u8)(p[4]) << 32) |
|
|
+ ((u8)(p[3]) << 24) |
|
|
+ ((u8)(p[2]) << 16) |
|
|
+ ((u8)(p[1]) << 8) |
|
|
+ (u8)(p[0]);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static inline void put_native_u2(address p, u2 x) {
|
|
+ if ((intptr_t(p) & 1) == 0) {
|
|
+ *(u2*)p = x;
|
|
+ } else {
|
|
+ p[1] = x >> 8;
|
|
+ p[0] = x;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static inline void put_native_u4(address p, u4 x) {
|
|
+ switch (intptr_t(p) & 3) {
|
|
+ case 0:
|
|
+ *(u4*)p = x;
|
|
+ break;
|
|
+
|
|
+ case 2:
|
|
+ ((u2*)p)[1] = x >> 16;
|
|
+ ((u2*)p)[0] = x;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ ((u1*)p)[3] = x >> 24;
|
|
+ ((u1*)p)[2] = x >> 16;
|
|
+ ((u1*)p)[1] = x >> 8;
|
|
+ ((u1*)p)[0] = x;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static inline void put_native_u8(address p, u8 x) {
|
|
+ switch (intptr_t(p) & 7) {
|
|
+ case 0:
|
|
+ *(u8*)p = x;
|
|
+ break;
|
|
+
|
|
+ case 4:
|
|
+ ((u4*)p)[1] = x >> 32;
|
|
+ ((u4*)p)[0] = x;
|
|
+ break;
|
|
+
|
|
+ case 2:
|
|
+ case 6:
|
|
+ ((u2*)p)[3] = x >> 48;
|
|
+ ((u2*)p)[2] = x >> 32;
|
|
+ ((u2*)p)[1] = x >> 16;
|
|
+ ((u2*)p)[0] = x;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ ((u1*)p)[7] = x >> 56;
|
|
+ ((u1*)p)[6] = x >> 48;
|
|
+ ((u1*)p)[5] = x >> 40;
|
|
+ ((u1*)p)[4] = x >> 32;
|
|
+ ((u1*)p)[3] = x >> 24;
|
|
+ ((u1*)p)[2] = x >> 16;
|
|
+ ((u1*)p)[1] = x >> 8;
|
|
+ ((u1*)p)[0] = x;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
|
|
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
|
|
+ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
|
|
+ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
|
|
+
|
|
+ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
|
|
+ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
|
|
+ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
|
|
+};
|
|
+
|
|
+#include OS_CPU_HEADER_INLINE(bytes)
|
|
+
|
|
+#endif // CPU_RISCV_BYTES_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..522eedd29
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
|
|
@@ -0,0 +1,352 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "c1/c1_CodeStubs.hpp"
|
|
+#include "c1/c1_FrameMap.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+
|
|
+
|
|
+#define __ ce->masm()->
|
|
+
|
|
+void CounterOverflowStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ __ bind(_entry);
|
|
+ Metadata *m = _method->as_constant_ptr()->as_metadata();
|
|
+ __ mov_metadata(t0, m);
|
|
+ ce->store_parameter(t0, 1);
|
|
+ ce->store_parameter(_bci, 0);
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array)
|
|
+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false)
|
|
+{
|
|
+ assert(info != NULL, "must have info");
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)
|
|
+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true)
|
|
+{
|
|
+ assert(info != NULL, "must have info");
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+void RangeCheckStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ __ bind(_entry);
|
|
+ if (_info->deoptimize_on_exception()) {
|
|
+ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
|
|
+ __ far_call(RuntimeAddress(a));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ debug_only(__ should_not_reach_here());
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (_index->is_cpu_register()) {
|
|
+ __ mv(t0, _index->as_register());
|
|
+ } else {
|
|
+ __ mv(t0, _index->as_jint());
|
|
+ }
|
|
+ Runtime1::StubID stub_id;
|
|
+ if (_throw_index_out_of_bounds_exception) {
|
|
+ stub_id = Runtime1::throw_index_exception_id;
|
|
+ } else {
|
|
+ assert(_array != NULL, "sanity");
|
|
+ __ mv(t1, _array->as_pointer_register());
|
|
+ stub_id = Runtime1::throw_range_check_failed_id;
|
|
+ }
|
|
+ int32_t off = 0;
|
|
+ __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off);
|
|
+ __ jalr(ra, ra, off);
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ debug_only(__ should_not_reach_here());
|
|
+}
|
|
+
|
|
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info)
|
|
+{
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+void PredicateFailedStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ __ bind(_entry);
|
|
+ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
|
|
+ __ far_call(RuntimeAddress(a));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ debug_only(__ should_not_reach_here());
|
|
+}
|
|
+
|
|
+void DivByZeroStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ if (_offset != -1) {
|
|
+ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
|
|
+ }
|
|
+ __ bind(_entry);
|
|
+ __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+#ifdef ASSERT
|
|
+ __ should_not_reach_here();
|
|
+#endif
|
|
+}
|
|
+
|
|
+// Implementation of NewInstanceStub
|
|
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id)
|
|
+{
|
|
+ _result = result;
|
|
+ _klass = klass;
|
|
+ _klass_reg = klass_reg;
|
|
+ _info = new CodeEmitInfo(info);
|
|
+ assert(stub_id == Runtime1::new_instance_id ||
|
|
+ stub_id == Runtime1::fast_new_instance_id ||
|
|
+ stub_id == Runtime1::fast_new_instance_init_check_id,
|
|
+ "need new_instance id");
|
|
+ _stub_id = stub_id;
|
|
+}
|
|
+
|
|
+void NewInstanceStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(__ rsp_offset() == 0, "frame size should be fixed");
|
|
+ __ bind(_entry);
|
|
+ __ mv(x13, _klass_reg->as_register());
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+  assert(_result->as_register() == x10, "result must be in x10");
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+// Implementation of NewTypeArrayStub
|
|
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info)
|
|
+{
|
|
+ _klass_reg = klass_reg;
|
|
+ _length = length;
|
|
+ _result = result;
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(__ rsp_offset() == 0, "frame size should be fixed");
|
|
+ __ bind(_entry);
|
|
+  assert(_length->as_register() == x9, "length must be in x9");
|
|
+  assert(_klass_reg->as_register() == x13, "klass_reg must be in x13");
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+  assert(_result->as_register() == x10, "result must be in x10");
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+// Implementation of NewObjectArrayStub
|
|
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info)
|
|
+{
|
|
+ _klass_reg = klass_reg;
|
|
+ _result = result;
|
|
+ _length = length;
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(__ rsp_offset() == 0, "frame size should be fixed");
|
|
+ __ bind(_entry);
|
|
+  assert(_length->as_register() == x9, "length must be in x9");
|
|
+  assert(_klass_reg->as_register() == x13, "klass_reg must be in x13");
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ assert(_result->as_register() == x10, "result must in x10");
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+// Implementation of MonitorAccessStubs
|
|
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
|
|
+: MonitorAccessStub(obj_reg, lock_reg)
|
|
+{
|
|
+ _info = new CodeEmitInfo(info);
|
|
+}
|
|
+
|
|
+void MonitorEnterStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(__ rsp_offset() == 0, "frame size should be fixed");
|
|
+ __ bind(_entry);
|
|
+ ce->store_parameter(_obj_reg->as_register(), 1);
|
|
+ ce->store_parameter(_lock_reg->as_register(), 0);
|
|
+ Runtime1::StubID enter_id;
|
|
+ if (ce->compilation()->has_fpu_code()) {
|
|
+ enter_id = Runtime1::monitorenter_id;
|
|
+ } else {
|
|
+ enter_id = Runtime1::monitorenter_nofpu_id;
|
|
+ }
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+void MonitorExitStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ __ bind(_entry);
|
|
+ if (_compute_lock) {
|
|
+ // lock_reg was destroyed by fast unlocking attempt => recompute it
|
|
+ ce->monitor_address(_monitor_ix, _lock_reg);
|
|
+ }
|
|
+ ce->store_parameter(_lock_reg->as_register(), 0);
|
|
+ // note: non-blocking leaf routine => no call info needed
|
|
+ Runtime1::StubID exit_id;
|
|
+ if (ce->compilation()->has_fpu_code()) {
|
|
+ exit_id = Runtime1::monitorexit_id;
|
|
+ } else {
|
|
+ exit_id = Runtime1::monitorexit_nofpu_id;
|
|
+ }
|
|
+ __ la(ra, _continuation);
|
|
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));
|
|
+}
|
|
+
|
|
+int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;
|
|
+
|
|
+void PatchingStub::align_patch_site(MacroAssembler* masm) {}
|
|
+
|
|
+// RISCV doesn't use C1 runtime patching. When patching is needed, we just deoptimize.
|
|
+void PatchingStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(false, "RISCV should not use C1 runtime patching");
|
|
+}
|
|
+
|
|
+void DeoptimizeStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ __ bind(_entry);
|
|
+ ce->store_parameter(_trap_request, 0);
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
|
|
+ ce->add_call_info_here(_info);
|
|
+ DEBUG_ONLY(__ should_not_reach_here());
|
|
+}
|
|
+
|
|
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ address a = NULL;
|
|
+ if (_info->deoptimize_on_exception()) {
|
|
+ // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
|
|
+ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
|
|
+ } else {
|
|
+ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
|
|
+ }
|
|
+
|
|
+ ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
|
|
+ __ bind(_entry);
|
|
+ __ far_call(RuntimeAddress(a));
|
|
+ ce->add_call_info_here(_info);
|
|
+ ce->verify_oop_map(_info);
|
|
+ debug_only(__ should_not_reach_here());
|
|
+}
|
|
+
|
|
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ assert(__ rsp_offset() == 0, "frame size should be fixed");
|
|
+
|
|
+ __ bind(_entry);
|
|
+ // pass the object in a tmp register because all other registers
|
|
+ // must be preserved
|
|
+ if (_obj->is_cpu_register()) {
|
|
+ __ mv(t0, _obj->as_register());
|
|
+ }
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1);
|
|
+ ce->add_call_info_here(_info);
|
|
+ debug_only(__ should_not_reach_here());
|
|
+}
|
|
+
|
|
+void ArrayCopyStub::emit_code(LIR_Assembler* ce)
|
|
+{
|
|
+ // ---------------slow case: call to native-----------------
|
|
+ __ bind(_entry);
|
|
+ // Figure out where the args should go
|
|
+ // This should really convert the IntrinsicID to the Method* and signature
|
|
+ // but I don't know how to do that.
|
|
+ //
|
|
+ const int args_num = 5;
|
|
+ VMRegPair args[args_num];
|
|
+ BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT };
|
|
+ SharedRuntime::java_calling_convention(signature, args, args_num, true);
|
|
+
|
|
+ // push parameters
|
|
+ Register r[args_num];
|
|
+ int i = 0;
|
|
+ r[i++] = src()->as_register();
|
|
+ r[i++] = src_pos()->as_register();
|
|
+ r[i++] = dst()->as_register();
|
|
+ r[i++] = dst_pos()->as_register();
|
|
+ r[i++] = length()->as_register();
|
|
+
|
|
+ // next registers will get stored on the stack
|
|
+ for (int j = 0; j < args_num; j++) {
|
|
+ VMReg r_1 = args[j].first();
|
|
+ if (r_1->is_stack()) {
|
|
+ int st_off = r_1->reg2stack() * wordSize;
|
|
+ __ sd(r[j], Address(sp, st_off));
|
|
+ } else {
|
|
+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg ");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ce->align_call(lir_static_call);
|
|
+
|
|
+ ce->emit_static_call_stub();
|
|
+ if (ce->compilation()->bailed_out()) {
|
|
+ return; // CodeCache is full
|
|
+ }
|
|
+ Address resolve(SharedRuntime::get_resolve_static_call_stub(),
|
|
+ relocInfo::static_call_type);
|
|
+ address call = __ trampoline_call(resolve);
|
|
+ if (call == NULL) {
|
|
+ ce->bailout("trampoline stub overflow");
|
|
+ return;
|
|
+ }
|
|
+ ce->add_call_info_here(info());
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
|
|
+ __ incrementw(Address(t1));
|
|
+#endif
|
|
+
|
|
+ __ j(_continuation);
|
|
+}
|
|
+
|
|
+#undef __
|
|
diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..a0f411352
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp
|
|
@@ -0,0 +1,85 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_DEFS_RISCV_HPP
|
|
+#define CPU_RISCV_C1_DEFS_RISCV_HPP
|
|
+
|
|
+// native word offsets from memory address (little endian)
|
|
+enum {
|
|
+ pd_lo_word_offset_in_bytes = 0,
|
|
+ pd_hi_word_offset_in_bytes = BytesPerWord
|
|
+};
|
|
+
|
|
+// explicit rounding operations are required to implement the strictFP mode
|
|
+enum {
|
|
+ pd_strict_fp_requires_explicit_rounding = false
|
|
+};
|
|
+
|
|
+// registers
|
|
+enum {
|
|
+ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission
|
|
+ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of float registers used during code emission
|
|
+
|
|
+ // caller saved
|
|
+ pd_nof_caller_save_cpu_regs_frame_map = 13, // number of registers killed by calls
|
|
+ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of float registers killed by calls
|
|
+
|
|
+ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map,
|
|
+ pd_last_callee_saved_reg = 21,
|
|
+
|
|
+ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1,
|
|
+
|
|
+ pd_nof_cpu_regs_reg_alloc
|
|
+ = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator
|
|
+ pd_nof_fpu_regs_reg_alloc = 32, // number of float registers that are visible to register allocator
|
|
+
|
|
+ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan
|
|
+ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan
|
|
+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these
|
|
+
|
|
+ pd_first_cpu_reg = 0,
|
|
+ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1,
|
|
+ pd_first_byte_reg = 0,
|
|
+ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1,
|
|
+
|
|
+ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
|
|
+ pd_last_fpu_reg = pd_first_fpu_reg + 31,
|
|
+
|
|
+ pd_first_callee_saved_fpu_reg_1 = 8 + pd_first_fpu_reg,
|
|
+ pd_last_callee_saved_fpu_reg_1 = 9 + pd_first_fpu_reg,
|
|
+ pd_first_callee_saved_fpu_reg_2 = 18 + pd_first_fpu_reg,
|
|
+ pd_last_callee_saved_fpu_reg_2 = 27 + pd_first_fpu_reg
|
|
+};
|
|
+
|
|
+
|
|
+// Encoding of float value in debug info. This is true on x86 where
|
|
+// floats are extended to doubles when stored in the stack, false for
|
|
+// RISCV where floats and doubles are stored in their native form.
|
|
+enum {
|
|
+ pd_float_saved_as_double = false
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_C1_DEFS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..d4876625c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp
|
|
@@ -0,0 +1,31 @@
|
|
+/*
|
|
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+//--------------------------------------------------------
|
|
+// FpuStackSim
|
|
+//--------------------------------------------------------
|
|
+
|
|
+// No FPU stack on RISCV
|
|
diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..4b43bc4d7
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp
|
|
@@ -0,0 +1,33 @@
|
|
+/*
|
|
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
|
|
+#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
|
|
+
|
|
+// No FPU stack on RISCV
|
|
+class FpuStackSim;
|
|
+
|
|
+#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..94b4e0f0b
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp
|
|
@@ -0,0 +1,391 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "c1/c1_FrameMap.hpp"
|
|
+#include "c1/c1_LIR.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+
|
|
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool)
|
|
+{
|
|
+ LIR_Opr opr = LIR_OprFact::illegalOpr;
|
|
+ VMReg r_1 = reg->first();
|
|
+ VMReg r_2 = reg->second();
|
|
+ if (r_1->is_stack()) {
|
|
+ // Convert stack slot to an SP offset
|
|
+ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
|
|
+ // so we must add it in here.
|
|
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
|
|
+ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type));
|
|
+ } else if (r_1->is_Register()) {
|
|
+ Register reg1 = r_1->as_Register();
|
|
+ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
|
|
+ Register reg2 = r_2->as_Register();
|
|
+ assert(reg2 == reg1, "must be same register");
|
|
+ opr = as_long_opr(reg1);
|
|
+ } else if (type == T_OBJECT || type == T_ARRAY) {
|
|
+ opr = as_oop_opr(reg1);
|
|
+ } else if (type == T_METADATA) {
|
|
+ opr = as_metadata_opr(reg1);
|
|
+ } else if (type == T_ADDRESS) {
|
|
+ opr = as_address_opr(reg1);
|
|
+ } else {
|
|
+ opr = as_opr(reg1);
|
|
+ }
|
|
+ } else if (r_1->is_FloatRegister()) {
|
|
+ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
|
|
+ int num = r_1->as_FloatRegister()->encoding();
|
|
+ if (type == T_FLOAT) {
|
|
+ opr = LIR_OprFact::single_fpu(num);
|
|
+ } else {
|
|
+ opr = LIR_OprFact::double_fpu(num);
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ return opr;
|
|
+}
|
|
+
|
|
+LIR_Opr FrameMap::zr_opr;
|
|
+LIR_Opr FrameMap::r1_opr;
|
|
+LIR_Opr FrameMap::r2_opr;
|
|
+LIR_Opr FrameMap::r3_opr;
|
|
+LIR_Opr FrameMap::r4_opr;
|
|
+LIR_Opr FrameMap::r5_opr;
|
|
+LIR_Opr FrameMap::r6_opr;
|
|
+LIR_Opr FrameMap::r7_opr;
|
|
+LIR_Opr FrameMap::r8_opr;
|
|
+LIR_Opr FrameMap::r9_opr;
|
|
+LIR_Opr FrameMap::r10_opr;
|
|
+LIR_Opr FrameMap::r11_opr;
|
|
+LIR_Opr FrameMap::r12_opr;
|
|
+LIR_Opr FrameMap::r13_opr;
|
|
+LIR_Opr FrameMap::r14_opr;
|
|
+LIR_Opr FrameMap::r15_opr;
|
|
+LIR_Opr FrameMap::r16_opr;
|
|
+LIR_Opr FrameMap::r17_opr;
|
|
+LIR_Opr FrameMap::r18_opr;
|
|
+LIR_Opr FrameMap::r19_opr;
|
|
+LIR_Opr FrameMap::r20_opr;
|
|
+LIR_Opr FrameMap::r21_opr;
|
|
+LIR_Opr FrameMap::r22_opr;
|
|
+LIR_Opr FrameMap::r23_opr;
|
|
+LIR_Opr FrameMap::r24_opr;
|
|
+LIR_Opr FrameMap::r25_opr;
|
|
+LIR_Opr FrameMap::r26_opr;
|
|
+LIR_Opr FrameMap::r27_opr;
|
|
+LIR_Opr FrameMap::r28_opr;
|
|
+LIR_Opr FrameMap::r29_opr;
|
|
+LIR_Opr FrameMap::r30_opr;
|
|
+LIR_Opr FrameMap::r31_opr;
|
|
+
|
|
+LIR_Opr FrameMap::fp_opr;
|
|
+LIR_Opr FrameMap::sp_opr;
|
|
+
|
|
+LIR_Opr FrameMap::receiver_opr;
|
|
+
|
|
+LIR_Opr FrameMap::zr_oop_opr;
|
|
+LIR_Opr FrameMap::r1_oop_opr;
|
|
+LIR_Opr FrameMap::r2_oop_opr;
|
|
+LIR_Opr FrameMap::r3_oop_opr;
|
|
+LIR_Opr FrameMap::r4_oop_opr;
|
|
+LIR_Opr FrameMap::r5_oop_opr;
|
|
+LIR_Opr FrameMap::r6_oop_opr;
|
|
+LIR_Opr FrameMap::r7_oop_opr;
|
|
+LIR_Opr FrameMap::r8_oop_opr;
|
|
+LIR_Opr FrameMap::r9_oop_opr;
|
|
+LIR_Opr FrameMap::r10_oop_opr;
|
|
+LIR_Opr FrameMap::r11_oop_opr;
|
|
+LIR_Opr FrameMap::r12_oop_opr;
|
|
+LIR_Opr FrameMap::r13_oop_opr;
|
|
+LIR_Opr FrameMap::r14_oop_opr;
|
|
+LIR_Opr FrameMap::r15_oop_opr;
|
|
+LIR_Opr FrameMap::r16_oop_opr;
|
|
+LIR_Opr FrameMap::r17_oop_opr;
|
|
+LIR_Opr FrameMap::r18_oop_opr;
|
|
+LIR_Opr FrameMap::r19_oop_opr;
|
|
+LIR_Opr FrameMap::r20_oop_opr;
|
|
+LIR_Opr FrameMap::r21_oop_opr;
|
|
+LIR_Opr FrameMap::r22_oop_opr;
|
|
+LIR_Opr FrameMap::r23_oop_opr;
|
|
+LIR_Opr FrameMap::r24_oop_opr;
|
|
+LIR_Opr FrameMap::r25_oop_opr;
|
|
+LIR_Opr FrameMap::r26_oop_opr;
|
|
+LIR_Opr FrameMap::r27_oop_opr;
|
|
+LIR_Opr FrameMap::r28_oop_opr;
|
|
+LIR_Opr FrameMap::r29_oop_opr;
|
|
+LIR_Opr FrameMap::r30_oop_opr;
|
|
+LIR_Opr FrameMap::r31_oop_opr;
|
|
+
|
|
+LIR_Opr FrameMap::t0_opr;
|
|
+LIR_Opr FrameMap::t1_opr;
|
|
+LIR_Opr FrameMap::t0_long_opr;
|
|
+LIR_Opr FrameMap::t1_long_opr;
|
|
+
|
|
+LIR_Opr FrameMap::r10_metadata_opr;
|
|
+LIR_Opr FrameMap::r11_metadata_opr;
|
|
+LIR_Opr FrameMap::r12_metadata_opr;
|
|
+LIR_Opr FrameMap::r13_metadata_opr;
|
|
+LIR_Opr FrameMap::r14_metadata_opr;
|
|
+LIR_Opr FrameMap::r15_metadata_opr;
|
|
+
|
|
+LIR_Opr FrameMap::long10_opr;
|
|
+LIR_Opr FrameMap::long11_opr;
|
|
+LIR_Opr FrameMap::fpu10_float_opr;
|
|
+LIR_Opr FrameMap::fpu10_double_opr;
|
|
+
|
|
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
|
|
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
|
|
+
|
|
+//--------------------------------------------------------
|
|
+// FrameMap
|
|
+//--------------------------------------------------------
|
|
+// |---f31--|
|
|
+// |---..---|
|
|
+// |---f28--|
|
|
+// |---f27--|<---pd_last_callee_saved_fpu_reg_2
|
|
+// |---..---|
|
|
+// |---f18--|<---pd_first_callee_saved_fpu_reg_2
|
|
+// |---f17--|
|
|
+// |---..---|
|
|
+// |---f10--|
|
|
+// |---f9---|<---pd_last_callee_saved_fpu_reg_1
|
|
+// |---f8---|<---pd_first_callee_saved_fpu_reg_1
|
|
+// |---f7---|
|
|
+// |---..---|
|
|
+// |---f0---|
|
|
+// |---x27--|
|
|
+// |---x23--|
|
|
+// |---x8---|
|
|
+// |---x4---|
|
|
+// |---x3---|
|
|
+// |---x2---|
|
|
+// |---x1---|
|
|
+// |---x0---|
|
|
+// |---x26--|<---pd_last_callee_saved_reg
|
|
+// |---..---|
|
|
+// |---x18--|
|
|
+// |---x9---|<---pd_first_callee_saved_reg
|
|
+// |---x31--|
|
|
+// |---..---|
|
|
+// |---x28--|
|
|
+// |---x17--|
|
|
+// |---..---|
|
|
+// |---x10--|
|
|
+// |---x7---|
|
|
+
|
|
+void FrameMap::initialize() {
|
|
+ assert(!_init_done, "once");
|
|
+
|
|
+ int i = 0;
|
|
+
|
|
+ // caller save register
|
|
+ map_register(i, x7); r7_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x10); r10_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x11); r11_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x12); r12_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x13); r13_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x14); r14_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x15); r15_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x16); r16_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x17); r17_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x28); r28_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x29); r29_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x30); r30_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x31); r31_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+
|
|
+ // callee save register
|
|
+ map_register(i, x9); r9_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x18); r18_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x19); r19_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x20); r20_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x21); r21_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x22); r22_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x24); r24_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x25); r25_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+ map_register(i, x26); r26_opr = LIR_OprFact::single_cpu(i); i++;
|
|
+
|
|
+ // special register
|
|
+ map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr
|
|
+ map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra
|
|
+ map_register(i, x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp
|
|
+ map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp
|
|
+ map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread
|
|
+ map_register(i, x8); r8_opr = LIR_OprFact::single_cpu(i); i++; // fp
|
|
+ map_register(i, x23); r23_opr = LIR_OprFact::single_cpu(i); i++; // java thread
|
|
+ map_register(i, x27); r27_opr = LIR_OprFact::single_cpu(i); i++; // heapbase
|
|
+
|
|
+ // tmp register
|
|
+ map_register(i, x5); r5_opr = LIR_OprFact::single_cpu(i); i++; // t0
|
|
+ map_register(i, x6); r6_opr = LIR_OprFact::single_cpu(i); i++; // t1
|
|
+
|
|
+ t0_opr = r5_opr;
|
|
+ t1_opr = r6_opr;
|
|
+ t0_long_opr = LIR_OprFact::double_cpu(r5_opr->cpu_regnr(), r5_opr->cpu_regnr());
|
|
+ t1_long_opr = LIR_OprFact::double_cpu(r6_opr->cpu_regnr(), r6_opr->cpu_regnr());
|
|
+
|
|
+ long10_opr = LIR_OprFact::double_cpu(r10_opr->cpu_regnr(), r10_opr->cpu_regnr());
|
|
+ long11_opr = LIR_OprFact::double_cpu(r11_opr->cpu_regnr(), r11_opr->cpu_regnr());
|
|
+
|
|
+ fpu10_float_opr = LIR_OprFact::single_fpu(10);
|
|
+ fpu10_double_opr = LIR_OprFact::double_fpu(10);
|
|
+
|
|
+ i = 0;
|
|
+ _caller_save_cpu_regs[i++] = r7_opr;
|
|
+ _caller_save_cpu_regs[i++] = r10_opr;
|
|
+ _caller_save_cpu_regs[i++] = r11_opr;
|
|
+ _caller_save_cpu_regs[i++] = r12_opr;
|
|
+ _caller_save_cpu_regs[i++] = r13_opr;
|
|
+ _caller_save_cpu_regs[i++] = r14_opr;
|
|
+ _caller_save_cpu_regs[i++] = r15_opr;
|
|
+ _caller_save_cpu_regs[i++] = r16_opr;
|
|
+ _caller_save_cpu_regs[i++] = r17_opr;
|
|
+ _caller_save_cpu_regs[i++] = r28_opr;
|
|
+ _caller_save_cpu_regs[i++] = r29_opr;
|
|
+ _caller_save_cpu_regs[i++] = r30_opr;
|
|
+ _caller_save_cpu_regs[i++] = r31_opr;
|
|
+
|
|
+ _init_done = true;
|
|
+
|
|
+ zr_oop_opr = as_oop_opr(x0);
|
|
+ r1_oop_opr = as_oop_opr(x1);
|
|
+ r2_oop_opr = as_oop_opr(x2);
|
|
+ r3_oop_opr = as_oop_opr(x3);
|
|
+ r4_oop_opr = as_oop_opr(x4);
|
|
+ r5_oop_opr = as_oop_opr(x5);
|
|
+ r6_oop_opr = as_oop_opr(x6);
|
|
+ r7_oop_opr = as_oop_opr(x7);
|
|
+ r8_oop_opr = as_oop_opr(x8);
|
|
+ r9_oop_opr = as_oop_opr(x9);
|
|
+ r10_oop_opr = as_oop_opr(x10);
|
|
+ r11_oop_opr = as_oop_opr(x11);
|
|
+ r12_oop_opr = as_oop_opr(x12);
|
|
+ r13_oop_opr = as_oop_opr(x13);
|
|
+ r14_oop_opr = as_oop_opr(x14);
|
|
+ r15_oop_opr = as_oop_opr(x15);
|
|
+ r16_oop_opr = as_oop_opr(x16);
|
|
+ r17_oop_opr = as_oop_opr(x17);
|
|
+ r18_oop_opr = as_oop_opr(x18);
|
|
+ r19_oop_opr = as_oop_opr(x19);
|
|
+ r20_oop_opr = as_oop_opr(x20);
|
|
+ r21_oop_opr = as_oop_opr(x21);
|
|
+ r22_oop_opr = as_oop_opr(x22);
|
|
+ r23_oop_opr = as_oop_opr(x23);
|
|
+ r24_oop_opr = as_oop_opr(x24);
|
|
+ r25_oop_opr = as_oop_opr(x25);
|
|
+ r26_oop_opr = as_oop_opr(x26);
|
|
+ r27_oop_opr = as_oop_opr(x27);
|
|
+ r28_oop_opr = as_oop_opr(x28);
|
|
+ r29_oop_opr = as_oop_opr(x29);
|
|
+ r30_oop_opr = as_oop_opr(x30);
|
|
+ r31_oop_opr = as_oop_opr(x31);
|
|
+
|
|
+ r10_metadata_opr = as_metadata_opr(x10);
|
|
+ r11_metadata_opr = as_metadata_opr(x11);
|
|
+ r12_metadata_opr = as_metadata_opr(x12);
|
|
+ r13_metadata_opr = as_metadata_opr(x13);
|
|
+ r14_metadata_opr = as_metadata_opr(x14);
|
|
+ r15_metadata_opr = as_metadata_opr(x15);
|
|
+
|
|
+ sp_opr = as_pointer_opr(sp);
|
|
+ fp_opr = as_pointer_opr(fp);
|
|
+
|
|
+ VMRegPair regs;
|
|
+ BasicType sig_bt = T_OBJECT;
|
|
+ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true);
|
|
+ receiver_opr = as_oop_opr(regs.first()->as_Register());
|
|
+
|
|
+ for (i = 0; i < nof_caller_save_fpu_regs; i++) {
|
|
+ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
|
|
+ return Address(sp, in_bytes(sp_offset));
|
|
+}
|
|
+
|
|
+
|
|
+// ----------------mapping-----------------------
|
|
+// All mapping is based on fp addressing, except for simple leaf methods where we access
|
|
+// the locals relative to sp (and no frame is built)
|
|
+
|
|
+
|
|
+// Frame for simple leaf methods (quick entries)
|
|
+//
|
|
+// +----------+
|
|
+// | ret addr | <- TOS
|
|
+// +----------+
|
|
+// | args |
|
|
+// | ...... |
|
|
+
|
|
+// Frame for standard methods
|
|
+//
|
|
+// | .........| <- TOS
|
|
+// | locals |
|
|
+// +----------+
|
|
+// | old fp, |
|
|
+// +----------+
|
|
+// | ret addr |
|
|
+// +----------+
|
|
+// | args | <- FP
|
|
+// | .........|
|
|
+
|
|
+
|
|
+// For OopMaps, map a local variable or spill index to a VMRegImpl name.
|
|
+// This is the offset from sp() in the frame of the slot for the index,
|
|
+// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register).
|
|
+//
|
|
+// framesize +
|
|
+// stack0 stack0 0 <- VMReg
|
|
+// | | <registers> |
|
|
+// ...........|..............|.............|
|
|
+// 0 1 2 3 x x 4 5 6 ... | <- local indices
|
|
+// ^ ^ sp() ( x x indicate link
|
|
+// | | and return addr)
|
|
+// arguments non-argument locals
|
|
+
|
|
+
|
|
+VMReg FrameMap::fpu_regname (int n) {
|
|
+ // Return the VMReg name for the fpu stack slot "n"
|
|
+ // A spilled fpu stack slot comprises two single-word OptoRegs.
|
|
+ return as_FloatRegister(n)->as_VMReg();
|
|
+}
|
|
+
|
|
+LIR_Opr FrameMap::stack_pointer()
|
|
+{
|
|
+ return FrameMap::sp_opr;
|
|
+}
|
|
+
|
|
+// JSR 292
|
|
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
|
|
+ return LIR_OprFact::illegalOpr; // Not needed on riscv
|
|
+}
|
|
+
|
|
+bool FrameMap::validate_frame() {
|
|
+ return true;
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..f600c2f6f
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp
|
|
@@ -0,0 +1,149 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
|
|
+#define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
|
|
+
|
|
+// On RISCV the frame looks as follows:
|
|
+//
|
|
+// +-----------------------------+---------+----------------------------------------+----------------+-----------
|
|
+// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling .
|
|
+// +-----------------------------+---------+----------------------------------------+----------------+-----------
|
|
+
|
|
+ public:
|
|
+ static const int pd_c_runtime_reserved_arg_size;
|
|
+
|
|
+ enum {
|
|
+ first_available_sp_in_frame = 0,
|
|
+ frame_pad_in_bytes = 16,
|
|
+ nof_reg_args = 8
|
|
+ };
|
|
+
|
|
+ public:
|
|
+ static LIR_Opr receiver_opr;
|
|
+
|
|
+ static LIR_Opr zr_opr;
|
|
+ static LIR_Opr r1_opr;
|
|
+ static LIR_Opr r2_opr;
|
|
+ static LIR_Opr r3_opr;
|
|
+ static LIR_Opr r4_opr;
|
|
+ static LIR_Opr r5_opr;
|
|
+ static LIR_Opr r6_opr;
|
|
+ static LIR_Opr r7_opr;
|
|
+ static LIR_Opr r8_opr;
|
|
+ static LIR_Opr r9_opr;
|
|
+ static LIR_Opr r10_opr;
|
|
+ static LIR_Opr r11_opr;
|
|
+ static LIR_Opr r12_opr;
|
|
+ static LIR_Opr r13_opr;
|
|
+ static LIR_Opr r14_opr;
|
|
+ static LIR_Opr r15_opr;
|
|
+ static LIR_Opr r16_opr;
|
|
+ static LIR_Opr r17_opr;
|
|
+ static LIR_Opr r18_opr;
|
|
+ static LIR_Opr r19_opr;
|
|
+ static LIR_Opr r20_opr;
|
|
+ static LIR_Opr r21_opr;
|
|
+ static LIR_Opr r22_opr;
|
|
+ static LIR_Opr r23_opr;
|
|
+ static LIR_Opr r24_opr;
|
|
+ static LIR_Opr r25_opr;
|
|
+ static LIR_Opr r26_opr;
|
|
+ static LIR_Opr r27_opr;
|
|
+ static LIR_Opr r28_opr;
|
|
+ static LIR_Opr r29_opr;
|
|
+ static LIR_Opr r30_opr;
|
|
+ static LIR_Opr r31_opr;
|
|
+ static LIR_Opr fp_opr;
|
|
+ static LIR_Opr sp_opr;
|
|
+
|
|
+ static LIR_Opr zr_oop_opr;
|
|
+ static LIR_Opr r1_oop_opr;
|
|
+ static LIR_Opr r2_oop_opr;
|
|
+ static LIR_Opr r3_oop_opr;
|
|
+ static LIR_Opr r4_oop_opr;
|
|
+ static LIR_Opr r5_oop_opr;
|
|
+ static LIR_Opr r6_oop_opr;
|
|
+ static LIR_Opr r7_oop_opr;
|
|
+ static LIR_Opr r8_oop_opr;
|
|
+ static LIR_Opr r9_oop_opr;
|
|
+ static LIR_Opr r10_oop_opr;
|
|
+ static LIR_Opr r11_oop_opr;
|
|
+ static LIR_Opr r12_oop_opr;
|
|
+ static LIR_Opr r13_oop_opr;
|
|
+ static LIR_Opr r14_oop_opr;
|
|
+ static LIR_Opr r15_oop_opr;
|
|
+ static LIR_Opr r16_oop_opr;
|
|
+ static LIR_Opr r17_oop_opr;
|
|
+ static LIR_Opr r18_oop_opr;
|
|
+ static LIR_Opr r19_oop_opr;
|
|
+ static LIR_Opr r20_oop_opr;
|
|
+ static LIR_Opr r21_oop_opr;
|
|
+ static LIR_Opr r22_oop_opr;
|
|
+ static LIR_Opr r23_oop_opr;
|
|
+ static LIR_Opr r24_oop_opr;
|
|
+ static LIR_Opr r25_oop_opr;
|
|
+ static LIR_Opr r26_oop_opr;
|
|
+ static LIR_Opr r27_oop_opr;
|
|
+ static LIR_Opr r28_oop_opr;
|
|
+ static LIR_Opr r29_oop_opr;
|
|
+ static LIR_Opr r30_oop_opr;
|
|
+ static LIR_Opr r31_oop_opr;
|
|
+
|
|
+ static LIR_Opr t0_opr;
|
|
+ static LIR_Opr t1_opr;
|
|
+ static LIR_Opr t0_long_opr;
|
|
+ static LIR_Opr t1_long_opr;
|
|
+
|
|
+ static LIR_Opr r10_metadata_opr;
|
|
+ static LIR_Opr r11_metadata_opr;
|
|
+ static LIR_Opr r12_metadata_opr;
|
|
+ static LIR_Opr r13_metadata_opr;
|
|
+ static LIR_Opr r14_metadata_opr;
|
|
+ static LIR_Opr r15_metadata_opr;
|
|
+
|
|
+ static LIR_Opr long10_opr;
|
|
+ static LIR_Opr long11_opr;
|
|
+ static LIR_Opr fpu10_float_opr;
|
|
+ static LIR_Opr fpu10_double_opr;
|
|
+
|
|
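+ // On riscv64 a jlong or a pointer fits in one 64-bit register, so both halves
+ // of the double_cpu operand deliberately name the same register.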
+ static LIR_Opr as_long_opr(Register r) {
|
|
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
|
|
+ }
|
|
+ static LIR_Opr as_pointer_opr(Register r) {
|
|
+ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
|
|
+ }
|
|
+
|
|
+ // VMReg name for spilled physical FPU stack slot n
|
|
+ static VMReg fpu_regname(int n);
|
|
+
|
|
+ static bool is_caller_save_register(LIR_Opr opr) { return true; }
|
|
+ static bool is_caller_save_register(Register r) { return true; }
|
|
+
|
|
+ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
|
|
+ static int last_cpu_reg() { return pd_last_cpu_reg; }
|
|
+
|
|
+#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..a846d60ae
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
|
|
@@ -0,0 +1,287 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+
|
|
+#ifndef PRODUCT
|
|
+#define COMMENT(x) do { __ block_comment(x); } while (0)
|
|
+#else
|
|
+#define COMMENT(x)
|
|
+#endif
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) {
|
|
+
|
|
+ // opcode check
|
|
+ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem");
|
|
+ bool is_irem = (code == lir_irem);
|
|
+
|
|
+ // operand check
|
|
+ assert(left->is_single_cpu(), "left must be register");
|
|
+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant");
|
|
+ assert(result->is_single_cpu(), "result must be register");
|
|
+ Register lreg = left->as_register();
|
|
+ Register dreg = result->as_register();
|
|
+
|
|
+ // power-of-2 constant check and codegen
|
|
+ if (right->is_constant()) {
|
|
+ int c = right->as_constant_ptr()->as_jint();
|
|
+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
|
|
+ if (is_irem) {
|
|
+ if (c == 1) {
|
|
+ // move 0 to dreg if divisor is 1
|
|
+ __ mv(dreg, zr);
|
|
+ } else {
|
|
+ unsigned int shift = exact_log2(c);
|
|
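+ // Java irem rounds toward zero: bias a negative dividend by (c - 1) before
+ // masking the low 'shift' bits, then remove the bias again, i.e.
+ //   t0 = (lreg < 0) ? c - 1 : 0;  dreg = ((lreg + t0) & (c - 1)) - t0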
+ __ sraiw(t0, lreg, 0x1f);
|
|
+ __ srliw(t0, t0, BitsPerInt - shift);
|
|
+ __ addw(t1, lreg, t0);
|
|
+ if (is_imm_in_range(c - 1, 12, 0)) {
|
|
+ __ andi(t1, t1, c - 1);
|
|
+ } else {
|
|
+ __ zero_extend(t1, t1, shift);
|
|
+ }
|
|
+ __ subw(dreg, t1, t0);
|
|
+ }
|
|
+ } else {
|
|
+ if (c == 1) {
|
|
+ // move lreg to dreg if divisor is 1
|
|
+ __ mv(dreg, lreg);
|
|
+ } else {
|
|
+ unsigned int shift = exact_log2(c);
|
|
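+ // Java idiv rounds toward zero while an arithmetic shift rounds toward
+ // negative infinity, so add (c - 1) to negative dividends first, i.e.
+ //   t0 = (lreg < 0) ? c - 1 : 0;  dreg = (lreg + t0) >> shift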
+ __ sraiw(t0, lreg, 0x1f);
|
|
+ if (is_imm_in_range(c - 1, 12, 0)) {
|
|
+ __ andi(t0, t0, c - 1);
|
|
+ } else {
|
|
+ __ zero_extend(t0, t0, shift);
|
|
+ }
|
|
+ __ addw(dreg, t0, lreg);
|
|
+ __ sraiw(dreg, dreg, shift);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ Register rreg = right->as_register();
|
|
+ __ corrected_idivl(dreg, lreg, rreg, is_irem);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right,
|
|
+ Register lreg, Register dreg) {
|
|
+ // cpu register - constant
|
|
+ jlong c;
|
|
+
|
|
+ switch (right->type()) {
|
|
+ case T_LONG:
|
|
+ c = right->as_constant_ptr()->as_jlong(); break;
|
|
+ case T_INT: // fall through
|
|
+ case T_ADDRESS:
|
|
+ c = right->as_constant_ptr()->as_jint(); break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ c = 0; // unreachable
|
|
+ }
|
|
+
|
|
+ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
|
|
+ if (c == 0 && dreg == lreg) {
|
|
+ COMMENT("effective nop elided");
|
|
+ return;
|
|
+ }
|
|
+ switch (left->type()) {
|
|
+ case T_INT:
|
|
+ switch (code) {
|
|
+ case lir_add: __ addw(dreg, lreg, c); break;
|
|
+ case lir_sub: __ subw(dreg, lreg, c); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ break;
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ADDRESS:
|
|
+ switch (code) {
|
|
+ case lir_add: __ add(dreg, lreg, c); break;
|
|
+ case lir_sub: __ sub(dreg, lreg, c); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
|
|
+ Register lreg = left->as_register();
|
|
+ Register dreg = as_reg(dest);
|
|
+
|
|
+ if (right->is_single_cpu()) {
|
|
+ // cpu register - cpu register
|
|
+ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be");
|
|
+ Register rreg = right->as_register();
|
|
+ switch (code) {
|
|
+ case lir_add: __ addw(dest->as_register(), lreg, rreg); break;
|
|
+ case lir_sub: __ subw(dest->as_register(), lreg, rreg); break;
|
|
+ case lir_mul: __ mulw(dest->as_register(), lreg, rreg); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (right->is_double_cpu()) {
|
|
+ Register rreg = right->as_register_lo();
|
|
+ // single_cpu + double_cpu; can happen with obj_long
|
|
+ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
|
|
+ switch (code) {
|
|
+ case lir_add: __ add(dreg, lreg, rreg); break;
|
|
+ case lir_sub: __ sub(dreg, lreg, rreg); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (right->is_constant()) {
|
|
+ arith_op_single_cpu_right_constant(code, left, right, lreg, dreg);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
|
|
+ Register lreg_lo = left->as_register_lo();
|
|
+
|
|
+ if (right->is_double_cpu()) {
|
|
+ // cpu register - cpu register
|
|
+ Register rreg_lo = right->as_register_lo();
|
|
+ switch (code) {
|
|
+ case lir_add: __ add(dest->as_register_lo(), lreg_lo, rreg_lo); break;
|
|
+ case lir_sub: __ sub(dest->as_register_lo(), lreg_lo, rreg_lo); break;
|
|
+ case lir_mul: __ mul(dest->as_register_lo(), lreg_lo, rreg_lo); break;
|
|
+ case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false); break;
|
|
+ case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true); break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (right->is_constant()) {
|
|
+ jlong c = right->as_constant_ptr()->as_jlong();
|
|
+ Register dreg = as_reg(dest);
|
|
+ switch (code) {
|
|
+ case lir_add:
|
|
+ case lir_sub:
|
|
+ if (c == 0 && dreg == lreg_lo) {
|
|
+ COMMENT("effective nop elided");
|
|
+ return;
|
|
+ }
|
|
+ code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c);
|
|
+ break;
|
|
+ case lir_div:
|
|
+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
|
|
+ if (c == 1) {
|
|
+ // move lreg_lo to dreg if divisor is 1
|
|
+ __ mv(dreg, lreg_lo);
|
|
+ } else {
|
|
+ unsigned int shift = exact_log2(c);
|
|
+ // use t0 as intermediate result register
|
|
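+ // Same round-toward-zero correction as the 32-bit idiv case: bias negative
+ // dividends by (c - 1) before the arithmetic shift.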
+ __ srai(t0, lreg_lo, 0x3f);
|
|
+ if (is_imm_in_range(c - 1, 12, 0)) {
|
|
+ __ andi(t0, t0, c - 1);
|
|
+ } else {
|
|
+ __ zero_extend(t0, t0, shift);
|
|
+ }
|
|
+ __ add(dreg, t0, lreg_lo);
|
|
+ __ srai(dreg, dreg, shift);
|
|
+ }
|
|
+ break;
|
|
+ case lir_rem:
|
|
+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
|
|
+ if (c == 1) {
|
|
+ // move 0 to dreg if divisor is 1
|
|
+ __ mv(dreg, zr);
|
|
+ } else {
|
|
+ unsigned int shift = exact_log2(c);
|
|
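+ // Same bias-mask-unbias sequence as the 32-bit irem case, applied to the
+ // full 64-bit value.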
+ __ srai(t0, lreg_lo, 0x3f);
|
|
+ __ srli(t0, t0, BitsPerLong - shift);
|
|
+ __ add(t1, lreg_lo, t0);
|
|
+ if (is_imm_in_range(c - 1, 12, 0)) {
|
|
+ __ andi(t1, t1, c - 1);
|
|
+ } else {
|
|
+ __ zero_extend(t1, t1, shift);
|
|
+ }
|
|
+ __ sub(dreg, t1, t0);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
|
|
+ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
|
|
+ switch (code) {
|
|
+ case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
|
|
+ case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
|
|
+ case lir_mul_strictfp: // fall through
|
|
+ case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
|
|
+ case lir_div_strictfp: // fall through
|
|
+ case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
|
|
+ if (right->is_double_fpu()) {
|
|
+ // fpu register - fpu register
|
|
+ switch (code) {
|
|
+ case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
|
|
+ case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
|
|
+ case lir_mul_strictfp: // fall through
|
|
+ case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
|
|
+ case lir_div_strictfp: // fall through
|
|
+ case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
|
|
+ CodeEmitInfo* info, bool pop_fpu_stack) {
|
|
+ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
|
|
+
|
|
+ if (left->is_single_cpu()) {
|
|
+ arith_op_single_cpu(code, left, right, dest);
|
|
+ } else if (left->is_double_cpu()) {
|
|
+ arith_op_double_cpu(code, left, right, dest);
|
|
+ } else if (left->is_single_fpu()) {
|
|
+ arith_op_single_fpu(code, left, right, dest);
|
|
+ } else if (left->is_double_fpu()) {
|
|
+ arith_op_double_fpu(code, left, right, dest);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+#undef __
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..93530ef58
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp
|
|
@@ -0,0 +1,36 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
|
|
+#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
|
|
+
|
|
+ // arith_op sub functions
|
|
+ void arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);
|
|
+ void arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);
|
|
+ void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);
|
|
+ void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);
|
|
+ void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg);
|
|
+ void arithmetic_idiv(LIR_Op3* op, bool is_irem);
|
|
+#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..31f8d6a4a
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
|
|
@@ -0,0 +1,387 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "ci/ciArrayKlass.hpp"
|
|
+#include "oops/objArrayKlass.hpp"
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+
|
|
+void LIR_Assembler::generic_arraycopy(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, CodeStub *stub) {
|
|
+ assert(src == x11 && src_pos == x12, "mismatch in calling convention");
|
|
+ // Save the arguments in case the generic arraycopy fails and we
|
|
+ // have to fall back to the JNI stub
|
|
+ arraycopy_store_args(src, src_pos, length, dst, dst_pos);
|
|
+
|
|
+ address copyfunc_addr = StubRoutines::generic_arraycopy();
|
|
+ assert(copyfunc_addr != NULL, "generic arraycopy stub required");
|
|
+
|
|
+ // The arguments are in java calling convention so we shift them
|
|
+ // to C convention
|
|
+ assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4);
|
|
+ __ mv(c_rarg0, j_rarg0);
|
|
+ assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
|
|
+ __ mv(c_rarg1, j_rarg1);
|
|
+ assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
|
|
+ __ mv(c_rarg2, j_rarg2);
|
|
+ assert_different_registers(c_rarg3, j_rarg4);
|
|
+ __ mv(c_rarg3, j_rarg3);
|
|
+ __ mv(c_rarg4, j_rarg4);
|
|
+#ifndef PRODUCT
|
|
+ if (PrintC1Statistics) {
|
|
+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
|
|
+ }
|
|
+#endif
|
|
+ __ far_call(RuntimeAddress(copyfunc_addr));
|
|
+ __ beqz(x10, *stub->continuation());
|
|
+ // Reload values from the stack so they are where the stub
|
|
+ // expects them.
|
|
+ arraycopy_load_args(src, src_pos, length, dst, dst_pos);
|
|
+
|
|
+ // x10 is -1^K where K == partial copied count
|
|
+ __ xori(t0, x10, -1);
|
|
+ // adjust length down and src/end pos up by partial copied count
|
|
+ __ subw(length, length, t0);
|
|
+ __ addw(src_pos, src_pos, t0);
|
|
+ __ addw(dst_pos, dst_pos, t0);
|
|
+ __ j(*stub->entry());
|
|
+
|
|
+ __ bind(*stub->continuation());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_simple_check(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, int flags) {
|
|
+ // test for NULL
|
|
+ if (flags & LIR_OpArrayCopy::src_null_check) {
|
|
+ __ beqz(src, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+ if (flags & LIR_OpArrayCopy::dst_null_check) {
|
|
+ __ beqz(dst, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+
|
|
+ // If the compiler was not able to prove that exact type of the source or the destination
|
|
+ // of the arraycopy is an array type, check at runtime if the source or the destination is
|
|
+ // an instance type.
|
|
+ if (flags & LIR_OpArrayCopy::type_check) {
|
|
+ if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
|
|
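+ // Klass::layout_helper() is negative for array klasses, so a value not below
+ // _lh_neutral_value means an instance type and we take the slow path.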
+ __ load_klass(tmp, dst);
|
|
+ __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset())));
|
|
+ __ mv(t1, Klass::_lh_neutral_value);
|
|
+ __ bge(t0, t1, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+
|
|
+ if (!(flags & LIR_OpArrayCopy::src_objarray)) {
|
|
+ __ load_klass(tmp, src);
|
|
+ __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset())));
|
|
+ __ mv(t1, Klass::_lh_neutral_value);
|
|
+ __ bge(t0, t1, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // check if negative
|
|
+ if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
|
|
+ __ bltz(src_pos, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
|
|
+ __ bltz(dst_pos, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+ if (flags & LIR_OpArrayCopy::length_positive_check) {
|
|
+ __ bltz(length, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+
|
|
+ if (flags & LIR_OpArrayCopy::src_range_check) {
|
|
+ __ addw(tmp, src_pos, length);
|
|
+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
|
|
+ __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+ if (flags & LIR_OpArrayCopy::dst_range_check) {
|
|
+ __ addw(tmp, dst_pos, length);
|
|
+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
|
|
+ __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_checkcast(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, BasicType basic_type,
|
|
+ address copyfunc_addr, int flags) {
|
|
+ // src is not a sub class of dst so we have to do a
|
|
+ // per-element check.
|
|
+ int mask = LIR_OpArrayCopy::src_objarray | LIR_OpArrayCopy::dst_objarray;
|
|
+ if ((flags & mask) != mask) {
|
|
+ // Check that at least one of them is an object array.
|
|
+ assert(flags & mask, "one of the two should be known to be an object array");
|
|
+
|
|
+ if (!(flags & LIR_OpArrayCopy::src_objarray)) {
|
|
+ __ load_klass(tmp, src);
|
|
+ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
|
|
+ __ load_klass(tmp, dst);
|
|
+ }
|
|
+ int lh_offset = in_bytes(Klass::layout_helper_offset());
|
|
+ Address klass_lh_addr(tmp, lh_offset);
|
|
+ jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
|
|
+ __ lw(t0, klass_lh_addr);
|
|
+ __ mvw(t1, objArray_lh);
|
|
+ __ bne(t0, t1, *stub->entry(), /* is_far */ true);
|
|
+ }
|
|
+
|
|
+ // Spill because stubs can use any register they like and it's
|
|
+ // easier to restore just those that we care about.
|
|
+ arraycopy_store_args(src, src_pos, length, dst, dst_pos);
|
|
+ arraycopy_checkcast_prepare_params(src, src_pos, length, dst, dst_pos, basic_type);
|
|
+ __ far_call(RuntimeAddress(copyfunc_addr));
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ if (PrintC1Statistics) {
|
|
+ Label failed;
|
|
+ __ bnez(x10, failed);
|
|
+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
|
|
+ __ bind(failed);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ __ beqz(x10, *stub->continuation());
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ if (PrintC1Statistics) {
|
|
+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
|
|
+ }
|
|
+#endif
|
|
+ assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0);
|
|
+
|
|
+ // Restore previously spilled arguments
|
|
+ arraycopy_load_args(src, src_pos, length, dst, dst_pos);
|
|
+
|
|
+ // return value is -1^K where K is partial copied count
|
|
+ __ xori(t0, x10, -1);
|
|
+ // adjust length down and src/end pos up by partial copied count
|
|
+ __ subw(length, length, t0);
|
|
+ __ addw(src_pos, src_pos, t0);
|
|
+ __ addw(dst_pos, dst_pos, t0);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, BasicType basic_type, int flags) {
|
|
+ // We don't know the array types are compatible
|
|
+ if (basic_type != T_OBJECT) {
|
|
+ // Simple test for basic type arrays
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+ __ bne(tmp, t0, *stub->entry(), /* is_far */ true);
|
|
+ } else {
|
|
+ // For object arrays, if src is a sub class of dst then we can
|
|
+ // safely do the copy.
|
|
+ Label cont, slow;
|
|
+
|
|
+#define PUSH(r1, r2) \
|
|
+ __ addi(sp, sp, -2 * wordSize); \
|
|
+ __ sd(r1, Address(sp, 1 * wordSize)); \
|
|
+ __ sd(r2, Address(sp, 0));
|
|
+
|
|
+#define POP(r1, r2) \
|
|
+ __ ld(r1, Address(sp, 1 * wordSize)); \
|
|
+ __ ld(r2, Address(sp, 0)); \
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+
|
|
+ PUSH(src, dst);
|
|
+ __ load_klass(src, src);
|
|
+ __ load_klass(dst, dst);
|
|
+ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
|
|
+
|
|
+ PUSH(src, dst);
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
|
|
+ POP(src, dst);
|
|
+ __ bnez(dst, cont);
|
|
+
|
|
+ __ bind(slow);
|
|
+ POP(src, dst);
|
|
+
|
|
+ address copyfunc_addr = StubRoutines::checkcast_arraycopy();
|
|
+ if (copyfunc_addr != NULL) { // use stub if available
|
|
+ arraycopy_checkcast(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, copyfunc_addr, flags);
|
|
+ }
|
|
+
|
|
+ __ j(*stub->entry());
|
|
+ __ bind(cont);
|
|
+ POP(src, dst);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) {
|
|
+ assert(default_type != NULL, "NULL default_type!");
|
|
+ BasicType basic_type = default_type->element_type()->basic_type();
|
|
+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; }
|
|
+ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
|
|
+ // Sanity check the known type with the incoming class. For the
|
|
+ // primitive case the types must match exactly with src.klass and
|
|
+ // dst.klass each exactly matching the default type. For the
|
|
+ // object array case, if no type check is needed then either the
|
|
+ // dst type is exactly the expected type and the src type is a
|
|
+ // subtype which we can't check or src is the same array as dst
|
|
+ // but not necessarily exactly of type default_type.
|
|
+ Label known_ok, halt;
|
|
+ __ mov_metadata(tmp, default_type->constant_encoding());
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ encode_klass_not_null(tmp);
|
|
+ }
|
|
+
|
|
+ if (basic_type != T_OBJECT) {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+ __ bne(tmp, t0, halt);
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+ __ beq(tmp, t0, known_ok);
|
|
+ } else {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+ __ beq(tmp, t0, known_ok);
|
|
+ __ beq(src, dst, known_ok);
|
|
+ }
|
|
+ __ bind(halt);
|
|
+ __ stop("incorrect type information in arraycopy");
|
|
+ __ bind(known_ok);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
|
|
+ ciArrayKlass *default_type = op->expected_type();
|
|
+ Register src = op->src()->as_register();
|
|
+ Register dst = op->dst()->as_register();
|
|
+ Register src_pos = op->src_pos()->as_register();
|
|
+ Register dst_pos = op->dst_pos()->as_register();
|
|
+ Register length = op->length()->as_register();
|
|
+ Register tmp = op->tmp()->as_register();
|
|
+
|
|
+ CodeStub* stub = op->stub();
|
|
+ int flags = op->flags();
|
|
+ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
|
|
+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; }
|
|
+
|
|
+ // if we don't know anything, just go through the generic arraycopy
|
|
+ if (default_type == NULL) {
|
|
+ generic_arraycopy(src, src_pos, length, dst, dst_pos, stub);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(),
|
|
+ "must be true at this point");
|
|
+
|
|
+ arraycopy_simple_check(src, src_pos, length, dst, dst_pos, tmp, stub, flags);
|
|
+
|
|
+ if (flags & LIR_OpArrayCopy::type_check) {
|
|
+ arraycopy_type_check(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, flags);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ arraycopy_assert(src, dst, tmp, default_type, flags);
|
|
+#endif
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ if (PrintC1Statistics) {
|
|
+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
|
|
+ }
|
|
+#endif
|
|
+ arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type);
|
|
+
|
|
+ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
|
|
+ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
|
|
+ const char *name = NULL;
|
|
+ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
|
|
+
|
|
+ CodeBlob *cb = CodeCache::find_blob(entry);
|
|
+ if (cb != NULL) {
|
|
+ __ far_call(RuntimeAddress(entry));
|
|
+ } else {
|
|
+ const int args_num = 3;
|
|
+ __ call_VM_leaf(entry, args_num);
|
|
+ }
|
|
+
|
|
+ __ bind(*stub->continuation());
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, BasicType basic_type) {
|
|
+ int scale = array_element_size(basic_type);
|
|
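+ // Compute the addresses of the first source and destination elements:
+ // base + (pos << log2(element size)) + array header size.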
+ __ shadd(c_rarg0, src_pos, src, t0, scale);
|
|
+ __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type));
|
|
+ assert_different_registers(c_rarg0, dst, dst_pos, length);
|
|
+ __ shadd(c_rarg1, dst_pos, dst, t0, scale);
|
|
+ __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
|
|
+ assert_different_registers(c_rarg1, dst, length);
|
|
+ __ mv(c_rarg2, length);
|
|
+ assert_different_registers(c_rarg2, dst);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, BasicType basic_type) {
|
|
+ arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type);
|
|
+ __ load_klass(c_rarg4, dst);
|
|
+ __ ld(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset()));
|
|
+ __ lwu(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_store_args(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos) {
|
|
+ __ sd(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset
|
|
+ __ sd(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset
|
|
+ __ sd(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset
|
|
+ __ sd(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset
|
|
+ __ sd(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset
|
|
+}
|
|
+
|
|
+void LIR_Assembler::arraycopy_load_args(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos) {
|
|
+ __ ld(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset
|
|
+ __ ld(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset
|
|
+ __ ld(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset
|
|
+ __ ld(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset
|
|
+ __ ld(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset
|
|
+}
|
|
+
|
|
+#undef __
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..872fd2ef6
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
|
|
@@ -0,0 +1,51 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
|
|
+#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
|
|
+ // arraycopy sub functions
|
|
+ void generic_arraycopy(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, CodeStub *stub);
|
|
+ void arraycopy_simple_check(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, int flags);
|
|
+ void arraycopy_checkcast(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, BasicType basic_type,
|
|
+ address copyfunc_addr, int flags);
|
|
+ void arraycopy_type_check(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, Register tmp,
|
|
+ CodeStub *stub, BasicType basic_type, int flags);
|
|
+ void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags);
|
|
+ void arraycopy_prepare_params(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, BasicType basic_type);
|
|
+ void arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos, BasicType basic_type);
|
|
+ void arraycopy_store_args(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos);
|
|
+ void arraycopy_load_args(Register src, Register src_pos, Register length,
|
|
+ Register dst, Register dst_pos);
|
|
+#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..222e3e97e
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
|
|
@@ -0,0 +1,2275 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "c1/c1_CodeStubs.hpp"
|
|
+#include "c1/c1_Compilation.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "c1/c1_ValueStack.hpp"
|
|
+#include "ci/ciArrayKlass.hpp"
|
|
+#include "ci/ciInstance.hpp"
|
|
+#include "code/compiledIC.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/cardTableBarrierSet.hpp"
|
|
+#include "gc/shared/collectedHeap.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/objArrayKlass.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+
|
|
+#ifndef PRODUCT
|
|
+#define COMMENT(x) do { __ block_comment(x); } while (0)
|
|
+#else
|
|
+#define COMMENT(x)
|
|
+#endif
|
|
+
|
|
+NEEDS_CLEANUP // remove these definitions?
|
|
+const Register IC_Klass = t1; // where the IC klass is cached
|
|
+const Register SYNC_header = x10; // synchronization header
|
|
+const Register SHIFT_count = x10; // where count for shift operations must be
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+static void select_different_registers(Register preserve,
|
|
+ Register extra,
|
|
+ Register &tmp1,
|
|
+ Register &tmp2) {
|
|
+ if (tmp1 == preserve) {
|
|
+ assert_different_registers(tmp1, tmp2, extra);
|
|
+ tmp1 = extra;
|
|
+ } else if (tmp2 == preserve) {
|
|
+ assert_different_registers(tmp1, tmp2, extra);
|
|
+ tmp2 = extra;
|
|
+ }
|
|
+ assert_different_registers(preserve, tmp1, tmp2);
|
|
+}
|
|
+
|
|
+static void select_different_registers(Register preserve,
|
|
+ Register extra,
|
|
+ Register &tmp1,
|
|
+ Register &tmp2,
|
|
+ Register &tmp3) {
|
|
+ if (tmp1 == preserve) {
|
|
+ assert_different_registers(tmp1, tmp2, tmp3, extra);
|
|
+ tmp1 = extra;
|
|
+ } else if (tmp2 == preserve) {
|
|
+ assert_different_registers(tmp1, tmp2, tmp3, extra);
|
|
+ tmp2 = extra;
|
|
+ } else if (tmp3 == preserve) {
|
|
+ assert_different_registers(tmp1, tmp2, tmp3, extra);
|
|
+ tmp3 = extra;
|
|
+ }
|
|
+ assert_different_registers(preserve, tmp1, tmp2, tmp3);
|
|
+}
|
|
+
|
|
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; }
|
|
+
|
|
+
|
|
+LIR_Opr LIR_Assembler::receiverOpr() {
|
|
+ return FrameMap::receiver_opr;
|
|
+}
|
|
+
|
|
+LIR_Opr LIR_Assembler::osrBufferPointer() {
|
|
+ return FrameMap::as_pointer_opr(receiverOpr()->as_register());
|
|
+}
|
|
+
|
|
+//--------------fpu register translations-----------------------
|
|
+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::reset_FPU() { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::fpop() { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::fxch(int i) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::fld(int i) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::ffree(int i) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::breakpoint() { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); }
|
|
+//-------------------------------------------
|
|
+
|
|
+static jlong as_long(LIR_Opr data) {
|
|
+ jlong result;
|
|
+ switch (data->type()) {
|
|
+ case T_INT:
|
|
+ result = (data->as_jint());
|
|
+ break;
|
|
+ case T_LONG:
|
|
+ result = (data->as_jlong());
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ result = 0; // unreachable
|
|
+ }
|
|
+ return result;
|
|
+}
|
|
+
|
|
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
|
|
+ ShouldNotReachHere();
|
|
+ return Address();
|
|
+}
|
|
+
|
|
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
|
|
+ return as_Address(addr, t0);
|
|
+}
|
|
+
|
|
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
|
|
+ return as_Address(addr);
|
|
+}
|
|
+
|
|
+// Ensure a valid Address (base + offset) to a stack-slot. If stack access is
|
|
+// not encodable as a base + (immediate) offset, generate an explicit address
|
|
+// calculation to hold the address in a temporary register.
|
|
+Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) {
|
|
+ precond(size == 4 || size == 8);
|
|
+ Address addr = frame_map()->address_for_slot(index, adjust);
|
|
+ precond(addr.getMode() == Address::base_plus_offset);
|
|
+ precond(addr.base() == sp);
|
|
+ precond(addr.offset() > 0);
|
|
+ uint mask = size - 1;
|
|
+ assert((addr.offset() & mask) == 0, "scaled offsets only");
|
|
+
|
|
+ return addr;
|
|
+}
|
|
+
|
|
+void LIR_Assembler::osr_entry() {
|
|
+ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
|
|
+ BlockBegin* osr_entry = compilation()->hir()->osr_entry();
|
|
+ guarantee(osr_entry != NULL, "NULL osr_entry!");
|
|
+ ValueStack* entry_state = osr_entry->state();
|
|
+ int number_of_locks = entry_state->locks_size();
|
|
+
|
|
+ // we jump here if osr happens with the interpreter
|
|
+ // state set up to continue at the beginning of the
|
|
+ // loop that triggered osr - in particular, we have
|
|
+ // the following registers setup:
|
|
+ //
|
|
+ // x12: osr buffer
|
|
+ //
|
|
+
|
|
+ //build frame
|
|
+ ciMethod* m = compilation()->method();
|
|
+ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
|
|
+
|
|
+ // OSR buffer is
|
|
+ //
|
|
+ // locals[nlocals-1..0]
|
|
+ // monitors[0..number_of_locks]
|
|
+ //
|
|
+ // locals is a direct copy of the interpreter frame so in the osr buffer
|
|
+ // so first slot in the local array is the last local from the interpreter
|
|
+ // and last slot is local[0] (receiver) from the interpreter
|
|
+ //
|
|
+ // Similarly with locks. The first lock slot in the osr buffer is the nth lock
|
|
+ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
|
|
+ // in the interpreter frame (the method lock if a sync method)
|
|
+
|
|
+ // Initialize monitors in the compiled activation.
|
|
+ // x12: pointer to osr buffer
|
|
+ // All other registers are dead at this point and the locals will be
|
|
+ // copied into place by code emitted in the IR.
|
|
+
|
|
+ Register OSR_buf = osrBufferPointer()->as_pointer_register();
|
|
+ {
|
|
+ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
|
|
+ int monitor_offset = BytesPerWord * method()->max_locals() +
|
|
+ (2 * BytesPerWord) * (number_of_locks - 1);
|
|
+ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
|
|
+ // the OSR buffer using 2 word entries: first the lock and then
|
|
+ // the oop.
|
|
+ for (int i = 0; i < number_of_locks; i++) {
|
|
+ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
|
|
+#ifdef ASSERT
|
|
+ // verify the interpreter's monitor has a non-null object
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(OSR_buf, slot_offset + 1 * BytesPerWord));
|
|
+ __ bnez(t0, L);
|
|
+ __ stop("locked object is NULL");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+ __ ld(x9, Address(OSR_buf, slot_offset + 0));
|
|
+ __ sd(x9, frame_map()->address_for_monitor_lock(i));
|
|
+ __ ld(x9, Address(OSR_buf, slot_offset + 1 * BytesPerWord));
|
|
+ __ sd(x9, frame_map()->address_for_monitor_object(i));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// inline cache check; done before the frame is built.
|
|
+int LIR_Assembler::check_icache() {
|
|
+ Register receiver = FrameMap::receiver_opr->as_register();
|
|
+ Register ic_klass = IC_Klass;
|
|
+ int start_offset = __ offset();
|
|
+ Label dont;
|
|
+ __ inline_cache_check(receiver, ic_klass, dont);
|
|
+
|
|
+ // if icache check fails, then jump to runtime routine
|
|
+ // Note: RECEIVER must still contain the receiver!
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
|
|
+
|
|
+ // We align the verified entry point unless the method body
|
|
+ // (including its inline cache check) will fit in a single 64-byte
|
|
+ // icache line.
|
|
+ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
|
|
+ // force alignment after the cache check.
|
|
+ __ align(CodeEntryAlignment);
|
|
+ }
|
|
+
|
|
+ __ bind(dont);
|
|
+ return start_offset;
|
|
+}
|
|
+
|
|
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
|
|
+ if (o == NULL) {
|
|
+ __ mv(reg, zr);
|
|
+ } else {
|
|
+ __ movoop(reg, o, /* immediate */ true);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
|
|
+ deoptimize_trap(info);
|
|
+}
|
|
+
|
|
+// This specifies the sp decrement needed to build the frame
|
|
+int LIR_Assembler::initial_frame_size_in_bytes() const {
|
|
+ // if rounding, must let FrameMap know!
|
|
+
|
|
+ // The frame_map records size in slots (32bit word)
|
|
+
|
|
+ // subtract two words to account for return address and link
|
|
+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size;
|
|
+}
|
|
+
|
|
+int LIR_Assembler::emit_exception_handler() {
|
|
+ // if the last instruction is a call (typically to do a throw which
|
|
+ // is coming at the end after block reordering) the return address
|
|
+ // must still point into the code area in order to avoid assertion
|
|
+ // failures when searching for the corresponding bci ==> add a nop
|
|
+ // (was bug 5/14/1999 -gri)
|
|
+ __ nop();
|
|
+
|
|
+ // generate code for exception handler
|
|
+ address handler_base = __ start_a_stub(exception_handler_size());
|
|
+ if (handler_base == NULL) {
|
|
+ // not enough space left for the handler
|
|
+ bailout("exception handler overflow");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ int offset = code_offset();
|
|
+
|
|
+ // the exception oop and pc are in x10 and x13, respectively
|
|
+ // no other registers need to be preserved, so invalidate them
|
|
+ __ invalidate_registers(false, true, true, false, true, true);
|
|
+
|
|
+ // check that there is really an exception
|
|
+ __ verify_not_null_oop(x10);
|
|
+
|
|
+ // search an exception handler (x10: exception oop, x13: throwing pc)
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
|
|
+ __ should_not_reach_here();
|
|
+ guarantee(code_offset() - offset <= exception_handler_size(), "overflow");
|
|
+ __ end_a_stub();
|
|
+
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+// Emit the code to remove the frame from the stack in the exception
|
|
+// unwind path.
|
|
+int LIR_Assembler::emit_unwind_handler() {
|
|
+#ifndef PRODUCT
|
|
+ if (CommentedAssembly) {
|
|
+ _masm->block_comment("Unwind handler");
|
|
+ }
|
|
+#endif // PRODUCT
|
|
+
|
|
+ int offset = code_offset();
|
|
+
|
|
+ // Fetch the exception from TLS and clear out exception related thread state
|
|
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+ __ bind(_unwind_handler_entry);
|
|
+ __ verify_not_null_oop(x10);
|
|
+ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
|
|
+ __ mv(x9, x10); // Preserve the exception
|
|
+ }
|
|
+
|
|
+ // Preform needed unlocking
|
|
+ MonitorExitStub* stub = NULL;
|
|
+ if (method()->is_synchronized()) {
|
|
+ monitor_address(0, FrameMap::r10_opr);
|
|
+ stub = new MonitorExitStub(FrameMap::r10_opr, true, 0);
|
|
+ __ unlock_object(x15, x14, x10, *stub->entry());
|
|
+ __ bind(*stub->continuation());
|
|
+ }
|
|
+
|
|
+ if (compilation()->env()->dtrace_method_probes()) {
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mov_metadata(c_rarg1, method()->constant_encoding());
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), c_rarg0, c_rarg1);
|
|
+ }
|
|
+
|
|
+ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
|
|
+ __ mv(x10, x9); // Restore the exception
|
|
+ }
|
|
+
|
|
+ // remove the activation and dispatch to the unwind handler
|
|
+ __ block_comment("remove_frame and dispatch to the unwind handler");
|
|
+ __ remove_frame(initial_frame_size_in_bytes());
|
|
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
|
|
+
|
|
+ // Emit the slow path assembly
|
|
+ if (stub != NULL) {
|
|
+ stub->emit_code(this);
|
|
+ }
|
|
+
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+int LIR_Assembler::emit_deopt_handler() {
|
|
+ // if the last instruction is a call (typically to do a throw which
|
|
+ // is coming at the end after block reordering) the return address
|
|
+ // must still point into the code area in order to avoid assertion
|
|
+ // failures when searching for the corresponding bci => add a nop
|
|
+ // (was bug 5/14/1999 - gri)
|
|
+ __ nop();
|
|
+
|
|
+ // generate code for deopt handler
|
|
+ address handler_base = __ start_a_stub(deopt_handler_size());
|
|
+ if (handler_base == NULL) {
|
|
+ // not enough space left for the handler
|
|
+ bailout("deopt handler overflow");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ int offset = code_offset();
|
|
+
|
|
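+ // auipc with a zero offset loads the address of this instruction into ra, giving the deopt blob the return address that identifies this deopt site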
+ __ auipc(ra, 0);
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
|
|
+ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
|
|
+ __ end_a_stub();
|
|
+
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+void LIR_Assembler::return_op(LIR_Opr result) {
|
|
+ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10");
|
|
+
|
|
+ // Pop the stack before the safepoint code
|
|
+ __ remove_frame(initial_frame_size_in_bytes());
|
|
+
|
|
+ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
|
|
+ __ reserved_stack_check();
|
|
+ }
|
|
+
|
|
+ address polling_page(os::get_polling_page());
|
|
+ __ read_polling_page(t0, polling_page, relocInfo::poll_return_type);
|
|
+ __ ret();
|
|
+}
|
|
+
|
|
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
|
|
+ address polling_page(os::get_polling_page());
|
|
+ guarantee(info != NULL, "Shouldn't be NULL");
|
|
+ assert(os::is_poll_address(polling_page), "should be");
|
|
+ int32_t offset = 0;
|
|
+ __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type);
|
|
+ add_debug_info_for_branch(info); // This isn't just debug info:
|
|
+ // it's the oop map
|
|
+ __ read_polling_page(t0, offset, relocInfo::poll_type);
|
|
+ return __ offset();
|
|
+}
|
|
+
|
|
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
|
|
+ __ mv(to_reg, from_reg);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
|
|
+ assert(src->is_constant(), "should not call otherwise");
|
|
+ assert(dest->is_register(), "should not call otherwise");
|
|
+ LIR_Const* c = src->as_constant_ptr();
|
|
+ address const_addr = NULL;
|
|
+
|
|
+ switch (c->type()) {
|
|
+ case T_INT:
|
|
+ assert(patch_code == lir_patch_none, "no patching handled here");
|
|
+ __ mvw(dest->as_register(), c->as_jint());
|
|
+ break;
|
|
+
|
|
+ case T_ADDRESS:
|
|
+ assert(patch_code == lir_patch_none, "no patching handled here");
|
|
+ __ mv(dest->as_register(), c->as_jint());
|
|
+ break;
|
|
+
|
|
+ case T_LONG:
|
|
+ assert(patch_code == lir_patch_none, "no patching handled here");
|
|
+ __ mv(dest->as_register_lo(), (intptr_t)c->as_jlong());
|
|
+ break;
|
|
+
|
|
+ case T_OBJECT:
|
|
+ case T_ARRAY:
|
|
+ if (patch_code == lir_patch_none) {
|
|
+ jobject2reg(c->as_jobject(), dest->as_register());
|
|
+ } else {
|
|
+ jobject2reg_with_patching(dest->as_register(), info);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case T_METADATA:
|
|
+ if (patch_code != lir_patch_none) {
|
|
+ klass2reg_with_patching(dest->as_register(), info);
|
|
+ } else {
|
|
+ __ mov_metadata(dest->as_register(), c->as_metadata());
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case T_FLOAT:
|
|
+ const_addr = float_constant(c->as_jfloat());
|
|
+ assert(const_addr != NULL, "must create float constant in the constant table");
|
|
+ __ flw(dest->as_float_reg(), InternalAddress(const_addr));
|
|
+ break;
|
|
+
|
|
+ case T_DOUBLE:
|
|
+ const_addr = double_constant(c->as_jdouble());
|
|
+ assert(const_addr != NULL, "must create double constant in the constant table");
|
|
+ __ fld(dest->as_double_reg(), InternalAddress(const_addr));
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
|
|
+ assert(src->is_constant(), "should not call otherwise");
|
|
+ assert(dest->is_stack(), "should not call otherwise");
|
|
+ LIR_Const* c = src->as_constant_ptr();
|
|
+ switch (c->type()) {
|
|
+ case T_OBJECT:
|
|
+ if (c->as_jobject() == NULL) {
|
|
+ __ sd(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
|
|
+ } else {
|
|
+ const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL);
|
|
+ reg2stack(FrameMap::t1_opr, dest, c->type(), false);
|
|
+ }
|
|
+ break;
|
|
+ case T_ADDRESS: // fall through
|
|
+ const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL);
|
|
+ reg2stack(FrameMap::t1_opr, dest, c->type(), false);
|
|
+ case T_INT: // fall through
|
|
+ case T_FLOAT:
|
|
+ if (c->as_jint_bits() == 0) {
|
|
+ __ sw(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
|
|
+ } else {
|
|
+ __ mvw(t1, c->as_jint_bits());
|
|
+ __ sw(t1, frame_map()->address_for_slot(dest->single_stack_ix()));
|
|
+ }
|
|
+ break;
|
|
+ case T_LONG: // fall through
|
|
+ case T_DOUBLE:
|
|
+ if (c->as_jlong_bits() == 0) {
|
|
+ __ sd(zr, frame_map()->address_for_slot(dest->double_stack_ix(),
|
|
+ lo_word_offset_in_bytes));
|
|
+ } else {
|
|
+ __ mv(t1, (intptr_t)c->as_jlong_bits());
|
|
+ __ sd(t1, frame_map()->address_for_slot(dest->double_stack_ix(),
|
|
+ lo_word_offset_in_bytes));
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
|
|
+ assert(src->is_constant(), "should not call otherwise");
|
|
+ assert(dest->is_address(), "should not call otherwise");
|
|
+ LIR_Const* c = src->as_constant_ptr();
|
|
+ LIR_Address* to_addr = dest->as_address_ptr();
|
|
+ void (Assembler::* insn)(Register Rt, const Address &adr, Register temp);
|
|
+ switch (type) {
|
|
+ case T_ADDRESS:
|
|
+ assert(c->as_jint() == 0, "should be");
|
|
+ insn = &Assembler::sd; break;
|
|
+ case T_LONG:
|
|
+ assert(c->as_jlong() == 0, "should be");
|
|
+ insn = &Assembler::sd; break;
|
|
+ case T_DOUBLE:
|
|
+ assert(c->as_jdouble() == 0.0, "should be");
|
|
+ insn = &Assembler::sd; break;
|
|
+ case T_INT:
|
|
+ assert(c->as_jint() == 0, "should be");
|
|
+ insn = &Assembler::sw; break;
|
|
+ case T_FLOAT:
|
|
+ assert(c->as_jfloat() == 0.0f, "should be");
|
|
+ insn = &Assembler::sw; break;
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ARRAY:
|
|
+ assert(c->as_jobject() == 0, "should be");
|
|
+ if (UseCompressedOops && !wide) {
|
|
+ insn = &Assembler::sw;
|
|
+ } else {
|
|
+ insn = &Assembler::sd;
|
|
+ }
|
|
+ break;
|
|
+ case T_CHAR: // fall through
|
|
+ case T_SHORT:
|
|
+ assert(c->as_jint() == 0, "should be");
|
|
+ insn = &Assembler::sh;
|
|
+ break;
|
|
+ case T_BOOLEAN: // fall through
|
|
+ case T_BYTE:
|
|
+ assert(c->as_jint() == 0, "should be");
|
|
+ insn = &Assembler::sb; break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ insn = &Assembler::sd; // unreachable
|
|
+ }
|
|
+ if (info != NULL) {
|
|
+ add_debug_info_for_null_check_here(info);
|
|
+ }
|
|
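+ // every constant handled above is zero (or a null oop), so the selected store instruction writes zr to the destination address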
+ (_masm->*insn)(zr, as_Address(to_addr), t0);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
|
|
+ assert(src->is_register(), "should not call otherwise");
|
|
+ assert(dest->is_register(), "should not call otherwise");
|
|
+
|
|
+ // move between cpu-registers
|
|
+ if (dest->is_single_cpu()) {
|
|
+ if (src->type() == T_LONG) {
|
|
+ // Can do LONG -> OBJECT
|
|
+ move_regs(src->as_register_lo(), dest->as_register());
|
|
+ return;
|
|
+ }
|
|
+ assert(src->is_single_cpu(), "must match");
|
|
+ if (src->type() == T_OBJECT) {
|
|
+ __ verify_oop(src->as_register());
|
|
+ }
|
|
+ move_regs(src->as_register(), dest->as_register());
|
|
+ } else if (dest->is_double_cpu()) {
|
|
+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) {
|
|
+ __ verify_oop(src->as_register());
|
|
+ move_regs(src->as_register(), dest->as_register_lo());
|
|
+ return;
|
|
+ }
|
|
+ assert(src->is_double_cpu(), "must match");
|
|
+ Register f_lo = src->as_register_lo();
|
|
+ Register f_hi = src->as_register_hi();
|
|
+ Register t_lo = dest->as_register_lo();
|
|
+ Register t_hi = dest->as_register_hi();
|
|
+ assert(f_hi == f_lo, "must be same");
|
|
+ assert(t_hi == t_lo, "must be same");
|
|
+ move_regs(f_lo, t_lo);
|
|
+ } else if (dest->is_single_fpu()) {
|
|
+ assert(src->is_single_fpu(), "expect single fpu");
|
|
+ __ fmv_s(dest->as_float_reg(), src->as_float_reg());
|
|
+ } else if (dest->is_double_fpu()) {
|
|
+ assert(src->is_double_fpu(), "expect double fpu");
|
|
+ __ fmv_d(dest->as_double_reg(), src->as_double_reg());
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
|
|
+ precond(src->is_register() && dest->is_stack());
|
|
+
|
|
+ uint const c_sz32 = sizeof(uint32_t);
|
|
+ uint const c_sz64 = sizeof(uint64_t);
|
|
+
|
|
+ assert(src->is_register(), "should not call otherwise");
|
|
+ assert(dest->is_stack(), "should not call otherwise");
|
|
+ if (src->is_single_cpu()) {
|
|
+ int index = dest->single_stack_ix();
|
|
+ if (is_reference_type(type)) {
|
|
+ __ sd(src->as_register(), stack_slot_address(index, c_sz64));
|
|
+ __ verify_oop(src->as_register());
|
|
+ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) {
|
|
+ __ sd(src->as_register(), stack_slot_address(index, c_sz64));
|
|
+ } else {
|
|
+ __ sw(src->as_register(), stack_slot_address(index, c_sz32));
|
|
+ }
|
|
+ } else if (src->is_double_cpu()) {
|
|
+ int index = dest->double_stack_ix();
|
|
+ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes);
|
|
+ __ sd(src->as_register_lo(), dest_addr_LO);
|
|
+ } else if (src->is_single_fpu()) {
|
|
+ int index = dest->single_stack_ix();
|
|
+ __ fsw(src->as_float_reg(), stack_slot_address(index, c_sz32));
|
|
+ } else if (src->is_double_fpu()) {
|
|
+ int index = dest->double_stack_ix();
|
|
+ __ fsd(src->as_double_reg(), stack_slot_address(index, c_sz64));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info,
|
|
+ bool pop_fpu_stack, bool wide, bool /* unaligned */) {
|
|
+ LIR_Address* to_addr = dest->as_address_ptr();
|
|
+ // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src
|
|
+ Register compressed_src = t1;
|
|
+
|
|
+ if (patch_code != lir_patch_none) {
|
|
+ deoptimize_trap(info);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (type == T_ARRAY || type == T_OBJECT) {
|
|
+ __ verify_oop(src->as_register());
|
|
+
|
|
+ if (UseCompressedOops && !wide) {
|
|
+ __ encode_heap_oop(compressed_src, src->as_register());
|
|
+ } else {
|
|
+ compressed_src = src->as_register();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ int null_check_here = code_offset();
|
|
+
|
|
+ switch (type) {
|
|
+ case T_FLOAT:
|
|
+ __ fsw(src->as_float_reg(), as_Address(to_addr));
|
|
+ break;
|
|
+
|
|
+ case T_DOUBLE:
|
|
+ __ fsd(src->as_double_reg(), as_Address(to_addr));
|
|
+ break;
|
|
+
|
|
+ case T_ARRAY: // fall through
|
|
+ case T_OBJECT:
|
|
+ if (UseCompressedOops && !wide) {
|
|
+ __ sw(compressed_src, as_Address(to_addr));
|
|
+ } else {
|
|
+ __ sd(compressed_src, as_Address(to_addr));
|
|
+ }
|
|
+ break;
|
|
+ case T_METADATA:
|
|
+ // We get here to store a method pointer to the stack to pass to
|
|
+ // a dtrace runtime call. This can't work on 64 bit with
|
|
+ // compressed klass ptrs: T_METADATA can be a compressed klass
|
|
+ // ptr or a 64 bit method pointer.
|
|
+ ShouldNotReachHere();
|
|
+ __ sd(src->as_register(), as_Address(to_addr));
|
|
+ break;
|
|
+ case T_ADDRESS:
|
|
+ __ sd(src->as_register(), as_Address(to_addr));
|
|
+ break;
|
|
+ case T_INT:
|
|
+ __ sw(src->as_register(), as_Address(to_addr));
|
|
+ break;
|
|
+ case T_LONG:
|
|
+ __ sd(src->as_register_lo(), as_Address(to_addr));
|
|
+ break;
|
|
+ case T_BYTE: // fall through
|
|
+ case T_BOOLEAN:
|
|
+ __ sb(src->as_register(), as_Address(to_addr));
|
|
+ break;
|
|
+ case T_CHAR: // fall through
|
|
+ case T_SHORT:
|
|
+ __ sh(src->as_register(), as_Address(to_addr));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ if (info != NULL) {
|
|
+ add_debug_info_for_null_check(null_check_here, info);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
|
|
+ precond(src->is_stack() && dest->is_register());
|
|
+
|
|
+ uint const c_sz32 = sizeof(uint32_t);
|
|
+ uint const c_sz64 = sizeof(uint64_t);
|
|
+
|
|
+ if (dest->is_single_cpu()) {
|
|
+ int index = src->single_stack_ix();
|
|
+ if (type == T_INT) {
|
|
+ __ lw(dest->as_register(), stack_slot_address(index, c_sz32));
|
|
+ } else if (is_reference_type(type)) {
|
|
+ __ ld(dest->as_register(), stack_slot_address(index, c_sz64));
|
|
+ __ verify_oop(dest->as_register());
|
|
+ } else if (type == T_METADATA || type == T_ADDRESS) {
|
|
+ __ ld(dest->as_register(), stack_slot_address(index, c_sz64));
|
|
+ } else {
|
|
+ __ lwu(dest->as_register(), stack_slot_address(index, c_sz32));
|
|
+ }
|
|
+ } else if (dest->is_double_cpu()) {
|
|
+ int index = src->double_stack_ix();
|
|
+ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes);
|
|
+ __ ld(dest->as_register_lo(), src_addr_LO);
|
|
+ } else if (dest->is_single_fpu()) {
|
|
+ int index = src->single_stack_ix();
|
|
+ __ flw(dest->as_float_reg(), stack_slot_address(index, c_sz32));
|
|
+ } else if (dest->is_double_fpu()) {
|
|
+ int index = src->double_stack_ix();
|
|
+ __ fld(dest->as_double_reg(), stack_slot_address(index, c_sz64));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
|
|
+ deoptimize_trap(info);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
|
|
+ LIR_Opr temp;
|
|
+ if (type == T_LONG || type == T_DOUBLE) {
|
|
+ temp = FrameMap::t1_long_opr;
|
|
+ } else {
|
|
+ temp = FrameMap::t1_opr;
|
|
+ }
|
|
+
|
|
+ stack2reg(src, temp, src->type());
|
|
+ reg2stack(temp, dest, dest->type(), false);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info,
|
|
+ bool wide, bool /* unaligned */) {
|
|
+ assert(src->is_address(), "should not call otherwise");
|
|
+ assert(dest->is_register(), "should not call otherwise");
|
|
+
|
|
+ LIR_Address* addr = src->as_address_ptr();
|
|
+ LIR_Address* from_addr = src->as_address_ptr();
|
|
+
|
|
+ if (addr->base()->type() == T_OBJECT) {
|
|
+ __ verify_oop(addr->base()->as_pointer_register());
|
|
+ }
|
|
+
|
|
+ if (patch_code != lir_patch_none) {
|
|
+ deoptimize_trap(info);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (info != NULL) {
|
|
+ add_debug_info_for_null_check_here(info);
|
|
+ }
|
|
+
|
|
+ int null_check_here = code_offset();
|
|
+ switch (type) {
|
|
+ case T_FLOAT:
|
|
+ __ flw(dest->as_float_reg(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_DOUBLE:
|
|
+ __ fld(dest->as_double_reg(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_ARRAY: // fall through
|
|
+ case T_OBJECT:
|
|
+ if (UseCompressedOops && !wide) {
|
|
+ __ lwu(dest->as_register(), as_Address(from_addr));
|
|
+ } else {
|
|
+ __ ld(dest->as_register(), as_Address(from_addr));
|
|
+ }
|
|
+ break;
|
|
+ case T_METADATA:
|
|
+ // We get here to store a method pointer to the stack to pass to
|
|
+ // a dtrace runtime call. This can't work on 64 bit with
|
|
+ // compressed klass ptrs: T_METADATA can be a compressed klass
|
|
+ // ptr or a 64 bit method pointer.
|
|
+ ShouldNotReachHere();
|
|
+ __ ld(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_ADDRESS:
|
|
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
|
|
+ __ lwu(dest->as_register(), as_Address(from_addr));
|
|
+ } else {
|
|
+ __ ld(dest->as_register(), as_Address(from_addr));
|
|
+ }
|
|
+ break;
|
|
+ case T_INT:
|
|
+ __ lw(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_LONG:
|
|
+ __ ld(dest->as_register_lo(), as_Address_lo(from_addr));
|
|
+ break;
|
|
+ case T_BYTE:
|
|
+ __ lb(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_BOOLEAN:
|
|
+ __ lbu(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_CHAR:
|
|
+ __ lhu(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ case T_SHORT:
|
|
+ __ lh(dest->as_register(), as_Address(from_addr));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ if (type == T_ARRAY || type == T_OBJECT) {
|
|
+ if (UseCompressedOops && !wide) {
|
|
+ __ decode_heap_oop(dest->as_register());
|
|
+ }
|
|
+ __ verify_oop(dest->as_register());
|
|
+ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ __ decode_klass_not_null(dest->as_register());
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
|
|
+ switch (op->code()) {
|
|
+ case lir_idiv:
|
|
+ case lir_irem:
|
|
+ arithmetic_idiv(op->code(),
|
|
+ op->in_opr1(),
|
|
+ op->in_opr2(),
|
|
+ op->in_opr3(),
|
|
+ op->result_opr(),
|
|
+ op->info());
|
|
+ break;
|
|
+ case lir_fmad:
|
|
+ __ fmadd_d(op->result_opr()->as_double_reg(),
|
|
+ op->in_opr1()->as_double_reg(),
|
|
+ op->in_opr2()->as_double_reg(),
|
|
+ op->in_opr3()->as_double_reg());
|
|
+ break;
|
|
+ case lir_fmaf:
|
|
+ __ fmadd_s(op->result_opr()->as_float_reg(),
|
|
+ op->in_opr1()->as_float_reg(),
|
|
+ op->in_opr2()->as_float_reg(),
|
|
+ op->in_opr3()->as_float_reg());
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ Label label;
|
|
+
|
|
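+ // when the condition holds, branch to 'label' and select opr1; otherwise fall through and select opr2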
+ emit_branch(condition, cmp_opr1, cmp_opr2, label, /* is_far */ false,
|
|
+ /* is_unordered */ (condition == lir_cond_greaterEqual || condition == lir_cond_greater) ? false : true);
|
|
+
|
|
+ Label done;
|
|
+ move_op(opr2, result, type, lir_patch_none, NULL,
|
|
+ false, // pop_fpu_stack
|
|
+ false, // unaligned
|
|
+ false); // wide
|
|
+ __ j(done);
|
|
+ __ bind(label);
|
|
+ move_op(opr1, result, type, lir_patch_none, NULL,
|
|
+ false, // pop_fpu_stack
|
|
+ false, // unaligned
|
|
+ false); // wide
|
|
+ __ bind(done);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
|
|
+ LIR_Condition condition = op->cond();
|
|
+ if (condition == lir_cond_always) {
|
|
+ if (op->info() != NULL) {
|
|
+ add_debug_info_for_branch(op->info());
|
|
+ }
|
|
+ } else {
|
|
+ assert(op->in_opr1() != LIR_OprFact::illegalOpr && op->in_opr2() != LIR_OprFact::illegalOpr, "conditional branches must have legal operands");
|
|
+ }
|
|
+ bool is_unordered = (op->ublock() == op->block());
|
|
+ emit_branch(condition, op->in_opr1(), op->in_opr2(), *op->label(), /* is_far */ true, is_unordered);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label,
|
|
+ bool is_far, bool is_unordered) {
|
|
+
|
|
+ if (cmp_flag == lir_cond_always) {
|
|
+ __ j(label);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (cmp1->is_cpu_register()) {
|
|
+ Register reg1 = as_reg(cmp1);
|
|
+ if (cmp2->is_cpu_register()) {
|
|
+ Register reg2 = as_reg(cmp2);
|
|
+ __ c1_cmp_branch(cmp_flag, reg1, reg2, label, cmp1->type(), is_far);
|
|
+ } else if (cmp2->is_constant()) {
|
|
+ const2reg_helper(cmp2);
|
|
+ __ c1_cmp_branch(cmp_flag, reg1, t0, label, cmp2->type(), is_far);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (cmp1->is_single_fpu()) {
|
|
+ assert(cmp2->is_single_fpu(), "expect single float register");
|
|
+ __ c1_float_cmp_branch(cmp_flag, cmp1->as_float_reg(), cmp2->as_float_reg(), label, is_far, is_unordered);
|
|
+ } else if (cmp1->is_double_fpu()) {
|
|
+ assert(cmp2->is_double_fpu(), "expect double float register");
|
|
+ __ c1_float_cmp_branch(cmp_flag | C1_MacroAssembler::c1_double_branch_mask,
|
|
+ cmp1->as_double_reg(), cmp2->as_double_reg(), label, is_far, is_unordered);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
|
|
+ LIR_Opr src = op->in_opr();
|
|
+ LIR_Opr dest = op->result_opr();
|
|
+
|
|
+ switch (op->bytecode()) {
|
|
+ case Bytecodes::_i2f:
|
|
+ __ fcvt_s_w(dest->as_float_reg(), src->as_register()); break;
|
|
+ case Bytecodes::_i2d:
|
|
+ __ fcvt_d_w(dest->as_double_reg(), src->as_register()); break;
|
|
+ case Bytecodes::_l2d:
|
|
+ __ fcvt_d_l(dest->as_double_reg(), src->as_register_lo()); break;
|
|
+ case Bytecodes::_l2f:
|
|
+ __ fcvt_s_l(dest->as_float_reg(), src->as_register_lo()); break;
|
|
+ case Bytecodes::_f2d:
|
|
+ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); break;
|
|
+ case Bytecodes::_d2f:
|
|
+ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break;
|
|
+ case Bytecodes::_i2c:
|
|
+ __ zero_extend(dest->as_register(), src->as_register(), 16); break;
|
|
+ case Bytecodes::_i2l:
|
|
+ __ addw(dest->as_register_lo(), src->as_register(), zr); break;
|
|
+ case Bytecodes::_i2s:
|
|
+ __ sign_extend(dest->as_register(), src->as_register(), 16); break;
|
|
+ case Bytecodes::_i2b:
|
|
+ __ sign_extend(dest->as_register(), src->as_register(), 8); break;
|
|
+ case Bytecodes::_l2i:
|
|
+ _masm->block_comment("FIXME: This could be a no-op");
|
|
+ __ addw(dest->as_register(), src->as_register_lo(), zr); break;
|
|
+ case Bytecodes::_d2l:
|
|
+ __ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break;
|
|
+ case Bytecodes::_f2i:
|
|
+ __ fcvt_w_s_safe(dest->as_register(), src->as_float_reg()); break;
|
|
+ case Bytecodes::_f2l:
|
|
+ __ fcvt_l_s_safe(dest->as_register_lo(), src->as_float_reg()); break;
|
|
+ case Bytecodes::_d2i:
|
|
+ __ fcvt_w_d_safe(dest->as_register(), src->as_double_reg()); break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
|
|
+ if (op->init_check()) {
|
|
+ __ lbu(t0, Address(op->klass()->as_register(),
|
|
+ InstanceKlass::init_state_offset()));
|
|
+ __ mvw(t1, InstanceKlass::fully_initialized);
|
|
+ add_debug_info_for_null_check_here(op->stub()->info());
|
|
+ __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true);
|
|
+ }
|
|
+
|
|
+ __ allocate_object(op->obj()->as_register(),
|
|
+ op->tmp1()->as_register(),
|
|
+ op->tmp2()->as_register(),
|
|
+ op->header_size(),
|
|
+ op->object_size(),
|
|
+ op->klass()->as_register(),
|
|
+ *op->stub()->entry());
|
|
+
|
|
+ __ bind(*op->stub()->continuation());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
|
|
+ Register len = op->len()->as_register();
|
|
+
|
|
+ if (UseSlowPath ||
|
|
+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) ||
|
|
+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) {
|
|
+ __ j(*op->stub()->entry());
|
|
+ } else {
|
|
+ Register tmp1 = op->tmp1()->as_register();
|
|
+ Register tmp2 = op->tmp2()->as_register();
|
|
+ Register tmp3 = op->tmp3()->as_register();
|
|
+ if (len == tmp1) {
|
|
+ tmp1 = tmp3;
|
|
+ } else if (len == tmp2) {
|
|
+ tmp2 = tmp3;
|
|
+ } else if (len == tmp3) {
|
|
+ // everything is ok
|
|
+ } else {
|
|
+ __ mv(tmp3, len);
|
|
+ }
|
|
+ __ allocate_array(op->obj()->as_register(),
|
|
+ len,
|
|
+ tmp1,
|
|
+ tmp2,
|
|
+ arrayOopDesc::header_size(op->type()),
|
|
+ array_element_size(op->type()),
|
|
+ op->klass()->as_register(),
|
|
+ *op->stub()->entry());
|
|
+ }
|
|
+ __ bind(*op->stub()->continuation());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
|
|
+ Register recv, Label* update_done) {
|
|
+ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
|
|
+ Label next_test;
|
|
+ // See if the receiver is receiver[n].
|
|
+ __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
|
|
+ __ bne(recv, t1, next_test);
|
|
+ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
|
|
+ __ increment(data_addr, DataLayout::counter_increment);
|
|
+ __ j(*update_done);
|
|
+ __ bind(next_test);
|
|
+ }
|
|
+
|
|
+ // Didn't find receiver; find next empty slot and fill it in
|
|
+ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
|
|
+ Label next_test;
|
|
+ Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
|
|
+ __ ld(t1, recv_addr);
|
|
+ __ bnez(t1, next_test);
|
|
+ __ sd(recv, recv_addr);
|
|
+ __ mv(t1, DataLayout::counter_increment);
|
|
+ __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
|
|
+ __ j(*update_done);
|
|
+ __ bind(next_test);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) {
|
|
+ ciMethod* method = op->profiled_method();
|
|
+ assert(method != NULL, "Should have method");
|
|
+ int bci = op->profiled_bci();
|
|
+ *md = method->method_data_or_null();
|
|
+ guarantee(*md != NULL, "Sanity");
|
|
+ *data = ((*md)->bci_to_data(bci));
|
|
+ assert(*data != NULL, "need data for type check");
|
|
+ assert((*data)->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
|
|
+}
|
|
+
|
|
+void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Register Rtmp1,
|
|
+ Register k_RInfo, Register klass_RInfo,
|
|
+ Label *failure_target, Label *success_target) {
|
|
+ // get object class
|
|
+ // not a safepoint as obj null check happens earlier
|
|
+ __ load_klass(klass_RInfo, obj);
|
|
+ if (k->is_loaded()) {
|
|
+ // See if we get an immediate positive hit
|
|
+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset())));
|
|
+ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
|
|
+ __ bne(k_RInfo, t0, *failure_target, /* is_far */ true);
|
|
+ // successful cast, fall through to profile or jump
|
|
+ } else {
|
|
+ // See if we get an immediate positive hit
|
|
+ __ beq(k_RInfo, t0, *success_target);
|
|
+ // check for self
|
|
+ __ beq(klass_RInfo, k_RInfo, *success_target);
|
|
+
|
|
+ __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
|
|
+ __ sd(k_RInfo, Address(sp, 0)); // sub klass
|
|
+ __ sd(klass_RInfo, Address(sp, wordSize)); // super klass
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
|
|
+ // load result to k_RInfo
|
|
+ __ ld(k_RInfo, Address(sp, 0));
|
|
+ __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
|
|
+ // result is a boolean
|
|
+ __ beqz(k_RInfo, *failure_target, /* is_far */ true);
|
|
+ // successful cast, fall through to profile or jump
|
|
+ }
|
|
+ } else {
|
|
+ // perform the fast part of the checking logic
|
|
+ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
|
|
+ // call out-of-line instance of __ check_klass_subtype_slow_path(...)
|
|
+ __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
|
|
+ __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
|
|
+ __ sd(k_RInfo, Address(sp, 0)); // super klass
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
|
|
+ // load result to k_RInfo
|
|
+ __ ld(k_RInfo, Address(sp, 0));
|
|
+ __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
|
|
+ // result is a boolean
|
|
+ __ beqz(k_RInfo, *failure_target, /* is_far */ true);
|
|
+ // successful cast, fall through to profile or jump
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Register obj,
|
|
+ Register klass_RInfo, Label* obj_is_null) {
|
|
+ Label not_null;
|
|
+ __ bnez(obj, not_null);
|
|
+ // Object is null, update MDO and exit
|
|
+ Register mdo = klass_RInfo;
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ Address data_addr = __ form_address(mdo, /* base */
|
|
+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */
|
|
+ 12, /* expect offset bits */
|
|
+ t1); /* temp reg */
|
|
+ __ lbu(t0, data_addr);
|
|
+ __ ori(t0, t0, BitData::null_seen_byte_constant());
|
|
+ __ sb(t0, data_addr);
|
|
+ __ j(*obj_is_null);
|
|
+ __ bind(not_null);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) {
|
|
+ if (!k->is_loaded()) {
|
|
+ klass2reg_with_patching(k_RInfo, op->info_for_patch());
|
|
+ } else {
|
|
+ __ mov_metadata(k_RInfo, k->constant_encoding());
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
|
|
+ Register obj = op->object()->as_register();
|
|
+ Register k_RInfo = op->tmp1()->as_register();
|
|
+ Register klass_RInfo = op->tmp2()->as_register();
|
|
+ Register dst = op->result_opr()->as_register();
|
|
+ ciKlass* k = op->klass();
|
|
+ Register Rtmp1 = noreg;
|
|
+
|
|
+ // check if it needs to be profiled
|
|
+ ciMethodData* md = NULL;
|
|
+ ciProfileData* data = NULL;
|
|
+
|
|
+ const bool should_profile = op->should_profile();
|
|
+ if (should_profile) {
|
|
+ data_check(op, &md, &data);
|
|
+ }
|
|
+ Label profile_cast_success, profile_cast_failure;
|
|
+ Label *success_target = should_profile ? &profile_cast_success : success;
|
|
+ Label *failure_target = should_profile ? &profile_cast_failure : failure;
|
|
+
|
|
+ if (obj == k_RInfo) {
|
|
+ k_RInfo = dst;
|
|
+ } else if (obj == klass_RInfo) {
|
|
+ klass_RInfo = dst;
|
|
+ }
|
|
+ if (k->is_loaded() && !UseCompressedClassPointers) {
|
|
+ select_different_registers(obj, dst, k_RInfo, klass_RInfo);
|
|
+ } else {
|
|
+ Rtmp1 = op->tmp3()->as_register();
|
|
+ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
|
|
+ }
|
|
+
|
|
+ assert_different_registers(obj, k_RInfo, klass_RInfo);
|
|
+
|
|
+ if (should_profile) {
|
|
+ profile_object(md, data, obj, klass_RInfo, obj_is_null);
|
|
+ } else {
|
|
+ __ beqz(obj, *obj_is_null);
|
|
+ }
|
|
+
|
|
+ typecheck_loaded(op, k, k_RInfo);
|
|
+ __ verify_oop(obj);
|
|
+
|
|
+ if (op->fast_check()) {
|
|
+ // get object class
|
|
+ // not a safepoint as obj null check happens earlier
|
|
+ __ load_klass(t0, obj);
|
|
+ __ bne(t0, k_RInfo, *failure_target, /* is_far */ true);
|
|
+ // successful cast, fall through to profile or jump
|
|
+ } else {
|
|
+ typecheck_helper_slowcheck(k, obj, Rtmp1, k_RInfo, klass_RInfo, failure_target, success_target);
|
|
+ }
|
|
+ if (should_profile) {
|
|
+ type_profile(obj, md, klass_RInfo, k_RInfo, data, success, failure, profile_cast_success, profile_cast_failure);
|
|
+ }
|
|
+ __ j(*success);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
|
|
+ const bool should_profile = op->should_profile();
|
|
+
|
|
+ LIR_Code code = op->code();
|
|
+ if (code == lir_store_check) {
|
|
+ typecheck_lir_store(op, should_profile);
|
|
+ } else if (code == lir_checkcast) {
|
|
+ Register obj = op->object()->as_register();
|
|
+ Register dst = op->result_opr()->as_register();
|
|
+ Label success;
|
|
+ emit_typecheck_helper(op, &success, op->stub()->entry(), &success);
|
|
+ __ bind(success);
|
|
+ if (dst != obj) {
|
|
+ __ mv(dst, obj);
|
|
+ }
|
|
+ } else if (code == lir_instanceof) {
|
|
+ Register obj = op->object()->as_register();
|
|
+ Register dst = op->result_opr()->as_register();
|
|
+ Label success, failure, done;
|
|
+ emit_typecheck_helper(op, &success, &failure, &failure);
|
|
+ __ bind(failure);
|
|
+ __ mv(dst, zr);
|
|
+ __ j(done);
|
|
+ __ bind(success);
|
|
+ __ mv(dst, 1);
|
|
+ __ bind(done);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
|
|
+ assert(VM_Version::supports_cx8(), "wrong machine");
|
|
+ Register addr;
|
|
+ if (op->addr()->is_register()) {
|
|
+ addr = as_reg(op->addr());
|
|
+ } else {
|
|
+ assert(op->addr()->is_address(), "what else?");
|
|
+ LIR_Address* addr_ptr = op->addr()->as_address_ptr();
|
|
+ assert(addr_ptr->disp() == 0, "need 0 disp");
|
|
+ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index");
|
|
+ addr = as_reg(addr_ptr->base());
|
|
+ }
|
|
+ Register newval = as_reg(op->new_value());
|
|
+ Register cmpval = as_reg(op->cmp_value());
|
|
+
|
|
+ if (op->code() == lir_cas_obj) {
|
|
+ if (UseCompressedOops) {
|
|
+ Register tmp1 = op->tmp1()->as_register();
|
|
+ assert(op->tmp1()->is_valid(), "must be");
|
|
+ __ encode_heap_oop(tmp1, cmpval);
|
|
+ cmpval = tmp1;
|
|
+ __ encode_heap_oop(t1, newval);
|
|
+ newval = t1;
|
|
+ caswu(addr, newval, cmpval);
|
|
+ } else {
|
|
+ casl(addr, newval, cmpval);
|
|
+ }
|
|
+ } else if (op->code() == lir_cas_int) {
|
|
+ casw(addr, newval, cmpval);
|
|
+ } else {
|
|
+ casl(addr, newval, cmpval);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
|
|
+ switch (code) {
|
|
+ case lir_abs: __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break;
|
|
+ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
|
|
+ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
|
|
+ Register Rleft = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
|
|
+ if (dst->is_single_cpu()) {
|
|
+ Register Rdst = dst->as_register();
|
|
+ if (right->is_constant()) {
|
|
+ int right_const = right->as_jint();
|
|
+ if (Assembler::operand_valid_for_add_immediate(right_const)) {
|
|
+ logic_op_imm(Rdst, Rleft, right_const, code);
|
|
+ __ addw(Rdst, Rdst, zr);
|
|
+ } else {
|
|
+ __ mv(t0, right_const);
|
|
+ logic_op_reg32(Rdst, Rleft, t0, code);
|
|
+ }
|
|
+ } else {
|
|
+ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo();
|
|
+ logic_op_reg32(Rdst, Rleft, Rright, code);
|
|
+ }
|
|
+ } else {
|
|
+ Register Rdst = dst->as_register_lo();
|
|
+ if (right->is_constant()) {
|
|
+ long right_const = right->as_jlong();
|
|
+ if (Assembler::operand_valid_for_add_immediate(right_const)) {
|
|
+ logic_op_imm(Rdst, Rleft, right_const, code);
|
|
+ } else {
|
|
+ __ mv(t0, right_const);
|
|
+ logic_op_reg(Rdst, Rleft, t0, code);
|
|
+ }
|
|
+ } else {
|
|
+ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo();
|
|
+ logic_op_reg(Rdst, Rleft, Rright, code);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr src, LIR_Opr result, LIR_Op2* op) {
|
|
+ ShouldNotCallThis();
|
|
+}
|
|
+
|
|
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
|
|
+ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
|
|
+ bool is_unordered_less = (code == lir_ucmp_fd2i);
|
|
+ if (left->is_single_fpu()) {
|
|
+ __ float_cmp(true, is_unordered_less ? -1 : 1,
|
|
+ left->as_float_reg(), right->as_float_reg(), dst->as_register());
|
|
+ } else if (left->is_double_fpu()) {
|
|
+ __ float_cmp(false, is_unordered_less ? -1 : 1,
|
|
+ left->as_double_reg(), right->as_double_reg(), dst->as_register());
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (code == lir_cmp_l2i) {
|
|
+ __ cmp_l2i(dst->as_register(), left->as_register_lo(), right->as_register_lo());
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::align_call(LIR_Code code) { }
|
|
+
|
|
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
|
|
+ address call = __ trampoline_call(Address(op->addr(), rtype));
|
|
+ if (call == NULL) {
|
|
+ bailout("trampoline stub overflow");
|
|
+ return;
|
|
+ }
|
|
+ add_call_info(code_offset(), op->info());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
|
|
+ address call = __ ic_call(op->addr());
|
|
+ if (call == NULL) {
|
|
+ bailout("trampoline stub overflow");
|
|
+ return;
|
|
+ }
|
|
+ add_call_info(code_offset(), op->info());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); }
|
|
+
|
|
+void LIR_Assembler::emit_static_call_stub() {
|
|
+ address call_pc = __ pc();
|
|
+ address stub = __ start_a_stub(call_stub_size());
|
|
+ if (stub == NULL) {
|
|
+ bailout("static call stub overflow");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ int start = __ offset();
|
|
+
|
|
+ __ relocate(static_stub_Relocation::spec(call_pc));
|
|
+ __ emit_static_call_stub();
|
|
+
|
|
+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big");
|
|
+ __ end_a_stub();
|
|
+}
|
|
+
|
|
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
|
|
+ assert(exceptionOop->as_register() == x10, "must match");
|
|
+ assert(exceptionPC->as_register() == x13, "must match");
|
|
+
|
|
+ // exception object is not added to oop map by LinearScan
|
|
+ // (LinearScan assumes that no oops are in fixed registers)
|
|
+ info->add_register_oop(exceptionOop);
|
|
+ Runtime1::StubID unwind_id;
|
|
+
|
|
+ // get current pc information
|
|
+ // pc is only needed if the method has an exception handler, the unwind code does not need it.
|
|
+ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) {
|
|
+ // As no instructions have been generated yet for this LIR node it's
|
|
+ // possible that an oop map already exists for the current offset.
|
|
+ // In that case insert a dummy NOP here to ensure all oop map PCs
|
|
+ // are unique. See JDK-8237483.
|
|
+ __ nop();
|
|
+ }
|
|
+ int pc_for_athrow_offset = __ offset();
|
|
+ InternalAddress pc_for_athrow(__ pc());
|
|
+ int32_t off = 0;
|
|
+ __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off);
|
|
+ __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off);
|
|
+ add_call_info(pc_for_athrow_offset, info); // for exception handler
|
|
+
|
|
+ __ verify_not_null_oop(x10);
|
|
+ // search an exception handler (x10: exception oop, x13: throwing pc)
|
|
+ if (compilation()->has_fpu_code()) {
|
|
+ unwind_id = Runtime1::handle_exception_id;
|
|
+ } else {
|
|
+ unwind_id = Runtime1::handle_exception_nofpu_id;
|
|
+ }
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
|
|
+ __ nop();
|
|
+}
|
|
+
|
|
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
|
|
+ assert(exceptionOop->as_register() == x10, "must match");
|
|
+ __ j(_unwind_handler_entry);
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
|
|
+ Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
|
|
+ Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
|
|
+ Register count_reg = count->as_register();
|
|
+ if (dest->is_single_cpu()) {
|
|
+ assert (dest->type() == T_INT, "unexpected result type");
|
|
+ assert (left->type() == T_INT, "unexpected left type");
|
|
+ __ andi(t0, count_reg, 31); // should not shift more than 31 bits
|
|
+ switch (code) {
|
|
+ case lir_shl: __ sllw(dest_reg, left_reg, t0); break;
|
|
+ case lir_shr: __ sraw(dest_reg, left_reg, t0); break;
|
|
+ case lir_ushr: __ srlw(dest_reg, left_reg, t0); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (dest->is_double_cpu()) {
|
|
+ __ andi(t0, count_reg, 63); // should not shift more than 63 bits
|
|
+ switch (code) {
|
|
+ case lir_shl: __ sll(dest_reg, left_reg, t0); break;
|
|
+ case lir_shr: __ sra(dest_reg, left_reg, t0); break;
|
|
+ case lir_ushr: __ srl(dest_reg, left_reg, t0); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
|
|
+ Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
|
|
+ Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
|
|
+ if (dest->is_single_cpu()) {
|
|
+ assert (dest->type() == T_INT, "unexpected result type");
|
|
+ assert (left->type() == T_INT, "unexpected left type");
|
|
+ count &= 0x1f;
|
|
+ if (count != 0) {
|
|
+ switch (code) {
|
|
+ case lir_shl: __ slliw(dest_reg, left_reg, count); break;
|
|
+ case lir_shr: __ sraiw(dest_reg, left_reg, count); break;
|
|
+ case lir_ushr: __ srliw(dest_reg, left_reg, count); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ move_regs(left_reg, dest_reg);
|
|
+ }
|
|
+ } else if (dest->is_double_cpu()) {
|
|
+ count &= 0x3f;
|
|
+ if (count != 0) {
|
|
+ switch (code) {
|
|
+ case lir_shl: __ slli(dest_reg, left_reg, count); break;
|
|
+ case lir_shr: __ srai(dest_reg, left_reg, count); break;
|
|
+ case lir_ushr: __ srli(dest_reg, left_reg, count); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ move_regs(left->as_register_lo(), dest->as_register_lo());
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
|
|
+ Register obj = op->obj_opr()->as_register(); // may not be an oop
|
|
+ Register hdr = op->hdr_opr()->as_register();
|
|
+ Register lock = op->lock_opr()->as_register();
|
|
+ if (!UseFastLocking) {
|
|
+ __ j(*op->stub()->entry());
|
|
+ } else if (op->code() == lir_lock) {
|
|
+ Register scratch = noreg;
|
|
+ if (UseBiasedLocking) {
|
|
+ scratch = op->scratch_opr()->as_register();
|
|
+ }
|
|
+ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
|
|
+ // add debug info for NullPointerException only if one is possible
|
|
+ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
|
|
+ if (op->info() != NULL) {
|
|
+ add_debug_info_for_null_check(null_check_offset, op->info());
|
|
+ }
|
|
+ } else if (op->code() == lir_unlock) {
|
|
+ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
|
|
+ __ unlock_object(hdr, obj, lock, *op->stub()->entry());
|
|
+ } else {
|
|
+ Unimplemented();
|
|
+ }
|
|
+ __ bind(*op->stub()->continuation());
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
|
|
+ ciMethod* method = op->profiled_method();
|
|
+ int bci = op->profiled_bci();
|
|
+
|
|
+ // Update counter for all call types
|
|
+ ciMethodData* md = method->method_data_or_null();
|
|
+ guarantee(md != NULL, "Sanity");
|
|
+ ciProfileData* data = md->bci_to_data(bci);
|
|
+ assert(data != NULL && data->is_CounterData(), "need CounterData for calls");
|
|
+ assert(op->mdo()->is_single_cpu(), "mdo must be allocated");
|
|
+ Register mdo = op->mdo()->as_register();
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
|
|
+ // Perform additional virtual call profiling for invokevirtual and
|
|
+ // invokeinterface bytecodes
|
|
+ if (op->should_profile_receiver_type()) {
|
|
+ assert(op->recv()->is_single_cpu(), "recv must be allocated");
|
|
+ Register recv = op->recv()->as_register();
|
|
+ assert_different_registers(mdo, recv);
|
|
+ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
|
|
+ ciKlass* known_klass = op->known_holder();
|
|
+ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
|
|
+ // We know the type that will be seen at this call site; we can
|
|
+ // statically update the MethodData* rather than needing to do
|
|
+ // dynamic tests on the receiver type
|
|
+ // NOTE: we should probably put a lock around this search to
|
|
+ // avoid collisions by concurrent compilations
|
|
+ ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
|
|
+ uint i;
|
|
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
|
|
+ ciKlass* receiver = vc_data->receiver(i);
|
|
+ if (known_klass->equals(receiver)) {
|
|
+ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
|
|
+ __ increment(data_addr, DataLayout::counter_increment);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Receiver type not found in profile data; select an empty slot
|
|
+ // Note that this is less efficient than it should be because it
|
|
+ // always does a write to the receiver part of the
|
|
+ // VirtualCallData rather than just the first time
|
|
+ for (i = 0; i < VirtualCallData::row_limit(); i++) {
|
|
+ ciKlass* receiver = vc_data->receiver(i);
|
|
+ if (receiver == NULL) {
|
|
+ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
|
|
+ __ mov_metadata(t1, known_klass->constant_encoding());
|
|
+ __ sd(t1, recv_addr);
|
|
+ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
|
|
+ __ increment(data_addr, DataLayout::counter_increment);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ __ load_klass(recv, recv);
|
|
+ Label update_done;
|
|
+ type_profile_helper(mdo, md, data, recv, &update_done);
|
|
+ // Receiver did not match any saved receiver and there is no empty row for it.
|
|
+ // Increment total counter to indicate polymorphic case.
|
|
+ __ increment(counter_addr, DataLayout::counter_increment);
|
|
+
|
|
+ __ bind(update_done);
|
|
+ }
|
|
+ } else {
|
|
+ // Static call
|
|
+ __ increment(counter_addr, DataLayout::counter_increment);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_delay(LIR_OpDelay*) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
|
|
+ __ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); }
|
|
+
|
|
+void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass,
|
|
+ Register tmp, Label &next, Label &none,
|
|
+ Address mdo_addr) {
|
|
+ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
|
|
+ if (exact_klass != NULL) {
|
|
+ __ mov_metadata(tmp, exact_klass->constant_encoding());
|
|
+ } else {
|
|
+ __ load_klass(tmp, tmp);
|
|
+ }
|
|
+
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ xorr(tmp, tmp, t1);
|
|
+ __ andi(t0, tmp, TypeEntries::type_klass_mask);
|
|
+ // klass seen before, nothing to do. The unknown bit may have been
|
|
+ // set already but no need to check.
|
|
+ __ beqz(t0, next);
|
|
+
|
|
+ // already unknown. Nothing to do anymore.
|
|
+ __ andi(t0, tmp, TypeEntries::type_unknown);
|
|
+ __ bnez(t0, next);
|
|
+
|
|
+ if (TypeEntries::is_type_none(current_klass)) {
|
|
+ __ beqz(t1, none);
|
|
+ __ mv(t0, (u1)TypeEntries::null_seen);
|
|
+ __ beq(t0, t1, none);
|
|
+ // There is a chance that the checks above (re-reading profiling
|
|
+ // data from memory) fail if another thread has just set the
|
|
+ // profiling to this obj's klass
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ xorr(tmp, tmp, t1);
|
|
+ __ andi(t0, tmp, TypeEntries::type_klass_mask);
|
|
+ __ beqz(t0, next);
|
|
+ }
|
|
+ } else {
|
|
+ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
|
|
+ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
|
|
+
|
|
+ __ ld(tmp, mdo_addr);
|
|
+ // already unknown. Nothing to do anymore.
|
|
+ __ andi(t0, tmp, TypeEntries::type_unknown);
|
|
+ __ bnez(t0, next);
|
|
+ }
|
|
+
|
|
+ // different than before. Cannot keep accurate profile.
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ ori(t1, t1, TypeEntries::type_unknown);
|
|
+ __ sd(t1, mdo_addr);
|
|
+
|
|
+ if (TypeEntries::is_type_none(current_klass)) {
|
|
+ __ j(next);
|
|
+
|
|
+ __ bind(none);
|
|
+ // first time here. Set profile type.
|
|
+ __ sd(tmp, mdo_addr);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
|
|
+ Address mdo_addr, Label &next) {
|
|
+ // There's a single possible klass at this profile point
|
|
+ assert(exact_klass != NULL, "should be");
|
|
+ if (TypeEntries::is_type_none(current_klass)) {
|
|
+ __ mov_metadata(tmp, exact_klass->constant_encoding());
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ xorr(tmp, tmp, t1);
|
|
+ __ andi(t0, tmp, TypeEntries::type_klass_mask);
|
|
+ __ beqz(t0, next);
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label ok;
|
|
+ __ ld(t0, mdo_addr);
|
|
+ __ beqz(t0, ok);
|
|
+ __ mv(t1, (u1)TypeEntries::null_seen);
|
|
+ __ beq(t0, t1, ok);
|
|
+ // may have been set by another thread
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+ __ mov_metadata(t0, exact_klass->constant_encoding());
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ xorr(t1, t0, t1);
|
|
+ __ andi(t1, t1, TypeEntries::type_mask);
|
|
+ __ beqz(t1, ok);
|
|
+
|
|
+ __ stop("unexpected profiling mismatch");
|
|
+ __ bind(ok);
|
|
+ }
|
|
+#endif
|
|
+ // first time here. Set profile type.
|
|
+ __ sd(tmp, mdo_addr);
|
|
+ } else {
|
|
+ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
|
|
+ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
|
|
+
|
|
+ __ ld(tmp, mdo_addr);
|
|
+ // already unknown. Nothing to do anymore.
|
|
+ __ andi(t0, tmp, TypeEntries::type_unknown);
|
|
+ __ bnez(t0, next);
|
|
+
|
|
+ __ ori(tmp, tmp, TypeEntries::type_unknown);
|
|
+ __ sd(tmp, mdo_addr);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::check_null(Register tmp, Label &update, intptr_t current_klass,
|
|
+ Address mdo_addr, bool do_update, Label &next) {
|
|
+ __ bnez(tmp, update);
|
|
+ if (!TypeEntries::was_null_seen(current_klass)) {
|
|
+ __ ld(t1, mdo_addr);
|
|
+ __ ori(t1, t1, TypeEntries::null_seen);
|
|
+ __ sd(t1, mdo_addr);
|
|
+ }
|
|
+ if (do_update) {
|
|
+ __ j(next);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
|
|
+ COMMENT("emit_profile_type {");
|
|
+ Register obj = op->obj()->as_register();
|
|
+ Register tmp = op->tmp()->as_pointer_register();
|
|
+ Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
|
|
+ ciKlass* exact_klass = op->exact_klass();
|
|
+ intptr_t current_klass = op->current_klass();
|
|
+ bool not_null = op->not_null();
|
|
+ bool no_conflict = op->no_conflict();
|
|
+
|
|
+ Label update, next, none;
|
|
+
|
|
+ bool do_null = !not_null;
|
|
+ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
|
|
+ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
|
|
+
|
|
+ assert(do_null || do_update, "why are we here?");
|
|
+ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
|
|
+ assert_different_registers(tmp, t0, t1, mdo_addr.base());
|
|
+
|
|
+ __ verify_oop(obj);
|
|
+
|
|
+ if (tmp != obj) {
|
|
+ __ mv(tmp, obj);
|
|
+ }
|
|
+ if (do_null) {
|
|
+ check_null(tmp, update, current_klass, mdo_addr, do_update, next);
|
|
+#ifdef ASSERT
|
|
+ } else {
|
|
+ __ bnez(tmp, update);
|
|
+ __ stop("unexpected null obj");
|
|
+#endif
|
|
+ }
|
|
+
|
|
+ __ bind(update);
|
|
+
|
|
+ if (do_update) {
|
|
+#ifdef ASSERT
|
|
+ if (exact_klass != NULL) {
|
|
+ check_exact_klass(tmp, exact_klass);
|
|
+ }
|
|
+#endif
|
|
+ if (!no_conflict) {
|
|
+ check_conflict(exact_klass, current_klass, tmp, next, none, mdo_addr);
|
|
+ } else {
|
|
+ check_no_conflict(exact_klass, current_klass, tmp, mdo_addr, next);
|
|
+ }
|
|
+
|
|
+ __ bind(next);
|
|
+ }
|
|
+ COMMENT("} emit_profile_type");
|
|
+}
|
|
+
|
|
+void LIR_Assembler::align_backward_branch_target() { }
|
|
+
|
|
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
|
|
+ // tmp must be unused
|
|
+ assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
|
|
+
|
|
+ if (left->is_single_cpu()) {
|
|
+ assert(dest->is_single_cpu(), "expect single result reg");
|
|
+ __ negw(dest->as_register(), left->as_register());
|
|
+ } else if (left->is_double_cpu()) {
|
|
+ assert(dest->is_double_cpu(), "expect double result reg");
|
|
+ __ neg(dest->as_register_lo(), left->as_register_lo());
|
|
+ } else if (left->is_single_fpu()) {
|
|
+ assert(dest->is_single_fpu(), "expect single float result reg");
|
|
+ __ fneg_s(dest->as_float_reg(), left->as_float_reg());
|
|
+ } else {
|
|
+ assert(left->is_double_fpu(), "expect double float operand reg");
|
|
+ assert(dest->is_double_fpu(), "expect double float result reg");
|
|
+ __ fneg_d(dest->as_double_reg(), left->as_double_reg());
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
|
|
+#if INCLUDE_SHENANDOAHGC
|
|
+ if (UseShenandoahGC && patch_code != lir_patch_none) {
|
|
+ deoptimize_trap(info);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ assert(patch_code == lir_patch_none, "Patch code not supported");
|
|
+ LIR_Address* adr = addr->as_address_ptr();
|
|
+ Register dst = dest->as_register_lo();
|
|
+
|
|
+ assert_different_registers(dst, t0);
|
|
+ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) {
|
|
+
|
|
+ intptr_t offset = adr->disp();
|
|
+ LIR_Opr index_op = adr->index();
|
|
+ int scale = adr->scale();
|
|
+ if (index_op->is_constant()) {
|
|
+ offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale;
|
|
+ }
|
|
+
|
|
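+ // the combined displacement does not fit in a 12-bit immediate and dst aliases the base register, so materialize the address in t0 before moving it into dst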
+ if (!is_imm_in_range(offset, 12, 0)) {
|
|
+ __ la(t0, as_Address(adr));
|
|
+ __ mv(dst, t0);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ __ la(dst, as_Address(adr));
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
|
|
+ assert(!tmp->is_valid(), "don't need temporary");
|
|
+
|
|
+ CodeBlob *cb = CodeCache::find_blob(dest);
|
|
+ if (cb != NULL) {
|
|
+ __ far_call(RuntimeAddress(dest));
|
|
+ } else {
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(dest), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ }
|
|
+
|
|
+ if (info != NULL) {
|
|
+ add_call_info_here(info);
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
|
|
+ if (dest->is_address() || src->is_address()) {
|
|
+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false,
|
|
+ /* unaligned */ false, /* wide */ false);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifdef ASSERT
|
|
+// emit run-time assertion
|
|
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
|
|
+ assert(op->code() == lir_assert, "must be");
|
|
+
|
|
+ Label ok;
|
|
+ if (op->in_opr1()->is_valid()) {
|
|
+ assert(op->in_opr2()->is_valid(), "both operands must be valid");
|
|
+ bool is_unordered = false;
|
|
+ LIR_Condition cond = op->condition();
|
|
+ emit_branch(cond, op->in_opr1(), op->in_opr2(), ok, /* is_far */ false,
|
|
+ /* is_unordered */(cond == lir_cond_greaterEqual || cond == lir_cond_greater) ? false : true);
|
|
+ } else {
|
|
+ assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
|
|
+ assert(op->condition() == lir_cond_always, "no other conditions allowed");
|
|
+ }
|
|
+
|
|
+ if (op->halt()) {
|
|
+ const char* str = __ code_string(op->msg());
|
|
+ __ stop(str);
|
|
+ } else {
|
|
+ breakpoint();
|
|
+ }
|
|
+ __ bind(ok);
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifndef PRODUCT
|
|
+#define COMMENT(x) do { __ block_comment(x); } while (0)
|
|
+#else
|
|
+#define COMMENT(x)
|
|
+#endif
|
|
+
|
|
+void LIR_Assembler::membar() {
|
|
+ COMMENT("membar");
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::membar_acquire() {
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::membar_release() {
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::membar_loadload() {
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::membar_storestore() {
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
|
|
+
|
|
+void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
|
|
+
|
|
+void LIR_Assembler::on_spin_wait() {
|
|
+ Unimplemented();
|
|
+}
|
|
+
|
|
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
|
|
+ __ mv(result_reg->as_register(), xthread);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::peephole(LIR_List *lir) {}
|
|
+
|
|
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
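+  // lir_xadd / lir_xchg: materialize the effective address into tmp with la(), then
+  // apply the AMO helper selected by get_op() below.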
|
|
+ Address addr = as_Address(src->as_address_ptr());
|
|
+ BasicType type = src->type();
|
|
+ bool is_oop = type == T_OBJECT || type == T_ARRAY;
|
|
+
|
|
+ get_op(type);
|
|
+
|
|
+ switch (code) {
|
|
+ case lir_xadd:
|
|
+ {
|
|
+ RegisterOrConstant inc;
|
|
+ Register tmp = as_reg(tmp_op);
|
|
+ Register dst = as_reg(dest);
|
|
+ if (data->is_constant()) {
|
|
+ inc = RegisterOrConstant(as_long(data));
|
|
+ assert_different_registers(dst, addr.base(), tmp);
|
|
+ assert_different_registers(tmp, t0);
|
|
+ } else {
|
|
+ inc = RegisterOrConstant(as_reg(data));
|
|
+ assert_different_registers(inc.as_register(), dst, addr.base(), tmp);
|
|
+ }
|
|
+ __ la(tmp, addr);
|
|
+ (_masm->*add)(dst, inc, tmp);
|
|
+ break;
|
|
+ }
|
|
+ case lir_xchg:
|
|
+ {
|
|
+ Register tmp = tmp_op->as_register();
|
|
+ Register obj = as_reg(data);
|
|
+ Register dst = as_reg(dest);
|
|
+ if (is_oop && UseCompressedOops) {
|
|
+ __ encode_heap_oop(t0, obj);
|
|
+ obj = t0;
|
|
+ }
|
|
+ assert_different_registers(obj, addr.base(), tmp, dst);
|
|
+ __ la(tmp, addr);
|
|
+ (_masm->*xchg)(dst, obj, tmp);
|
|
+ if (is_oop && UseCompressedOops) {
|
|
+ __ decode_heap_oop(dst);
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+}
|
|
+
|
|
+int LIR_Assembler::array_element_size(BasicType type) const {
|
|
+ int elem_size = type2aelembytes(type);
|
|
+ return exact_log2(elem_size);
|
|
+}
|
|
+
|
|
+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
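+  // Flatten the LIR_Address (base + index << scale + disp) into a simple base + offset
+  // form, materializing a scaled register index into tmp when needed.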
|
|
+ if (addr->base()->is_illegal()) {
|
|
+ assert(addr->index()->is_illegal(), "must be illegal too");
|
|
+ __ movptr(tmp, addr->disp());
|
|
+ return Address(tmp, 0);
|
|
+ }
|
|
+
|
|
+ Register base = addr->base()->as_pointer_register();
|
|
+ LIR_Opr index_op = addr->index();
|
|
+ int scale = addr->scale();
|
|
+
|
|
+ if (index_op->is_illegal()) {
|
|
+ return Address(base, addr->disp());
|
|
+ } else if (index_op->is_cpu_register()) {
|
|
+ Register index;
|
|
+ if (index_op->is_single_cpu()) {
|
|
+ index = index_op->as_register();
|
|
+ } else {
|
|
+ index = index_op->as_register_lo();
|
|
+ }
|
|
+ if (scale != 0) {
|
|
+ __ shadd(tmp, index, base, tmp, scale);
|
|
+ } else {
|
|
+ __ add(tmp, base, index);
|
|
+ }
|
|
+ return Address(tmp, addr->disp());
|
|
+ } else if (index_op->is_constant()) {
|
|
+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp();
|
|
+ return Address(base, addr_offset);
|
|
+ }
|
|
+
|
|
+ Unimplemented();
|
|
+ return Address();
|
|
+}
|
|
+
|
|
+// Helper functions which check for overflow and set bailout if it
+// occurs. They always return a valid embeddable pointer, but in the
+// bailout case the pointer won't point to unique storage.
|
|
+address LIR_Assembler::float_constant(float f) {
|
|
+ address const_addr = __ float_constant(f);
|
|
+ if (const_addr == NULL) {
|
|
+ bailout("const section overflow");
|
|
+ return __ code()->consts()->start();
|
|
+ } else {
|
|
+ return const_addr;
|
|
+ }
|
|
+}
|
|
+
|
|
+address LIR_Assembler::double_constant(double d) {
|
|
+ address const_addr = __ double_constant(d);
|
|
+ if (const_addr == NULL) {
|
|
+ bailout("const section overflow");
|
|
+ return __ code()->consts()->start();
|
|
+ } else {
|
|
+ return const_addr;
|
|
+ }
|
|
+}
|
|
+
|
|
+address LIR_Assembler::int_constant(jlong n) {
|
|
+ address const_addr = __ long_constant(n);
|
|
+ if (const_addr == NULL) {
|
|
+ bailout("const section overflow");
|
|
+ return __ code()->consts()->start();
|
|
+ } else {
|
|
+ return const_addr;
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
|
|
+ _masm->code_section()->relocate(adr, relocInfo::poll_type);
|
|
+ int pc_offset = code_offset();
|
|
+ flush_debug_info(pc_offset);
|
|
+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset);
|
|
+ if (info->exception_handlers() != NULL) {
|
|
+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers());
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
+
+void LIR_Assembler::caswu(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::uint32, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
+
+void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::int64, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
|
|
+
|
|
+void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
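+  // Call the Runtime1 patching stub that matches the pending patch kind, then record the call info.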
|
|
+ address target = NULL;
|
|
+
|
|
+ switch (patching_id(info)) {
|
|
+ case PatchingStub::access_field_id:
|
|
+ target = Runtime1::entry_for(Runtime1::access_field_patching_id);
|
|
+ break;
|
|
+ case PatchingStub::load_klass_id:
|
|
+ target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
|
|
+ break;
|
|
+ case PatchingStub::load_mirror_id:
|
|
+ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
|
|
+ break;
|
|
+ case PatchingStub::load_appendix_id:
|
|
+ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ __ far_call(RuntimeAddress(target));
|
|
+ add_call_info_here(info);
|
|
+}
|
|
+
|
|
+
|
|
+void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) {
|
|
+ Label ok;
|
|
+ __ load_klass(tmp, tmp);
|
|
+ __ mov_metadata(t0, exact_klass->constant_encoding());
|
|
+ __ beq(tmp, t0, ok);
|
|
+ __ stop("exact klass and actual klass differ");
|
|
+ __ bind(ok);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::get_op(BasicType type) {
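+  // Pick the MacroAssembler AMO helpers used by atomic_op(): 32-bit variants for T_INT
+  // and compressed oops, 64-bit variants for T_LONG and uncompressed oops.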
|
|
+ switch (type) {
|
|
+ case T_INT:
|
|
+ xchg = &MacroAssembler::atomic_xchgalw;
|
|
+ add = &MacroAssembler::atomic_addalw;
|
|
+ break;
|
|
+ case T_LONG:
|
|
+ xchg = &MacroAssembler::atomic_xchgal;
|
|
+ add = &MacroAssembler::atomic_addal;
|
|
+ break;
|
|
+ case T_OBJECT:
|
|
+ case T_ARRAY:
|
|
+ if (UseCompressedOops) {
|
|
+ xchg = &MacroAssembler::atomic_xchgalwu;
|
|
+ add = &MacroAssembler::atomic_addalw;
|
|
+ } else {
|
|
+ xchg = &MacroAssembler::atomic_xchgal;
|
|
+ add = &MacroAssembler::atomic_addal;
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+// emit_opTypeCheck sub functions
|
|
+void LIR_Assembler::typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile) {
|
|
+ Register value = op->object()->as_register();
|
|
+ Register array = op->array()->as_register();
|
|
+ Register k_RInfo = op->tmp1()->as_register();
|
|
+ Register klass_RInfo = op->tmp2()->as_register();
|
|
+ Register Rtmp1 = op->tmp3()->as_register();
|
|
+
|
|
+ CodeStub* stub = op->stub();
|
|
+
|
|
+ // check if it needs to be profiled
|
|
+ ciMethodData* md = NULL;
|
|
+ ciProfileData* data = NULL;
|
|
+
|
|
+ if (should_profile) {
|
|
+ data_check(op, &md, &data);
|
|
+ }
|
|
+ Label profile_cast_success, profile_cast_failure, done;
|
|
+ Label *success_target = should_profile ? &profile_cast_success : &done;
|
|
+ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry();
|
|
+
|
|
+ if (should_profile) {
|
|
+ profile_object(md, data, value, klass_RInfo, &done);
|
|
+ } else {
|
|
+ __ beqz(value, done);
|
|
+ }
|
|
+
|
|
+ add_debug_info_for_null_check_here(op->info_for_exception());
|
|
+ __ load_klass(k_RInfo, array);
|
|
+ __ load_klass(klass_RInfo, value);
|
|
+
|
|
+ lir_store_slowcheck(k_RInfo, klass_RInfo, Rtmp1, success_target, failure_target);
|
|
+
|
|
+ // fall through to the success case
|
|
+ if (should_profile) {
|
|
+ Register mdo = klass_RInfo;
|
|
+ Register recv = k_RInfo;
|
|
+ __ bind(profile_cast_success);
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ __ load_klass(recv, value);
|
|
+ type_profile_helper(mdo, md, data, recv, &done);
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(profile_cast_failure);
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
|
|
+ __ ld(t1, counter_addr);
|
|
+ __ addi(t1, t1, -DataLayout::counter_increment);
|
|
+ __ sd(t1, counter_addr);
|
|
+ __ j(*stub->entry());
|
|
+ }
|
|
+
|
|
+ __ bind(done);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo,
|
|
+ ciProfileData* data, Label* success, Label* failure,
|
|
+ Label& profile_cast_success, Label& profile_cast_failure) {
|
|
+ Register mdo = klass_RInfo;
|
|
+ Register recv = k_RInfo;
|
|
+ __ bind(profile_cast_success);
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ __ load_klass(recv, obj);
|
|
+ Label update_done;
|
|
+ type_profile_helper(mdo, md, data, recv, success);
|
|
+ __ j(*success);
|
|
+
|
|
+ __ bind(profile_cast_failure);
|
|
+ __ mov_metadata(mdo, md->constant_encoding());
|
|
+ Address counter_addr = __ form_address(mdo, /* base */
|
|
+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */
|
|
+ 12, /* expect offset bits */
|
|
+ t1); /* temp reg */
|
|
+ __ ld(t0, counter_addr);
|
|
+ __ addi(t0, t0, -DataLayout::counter_increment);
|
|
+ __ sd(t0, counter_addr);
|
|
+ __ j(*failure);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1,
|
|
+ Label* success_target, Label* failure_target) {
|
|
+ // get instance klass (it's already uncompressed)
|
|
+ __ ld(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
|
|
+ // perform the fast part of the checking logic
|
|
+ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
|
|
+ // call out-of-line instance of __ check_klass_subtype_slow_path(...)
|
|
+ __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
|
|
+ __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass
|
|
+ __ sd(k_RInfo, Address(sp, 0)); // super klass
|
|
+ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
|
|
+ // load result to k_RInfo
|
|
+ __ ld(k_RInfo, Address(sp, 0));
|
|
+ __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
|
|
+ // result is a boolean
|
|
+ __ beqz(k_RInfo, *failure_target, /* is_far */ true);
|
|
+}
|
|
+
|
|
+void LIR_Assembler::const2reg_helper(LIR_Opr src) {
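+  // Load the constant into the scratch operand t0 (T_LONG constants use the long variant).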
|
|
+ switch (src->as_constant_ptr()->type()) {
|
|
+ case T_INT:
|
|
+ case T_ADDRESS:
|
|
+ case T_OBJECT:
|
|
+ case T_ARRAY:
|
|
+ case T_METADATA:
|
|
+ const2reg(src, FrameMap::t0_opr, lir_patch_none, NULL);
|
|
+ break;
|
|
+ case T_LONG:
|
|
+ const2reg(src, FrameMap::t0_long_opr, lir_patch_none, NULL);
|
|
+ break;
|
|
+ case T_FLOAT:
|
|
+ case T_DOUBLE:
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::logic_op_reg32(Register dst, Register left, Register right, LIR_Code code) {
|
|
+ switch (code) {
|
|
+ case lir_logic_and: __ andrw(dst, left, right); break;
|
|
+ case lir_logic_or: __ orrw (dst, left, right); break;
|
|
+ case lir_logic_xor: __ xorrw(dst, left, right); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::logic_op_reg(Register dst, Register left, Register right, LIR_Code code) {
|
|
+ switch (code) {
|
|
+ case lir_logic_and: __ andr(dst, left, right); break;
|
|
+ case lir_logic_or: __ orr (dst, left, right); break;
|
|
+ case lir_logic_xor: __ xorr(dst, left, right); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::logic_op_imm(Register dst, Register left, int right, LIR_Code code) {
|
|
+ switch (code) {
|
|
+ case lir_logic_and: __ andi(dst, left, right); break;
|
|
+ case lir_logic_or: __ ori (dst, left, right); break;
|
|
+ case lir_logic_xor: __ xori(dst, left, right); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
|
|
+ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
|
|
+ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
|
|
+ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
|
|
+ __ sd(r, Address(sp, offset_from_rsp_in_bytes));
|
|
+}
|
|
+
|
|
+void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) {
|
|
+ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
|
|
+ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
|
|
+ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
|
|
+ __ mv(t0, c);
|
|
+ __ sd(t0, Address(sp, offset_from_rsp_in_bytes));
|
|
+}
|
|
+
|
|
+#undef __
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
new file mode 100644
index 000000000..11a47fd6e
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -0,0 +1,132 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
|
|
+
|
|
+// ArrayCopyStub needs access to bailout
|
|
+friend class ArrayCopyStub;
|
|
+
|
|
+private:
|
|
+
|
|
+#include "c1_LIRAssembler_arith_riscv.hpp"
|
|
+#include "c1_LIRAssembler_arraycopy_riscv.hpp"
|
|
+
|
|
+ int array_element_size(BasicType type) const;
|
|
+
|
|
+ static Register as_reg(LIR_Opr op) {
|
|
+ return op->is_double_cpu() ? op->as_register_lo() : op->as_register();
|
|
+ }
|
|
+
|
|
+ Address as_Address(LIR_Address* addr, Register tmp);
|
|
+
|
|
+ // Ensure we have a valid Address (base+offset) to a stack-slot.
|
|
+ Address stack_slot_address(int index, uint shift, int adjust = 0);
|
|
+
|
|
+  // Helper functions which check for overflow and set bailout if it
+  // occurs. They always return a valid embeddable pointer, but in the
+  // bailout case the pointer won't point to unique storage.
|
|
+ address float_constant(float f);
|
|
+ address double_constant(double d);
|
|
+ address int_constant(jlong n);
|
|
+
|
|
+ // Record the type of the receiver in ReceiverTypeData
|
|
+ void type_profile_helper(Register mdo,
|
|
+ ciMethodData *md, ciProfileData *data,
|
|
+ Register recv, Label* update_done);
|
|
+
|
|
+ void add_debug_info_for_branch(address adr, CodeEmitInfo* info);
|
|
+
|
|
+ void casw(Register addr, Register newval, Register cmpval);
|
|
+ void caswu(Register addr, Register newval, Register cmpval);
|
|
+ void casl(Register addr, Register newval, Register cmpval);
|
|
+
|
|
+ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL);
|
|
+
|
|
+ void deoptimize_trap(CodeEmitInfo *info);
|
|
+
|
|
+ enum
|
|
+ {
|
|
+ // see emit_static_call_stub for detail:
|
|
+ // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address)
|
|
+ _call_stub_size = 14 * NativeInstruction::instruction_size +
|
|
+ (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size),
|
|
+ _call_aot_stub_size = 0,
|
|
+ // see emit_exception_handler for detail:
|
|
+ // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
|
|
+ _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller
|
|
+ // see emit_deopt_handler for detail
|
|
+ // auipc (1) + far_jump (6 or 2)
|
|
+ _deopt_handler_size = 1 * NativeInstruction::instruction_size +
|
|
+ 6 * NativeInstruction::instruction_size // or smaller
|
|
+ };
|
|
+
|
|
+ void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
|
|
+ Label &next, Label &none, Address mdo_addr);
|
|
+ void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next);
|
|
+
|
|
+ void check_exact_klass(Register tmp, ciKlass* exact_klass);
|
|
+ void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next);
|
|
+
|
|
+ void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr);
|
|
+ void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr);
|
|
+ void get_op(BasicType type);
|
|
+
|
|
+ // emit_typecheck_helper sub functions
|
|
+ void data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data);
|
|
+ void typecheck_helper_slowcheck(ciKlass* k, Register obj, Register Rtmp1,
|
|
+ Register k_RInfo, Register klass_RInfo,
|
|
+ Label* failure_target, Label* success_target);
|
|
+ void profile_object(ciMethodData* md, ciProfileData* data, Register obj,
|
|
+ Register klass_RInfo, Label* obj_is_null);
|
|
+ void typecheck_loaded(LIR_OpTypeCheck* op, ciKlass* k, Register k_RInfo);
|
|
+
|
|
+ // emit_opTypeCheck sub functions
|
|
+ void typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile);
|
|
+
|
|
+ void type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo,
|
|
+ ciProfileData* data, Label* success, Label* failure,
|
|
+ Label& profile_cast_success, Label& profile_cast_failure);
|
|
+
|
|
+ void lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1,
|
|
+ Label* success_target, Label* failure_target);
|
|
+
|
|
+ void const2reg_helper(LIR_Opr src);
|
|
+
|
|
+ void emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, bool is_far, bool is_unordered);
|
|
+
|
|
+ void logic_op_reg32(Register dst, Register left, Register right, LIR_Code code);
|
|
+ void logic_op_reg(Register dst, Register left, Register right, LIR_Code code);
|
|
+ void logic_op_imm(Register dst, Register left, int right, LIR_Code code);
|
|
+
|
|
+public:
|
|
+
|
|
+ void emit_cmove(LIR_Op4* op);
|
|
+
|
|
+ void store_parameter(Register r, int offset_from_rsp_in_words);
|
|
+ void store_parameter(jint c, int offset_from_rsp_in_words);
|
|
+
|
|
+#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
new file mode 100644
index 000000000..8ba9ed66d
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
@@ -0,0 +1,1083 @@
|
|
+/*
|
|
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "c1/c1_Compilation.hpp"
|
|
+#include "c1/c1_FrameMap.hpp"
|
|
+#include "c1/c1_Instruction.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_LIRGenerator.hpp"
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "c1/c1_ValueStack.hpp"
|
|
+#include "ci/ciArray.hpp"
|
|
+#include "ci/ciObjArrayKlass.hpp"
|
|
+#include "ci/ciTypeArrayKlass.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+
|
|
+#ifdef ASSERT
|
|
+#define __ gen()->lir(__FILE__, __LINE__)->
|
|
+#else
|
|
+#define __ gen()->lir()->
|
|
+#endif
|
|
+
|
|
+// Item will be loaded into a byte register; Intel only
|
|
+void LIRItem::load_byte_item() {
|
|
+ load_item();
|
|
+}
|
|
+
|
|
+
|
|
+void LIRItem::load_nonconstant() {
|
|
+ LIR_Opr r = value()->operand();
|
|
+ if (r->is_constant()) {
|
|
+ _result = r;
|
|
+ } else {
|
|
+ load_item();
|
|
+ }
|
|
+}
|
|
+
|
|
+//--------------------------------------------------------------
|
|
+// LIRGenerator
|
|
+//--------------------------------------------------------------
|
|
+
|
|
+
|
|
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r10_oop_opr; }
|
|
+LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r13_opr; }
|
|
+LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; }
|
|
+LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; }
|
|
+LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; }
|
|
+LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; }
|
|
+LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); }
|
|
+LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r10_opr; }
|
|
+LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; }
|
|
+
|
|
+
|
|
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
|
|
+ LIR_Opr opr;
|
|
+ switch (type->tag()) {
|
|
+ case intTag: opr = FrameMap::r10_opr; break;
|
|
+ case objectTag: opr = FrameMap::r10_oop_opr; break;
|
|
+ case longTag: opr = FrameMap::long10_opr; break;
|
|
+ case floatTag: opr = FrameMap::fpu10_float_opr; break;
|
|
+ case doubleTag: opr = FrameMap::fpu10_double_opr; break;
|
|
+
|
|
+ case addressTag: // fall through
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ return LIR_OprFact::illegalOpr;
|
|
+ }
|
|
+
|
|
+ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
|
|
+ return opr;
|
|
+}
|
|
+
|
|
+
|
|
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
|
|
+ LIR_Opr reg = new_register(T_INT);
|
|
+ set_vreg_flag(reg, LIRGenerator::byte_reg);
|
|
+ return reg;
|
|
+}
|
|
+
|
|
+//--------- loading items into registers --------------------------------
|
|
+
|
|
+
|
|
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
|
|
+ if (v->type()->as_IntConstant() != NULL) {
|
|
+ return v->type()->as_IntConstant()->value() == 0;
|
|
+ } else if (v->type()->as_LongConstant() != NULL) {
|
|
+ return v->type()->as_LongConstant()->value() == 0;
|
|
+ } else if (v->type()->as_ObjectConstant() != NULL) {
|
|
+ return v->type()->as_ObjectConstant()->value()->is_null_object();
|
|
+ } else if (v->type()->as_FloatConstant() != NULL) {
|
|
+ return jint_cast(v->type()->as_FloatConstant()->value()) == 0.0f;
|
|
+ } else if (v->type()->as_DoubleConstant() != NULL) {
|
|
+ return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0.0;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+bool LIRGenerator::can_inline_as_constant(Value v) const {
|
|
+ if (v->type()->as_IntConstant() != NULL) {
|
|
+ int value = v->type()->as_IntConstant()->value();
|
|
+ // "-value" must be defined for value may be used for sub
|
|
+ return Assembler::operand_valid_for_add_immediate(value) &&
|
|
+ Assembler::operand_valid_for_add_immediate(- value);
|
|
+ } else if (v->type()->as_ObjectConstant() != NULL) {
|
|
+ return v->type()->as_ObjectConstant()->value()->is_null_object();
|
|
+ } else if (v->type()->as_LongConstant() != NULL) {
|
|
+ long value = v->type()->as_LongConstant()->value();
|
|
+ // "-value" must be defined for value may be used for sub
|
|
+ return Assembler::operand_valid_for_add_immediate(value) &&
|
|
+ Assembler::operand_valid_for_add_immediate(- value);
|
|
+ } else if (v->type()->as_FloatConstant() != NULL) {
|
|
+ return v->type()->as_FloatConstant()->value() == 0.0f;
|
|
+ } else if (v->type()->as_DoubleConstant() != NULL) {
|
|
+ return v->type()->as_DoubleConstant()->value() == 0.0;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+
|
|
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
|
|
+ if (c->as_constant() != NULL) {
|
|
+ long constant = 0;
|
|
+ switch (c->type()) {
|
|
+ case T_INT: constant = c->as_jint(); break;
|
|
+ case T_LONG: constant = c->as_jlong(); break;
|
|
+ default: return false;
|
|
+ }
|
|
+ // "-constant" must be defined for c may be used for sub
|
|
+ return Assembler::operand_valid_for_add_immediate(constant) &&
|
|
+ Assembler::operand_valid_for_add_immediate(- constant);
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+
|
|
+LIR_Opr LIRGenerator::safepoint_poll_register() {
|
|
+ return LIR_OprFact::illegalOpr;
|
|
+}
|
|
+
|
|
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
|
|
+ int shift, int disp, BasicType type) {
|
|
+ assert(base->is_register(), "must be");
|
|
+
|
|
+ if (index->is_constant()) {
|
|
+ LIR_Const *constant = index->as_constant_ptr();
|
|
+ jlong c;
|
|
+ if (constant->type() == T_INT) {
|
|
+ c = (jlong(index->as_jint()) << shift) + disp;
|
|
+ } else {
|
|
+ assert(constant->type() == T_LONG, "should be");
|
|
+ c = (index->as_jlong() << shift) + disp;
|
|
+ }
|
|
+ if ((jlong)((jint)c) == c) {
|
|
+ return new LIR_Address(base, (jint)c, type);
|
|
+ } else {
|
|
+ LIR_Opr tmp = new_register(T_LONG);
|
|
+ __ move(index, tmp);
|
|
+ return new LIR_Address(base, tmp, type);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type);
|
|
+}
|
|
+
|
|
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
|
|
+ BasicType type) {
|
|
+ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
|
|
+ int elem_size = type2aelembytes(type);
|
|
+ int shift = exact_log2(elem_size);
|
|
+
|
|
+ return generate_address(array_opr, index_opr, shift, offset_in_bytes, type);
|
|
+}
|
|
+
|
|
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
|
|
+ switch (type) {
|
|
+ case T_LONG: return LIR_OprFact::longConst(x);
|
|
+ case T_INT: return LIR_OprFact::intConst(x);
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
|
|
+ LIR_Opr pointer = new_pointer_register();
|
|
+ __ move(LIR_OprFact::intptrConst(counter), pointer);
|
|
+ LIR_Address* addr = new LIR_Address(pointer, type);
|
|
+ increment_counter(addr, step);
|
|
+}
|
|
+
|
|
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
|
|
+ LIR_Opr reg = new_register(addr->type());
|
|
+ __ load(addr, reg);
|
|
+ __ add(reg, load_immediate(step, addr->type()), reg);
|
|
+ __ store(reg, addr);
|
|
+}
|
|
+
|
|
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
|
|
+ LIR_Opr reg = new_register(T_INT);
|
|
+ __ load(generate_address(base, disp, T_INT), reg, info);
|
|
+ __ cmp(condition, reg, LIR_OprFact::intConst(c));
|
|
+}
|
|
+
|
|
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
|
|
+ LIR_Opr reg1 = new_register(T_INT);
|
|
+ __ load(generate_address(base, disp, type), reg1, info);
|
|
+ __ cmp(condition, reg, reg1);
|
|
+}
|
|
+
|
|
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
|
|
+ if (tmp->is_valid() && c > 0 && c < max_jint) {
|
|
+ if (is_power_of_2(c - 1)) {
|
|
+ __ shift_left(left, exact_log2(c - 1), tmp);
|
|
+ __ add(tmp, left, result);
|
|
+ return true;
|
|
+ } else if (is_power_of_2(c + 1)) {
|
|
+ __ shift_left(left, exact_log2(c + 1), tmp);
|
|
+ __ sub(tmp, left, result);
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
|
|
+ BasicType type = item->type();
|
|
+ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type));
|
|
+}
|
|
+
|
|
+void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info,
|
|
+ ciMethod* profiled_method, int profiled_bci) {
|
|
+ LIR_Opr tmp1 = new_register(objectType);
|
|
+ LIR_Opr tmp2 = new_register(objectType);
|
|
+ LIR_Opr tmp3 = new_register(objectType);
|
|
+ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci);
|
|
+}
|
|
+
|
|
+//----------------------------------------------------------------------
|
|
+// visitor functions
|
|
+//----------------------------------------------------------------------
|
|
+
|
|
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
|
|
+ assert(x->is_pinned(), "");
|
|
+ LIRItem obj(x->obj(), this);
|
|
+ obj.load_item();
|
|
+
|
|
+ set_no_result(x);
|
|
+
|
|
+ // "lock" stores the address of the monitor stack slot, so this is not an oop
|
|
+ LIR_Opr lock = new_register(T_INT);
|
|
+ // Need a tmp register for biased locking
|
|
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
|
|
+ if (UseBiasedLocking) {
|
|
+ tmp = new_register(T_INT);
|
|
+ }
|
|
+
|
|
+ CodeEmitInfo* info_for_exception = NULL;
|
|
+ if (x->needs_null_check()) {
|
|
+ info_for_exception = state_for(x);
|
|
+ }
|
|
+ // this CodeEmitInfo must not have the xhandlers because here the
|
|
+ // object is already locked (xhandlers expect object to be unlocked)
|
|
+ CodeEmitInfo* info = state_for(x, x->state(), true);
|
|
+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp,
|
|
+ x->monitor_no(), info_for_exception, info);
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
|
|
+ assert(x->is_pinned(), "");
|
|
+
|
|
+ LIRItem obj(x->obj(), this);
|
|
+ obj.dont_load_item();
|
|
+
|
|
+ LIR_Opr lock = new_register(T_INT);
|
|
+ LIR_Opr obj_temp = new_register(T_INT);
|
|
+ set_no_result(x);
|
|
+ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
|
|
+}
|
|
+
|
|
+// neg
|
|
+void LIRGenerator::do_NegateOp(NegateOp* x) {
|
|
+ LIRItem from(x->x(), this);
|
|
+ from.load_item();
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ negate(from.result(), result);
|
|
+}
|
|
+
|
|
+// for _fadd, _fmul, _fsub, _fdiv, _frem
|
|
+// _dadd, _dmul, _dsub, _ddiv, _drem
|
|
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
|
|
+ LIRItem left(x->x(), this);
|
|
+ LIRItem right(x->y(), this);
|
|
+
|
|
+ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
|
|
+
|
|
+ // float remainder is implemented as a direct call into the runtime
|
|
+ BasicTypeList signature(2);
|
|
+ if (x->op() == Bytecodes::_frem) {
|
|
+ signature.append(T_FLOAT);
|
|
+ signature.append(T_FLOAT);
|
|
+ } else {
|
|
+ signature.append(T_DOUBLE);
|
|
+ signature.append(T_DOUBLE);
|
|
+ }
|
|
+ CallingConvention* cc = frame_map()->c_calling_convention(&signature);
|
|
+
|
|
+ const LIR_Opr result_reg = result_register_for(x->type());
|
|
+
|
|
+ left.load_item();
|
|
+ __ move(left.result(), cc->at(0));
|
|
+ right.load_item_force(cc->at(1));
|
|
+
|
|
+ address entry;
|
|
+ if (x->op() == Bytecodes::_frem) {
|
|
+ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
|
|
+ } else {
|
|
+ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
|
|
+ }
|
|
+
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
|
|
+ __ move(result_reg, result);
|
|
+
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (!left.is_register()) {
|
|
+ left.load_item();
|
|
+ }
|
|
+ // Always load right hand side.
|
|
+ right.load_item();
|
|
+
|
|
+ LIR_Opr reg = rlock(x);
|
|
+ LIR_Opr tmp = LIR_OprFact::illegalOpr;
|
|
+ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) {
|
|
+ tmp = new_register(T_DOUBLE);
|
|
+ }
|
|
+
|
|
+ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp());
|
|
+
|
|
+ set_result(x, round_item(reg));
|
|
+}
|
|
+
|
|
+// for _ladd, _lmul, _lsub, _ldiv, _lrem
|
|
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
|
|
+
|
|
+ // missing test if instr is commutative and if we should swap
|
|
+ LIRItem left(x->x(), this);
|
|
+ LIRItem right(x->y(), this);
|
|
+
|
|
+ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
|
|
+
|
|
+ left.load_item();
|
|
+
|
|
+ bool need_zero_check = true;
|
|
+ if (right.is_constant()) {
|
|
+ jlong c = right.get_jlong_constant();
|
|
+ // no need to do div-by-zero check if the divisor is a non-zero constant
|
|
+ if (c != 0) { need_zero_check = false; }
|
|
+ // do not load right if the divisor is a power-of-2 constant
|
|
+ if (c > 0 && is_power_of_2(c)) {
|
|
+ right.dont_load_item();
|
|
+ } else {
|
|
+ right.load_item();
|
|
+ }
|
|
+ } else {
|
|
+ right.load_item();
|
|
+ }
|
|
+ if (need_zero_check) {
|
|
+ CodeEmitInfo* info = state_for(x);
|
|
+ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
|
|
+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info));
|
|
+ }
|
|
+
|
|
+ rlock_result(x);
|
|
+ switch (x->op()) {
|
|
+ case Bytecodes::_lrem:
|
|
+ __ rem(left.result(), right.result(), x->operand());
|
|
+ break;
|
|
+ case Bytecodes::_ldiv:
|
|
+ __ div(left.result(), right.result(), x->operand());
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else {
|
|
+ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub,
|
|
+ "expect lmul, ladd or lsub");
|
|
+ // add, sub, mul
|
|
+ left.load_item();
|
|
+ if (!right.is_register()) {
|
|
+ if (x->op() == Bytecodes::_lmul ||
|
|
+ !right.is_constant() ||
|
|
+ (x->op() == Bytecodes::_ladd &&
|
|
+ !Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) ||
|
|
+ (x->op() == Bytecodes::_lsub &&
|
|
+ !Assembler::operand_valid_for_add_immediate(-right.get_jlong_constant()))) {
|
|
+ right.load_item();
|
|
+ } else { // add, sub
|
|
+ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expected ladd or lsub");
|
|
+ // don't load constants to save register
|
|
+ right.load_nonconstant();
|
|
+ }
|
|
+ }
|
|
+ rlock_result(x);
|
|
+ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
|
|
+ }
|
|
+}
|
|
+
|
|
+// for: _iadd, _imul, _isub, _idiv, _irem
|
|
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
|
|
+
|
|
+ // Test if instr is commutative and if we should swap
|
|
+ LIRItem left(x->x(), this);
|
|
+ LIRItem right(x->y(), this);
|
|
+ LIRItem* left_arg = &left;
|
|
+ LIRItem* right_arg = &right;
|
|
+ if (x->is_commutative() && left.is_stack() && right.is_register()) {
|
|
+ // swap them if left is real stack (or cached) and right is real register (not cached)
|
|
+ left_arg = &right;
|
|
+ right_arg = &left;
|
|
+ }
|
|
+ left_arg->load_item();
|
|
+ // do not need to load right, as we can handle stack and constants
|
|
+ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
|
|
+
|
|
+ rlock_result(x);
|
|
+
|
|
+ bool need_zero_check = true;
|
|
+ if (right.is_constant()) {
|
|
+ jint c = right.get_jint_constant();
|
|
+ // no need to do div-by-zero check if the divisor is a non-zero constant
|
|
+ if (c != 0) { need_zero_check = false; }
|
|
+ // do not load right if the divisor is a power-of-2 constant
|
|
+ if (c > 0 && is_power_of_2(c)) {
|
|
+ right_arg->dont_load_item();
|
|
+ } else {
|
|
+ right_arg->load_item();
|
|
+ }
|
|
+ } else {
|
|
+ right_arg->load_item();
|
|
+ }
|
|
+ if (need_zero_check) {
|
|
+ CodeEmitInfo* info = state_for(x);
|
|
+ __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
|
|
+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info));
|
|
+ }
|
|
+
|
|
+ LIR_Opr ill = LIR_OprFact::illegalOpr;
|
|
+
|
|
+ if (x->op() == Bytecodes::_irem) {
|
|
+ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
|
|
+ } else if (x->op() == Bytecodes::_idiv) {
|
|
+ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
|
|
+ }
|
|
+ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {
|
|
+ if (right.is_constant() &&
|
|
+ ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) ||
|
|
+ (x->op() == Bytecodes::_isub && !Assembler::operand_valid_for_add_immediate(-right.get_jint_constant())))) {
|
|
+ right.load_nonconstant();
|
|
+ } else {
|
|
+ right.load_item();
|
|
+ }
|
|
+ rlock_result(x);
|
|
+ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr);
|
|
+ } else {
|
|
+ assert (x->op() == Bytecodes::_imul, "expect imul");
|
|
+ if (right.is_constant()) {
|
|
+ jint c = right.get_jint_constant();
|
|
+ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) {
|
|
+ right_arg->dont_load_item();
|
|
+ } else {
|
|
+ // Cannot use constant op.
|
|
+ right_arg->load_item();
|
|
+ }
|
|
+ } else {
|
|
+ right.load_item();
|
|
+ }
|
|
+ rlock_result(x);
|
|
+ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT));
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
|
|
+ // when an operand with use count 1 is the left operand, then it is
|
|
+ // likely that no move for 2-operand-LIR-form is necessary
|
|
+ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
|
|
+ x->swap_operands();
|
|
+ }
|
|
+
|
|
+ ValueTag tag = x->type()->tag();
|
|
+ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
|
|
+ switch (tag) {
|
|
+ case floatTag:
|
|
+ case doubleTag: do_ArithmeticOp_FPU(x); return;
|
|
+ case longTag: do_ArithmeticOp_Long(x); return;
|
|
+ case intTag: do_ArithmeticOp_Int(x); return;
|
|
+ default: ShouldNotReachHere(); return;
|
|
+ }
|
|
+}
|
|
+
|
|
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
|
|
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
|
|
+ LIRItem value(x->x(), this);
|
|
+ LIRItem count(x->y(), this);
|
|
+
|
|
+ value.load_item();
|
|
+ if (count.is_constant()) {
|
|
+ assert(count.type()->as_IntConstant() != NULL || count.type()->as_LongConstant() != NULL , "should be");
|
|
+ count.dont_load_item();
|
|
+ } else {
|
|
+ count.load_item();
|
|
+ }
|
|
+
|
|
+ LIR_Opr res = rlock_result(x);
|
|
+ shift_op(x->op(), res, value.result(), count.result(), LIR_OprFact::illegalOpr);
|
|
+}
|
|
+
|
|
+
|
|
+// _iand, _land, _ior, _lor, _ixor, _lxor
|
|
+void LIRGenerator::do_LogicOp(LogicOp* x) {
|
|
+
|
|
+ LIRItem left(x->x(), this);
|
|
+ LIRItem right(x->y(), this);
|
|
+
|
|
+ left.load_item();
|
|
+ rlock_result(x);
|
|
+ ValueTag tag = right.type()->tag();
|
|
+ if (right.is_constant() &&
|
|
+ ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) ||
|
|
+ (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) {
|
|
+ right.dont_load_item();
|
|
+ } else {
|
|
+ right.load_item();
|
|
+ }
|
|
+
|
|
+ switch (x->op()) {
|
|
+ case Bytecodes::_iand: // fall through
|
|
+ case Bytecodes::_land:
|
|
+ __ logical_and(left.result(), right.result(), x->operand()); break;
|
|
+ case Bytecodes::_ior: // fall through
|
|
+ case Bytecodes::_lor:
|
|
+ __ logical_or(left.result(), right.result(), x->operand()); break;
|
|
+ case Bytecodes::_ixor: // fall through
|
|
+ case Bytecodes::_lxor:
|
|
+ __ logical_xor(left.result(), right.result(), x->operand()); break;
|
|
+ default: Unimplemented();
|
|
+ }
|
|
+}
|
|
+
|
|
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
|
|
+void LIRGenerator::do_CompareOp(CompareOp* x) {
|
|
+ LIRItem left(x->x(), this);
|
|
+ LIRItem right(x->y(), this);
|
|
+ ValueTag tag = x->x()->type()->tag();
|
|
+ if (tag == longTag) {
|
|
+ left.set_destroys_register();
|
|
+ }
|
|
+ left.load_item();
|
|
+ right.load_item();
|
|
+ LIR_Opr reg = rlock_result(x);
|
|
+
|
|
+ if (x->x()->type()->is_float_kind()) {
|
|
+ Bytecodes::Code code = x->op();
|
|
+ __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
|
|
+ } else if (x->x()->type()->tag() == longTag) {
|
|
+ __ lcmp2int(left.result(), right.result(), reg);
|
|
+ } else {
|
|
+ Unimplemented();
|
|
+ }
|
|
+}
|
|
+
|
|
+LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) {
|
|
+ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience
|
|
+ new_value.load_item();
|
|
+ cmp_value.load_item();
|
|
+ LIR_Opr result = new_register(T_INT);
|
|
+ if (type == T_OBJECT || type == T_ARRAY) {
|
|
+ __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result);
|
|
+ } else if (type == T_INT) {
|
|
+ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill);
|
|
+ } else if (type == T_LONG) {
|
|
+ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ __ logical_xor(FrameMap::r5_opr, LIR_OprFact::intConst(1), result);
|
|
+ return result;
|
|
+}
|
|
+
|
|
+LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) {
|
|
+ bool is_oop = type == T_OBJECT || type == T_ARRAY;
|
|
+ LIR_Opr result = new_register(type);
|
|
+ value.load_item();
|
|
+ assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type");
|
|
+ LIR_Opr tmp = new_register(T_INT);
|
|
+ __ xchg(addr, value.result(), result, tmp);
|
|
+ return result;
|
|
+}
|
|
+
|
|
+LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) {
|
|
+ LIR_Opr result = new_register(type);
|
|
+ value.load_item();
|
|
+ assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type");
|
|
+ LIR_Opr tmp = new_register(T_INT);
|
|
+ __ xadd(addr, value.result(), result, tmp);
|
|
+ return result;
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|
+ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow),
|
|
+ "wrong type");
|
|
+
|
|
+ switch (x->id()) {
|
|
+ case vmIntrinsics::_dexp: // fall through
|
|
+ case vmIntrinsics::_dlog: // fall through
|
|
+ case vmIntrinsics::_dpow: // fall through
|
|
+ case vmIntrinsics::_dcos: // fall through
|
|
+ case vmIntrinsics::_dsin: // fall through
|
|
+ case vmIntrinsics::_dtan: // fall through
|
|
+ case vmIntrinsics::_dlog10:
|
|
+ do_LibmIntrinsic(x);
|
|
+ break;
|
|
+ case vmIntrinsics::_dabs: // fall through
|
|
+ case vmIntrinsics::_dsqrt: {
|
|
+ assert(x->number_of_arguments() == 1, "wrong type");
|
|
+ LIRItem value(x->argument_at(0), this);
|
|
+ value.load_item();
|
|
+ LIR_Opr dst = rlock_result(x);
|
|
+
|
|
+ switch (x->id()) {
|
|
+ case vmIntrinsics::_dsqrt: {
|
|
+ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
|
|
+ break;
|
|
+ }
|
|
+ case vmIntrinsics::_dabs: {
|
|
+ __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
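+  // Each intrinsic prefers its generated stub (StubRoutines::d*) and falls back to the
+  // SharedRuntime implementation when the stub is not available.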
|
|
+ LIRItem value(x->argument_at(0), this);
|
|
+ value.set_destroys_register();
|
|
+ LIR_Opr calc_result = rlock_result(x);
|
|
+ LIR_Opr result_reg = result_register_for(x->type());
|
|
+ CallingConvention* cc = NULL;
|
|
+ BasicTypeList signature(1);
|
|
+ signature.append(T_DOUBLE);
|
|
+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); }
|
|
+ cc = frame_map()->c_calling_convention(&signature);
|
|
+ value.load_item_force(cc->at(0));
|
|
+ if (x->id() == vmIntrinsics::_dpow) {
|
|
+ LIRItem value1(x->argument_at(1), this);
|
|
+ value1.set_destroys_register();
|
|
+ value1.load_item_force(cc->at(1));
|
|
+ }
|
|
+ switch (x->id()) {
|
|
+ case vmIntrinsics::_dexp:
|
|
+ if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dlog:
|
|
+ if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dlog10:
|
|
+ if (StubRoutines::dlog10() != NULL) { __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dsin:
|
|
+ if (StubRoutines::dsin() != NULL) { __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dcos:
|
|
+ if (StubRoutines::dcos() != NULL) { __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dtan:
|
|
+ if (StubRoutines::dtan() != NULL) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ case vmIntrinsics::_dpow:
|
|
+ if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); }
|
|
+ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); }
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ __ move(result_reg, calc_result);
|
|
+}
|
|
+
|
|
+
|
|
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
|
|
+ assert(x->number_of_arguments() == 5, "wrong type");
|
|
+
|
|
+ // Make all state_for calls early since they can emit code
|
|
+ CodeEmitInfo* info = state_for(x, x->state());
|
|
+
|
|
+ LIRItem src(x->argument_at(0), this);
|
|
+ LIRItem src_pos(x->argument_at(1), this);
|
|
+ LIRItem dst(x->argument_at(2), this);
|
|
+ LIRItem dst_pos(x->argument_at(3), this);
|
|
+ LIRItem length(x->argument_at(4), this);
|
|
+
|
|
+ // operands for arraycopy must use fixed registers, otherwise
|
|
+ // LinearScan will fail allocation (because arraycopy always needs a
|
|
+ // call)
|
|
+
|
|
+ // The java calling convention will give us enough registers
|
|
+ // so that on the stub side the args will be perfect already.
|
|
+ // On the other slow/special case side we call C and the arg
|
|
+ // positions are not similar enough to pick one as the best.
|
|
+ // Also because the java calling convention is a "shifted" version
|
|
+ // of the C convention we can process the java args trivially into C
|
|
+ // args without worry of overwriting during the xfer
|
|
+
|
|
+ src.load_item_force (FrameMap::as_oop_opr(j_rarg0));
|
|
+ src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
|
|
+ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2));
|
|
+ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
|
|
+ length.load_item_force (FrameMap::as_opr(j_rarg4));
|
|
+
|
|
+ LIR_Opr tmp = FrameMap::as_opr(j_rarg5);
|
|
+
|
|
+ set_no_result(x);
|
|
+
|
|
+ int flags;
|
|
+ ciArrayKlass* expected_type = NULL;
|
|
+ arraycopy_helper(x, &flags, &expected_type);
|
|
+
|
|
+ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp,
|
|
+ expected_type, flags, info); // does add_safepoint
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
|
|
+ ShouldNotReachHere();
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
|
|
+ ShouldNotReachHere();
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
|
|
+ assert(x->number_of_arguments() == 3, "wrong type");
|
|
+ assert(UseFMA, "Needs FMA instructions support.");
|
|
+ LIRItem value(x->argument_at(0), this);
|
|
+ LIRItem value1(x->argument_at(1), this);
|
|
+ LIRItem value2(x->argument_at(2), this);
|
|
+
|
|
+ value.load_item();
|
|
+ value1.load_item();
|
|
+ value2.load_item();
|
|
+
|
|
+ LIR_Opr calc_input = value.result();
|
|
+ LIR_Opr calc_input1 = value1.result();
|
|
+ LIR_Opr calc_input2 = value2.result();
|
|
+ LIR_Opr calc_result = rlock_result(x);
|
|
+
|
|
+ switch (x->id()) {
|
|
+ case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break;
|
|
+ case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
|
|
+ fatal("vectorizedMismatch intrinsic is not implemented on this platform");
|
|
+}
|
|
+
|
|
+// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
|
|
+// _i2b, _i2c, _i2s
|
|
+void LIRGenerator::do_Convert(Convert* x) {
|
|
+ LIRItem value(x->value(), this);
|
|
+ value.load_item();
|
|
+ LIR_Opr input = value.result();
|
|
+ LIR_Opr result = rlock(x);
|
|
+
|
|
+ // arguments of lir_convert
|
|
+ LIR_Opr conv_input = input;
|
|
+ LIR_Opr conv_result = result;
|
|
+
|
|
+ __ convert(x->op(), conv_input, conv_result);
|
|
+
|
|
+ assert(result->is_virtual(), "result must be virtual register");
|
|
+ set_result(x, result);
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_NewInstance(NewInstance* x) {
|
|
+#ifndef PRODUCT
|
|
+ if (PrintNotLoaded && !x->klass()->is_loaded()) {
|
|
+ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci());
|
|
+ }
|
|
+#endif
|
|
+ CodeEmitInfo* info = state_for(x, x->state());
|
|
+ LIR_Opr reg = result_register_for(x->type());
|
|
+ new_instance(reg, x->klass(), x->is_unresolved(),
|
|
+ FrameMap::r12_oop_opr,
|
|
+ FrameMap::r15_oop_opr,
|
|
+ FrameMap::r14_oop_opr,
|
|
+ LIR_OprFact::illegalOpr,
|
|
+ FrameMap::r13_metadata_opr,
|
|
+ info);
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ move(reg, result);
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
|
|
+ CodeEmitInfo* info = state_for(x, x->state());
|
|
+
|
|
+ LIRItem length(x->length(), this);
|
|
+ length.load_item_force(FrameMap::r9_opr);
|
|
+
|
|
+ LIR_Opr reg = result_register_for(x->type());
|
|
+ LIR_Opr tmp1 = FrameMap::r12_oop_opr;
|
|
+ LIR_Opr tmp2 = FrameMap::r14_oop_opr;
|
|
+ LIR_Opr tmp3 = FrameMap::r15_oop_opr;
|
|
+ LIR_Opr tmp4 = reg;
|
|
+ LIR_Opr klass_reg = FrameMap::r13_metadata_opr;
|
|
+ LIR_Opr len = length.result();
|
|
+ BasicType elem_type = x->elt_type();
|
|
+
|
|
+ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);
|
|
+
|
|
+ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
|
|
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
|
|
+
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ move(reg, result);
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
|
+ LIRItem length(x->length(), this);
|
|
+ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
|
|
+ // and therefore provide the state before the parameters have been consumed
|
|
+ CodeEmitInfo* patching_info = NULL;
|
|
+ if (!x->klass()->is_loaded() || PatchALot) {
|
|
+ patching_info = state_for(x, x->state_before());
|
|
+ }
|
|
+
|
|
+ CodeEmitInfo* info = state_for(x, x->state());
|
|
+
|
|
+ LIR_Opr reg = result_register_for(x->type());
|
|
+ LIR_Opr tmp1 = FrameMap::r12_oop_opr;
|
|
+ LIR_Opr tmp2 = FrameMap::r14_oop_opr;
|
|
+ LIR_Opr tmp3 = FrameMap::r15_oop_opr;
|
|
+ LIR_Opr tmp4 = reg;
|
|
+ LIR_Opr klass_reg = FrameMap::r13_metadata_opr;
|
|
+
|
|
+ length.load_item_force(FrameMap::r9_opr);
|
|
+ LIR_Opr len = length.result();
|
|
+
|
|
+ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
|
|
+ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass());
|
|
+ if (obj == ciEnv::unloaded_ciobjarrayklass()) {
|
|
+ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
|
|
+ }
|
|
+ klass2reg_with_patching(klass_reg, obj, patching_info);
|
|
+ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
|
|
+
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ move(reg, result);
|
|
+}
|
|
+
|
|
+
|
|
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
|
+ Values* dims = x->dims();
|
|
+ int i = dims->length();
|
|
+ LIRItemList* items = new LIRItemList(i, i, NULL);
|
|
+ while (i-- > 0) {
|
|
+ LIRItem* size = new LIRItem(dims->at(i), this);
|
|
+ items->at_put(i, size);
|
|
+ }
|
|
+
|
|
+ // Evaluate state_for early since it may emit code.
|
|
+ CodeEmitInfo* patching_info = NULL;
|
|
+ if (!x->klass()->is_loaded() || PatchALot) {
|
|
+ patching_info = state_for(x, x->state_before());
|
|
+
|
|
+ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
|
|
+ // clone all handlers (NOTE: Usually this is handled transparently
|
|
+ // by the CodeEmitInfo cloning logic in CodeStub constructors but
|
|
+ // is done explicitly here because a stub isn't being used).
|
|
+ x->set_exception_handlers(new XHandlers(x->exception_handlers()));
|
|
+ }
|
|
+ CodeEmitInfo* info = state_for(x, x->state());
|
|
+
|
|
+ i = dims->length();
|
|
+ while (i-- > 0) {
|
|
+ LIRItem* size = items->at(i);
|
|
+ size->load_item();
|
|
+
|
|
+ store_stack_parameter(size->result(), in_ByteSize(i * BytesPerInt));
|
|
+ }
|
|
+
|
|
+ LIR_Opr klass_reg = FrameMap::r10_metadata_opr;
|
|
+ klass2reg_with_patching(klass_reg, x->klass(), patching_info);
|
|
+
|
|
+ LIR_Opr rank = FrameMap::r9_opr;
|
|
+ __ move(LIR_OprFact::intConst(x->rank()), rank);
|
|
+ LIR_Opr varargs = FrameMap::r12_opr;
|
|
+ __ move(FrameMap::sp_opr, varargs);
|
|
+ LIR_OprList* args = new LIR_OprList(3);
|
|
+ args->append(klass_reg);
|
|
+ args->append(rank);
|
|
+ args->append(varargs);
|
|
+ LIR_Opr reg = result_register_for(x->type());
|
|
+ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
|
|
+ LIR_OprFact::illegalOpr,
|
|
+ reg, args, info);
|
|
+
|
|
+ LIR_Opr result = rlock_result(x);
|
|
+ __ move(reg, result);
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
|
|
+ // nothing to do for now
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_CheckCast(CheckCast* x) {
|
|
+ LIRItem obj(x->obj(), this);
|
|
+
|
|
+ CodeEmitInfo* patching_info = NULL;
|
|
+ if (!x->klass()->is_loaded() ||
|
|
+ (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) {
|
|
+ // must do this before locking the destination register as an oop register,
|
|
+ // and before the obj is loaded (the latter is for deoptimization)
|
|
+ patching_info = state_for(x, x->state_before());
|
|
+ }
|
|
+ obj.load_item();
|
|
+
|
|
+ // info for exceptions
|
|
+ CodeEmitInfo* info_for_exception =
|
|
+ (x->needs_exception_state() ? state_for(x) :
|
|
+ state_for(x, x->state_before(), true /*ignore_xhandler*/ ));
|
|
+
|
|
+ CodeStub* stub = NULL;
|
|
+ if (x->is_incompatible_class_change_check()) {
|
|
+ assert(patching_info == NULL, "can't patch this");
|
|
+ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr,
|
|
+ info_for_exception);
|
|
+ } else if (x->is_invokespecial_receiver_check()) {
|
|
+ assert(patching_info == NULL, "can't patch this");
|
|
+ stub = new DeoptimizeStub(info_for_exception,
|
|
+ Deoptimization::Reason_class_check,
|
|
+ Deoptimization::Action_none);
|
|
+ } else {
|
|
+ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
|
|
+ }
|
|
+ LIR_Opr reg = rlock_result(x);
|
|
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
|
|
+ if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
|
|
+ tmp3 = new_register(objectType);
|
|
+ }
|
|
+ __ checkcast(reg, obj.result(), x->klass(),
|
|
+ new_register(objectType), new_register(objectType), tmp3,
|
|
+ x->direct_compare(), info_for_exception, patching_info, stub,
|
|
+ x->profiled_method(), x->profiled_bci());
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
|
|
+ LIRItem obj(x->obj(), this);
|
|
+
|
|
+ // result and test object may not be in same register
|
|
+ LIR_Opr reg = rlock_result(x);
|
|
+ CodeEmitInfo* patching_info = NULL;
|
|
+ if ((!x->klass()->is_loaded() || PatchALot)) {
|
|
+ // must do this before locking the destination register as an oop register
|
|
+ patching_info = state_for(x, x->state_before());
|
|
+ }
|
|
+ obj.load_item();
|
|
+ LIR_Opr tmp3 = LIR_OprFact::illegalOpr;
|
|
+ if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
|
|
+ tmp3 = new_register(objectType);
|
|
+ }
|
|
+ __ instanceof(reg, obj.result(), x->klass(),
|
|
+ new_register(objectType), new_register(objectType), tmp3,
|
|
+ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
|
|
+}
|
|
+
|
|
+void LIRGenerator::do_If(If* x) {
|
|
+ // If should have two successors
|
|
+ assert(x->number_of_sux() == 2, "inconsistency");
|
|
+ ValueTag tag = x->x()->type()->tag();
|
|
+ bool is_safepoint = x->is_safepoint();
|
|
+
|
|
+ If::Condition cond = x->cond();
|
|
+
|
|
+ LIRItem xitem(x->x(), this);
|
|
+ LIRItem yitem(x->y(), this);
|
|
+ LIRItem* xin = &xitem;
|
|
+ LIRItem* yin = &yitem;
|
|
+
|
|
+ if (tag == longTag) {
|
|
+ // for longs, only conditions "eql", "neq", "lss", "geq" are valid;
|
|
+ // mirror for other conditions
|
|
+ if (cond == If::gtr || cond == If::leq) {
|
|
+ cond = Instruction::mirror(cond);
|
|
+ xin = &yitem;
|
|
+ yin = &xitem;
|
|
+ }
|
|
+ xin->set_destroys_register();
|
|
+ }
|
|
+ xin->load_item();
|
|
+ yin->load_item();
|
|
+
|
|
+ set_no_result(x);
|
|
+
|
|
+ LIR_Opr left = xin->result();
|
|
+ LIR_Opr right = yin->result();
|
|
+
|
|
+ // add safepoint before generating condition code so it can be recomputed
|
|
+ if (x->is_safepoint()) {
|
|
+ // increment backedge counter if needed
|
|
+ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()),
|
|
+ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci());
|
|
+ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));
|
|
+ }
|
|
+
|
|
+ // Generate branch profiling. Profiling code doesn't kill flags.
|
|
+ __ cmp(lir_cond(cond), left, right);
|
|
+ profile_branch(x, cond);
|
|
+ move_to_phi(x->state());
|
|
+ if (x->x()->type()->is_float_kind()) {
|
|
+ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());
|
|
+ } else {
|
|
+ __ branch(lir_cond(cond), right->type(), x->tsux());
|
|
+ }
|
|
+ assert(x->default_sux() == x->fsux(), "wrong destination above");
|
|
+ __ jump(x->default_sux());
|
|
+}
|
|
+
|
|
+LIR_Opr LIRGenerator::getThreadPointer() {
|
|
+ return FrameMap::as_pointer_opr(xthread);
|
|
+}
|
|
+
|
|
+void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); }
|
|
+
|
|
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
|
|
+ CodeEmitInfo* info) {
|
|
+ __ volatile_store_mem_reg(value, address, info);
|
|
+}
|
|
+
|
|
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
|
|
+ CodeEmitInfo* info) {
|
|
+ if (!UseBarriersForVolatile) {
|
|
+ __ membar();
|
|
+ }
|
|
+
|
|
+ __ volatile_load_mem_reg(address, result, info);
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..00e33e882
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp
|
|
@@ -0,0 +1,55 @@
|
|
+/*
|
|
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/register.hpp"
|
|
+#include "c1/c1_LIR.hpp"
|
|
+
|
|
+FloatRegister LIR_OprDesc::as_float_reg() const {
|
|
+ return as_FloatRegister(fpu_regnr());
|
|
+}
|
|
+
|
|
+FloatRegister LIR_OprDesc::as_double_reg() const {
|
|
+ return as_FloatRegister(fpu_regnrLo());
|
|
+}
|
|
+
|
|
+// Reg2 unused.
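+// On RV64 a double fits in a single FPR, so the same register number is
+// encoded into both halves of the operand below.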
|
|
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
|
|
+ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
|
|
+ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
|
|
+ (reg1 << LIR_OprDesc::reg2_shift) |
|
|
+ LIR_OprDesc::double_type |
|
|
+ LIR_OprDesc::fpu_register |
|
|
+ LIR_OprDesc::double_size);
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void LIR_Address::verify() const {
|
|
+ assert(base()->is_cpu_register(), "wrong base operand");
|
|
+ assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand");
|
|
+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA,
|
|
+ "wrong type for addresses");
|
|
+}
|
|
+#endif // PRODUCT
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..60dcdc0e1
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
|
|
@@ -0,0 +1,33 @@
|
|
+/*
|
|
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "c1/c1_Instruction.hpp"
|
|
+#include "c1/c1_LinearScan.hpp"
|
|
+#include "utilities/bitMap.inline.hpp"
|
|
+
|
|
+void LinearScan::allocate_fpu_stack() {
|
|
+ // No FPU stack on RISCV
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..f0aa08a39
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
|
|
@@ -0,0 +1,85 @@
|
|
+/*
|
|
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
|
|
+#define CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
|
|
+
|
|
+inline bool LinearScan::is_processed_reg_num(int reg_num)
|
|
+{
|
|
+ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
|
|
+}
|
|
+
|
|
+inline int LinearScan::num_physical_regs(BasicType type) {
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+
|
|
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+inline bool LinearScan::is_caller_save(int assigned_reg) {
|
|
+ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
|
|
+ if (assigned_reg < pd_first_callee_saved_reg) {
|
|
+ return true;
|
|
+ }
|
|
+ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg_1) {
|
|
+ return true;
|
|
+ }
|
|
+ if (assigned_reg > pd_last_callee_saved_fpu_reg_1 && assigned_reg < pd_first_callee_saved_fpu_reg_2) {
|
|
+ return true;
|
|
+ }
|
|
+ if (assigned_reg > pd_last_callee_saved_fpu_reg_2 && assigned_reg < pd_last_fpu_reg) {
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+
|
|
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
|
|
+}
|
|
+
|
|
+
|
|
+// Implementation of LinearScanWalker
|
|
+
|
|
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur)
|
|
+{
|
|
+ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
|
|
+ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
|
|
+ _first_reg = pd_first_callee_saved_reg;
|
|
+ _last_reg = pd_last_callee_saved_reg;
|
|
+ return true;
|
|
+ } else if (cur->type() == T_INT || cur->type() == T_LONG ||
|
|
+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) {
|
|
+ _first_reg = pd_first_cpu_reg;
|
|
+ _last_reg = pd_last_allocatable_cpu_reg;
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+
|
|
+#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..370ec45c6
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
|
|
@@ -0,0 +1,441 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "classfile/systemDictionary.hpp"
|
|
+#include "gc/shared/collectedHeap.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "oops/arrayOop.hpp"
|
|
+#include "oops/markOop.hpp"
|
|
+#include "runtime/basicLock.hpp"
|
|
+#include "runtime/biasedLocking.hpp"
|
|
+#include "runtime/os.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+
|
|
+void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result,
|
|
+ FloatRegister freg0, FloatRegister freg1,
|
|
+ Register result)
|
|
+{
|
|
+ if (is_float) {
|
|
+ float_compare(result, freg0, freg1, unordered_result);
|
|
+ } else {
|
|
+ double_compare(result, freg0, freg1, unordered_result);
|
|
+ }
|
|
+}
|
|
+
|
|
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) {
|
|
+ const int aligned_mask = BytesPerWord - 1;
|
|
+ const int hdr_offset = oopDesc::mark_offset_in_bytes();
|
|
+ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
|
|
+ Label done;
|
|
+ int null_check_offset = -1;
|
|
+
|
|
+ verify_oop(obj);
|
|
+
|
|
+ // save object being locked into the BasicObjectLock
|
|
+ sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ assert(tmp != noreg, "should have tmp register at this point");
|
|
+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case);
|
|
+ } else {
|
|
+ null_check_offset = offset();
|
|
+ }
|
|
+
|
|
+ // Load object header
|
|
+ ld(hdr, Address(obj, hdr_offset));
|
|
+ // and mark it as unlocked
|
|
+ ori(hdr, hdr, markOopDesc::unlocked_value);
|
|
+ // save unlocked object header into the displaced header location on the stack
|
|
+ sd(hdr, Address(disp_hdr, 0));
|
|
+ // test if object header is still the same (i.e. unlocked), and if so, store the
|
|
+ // displaced header address in the object header - if it is not the same, get the
|
|
+ // object header instead
|
|
+ la(t1, Address(obj, hdr_offset));
|
|
+ cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthrough*/NULL);
|
|
+ // if the object header was the same, we're done
|
|
+ // if the object header was not the same, it is now in the hdr register
|
|
+ // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
|
|
+ //
|
|
+ // 1) (hdr & aligned_mask) == 0
|
|
+ // 2) sp <= hdr
|
|
+ // 3) hdr <= sp + page_size
|
|
+ //
|
|
+ // these 3 tests can be done by evaluating the following expression:
|
|
+ //
|
|
+ // (hdr - sp) & (aligned_mask - page_size)
|
|
+ //
|
|
+ // assuming both the stack pointer and page_size have their least
|
|
+ // significant 2 bits cleared and page_size is a power of 2
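+ //
+ // worked example (assuming the usual 4 KiB page): aligned_mask - page_size
+ // is 7 - 4096 = 0x...fffff007, so the AND is zero exactly when hdr - sp is
+ // word aligned, non-negative and smaller than the page size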
|
|
+ sub(hdr, hdr, sp);
|
|
+ mv(t0, aligned_mask - os::vm_page_size());
|
|
+ andr(hdr, hdr, t0);
|
|
+ // for recursive locking, the result is zero => save it in the displaced header
|
|
+ // location (NULL in the displaced hdr location indicates recursive locking)
|
|
+ sd(hdr, Address(disp_hdr, 0));
|
|
+ // otherwise we don't care about the result and handle locking via runtime call
|
|
+ bnez(hdr, slow_case, /* is_far */ true);
|
|
+ bind(done);
|
|
+ if (PrintBiasedLockingStatistics) {
|
|
+ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
|
|
+ incrementw(Address(t1, 0));
|
|
+ }
|
|
+ return null_check_offset;
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
|
|
+ const int aligned_mask = BytesPerWord - 1;
|
|
+ const int hdr_offset = oopDesc::mark_offset_in_bytes();
|
|
+ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
|
|
+ Label done;
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ // load object
|
|
+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ biased_locking_exit(obj, hdr, done);
|
|
+ }
|
|
+
|
|
+ // load displaced header
|
|
+ ld(hdr, Address(disp_hdr, 0));
|
|
+ // if the loaded hdr is NULL we had recursive locking
|
|
+ // if we had recursive locking, we are done
|
|
+ beqz(hdr, done);
|
|
+ if (!UseBiasedLocking) {
|
|
+ // load object
|
|
+ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ }
|
|
+ verify_oop(obj);
|
|
+ // test if object header is pointing to the displaced header, and if so, restore
|
|
+ // the displaced header in the object - if the object header is not pointing to
|
|
+ // the displaced header, get the object header instead
|
|
+ // if the object header was not pointing to the displaced header,
|
|
+ // we do unlocking via runtime call
|
|
+ if (hdr_offset) {
|
|
+ la(t0, Address(obj, hdr_offset));
|
|
+ cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case);
|
|
+ } else {
|
|
+ cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case);
|
|
+ }
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+// Defines obj, preserves var_size_in_bytes
|
|
+void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) {
|
|
+ if (UseTLAB) {
|
|
+ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, /* is_far */ true);
|
|
+ } else {
|
|
+ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, /* is_far */ true);
|
|
+ }
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) {
|
|
+ assert_different_registers(obj, klass, len);
|
|
+ if (UseBiasedLocking && !len->is_valid()) {
|
|
+ assert_different_registers(obj, klass, len, tmp1, tmp2);
|
|
+ ld(tmp1, Address(klass, Klass::prototype_header_offset()));
|
|
+ } else {
|
|
+ // This assumes that all prototype bits fit in an int32_t
|
|
+ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype());
|
|
+ }
|
|
+ sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes()));
|
|
+
|
|
+ if (UseCompressedClassPointers) { // Take care not to kill klass
|
|
+ encode_klass_not_null(tmp1, klass);
|
|
+ sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ sd(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+
|
|
+ if (len->is_valid()) {
|
|
+ sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
|
|
+ } else if (UseCompressedClassPointers) {
|
|
+ store_klass_gap(obj, zr);
|
|
+ }
|
|
+}
|
|
+
|
|
+// preserves obj, destroys len_in_bytes
|
|
+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) {
|
|
+ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
|
|
+ Label done;
|
|
+
|
|
+ // len_in_bytes is positive and ptr sized
|
|
+ sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
|
|
+ beqz(len_in_bytes, done);
|
|
+
|
|
+ // Preserve obj
|
|
+ if (hdr_size_in_bytes) {
|
|
+ add(obj, obj, hdr_size_in_bytes);
|
|
+ }
|
|
+ zero_memory(obj, len_in_bytes, tmp1);
|
|
+ if (hdr_size_in_bytes) {
|
|
+ sub(obj, obj, hdr_size_in_bytes);
|
|
+ }
|
|
+
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case) {
|
|
+ assert_different_registers(obj, tmp1, tmp2);
|
|
+ assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
|
|
+
|
|
+ try_allocate(obj, noreg, object_size * BytesPerWord, tmp1, tmp2, slow_case);
|
|
+
|
|
+ initialize_object(obj, klass, noreg, object_size * HeapWordSize, tmp1, tmp2, UseTLAB);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, bool is_tlab_allocated) {
|
|
+ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
|
|
+ "con_size_in_bytes is not multiple of alignment");
|
|
+ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
|
|
+
|
|
+ initialize_header(obj, klass, noreg, tmp1, tmp2);
|
|
+
|
|
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
|
|
+ // clear rest of allocated space
|
|
+ const Register index = tmp2;
|
|
+ // 16: multiplier for threshold
|
|
+ const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
|
|
+ if (var_size_in_bytes != noreg) {
|
|
+ mv(index, var_size_in_bytes);
|
|
+ initialize_body(obj, index, hdr_size_in_bytes, tmp1);
|
|
+ } else if (con_size_in_bytes <= threshold) {
|
|
+ // use explicit null stores
|
|
+ int i = hdr_size_in_bytes;
|
|
+ if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { // 2: multiplier for BytesPerWord
|
|
+ sd(zr, Address(obj, i));
|
|
+ i += BytesPerWord;
|
|
+ }
|
|
+ for (; i < con_size_in_bytes; i += BytesPerWord) {
|
|
+ sd(zr, Address(obj, i));
|
|
+ }
|
|
+ } else if (con_size_in_bytes > hdr_size_in_bytes) {
|
|
+ block_comment("zero memory");
|
|
+ // use loop to null out the fields
|
|
+ int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
|
|
+ mv(index, words / 8); // 8: words zeroed per unrolled loop iteration (the 'unroll' factor below)
|
|
+
|
|
+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll
|
|
+ int remainder = words % unroll;
|
|
+ la(t0, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
|
|
+
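+ // Duff's-device style zeroing: the first (partial) pass clears only the
+ // 'remainder' words, every following pass clears 'unroll' words at negative
+ // offsets from t0, which is advanced by unroll * wordSize per iteration.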
|
|
+ Label entry_point, loop;
|
|
+ j(entry_point);
|
|
+
|
|
+ bind(loop);
|
|
+ sub(index, index, 1);
|
|
+ for (int i = -unroll; i < 0; i++) {
|
|
+ if (-i == remainder) {
|
|
+ bind(entry_point);
|
|
+ }
|
|
+ sd(zr, Address(t0, i * wordSize));
|
|
+ }
|
|
+ if (remainder == 0) {
|
|
+ bind(entry_point);
|
|
+ }
|
|
+ add(t0, t0, unroll * wordSize);
|
|
+ bnez(index, loop);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ membar(MacroAssembler::StoreStore);
|
|
+
|
|
+ if (CURRENT_ENV->dtrace_alloc_probes()) {
|
|
+ assert(obj == x10, "must be");
|
|
+ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
|
|
+ }
|
|
+
|
|
+ verify_oop(obj);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) {
|
|
+ assert_different_registers(obj, len, tmp1, tmp2, klass);
|
|
+
|
|
+ // determine alignment mask
|
|
+ assert(!(BytesPerWord & 1), "must be multiple of 2 for masking code to work");
|
|
+
|
|
+ // check for negative or excessive length
|
|
+ mv(t0, (int32_t)max_array_allocation_length);
|
|
+ bgeu(len, t0, slow_case, /* is_far */ true);
|
|
+
|
|
+ const Register arr_size = tmp2; // okay to be the same
|
|
+ // align object end
|
|
+ mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
|
|
+ shadd(arr_size, len, arr_size, t0, f);
|
|
+ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);
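+ // i.e. arr_size = align_up(header_size * BytesPerWord + (len << f), MinObjAlignmentInBytes)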
|
|
+
|
|
+ try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case);
|
|
+
|
|
+ initialize_header(obj, klass, len, tmp1, tmp2);
|
|
+
|
|
+ // clear rest of allocated space
|
|
+ const Register len_zero = len;
|
|
+ initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
|
|
+
|
|
+ membar(MacroAssembler::StoreStore);
|
|
+
|
|
+ if (CURRENT_ENV->dtrace_alloc_probes()) {
|
|
+ assert(obj == x10, "must be");
|
|
+ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
|
|
+ }
|
|
+
|
|
+ verify_oop(obj);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, Label &L) {
|
|
+ verify_oop(receiver);
|
|
+ // explicit NULL check not needed since load from [klass_offset] causes a trap
|
|
+ // check against inline cache
|
|
+ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
|
|
+ cmp_klass(receiver, iCache, t0, L);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
|
|
+ // If we have to make this method not-entrant we'll overwrite its
|
|
+ // first instruction with a jump. For this action to be legal we
|
|
+ // must ensure that this first instruction is a J, JAL or NOP.
|
|
+ // Make it a NOP.
|
|
+ nop();
|
|
+ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect");
|
|
+ // Make sure there is enough stack space for this method's activation.
|
|
+ // Note that we do this before doing an enter().
|
|
+ generate_stack_overflow_check(bang_size_in_bytes);
|
|
+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multiplier for wordSize
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::remove_frame(int framesize) {
|
|
+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiplier for wordSize
|
|
+}
|
|
+
|
|
+
|
|
+void C1_MacroAssembler::verified_entry() {
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
|
|
+ // fp + -2: link
|
|
+ // + -1: return address
|
|
+ // + 0: argument with offset 0
|
|
+ // + 1: argument with offset 1
|
|
+ // + 2: ...
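+ // (non-negative offsets are the values stored by LIR_Assembler::store_parameter;
+ // StubFrame::load_argument in c1_Runtime1_riscv.cpp reads them back the same way)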
|
|
+ ld(reg, Address(fp, offset_in_words * BytesPerWord));
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+
|
|
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
|
|
+ if (!VerifyOops) {
|
|
+ return;
|
|
+ }
|
|
+ verify_oop_addr(Address(sp, stack_offset), "oop");
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
|
|
+ if (!VerifyOops) return;
|
|
+ Label not_null;
|
|
+ bnez(r, not_null);
|
|
+ stop("non-null oop required");
|
|
+ bind(not_null);
|
|
+ verify_oop(r);
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) {
|
|
+#ifdef ASSERT
|
|
+ static int nn;
|
|
+ if (inv_x10) { mv(x10, 0xDEAD); }
|
|
+ if (inv_x9) { mv(x9, 0xDEAD); }
|
|
+ if (inv_x12) { mv(x12, nn++); }
|
|
+ if (inv_x13) { mv(x13, 0xDEAD); }
|
|
+ if (inv_x14) { mv(x14, 0xDEAD); }
|
|
+ if (inv_x15) { mv(x15, 0xDEAD); }
|
|
+#endif // ASSERT
|
|
+}
|
|
+#endif // ifndef PRODUCT
|
|
+
|
|
+typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
|
|
+typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2,
|
|
+ Label& label, bool is_far, bool is_unordered);
|
|
+
|
|
+static c1_cond_branch_insn c1_cond_branch[] =
|
|
+{
|
|
+ /* SHORT branches */
|
|
+ (c1_cond_branch_insn)&Assembler::beq,
|
|
+ (c1_cond_branch_insn)&Assembler::bne,
|
|
+ (c1_cond_branch_insn)&Assembler::blt,
|
|
+ (c1_cond_branch_insn)&Assembler::ble,
|
|
+ (c1_cond_branch_insn)&Assembler::bge,
|
|
+ (c1_cond_branch_insn)&Assembler::bgt,
|
|
+ (c1_cond_branch_insn)&Assembler::bleu, // lir_cond_belowEqual
|
|
+ (c1_cond_branch_insn)&Assembler::bgeu // lir_cond_aboveEqual
|
|
+};
|
|
+
|
|
+static c1_float_cond_branch_insn c1_float_cond_branch[] =
|
|
+{
|
|
+ /* FLOAT branches */
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_beq,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_bne,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_blt,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_ble,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_bge,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::float_bgt,
|
|
+ NULL, // lir_cond_belowEqual
|
|
+ NULL, // lir_cond_aboveEqual
|
|
+
|
|
+ /* DOUBLE branches */
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_beq,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_bne,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_blt,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_ble,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_bge,
|
|
+ (c1_float_cond_branch_insn)&MacroAssembler::double_bgt,
|
|
+ NULL, // lir_cond_belowEqual
|
|
+ NULL // lir_cond_aboveEqual
|
|
+};
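+// The DOUBLE half of the table starts at index 8, which is what
+// C1_MacroAssembler::c1_double_branch_mask (1 << 3) encodes.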
|
|
+
|
|
+void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label,
|
|
+ BasicType type, bool is_far) {
|
|
+ if (type == T_OBJECT || type == T_ARRAY) {
|
|
+ assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual");
|
|
+ if (cmpFlag == lir_cond_equal) {
|
|
+ oop_equal(op1, op2, label, is_far);
|
|
+ } else {
|
|
+ oop_nequal(op1, op2, label, is_far);
|
|
+ }
|
|
+ } else {
|
|
+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])),
|
|
+ "invalid c1 conditional branch index");
|
|
+ (this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far);
|
|
+ }
|
|
+}
|
|
+
|
|
+void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label,
|
|
+ bool is_far, bool is_unordered) {
|
|
+ assert(cmpFlag >= 0 &&
|
|
+ cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])),
|
|
+ "invalid c1 float conditional branch index");
|
|
+ (this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered);
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..5d0cefe89
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
|
|
@@ -0,0 +1,121 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
|
|
+
|
|
+using MacroAssembler::build_frame;
|
|
+using MacroAssembler::null_check;
|
|
+
|
|
+// C1_MacroAssembler contains high-level macros for C1
|
|
+
|
|
+ private:
|
|
+ int _rsp_offset; // track rsp changes
|
|
+ // initialization
|
|
+ void pd_init() { _rsp_offset = 0; }
|
|
+
|
|
+
|
|
+ public:
|
|
+ void try_allocate(
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Register tmp2, // temp register
|
|
+ Label& slow_case // continuation point if fast allocation fails
|
|
+ );
|
|
+
|
|
+ void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2);
|
|
+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1);
|
|
+
|
|
+ void float_cmp(bool is_float, int unordered_result,
|
|
+ FloatRegister f0, FloatRegister f1,
|
|
+ Register result);
|
|
+
|
|
+ // locking
|
|
+ // hdr : must be x10, contents destroyed
|
|
+ // obj : must point to the object to lock, contents preserved
|
|
+ // disp_hdr: must point to the displaced header location, contents preserved
|
|
+ // tmp : temporary register, contents destroyed
|
|
+ // returns code offset at which to add null check debug information
|
|
+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case);
|
|
+
|
|
+ // unlocking
|
|
+ // hdr : contents destroyed
|
|
+ // obj : must point to the object to lock, contents preserved
|
|
+ // disp_hdr: must be x10 & must point to the displaced header location, contents destroyed
|
|
+ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case);
|
|
+
|
|
+ void initialize_object(
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register klass, // object klass
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Register tmp2, // temp register
|
|
+ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
|
|
+ );
|
|
+
|
|
+ // allocation of fixed-size objects
|
|
+ // (can also be used to allocate fixed-size arrays, by setting
|
|
+ // hdr_size correctly and storing the array length afterwards)
|
|
+ // obj : will contain pointer to allocated object
|
|
+ // t1, t2 : temp registers - contents destroyed
|
|
+ // header_size: size of object header in words
|
|
+ // object_size: total size of object in words
|
|
+ // slow_case : exit to slow case implementation if fast allocation fails
|
|
+ void allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case);
|
|
+
|
|
+ enum {
|
|
+ max_array_allocation_length = 0x00FFFFFF
|
|
+ };
|
|
+
|
|
+ // allocation of arrays
|
|
+ // obj : will contain pointer to allocated object
|
|
+ // len : array length in number of elements
|
|
+ // t : temp register - contents destroyed
|
|
+ // header_size: size of object header in words
|
|
+ // f : element scale factor (log2 of the element size in bytes)
|
|
+ // slow_case : exit to slow case implementation if fast allocation fails
|
|
+ void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case);
|
|
+
|
|
+ int rsp_offset() const { return _rsp_offset; }
|
|
+
|
|
+ void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
|
|
+
|
|
+ // This platform only uses signal-based null checks. The Label is not needed.
|
|
+ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
|
|
+
|
|
+ void load_parameter(int offset_in_words, Register reg);
|
|
+
|
|
+ void inline_cache_check(Register receiver, Register iCache, Label &L);
|
|
+
|
|
+ static const int c1_double_branch_mask = 1 << 3; // depends on the layout of c1_float_cond_branch
|
|
+ void c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, BasicType type, bool is_far);
|
|
+ void c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label,
|
|
+ bool is_far, bool is_unordered = false);
|
|
+
|
|
+#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..f06e7b51c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
|
|
@@ -0,0 +1,1206 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "c1/c1_CodeStubs.hpp"
|
|
+#include "c1/c1_Defs.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "compiler/disassembler.hpp"
|
|
+#include "gc/shared/cardTable.hpp"
|
|
+#include "gc/shared/cardTableBarrierSet.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/compiledICHolder.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "prims/jvmtiExport.hpp"
|
|
+#include "register_riscv.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/signature.hpp"
|
|
+#include "runtime/vframe.hpp"
|
|
+#include "runtime/vframeArray.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+
|
|
+
|
|
+// Implementation of StubAssembler
|
|
+
|
|
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) {
|
|
+ // setup registers
|
|
+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result,
|
|
+ "registers must be different");
|
|
+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different");
|
|
+ assert(args_size >= 0, "illegal args_size");
|
|
+ bool align_stack = false;
|
|
+
|
|
+ mv(c_rarg0, xthread);
|
|
+ set_num_rt_args(0); // Nothing on stack
|
|
+
|
|
+ Label retaddr;
|
|
+ set_last_Java_frame(sp, fp, retaddr, t0);
|
|
+
|
|
+ // do the call
|
|
+ int32_t off = 0;
|
|
+ la_patchable(t0, RuntimeAddress(entry), off);
|
|
+ jalr(x1, t0, off);
|
|
+ bind(retaddr);
|
|
+ int call_offset = offset();
|
|
+ // verify callee-saved register
|
|
+#ifdef ASSERT
|
|
+ push_reg(x10, sp);
|
|
+ { Label L;
|
|
+ get_thread(x10);
|
|
+ beq(xthread, x10, L);
|
|
+ stop("StubAssembler::call_RT: xthread not callee saved?");
|
|
+ bind(L);
|
|
+ }
|
|
+ pop_reg(x10, sp);
|
|
+#endif
|
|
+ reset_last_Java_frame(true);
|
|
+
|
|
+ // check for pending exceptions
|
|
+ { Label L;
|
|
+ // check for pending exceptions (java_thread is set upon return)
|
|
+ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ beqz(t0, L);
|
|
+ // exception pending => remove activation and forward to exception handler
|
|
+ // make sure that the vm_results are cleared
|
|
+ if (oop_result1->is_valid()) {
|
|
+ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
|
|
+ }
|
|
+ if (metadata_result->is_valid()) {
|
|
+ sd(zr, Address(xthread, JavaThread::vm_result_2_offset()));
|
|
+ }
|
|
+ if (frame_size() == no_frame_size) {
|
|
+ leave();
|
|
+ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
|
+ } else if (_stub_id == Runtime1::forward_exception_id) {
|
|
+ should_not_reach_here();
|
|
+ } else {
|
|
+ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
|
|
+ }
|
|
+ bind(L);
|
|
+ }
|
|
+ // get oop results if there are any and reset the values in the thread
|
|
+ if (oop_result1->is_valid()) {
|
|
+ get_vm_result(oop_result1, xthread);
|
|
+ }
|
|
+ if (metadata_result->is_valid()) {
|
|
+ get_vm_result_2(metadata_result, xthread);
|
|
+ }
|
|
+ return call_offset;
|
|
+}
|
|
+
|
|
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
|
|
+ mv(c_rarg1, arg1);
|
|
+ return call_RT(oop_result1, metadata_result, entry, 1);
|
|
+}
|
|
+
|
|
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
|
|
+ const int arg_num = 2;
|
|
+ if (c_rarg1 == arg2) {
|
|
+ if (c_rarg2 == arg1) {
|
|
+ xorr(arg1, arg1, arg2);
|
|
+ xorr(arg2, arg1, arg2);
|
|
+ xorr(arg1, arg1, arg2);
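+ // the arguments are exactly swapped relative to c_rarg1/c_rarg2, so a
+ // three-XOR swap exchanges them in place without a scratch register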
|
|
+ } else {
|
|
+ mv(c_rarg2, arg2);
|
|
+ mv(c_rarg1, arg1);
|
|
+ }
|
|
+ } else {
|
|
+ mv(c_rarg1, arg1);
|
|
+ mv(c_rarg2, arg2);
|
|
+ }
|
|
+ return call_RT(oop_result1, metadata_result, entry, arg_num);
|
|
+}
|
|
+
|
|
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
|
|
+ const int arg_num = 3;
|
|
+ // if there is any conflict use the stack
|
|
+ if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
|
|
+ arg2 == c_rarg1 || arg2 == c_rarg3 ||
|
|
+ arg3 == c_rarg1 || arg3 == c_rarg2) {
|
|
+ const int arg1_sp_offset = 0;
|
|
+ const int arg2_sp_offset = 1;
|
|
+ const int arg3_sp_offset = 2;
|
|
+ addi(sp, sp, -(arg_num * wordSize));
|
|
+ sd(arg3, Address(sp, arg3_sp_offset * wordSize));
|
|
+ sd(arg2, Address(sp, arg2_sp_offset * wordSize));
|
|
+ sd(arg1, Address(sp, arg1_sp_offset * wordSize));
|
|
+
|
|
+ ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize));
|
|
+ ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize));
|
|
+ ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize));
|
|
+ addi(sp, sp, arg_num * wordSize);
|
|
+ } else {
|
|
+ mv(c_rarg1, arg1);
|
|
+ mv(c_rarg2, arg2);
|
|
+ mv(c_rarg3, arg3);
|
|
+ }
|
|
+ return call_RT(oop_result1, metadata_result, entry, arg_num);
|
|
+}
|
|
+
|
|
+// Implementation of StubFrame
|
|
+
|
|
+class StubFrame: public StackObj {
|
|
+ private:
|
|
+ StubAssembler* _sasm;
|
|
+
|
|
+ public:
|
|
+ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
|
|
+ void load_argument(int offset_in_words, Register reg);
|
|
+
|
|
+ ~StubFrame();
|
|
+};
|
|
+
|
|
+void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
|
|
+ set_info(name, must_gc_arguments);
|
|
+ enter();
|
|
+}
|
|
+
|
|
+void StubAssembler::epilogue() {
|
|
+ leave();
|
|
+ ret();
|
|
+}
|
|
+
|
|
+#define __ _sasm->
|
|
+
|
|
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
|
|
+ _sasm = sasm;
|
|
+ __ prologue(name, must_gc_arguments);
|
|
+}
|
|
+
|
|
+// load parameters that were stored with LIR_Assembler::store_parameter
|
|
+// Note: offsets for store_parameter and load_argument must match
|
|
+void StubFrame::load_argument(int offset_in_words, Register reg) {
|
|
+ __ load_parameter(offset_in_words, reg);
|
|
+}
|
|
+
|
|
+
|
|
+StubFrame::~StubFrame() {
|
|
+ __ epilogue();
|
|
+ _sasm = NULL;
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+
|
|
+// Implementation of Runtime1
|
|
+
|
|
+#define __ sasm->
|
|
+
|
|
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
|
|
+
|
|
+// Stack layout for saving/restoring all the registers needed during a runtime
|
|
+// call (this includes deoptimization)
|
|
+// Note that users of this frame may well have arguments to some runtime
|
|
+// while these values are on the stack. These positions neglect those arguments
|
|
+// but the code in save_live_registers will take the argument count into
|
|
+// account.
|
|
+//
|
|
+
|
|
+enum reg_save_layout {
|
|
+ reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */
|
|
+};
|
|
+
|
|
+// Save off registers which might be killed by calls into the runtime.
|
|
+// Tries to be smart about FP registers. In particular we separate
|
|
+// saving and describing the FPU registers for deoptimization since we
|
|
+// have to save the FPU registers twice if we describe them. The
|
|
+// deopt blob is the only thing which needs to describe FPU registers.
|
|
+// In all other cases it should be sufficient to simply save their
|
|
+// current value.
|
|
+
|
|
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];
|
|
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];
|
|
+
|
|
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
|
|
+ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord;
|
|
+ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);
|
|
+ int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
|
|
+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
|
|
+ assert_cond(oop_map != NULL);
|
|
+
|
|
+ // cpu_regs, caller save registers only, see FrameMap::initialize
|
|
+ // in c1_FrameMap_riscv.cpp for detail.
|
|
+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12,
|
|
+ x13, x14, x15, x16, x17,
|
|
+ x28, x29, x30, x31};
|
|
+ for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) {
|
|
+ Register r = caller_save_cpu_regs[i];
|
|
+ int sp_offset = cpu_reg_save_offsets[r->encoding()];
|
|
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
|
|
+ r->as_VMReg());
|
|
+ }
|
|
+
|
|
+ // fpu_regs
|
|
+ if (save_fpu_registers) {
|
|
+ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
|
+ FloatRegister r = as_FloatRegister(i);
|
|
+ int sp_offset = fpu_reg_save_offsets[i];
|
|
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
|
|
+ r->as_VMReg());
|
|
+ }
|
|
+ }
|
|
+ return oop_map;
|
|
+}
|
|
+
|
|
+static OopMap* save_live_registers(StubAssembler* sasm,
|
|
+ bool save_fpu_registers = true) {
|
|
+ __ block_comment("save_live_registers");
|
|
+
|
|
+ // if the number of pushed regs is odd, one slot will be reserved for alignment
|
|
+ __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4)
|
|
+
|
|
+ if (save_fpu_registers) {
|
|
+ // float registers
|
|
+ __ addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize));
|
|
+ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
|
+ __ fsd(as_FloatRegister(i), Address(sp, i * wordSize));
|
|
+ }
|
|
+ } else {
|
|
+ // we define reg_save_layout = 62 as the fixed frame size,
|
|
+ // we should also subtract 32 * wordSize from sp when save_fpu_registers == false
|
|
+ __ addi(sp, sp, -32 * wordSize);
|
|
+ }
|
|
+
|
|
+ return generate_oop_map(sasm, save_fpu_registers);
|
|
+}
|
|
+
|
|
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
|
|
+ if (restore_fpu_registers) {
|
|
+ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
|
+ __ fld(as_FloatRegister(i), Address(sp, i * wordSize));
|
|
+ }
|
|
+ __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize);
|
|
+ } else {
|
|
+ // we define reg_save_layout = 62 as the fixed frame size,
|
|
+ // we should also add 32 * wordSize to sp when save_fpu_registers == false
|
|
+ __ addi(sp, sp, 32 * wordSize);
|
|
+ }
|
|
+
|
|
+ // if the number of popped regs is odd, the reserved slot for alignment will be removed
|
|
+ __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4)
|
|
+}
|
|
+
|
|
+static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) {
|
|
+ if (restore_fpu_registers) {
|
|
+ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
|
+ __ fld(as_FloatRegister(i), Address(sp, i * wordSize));
|
|
+ }
|
|
+ __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize);
|
|
+ } else {
|
|
+ // we define reg_save_layout = 62 as the fixed frame size,
|
|
+ // we should also add 32 * wordSize to sp when save_fpu_registers == false
|
|
+ __ addi(sp, sp, 32 * wordSize);
|
|
+ }
|
|
+
|
|
+ // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10
|
|
+ // there is one reserved slot for alignment on the stack in save_live_registers().
|
|
+ __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment
|
|
+ __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here
|
|
+}
|
|
+
|
|
+void Runtime1::initialize_pd() {
|
|
+ int i = 0;
|
|
+ int sp_offset = 0;
|
|
+ const int step = 2; // SP offsets are in halfwords
|
|
+
|
|
+ // all float registers are saved explicitly
|
|
+ for (i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
|
+ fpu_reg_save_offsets[i] = sp_offset;
|
|
+ sp_offset += step;
|
|
+ }
|
|
+
|
|
+ // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg
|
|
+ sp_offset += step;
|
|
+ // we save x5 ~ x31, except x0 ~ x4: loop starts from x5
|
|
+ for (i = 5; i < FrameMap::nof_cpu_regs; i++) {
|
|
+ cpu_reg_save_offsets[i] = sp_offset;
|
|
+ sp_offset += step;
|
|
+ }
|
|
+}
|
|
+
|
|
+// target: the entry point of the method that creates and posts the exception oop
|
|
+// has_argument: true if the exception needs arguments (passed in t0 and t1)
|
|
+
|
|
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
|
|
+ // make a frame and preserve the caller's caller-save registers
|
|
+ OopMap* oop_map = save_live_registers(sasm);
|
|
+ assert_cond(oop_map != NULL);
|
|
+ int call_offset = 0;
|
|
+ if (!has_argument) {
|
|
+ call_offset = __ call_RT(noreg, noreg, target);
|
|
+ } else {
|
|
+ __ mv(c_rarg1, t0);
|
|
+ __ mv(c_rarg2, t1);
|
|
+ call_offset = __ call_RT(noreg, noreg, target);
|
|
+ }
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, oop_map);
|
|
+
|
|
+ __ should_not_reach_here();
|
|
+ return oop_maps;
|
|
+}
|
|
+
|
|
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
|
|
+ __ block_comment("generate_handle_exception");
|
|
+
|
|
+ // incoming parameters
|
|
+ const Register exception_oop = x10;
|
|
+ const Register exception_pc = x13;
|
|
+
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ OopMap* oop_map = NULL;
|
|
+
|
|
+ switch (id) {
|
|
+ case forward_exception_id:
|
|
+ // We're handling an exception in the context of a compiled frame.
|
|
+ // The registers have been saved in the standard places. Perform
|
|
+ // an exception lookup in the caller and dispatch to the handler
|
|
+ // if found. Otherwise unwind and dispatch to the caller's
|
|
+ // exception handler.
|
|
+ oop_map = generate_oop_map(sasm, 1 /* thread */);
|
|
+
|
|
+ // load and clear pending exception oop into x10
|
|
+ __ ld(exception_oop, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
|
|
+
|
|
+ // load issuing PC (the return address for this stub) into x13
|
|
+ __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord));
|
|
+
|
|
+ // make sure that the vm_results are cleared (may be unnecessary)
|
|
+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::vm_result_2_offset()));
|
|
+ break;
|
|
+ case handle_exception_nofpu_id:
|
|
+ case handle_exception_id:
|
|
+ // At this point all registers MAY be live.
|
|
+ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id);
|
|
+ break;
|
|
+ case handle_exception_from_callee_id: {
|
|
+ // At this point all registers except exception oop (x10) and
|
|
+ // exception pc (ra) are dead.
|
|
+ const int frame_size = 2 /* fp, return address */;
|
|
+ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
|
|
+ sasm->set_frame_size(frame_size);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ __ should_not_reach_here();
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ // verify that only x10 and x13 are valid at this time
|
|
+ __ invalidate_registers(false, true, true, false, true, true);
|
|
+ // verify that x10 contains a valid exception
|
|
+ __ verify_not_null_oop(exception_oop);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // check that fields in JavaThread for exception oop and issuing pc are
|
|
+ // empty before writing to them
|
|
+ Label oop_empty;
|
|
+ __ ld(t0, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ beqz(t0, oop_empty);
|
|
+ __ stop("exception oop already set");
|
|
+ __ bind(oop_empty);
|
|
+
|
|
+ Label pc_empty;
|
|
+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ beqz(t0, pc_empty);
|
|
+ __ stop("exception pc already set");
|
|
+ __ bind(pc_empty);
|
|
+#endif
|
|
+
|
|
+ // save exception oop and issuing pc into JavaThread
|
|
+ // (exception handler will load it from here)
|
|
+ __ sd(exception_oop, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+ // patch throwing pc into return address (has bci & oop map)
|
|
+ __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord));
|
|
+
|
|
+ // compute the exception handler.
|
|
+ // the exception oop and the throwing pc are read from the fields in JavaThread
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
|
|
+ guarantee(oop_map != NULL, "NULL oop_map!");
|
|
+ oop_maps->add_gc_map(call_offset, oop_map);
|
|
+
|
|
+ // x10: handler address
|
|
+ // will be the deopt blob if nmethod was deoptimized while we looked up
|
|
+ // handler regardless of whether handler existed in the nmethod.
|
|
+
|
|
+ // only x10 is valid at this time, all other registers have been destroyed by the runtime call
|
|
+ __ invalidate_registers(false, true, true, true, true, true);
|
|
+
|
|
+ // patch the return address, this stub will directly return to the exception handler
|
|
+ __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord));
|
|
+
|
|
+ switch (id) {
|
|
+ case forward_exception_id:
|
|
+ case handle_exception_nofpu_id:
|
|
+ case handle_exception_id:
|
|
+ // Restore the registers that were saved at the beginning.
|
|
+ restore_live_registers(sasm, id != handle_exception_nofpu_id);
|
|
+ break;
|
|
+ case handle_exception_from_callee_id:
|
|
+ // Pop the return address.
|
|
+ __ leave();
|
|
+ __ ret(); // jump to exception handler
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ return oop_maps;
|
|
+}
|
|
+
|
|
+
|
|
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
|
|
+ // incoming parameters
|
|
+ const Register exception_oop = x10;
|
|
+ // other registers used in this stub
|
|
+ const Register handler_addr = x11;
|
|
+
|
|
+ // verify that only x10 is valid at this time
|
|
+ __ invalidate_registers(false, true, true, true, true, true);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // check that fields in JavaThread for exception oop and issuing pc are empty
|
|
+ Label oop_empty;
|
|
+ __ ld(t0, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ beqz(t0, oop_empty);
|
|
+ __ stop("exception oop must be empty");
|
|
+ __ bind(oop_empty);
|
|
+
|
|
+ Label pc_empty;
|
|
+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ beqz(t0, pc_empty);
|
|
+ __ stop("exception pc must be empty");
|
|
+ __ bind(pc_empty);
|
|
+#endif
|
|
+
|
|
+ // Save our return address because
|
|
+ // exception_handler_for_return_address will destroy it. We also
|
|
+ // save exception_oop
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ sd(exception_oop, Address(sp, wordSize));
|
|
+ __ sd(ra, Address(sp));
|
|
+
|
|
+ // search the exception handler address of the caller (using the return address)
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra);
|
|
+ // x10: exception handler address of the caller
|
|
+
|
|
+ // Only x10 is valid at this time; all other registers have been
|
|
+ // destroyed by the call.
|
|
+ __ invalidate_registers(false, true, true, true, false, true);
|
|
+
|
|
+ // move result of call into correct register
|
|
+ __ mv(handler_addr, x10);
|
|
+
|
|
+ // get throwing pc (= return address).
|
|
+ // ra has been destroyed by the call
|
|
+ __ ld(ra, Address(sp));
|
|
+ __ ld(exception_oop, Address(sp, wordSize));
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+ __ mv(x13, ra);
|
|
+
|
|
+ __ verify_not_null_oop(exception_oop);
|
|
+
|
|
+ // continue at exception handler (return address removed)
|
|
+ // note: do *not* remove arguments when unwinding the
|
|
+ // activation since the caller assumes having
|
|
+ // all arguments on the stack when entering the
|
|
+ // runtime to determine the exception handler
|
|
+ // (GC happens at call site with arguments!)
|
|
+ // x10: exception oop
|
|
+ // x13: throwing pc
|
|
+ // x11: exception handler
|
|
+ __ jr(handler_addr);
|
|
+}
|
|
+
|
|
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
|
|
+ // use the maximum number of runtime arguments here because it is difficult to
+ // distinguish each RT-Call.
+ // Note: This number also affects the RT-Call in generate_handle_exception because
+ // the oop-map is shared for all calls.
|
|
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
|
|
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
|
|
+
|
|
+ OopMap* oop_map = save_live_registers(sasm);
|
|
+ assert_cond(oop_map != NULL);
|
|
+
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ Label retaddr;
|
|
+ __ set_last_Java_frame(sp, fp, retaddr, t0);
|
|
+ // do the call
|
|
+ int32_t off = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(target), off);
|
|
+ __ jalr(x1, t0, off);
|
|
+ __ bind(retaddr);
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(__ offset(), oop_map);
|
|
+ // verify callee-saved register
|
|
+#ifdef ASSERT
|
|
+ { Label L;
|
|
+ __ get_thread(t0);
|
|
+ __ beq(xthread, t0, L);
|
|
+ __ stop("StubAssembler::call_RT: xthread not callee saved?");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+ __ reset_last_Java_frame(true);
|
|
+
|
|
+ // check for pending exceptions
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, L);
|
|
+ // exception pending => remove activation and forward to exception handler
|
|
+
|
|
+ { Label L1;
|
|
+ __ bnez(x10, L1); // have we deoptimized?
|
|
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
|
|
+ __ bind(L1);
|
|
+ }
|
|
+
|
|
+ // the deopt blob expects exceptions in the special fields of
|
|
+ // JavaThread, so copy and clear pending exception.
|
|
+
|
|
+ // load and clear pending exception
|
|
+ __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
|
|
+
|
|
+ // check that there is really a valid exception
|
|
+ __ verify_not_null_oop(x10);
|
|
+
|
|
+ // load throwing pc: this is the return address of the stub
|
|
+ __ ld(x13, Address(fp, wordSize));
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // check that fields in JavaThread for exception oop and issuing pc are empty
|
|
+ Label oop_empty;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, oop_empty);
|
|
+ __ stop("exception oop must be empty");
|
|
+ __ bind(oop_empty);
|
|
+
|
|
+ Label pc_empty;
|
|
+ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ beqz(t0, pc_empty);
|
|
+ __ stop("exception pc must be empty");
|
|
+ __ bind(pc_empty);
|
|
+#endif
|
|
+
|
|
+ // store exception oop and throwing pc to JavaThread
|
|
+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+ restore_live_registers(sasm);
|
|
+
|
|
+ __ leave();
|
|
+
|
|
+ // Forward the exception directly to the deopt blob. We must not clobber any
+ // registers and must leave the throwing pc on the stack. A patch may have
+ // values live in registers, so use the entry point that expects the
+ // exception in tls.
|
|
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
|
|
+
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // Runtime will return true if the nmethod has been deoptimized during
|
|
+ // the patching process. In that case we must do a deopt reexecute instead.
|
|
+ Label cont;
|
|
+
|
|
+ __ beqz(x10, cont); // have we deoptimized?
|
|
+
|
|
+ // Will reexecute. The proper return address is already on the stack; we just restore
+ // registers, pop all of our frame but the return address, and jump to the deopt blob.
|
|
+ restore_live_registers(sasm);
|
|
+ __ leave();
|
|
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
|
|
+
|
|
+ __ bind(cont);
|
|
+ restore_live_registers(sasm);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ return oop_maps;
|
|
+}
|
|
+
|
|
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
|
|
+ // for better readability
|
|
+ const bool dont_gc_arguments = false;
|
|
+
|
|
+ // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
|
|
+ bool save_fpu_registers = true;
|
|
+
|
|
+ // stub code & info for the different stubs
|
|
+ OopMapSet* oop_maps = NULL;
|
|
+ switch (id) {
|
|
+ {
|
|
+ case forward_exception_id:
|
|
+ {
|
|
+ oop_maps = generate_handle_exception(id, sasm);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_div0_exception_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_null_pointer_exception_id:
|
|
+ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case new_instance_id:
|
|
+ case fast_new_instance_id:
|
|
+ case fast_new_instance_init_check_id:
|
|
+ {
|
|
+ Register klass = x13; // Incoming
|
|
+ Register obj = x10; // Result
|
|
+
|
|
+ if (id == new_instance_id) {
|
|
+ __ set_info("new_instance", dont_gc_arguments);
|
|
+ } else if (id == fast_new_instance_id) {
|
|
+ __ set_info("fast new_instance", dont_gc_arguments);
|
|
+ } else {
|
|
+ assert(id == fast_new_instance_init_check_id, "bad StubID");
|
|
+ __ set_info("fast new_instance init check", dont_gc_arguments);
|
|
+ }
|
|
+
|
|
+ // If TLAB is disabled, see if there is support for inlining contiguous
|
|
+ // allocations.
|
|
+ // Otherwise, just go to the slow path.
|
|
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
|
|
+ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
|
|
+ Label slow_path;
|
|
+ Register obj_size = x12;
|
|
+ Register tmp1 = x9;
|
|
+ Register tmp2 = x14;
|
|
+ assert_different_registers(klass, obj, obj_size, tmp1, tmp2);
|
|
+
|
|
+ const int sp_offset = 2;
|
|
+ const int x9_offset = 1;
|
|
+ const int zr_offset = 0;
|
|
+ __ addi(sp, sp, -(sp_offset * wordSize));
|
|
+ __ sd(x9, Address(sp, x9_offset * wordSize));
|
|
+ __ sd(zr, Address(sp, zr_offset * wordSize));
|
|
+
|
|
+ if (id == fast_new_instance_init_check_id) {
|
|
+ // make sure the klass is initialized
|
|
+ __ lbu(t0, Address(klass, InstanceKlass::init_state_offset()));
|
|
+ __ mv(t1, InstanceKlass::fully_initialized);
|
|
+ __ bne(t0, t1, slow_path);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // assert object can be fast path allocated
|
|
+ {
|
|
+ Label ok, not_ok;
|
|
+ __ lw(obj_size, Address(klass, Klass::layout_helper_offset()));
|
|
+ // make sure it's an instance. For instances, layout helper is a positive number.
|
|
+ // For arrays, layout helper is a negative number
|
|
+ __ blez(obj_size, not_ok);
|
|
+ __ andi(t0, obj_size, Klass::_lh_instance_slow_path_bit);
|
|
+ __ beqz(t0, ok);
|
|
+ __ bind(not_ok);
|
|
+ __ stop("assert(can be fast path allocated)");
|
|
+ __ should_not_reach_here();
|
|
+ __ bind(ok);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+ // get the instance size
|
|
+ __ lwu(obj_size, Address(klass, Klass::layout_helper_offset()));
|
|
+
|
|
+ __ eden_allocate(obj, obj_size, 0, tmp1, slow_path);
|
|
+
|
|
+ __ initialize_object(obj, klass, obj_size, 0, tmp1, tmp2, /* is_tlab_allocated */ false);
|
|
+ __ verify_oop(obj);
|
|
+ __ ld(x9, Address(sp, x9_offset * wordSize));
|
|
+ __ ld(zr, Address(sp, zr_offset * wordSize));
|
|
+ __ addi(sp, sp, sp_offset * wordSize);
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(slow_path);
|
|
+ __ ld(x9, Address(sp, x9_offset * wordSize));
|
|
+ __ ld(zr, Address(sp, zr_offset * wordSize));
|
|
+ __ addi(sp, sp, sp_offset * wordSize);
|
|
+ }
|
|
+
|
|
+ __ enter();
|
|
+ OopMap* map = save_live_registers(sasm);
|
|
+ assert_cond(map != NULL);
|
|
+ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers_except_r10(sasm);
|
|
+ __ verify_oop(obj);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ // x10: new instance
|
|
+ }
|
|
+
|
|
+ break;
|
|
+
|
|
+ case counter_overflow_id:
|
|
+ {
|
|
+ Register bci = x10;
|
|
+ Register method = x11;
|
|
+ __ enter();
|
|
+ OopMap* map = save_live_registers(sasm);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ const int bci_off = 0;
|
|
+ const int method_off = 1;
|
|
+ // Retrieve bci
|
|
+ __ lw(bci, Address(fp, bci_off * BytesPerWord));
|
|
+ // And a pointer to the Method*
|
|
+ __ ld(method, Address(fp, method_off * BytesPerWord));
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers(sasm);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case new_type_array_id:
|
|
+ case new_object_array_id:
|
|
+ {
|
|
+ Register length = x9; // Incoming
|
|
+ Register klass = x13; // Incoming
|
|
+ Register obj = x10; // Result
|
|
+
|
|
+ if (id == new_type_array_id) {
|
|
+ __ set_info("new_type_array", dont_gc_arguments);
|
|
+ } else {
|
|
+ __ set_info("new_object_array", dont_gc_arguments);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // assert object type is really an array of the proper kind
|
|
+ {
|
|
+ Label ok;
|
|
+ Register tmp = obj;
|
|
+ __ lwu(tmp, Address(klass, Klass::layout_helper_offset()));
|
|
+ __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift);
|
|
+ int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value);
|
|
+ __ mv(t0, tag);
|
|
+ __ beq(t0, tmp, ok);
|
|
+ __ stop("assert(is an array klass)");
|
|
+ __ should_not_reach_here();
|
|
+ __ bind(ok);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+ // If TLAB is disabled, see if there is support for inlining contiguous
|
|
+ // allocations.
|
|
+ // Otherwise, just go to the slow path.
|
|
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
|
|
+ Register arr_size = x14;
|
|
+ Register tmp1 = x12;
|
|
+ Register tmp2 = x15;
|
|
+ Label slow_path;
|
|
+ assert_different_registers(length, klass, obj, arr_size, tmp1, tmp2);
|
|
+
|
|
+ // check that array length is small enough for fast path.
|
|
+ __ mv(t0, C1_MacroAssembler::max_array_allocation_length);
|
|
+ __ bgtu(length, t0, slow_path);
|
|
+
|
|
+ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
|
|
+ __ lwu(tmp1, Address(klass, Klass::layout_helper_offset()));
|
|
+ __ andi(t0, tmp1, 0x1f);
|
|
+ __ sll(arr_size, length, t0);
|
|
+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
|
|
+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
|
|
+ __ slli(tmp1, tmp1, XLEN - lh_header_size_msb);
|
|
+ __ srli(tmp1, tmp1, XLEN - lh_header_size_width);
|
|
+ __ add(arr_size, arr_size, tmp1);
|
|
+ __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
|
|
+ __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);
|
|
+
|
|
+ __ eden_allocate(obj, arr_size, 0, tmp1, slow_path); // preserves arr_size
|
|
+
|
|
+ __ initialize_header(obj, klass, length, tmp1, tmp2);
|
|
+ __ lbu(tmp1, Address(klass,
|
|
+ in_bytes(Klass::layout_helper_offset()) +
|
|
+ (Klass::_lh_header_size_shift / BitsPerByte)));
|
|
+ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
|
|
+ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
|
|
+ __ andi(tmp1, tmp1, Klass::_lh_header_size_mask);
|
|
+ __ sub(arr_size, arr_size, tmp1); // body length
|
|
+ __ add(tmp1, tmp1, obj); // body start
|
|
+ __ initialize_body(tmp1, arr_size, 0, tmp2);
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+ __ verify_oop(obj);
|
|
+
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(slow_path);
|
|
+ }
|
|
+
|
|
+ __ enter();
|
|
+ OopMap* map = save_live_registers(sasm);
|
|
+ assert_cond(map != NULL);
|
|
+ int call_offset = 0;
|
|
+ if (id == new_type_array_id) {
|
|
+ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
|
|
+ } else {
|
|
+ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
|
|
+ }
|
|
+
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers_except_r10(sasm);
|
|
+
|
|
+ __ verify_oop(obj);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ // x10: new array
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case new_multi_array_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
|
|
+ // x10: klass
|
|
+ // x9: rank
|
|
+ // x12: address of 1st dimension
|
|
+ OopMap* map = save_live_registers(sasm);
|
|
+ assert_cond(map != NULL);
|
|
+ __ mv(c_rarg1, x10);
|
|
+ __ mv(c_rarg3, x12);
|
|
+ __ mv(c_rarg2, x9);
|
|
+ int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, new_multi_array), x11, x12, x13);
|
|
+
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers_except_r10(sasm);
|
|
+
|
|
+ // x10: new multi array
|
|
+ __ verify_oop(x10);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case register_finalizer_id:
|
|
+ {
|
|
+ __ set_info("register_finalizer", dont_gc_arguments);
|
|
+
|
|
+ // This is called via call_runtime, so the arguments
+ // will be placed in C ABI locations
|
|
+ __ verify_oop(c_rarg0);
|
|
+
|
|
+ // load the klass and check the has-finalizer flag
|
|
+ Label register_finalizer;
|
|
+ Register t = x15;
|
|
+ __ load_klass(t, x10);
|
|
+ __ lwu(t, Address(t, Klass::access_flags_offset()));
|
|
+ __ andi(t0, t, JVM_ACC_HAS_FINALIZER);
|
|
+ __ bnez(t0, register_finalizer);
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(register_finalizer);
|
|
+ __ enter();
|
|
+ OopMap* oop_map = save_live_registers(sasm);
|
|
+ assert_cond(oop_map != NULL);
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), x10);
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, oop_map);
|
|
+
|
|
+ // Now restore all the live registers
|
|
+ restore_live_registers(sasm);
|
|
+
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_class_cast_exception_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_incompatible_class_change_error_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm,
|
|
+ CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case slow_subtype_check_id:
|
|
+ {
|
|
+ // Typical calling sequence:
|
|
+ // push klass_RInfo (object klass or other subclass)
|
|
+ // push sup_k_RInfo (array element klass or other superclass)
|
|
+ // jump to slow_subtype_check
|
|
+ // Note that the subclass is pushed first, and is therefore deepest.
|
|
+ enum layout {
|
|
+ x10_off, x10_off_hi,
|
|
+ x12_off, x12_off_hi,
|
|
+ x14_off, x14_off_hi,
|
|
+ x15_off, x15_off_hi,
|
|
+ sup_k_off, sup_k_off_hi,
|
|
+ klass_off, klass_off_hi,
|
|
+ framesize,
|
|
+ result_off = sup_k_off
|
|
+ };
|
|
+
|
|
+ __ set_info("slow_subtype_check", dont_gc_arguments);
|
|
+ __ push_reg(RegSet::of(x10, x12, x14, x15), sp);
|
|
+
|
|
+ __ ld(x14, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // sub klass
|
|
+ __ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass
|
|
+
|
|
+ Label miss;
|
|
+ __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss);
|
|
+
|
|
+ // fallthrough on success:
|
|
+ __ mv(t0, 1);
|
|
+ __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
|
|
+ __ pop_reg(RegSet::of(x10, x12, x14, x15), sp);
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(miss);
|
|
+ __ sd(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
|
|
+ __ pop_reg(RegSet::of(x10, x12, x14, x15), sp);
|
|
+ __ ret();
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case monitorenter_nofpu_id:
|
|
+ save_fpu_registers = false;
|
|
+ // fall through
|
|
+ case monitorenter_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "monitorenter", dont_gc_arguments);
|
|
+ OopMap* map = save_live_registers(sasm, save_fpu_registers);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ // Called with store_parameter and not C abi
|
|
+ f.load_argument(1, x10); // x10: object
|
|
+ f.load_argument(0, x11); // x11: lock address
|
|
+
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), x10, x11);
|
|
+
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers(sasm, save_fpu_registers);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case monitorexit_nofpu_id:
|
|
+ save_fpu_registers = false;
|
|
+ // fall through
|
|
+ case monitorexit_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "monitorexit", dont_gc_arguments);
|
|
+ OopMap* map = save_live_registers(sasm, save_fpu_registers);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ // Called with store_parameter and not C abi
|
|
+ f.load_argument(0, x10); // x10: lock address
|
|
+
|
|
+ // note: really a leaf routine but must set up last java sp
|
|
+ // => use call_RT for now (speed can be improved by
|
|
+ // doing last java sp setup manually)
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), x10);
|
|
+
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers(sasm, save_fpu_registers);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case deoptimize_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "deoptimize", dont_gc_arguments);
|
|
+ OopMap* oop_map = save_live_registers(sasm);
|
|
+ assert_cond(oop_map != NULL);
|
|
+ f.load_argument(0, c_rarg1);
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1);
|
|
+
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, oop_map);
|
|
+ restore_live_registers(sasm);
|
|
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
|
|
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
|
|
+ __ leave();
|
|
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_range_check_failed_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case unwind_exception_id:
|
|
+ {
|
|
+ __ set_info("unwind_exception", dont_gc_arguments);
|
|
+ // note: no stubframe since we are about to leave the current
|
|
+ // activation and we are calling a leaf VM function only.
|
|
+ generate_unwind_exception(sasm);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case access_field_patching_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
|
|
+ // we should set up register map
|
|
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case load_klass_patching_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
|
|
+ // we should set up register map
|
|
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case load_mirror_patching_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments);
|
|
+ // we should set up register map
|
|
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case load_appendix_patching_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
|
|
+ // we should set up register map
|
|
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case handle_exception_nofpu_id:
|
|
+ case handle_exception_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "handle_exception", dont_gc_arguments);
|
|
+ oop_maps = generate_handle_exception(id, sasm);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case handle_exception_from_callee_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
|
|
+ oop_maps = generate_handle_exception(id, sasm);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_index_exception_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case throw_array_store_exception_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
|
|
+ // tos + 0: link
|
|
+ // + 1: return address
|
|
+ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case predicate_failed_trap_id:
|
|
+ {
|
|
+ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments);
|
|
+
|
|
+ OopMap* map = save_live_registers(sasm);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
|
|
+ oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ oop_maps->add_gc_map(call_offset, map);
|
|
+ restore_live_registers(sasm);
|
|
+ __ leave();
|
|
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
|
|
+ assert(deopt_blob != NULL, "deoptimization blob must have been created");
|
|
+
|
|
+ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case dtrace_object_alloc_id:
|
|
+ { // c_rarg0: object
|
|
+ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
|
|
+ save_live_registers(sasm);
|
|
+
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0);
|
|
+
|
|
+ restore_live_registers(sasm);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ {
|
|
+ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
|
|
+ __ mv(x10, (int)id);
|
|
+ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10);
|
|
+ __ should_not_reach_here();
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ return oop_maps;
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; }
|
|
diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
new file mode 100644
index 000000000..974c8fe76
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
@@ -0,0 +1,72 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C1_GLOBALS_RISCV_HPP
|
|
+#define CPU_RISCV_C1_GLOBALS_RISCV_HPP
|
|
+
|
|
+#include "utilities/globalDefinitions.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+// Sets the default values for platform dependent flags used by the client compiler.
|
|
+// (see c1_globals.hpp)
|
|
+
|
|
+#ifndef TIERED
|
|
+define_pd_global(bool, BackgroundCompilation, true );
|
|
+define_pd_global(bool, UseTLAB, true );
|
|
+define_pd_global(bool, ResizeTLAB, true );
|
|
+define_pd_global(bool, InlineIntrinsics, true );
|
|
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
|
|
+define_pd_global(bool, ProfileTraps, false);
|
|
+define_pd_global(bool, UseOnStackReplacement, true );
|
|
+define_pd_global(bool, TieredCompilation, false);
|
|
+define_pd_global(intx, CompileThreshold, 1500 );
|
|
+
|
|
+define_pd_global(intx, OnStackReplacePercentage, 933 );
|
|
+define_pd_global(intx, FreqInlineSize, 325 );
|
|
+define_pd_global(intx, NewSizeThreadIncrease, 4*K );
|
|
+define_pd_global(intx, InitialCodeCacheSize, 160*K);
|
|
+define_pd_global(intx, ReservedCodeCacheSize, 32*M );
|
|
+define_pd_global(intx, NonProfiledCodeHeapSize, 13*M );
|
|
+define_pd_global(intx, ProfiledCodeHeapSize, 14*M );
|
|
+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M );
|
|
+define_pd_global(bool, ProfileInterpreter, false);
|
|
+define_pd_global(intx, CodeCacheExpansionSize, 32*K );
|
|
+define_pd_global(uintx, CodeCacheMinBlockLength, 1);
|
|
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
|
|
+define_pd_global(uintx, MetaspaceSize, 12*M );
|
|
+define_pd_global(bool, NeverActAsServerClassMachine, true );
|
|
+define_pd_global(uint64_t,MaxRAM, 1ULL*G);
|
|
+define_pd_global(bool, CICompileOSR, true );
|
|
+#endif // !TIERED
|
|
+define_pd_global(bool, UseTypeProfile, false);
|
|
+define_pd_global(bool, RoundFPResults, true );
|
|
+
|
|
+define_pd_global(bool, LIRFillDelaySlots, false);
|
|
+define_pd_global(bool, OptimizeSinglePrecision, true );
|
|
+define_pd_global(bool, CSEArrayLength, false);
|
|
+define_pd_global(bool, TwoOperandLIRForm, false );
|
|
+
|
|
+#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
new file mode 100644
index 000000000..bf4efa629
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -0,0 +1,91 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP
|
|
+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP
|
|
+
|
|
+#include "utilities/globalDefinitions.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+// Sets the default values for platform dependent flags used by the server compiler.
|
|
+// (see c2_globals.hpp). Alpha-sorted.
|
|
+
|
|
+define_pd_global(bool, BackgroundCompilation, true);
|
|
+define_pd_global(bool, UseTLAB, true);
|
|
+define_pd_global(bool, ResizeTLAB, true);
|
|
+define_pd_global(bool, CICompileOSR, true);
|
|
+define_pd_global(bool, InlineIntrinsics, true);
|
|
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
|
|
+define_pd_global(bool, ProfileTraps, true);
|
|
+define_pd_global(bool, UseOnStackReplacement, true);
|
|
+define_pd_global(bool, ProfileInterpreter, true);
|
|
+define_pd_global(bool, TieredCompilation, trueInTiered);
|
|
+define_pd_global(intx, CompileThreshold, 10000);
|
|
+
|
|
+define_pd_global(intx, OnStackReplacePercentage, 140);
|
|
+define_pd_global(intx, ConditionalMoveLimit, 0);
|
|
+define_pd_global(intx, FLOATPRESSURE, 64);
|
|
+define_pd_global(intx, FreqInlineSize, 325);
|
|
+define_pd_global(intx, MinJumpTableSize, 10);
|
|
+define_pd_global(intx, INTPRESSURE, 24);
|
|
+define_pd_global(intx, InteriorEntryAlignment, 16);
|
|
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
|
|
+define_pd_global(intx, LoopUnrollLimit, 60);
|
|
+define_pd_global(intx, LoopPercentProfileLimit, 10);
|
|
+// InitialCodeCacheSize derived from specjbb2000 run.
|
|
+define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
|
|
+define_pd_global(intx, CodeCacheExpansionSize, 64*K);
|
|
+
|
|
+// Ergonomics related flags
|
|
+define_pd_global(uint64_t,MaxRAM, 128ULL*G);
|
|
+define_pd_global(intx, RegisterCostAreaRatio, 16000);
|
|
+
|
|
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
|
|
+define_pd_global(bool, OptoPeephole, false);
|
|
+define_pd_global(bool, UseCISCSpill, false);
|
|
+define_pd_global(bool, OptoScheduling, true);
|
|
+define_pd_global(bool, OptoBundling, false);
|
|
+define_pd_global(bool, OptoRegScheduling, false);
|
|
+define_pd_global(bool, SuperWordLoopUnrollAnalysis, true);
|
|
+define_pd_global(bool, IdealizeClearArrayNode, true);
|
|
+
|
|
+define_pd_global(intx, ReservedCodeCacheSize, 48*M);
|
|
+define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
|
|
+define_pd_global(intx, ProfiledCodeHeapSize, 22*M);
|
|
+define_pd_global(intx, NonNMethodCodeHeapSize, 5*M );
|
|
+define_pd_global(uintx, CodeCacheMinBlockLength, 4);
|
|
+define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K);
|
|
+
|
|
+// Heap related flags
|
|
+define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M));
|
|
+
|
|
+// Ergonomics related flags
|
|
+define_pd_global(bool, NeverActAsServerClassMachine, false);
|
|
+
|
|
+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed.
|
|
+
|
|
+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
new file mode 100644
index 000000000..3cb4a4995
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
@@ -0,0 +1,38 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "opto/compile.hpp"
|
|
+#include "opto/node.hpp"
|
|
+
|
|
+// processor dependent initialization for riscv
|
|
+
|
|
+extern void reg_mask_init();
|
|
+
|
|
+void Compile::pd_compiler2_init() {
|
|
+ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
|
|
+ reg_mask_init();
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
new file mode 100644
index 000000000..881900892
--- /dev/null
+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
@@ -0,0 +1,36 @@
|
|
+/*
|
|
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP
|
|
+#define CPU_RISCV_CODEBUFFER_RISCV_HPP
|
|
+
|
|
+private:
|
|
+ void pd_initialize() {}
|
|
+
|
|
+public:
|
|
+ void flush_bundle(bool start_new_bundle) {}
|
|
+
|
|
+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
new file mode 100644
index 000000000..0354a93a0
--- /dev/null
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -0,0 +1,154 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "code/compiledIC.hpp"
|
|
+#include "code/icBuffer.hpp"
|
|
+#include "code/nmethod.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "runtime/mutexLocker.hpp"
|
|
+#include "runtime/safepoint.hpp"
|
|
+
|
|
+// ----------------------------------------------------------------------------
|
|
+
|
|
+#define __ _masm.
|
|
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
|
|
+ precond(cbuf.stubs()->start() != badAddress);
|
|
+ precond(cbuf.stubs()->end() != badAddress);
|
|
+ // Stub is fixed up when the corresponding call is converted from
|
|
+ // calling compiled code to calling interpreted code.
|
|
+ // mv xmethod, 0
|
|
+ // jalr -4 # to self
|
|
+
|
|
+ if (mark == NULL) {
|
|
+ mark = cbuf.insts_mark(); // Get mark within main instrs section.
|
|
+ }
|
|
+
|
|
+ // Note that the code buffer's insts_mark is always relative to insts.
|
|
+ // That's why we must use the macroassembler to generate a stub.
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ address base = __ start_a_stub(to_interp_stub_size());
|
|
+ int offset = __ offset();
|
|
+ if (base == NULL) {
|
|
+ return NULL; // CodeBuffer::expand failed
|
|
+ }
|
|
+ // static stub relocation stores the instruction address of the call
|
|
+ __ relocate(static_stub_Relocation::spec(mark));
|
|
+
|
|
+ __ emit_static_call_stub();
|
|
+
|
|
+ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
|
|
+ __ end_a_stub();
|
|
+ return base;
|
|
+}
|
|
+#undef __
|
|
+
|
|
+int CompiledStaticCall::to_interp_stub_size() {
|
|
+ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
|
|
+ return 12 * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
+int CompiledStaticCall::to_trampoline_stub_size() {
|
|
+ // Somewhat pessimistically, we count four instructions here (although
|
|
+ // there are only three) because we sometimes emit an alignment nop.
|
|
+ // Trampoline stubs are always word aligned.
|
|
+ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
|
|
+}
|
|
+
|
|
+// Relocation entries for call stub, compiled java to interpreter.
|
|
+int CompiledStaticCall::reloc_to_interp_stub() {
|
|
+ return 4; // 3 in emit_to_interp_stub + 1 in emit_call
|
|
+}
|
|
+
|
|
+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) {
|
|
+ address stub = find_stub(false /* is_aot */);
|
|
+ guarantee(stub != NULL, "stub not found");
|
|
+
|
|
+ if (TraceICs) {
|
|
+ ResourceMark rm;
|
|
+ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
|
|
+ p2i(instruction_address()),
|
|
+ callee->name_and_sig_as_C_string());
|
|
+ }
|
|
+
|
|
+ // Creation also verifies the object.
|
|
+ NativeMovConstReg* method_holder
|
|
+ = nativeMovConstReg_at(stub);
|
|
+#ifndef PRODUCT
|
|
+ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
|
|
+
|
|
+ // read the value once
|
|
+ volatile intptr_t data = method_holder->data();
|
|
+ assert(data == 0 || data == (intptr_t)callee(),
|
|
+ "a) MT-unsafe modification of inline cache");
|
|
+ assert(data == 0 || jump->jump_destination() == entry,
|
|
+ "b) MT-unsafe modification of inline cache");
|
|
+#endif
|
|
+ // Update stub.
|
|
+ method_holder->set_data((intptr_t)callee());
|
|
+ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
|
|
+ ICache::invalidate_range(stub, to_interp_stub_size());
|
|
+ // Update jump to call.
|
|
+ set_destination_mt_safe(stub);
|
|
+}
|
|
+
|
|
+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
|
|
+ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
|
|
+ // Reset stub.
|
|
+ address stub = static_stub->addr();
|
|
+ assert(stub != NULL, "stub not found");
|
|
+ // Creation also verifies the object.
|
|
+ NativeMovConstReg* method_holder
|
|
+ = nativeMovConstReg_at(stub);
|
|
+ method_holder->set_data(0);
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Non-product mode code
|
|
+#ifndef PRODUCT
|
|
+
|
|
+void CompiledDirectStaticCall::verify() {
|
|
+ // Verify call.
|
|
+ _call->verify();
|
|
+ if (os::is_MP()) {
|
|
+ _call->verify_alignment();
|
|
+ }
|
|
+
|
|
+ // Verify stub.
|
|
+ address stub = find_stub(false /* is_aot */);
|
|
+ assert(stub != NULL, "no stub found for static call");
|
|
+ // Creation also verifies the object.
|
|
+ NativeMovConstReg* method_holder
|
|
+ = nativeMovConstReg_at(stub);
|
|
+ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
|
|
+
|
|
+ // Verify state.
|
|
+ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
|
|
+}
|
|
+
|
|
+#endif // !PRODUCT
|
|
diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp
new file mode 100644
index 000000000..011e965ad
--- /dev/null
+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp
@@ -0,0 +1,60 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_COPY_RISCV_HPP
|
|
+#define CPU_RISCV_COPY_RISCV_HPP
|
|
+
|
|
+// Inline functions for memory copy and fill.
|
|
+
|
|
+// Contains inline asm implementations
|
|
+#include OS_CPU_HEADER_INLINE(copy)
|
|
+
|
|
+
|
|
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
|
|
+ julong* to = (julong*) tohw;
|
|
+ julong v = ((julong) value << 32) | value;
|
|
+ while (count-- > 0) {
|
|
+ *to++ = v;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
|
|
+ pd_fill_to_words(tohw, count, value);
|
|
+}
|
|
+
|
|
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
|
|
+ (void)memset(to, value, count);
|
|
+}
|
|
+
|
|
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
|
|
+ pd_fill_to_words(tohw, count, 0);
|
|
+}
|
|
+
|
|
+static void pd_zero_to_bytes(void* to, size_t count) {
|
|
+ (void)memset(to, 0, count);
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_COPY_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp
new file mode 100644
index 000000000..31cee7103
--- /dev/null
+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp
@@ -0,0 +1,32 @@
|
|
+/*
|
|
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
|
|
+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
|
|
+
|
|
+// Nothing to do on riscv
|
|
+
|
|
+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
new file mode 100644
index 000000000..e97b89327
--- /dev/null
+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
@@ -0,0 +1,37 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP
|
|
+
|
|
+ static int pd_instruction_alignment() {
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ static const char* pd_cpu_opts() {
|
|
+ return "";
|
|
+ }
|
|
+
|
|
+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
new file mode 100644
index 000000000..be6f1a67f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
@@ -0,0 +1,683 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "oops/markOop.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "prims/methodHandles.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/handles.inline.hpp"
|
|
+#include "runtime/javaCalls.hpp"
|
|
+#include "runtime/monitorChunk.hpp"
|
|
+#include "runtime/os.hpp"
|
|
+#include "runtime/signature.hpp"
|
|
+#include "runtime/stubCodeGenerator.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#include "runtime/vframeArray.hpp"
|
|
+#endif
|
|
+
|
|
+#ifdef ASSERT
|
|
+void RegisterMap::check_location_valid() {
|
|
+}
|
|
+#endif
|
|
+
|
|
+
|
|
+// Profiling/safepoint support
|
|
+
|
|
+bool frame::safe_for_sender(JavaThread *thread) {
|
|
+ address addr_sp = (address)_sp;
|
|
+ address addr_fp = (address)_fp;
|
|
+ address unextended_sp = (address)_unextended_sp;
|
|
+
|
|
+ // consider stack guards when trying to determine "safe" stack pointers
|
|
+ static size_t stack_guard_size = os::uses_stack_guard_pages() ?
|
|
+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0;
|
|
+ assert_cond(thread != NULL);
|
|
+ size_t usable_stack_size = thread->stack_size() - stack_guard_size;
|
|
+
|
|
+ // sp must be within the usable part of the stack (not in guards)
|
|
+ bool sp_safe = (addr_sp < thread->stack_base()) &&
|
|
+ (addr_sp >= thread->stack_base() - usable_stack_size);
|
|
+
|
|
+ if (!sp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // When we are running interpreted code the machine stack pointer, SP, is
|
|
+ // set low enough so that the Java expression stack can grow and shrink
|
|
+ // without ever exceeding the machine stack bounds. So, ESP >= SP.
|
|
+
|
|
+ // When we call out of an interpreted method, SP is incremented so that
|
|
+ // the space between SP and ESP is removed. The SP saved in the callee's
|
|
+ // frame is the SP *before* this increment. So, when we walk a stack of
|
|
+ // interpreter frames the sender's SP saved in a frame might be less than
|
|
+ // the SP at the point of call.
|
|
+
|
|
+ // So unextended sp must be within the stack but we need not check
|
|
+ // that unextended sp >= sp
|
|
+
|
|
+ bool unextended_sp_safe = (unextended_sp < thread->stack_base());
|
|
+
|
|
+ if (!unextended_sp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // an fp must be within the stack and above (but not equal to) sp
+ // a second check on fp + offset is added to handle the situation where fp is -1
|
|
+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) &&
|
|
+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base())));
|
|
+
|
|
+ // We know sp/unextended_sp are safe; only fp is questionable here
|
|
+
|
|
+ // If the current frame is known to the code cache then we can attempt
+ // to construct the sender and do some validation of it. This goes a long way
+ // toward eliminating issues when we get into frame construction code
|
|
+
|
|
+ if (_cb != NULL) {
|
|
+
|
|
+ // First check if the frame is complete and the tester is reliable.
+ // Unfortunately we can only check frame completeness for runtime stubs and nmethods;
+ // other generic buffer blobs are more problematic so we just assume they are
+ // ok. Adapter blobs never have a complete frame and are never ok.
|
|
+
|
|
+ if (!_cb->is_frame_complete_at(_pc)) {
|
|
+ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Could just be some random pointer within the codeBlob
|
|
+ if (!_cb->code_contains(_pc)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Entry frame checks
|
|
+ if (is_entry_frame()) {
|
|
+ // an entry frame must have a valid fp.
|
|
+ return fp_safe && is_entry_frame_valid(thread);
|
|
+ }
|
|
+
|
|
+ intptr_t* sender_sp = NULL;
|
|
+ intptr_t* sender_unextended_sp = NULL;
|
|
+ address sender_pc = NULL;
|
|
+ intptr_t* saved_fp = NULL;
|
|
+
|
|
+ if (is_interpreted_frame()) {
|
|
+ // fp must be safe
|
|
+ if (!fp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ sender_pc = (address)this->fp()[return_addr_offset];
|
|
+ // for interpreted frames, the value below is the sender "raw" sp,
|
|
+ // which can be different from the sender unextended sp (the sp seen
|
|
+ // by the sender) because of current frame local variables
|
|
+ sender_sp = (intptr_t*) addr_at(sender_sp_offset);
|
|
+ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset];
|
|
+ saved_fp = (intptr_t*) this->fp()[link_offset];
|
|
+ } else {
|
|
+ // must be some sort of compiled/runtime frame
|
|
+ // fp does not have to be safe (although it could be checked for c1?)
|
|
+
|
|
+ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc
|
|
+ if (_cb->frame_size() <= 0) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ sender_sp = _unextended_sp + _cb->frame_size();
|
|
+ // Is sender_sp safe?
|
|
+ if ((address)sender_sp >= thread->stack_base()) {
|
|
+ return false;
|
|
+ }
|
|
+ sender_unextended_sp = sender_sp;
|
|
+ sender_pc = (address) *(sender_sp + frame::return_addr_offset);
|
|
+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset);
|
|
+ }
|
|
+
|
|
+
|
|
+ // If the potential sender is the interpreter then we can do some more checking
|
|
+ if (Interpreter::contains(sender_pc)) {
|
|
+
|
|
+ // fp is always saved in a recognizable place in any code we generate. However
|
|
+ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp
|
|
+ // is really a frame pointer.
|
|
+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
|
|
+
|
|
+ if (!saved_fp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // construct the potential sender
|
|
+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
|
|
+
|
|
+ return sender.is_interpreted_frame_valid(thread);
|
|
+ }
|
|
+
|
|
+ // We must always be able to find a recognizable pc
|
|
+ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
|
|
+ if (sender_pc == NULL || sender_blob == NULL) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Could be a zombie method
|
|
+ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Could just be some random pointer within the codeBlob
|
|
+ if (!sender_blob->code_contains(sender_pc)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // We should never be able to see an adapter if the current frame is something from the code cache
|
|
+ if (sender_blob->is_adapter_blob()) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Could be the call_stub
|
|
+ if (StubRoutines::returns_to_call_stub(sender_pc)) {
|
|
+ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
|
|
+
|
|
+ if (!saved_fp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // construct the potential sender
|
|
+ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
|
|
+
|
|
+ // Validate the JavaCallWrapper an entry frame must have
|
|
+ address jcw = (address)sender.entry_frame_call_wrapper();
|
|
+
|
|
+ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp());
|
|
+
|
|
+ return jcw_safe;
|
|
+ }
|
|
+
|
|
+ CompiledMethod* nm = sender_blob->as_compiled_method_or_null();
|
|
+ if (nm != NULL) {
|
|
+ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
|
|
+ nm->method()->is_method_handle_intrinsic()) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // If the frame size is 0 (or less) something is bad, because every nmethod
+ // has a non-zero frame size: the return address counts against the callee's frame.
|
|
+ if (sender_blob->frame_size() <= 0) {
|
|
+ assert(!sender_blob->is_compiled(), "should count return address at least");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // We should never be able to see anything here except an nmethod. If something in the
|
|
+ // code cache (current frame) is called by an entity within the code cache that entity
|
|
+ // should not be anything but the call stub (already covered), the interpreter (already covered)
|
|
+ // or an nmethod.
|
|
+ if (!sender_blob->is_compiled()) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Could put some more validation for the potential non-interpreted sender
|
|
+ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
|
|
+
|
|
+ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
|
|
+
|
|
+ // We've validated the potential sender that would be created
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ // Must be a native-compiled frame. Since the sender will try to use fp to find
+ // linkages, fp must be safe.
|
|
+ if (!fp_safe) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
|
|
+ if ((address)this->fp()[return_addr_offset] == NULL) { return false; }
|
|
+
|
|
+ return true;
|
|
+}
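Reviewer's note, not part of the patch: the routine above never dereferences a candidate frame before reducing every question to pointer-range tests against the thread's stack. A standalone sketch of those tests, with illustrative names and a downward-growing stack assumed:

#include <cstddef>
#include <cstdint>

// Illustrative model only: stack_base is the highest stack address (exclusive)
// and return_addr_offset mirrors the frame layout used by this port.
struct StackBounds {
  const uint8_t* stack_base;
  const uint8_t* stack_limit;
};

static bool sp_is_safe(const uint8_t* sp, const StackBounds& b) {
  // sp must lie inside the usable stack range.
  return sp < b.stack_base && sp >= b.stack_limit;
}

static bool fp_is_safe(const uint8_t* fp, const uint8_t* sp,
                       const StackBounds& b, int return_addr_offset) {
  // fp must be inside the stack and strictly above sp; the extra check on
  // fp + return_addr_offset also rejects a frame pointer of (intptr_t)-1.
  return fp < b.stack_base && fp > sp &&
         fp + return_addr_offset * sizeof(void*) < b.stack_base;
}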
|
|
+
|
|
+void frame::patch_pc(Thread* thread, address pc) {
|
|
+ address* pc_addr = &(((address*) sp())[-1]);
|
|
+ if (TracePcPatching) {
|
|
+ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
|
|
+ p2i(pc_addr), p2i(*pc_addr), p2i(pc));
|
|
+ }
|
|
+ // Either the return address is the original one or we are going to
|
|
+ // patch in the same address that's already there.
|
|
+ assert(_pc == *pc_addr || pc == *pc_addr, "must be");
|
|
+ *pc_addr = pc;
|
|
+ _cb = CodeCache::find_blob(pc);
|
|
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
|
|
+ if (original_pc != NULL) {
|
|
+ assert(original_pc == _pc, "expected original PC to be stored before patching");
|
|
+ _deopt_state = is_deoptimized;
|
|
+ // leave _pc as is
|
|
+ } else {
|
|
+ _deopt_state = not_deoptimized;
|
|
+ _pc = pc;
|
|
+ }
|
|
+}
|
|
+
|
|
+bool frame::is_interpreted_frame() const {
|
|
+ return Interpreter::contains(pc());
|
|
+}
|
|
+
|
|
+int frame::frame_size(RegisterMap* map) const {
|
|
+ frame sender = this->sender(map);
|
|
+ return sender.sp() - sp();
|
|
+}
|
|
+
|
|
+intptr_t* frame::entry_frame_argument_at(int offset) const {
|
|
+ // convert offset to index to deal with tsi
|
|
+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
|
|
+ // Entry frame's arguments are always in relation to unextended_sp()
|
|
+ return &unextended_sp()[index];
|
|
+}
|
|
+
|
|
+// sender_sp
|
|
+intptr_t* frame::interpreter_frame_sender_sp() const {
|
|
+ assert(is_interpreted_frame(), "interpreted frame expected");
|
|
+ return (intptr_t*) at(interpreter_frame_sender_sp_offset);
|
|
+}
|
|
+
|
|
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
|
|
+ assert(is_interpreted_frame(), "interpreted frame expected");
|
|
+ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
|
|
+}
|
|
+
|
|
+
|
|
+// monitor elements
|
|
+
|
|
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
|
|
+ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
|
|
+}
|
|
+
|
|
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
|
|
+ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
|
|
+ // make sure the pointer points inside the frame
|
|
+ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer");
|
|
+ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer");
|
|
+ return result;
|
|
+}
|
|
+
|
|
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
|
|
+ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
|
|
+}
|
|
+
|
|
+// Used by template based interpreter deoptimization
|
|
+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) {
|
|
+ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp;
|
|
+}
|
|
+
|
|
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
|
|
+ assert(map != NULL, "map must be set");
|
|
+ // Java frame called from C; skip all C frames and return top C
|
|
+ // frame of that chunk as the sender
|
|
+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
|
|
+ assert(!entry_frame_is_first(), "next Java fp must be non zero");
|
|
+ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
|
|
+ // Since we are walking the stack now this nested anchor is obviously walkable
|
|
+ // even if it wasn't when it was stacked.
|
|
+ if (!jfa->walkable()) {
|
|
+ // Capture _last_Java_pc (if needed) and mark anchor walkable.
|
|
+ jfa->capture_last_Java_pc();
|
|
+ }
|
|
+ map->clear();
|
|
+ assert(map->include_argument_oops(), "should be set by clear");
|
|
+ vmassert(jfa->last_Java_pc() != NULL, "not walkable");
|
|
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
|
|
+ return fr;
|
|
+}
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::verify_deopt_original_pc
|
|
+//
|
|
+// Verifies the calculated original PC of a deoptimization PC for the
|
|
+// given unextended SP.
|
|
+#ifdef ASSERT
|
|
+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
|
|
+ frame fr;
|
|
+
|
|
+ // This is ugly, but it's better than changing {get,set}_original_pc
+ // to take an SP value as argument. And it's only a debugging
+ // method anyway.
|
|
+ fr._unextended_sp = unextended_sp;
|
|
+
|
|
+ assert_cond(nm != NULL);
|
|
+ address original_pc = nm->get_original_pc(&fr);
|
|
+ assert(nm->insts_contains_inclusive(original_pc),
+ "original PC must be in the main code section of the compiled method (or must be immediately following it)");
|
|
+}
|
|
+#endif
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::adjust_unextended_sp
|
|
+void frame::adjust_unextended_sp() {
|
|
+ // On riscv, sites calling method handle intrinsics and lambda forms are treated
|
|
+ // as any other call site. Therefore, no special action is needed when we are
|
|
+ // returning to any of these call sites.
|
|
+
|
|
+ if (_cb != NULL) {
|
|
+ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null();
|
|
+ if (sender_cm != NULL) {
|
|
+ // If the sender PC is a deoptimization point, get the original PC.
|
|
+ if (sender_cm->is_deopt_entry(_pc) ||
|
|
+ sender_cm->is_deopt_mh_entry(_pc)) {
|
|
+ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::update_map_with_saved_link
|
|
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
|
|
+ // The interpreter and compiler(s) always save fp in a known
+ // location on entry. We must record where that location is
+ // so that if fp was live on callout from c2 we can find
+ // the saved copy no matter what it called.
+
+ // Since the interpreter always saves fp, if we record where it is then
+ // we don't have to always save fp on entry and exit to c2 compiled
+ // code: saving it on entry will be enough.
|
|
+ assert(map != NULL, "map must be set");
|
|
+ map->set_location(::fp->as_VMReg(), (address) link_addr);
|
|
+ // This is weird: "H" ought to be at a higher address, however the
+ // oopMaps seem to have the "H" regs at the same address as the
+ // vanilla register.
|
|
+ map->set_location(::fp->as_VMReg()->next(), (address) link_addr);
|
|
+}
|
|
+
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::sender_for_interpreter_frame
|
|
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
|
|
+ // SP is the raw SP from the sender after adapter or interpreter
|
|
+ // extension.
|
|
+ intptr_t* sender_sp = this->sender_sp();
|
|
+
|
|
+ // This is the sp before any possible extension (adapter/locals).
|
|
+ intptr_t* unextended_sp = interpreter_frame_sender_sp();
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ assert(map != NULL, "map must be set");
|
|
+ if (map->update_map()) {
|
|
+ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
|
|
+ }
|
|
+#endif // COMPILER2
|
|
+
|
|
+ return frame(sender_sp, unextended_sp, link(), sender_pc());
|
|
+}
|
|
+
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::sender_for_compiled_frame
|
|
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
|
|
+ // We cannot rely upon the last fp having been saved to the thread
+ // in C2 code, but it will have been pushed onto the stack. So we
+ // have to find it relative to the unextended sp.
|
|
+
|
|
+ assert(_cb->frame_size() >= 0, "must have non-zero frame size");
|
|
+ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
|
|
+ intptr_t* unextended_sp = l_sender_sp;
|
|
+
|
|
+ // the return_address is always the word on the stack
|
|
+ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset);
|
|
+
|
|
+ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset);
|
|
+
|
|
+ assert(map != NULL, "map must be set");
|
|
+ if (map->update_map()) {
|
|
+ // Tell GC to use argument oopmaps for some runtime stubs that need it.
|
|
+ // For C1, the runtime stub might not have oop maps, so set this flag
|
|
+ // outside of update_register_map.
|
|
+ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
|
|
+ if (_cb->oop_maps() != NULL) {
|
|
+ OopMapSet::update_register_map(this, map);
|
|
+ }
|
|
+
|
|
+ // Since the prolog does the save and restore of FP there is no
|
|
+ // oopmap for it so we must fill in its location as if there was
|
|
+ // an oopmap entry since if our caller was compiled code there
|
|
+ // could be live jvm state in it.
|
|
+ update_map_with_saved_link(map, saved_fp_addr);
|
|
+ }
|
|
+
|
|
+ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
|
|
+}
|
|
+
|
|
+//------------------------------------------------------------------------------
|
|
+// frame::sender
|
|
+frame frame::sender(RegisterMap* map) const {
|
|
+ // Default is we don't have to follow them. The sender_for_xxx will
+ // update it accordingly.
|
|
+ assert(map != NULL, "map must be set");
|
|
+ map->set_include_argument_oops(false);
|
|
+
|
|
+ if (is_entry_frame()) {
|
|
+ return sender_for_entry_frame(map);
|
|
+ }
|
|
+ if (is_interpreted_frame()) {
|
|
+ return sender_for_interpreter_frame(map);
|
|
+ }
|
|
+ assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
|
|
+
|
|
+ // This test looks odd: why is it not is_compiled_frame() ? That's
|
|
+ // because stubs also have OOP maps.
|
|
+ if (_cb != NULL) {
|
|
+ return sender_for_compiled_frame(map);
|
|
+ }
|
|
+
|
|
+ // Must be native-compiled frame, i.e. the marshaling code for native
|
|
+ // methods that exists in the core system.
|
|
+ return frame(sender_sp(), link(), sender_pc());
|
|
+}
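Reviewer's note, not part of the patch: sender() picks a strategy purely from the kind of the current frame, in the fixed order entry, interpreted, known-to-the-code-cache, plain native C frame. A compact sketch of that dispatch with hypothetical stand-in types:

// Hypothetical stand-ins for the real frame kinds.
enum class FrameKind { Entry, Interpreted, Compiled, Native };

struct MiniFrame {
  FrameKind kind;
  bool has_code_blob;  // models _cb != NULL (stubs with oop maps included)
};

// Mirrors the dispatch order in frame::sender(): entry frames first,
// then interpreted frames, then anything known to the code cache,
// and finally plain native C frames.
const char* sender_strategy(const MiniFrame& f) {
  if (f.kind == FrameKind::Entry)       return "sender_for_entry_frame";
  if (f.kind == FrameKind::Interpreted) return "sender_for_interpreter_frame";
  if (f.has_code_blob)                  return "sender_for_compiled_frame";
  return "native C frame: frame(sender_sp(), link(), sender_pc())";
}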
|
|
+
|
|
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
|
|
+ assert(is_interpreted_frame(), "Not an interpreted frame");
|
|
+ // These are reasonable sanity checks
|
|
+ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) {
|
|
+ return false;
|
|
+ }
|
|
+ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) {
|
|
+ return false;
|
|
+ }
|
|
+ if (fp() + interpreter_frame_initial_sp_offset < sp()) {
|
|
+ return false;
|
|
+ }
|
|
+ // These are hacks to keep us out of trouble.
|
|
+ // The problem with these is that they mask other problems
|
|
+ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // do some validation of frame elements
|
|
+
|
|
+ // first the method
|
|
+
|
|
+ Method* m = *interpreter_frame_method_addr();
|
|
+
|
|
+ // validate the method we'd find in this potential sender
|
|
+ if (!Method::is_valid_method(m)) {
|
|
+ return false;
|
|
+ }
|
|
+ // stack frames shouldn't be much larger than max_stack elements
+ // this test requires the use of unextended_sp, which is the sp as seen by
+ // the current frame, and not sp, which is the "raw" sp that could point
+ // further because of local variables of the callee method inserted after
+ // the method arguments
|
|
+ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // validate bci/bcp
|
|
+ address bcp = interpreter_frame_bcp();
|
|
+ if (m->validate_bci_from_bcp(bcp) < 0) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // validate constantPoolCache*
|
|
+ ConstantPoolCache* cp = *interpreter_frame_cache_addr();
|
|
+ if (MetaspaceObj::is_valid(cp) == false) {
|
|
+ return false;
|
|
+ }
|
|
+ // validate locals
|
|
+ address locals = (address) *interpreter_frame_locals_addr();
|
|
+
|
|
+ if (locals > thread->stack_base() || locals < (address) fp()) {
|
|
+ return false;
|
|
+ }
|
|
+ // We'd have to be pretty unlucky to be misled at this point
|
|
+ return true;
|
|
+}
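For orientation only (not part of the patch): the validity test boils down to alignment checks, an fp-above-sp check, and a size bound derived from the method's max_stack. A standalone sketch with assumed names and constants:

#include <cstdint>
#include <cstddef>

// Illustrative constants; the real values come from the VM.
static const size_t kWordSize         = sizeof(void*);
static const int    kStackElementSize = 8;    // Interpreter::stackElementSize on 64-bit
static const int    kSlack            = 1024;

// A frame pointer / stack pointer pair is plausible for an interpreted frame if
// both are word aligned, fp sits above sp, and the frame is not much larger
// than the method's declared expression-stack depth.
bool plausible_interpreted_frame(uintptr_t fp, uintptr_t sp,
                                 uintptr_t unextended_sp, int max_stack) {
  if (fp == 0 || (fp & (kWordSize - 1)) != 0) return false;
  if (sp == 0 || (sp & (kWordSize - 1)) != 0) return false;
  if (fp <= sp) return false;  // stack grows down, so fp must be above sp
  // Mirrors: fp() - unextended_sp() > 1024 + max_stack * stackElementSize
  return (fp - unextended_sp) <= (uintptr_t)(kSlack + max_stack * kStackElementSize);
}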
|
|
+
|
|
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
|
|
+ assert(is_interpreted_frame(), "interpreted frame expected");
|
|
+ Method* method = interpreter_frame_method();
|
|
+ BasicType type = method->result_type();
|
|
+
|
|
+ intptr_t* tos_addr = NULL;
|
|
+ if (method->is_native()) {
|
|
+ tos_addr = (intptr_t*)sp();
|
|
+ if (type == T_FLOAT || type == T_DOUBLE) {
|
|
+ // This is because we do a push(ltos) after push(dtos) in generate_native_entry.
|
|
+ tos_addr += 2 * Interpreter::stackElementWords;
|
|
+ }
|
|
+ } else {
|
|
+ tos_addr = (intptr_t*)interpreter_frame_tos_address();
|
|
+ }
|
|
+
|
|
+ switch (type) {
|
|
+ case T_OBJECT :
|
|
+ case T_ARRAY : {
|
|
+ oop obj;
|
|
+ if (method->is_native()) {
|
|
+ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));
|
|
+ } else {
|
|
+ oop* obj_p = (oop*)tos_addr;
|
|
+ obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
|
|
+ }
|
|
+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
|
|
+ *oop_result = obj;
|
|
+ break;
|
|
+ }
|
|
+ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
|
|
+ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break;
|
|
+ case T_CHAR : value_result->c = *(jchar*)tos_addr; break;
|
|
+ case T_SHORT : value_result->s = *(jshort*)tos_addr; break;
|
|
+ case T_INT : value_result->i = *(jint*)tos_addr; break;
|
|
+ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
|
|
+ case T_FLOAT : {
|
|
+ value_result->f = *(jfloat*)tos_addr;
|
|
+ break;
|
|
+ }
|
|
+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
|
|
+ case T_VOID : /* Nothing to do */ break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ return type;
|
|
+}
|
|
+
|
|
+
|
|
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
|
|
+ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
|
|
+ return &interpreter_frame_tos_address()[index];
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+
|
|
+#define DESCRIBE_FP_OFFSET(name) \
|
|
+ values.describe(frame_no, fp() + frame::name##_offset, #name)
|
|
+
|
|
+void frame::describe_pd(FrameValues& values, int frame_no) {
|
|
+ if (is_interpreted_frame()) {
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_method);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_cache);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_locals);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
|
|
+ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+intptr_t *frame::initial_deoptimization_info() {
|
|
+ // Not used on riscv, but we must return something.
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+intptr_t* frame::real_fp() const {
|
|
+ if (_cb != NULL) {
|
|
+ // use the frame size if valid
|
|
+ int size = _cb->frame_size();
|
|
+ if (size > 0) {
|
|
+ return unextended_sp() + size;
|
|
+ }
|
|
+ }
|
|
+ // else rely on fp()
|
|
+ assert(!is_compiled_frame(), "unknown compiled frame size");
|
|
+ return fp();
|
|
+}
|
|
+
|
|
+#undef DESCRIBE_FP_OFFSET
|
|
+
|
|
+#ifndef PRODUCT
|
|
+// This is a generic constructor which is only used by pns() in debug.cpp.
|
|
+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) {
|
|
+ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc);
|
|
+}
|
|
+
|
|
+void frame::pd_ps() {}
|
|
+#endif
|
|
+
|
|
+void JavaFrameAnchor::make_walkable(JavaThread* thread) {
|
|
+ // last frame set?
|
|
+ if (last_Java_sp() == NULL) { return; }
|
|
+ // already walkable?
|
|
+ if (walkable()) { return; }
|
|
+ vmassert(Thread::current() == (Thread*)thread, "not current thread");
|
|
+ vmassert(last_Java_sp() != NULL, "not called from Java code?");
|
|
+ vmassert(last_Java_pc() == NULL, "already walkable");
|
|
+ capture_last_Java_pc();
|
|
+ vmassert(walkable(), "something went wrong");
|
|
+}
|
|
+
|
|
+void JavaFrameAnchor::capture_last_Java_pc() {
|
|
+ vmassert(_last_Java_sp != NULL, "no last frame set");
|
|
+ vmassert(_last_Java_pc == NULL, "already walkable");
|
|
+ _last_Java_pc = (address)_last_Java_sp[-1];
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..7acabcbba
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp
|
|
@@ -0,0 +1,200 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_FRAME_RISCV_HPP
|
|
+#define CPU_RISCV_FRAME_RISCV_HPP
|
|
+
|
|
+#include "runtime/synchronizer.hpp"
|
|
+
|
|
+// A frame represents a physical stack frame (an activation). Frames can be
|
|
+// C or Java frames, and the Java frames can be interpreted or compiled.
|
|
+// In contrast, vframes represent source-level activations, so that one physical frame
|
|
+// can correspond to multiple source level frames because of inlining.
|
|
+// A frame is comprised of {pc, fp, sp}
|
|
+// ------------------------------ Asm interpreter ----------------------------------------
|
|
+// Layout of asm interpreter frame:
|
|
+// [expression stack ] * <- sp
|
|
+
|
|
+// [monitors[0] ] \
|
|
+// ... | monitor block size = k
|
|
+// [monitors[k-1] ] /
|
|
+// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset
|
|
+// [byte code index/pointer] = bcp() bcp_offset
|
|
+
|
|
+// [pointer to locals ] = locals() locals_offset
|
|
+// [constant pool cache ] = cache() cache_offset
|
|
+
|
|
+// [klass of method ] = mirror() mirror_offset
|
|
+// [padding ]
|
|
+
|
|
+// [methodData ] = mdp() mdx_offset
|
|
+// [methodOop ] = method() method_offset
|
|
+
|
|
+// [last esp ] = last_sp() last_sp_offset
|
|
+// [old stack pointer ] (sender_sp) sender_sp_offset
|
|
+
|
|
+// [old frame pointer ]
|
|
+// [return pc ]
|
|
+
|
|
+// [last sp ] <- fp = link()
|
|
+// [oop temp ] (only for native calls)
|
|
+
|
|
+// [padding ] (to preserve machine SP alignment)
|
|
+// [locals and parameters ]
|
|
+// <- sender sp
|
|
+// ------------------------------ Asm interpreter ----------------------------------------
|
|
+
|
|
+// ------------------------------ C Frame ------------------------------------------------
|
|
+// Stack: gcc with -fno-omit-frame-pointer
|
|
+// .
|
|
+// .
|
|
+// +-> .
|
|
+// | +-----------------+ |
|
|
+// | | return address | |
|
|
+// | | previous fp ------+
|
|
+// | | saved registers |
|
|
+// | | local variables |
|
|
+// | | ... | <-+
|
|
+// | +-----------------+ |
|
|
+// | | return address | |
|
|
+// +------ previous fp | |
|
|
+// | saved registers | |
|
|
+// | local variables | |
|
|
+// +-> | ... | |
|
|
+// | +-----------------+ |
|
|
+// | | return address | |
|
|
+// | | previous fp ------+
|
|
+// | | saved registers |
|
|
+// | | local variables |
|
|
+// | | ... | <-+
|
|
+// | +-----------------+ |
|
|
+// | | return address | |
|
|
+// +------ previous fp | |
|
|
+// | saved registers | |
|
|
+// | local variables | |
|
|
+// $fp --> | ... | |
|
|
+// +-----------------+ |
|
|
+// | return address | |
|
|
+// | previous fp ------+
|
|
+// | saved registers |
|
|
+// $sp --> | local variables |
|
|
+// +-----------------+
|
|
+// ------------------------------ C Frame ------------------------------------------------
|
|
+
|
|
+ public:
|
|
+ enum {
|
|
+ pc_return_offset = 0,
|
|
+ // All frames
|
|
+ link_offset = -2,
|
|
+ return_addr_offset = -1,
|
|
+ sender_sp_offset = 0,
|
|
+ // Interpreter frames
|
|
+ interpreter_frame_oop_temp_offset = 1, // for native calls only
|
|
+
|
|
+ interpreter_frame_sender_sp_offset = -3,
|
|
+ // outgoing sp before a call to an invoked method
|
|
+ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1,
|
|
+ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1,
|
|
+ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1,
|
|
+ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1,
|
|
+ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1,
|
|
+ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1,
|
|
+ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1,
|
|
+ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1,
|
|
+ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1,
|
|
+
|
|
+ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset,
|
|
+ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset,
|
|
+
|
|
+ // Entry frames
|
|
+ // n.b. these values are determined by the layout defined in
|
|
+ // stubGenerator for the Java call stub
|
|
+ entry_frame_after_call_words = 34,
|
|
+ entry_frame_call_wrapper_offset = -10,
|
|
+
|
|
+ // we don't need a save area
|
|
+ arg_reg_save_area_bytes = 0
|
|
+ };
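Reviewer's aside, not part of the patch: every interpreter-frame slot above is addressed as a signed word offset from fp, so with these values the Method* sits at fp[-5] and the bcp at fp[-11]. A tiny model of that addressing, with hypothetical helper names:

#include <cstdint>

// Hypothetical helpers mirroring frame::addr_at()/ptr_at(): interpreter frame
// slots are simply word offsets relative to the frame pointer.
inline intptr_t* slot_addr(intptr_t* fp, int offset) {
  return fp + offset;
}

inline intptr_t slot_value(intptr_t* fp, int offset) {
  return *slot_addr(fp, offset);
}

// Example, using the offsets defined in the enum above:
//   method   -> slot_value(fp, -5)   // interpreter_frame_method_offset
//   bcp      -> slot_value(fp, -11)  // interpreter_frame_bcp_offset
//   sender sp-> slot_value(fp, -3)   // interpreter_frame_sender_sp_offset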
|
|
+
|
|
+ intptr_t ptr_at(int offset) const {
|
|
+ return *ptr_at_addr(offset);
|
|
+ }
|
|
+
|
|
+ void ptr_at_put(int offset, intptr_t value) {
|
|
+ *ptr_at_addr(offset) = value;
|
|
+ }
|
|
+
|
|
+ private:
|
|
+ // an additional field beyond _sp and _pc:
|
|
+ intptr_t* _fp; // frame pointer
|
|
+ // The interpreter and adapters will extend the frame of the caller.
|
|
+ // Since oopMaps are based on the sp of the caller before extension
|
|
+ // we need to know that value. However in order to compute the address
|
|
+ // of the return address we need the real "raw" sp. Since sparc already
|
|
+ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
|
|
+ // original sp we use that convention.
|
|
+
|
|
+ intptr_t* _unextended_sp;
|
|
+ void adjust_unextended_sp();
|
|
+
|
|
+ intptr_t* ptr_at_addr(int offset) const {
|
|
+ return (intptr_t*) addr_at(offset);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // Used in frame::sender_for_{interpreter,compiled}_frame
|
|
+ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp);
|
|
+#endif
|
|
+
|
|
+ public:
|
|
+ // Constructors
|
|
+
|
|
+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
|
|
+
|
|
+ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc);
|
|
+
|
|
+ frame(intptr_t* ptr_sp, intptr_t* ptr_fp);
|
|
+
|
|
+ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
|
|
+
|
|
+ // accessors for the instance variables
|
|
+ // Note: not necessarily the real 'frame pointer' (see real_fp)
|
|
+ intptr_t* fp() const { return _fp; }
|
|
+
|
|
+ inline address* sender_pc_addr() const;
|
|
+
|
|
+ // expression stack tos if we are nested in a java call
|
|
+ intptr_t* interpreter_frame_last_sp() const;
|
|
+
|
|
+ // helper to update a map with the callee-saved fp
|
|
+ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
|
|
+
|
|
+ // deoptimization support
|
|
+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp);
|
|
+
|
|
+ static jint interpreter_frame_expression_stack_direction() { return -1; }
|
|
+
|
|
+#endif // CPU_RISCV_FRAME_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..5bc6b430c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
|
|
@@ -0,0 +1,257 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP
|
|
+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP
|
|
+
|
|
+#include "code/codeCache.hpp"
|
|
+#include "code/vmreg.inline.hpp"
|
|
+
|
|
+// Inline functions for RISCV frames:
|
|
+
|
|
+// Constructors:
|
|
+
|
|
+inline frame::frame() {
|
|
+ _pc = NULL;
|
|
+ _sp = NULL;
|
|
+ _unextended_sp = NULL;
|
|
+ _fp = NULL;
|
|
+ _cb = NULL;
|
|
+ _deopt_state = unknown;
|
|
+}
|
|
+
|
|
+static int spin;
|
|
+
|
|
+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
|
|
+ intptr_t a = intptr_t(ptr_sp);
|
|
+ intptr_t b = intptr_t(ptr_fp);
|
|
+ _sp = ptr_sp;
|
|
+ _unextended_sp = ptr_sp;
|
|
+ _fp = ptr_fp;
|
|
+ _pc = pc;
|
|
+ assert(pc != NULL, "no pc?");
|
|
+ _cb = CodeCache::find_blob(pc);
|
|
+ adjust_unextended_sp();
|
|
+
|
|
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
|
|
+ if (original_pc != NULL) {
|
|
+ _pc = original_pc;
|
|
+ _deopt_state = is_deoptimized;
|
|
+ } else {
|
|
+ _deopt_state = not_deoptimized;
|
|
+ }
|
|
+}
|
|
+
|
|
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
|
|
+ init(ptr_sp, ptr_fp, pc);
|
|
+}
|
|
+
|
|
+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) {
|
|
+ intptr_t a = intptr_t(ptr_sp);
|
|
+ intptr_t b = intptr_t(ptr_fp);
|
|
+ _sp = ptr_sp;
|
|
+ _unextended_sp = unextended_sp;
|
|
+ _fp = ptr_fp;
|
|
+ _pc = pc;
|
|
+ assert(pc != NULL, "no pc?");
|
|
+ _cb = CodeCache::find_blob(pc);
|
|
+ adjust_unextended_sp();
|
|
+
|
|
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
|
|
+ if (original_pc != NULL) {
|
|
+ _pc = original_pc;
|
|
+ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
+ "original PC must be in the main code section of the compiled method (or must be immediately following it)");
|
|
+ _deopt_state = is_deoptimized;
|
|
+ } else {
|
|
+ _deopt_state = not_deoptimized;
|
|
+ }
|
|
+}
|
|
+
|
|
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
|
|
+ intptr_t a = intptr_t(ptr_sp);
|
|
+ intptr_t b = intptr_t(ptr_fp);
|
|
+ _sp = ptr_sp;
|
|
+ _unextended_sp = ptr_sp;
|
|
+ _fp = ptr_fp;
|
|
+ _pc = (address)(ptr_sp[-1]);
|
|
+
|
|
+ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+ // unlucky the junk value could point to a zombie method and we'll die on the
+ // find_blob call. This is also why we can have no asserts on the validity
+ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+ // -> pd_last_frame should use a specialized version of pd_last_frame which could
+ // call a specialized frame constructor instead of this one.
+ // Then we could use the assert below. However this assert is of somewhat dubious
+ // value.
|
|
+
|
|
+ _cb = CodeCache::find_blob(_pc);
|
|
+ adjust_unextended_sp();
|
|
+
|
|
+ address original_pc = CompiledMethod::get_deopt_original_pc(this);
|
|
+ if (original_pc != NULL) {
|
|
+ _pc = original_pc;
|
|
+ _deopt_state = is_deoptimized;
|
|
+ } else {
|
|
+ _deopt_state = not_deoptimized;
|
|
+ }
|
|
+}
|
|
+
|
|
+// Accessors
|
|
+
|
|
+inline bool frame::equal(frame other) const {
|
|
+ bool ret = sp() == other.sp() &&
|
|
+ unextended_sp() == other.unextended_sp() &&
|
|
+ fp() == other.fp() &&
|
|
+ pc() == other.pc();
|
|
+ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+// Return unique id for this frame. The id must have a value where we can distinguish
|
|
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
|
|
+// frame.
|
|
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
|
|
+
|
|
+// Relationals on frames, based on frame ids
|
|
+
|
|
+// Return true if the frame is younger (more recent activation) than the frame represented by id
|
|
+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
|
|
+ return this->id() < id ; }
|
|
+// Return true if the frame is older (less recent activation) than the frame represented by id
|
|
+inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
|
|
+ return this->id() > id ; }
|
|
+
|
|
+inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
|
|
+
|
|
+inline intptr_t* frame::link_or_null() const {
|
|
+ intptr_t** ptr = (intptr_t **)addr_at(link_offset);
|
|
+ return os::is_readable_pointer(ptr) ? *ptr : NULL;
|
|
+}
|
|
+
|
|
+inline intptr_t* frame::unextended_sp() const { return _unextended_sp; }
|
|
+
|
|
+// Return address
|
|
+inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); }
|
|
+inline address frame::sender_pc() const { return *sender_pc_addr(); }
|
|
+inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); }
|
|
+
|
|
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
|
|
+ return (intptr_t**)addr_at(interpreter_frame_locals_offset);
|
|
+}
|
|
+
|
|
+inline intptr_t* frame::interpreter_frame_last_sp() const {
|
|
+ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
|
|
+}
|
|
+
|
|
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
|
|
+ return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
|
|
+}
|
|
+
|
|
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
|
|
+ return (intptr_t*)addr_at(interpreter_frame_mdp_offset);
|
|
+}
|
|
+
|
|
+
|
|
+// Constant pool cache
|
|
+
|
|
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
|
|
+ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);
|
|
+}
|
|
+
|
|
+// Method
|
|
+
|
|
+inline Method** frame::interpreter_frame_method_addr() const {
|
|
+ return (Method**)addr_at(interpreter_frame_method_offset);
|
|
+}
|
|
+
|
|
+// Mirror
|
|
+
|
|
+inline oop* frame::interpreter_frame_mirror_addr() const {
|
|
+ return (oop*)addr_at(interpreter_frame_mirror_offset);
|
|
+}
|
|
+
|
|
+// top of expression stack
|
|
+inline intptr_t* frame::interpreter_frame_tos_address() const {
|
|
+ intptr_t* last_sp = interpreter_frame_last_sp();
|
|
+ if (last_sp == NULL) {
|
|
+ return sp();
|
|
+ } else {
|
|
+ // sp() may have been extended or shrunk by an adapter. At least
|
|
+ // check that we don't fall behind the legal region.
|
|
+ // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
|
|
+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
|
|
+ return last_sp;
|
|
+ }
|
|
+}
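Reviewer's note (not part of the patch): the expression-stack top is the raw sp while bytecodes are executing, and the saved last_sp slot while a call is outstanding. A minimal sketch of that selection, names assumed:

#include <cstdint>
#include <cassert>

// Hypothetical model: 'last_sp' is the value stored in the
// interpreter_frame_last_sp slot, null while no call is outstanding.
inline intptr_t* expression_stack_top(intptr_t* sp, intptr_t* last_sp,
                                      intptr_t* monitor_end) {
  if (last_sp == nullptr) {
    return sp;                                    // no nested call: tos is the raw sp
  }
  assert(last_sp <= monitor_end && "bad tos");    // must not run into the monitor area
  return last_sp;                                 // nested call: use the saved tos
}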
|
|
+
|
|
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
|
|
+ return (oop *)(fp() + interpreter_frame_oop_temp_offset);
|
|
+}
|
|
+
|
|
+inline int frame::interpreter_frame_monitor_size() {
|
|
+ return BasicObjectLock::size();
|
|
+}
|
|
+
|
|
+
|
|
+// expression stack
|
|
+// (the max_stack arguments are used by the GC; see class FrameClosure)
|
|
+
|
|
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
|
|
+ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
|
|
+ return monitor_end-1;
|
|
+}
|
|
+
|
|
+
|
|
+// Entry frames
|
|
+
|
|
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
|
|
+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);
|
|
+}
|
|
+
|
|
+
|
|
+// Compiled frames
|
|
+inline oop frame::saved_oop_result(RegisterMap* map) const {
|
|
+ oop* result_adr = (oop *)map->location(x10->as_VMReg());
|
|
+ if (result_adr != NULL) {
|
|
+ return (*result_adr);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ return NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
|
|
+ oop* result_adr = (oop *)map->location(x10->as_VMReg());
|
|
+ if (result_adr != NULL) {
|
|
+ *result_adr = obj;
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..6f778956d
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
|
|
@@ -0,0 +1,479 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/g1/g1BarrierSet.hpp"
|
|
+#include "gc/g1/g1BarrierSetAssembler.hpp"
|
|
+#include "gc/g1/g1BarrierSetRuntime.hpp"
|
|
+#include "gc/g1/g1CardTable.hpp"
|
|
+#include "gc/g1/g1ThreadLocalData.hpp"
|
|
+#include "gc/g1/heapRegion.hpp"
|
|
+#include "gc/shared/collectedHeap.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/thread.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "gc/g1/c1/g1BarrierSetC1.hpp"
|
|
+#endif
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register addr, Register count, RegSet saved_regs) {
|
|
+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
|
|
+ if (!dest_uninitialized) {
|
|
+ Label done;
|
|
+ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
|
|
+
|
|
+ // Is marking active?
|
|
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
|
|
+ __ lwu(t0, in_progress);
|
|
+ } else {
|
|
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
|
|
+ __ lbu(t0, in_progress);
|
|
+ }
|
|
+ __ beqz(t0, done);
|
|
+
|
|
+ __ push_reg(saved_regs, sp);
|
|
+ if (count == c_rarg0) {
|
|
+ if (addr == c_rarg1) {
|
|
+ // exactly backwards!!
|
|
+ __ mv(t0, c_rarg0);
|
|
+ __ mv(c_rarg0, c_rarg1);
|
|
+ __ mv(c_rarg1, t0);
|
|
+ } else {
|
|
+ __ mv(c_rarg1, count);
|
|
+ __ mv(c_rarg0, addr);
|
|
+ }
|
|
+ } else {
|
|
+ __ mv(c_rarg0, addr);
|
|
+ __ mv(c_rarg1, count);
|
|
+ }
|
|
+ if (UseCompressedOops) {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2);
|
|
+ } else {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2);
|
|
+ }
|
|
+ __ pop_reg(saved_regs, sp);
|
|
+
|
|
+ __ bind(done);
|
|
+ }
|
|
+}
|
|
+
|
|
+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs) {
|
|
+ __ push_reg(saved_regs, sp);
|
|
+ assert_different_registers(start, count, tmp);
|
|
+ assert_different_registers(c_rarg0, count);
|
|
+ __ mv(c_rarg0, start);
|
|
+ __ mv(c_rarg1, count);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);
|
|
+ __ pop_reg(saved_regs, sp);
|
|
+}
|
|
+
|
|
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call) {
|
|
+ // If expand_call is true then we expand the call_VM_leaf macro
|
|
+ // directly to skip generating the check by
|
|
+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
|
|
+
|
|
+ assert(thread == xthread, "must be");
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ assert_different_registers(obj, pre_val, tmp, t0);
|
|
+ assert(pre_val != noreg && tmp != noreg, "expecting a register");
|
|
+
|
|
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
|
|
+ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
|
|
+
|
|
+ // Is marking active?
|
|
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
|
|
+ __ lwu(tmp, in_progress);
|
|
+ } else {
|
|
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
|
|
+ __ lbu(tmp, in_progress);
|
|
+ }
|
|
+ __ beqz(tmp, done);
|
|
+
|
|
+ // Do we need to load the previous value?
|
|
+ if (obj != noreg) {
|
|
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
|
|
+ }
|
|
+
|
|
+ // Is the previous value null?
|
|
+ __ beqz(pre_val, done);
|
|
+
|
|
+ // Can we store original value in the thread's buffer?
|
|
+ // Is index == 0?
|
|
+ // (The index field is typed as size_t.)
|
|
+
|
|
+ __ ld(tmp, index); // tmp := *index_adr
|
|
+ __ beqz(tmp, runtime); // tmp == 0?
|
|
+ // If yes, goto runtime
|
|
+
|
|
+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
|
|
+ __ sd(tmp, index); // *index_adr := tmp
|
|
+ __ ld(t0, buffer);
|
|
+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
|
|
+
|
|
+ // Record the previous value
|
|
+ __ sd(pre_val, Address(tmp, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ // save the live input values
|
|
+ RegSet saved = RegSet::of(pre_val);
|
|
+ if (tosca_live) { saved += RegSet::of(x10); }
|
|
+ if (obj != noreg) { saved += RegSet::of(obj); }
|
|
+
|
|
+ __ push_reg(saved, sp);
|
|
+
|
|
+ if (expand_call) {
|
|
+ assert(pre_val != c_rarg1, "smashed arg");
|
|
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ } else {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ }
|
|
+
|
|
+ __ pop_reg(saved, sp);
|
|
+
|
|
+ __ bind(done);
|
|
+
|
|
+}
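Editorial sketch, not part of the patch: the generated pre-barrier implements G1's SATB protocol in three steps: skip if marking is inactive or the previous value is null, try to push the previous value into the thread-local SATB buffer, and fall back to the runtime when the buffer is full (index == 0). A plain C++ model of that logic, with made-up types and a stubbed slow path:

#include <cstddef>

// Hypothetical model of a thread-local SATB queue: 'index' is a byte offset
// that counts down towards 0; index == 0 means the buffer is full.
struct SatbQueue {
  bool   active;   // is concurrent marking active for this thread?
  size_t index;    // byte offset of the next free slot (counts down)
  void** buffer;   // buffer of recorded previous values
};

// Stand-in for the VM leaf call (G1BarrierSetRuntime::write_ref_field_pre_entry).
static void satb_runtime_slow_path(void* /*pre_val*/) {}

static void satb_pre_barrier(SatbQueue& q, void* pre_val) {
  if (!q.active) return;           // marking not active: nothing to record
  if (pre_val == nullptr) return;  // previous value null: nothing to record
  if (q.index == 0) {              // buffer full: hand the value to the runtime
    satb_runtime_slow_path(pre_val);
    return;
  }
  q.index -= sizeof(void*);                     // claim the next slot
  q.buffer[q.index / sizeof(void*)] = pre_val;  // record the previous value
}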
|
|
+
|
|
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
|
|
+ Register store_addr,
|
|
+ Register new_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ Register tmp2) {
|
|
+ assert(thread == xthread, "must be");
|
|
+ assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
|
|
+ t0);
|
|
+ assert(store_addr != noreg && new_val != noreg && tmp != noreg &&
|
|
+ tmp2 != noreg, "expecting a register");
|
|
+
|
|
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
|
+
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
|
+ CardTable* ct = ctbs->card_table();
|
|
+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ // Does store cross heap regions?
|
|
+
|
|
+ __ xorr(tmp, store_addr, new_val);
|
|
+ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
|
|
+ __ beqz(tmp, done);
|
|
+
|
|
+ // crosses regions, storing NULL?
|
|
+
|
|
+ __ beqz(new_val, done);
|
|
+
|
|
+ // storing region crossing non-NULL, is card already dirty?
|
|
+
|
|
+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
|
|
+ const Register card_addr = tmp;
|
|
+
|
|
+ __ srli(card_addr, store_addr, CardTable::card_shift);
|
|
+
|
|
+ // get the address of the card
|
|
+ __ load_byte_map_base(tmp2);
|
|
+ __ add(card_addr, card_addr, tmp2);
|
|
+ __ lbu(tmp2, Address(card_addr));
|
|
+ __ mv(t0, (int)G1CardTable::g1_young_card_val());
|
|
+ __ beq(tmp2, t0, done);
|
|
+
|
|
+ assert((int)CardTable::dirty_card_val() == 0, "must be 0");
|
|
+
|
|
+ __ membar(MacroAssembler::StoreLoad);
|
|
+
|
|
+ __ lbu(tmp2, Address(card_addr));
|
|
+ __ beqz(tmp2, done);
|
|
+
|
|
+ // storing a region crossing, non-NULL oop, card is clean.
|
|
+ // dirty card and log.
|
|
+
|
|
+ __ sb(zr, Address(card_addr));
|
|
+
|
|
+ __ ld(t0, queue_index);
|
|
+ __ beqz(t0, runtime);
|
|
+ __ sub(t0, t0, wordSize);
|
|
+ __ sd(t0, queue_index);
|
|
+
|
|
+ __ ld(tmp2, buffer);
|
|
+ __ add(t0, tmp2, t0);
|
|
+ __ sd(card_addr, Address(t0, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ // save the live input values
|
|
+ RegSet saved = RegSet::of(store_addr, new_val);
|
|
+ __ push_reg(saved, sp);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
|
|
+ __ pop_reg(saved, sp);
|
|
+
|
|
+ __ bind(done);
|
|
+}
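Reviewer's sketch, not part of the patch: the post-barrier filters out same-region stores, null stores, and already-young or already-dirty cards, then dirties the card after a StoreLoad fence and enqueues it; the buffer-full case again falls back to the runtime. Modeled in plain C++ with assumed constants and a stubbed enqueue:

#include <atomic>
#include <cstdint>

// Illustrative constants; the real values come from G1's card table and
// heap-region configuration at runtime.
static const int     kCardShift      = 9;   // 512-byte cards (CardTable::card_shift)
static const int     kLogRegionSize  = 21;  // example HeapRegion::LogOfHRGrainBytes
static const uint8_t kYoungCardValue = 2;   // example g1_young_card_val()
static const uint8_t kDirtyCardValue = 0;   // dirty_card_val()

// Stand-in for pushing the card onto the dirty-card queue (or calling the runtime).
static void enqueue_dirty_card(std::atomic<uint8_t>* /*card*/) {}

static void g1_post_barrier_model(uintptr_t store_addr, uintptr_t new_val,
                                  std::atomic<uint8_t>* byte_map_base) {
  // 1. Same-region store: no remembered-set update needed.
  if (((store_addr ^ new_val) >> kLogRegionSize) == 0) return;
  // 2. Storing null never creates a cross-region reference.
  if (new_val == 0) return;
  // 3. Young cards are filtered out; their regions are scanned anyway.
  std::atomic<uint8_t>* card = byte_map_base + (store_addr >> kCardShift);
  if (card->load(std::memory_order_relaxed) == kYoungCardValue) return;
  // 4. StoreLoad fence, then re-check whether the card is already dirty.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  if (card->load(std::memory_order_relaxed) == kDirtyCardValue) return;
  // 5. Dirty the card and log it for concurrent refinement.
  card->store(kDirtyCardValue, std::memory_order_relaxed);
  enqueue_dirty_card(card);
}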
|
|
+
|
|
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Register dst, Address src, Register tmp1, Register tmp_thread) {
|
|
+ bool on_oop = type == T_OBJECT || type == T_ARRAY;
|
|
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
|
|
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
|
|
+ bool on_reference = on_weak || on_phantom;
|
|
+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
|
|
+ if (on_oop && on_reference) {
|
|
+ // RA is live. It must be saved around calls.
|
|
+ __ enter(); // barrier may call runtime
|
|
+ // Generate the G1 pre-barrier code to log the value of
|
|
+ // the referent field in an SATB buffer.
|
|
+ g1_write_barrier_pre(masm /* masm */,
|
|
+ noreg /* obj */,
|
|
+ dst /* pre_val */,
|
|
+ xthread /* thread */,
|
|
+ tmp1 /* tmp */,
|
|
+ true /* tosca_live */,
|
|
+ true /* expand_call */);
|
|
+ __ leave();
|
|
+ }
|
|
+}
|
|
+
|
|
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ // flatten object address if needed
|
|
+ if (dst.offset() == 0) {
|
|
+ __ mv(tmp3, dst.base());
|
|
+ } else {
|
|
+ __ la(tmp3, dst);
|
|
+ }
|
|
+
|
|
+ g1_write_barrier_pre(masm,
|
|
+ tmp3 /* obj */,
|
|
+ tmp2 /* pre_val */,
|
|
+ xthread /* thread */,
|
|
+ tmp1 /* tmp */,
|
|
+ val != noreg /* tosca_live */,
|
|
+ false /* expand_call */);
|
|
+
|
|
+ if (val == noreg) {
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg);
|
|
+ } else {
|
|
+ // G1 barrier needs uncompressed oop for region cross check.
|
|
+ Register new_val = val;
|
|
+ if (UseCompressedOops) {
|
|
+ new_val = t1;
|
|
+ __ mv(new_val, val);
|
|
+ }
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
|
|
+ g1_write_barrier_post(masm,
|
|
+ tmp3 /* store_adr */,
|
|
+ new_val /* new_val */,
|
|
+ xthread /* thread */,
|
|
+ tmp1 /* tmp */,
|
|
+ tmp2 /* tmp2 */);
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifdef COMPILER1
|
|
+
|
|
+#undef __
|
|
+#define __ ce->masm()->
|
|
+
|
|
+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
|
|
+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
|
+
|
|
+ // At this point we know that marking is in progress.
|
|
+ // If do_load() is true then we have to emit the
|
|
+ // load of the previous value; otherwise it has already
|
|
+ // been loaded into _pre_val.
|
|
+ __ bind(*stub->entry());
|
|
+
|
|
+ assert(stub->pre_val()->is_register(), "Precondition.");
|
|
+
|
|
+ Register pre_val_reg = stub->pre_val()->as_register();
|
|
+
|
|
+ if (stub->do_load()) {
|
|
+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(),
|
|
+ false /* wide */, false /* unaligned */);
|
|
+ }
|
|
+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
|
|
+ ce->store_parameter(stub->pre_val()->as_register(), 0);
|
|
+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
|
|
+ __ j(*stub->continuation());
|
|
+}
|
|
+
|
|
+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
|
|
+ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
|
+ __ bind(*stub->entry());
|
|
+ assert(stub->addr()->is_register(), "Precondition");
|
|
+ assert(stub->new_val()->is_register(), "Precondition");
|
|
+ Register new_val_reg = stub->new_val()->as_register();
|
|
+ __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
|
|
+ ce->store_parameter(stub->addr()->as_pointer_register(), 0);
|
|
+ __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
|
|
+ __ j(*stub->continuation());
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+#define __ sasm->
|
|
+
|
|
+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
|
+ __ prologue("g1_pre_barrier", false);
|
|
+
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+
|
|
+ // arg0 : previous value of memory
|
|
+ const Register pre_val = x10;
|
|
+ const Register thread = xthread;
|
|
+ const Register tmp = t0;
|
|
+
|
|
+ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
|
|
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ // Is marking still active?
|
|
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
|
|
+ __ lwu(tmp, in_progress);
|
|
+ } else {
|
|
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
|
|
+ __ lbu(tmp, in_progress);
|
|
+ }
|
|
+ __ beqz(tmp, done);
|
|
+
|
|
+ // Can we store original value in the thread's buffer?
|
|
+ __ ld(tmp, queue_index);
|
|
+ __ beqz(tmp, runtime);
|
|
+
|
|
+ __ sub(tmp, tmp, wordSize);
|
|
+ __ sd(tmp, queue_index);
|
|
+ __ ld(t1, buffer);
|
|
+ __ add(tmp, tmp, t1);
|
|
+ __ load_parameter(0, t1);
|
|
+ __ sd(t1, Address(tmp, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ __ push_call_clobbered_registers();
|
|
+ __ load_parameter(0, pre_val);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ bind(done);
|
|
+
|
|
+ __ epilogue();
|
|
+}
|
|
+
|
|
+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
|
|
+ __ prologue("g1_post_barrier", false);
|
|
+
|
|
+ // arg0 : store_address
|
|
+ Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
|
|
+
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
|
+ CardTable* ct = ctbs->card_table();
|
|
+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ // At this point we know new_value is non-NULL and the new_value crosses regions.
|
|
+ // Must check to see if card is already dirty
|
|
+ const Register thread = xthread;
|
|
+
|
|
+ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
|
|
+
|
|
+ const Register card_offset = t1;
|
|
+ // RA is free here, so we can use it to hold the byte_map_base.
|
|
+ const Register byte_map_base = ra;
|
|
+
|
|
+ assert_different_registers(card_offset, byte_map_base, t0);
|
|
+
|
|
+ __ load_parameter(0, card_offset);
|
|
+ __ srli(card_offset, card_offset, CardTable::card_shift);
|
|
+ __ load_byte_map_base(byte_map_base);
|
|
+
|
|
+ // Convert card offset into an address in card_addr
|
|
+ Register card_addr = card_offset;
|
|
+ __ add(card_addr, byte_map_base, card_addr);
|
|
+
|
|
+ __ lbu(t0, Address(card_addr, 0));
|
|
+ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val());
|
|
+ __ beqz(t0, done);
|
|
+
|
|
+ assert((int)CardTable::dirty_card_val() == 0, "must be 0");
|
|
+
|
|
+ __ membar(MacroAssembler::StoreLoad);
|
|
+ __ lbu(t0, Address(card_addr, 0));
|
|
+ __ beqz(t0, done);
|
|
+
|
|
+ // storing region crossing non-NULL, card is clean.
|
|
+ // dirty card and log.
|
|
+ __ sb(zr, Address(card_addr, 0));
|
|
+
|
|
+ __ ld(t0, queue_index);
|
|
+ __ beqz(t0, runtime);
|
|
+ __ sub(t0, t0, wordSize);
|
|
+ __ sd(t0, queue_index);
|
|
+
|
|
+ // Reuse RA to hold buffer_addr
|
|
+ const Register buffer_addr = ra;
|
|
+
|
|
+ __ ld(buffer_addr, buffer);
|
|
+ __ add(t0, buffer_addr, t0);
|
|
+ __ sd(card_addr, Address(t0, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ __ push_call_clobbered_registers();
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ bind(done);
|
|
+ __ epilogue();
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+#endif // COMPILER1
|
|
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..7f85e002d
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
|
|
@@ -0,0 +1,78 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+#ifdef COMPILER1
|
|
+class LIR_Assembler;
|
|
+#endif
|
|
+class StubAssembler;
|
|
+class G1PreBarrierStub;
|
|
+class G1PostBarrierStub;
|
|
+
|
|
+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {
|
|
+protected:
|
|
+ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register addr, Register count, RegSet saved_regs);
|
|
+ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs);
|
|
+
|
|
+ void g1_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call);
|
|
+
|
|
+ void g1_write_barrier_post(MacroAssembler* masm,
|
|
+ Register store_addr,
|
|
+ Register new_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ Register tmp2);
|
|
+
|
|
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
|
+
|
|
+public:
|
|
+#ifdef COMPILER1
|
|
+ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
|
|
+ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);
|
|
+
|
|
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
|
+ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);
|
|
+#endif
|
|
+
|
|
+ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Register dst, Address src, Register tmp1, Register tmp_thread);
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..203b82744
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
|
|
@@ -0,0 +1,226 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "gc/shared/collectedHeap.hpp"
|
|
+#include "runtime/jniHandles.hpp"
|
|
+#include "runtime/thread.hpp"
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Register dst, Address src, Register tmp1, Register tmp_thread) {
|
|
+ // RA is live. It must be saved around calls.
|
|
+
|
|
+ bool in_heap = (decorators & IN_HEAP) != 0;
|
|
+ bool in_native = (decorators & IN_NATIVE) != 0;
|
|
+ bool is_not_null = (decorators & IS_NOT_NULL) != 0;
|
|
+ switch (type) {
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ARRAY: {
|
|
+ if (in_heap) {
|
|
+ if (UseCompressedOops) {
|
|
+ __ lwu(dst, src);
|
|
+ if (is_not_null) {
|
|
+ __ decode_heap_oop_not_null(dst);
|
|
+ } else {
|
|
+ __ decode_heap_oop(dst);
|
|
+ }
|
|
+ } else {
|
|
+ __ ld(dst, src);
|
|
+ }
|
|
+ } else {
|
|
+ assert(in_native, "why else?");
|
|
+ __ ld(dst, src);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
|
|
+ case T_BYTE: __ load_signed_byte (dst, src); break;
|
|
+ case T_CHAR: __ load_unsigned_short(dst, src); break;
|
|
+ case T_SHORT: __ load_signed_short (dst, src); break;
|
|
+ case T_INT: __ lw (dst, src); break;
|
|
+ case T_LONG: __ ld (dst, src); break;
|
|
+ case T_ADDRESS: __ ld (dst, src); break;
|
|
+ case T_FLOAT: __ flw (f10, src); break;
|
|
+ case T_DOUBLE: __ fld (f10, src); break;
|
|
+ default: Unimplemented();
|
|
+ }
|
|
+}
|
|
+
|
|
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ bool in_heap = (decorators & IN_HEAP) != 0;
|
|
+ bool in_native = (decorators & IN_NATIVE) != 0;
|
|
+ switch (type) {
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ARRAY: {
|
|
+ val = val == noreg ? zr : val;
|
|
+ if (in_heap) {
|
|
+ if (UseCompressedOops) {
|
|
+ assert(!dst.uses(val), "not enough registers");
|
|
+ if (val != zr) {
|
|
+ __ encode_heap_oop(val);
|
|
+ }
|
|
+ __ sw(val, dst);
|
|
+ } else {
|
|
+ __ sd(val, dst);
|
|
+ }
|
|
+ } else {
|
|
+ assert(in_native, "why else?");
|
|
+ __ sd(val, dst);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ case T_BOOLEAN:
|
|
+ __ andi(val, val, 0x1); // boolean is true if LSB is 1
|
|
+ __ sb(val, dst);
|
|
+ break;
|
|
+ case T_BYTE: __ sb(val, dst); break;
|
|
+ case T_CHAR: __ sh(val, dst); break;
|
|
+ case T_SHORT: __ sh(val, dst); break;
|
|
+ case T_INT: __ sw(val, dst); break;
|
|
+ case T_LONG: __ sd(val, dst); break;
|
|
+ case T_ADDRESS: __ sd(val, dst); break;
|
|
+ case T_FLOAT: __ fsw(f10, dst); break;
|
|
+ case T_DOUBLE: __ fsd(f10, dst); break;
|
|
+ default: Unimplemented();
|
|
+ }
|
|
+
|
|
+}
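For the oop cases above, load_at/store_at reduce to a 32-bit access plus decode/encode when UseCompressedOops is set, and a plain 64-bit access otherwise. A rough C++ model of the decode/encode step, using a made-up heap base and shift instead of the real CompressedOops state:

  #include <cstdint>

  // Illustrative only: HotSpot keeps these in CompressedOops::base()/shift().
  static const uintptr_t heap_base = 0x800000000ULL;
  static const int       oop_shift = 3;

  inline void* decode_heap_oop(uint32_t narrow) {
    return narrow == 0 ? nullptr
                       : (void*)(heap_base + ((uintptr_t)narrow << oop_shift));
  }
  inline uint32_t encode_heap_oop(void* obj) {
    return obj == nullptr ? 0u
                          : (uint32_t)(((uintptr_t)obj - heap_base) >> oop_shift);
  }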
|
|
+
|
|
+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) {
|
|
+ __ beq(obj1, obj2, equal, is_far);
|
|
+}
|
|
+
|
|
+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) {
|
|
+ __ bne(obj1, obj2, nequal, is_far);
|
|
+}
|
|
+
|
|
+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
|
|
+ Register obj, Register tmp, Label& slowpath) {
|
|
+ // If mask changes we need to ensure that the inverse is still encodable as an immediate
|
|
+ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1);
|
|
+ __ andi(obj, obj, ~JNIHandles::weak_tag_mask);
|
|
+ __ ld(obj, Address(obj, 0)); // *obj
|
|
+}
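The andi above strips JNIHandles::weak_tag_mask (the low bit) from the jobject before dereferencing it, which is all the fast path does; roughly:

  #include <cstdint>

  // Sketch: a jobject is a pointer to an oop slot, with bit 0 tagging weak handles.
  inline void* resolve_jobject_fast(void* handle) {
    uintptr_t p = (uintptr_t)handle & ~(uintptr_t)1;   // clear weak_tag_mask (== 1)
    return *(void**)p;                                 // load the referenced oop
  }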
|
|
+
|
|
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
|
|
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
|
|
+ Register var_size_in_bytes,
|
|
+ int con_size_in_bytes,
|
|
+ Register tmp1,
|
|
+ Register tmp2,
|
|
+ Label& slow_case,
|
|
+ bool is_far) {
|
|
+ assert_different_registers(obj, tmp2);
|
|
+ assert_different_registers(obj, var_size_in_bytes);
|
|
+ Register end = tmp2;
|
|
+
|
|
+ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
|
|
+ if (var_size_in_bytes == noreg) {
|
|
+ __ la(end, Address(obj, con_size_in_bytes));
|
|
+ } else {
|
|
+ __ add(end, obj, var_size_in_bytes);
|
|
+ }
|
|
+ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
|
|
+ __ bgtu(end, t0, slow_case, is_far);
|
|
+
|
|
+ // update the tlab top pointer
|
|
+ __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
|
|
+
|
|
+ // recover var_size_in_bytes if necessary
|
|
+ if (var_size_in_bytes == end) {
|
|
+ __ sub(var_size_in_bytes, var_size_in_bytes, obj);
|
|
+ }
|
|
+}
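The TLAB path is plain bump-pointer allocation against the thread-local end pointer; only the limit check and the top update are emitted inline. Approximately, assuming a simplified TLAB record:

  #include <cstddef>

  struct Tlab { char* top; char* end; };

  // Returns the new object start, or nullptr to signal the slow path.
  inline void* tlab_allocate(Tlab* t, size_t size_in_bytes) {
    char* obj     = t->top;
    char* new_top = obj + size_in_bytes;
    if (new_top > t->end) return nullptr;   // take slow_case
    t->top = new_top;                       // no atomics needed: the TLAB is thread-local
    return obj;
  }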
|
|
+
|
|
+// Defines obj, preserves var_size_in_bytes
|
|
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
|
|
+ Register var_size_in_bytes,
|
|
+ int con_size_in_bytes,
|
|
+ Register tmp1,
|
|
+ Label& slow_case,
|
|
+ bool is_far) {
|
|
+ assert_different_registers(obj, var_size_in_bytes, tmp1);
|
|
+ if (!Universe::heap()->supports_inline_contig_alloc()) {
|
|
+ __ j(slow_case);
|
|
+ } else {
|
|
+ Register end = tmp1;
|
|
+ Label retry;
|
|
+ __ bind(retry);
|
|
+
|
|
+ // Get the current end of the heap
|
|
+ ExternalAddress address_end((address) Universe::heap()->end_addr());
|
|
+ {
|
|
+ int32_t offset;
|
|
+ __ la_patchable(t1, address_end, offset);
|
|
+ __ ld(t1, Address(t1, offset));
|
|
+ }
|
|
+
|
|
+ // Get the current top of the heap
|
|
+ ExternalAddress address_top((address) Universe::heap()->top_addr());
|
|
+ {
|
|
+ int32_t offset;
|
|
+ __ la_patchable(t0, address_top, offset);
|
|
+ __ addi(t0, t0, offset);
|
|
+ __ lr_d(obj, t0, Assembler::aqrl);
|
|
+ }
|
|
+
|
|
+ // Adjust it by the size of our new object
|
|
+ if (var_size_in_bytes == noreg) {
|
|
+ __ la(end, Address(obj, con_size_in_bytes));
|
|
+ } else {
|
|
+ __ add(end, obj, var_size_in_bytes);
|
|
+ }
|
|
+
|
|
+ // if end < obj then we wrapped around high memory
|
|
+ __ bltu(end, obj, slow_case, is_far);
|
|
+
|
|
+ __ bgtu(end, t1, slow_case, is_far);
|
|
+
|
|
+ // If heap_top hasn't been changed by some other thread, update it.
|
|
+ __ sc_d(t1, end, t0, Assembler::rl);
|
|
+ __ bnez(t1, retry);
|
|
+
|
|
+ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1);
|
|
+ }
|
|
+}
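The shared-eden path is a compare-and-swap retry loop on the heap top pointer, with the lr.d/sc.d pair playing the role of the CAS. In portable C++ the same loop looks roughly like this (heap top and end shown as plain values, not the real CollectedHeap fields):

  #include <atomic>
  #include <cstddef>

  inline void* eden_allocate(std::atomic<char*>& heap_top, char* heap_end, size_t size) {
    for (;;) {
      char* obj = heap_top.load(std::memory_order_acquire);
      char* end = obj + size;
      if (end < obj || end > heap_end) return nullptr;   // wrapped around or out of space: slow path
      // sc.d succeeds only if nobody moved heap_top since the lr.d; the CAS models that.
      if (heap_top.compare_exchange_weak(obj, end, std::memory_order_release)) {
        return obj;                                      // allocated_bytes is bumped afterwards
      }
    }
  }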
|
|
+
|
|
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
|
|
+ Register var_size_in_bytes,
|
|
+ int con_size_in_bytes,
|
|
+ Register tmp1) {
|
|
+ assert(tmp1->is_valid(), "need temp reg");
|
|
+
|
|
+ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
|
|
+ if (var_size_in_bytes->is_valid()) {
|
|
+ __ add(tmp1, tmp1, var_size_in_bytes);
|
|
+ } else {
|
|
+ __ add(tmp1, tmp1, con_size_in_bytes);
|
|
+ }
|
|
+ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..964fc28be
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
|
|
@@ -0,0 +1,75 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "memory/allocation.hpp"
|
|
+#include "oops/access.hpp"
|
|
+
|
|
+class BarrierSetAssembler: public CHeapObj<mtGC> {
|
|
+private:
|
|
+ void incr_allocated_bytes(MacroAssembler* masm,
|
|
+ Register var_size_in_bytes, int con_size_in_bytes,
|
|
+ Register t1 = noreg);
|
|
+
|
|
+public:
|
|
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register src, Register dst, Register count, RegSet saved_regs) {}
|
|
+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register start, Register end, Register tmp, RegSet saved_regs) {}
|
|
+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Register dst, Address src, Register tmp1, Register tmp_thread);
|
|
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
|
+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false);
|
|
+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false);
|
|
+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
|
|
+ Register obj, Register tmp, Label& slowpath);
|
|
+
|
|
+ virtual void tlab_allocate(MacroAssembler* masm,
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Register tmp2, // temp register
|
|
+ Label& slow_case, // continuation point if fast allocation fails
|
|
+ bool is_far = false // the distance of label slow_case could be more than 12KiB in C1
|
|
+ );
|
|
+
|
|
+ void eden_allocate(MacroAssembler* masm,
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Label& slow_case, // continuation point if fast allocation fails
|
|
+ bool is_far = false // the distance of label slow_case could be more than 12KiB in C1
|
|
+ );
|
|
+ virtual void barrier_stubs_init() {}
|
|
+ virtual ~BarrierSetAssembler() {}
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..1720488fb
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
|
|
@@ -0,0 +1,120 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/cardTable.hpp"
|
|
+#include "gc/shared/cardTableBarrierSet.hpp"
|
|
+#include "gc/shared/cardTableBarrierSetAssembler.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+
|
|
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) {
|
|
+ assert_different_registers(obj, tmp);
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+ assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
|
|
+
|
|
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
|
+ CardTable* ct = ctbs->card_table();
|
|
+ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
|
|
+
|
|
+ __ srli(obj, obj, CardTable::card_shift);
|
|
+
|
|
+ assert(CardTable::dirty_card_val() == 0, "must be");
|
|
+
|
|
+ __ load_byte_map_base(tmp);
|
|
+ __ add(tmp, obj, tmp);
|
|
+
|
|
+ if (UseCondCardMark) {
|
|
+ Label L_already_dirty;
|
|
+ __ membar(MacroAssembler::StoreLoad);
|
|
+ __ lbu(t1, Address(tmp));
|
|
+ __ beqz(t1, L_already_dirty);
|
|
+ __ sb(zr, Address(tmp));
|
|
+ __ bind(L_already_dirty);
|
|
+ } else {
|
|
+ if (ct->scanned_concurrently()) {
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+ }
|
|
+ __ sb(zr, Address(tmp));
|
|
+ }
|
|
+}
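store_check is the classic card-table post barrier: index the card byte with the address shifted right by card_shift and write dirty (0), optionally skipping the write when the card is already dirty under UseCondCardMark. A compact C++ model with illustrative constants (the StoreStore fence used for concurrently scanned tables is noted but not modelled):

  #include <atomic>
  #include <cstdint>

  static const int    card_shift = 9;   // 512-byte cards; illustrative
  static const int8_t dirty_card = 0;

  inline void store_check(void* obj, volatile int8_t* byte_map_base, bool cond_card_mark) {
    volatile int8_t* card = byte_map_base + ((uintptr_t)obj >> card_shift);
    if (cond_card_mark) {
      std::atomic_thread_fence(std::memory_order_seq_cst);  // StoreLoad before the re-read
      if (*card == dirty_card) return;                       // already dirty, skip the store
    }
    *card = dirty_card;   // concurrently scanned tables additionally order this with StoreStore
  }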
|
|
+
|
|
+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs) {
|
|
+ assert_different_registers(start, tmp);
|
|
+ assert_different_registers(count, tmp);
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+ CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
|
|
+ CardTable* ct = ctbs->card_table();
|
|
+
|
|
+ Label L_loop, L_done;
|
|
+ const Register end = count;
|
|
+
|
|
+ __ beqz(count, L_done); // zero count - nothing to do
|
|
+ // end = start + count << LogBytesPerHeapOop
|
|
+ __ shadd(end, count, start, count, LogBytesPerHeapOop);
|
|
+ __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive
|
|
+
|
|
+ __ srli(start, start, CardTable::card_shift);
|
|
+ __ srli(end, end, CardTable::card_shift);
|
|
+ __ sub(count, end, start); // number of bytes to copy
|
|
+
|
|
+ __ load_byte_map_base(tmp);
|
|
+ __ add(start, start, tmp);
|
|
+ if (ct->scanned_concurrently()) {
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+ }
|
|
+
|
|
+ __ bind(L_loop);
|
|
+ __ add(tmp, start, count);
|
|
+ __ sb(zr, Address(tmp));
|
|
+ __ sub(count, count, 1);
|
|
+ __ bgez(count, L_loop);
|
|
+ __ bind(L_done);
|
|
+}
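The array variant simply dirties every card spanned by the copied oop range; the register shuffling above computes an inclusive card range and walks it backwards. An equivalent forward loop in C++, with the card size and oop size as illustrative parameters:

  #include <cstdint>
  #include <cstddef>

  inline void dirty_card_range(void* start, size_t count_oops, int8_t* byte_map_base,
                               int card_shift = 9, size_t oop_size = 8) {
    if (count_oops == 0) return;
    uintptr_t first = (uintptr_t)start;
    uintptr_t last  = first + count_oops * oop_size - oop_size;   // inclusive last element
    for (uintptr_t c = first >> card_shift; c <= (last >> card_shift); c++) {
      byte_map_base[c] = 0;                                       // dirty_card_val() == 0
    }
  }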
|
|
+
|
|
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ bool in_heap = (decorators & IN_HEAP) != 0;
|
|
+ bool is_array = (decorators & IS_ARRAY) != 0;
|
|
+ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
|
|
+ bool precise = is_array || on_anonymous;
|
|
+
|
|
+ bool needs_post_barrier = val != noreg && in_heap;
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg);
|
|
+ if (needs_post_barrier) {
|
|
+ // flatten object address if needed
|
|
+ if (!precise || dst.offset() == 0) {
|
|
+ store_check(masm, dst.base(), tmp3);
|
|
+ } else {
|
|
+ __ la(tmp3, dst);
|
|
+ store_check(masm, tmp3, t0);
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..a5b3f9fe8
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
|
|
@@ -0,0 +1,43 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
|
|
+
|
|
+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
|
|
+protected:
|
|
+ void store_check(MacroAssembler* masm, Register obj, Register tmp);
|
|
+
|
|
+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs);
|
|
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
|
+
|
|
+};
|
|
+
|
|
+#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..b82275297
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
|
|
@@ -0,0 +1,54 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register src, Register dst, Register count, RegSet saved_regs) {
|
|
+ if (is_oop) {
|
|
+ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs);
|
|
+ }
|
|
+}
|
|
+
|
|
+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register start, Register count, Register tmp,
|
|
+ RegSet saved_regs) {
|
|
+ if (is_oop) {
|
|
+ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs);
|
|
+ }
|
|
+}
|
|
+
|
|
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ if (type == T_OBJECT || type == T_ARRAY) {
|
|
+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
|
|
+ } else {
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
|
|
+ }
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..df206cc87
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
|
|
@@ -0,0 +1,55 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+
|
|
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
|
|
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
|
|
+// accesses, which are overridden in the concrete BarrierSetAssembler.
|
|
+
|
|
+class ModRefBarrierSetAssembler: public BarrierSetAssembler {
|
|
+protected:
|
|
+ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register addr, Register count, RegSet saved_regs) {}
|
|
+ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs) {}
|
|
+
|
|
+ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0;
|
|
+
|
|
+public:
|
|
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register src, Register dst, Register count, RegSet saved_regs);
|
|
+ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register start, Register count, Register tmp, RegSet saved_regs);
|
|
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..6657f1be0
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
|
|
@@ -0,0 +1,124 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
|
|
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
|
|
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
|
|
+
|
|
+#define __ masm->masm()->
|
|
+
|
|
+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
|
|
+ Register addr = _addr->as_register_lo();
|
|
+ Register newval = _new_value->as_register();
|
|
+ Register cmpval = _cmp_value->as_register();
|
|
+ Register tmp1 = _tmp1->as_register();
|
|
+ Register tmp2 = _tmp2->as_register();
|
|
+ Register result = result_opr()->as_register();
|
|
+
|
|
+ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1);
|
|
+
|
|
+ if (UseCompressedOops) {
|
|
+ __ encode_heap_oop(tmp1, cmpval);
|
|
+ cmpval = tmp1;
|
|
+ __ encode_heap_oop(tmp2, newval);
|
|
+ newval = tmp2;
|
|
+ }
|
|
+
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq,
|
|
+ /* release */ Assembler::rl, /* is_cae */ false, result);
|
|
+ if (UseBarriersForVolatile) {
|
|
+ // The membar here is necessary to prevent reordering between the
|
|
+ // release store in the CAS above and a subsequent volatile load.
|
|
+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before
|
|
+ // volatile loads which means we don't need an additional barrier
|
|
+ // here (see LIRGenerator::volatile_field_load()).
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ }
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+#ifdef ASSERT
|
|
+#define __ gen->lir(__FILE__, __LINE__)->
|
|
+#else
|
|
+#define __ gen->lir()->
|
|
+#endif
|
|
+
|
|
+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
|
|
+ BasicType bt = access.type();
|
|
+ if (access.is_oop()) {
|
|
+ LIRGenerator *gen = access.gen();
|
|
+ if (ShenandoahSATBBarrier) {
|
|
+ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
|
|
+ LIR_OprFact::illegalOpr /* pre_val */);
|
|
+ }
|
|
+ if (ShenandoahCASBarrier) {
|
|
+ cmp_value.load_item();
|
|
+ new_value.load_item();
|
|
+
|
|
+ LIR_Opr tmp1 = gen->new_register(T_OBJECT);
|
|
+ LIR_Opr tmp2 = gen->new_register(T_OBJECT);
|
|
+ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
|
|
+ LIR_Opr result = gen->new_register(T_INT);
|
|
+
|
|
+ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result));
|
|
+ return result;
|
|
+ }
|
|
+ }
|
|
+ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
|
|
+}
|
|
+
|
|
+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
|
|
+ LIRGenerator* gen = access.gen();
|
|
+ BasicType type = access.type();
|
|
+
|
|
+ LIR_Opr result = gen->new_register(type);
|
|
+ value.load_item();
|
|
+ LIR_Opr value_opr = value.result();
|
|
+
|
|
+ if (access.is_oop()) {
|
|
+ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators());
|
|
+ }
|
|
+
|
|
+ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
|
|
+ LIR_Opr tmp = gen->new_register(T_INT);
|
|
+ __ xchg(access.resolved_addr(), value_opr, result, tmp);
|
|
+
|
|
+ if (access.is_oop()) {
|
|
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0));
|
|
+ LIR_Opr tmp_opr = gen->new_register(type);
|
|
+ __ move(result, tmp_opr);
|
|
+ result = tmp_opr;
|
|
+ if (ShenandoahSATBBarrier) {
|
|
+ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
|
|
+ result /* pre_val */);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return result;
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..1bc01e454
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
|
|
@@ -0,0 +1,743 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
|
|
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
|
|
+#include "gc/shenandoah/shenandoahForwarding.hpp"
|
|
+#include "gc/shenandoah/shenandoahHeap.hpp"
|
|
+#include "gc/shenandoah/shenandoahHeapRegion.hpp"
|
|
+#include "gc/shenandoah/shenandoahRuntime.hpp"
|
|
+#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
|
|
+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/thread.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#include "c1/c1_MacroAssembler.hpp"
|
|
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
|
|
+#endif
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register src, Register dst, Register count, RegSet saved_regs) {
|
|
+ if (is_oop) {
|
|
+ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
|
|
+ if ((ShenandoahSATBBarrier && !dest_uninitialized) ||
|
|
+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) {
|
|
+ Label done;
|
|
+
|
|
+ // Avoid calling runtime if count == 0
|
|
+ __ beqz(count, done);
|
|
+
|
|
+ // Is GC active?
|
|
+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
|
|
+ assert_different_registers(src, dst, count, t0);
|
|
+
|
|
+ __ lbu(t0, gc_state);
|
|
+ if (ShenandoahSATBBarrier && dest_uninitialized) {
|
|
+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED);
|
|
+ __ beqz(t0, done);
|
|
+ } else {
|
|
+ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
|
|
+ __ beqz(t0, done);
|
|
+ }
|
|
+
|
|
+ __ push_reg(saved_regs, sp);
|
|
+ if (UseCompressedOops) {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),
|
|
+ src, dst, count);
|
|
+ } else {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count);
|
|
+ }
|
|
+ __ pop_reg(saved_regs, sp);
|
|
+ __ bind(done);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call) {
|
|
+ if (ShenandoahSATBBarrier) {
|
|
+ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
|
|
+ }
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call) {
|
|
+ // If expand_call is true then we expand the call_VM_leaf macro
|
|
+ // directly to skip generating the check by
|
|
+ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
|
|
+ assert(thread == xthread, "must be");
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ assert_different_registers(obj, pre_val, tmp, t0);
|
|
+ assert(pre_val != noreg && tmp != noreg, "expecting a register");
|
|
+
|
|
+ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
|
|
+ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
|
|
+
|
|
+ // Is marking active?
|
|
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
|
|
+ __ lwu(tmp, in_progress);
|
|
+ } else {
|
|
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
|
|
+ __ lbu(tmp, in_progress);
|
|
+ }
|
|
+ __ beqz(tmp, done);
|
|
+
|
|
+ // Do we need to load the previous value?
|
|
+ if (obj != noreg) {
|
|
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
|
|
+ }
|
|
+
|
|
+ // Is the previous value null?
|
|
+ __ beqz(pre_val, done);
|
|
+
|
|
+ // Can we store original value in the thread's buffer?
|
|
+ // Is index == 0?
|
|
+ // (The index field is typed as size_t.)
|
|
+ __ ld(tmp, index); // tmp := *index_adr
|
|
+ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime
|
|
+
|
|
+ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize
|
|
+ __ sd(tmp, index); // *index_adr := tmp
|
|
+ __ ld(t0, buffer);
|
|
+ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr
|
|
+
|
|
+ // Record the previous value
|
|
+ __ sd(pre_val, Address(tmp, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ // save the live input values
|
|
+ RegSet saved = RegSet::of(pre_val);
|
|
+ if (tosca_live) saved += RegSet::of(x10);
|
|
+ if (obj != noreg) saved += RegSet::of(obj);
|
|
+
|
|
+ __ push_reg(saved, sp);
|
|
+
|
|
+ // Calling the runtime using the regular call_VM_leaf mechanism generates
|
|
+ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
|
|
+ // that checks that the *(fp + frame::interpreter_frame_last_sp) == NULL.
|
|
+ //
|
|
+ // If we are generating the pre-barrier without a frame (e.g. in the
|
|
+ // intrinsified Reference.get() routine) then fp might be pointing to
|
|
+ // the caller frame and so this check will most likely fail at runtime.
|
|
+ //
|
|
+ // Expanding the call directly bypasses the generation of the check.
|
|
+ // So when we do not have a full interpreter frame on the stack
|
|
+ // expand_call should be passed true.
|
|
+ if (expand_call) {
|
|
+ assert(pre_val != c_rarg1, "smashed arg");
|
|
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ } else {
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ }
|
|
+
|
|
+ __ pop_reg(saved, sp);
|
|
+
|
|
+ __ bind(done);
|
|
+}
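satb_write_barrier_pre is the standard snapshot-at-the-beginning enqueue: when marking is active and the previous value is non-null, push it into the thread's SATB buffer, calling into the runtime only when the buffer is exhausted. Schematically, with the queue fields simplified:

  #include <cstddef>

  struct SatbQueue { size_t index; void** buffer; bool active; };

  inline void satb_pre_barrier(SatbQueue* q, void* pre_val,
                               void (*runtime_enqueue)(void* pre_val)) {
    if (!q->active)          return;   // marking not in progress
    if (pre_val == nullptr)  return;   // nothing to snapshot
    if (q->index == 0) {               // buffer full: hand off to the runtime
      runtime_enqueue(pre_val);
      return;
    }
    q->index -= sizeof(void*);
    *(void**)((char*)q->buffer + q->index) = pre_val;
  }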
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
|
|
+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
|
|
+
|
|
+ Label is_null;
|
|
+ __ beqz(dst, is_null);
|
|
+ resolve_forward_pointer_not_null(masm, dst, tmp);
|
|
+ __ bind(is_null);
|
|
+}
|
|
+
|
|
+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
|
|
+// passed in.
|
|
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
|
|
+ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
|
|
+ // The below loads the mark word, checks if the lowest two bits are
|
|
+ // set, and if so, clear the lowest two bits and copy the result
|
|
+ // to dst. Otherwise it leaves dst alone.
|
|
+ // Implementing this is surprisingly awkward. I do it here by:
|
|
+ // - Inverting the mark word
|
|
+ // - Test lowest two bits == 0
|
|
+ // - If so, set the lowest two bits
|
|
+ // - Invert the result back, and copy to dst
|
|
+ RegSet savedRegs = RegSet::of(t2);
|
|
+ bool borrow_reg = (tmp == noreg);
|
|
+ if (borrow_reg) {
|
|
+ // No free registers available. Make one useful.
|
|
+ tmp = t0;
|
|
+ if (tmp == dst) {
|
|
+ tmp = t1;
|
|
+ }
|
|
+ savedRegs += RegSet::of(tmp);
|
|
+ }
|
|
+
|
|
+ assert_different_registers(tmp, dst, t2);
|
|
+ __ push_reg(savedRegs, sp);
|
|
+
|
|
+ Label done;
|
|
+ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
|
|
+ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1
|
|
+ __ andi(t2, tmp, markOopDesc::lock_mask_in_place);
|
|
+ __ bnez(t2, done);
|
|
+ __ ori(tmp, tmp, markOopDesc::marked_value);
|
|
+ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1
|
|
+ __ bind(done);
|
|
+
|
|
+ __ pop_reg(savedRegs, sp);
|
|
+}
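The invert/test/or/invert sequence above is decoding a Shenandoah forwarding pointer out of the mark word: when the two low (lock) bits are both set, the rest of the word is the to-space address. The same logic without the inversion trick, using the JDK 11 mark-word constants:

  #include <cstdint>

  static const uintptr_t lock_mask    = 0x3;   // markOopDesc::lock_mask_in_place
  static const uintptr_t marked_value = 0x3;   // markOopDesc::marked_value

  inline void* resolve_forwardee(void* obj, uintptr_t mark_word) {
    if ((mark_word & lock_mask) == marked_value) {
      return (void*)(mark_word & ~lock_mask);  // mark word holds the to-space pointer
    }
    return obj;                                // not forwarded: leave dst alone
  }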
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm,
|
|
+ Register dst, Address load_addr) {
|
|
+ assert(ShenandoahLoadRefBarrier, "Should be enabled");
|
|
+ assert(dst != t1 && load_addr.base() != t1, "need t1");
|
|
+ assert_different_registers(load_addr.base(), t1, t2);
|
|
+
|
|
+ Label done;
|
|
+ __ enter();
|
|
+ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
|
|
+ __ lbu(t1, gc_state);
|
|
+
|
|
+ // Check for heap stability
|
|
+ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED);
|
|
+ __ beqz(t1, done);
|
|
+
|
|
+ // use x11 for load address
|
|
+ Register result_dst = dst;
|
|
+ if (dst == x11) {
|
|
+ __ mv(t1, dst);
|
|
+ dst = t1;
|
|
+ }
|
|
+
|
|
+ // Save x10 and x11, unless it is an output register
|
|
+ RegSet to_save = RegSet::of(x10, x11) - result_dst;
|
|
+ __ push_reg(to_save, sp);
|
|
+ __ la(x11, load_addr);
|
|
+ __ mv(x10, dst);
|
|
+
|
|
+ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
|
|
+
|
|
+ __ mv(result_dst, x10);
|
|
+ __ pop_reg(to_save, sp);
|
|
+
|
|
+ __ bind(done);
|
|
+ __ leave();
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) {
|
|
+ if (ShenandoahIUBarrier) {
|
|
+ __ push_call_clobbered_registers();
|
|
+ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ }
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
|
|
+ if (ShenandoahLoadRefBarrier) {
|
|
+ Label is_null;
|
|
+ __ beqz(dst, is_null);
|
|
+ load_reference_barrier_not_null(masm, dst, load_addr);
|
|
+ __ bind(is_null);
|
|
+ }
|
|
+}
|
|
+
|
|
+//
|
|
+// Arguments:
|
|
+//
|
|
+// Inputs:
|
|
+// src: oop location to load from, might be clobbered
|
|
+//
|
|
+// Output:
|
|
+// dst: oop loaded from src location
|
|
+//
|
|
+// Kill:
|
|
+// x30 (tmp reg)
|
|
+//
|
|
+// Alias:
|
|
+// dst: x30 (might use x30 as temporary output register to avoid clobbering src)
|
|
+//
|
|
+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
|
|
+ DecoratorSet decorators,
|
|
+ BasicType type,
|
|
+ Register dst,
|
|
+ Address src,
|
|
+ Register tmp1,
|
|
+ Register tmp_thread) {
|
|
+ // 1: non-reference load, no additional barrier is needed
|
|
+ if (!is_reference_type(type)) {
|
|
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ // 2: load a reference from src location and apply LRB if needed
|
|
+ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
|
|
+ guarantee(dst != x30 && src.base() != x30, "load_at need x30");
|
|
+ bool ist5 = (dst == src.base());
|
|
+ if (ist5) {
|
|
+ __ push_reg(RegSet::of(x30), sp);
|
|
+ }
|
|
+ Register result_dst = dst;
|
|
+
|
|
+ // Preserve src location for LRB
|
|
+ if (dst == src.base()) {
|
|
+ dst = x30;
|
|
+ }
|
|
+ assert_different_registers(dst, src.base());
|
|
+
|
|
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
|
|
+
|
|
+ load_reference_barrier(masm, dst, src);
|
|
+
|
|
+ if (dst != result_dst) {
|
|
+ __ mv(result_dst, dst);
|
|
+ dst = result_dst;
|
|
+ }
|
|
+
|
|
+ if (ist5) {
|
|
+ __ pop_reg(RegSet::of(x30), sp);
|
|
+ }
|
|
+ } else {
|
|
+ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
|
|
+ }
|
|
+
|
|
+ // 3: apply keep-alive barrier if needed
|
|
+ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
|
|
+ __ enter();
|
|
+ __ push_call_clobbered_registers();
|
|
+ satb_write_barrier_pre(masm /* masm */,
|
|
+ noreg /* obj */,
|
|
+ dst /* pre_val */,
|
|
+ xthread /* thread */,
|
|
+ tmp1 /* tmp */,
|
|
+ true /* tosca_live */,
|
|
+ true /* expand_call */);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ leave();
|
|
+ }
|
|
+}
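Taken together, the reference load above does the ordinary load, passes the result through the load-reference barrier only when the heap may contain forwarded objects, and finally runs the SATB keep-alive barrier for Reference-style loads. An outline in C++, where the barrier entry points are illustrative prototypes, not the real runtime API:

  #include <cstdint>

  // Illustrative prototypes only.
  void* shenandoah_load_reference_barrier(void* obj, void** load_addr);
  void  satb_pre_barrier_enqueue(void* referent);

  inline void* decorated_oop_load(void** addr, uint8_t gc_state,
                                  bool need_lrb, bool need_keep_alive) {
    void* v = *addr;                                    // plain BarrierSetAssembler::load_at
    if (need_lrb && v != nullptr && (gc_state & 0x1)) { // 0x1 stands in for HAS_FORWARDED
      v = shenandoah_load_reference_barrier(v, addr);   // may return the to-space copy
    }
    if (need_keep_alive && v != nullptr) {
      satb_pre_barrier_enqueue(v);                      // keep the referent alive for marking
    }
    return v;
  }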
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ bool on_oop = is_reference_type(type);
|
|
+ if (!on_oop) {
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ // flatten object address if needed
|
|
+ if (dst.offset() == 0) {
|
|
+ if (dst.base() != tmp3) {
|
|
+ __ mv(tmp3, dst.base());
|
|
+ }
|
|
+ } else {
|
|
+ __ la(tmp3, dst);
|
|
+ }
|
|
+
|
|
+ shenandoah_write_barrier_pre(masm,
|
|
+ tmp3 /* obj */,
|
|
+ tmp2 /* pre_val */,
|
|
+ xthread /* thread */,
|
|
+ tmp1 /* tmp */,
|
|
+ val != noreg /* tosca_live */,
|
|
+ false /* expand_call */);
|
|
+
|
|
+ if (val == noreg) {
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg);
|
|
+ } else {
|
|
+ iu_barrier(masm, val, tmp1);
|
|
+ // G1 barrier needs uncompressed oop for region cross check.
|
|
+ Register new_val = val;
|
|
+ if (UseCompressedOops) {
|
|
+ new_val = t1;
|
|
+ __ mv(new_val, val);
|
|
+ }
|
|
+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg);
|
|
+ }
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
|
|
+ Register obj, Register tmp, Label& slowpath) {
|
|
+ Label done;
|
|
+ // Resolve jobject
|
|
+ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
|
|
+
|
|
+ // Check for null.
|
|
+ __ beqz(obj, done);
|
|
+
|
|
+ assert(obj != t1, "need t1");
|
|
+ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
|
|
+ __ lbu(t1, gc_state);
|
|
+
|
|
+ // Check for heap in evacuation phase
|
|
+ __ andi(t0, t1, ShenandoahHeap::EVACUATION);
|
|
+ __ bnez(t0, slowpath);
|
|
+
|
|
+ __ bind(done);
|
|
+}
|
|
+
|
|
+// Special Shenandoah CAS implementation that handles false negatives due
|
|
+// to concurrent evacuation. The service is more complex than a
|
|
+// traditional CAS operation because the CAS operation is intended to
|
|
+// succeed if the reference at addr exactly matches expected or if the
|
|
+// reference at addr holds a pointer to a from-space object that has
|
|
+// been relocated to the location named by expected. There are two
|
|
+// races that must be addressed:
|
|
+// a) A parallel thread may mutate the contents of addr so that it points
|
|
+// to a different object. In this case, the CAS operation should fail.
|
|
+// b) A parallel thread may heal the contents of addr, replacing a
|
|
+// from-space pointer held in addr with the to-space pointer
|
|
+// representing the new location of the object.
|
|
+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL
|
|
+// or it refers to an object that is not being evacuated out of
|
|
+// from-space, or it refers to the to-space version of an object that
|
|
+// is being evacuated out of from-space.
|
|
+//
|
|
+// By default, this operation implements sequential consistency and the
|
|
+// value held in the result register following execution of the
|
|
+// generated code sequence is 0 to indicate failure of CAS, non-zero
|
|
+// to indicate success. Arguments support variations on this theme:
|
|
+//
|
|
+// acquire: Allow relaxation of the memory ordering on CAS from
|
|
+// sequential consistency. This can be useful when
|
|
+// sequential consistency is not required, such as when
|
|
+// another sequentially consistent operation is already
|
|
+// present in the execution stream. If acquire, successful
|
|
+// execution has the side effect of assuring that memory
|
|
+// values updated by other threads and "released" will be
|
|
+// visible to any read operations performed by this thread
|
|
+// which follow this operation in program order. This is a
|
|
+// special optimization that should not be enabled by default.
|
|
+// release: Allow relaxation of the memory ordering on CAS from
|
|
+// sequential consistency. This can be useful when
|
|
+// sequential consistency is not required, such as when
|
|
+// another sequentially consistent operation is already
|
|
+// present in the execution stream. If release, successful
|
|
+// completion of this operation has the side effect of
|
|
+// assuring that all writes to memory performed by this
|
|
+// thread that precede this operation in program order are
|
|
+// visible to all other threads that subsequently "acquire"
|
|
+// before reading the respective memory values. This is a
|
|
+// special optimization that should not be enabled by default.
|
|
+// is_cae: This turns CAS (compare and swap) into CAE (compare and
|
|
+// exchange). This HotSpot convention is that CAE makes
|
|
+// available to the caller the "failure witness", which is
|
|
+// the value that was stored in memory which did not match
|
|
+// the expected value. If is_cae, the result is the value
|
|
+// most recently fetched from addr rather than a boolean
|
|
+// success indicator.
|
|
+//
|
|
+// Clobbers t0, t1
|
|
+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
|
|
+ Register addr,
|
|
+ Register expected,
|
|
+ Register new_val,
|
|
+ Assembler::Aqrl acquire,
|
|
+ Assembler::Aqrl release,
|
|
+ bool is_cae,
|
|
+ Register result) {
|
|
+ bool is_narrow = UseCompressedOops;
|
|
+ Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64;
|
|
+
|
|
+ assert_different_registers(addr, expected, t0, t1);
|
|
+ assert_different_registers(addr, new_val, t0, t1);
|
|
+
|
|
+ Label retry, success, fail, done;
|
|
+
|
|
+ __ bind(retry);
|
|
+
|
|
+ // Step 1: Try to CAS.
|
|
+ __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1);
|
|
+
|
|
+ // If success, then we are done.
|
|
+ __ beq(expected, t1, success);
|
|
+
|
|
+ // Step 2: CAS failed, check the forwarded pointer.
|
|
+ __ mv(t0, t1);
|
|
+
|
|
+ if (is_narrow) {
|
|
+ __ decode_heap_oop(t0, t0);
|
|
+ }
|
|
+ resolve_forward_pointer(masm, t0);
|
|
+
|
|
+ __ encode_heap_oop(t0, t0);
|
|
+
|
|
+ // Report failure when the forwarded oop was not expected.
|
|
+ __ bne(t0, expected, fail);
|
|
+
|
|
+ // Step 3: CAS again using the forwarded oop.
|
|
+ __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0);
|
|
+
|
|
+ // Retry when failed.
|
|
+ __ bne(t0, t1, retry);
|
|
+
|
|
+ __ bind(success);
|
|
+ if (is_cae) {
|
|
+ __ mv(result, expected);
|
|
+ } else {
|
|
+ __ mv(result, 1);
|
|
+ }
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(fail);
|
|
+ if (is_cae) {
|
|
+ __ mv(result, t0);
|
|
+ } else {
|
|
+ __ mv(result, zr);
|
|
+ }
|
|
+
|
|
+ __ bind(done);
|
|
+}
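The three-step sequence above can be read as: try the CAS; on failure, check whether the witness is merely the from-space alias of the expected value; if so, CAS once more against that stale witness before giving up. A condensed C++ rendering with std::atomic standing in for the lr/sc loop and a resolve_forwardee helper in the spirit of the earlier sketch:

  #include <atomic>

  // Returns true on success (the is_cae variant would return the witness instead).
  inline bool shenandoah_cas_oop(std::atomic<void*>& addr, void* expected, void* new_val,
                                 void* (*resolve_forwardee)(void*)) {
    for (;;) {
      void* witness = expected;
      if (addr.compare_exchange_strong(witness, new_val)) return true;   // step 1: plain CAS
      // Step 2: the failure may be a false negative if the slot still holds the
      // from-space copy of 'expected'; resolve the witness through its mark word.
      if (resolve_forwardee(witness) != expected) return false;          // genuinely different value
      // Step 3: CAS again, this time against the exact stale value we observed.
      void* stale = witness;
      if (addr.compare_exchange_strong(stale, new_val)) return true;
      // Someone healed or mutated the slot meanwhile; start over.
    }
  }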
|
|
+
|
|
+#undef __
|
|
+
|
|
+#ifdef COMPILER1
|
|
+
|
|
+#define __ ce->masm()->
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
|
|
+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
|
+ // At this point we know that marking is in progress.
|
|
+ // If do_load() is true then we have to emit the
|
|
+ // load of the previous value; otherwise it has already
|
|
+ // been loaded into _pre_val.
|
|
+ __ bind(*stub->entry());
|
|
+
|
|
+ assert(stub->pre_val()->is_register(), "Precondition.");
|
|
+
|
|
+ Register pre_val_reg = stub->pre_val()->as_register();
|
|
+
|
|
+ if (stub->do_load()) {
|
|
+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(),
|
|
+ stub->info(), false /* wide */, false /* unaligned */);
|
|
+ }
|
|
+ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
|
|
+ ce->store_parameter(stub->pre_val()->as_register(), 0);
|
|
+ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
|
|
+ __ j(*stub->continuation());
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce,
|
|
+ ShenandoahLoadReferenceBarrierStub* stub) {
|
|
+ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
|
|
+ __ bind(*stub->entry());
|
|
+
|
|
+ Register obj = stub->obj()->as_register();
|
|
+ Register res = stub->result()->as_register();
|
|
+ Register addr = stub->addr()->as_pointer_register();
|
|
+ Register tmp1 = stub->tmp1()->as_register();
|
|
+ Register tmp2 = stub->tmp2()->as_register();
|
|
+
|
|
+ assert(res == x10, "result must arrive in x10");
|
|
+ assert_different_registers(tmp1, tmp2, t0);
|
|
+
|
|
+ if (res != obj) {
|
|
+ __ mv(res, obj);
|
|
+ }
|
|
+
|
|
+ // Check for null.
|
|
+ __ beqz(res, *stub->continuation(), /* is_far */ true);
|
|
+
|
|
+ // Check for object in cset.
|
|
+ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
|
|
+ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());
|
|
+ __ add(t0, tmp2, tmp1);
|
|
+ __ lb(tmp2, Address(t0));
|
|
+ __ beqz(tmp2, *stub->continuation(), /* is_far */ true);
|
|
+
|
|
+ // Check if object is already forwarded.
|
|
+ Label slow_path;
|
|
+ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
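+  // tmp1 holds the mark word; the code below works on its bitwise complement:
+  // a forwarded object has both lock bits set, so the complement has them
+  // clear, and or-ing marked_value back in and inverting again yields the
+  // forwardee address with the low bits stripped.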
|
|
+ __ xori(tmp1, tmp1, -1);
|
|
+ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place);
|
|
+ __ bnez(t0, slow_path);
|
|
+
|
|
+ // Decode forwarded object.
|
|
+ __ ori(tmp1, tmp1, markOopDesc::marked_value);
|
|
+ __ xori(res, tmp1, -1);
|
|
+ __ j(*stub->continuation());
|
|
+
|
|
+ __ bind(slow_path);
|
|
+ ce->store_parameter(res, 0);
|
|
+ ce->store_parameter(addr, 1);
|
|
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
|
|
+
|
|
+ __ j(*stub->continuation());
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+#define __ sasm->
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
|
|
+ __ prologue("shenandoah_pre_barrier", false);
|
|
+
|
|
+ // arg0 : previous value of memory
|
|
+
|
|
+ BarrierSet* bs = BarrierSet::barrier_set();
|
|
+
|
|
+ const Register pre_val = x10;
|
|
+ const Register thread = xthread;
|
|
+ const Register tmp = t0;
|
|
+
|
|
+ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
|
|
+ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
|
|
+
|
|
+ Label done;
|
|
+ Label runtime;
|
|
+
|
|
+ // Is marking still active?
|
|
+ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
|
|
+ __ lb(tmp, gc_state);
|
|
+ __ andi(tmp, tmp, ShenandoahHeap::MARKING);
|
|
+ __ beqz(tmp, done);
|
|
+
|
|
+ // Can we store original value in the thread's buffer?
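+  // (the queue index is a byte offset that counts down towards zero;
+  //  index == 0 means the buffer is full and we must call into the runtime)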
|
|
+ __ ld(tmp, queue_index);
|
|
+ __ beqz(tmp, runtime);
|
|
+
|
|
+ __ sub(tmp, tmp, wordSize);
|
|
+ __ sd(tmp, queue_index);
|
|
+ __ ld(t1, buffer);
|
|
+ __ add(tmp, tmp, t1);
|
|
+ __ load_parameter(0, t1);
|
|
+ __ sd(t1, Address(tmp, 0));
|
|
+ __ j(done);
|
|
+
|
|
+ __ bind(runtime);
|
|
+ __ push_call_clobbered_registers();
|
|
+ __ load_parameter(0, pre_val);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ bind(done);
|
|
+
|
|
+ __ epilogue();
|
|
+}
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
|
|
+ __ prologue("shenandoah_load_reference_barrier", false);
|
|
+ // arg0 : object to be resolved
|
|
+
|
|
+ __ push_call_clobbered_registers();
|
|
+ __ load_parameter(0, x10);
|
|
+ __ load_parameter(1, x11);
|
|
+ if (UseCompressedOops) {
|
|
+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
|
|
+ } else {
|
|
+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
|
|
+ }
|
|
+ __ jalr(ra);
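+  // Park the result in t0: pop_call_clobbered_registers() below reloads x10,
+  // so the call result is moved back into x10 only after the restore.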
|
|
+ __ mv(t0, x10);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ mv(x10, t0);
|
|
+
|
|
+ __ epilogue();
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+#endif // COMPILER1
|
|
+
|
|
+address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
|
|
+ assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
|
|
+ return _shenandoah_lrb;
|
|
+}
|
|
+
|
|
+#define __ cgen->assembler()->
|
|
+
|
|
+// Shenandoah load reference barrier.
|
|
+//
|
|
+// Input:
|
|
+// x10: OOP to evacuate. Not null.
|
|
+// x11: load address
|
|
+//
|
|
+// Output:
|
|
+// x10: Pointer to evacuated OOP.
|
|
+//
|
|
+// Trashes t0 and t1; preserves everything else.
|
|
+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
|
|
+ __ align(6);
|
|
+ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
|
|
+ address start = __ pc();
|
|
+
|
|
+ Label slow_path;
|
|
+ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr());
|
|
+ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint());
|
|
+ __ add(t1, t1, t0);
|
|
+ __ lbu(t1, Address(t1, 0));
|
|
+ __ andi(t0, t1, 1);
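+  // Bit 0 of the cset byte-map entry tells whether the region is in the
+  // collection set; only then take the slow path into the runtime.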
|
|
+ __ bnez(t0, slow_path);
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(slow_path);
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+
|
|
+ __ push_call_clobbered_registers();
|
|
+
|
|
+ if (UseCompressedOops) {
|
|
+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
|
|
+ } else {
|
|
+ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
|
|
+ }
|
|
+ __ jalr(ra);
|
|
+ __ mv(t0, x10);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ mv(x10, t0);
|
|
+
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+}
|
|
+
|
|
+#undef __
|
|
+
|
|
+void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
|
|
+ if (ShenandoahLoadRefBarrier) {
|
|
+ int stub_code_size = 2048;
|
|
+ ResourceMark rm;
|
|
+ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
|
|
+ CodeBuffer buf(bb);
|
|
+ StubCodeGenerator cgen(&buf);
|
|
+ _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
|
|
+ }
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..84bc55706
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
|
|
@@ -0,0 +1,92 @@
|
|
+/*
|
|
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#ifdef COMPILER1
|
|
+class LIR_Assembler;
|
|
+class ShenandoahPreBarrierStub;
|
|
+class ShenandoahLoadReferenceBarrierStub;
|
|
+class StubAssembler;
|
|
+#endif
|
|
+class StubCodeGenerator;
|
|
+
|
|
+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
|
|
+public:
|
|
+ static address shenandoah_lrb();
|
|
+
|
|
+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp);
|
|
+
|
|
+#ifdef COMPILER1
|
|
+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
|
|
+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
|
|
+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
|
|
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm);
|
|
+#endif
|
|
+
|
|
+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
|
|
+ Register src, Register dst, Register count, RegSet saved_regs);
|
|
+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Register dst, Address src, Register tmp1, Register tmp_thread);
|
|
+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
|
|
+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
|
|
+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
|
|
+ Register obj, Register tmp, Label& slowpath);
|
|
+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result);
|
|
+
|
|
+ virtual void barrier_stubs_init();
|
|
+
|
|
+private:
|
|
+
|
|
+ static address _shenandoah_lrb;
|
|
+
|
|
+ void satb_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call);
|
|
+ void shenandoah_write_barrier_pre(MacroAssembler* masm,
|
|
+ Register obj,
|
|
+ Register pre_val,
|
|
+ Register thread,
|
|
+ Register tmp,
|
|
+ bool tosca_live,
|
|
+ bool expand_call);
|
|
+
|
|
+ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
|
|
+ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
|
|
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);
|
|
+ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
|
|
+
|
|
+ address generate_shenandoah_lrb(StubCodeGenerator* cgen);
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
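For orientation, the retry protocol implemented in assembly by cmpxchg_oop() above corresponds roughly to the following plain C++ sketch (illustrative only, not part of the patch); std::atomic stands in for the LR/SC-based cmpxchg, and an identity stub stands in for the real forwarding-pointer lookup.

#include <atomic>
#include <cstdint>

// Stand-in for the mark-word decoding done in assembly; identity means
// "not forwarded" and keeps the sketch self-contained.
static uintptr_t resolve_forward_pointer(uintptr_t oop) { return oop; }

// CAS with Shenandoah-style false-negative handling; returns true on success.
static bool shenandoah_cas_oop(std::atomic<uintptr_t>* addr,
                               uintptr_t expected, uintptr_t new_val) {
  for (;;) {
    // Step 1: plain CAS; done if it succeeds. On failure, 'witnessed' is
    // updated to the value actually found in memory.
    uintptr_t witnessed = expected;
    if (addr->compare_exchange_strong(witnessed, new_val)) {
      return true;
    }
    // Step 2: genuine failure unless the value in memory forwards to the
    // expected (to-space) oop.
    if (resolve_forward_pointer(witnessed) != expected) {
      return false;
    }
    // Step 3: CAS against the stale from-space value; if that races too,
    // retry from the top.
    if (addr->compare_exchange_strong(witnessed, new_val)) {
      return true;
    }
  }
}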
|
|
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
|
|
new file mode 100644
|
|
index 000000000..6e310697d
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
|
|
@@ -0,0 +1,188 @@
|
|
+//
|
|
+// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
|
|
+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+//
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
+// published by the Free Software Foundation.
|
|
+//
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
+// accompanied this code).
|
|
+//
|
|
+// You should have received a copy of the GNU General Public License version
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+//
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
+// questions.
|
|
+//
|
|
+//
|
|
+
|
|
+source_hpp %{
|
|
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
|
|
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::aq /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
|
|
+ format %{
|
|
+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ true /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP_DEF res, TEMP tmp, KILL cr);
|
|
+ format %{
|
|
+ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ true /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+ format %{
|
|
+ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah"
|
|
+ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+    // Weak CAS is not currently supported by ShenandoahBarrierSet::cmpxchg_oop
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
|
|
+ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
|
|
+ ins_cost(10 * DEFAULT_COST);
|
|
+
|
|
+ effect(TEMP tmp, KILL cr);
|
|
+ format %{
|
|
+ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ Register tmp = $tmp$$Register;
|
|
+ __ mv(tmp, $oldval$$Register); // Must not clobber oldval.
|
|
+ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */,
|
|
+ false /* is_cae */, $res$$Register);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..96068e637
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
|
|
@@ -0,0 +1,44 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
|
|
+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
|
|
+
|
|
+const int StackAlignmentInBytes = 16;
|
|
+
|
|
+// Indicates whether the C calling conventions require that
|
|
+// 32-bit integer argument values are extended to 64 bits.
|
|
+const bool CCallingConventionRequiresIntsAsLongs = false;
|
|
+
|
|
+#define DEOPTIMIZE_WHEN_PATCHING
|
|
+
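+// 8-byte compare-and-swap is supported natively (RV64A provides lr.d/sc.d).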
|
|
+#define SUPPORTS_NATIVE_CX8
|
|
+
|
|
+#define SUPPORT_RESERVED_STACK_AREA
|
|
+
|
|
+#define THREAD_LOCAL_POLL
|
|
+
|
|
+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..b46661a8f
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
|
|
@@ -0,0 +1,120 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP
|
|
+#define CPU_RISCV_GLOBALS_RISCV_HPP
|
|
+
|
|
+#include "utilities/globalDefinitions.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+// Sets the default values for platform dependent flags used by the runtime system.
|
|
+// (see globals.hpp)
|
|
+
|
|
+define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this
|
|
+
|
|
+define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks
|
|
+define_pd_global(bool, TrapBasedNullChecks, false);
|
|
+define_pd_global(bool, UncommonNullCast, true);  // Uncommon-trap NULLs passed to check cast
|
|
+
|
|
+define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment.
|
|
+define_pd_global(intx, CodeEntryAlignment, 64);
|
|
+define_pd_global(intx, OptoLoopAlignment, 16);
|
|
+define_pd_global(intx, InlineFrequencyCount, 100);
|
|
+
|
|
+#define DEFAULT_STACK_YELLOW_PAGES (2)
|
|
+#define DEFAULT_STACK_RED_PAGES (1)
|
|
+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
|
|
+// stack if compiled for unix and LP64. To pass stack overflow tests we need
|
|
+// 20 shadow pages.
|
|
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5))
|
|
+#define DEFAULT_STACK_RESERVED_PAGES (1)
|
|
+
|
|
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
|
|
+#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
|
|
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
|
|
+#define MIN_STACK_RESERVED_PAGES (0)
|
|
+
|
|
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
|
|
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
|
|
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
|
|
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
|
|
+
|
|
+define_pd_global(bool, RewriteBytecodes, true);
|
|
+define_pd_global(bool, RewriteFrequentPairs, true);
|
|
+
|
|
+define_pd_global(bool, UseMembar, true);
|
|
+
|
|
+define_pd_global(bool, PreserveFramePointer, false);
|
|
+
|
|
+// GC Ergo Flags
|
|
+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread
|
|
+
|
|
+define_pd_global(uintx, TypeProfileLevel, 111);
|
|
+
|
|
+define_pd_global(bool, CompactStrings, true);
|
|
+
|
|
+// Clear short arrays bigger than one word in an arch-specific way
|
|
+define_pd_global(intx, InitArrayShortSize, BytesPerLong);
|
|
+
|
|
+define_pd_global(bool, ThreadLocalHandshakes, true);
|
|
+
|
|
+define_pd_global(intx, InlineSmallCode, 1000);
|
|
+
|
|
+#define ARCH_FLAGS(develop, \
|
|
+ product, \
|
|
+ diagnostic, \
|
|
+ experimental, \
|
|
+ notproduct, \
|
|
+ range, \
|
|
+ constraint, \
|
|
+ writeable) \
|
|
+ \
|
|
+ product(bool, NearCpool, true, \
|
|
+ "constant pool is close to instructions") \
|
|
+ product(bool, UseBarriersForVolatile, false, \
|
|
+ "Use memory barriers to implement volatile accesses") \
|
|
+ product(bool, UseCRC32, false, \
|
|
+ "Use CRC32 instructions for CRC32 computation") \
|
|
+ product(bool, UseBlockZeroing, true, \
|
|
+ "Use DC ZVA for block zeroing") \
|
|
+ product(intx, BlockZeroingLowLimit, 256, \
|
|
+ "Minimum size in bytes when block zeroing will be used") \
|
|
+ range(1, max_jint) \
|
|
+ product(bool, TraceTraps, false, "Trace all traps the signal handler") \
|
|
+ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \
|
|
+ product(bool, UseConservativeFence, true, \
|
|
+ "Extend i for r and o for w in the pred/succ flags of fence") \
|
|
+ product(bool, AvoidUnalignedAccesses, true, \
|
|
+ "Avoid generating unaligned memory accesses") \
|
|
+ product(intx, EagerArrayCopyThreshold, 128, \
|
|
+ "Threshod of array length by bytes to " \
|
|
+ "trigger the eager array copy") \
|
|
+ range(0, 65535) \
|
|
+ experimental(bool, UseRVV, false, "Use RVV instructions") \
|
|
+ experimental(bool, UseZba, false, "Use Zba instructions") \
|
|
+ experimental(bool, UseZbb, false, "Use Zbb instructions")
|
|
+
|
|
+#endif // CPU_RISCV_GLOBALS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..980b2a81b
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
|
|
@@ -0,0 +1,79 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "code/icBuffer.hpp"
|
|
+#include "gc/shared/collectedHeap.inline.hpp"
|
|
+#include "interpreter/bytecodes.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+
|
|
+int InlineCacheBuffer::ic_stub_code_size() {
|
|
+ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size)
|
|
+ // 5: auipc + ld + j + address(2 * instruction_size )
|
|
+ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
|
|
+ assert_cond(code_begin != NULL && entry_point != NULL);
|
|
+ ResourceMark rm;
|
|
+ CodeBuffer code(code_begin, ic_stub_code_size());
|
|
+ MacroAssembler* masm = new MacroAssembler(&code);
|
|
+ // Note: even though the code contains an embedded value, we do not need reloc info
|
|
+ // because
|
|
+ // (1) the value is old (i.e., doesn't matter for scavenges)
|
|
+ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
|
|
+
|
|
+ address start = __ pc();
|
|
+ Label l;
|
|
+ __ ld(t1, l);
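+  // PC-relative load of the cached value that is emitted after the code,
+  // at the word bound to label l below.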
|
|
+ __ far_jump(ExternalAddress(entry_point));
|
|
+ __ align(wordSize);
|
|
+ __ bind(l);
|
|
+ __ emit_int64((intptr_t)cached_value);
|
|
+ // Only need to invalidate the 1st two instructions - not the whole ic stub
|
|
+ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
|
|
+ assert(__ pc() - start == ic_stub_code_size(), "must be");
|
|
+}
|
|
+
|
|
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
|
|
+ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object
|
|
+ NativeJump* jump = nativeJump_at(move->next_instruction_address());
|
|
+ return jump->jump_destination();
|
|
+}
|
|
+
|
|
+
|
|
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
|
|
+ // The word containing the cached value is at the end of this IC buffer
|
|
+ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
|
|
+ void* o = (void*)*p;
|
|
+ return o;
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..ed8022784
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp
|
|
@@ -0,0 +1,61 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "runtime/icache.hpp"
|
|
+#include "macroAssembler_riscv.hpp"
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+static int icache_flush(address addr, int lines, int magic) {
|
|
+ // To make a store to instruction memory visible to all RISC-V harts,
|
|
+ // the writing hart has to execute a data FENCE before requesting that
|
|
+ // all remote RISC-V harts execute a FENCE.I
|
|
+ //
|
|
+  // No such assurance is defined at the interface level of the builtin
+  // method, so we issue the data fence ourselves to make sure it works.
|
|
+ __asm__ volatile("fence rw, rw" : : : "memory");
|
|
+
|
|
+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size));
|
|
+ return magic;
|
|
+}
|
|
+
|
|
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
|
|
+
|
|
+ address start = (address)icache_flush;
|
|
+
|
|
+ *flush_icache_stub = (ICache::flush_icache_stub_t)start;
|
|
+
|
|
+  // ICache::invalidate_range() contains an explicit check that the first
+  // call is invoked on the generated icache flush stub code range.
|
|
+ ICache::invalidate_range(start, 0);
|
|
+
|
|
+ {
|
|
+ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush");
|
|
+ __ ret();
|
|
+ }
|
|
+}
|
|
+
|
|
+#undef __
|
|
diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..a503d3be3
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp
|
|
@@ -0,0 +1,42 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_ICACHE_RISCV_HPP
|
|
+#define CPU_RISCV_ICACHE_RISCV_HPP
|
|
+
|
|
+// Interface for updating the instruction cache. Whenever the VM
|
|
+// modifies code, part of the processor instruction cache potentially
|
|
+// has to be flushed.
|
|
+
|
|
+class ICache : public AbstractICache {
|
|
+public:
|
|
+ enum {
|
|
+ stub_size = 16, // Size of the icache flush stub in bytes
|
|
+ line_size = BytesPerWord, // conservative
|
|
+ log2_line_size = LogBytesPerWord // log2(line_size)
|
|
+ };
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_ICACHE_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..91deb0ae2
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
|
|
@@ -0,0 +1,1932 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "interp_masm_riscv.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interpreterRuntime.hpp"
|
|
+#include "logging/log.hpp"
|
|
+#include "oops/arrayOop.hpp"
|
|
+#include "oops/markOop.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/methodData.hpp"
|
|
+#include "prims/jvmtiExport.hpp"
|
|
+#include "prims/jvmtiThreadState.hpp"
|
|
+#include "runtime/basicLock.hpp"
|
|
+#include "runtime/biasedLocking.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/safepointMechanism.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/thread.inline.hpp"
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::narrow(Register result) {
|
|
+ // Get method->_constMethod->_result_type
|
|
+ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize));
|
|
+ ld(t0, Address(t0, Method::const_offset()));
|
|
+ lbu(t0, Address(t0, ConstMethod::result_type_offset()));
|
|
+
|
|
+ Label done, notBool, notByte, notChar;
|
|
+
|
|
+ // common case first
|
|
+ mv(t1, T_INT);
|
|
+ beq(t0, t1, done);
|
|
+
|
|
+ // mask integer result to narrower return type.
|
|
+ mv(t1, T_BOOLEAN);
|
|
+ bne(t0, t1, notBool);
|
|
+
|
|
+ andi(result, result, 0x1);
|
|
+ j(done);
|
|
+
|
|
+ bind(notBool);
|
|
+ mv(t1, T_BYTE);
|
|
+ bne(t0, t1, notByte);
|
|
+ sign_extend(result, result, 8);
|
|
+ j(done);
|
|
+
|
|
+ bind(notByte);
|
|
+ mv(t1, T_CHAR);
|
|
+ bne(t0, t1, notChar);
|
|
+ zero_extend(result, result, 16);
|
|
+ j(done);
|
|
+
|
|
+ bind(notChar);
|
|
+ sign_extend(result, result, 16);
|
|
+
|
|
+ // Nothing to do for T_INT
|
|
+ bind(done);
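+  // Sign-extend the (possibly narrowed) 32-bit result to 64 bits.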
|
|
+ addw(result, result, zr);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
|
|
+ assert(entry != NULL, "Entry must have been generated by now");
|
|
+ j(entry);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
|
|
+ if (JvmtiExport::can_pop_frame()) {
|
|
+ Label L;
|
|
+ // Initiate popframe handling only if it is not already being
|
|
+ // processed. If the flag has the popframe_processing bit set,
|
|
+ // it means that this code is called *during* popframe handling - we
|
|
+ // don't want to reenter.
|
|
+ // This method is only called just after the call into the vm in
|
|
+ // call_VM_base, so the arg registers are available.
|
|
+ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset()));
|
|
+ andi(t0, t1, JavaThread::popframe_pending_bit);
|
|
+ beqz(t0, L);
|
|
+ andi(t0, t1, JavaThread::popframe_processing_bit);
|
|
+ bnez(t0, L);
|
|
+ // Call Interpreter::remove_activation_preserving_args_entry() to get the
|
|
+ // address of the same-named entrypoint in the generated interpreter code.
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
|
|
+ jr(x10);
|
|
+ bind(L);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
|
|
+ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset()));
|
|
+ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset());
|
|
+ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset());
|
|
+ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset());
|
|
+ switch (state) {
|
|
+ case atos:
|
|
+ ld(x10, oop_addr);
|
|
+ sd(zr, oop_addr);
|
|
+ verify_oop(x10);
|
|
+ break;
|
|
+ case ltos:
|
|
+ ld(x10, val_addr);
|
|
+ break;
|
|
+ case btos: // fall through
|
|
+ case ztos: // fall through
|
|
+ case ctos: // fall through
|
|
+ case stos: // fall through
|
|
+ case itos:
|
|
+ lwu(x10, val_addr);
|
|
+ break;
|
|
+ case ftos:
|
|
+ flw(f10, val_addr);
|
|
+ break;
|
|
+ case dtos:
|
|
+ fld(f10, val_addr);
|
|
+ break;
|
|
+ case vtos:
|
|
+ /* nothing to do */
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ // Clean up tos value in the thread object
|
|
+ mvw(t0, (int) ilgl);
|
|
+ sw(t0, tos_addr);
|
|
+ sw(zr, val_addr);
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
|
|
+ if (JvmtiExport::can_force_early_return()) {
|
|
+ Label L;
|
|
+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
|
|
+ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit
|
|
+
|
|
+ // Initiate earlyret handling only if it is not already being processed.
|
|
+ // If the flag has the earlyret_processing bit set, it means that this code
|
|
+ // is called *during* earlyret handling - we don't want to reenter.
|
|
+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset()));
|
|
+ mv(t1, JvmtiThreadState::earlyret_pending);
|
|
+ bne(t0, t1, L);
|
|
+
|
|
+ // Call Interpreter::remove_activation_early_entry() to get the address of the
|
|
+ // same-named entrypoint in the generated interpreter code.
|
|
+ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
|
|
+ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset()));
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0);
|
|
+ jr(x10);
|
|
+ bind(L);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
|
|
+ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
|
|
+ lhu(reg, Address(xbcp, bcp_offset));
|
|
+ revb_h(reg, reg);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::get_dispatch() {
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset);
|
|
+ addi(xdispatch, xdispatch, offset);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
|
|
+ int bcp_offset,
|
|
+ size_t index_size) {
|
|
+ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
|
|
+ if (index_size == sizeof(u2)) {
|
|
+ load_unsigned_short(index, Address(xbcp, bcp_offset));
|
|
+ } else if (index_size == sizeof(u4)) {
|
|
+ lwu(index, Address(xbcp, bcp_offset));
|
|
+ // Check if the secondary index definition is still ~x, otherwise
|
|
+ // we have to change the following assembler code to calculate the
|
|
+ // plain index.
|
|
+ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
|
|
+ xori(index, index, -1);
|
|
+ addw(index, index, zr);
|
|
+ } else if (index_size == sizeof(u1)) {
|
|
+ load_unsigned_byte(index, Address(xbcp, bcp_offset));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+// Return
|
|
+// Rindex: index into constant pool
|
|
+// Rcache: address of cache entry - ConstantPoolCache::base_offset()
|
|
+//
|
|
+// A caller must add ConstantPoolCache::base_offset() to Rcache to get
|
|
+// the true address of the cache entry.
|
|
+//
|
|
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
|
|
+ Register index,
|
|
+ int bcp_offset,
|
|
+ size_t index_size) {
|
|
+ assert_different_registers(cache, index);
|
|
+ assert_different_registers(cache, xcpool);
|
|
+ get_cache_index_at_bcp(index, bcp_offset, index_size);
|
|
+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
|
|
+ // Convert from field index to ConstantPoolCacheEntry
|
|
+ // riscv already has the cache in xcpool so there is no need to
|
|
+ // install it in cache. Instead we pre-add the indexed offset to
|
|
+ // xcpool and return it in cache. All clients of this method need to
|
|
+ // be modified accordingly.
|
|
+ shadd(cache, index, xcpool, cache, 5);
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
|
|
+ Register index,
|
|
+ Register bytecode,
|
|
+ int byte_no,
|
|
+ int bcp_offset,
|
|
+ size_t index_size) {
|
|
+ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
|
|
+ // We use a 32-bit load here since the layout of 64-bit words on
|
|
+ // little-endian machines allow us that.
|
|
+ // n.b. unlike x86 cache already includes the index offset
|
|
+ la(bytecode, Address(cache,
|
|
+ ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::indices_offset()));
|
|
+ membar(MacroAssembler::AnyAny);
|
|
+ lwu(bytecode, bytecode);
|
|
+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
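+  // Together with the AnyAny fence above, this gives the 32-bit load acquire
+  // semantics: later loads from the cache entry cannot float above it.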
|
|
+ const int shift_count = (1 + byte_no) * BitsPerByte;
|
|
+ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte));
|
|
+ srli(bytecode, bytecode, XLEN - BitsPerByte);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
|
|
+ Register tmp,
|
|
+ int bcp_offset,
|
|
+ size_t index_size) {
|
|
+ assert(cache != tmp, "must use different register");
|
|
+ get_cache_index_at_bcp(tmp, bcp_offset, index_size);
|
|
+ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
|
|
+ // Convert from field index to ConstantPoolCacheEntry index
|
|
+ // and from word offset to byte offset
|
|
+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
|
|
+ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
|
|
+ // skip past the header
|
|
+ add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
|
|
+ // construct pointer to cache entry
|
|
+ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord);
|
|
+}
|
|
+
|
|
+// Load object from cpool->resolved_references(index)
|
|
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
|
|
+ Register result, Register index, Register tmp) {
|
|
+ assert_different_registers(result, index);
|
|
+
|
|
+ get_constant_pool(result);
|
|
+ // Load pointer for resolved_references[] objArray
|
|
+ ld(result, Address(result, ConstantPool::cache_offset_in_bytes()));
|
|
+ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
|
|
+ resolve_oop_handle(result, tmp);
|
|
+ // Add in the index
|
|
+ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
|
|
+ shadd(result, index, result, index, LogBytesPerHeapOop);
|
|
+ load_heap_oop(result, Address(result, 0));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::load_resolved_klass_at_offset(
|
|
+ Register cpool, Register index, Register klass, Register temp) {
|
|
+ shadd(temp, index, cpool, temp, LogBytesPerWord);
|
|
+ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index
|
|
+ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses
|
|
+ shadd(klass, temp, klass, temp, LogBytesPerWord);
|
|
+ ld(klass, Address(klass, Array<Klass*>::base_offset_in_bytes()));
|
|
+}
|
|
+
|
|
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
|
|
+// subtype of super_klass.
|
|
+//
|
|
+// Args:
|
|
+// x10: superklass
|
|
+// Rsub_klass: subklass
|
|
+//
|
|
+// Kills:
|
|
+// x12, x15
|
|
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
|
|
+ Label& ok_is_subtype) {
|
|
+ assert(Rsub_klass != x10, "x10 holds superklass");
|
|
+ assert(Rsub_klass != x12, "x12 holds 2ndary super array length");
|
|
+ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr");
|
|
+
|
|
+ // Profile the not-null value's klass.
|
|
+ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15
|
|
+
|
|
+ // Do the check.
|
|
+ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12
|
|
+
|
|
+ // Profile the failure of the check.
|
|
+ profile_typecheck_failed(x12); // blows x12
|
|
+}
|
|
+
|
|
+// Java Expression Stack
|
|
+
|
|
+void InterpreterMacroAssembler::pop_ptr(Register r) {
|
|
+ ld(r, Address(esp, 0));
|
|
+ addi(esp, esp, wordSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::pop_i(Register r) {
|
|
+  lw(r, Address(esp, 0)); // lw does sign extension
|
|
+ addi(esp, esp, wordSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::pop_l(Register r) {
|
|
+ ld(r, Address(esp, 0));
|
|
+ addi(esp, esp, 2 * Interpreter::stackElementSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push_ptr(Register r) {
|
|
+ addi(esp, esp, -wordSize);
|
|
+ sd(r, Address(esp, 0));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push_i(Register r) {
|
|
+ addi(esp, esp, -wordSize);
|
|
+  addw(r, r, zr); // sign-extend
|
|
+ sd(r, Address(esp, 0));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push_l(Register r) {
|
|
+ addi(esp, esp, -2 * wordSize);
|
|
+ sd(zr, Address(esp, wordSize));
|
|
+ sd(r, Address(esp));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::pop_f(FloatRegister r) {
|
|
+ flw(r, esp, 0);
|
|
+ addi(esp, esp, wordSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::pop_d(FloatRegister r) {
|
|
+ fld(r, esp, 0);
|
|
+ addi(esp, esp, 2 * Interpreter::stackElementSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push_f(FloatRegister r) {
|
|
+ addi(esp, esp, -wordSize);
|
|
+ fsw(r, Address(esp, 0));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push_d(FloatRegister r) {
|
|
+ addi(esp, esp, -2 * wordSize);
|
|
+ fsd(r, Address(esp, 0));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::pop(TosState state) {
|
|
+ switch (state) {
|
|
+ case atos:
|
|
+ pop_ptr();
|
|
+ verify_oop(x10);
|
|
+ break;
|
|
+ case btos: // fall through
|
|
+ case ztos: // fall through
|
|
+ case ctos: // fall through
|
|
+ case stos: // fall through
|
|
+ case itos:
|
|
+ pop_i();
|
|
+ break;
|
|
+ case ltos:
|
|
+ pop_l();
|
|
+ break;
|
|
+ case ftos:
|
|
+ pop_f();
|
|
+ break;
|
|
+ case dtos:
|
|
+ pop_d();
|
|
+ break;
|
|
+ case vtos:
|
|
+ /* nothing to do */
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::push(TosState state) {
|
|
+ switch (state) {
|
|
+ case atos:
|
|
+ verify_oop(x10);
|
|
+ push_ptr();
|
|
+ break;
|
|
+ case btos: // fall through
|
|
+ case ztos: // fall through
|
|
+ case ctos: // fall through
|
|
+ case stos: // fall through
|
|
+ case itos:
|
|
+ push_i();
|
|
+ break;
|
|
+ case ltos:
|
|
+ push_l();
|
|
+ break;
|
|
+ case ftos:
|
|
+ push_f();
|
|
+ break;
|
|
+ case dtos:
|
|
+ push_d();
|
|
+ break;
|
|
+ case vtos:
|
|
+ /* nothing to do */
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+// Helpers for swap and dup
|
|
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
|
|
+ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::store_ptr(int n, Register val) {
|
|
+ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::load_float(Address src) {
|
|
+ flw(f10, src);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::load_double(Address src) {
|
|
+ fld(f10, src);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
|
|
+ // set sender sp
|
|
+ mv(x30, sp);
|
|
+ // record last_sp
|
|
+ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+}
|
|
+
|
|
+// Jump to the from_interpreted entry of a call unless single stepping is
+// possible in this thread, in which case we must call the i2i entry
|
|
+void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
|
|
+ prepare_to_jump_from_interpreted();
|
|
+ if (JvmtiExport::can_post_interpreter_events()) {
|
|
+ Label run_compiled_code;
|
|
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
|
|
+ // compiled code in threads for which the event is enabled. Check here for
|
|
+ // interp_only_mode if these events CAN be enabled.
|
|
+ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
|
|
+ beqz(t0, run_compiled_code);
|
|
+ ld(t0, Address(method, Method::interpreter_entry_offset()));
|
|
+ jr(t0);
|
|
+ bind(run_compiled_code);
|
|
+ }
|
|
+
|
|
+ ld(t0, Address(method, Method::from_interpreted_offset()));
|
|
+ jr(t0);
|
|
+}
|
|
+
|
|
+// The following two routines provide a hook so that an implementation
|
|
+// can schedule the dispatch in two parts. amd64 does not do this.
|
|
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
|
|
+ dispatch_next(state, step);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_base(TosState state,
|
|
+ address* table,
|
|
+ bool verifyoop,
|
|
+ bool generate_poll,
|
|
+ Register Rs) {
|
|
+  // Note: the argument Rs is t0 by default.
|
|
+ if (VerifyActivationFrameSize) {
|
|
+ Unimplemented();
|
|
+ }
|
|
+ if (verifyoop && state == atos) {
|
|
+ verify_oop(x10);
|
|
+ }
|
|
+
|
|
+ Label safepoint;
|
|
+ address* const safepoint_table = Interpreter::safept_table(state);
|
|
+ bool needs_thread_local_poll = generate_poll &&
|
|
+ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
|
|
+
|
|
+ if (needs_thread_local_poll) {
|
|
+ NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
|
|
+ ld(t1, Address(xthread, Thread::polling_page_offset()));
|
|
+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit()));
|
|
+ bnez(t1, safepoint);
|
|
+ }
|
|
+ if (table == Interpreter::dispatch_table(state)) {
|
|
+ mv(t1, Interpreter::distance_from_dispatch_table(state));
|
|
+ add(t1, Rs, t1);
|
|
+ shadd(t1, t1, xdispatch, t1, 3);
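+    // t1 = xdispatch + ((Rs + distance) << 3), i.e. the address of table[bytecode]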
|
|
+ } else {
|
|
+ mv(t1, (address)table);
|
|
+ shadd(t1, Rs, t1, Rs, 3);
|
|
+ }
|
|
+ ld(t1, Address(t1));
|
|
+ jr(t1);
|
|
+
|
|
+ if (needs_thread_local_poll) {
|
|
+ bind(safepoint);
|
|
+ la(t1, ExternalAddress((address)safepoint_table));
|
|
+ shadd(t1, Rs, t1, Rs, 3);
|
|
+ ld(t1, Address(t1));
|
|
+ jr(t1);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) {
|
|
+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) {
|
|
+ dispatch_base(state, Interpreter::normal_table(state), Rs);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) {
|
|
+ dispatch_base(state, Interpreter::normal_table(state), false, Rs);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
|
|
+ // load next bytecode
|
|
+ load_unsigned_byte(t0, Address(xbcp, step));
|
|
+ add(xbcp, xbcp, step);
|
|
+ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
|
|
+ // load current bytecode
|
|
+ lbu(t0, Address(xbcp, 0));
|
|
+ dispatch_base(state, table);
|
|
+}
|
|
+
|
|
+// remove activation
|
|
+//
|
|
+// Unlock the receiver if this is a synchronized method.
|
|
+// Unlock any Java monitors from synchronized blocks.
|
|
+// Remove the activation from the stack.
|
|
+//
|
|
+// If there are locked Java monitors
|
|
+// If throw_monitor_exception
|
|
+// throws IllegalMonitorStateException
|
|
+// Else if install_monitor_exception
|
|
+// installs IllegalMonitorStateException
|
|
+// Else
|
|
+// no error processing
|
|
+void InterpreterMacroAssembler::remove_activation(
|
|
+ TosState state,
|
|
+ bool throw_monitor_exception,
|
|
+ bool install_monitor_exception,
|
|
+ bool notify_jvmdi) {
|
|
+  // Note: register x13 may be in use for the
+  // result check if this is a synchronized method
|
|
+ Label unlocked, unlock, no_unlock;
|
|
+
|
|
+ // get the value of _do_not_unlock_if_synchronized into x13
|
|
+ const Address do_not_unlock_if_synchronized(xthread,
|
|
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
|
|
+ lbu(x13, do_not_unlock_if_synchronized);
|
|
+ sb(zr, do_not_unlock_if_synchronized); // reset the flag
|
|
+
|
|
+ // get method access flags
|
|
+ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize));
|
|
+ ld(x12, Address(x11, Method::access_flags_offset()));
|
|
+ andi(t0, x12, JVM_ACC_SYNCHRONIZED);
|
|
+ beqz(t0, unlocked);
|
|
+
|
|
+ // Don't unlock anything if the _do_not_unlock_if_synchronized flag
|
|
+ // is set.
|
|
+ bnez(x13, no_unlock);
|
|
+
|
|
+ // unlock monitor
|
|
+ push(state); // save result
|
|
+
|
|
+ // BasicObjectLock will be first in list, since this is a
|
|
+ // synchronized method. However, need to check that the object has
|
|
+ // not been unlocked by an explicit monitorexit bytecode.
|
|
+ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset *
|
|
+ wordSize - (int) sizeof(BasicObjectLock));
|
|
+ // We use c_rarg1 so that if we go slow path it will be the correct
|
|
+ // register for unlock_object to pass to VM directly
|
|
+ la(c_rarg1, monitor); // address of first monitor
|
|
+
|
|
+ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ bnez(x10, unlock);
|
|
+
|
|
+ pop(state);
|
|
+ if (throw_monitor_exception) {
|
|
+ // Entry already unlocked, need to throw exception
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
|
|
+ should_not_reach_here();
|
|
+ } else {
|
|
+ // Monitor already unlocked during a stack unroll. If requested,
|
|
+ // install an illegal_monitor_state_exception. Continue with
|
|
+ // stack unrolling.
|
|
+ if (install_monitor_exception) {
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::new_illegal_monitor_state_exception));
|
|
+ }
|
|
+ j(unlocked);
|
|
+ }
|
|
+
|
|
+ bind(unlock);
|
|
+ unlock_object(c_rarg1);
|
|
+ pop(state);
|
|
+
|
|
+ // Check for block-structured locking, i.e. that all locked
+ // objects have been unlocked
|
|
+ bind(unlocked);
|
|
+
|
|
+ // x10: Might contain return value
|
|
+
|
|
+ // Check that all monitors are unlocked
|
|
+ {
|
|
+ Label loop, exception, entry, restart;
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
|
|
+ const Address monitor_block_top(
|
|
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
|
|
+ const Address monitor_block_bot(
|
|
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
|
|
+
|
|
+ bind(restart);
|
|
+ // We use c_rarg1 so that if we go slow path it will be the correct
|
|
+ // register for unlock_object to pass to VM directly
|
|
+ ld(c_rarg1, monitor_block_top); // points to current entry, starting
|
|
+ // with top-most entry
|
|
+ la(x9, monitor_block_bot); // points to word before bottom of
|
|
+ // monitor block
|
|
+
|
|
+ j(entry);
|
|
+
|
|
+ // Entry already locked, need to throw exception
|
|
+ bind(exception);
|
|
+
|
|
+ if (throw_monitor_exception) {
|
|
+ // Throw exception
|
|
+ MacroAssembler::call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::
|
|
+ throw_illegal_monitor_state_exception));
|
|
+
|
|
+ should_not_reach_here();
|
|
+ } else {
|
|
+ // Stack unrolling. Unlock object and install illegal_monitor_exception.
|
|
+ // Unlock does not block, so don't have to worry about the frame.
|
|
+ // We don't have to preserve c_rarg1 since we are going to throw an exception.
|
|
+
|
|
+ push(state);
|
|
+ unlock_object(c_rarg1);
|
|
+ pop(state);
|
|
+
|
|
+ if (install_monitor_exception) {
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::
|
|
+ new_illegal_monitor_state_exception));
|
|
+ }
|
|
+
|
|
+ j(restart);
|
|
+ }
|
|
+
|
|
+ bind(loop);
|
|
+ // check if current entry is used
|
|
+ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes());
|
|
+ ld(t0, Address(t0, 0));
|
|
+ bnez(t0, exception);
|
|
+
|
|
+ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry
|
|
+ bind(entry);
|
|
+ bne(c_rarg1, x9, loop); // check if bottom reached; if not at bottom then check this entry
|
|
+ }
|
|
+
|
|
+ bind(no_unlock);
|
|
+
|
|
+ // jvmti support
|
|
+ if (notify_jvmdi) {
|
|
+ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA
|
|
+
|
|
+ } else {
|
|
+ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
|
|
+ }
|
|
+
|
|
+ // remove activation
|
|
+ // get sender esp
|
|
+ ld(t1,
|
|
+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
|
|
+ if (StackReservedPages > 0) {
|
|
+ // testing if reserved zone needs to be re-enabled
|
|
+ Label no_reserved_zone_enabling;
|
|
+
|
|
+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
|
|
+ ble(t1, t0, no_reserved_zone_enabling);
|
|
+
|
|
+ call_VM_leaf(
|
|
+ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread);
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_delayed_StackOverflowError));
|
|
+ should_not_reach_here();
|
|
+
|
|
+ bind(no_reserved_zone_enabling);
|
|
+ }
|
|
+
|
|
+ // restore sender esp
|
|
+ mv(esp, t1);
|
|
+ // remove frame anchor
|
|
+ leave();
|
|
+ // If we're returning to interpreted code we will shortly be
|
|
+ // adjusting SP to allow some space for ESP. If we're returning to
|
|
+ // compiled code the saved sender SP was saved in sender_sp, so this
|
|
+ // restores it.
|
|
+ andi(sp, esp, -16);
|
|
+}
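The monitor-block walk above is a linear scan between two frame-relative pointers; any BasicObjectLock whose obj field is still non-null means a monitor was left locked. A hedged C++ model of that loop, with simplified types that merely stand in for the HotSpot ones:

    struct BasicObjectLockModel { void* displaced_header; void* obj; };

    // c_rarg1 starts at monitor_block_top, x9 holds monitor_block_bot;
    // the generated loop advances one entry at a time until the two meet.
    static bool all_monitors_unlocked(BasicObjectLockModel* top, BasicObjectLockModel* bottom) {
      for (BasicObjectLockModel* cur = top; cur != bottom; ++cur) {  // bne(c_rarg1, x9, loop)
        if (cur->obj != nullptr) {
          return false;   // taken branch to 'exception'
        }
      }
      return true;        // fall through to 'no_unlock'
    }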
|
|
+
|
|
+// Lock object
|
|
+//
|
|
+// Args:
|
|
+// c_rarg1: BasicObjectLock to be used for locking
|
|
+//
|
|
+// Kills:
|
|
+// x10
|
|
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
|
|
+// t0, t1 (temp regs)
|
|
+void InterpreterMacroAssembler::lock_object(Register lock_reg)
|
|
+{
|
|
+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
|
|
+ if (UseHeavyMonitors) {
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
|
|
+ lock_reg);
|
|
+ } else {
|
|
+ Label done;
|
|
+
|
|
+ const Register swap_reg = x10;
|
|
+ const Register tmp = c_rarg2;
|
|
+ const Register obj_reg = c_rarg3; // Will contain the oop
|
|
+
|
|
+ const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
|
|
+ const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
|
|
+ const int mark_offset = lock_offset +
|
|
+ BasicLock::displaced_header_offset_in_bytes();
|
|
+
|
|
+ Label slow_case;
|
|
+
|
|
+ // Load object pointer into obj_reg c_rarg3
|
|
+ ld(obj_reg, Address(lock_reg, obj_offset));
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
|
|
+ }
|
|
+
|
|
+ // Load (object->mark() | 1) into swap_reg
|
|
+ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
|
|
+ ori(swap_reg, t0, 1);
|
|
+
|
|
+ // Save (object->mark() | 1) into BasicLock's displaced header
|
|
+ sd(swap_reg, Address(lock_reg, mark_offset));
|
|
+
|
|
+ assert(lock_offset == 0,
+ "displaced header must be first word in BasicObjectLock");
|
|
+
|
|
+ if (PrintBiasedLockingStatistics) {
|
|
+ Label fail, fast;
|
|
+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail);
|
|
+ bind(fast);
|
|
+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
|
|
+ t1, t0);
|
|
+ j(done);
|
|
+ bind(fail);
|
|
+ } else {
|
|
+ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL);
|
|
+ }
|
|
+
|
|
+ // Test if the oopMark is an obvious stack pointer, i.e.,
|
|
+ // 1) (mark & 7) == 0, and
|
|
+ // 2) sp <= mark < mark + os::pagesize()
|
|
+ //
|
|
+ // These 3 tests can be done by evaluating the following
|
|
+ // expression: ((mark - sp) & (7 - os::vm_page_size())),
|
|
+ // assuming both stack pointer and pagesize have their
|
|
+ // least significant 3 bits clear.
|
|
+ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg
|
|
+ sub(swap_reg, swap_reg, sp);
|
|
+ mv(t0, (int64_t)(7 - os::vm_page_size()));
|
|
+ andr(swap_reg, swap_reg, t0);
|
|
+
|
|
+ // Save the test result, for recursive case, the result is zero
|
|
+ sd(swap_reg, Address(lock_reg, mark_offset));
|
|
+
|
|
+ if (PrintBiasedLockingStatistics) {
|
|
+ bnez(swap_reg, slow_case);
|
|
+ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
|
|
+ t1, t0);
|
|
+ }
|
|
+ beqz(swap_reg, done);
|
|
+
|
|
+ bind(slow_case);
|
|
+
|
|
+ // Call the runtime routine for slow case
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
|
|
+ lock_reg);
|
|
+
|
|
+ bind(done);
|
|
+ }
|
|
+}
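The combined alignment-and-range test above can be sanity-checked numerically: interpreted as an unsigned word, 7 - page_size is a mask that keeps the low three bits plus every bit at or above the page size, so the AND is zero exactly when the displaced mark is 8-byte aligned and within one page above sp. A small self-contained check, assuming a 4096-byte page and not tied to HotSpot:

    #include <cassert>
    #include <cstdint>

    // Zero iff mark is 8-byte aligned and sp <= mark < sp + page_size.
    static bool looks_like_stack_lock(uintptr_t mark, uintptr_t sp, uintptr_t page_size = 4096) {
      return ((mark - sp) & (7 - page_size)) == 0;
    }

    int main() {
      uintptr_t sp = 0x7ffff000;
      assert(looks_like_stack_lock(sp + 0x10, sp));     // aligned, within one page
      assert(!looks_like_stack_lock(sp + 0x13, sp));    // misaligned
      assert(!looks_like_stack_lock(sp + 0x2000, sp));  // more than a page above sp
      return 0;
    }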
|
|
+
|
|
+
|
|
+// Unlocks an object. Used in monitorexit bytecode and
|
|
+// remove_activation. Throws an IllegalMonitorException if object is
|
|
+// not locked by current thread.
|
|
+//
|
|
+// Args:
|
|
+// c_rarg1: BasicObjectLock for lock
|
|
+//
|
|
+// Kills:
|
|
+// x10
|
|
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
|
|
+// t0, t1 (temp regs)
|
|
+void InterpreterMacroAssembler::unlock_object(Register lock_reg)
|
|
+{
|
|
+ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
|
|
+
|
|
+ if (UseHeavyMonitors) {
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
|
|
+ lock_reg);
|
|
+ } else {
|
|
+ Label done;
|
|
+
|
|
+ const Register swap_reg = x10;
|
|
+ const Register header_reg = c_rarg2; // Will contain the old oopMark
|
|
+ const Register obj_reg = c_rarg3; // Will contain the oop
|
|
+
|
|
+ save_bcp(); // Save in case of exception
|
|
+
|
|
+ // Convert from BasicObjectLock structure to object and BasicLock
|
|
+ // structure Store the BasicLock address into x10
|
|
+ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
|
|
+
|
|
+ // Load oop into obj_reg(c_rarg3)
|
|
+ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
|
|
+
|
|
+ // Free entry
|
|
+ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ biased_locking_exit(obj_reg, header_reg, done);
|
|
+ }
|
|
+
|
|
+ // Load the old header from BasicLock structure
|
|
+ ld(header_reg, Address(swap_reg,
|
|
+ BasicLock::displaced_header_offset_in_bytes()));
|
|
+
|
|
+ // Test for recursion
|
|
+ beqz(header_reg, done);
|
|
+
|
|
+ // Atomic swap back the old header
|
|
+ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL);
|
|
+
|
|
+ // Call the runtime routine for slow case.
|
|
+ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
|
|
+ lock_reg);
|
|
+
|
|
+ bind(done);
|
|
+
|
|
+ restore_bcp();
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
|
|
+ Label& zero_continue) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+ beqz(mdp, zero_continue);
|
|
+}
|
|
+
|
|
+// Set the method data pointer for the current bcp.
|
|
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ Label set_mdp;
|
|
+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11
|
|
+
|
|
+ // Test MDO to avoid the call if it is NULL.
|
|
+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset())));
|
|
+ beqz(x10, set_mdp);
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp);
|
|
+ // x10: mdi
|
|
+ // mdo is guaranteed to be non-zero here, we checked for it before the call.
|
|
+ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
|
|
+ la(x11, Address(x11, in_bytes(MethodData::data_offset())));
|
|
+ add(x10, x11, x10);
|
|
+ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+ bind(set_mdp);
|
|
+ pop_reg(RegSet::of(x10, x11), sp);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::verify_method_data_pointer() {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+#ifdef ASSERT
|
|
+ Label verify_continue;
|
|
+ add(sp, sp, -4 * wordSize);
|
|
+ sd(x10, Address(sp, 0));
|
|
+ sd(x11, Address(sp, wordSize));
|
|
+ sd(x12, Address(sp, 2 * wordSize));
|
|
+ sd(x13, Address(sp, 3 * wordSize));
|
|
+ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue
|
|
+ get_method(x11);
|
|
+
|
|
+ // If the mdp is valid, it will point to a DataLayout header which is
|
|
+ // consistent with the bcp. The converse is highly probable also.
|
|
+ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset())));
|
|
+ ld(t0, Address(x11, Method::const_offset()));
|
|
+ add(x12, x12, t0);
|
|
+ la(x12, Address(x12, ConstMethod::codes_offset()));
|
|
+ beq(x12, xbcp, verify_continue);
|
|
+ // x10: method
|
|
+ // xbcp: bcp // xbcp == 22
|
|
+ // x13: mdp
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
|
|
+ x11, xbcp, x13);
|
|
+ bind(verify_continue);
|
|
+ ld(x10, Address(sp, 0));
|
|
+ ld(x11, Address(sp, wordSize));
|
|
+ ld(x12, Address(sp, 2 * wordSize));
|
|
+ ld(x13, Address(sp, 3 * wordSize));
|
|
+ add(sp, sp, 4 * wordSize);
|
|
+#endif // ASSERT
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
|
|
+ int constant,
|
|
+ Register value) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ Address data(mdp_in, constant);
|
|
+ sd(value, data);
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
|
|
+ int constant,
|
|
+ bool decrement) {
|
|
+ increment_mdp_data_at(mdp_in, noreg, constant, decrement);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
|
|
+ Register reg,
|
|
+ int constant,
|
|
+ bool decrement) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ // %%% this does 64-bit counters; at best it is wasting space,
+ // at worst it is a rare bug when counters overflow
|
|
+
|
|
+ assert_different_registers(t1, t0, mdp_in, reg);
|
|
+
|
|
+ Address addr1(mdp_in, constant);
|
|
+ Address addr2(t1, 0);
|
|
+ Address &addr = addr1;
|
|
+ if (reg != noreg) {
|
|
+ la(t1, addr1);
|
|
+ add(t1, t1, reg);
|
|
+ addr = addr2;
|
|
+ }
|
|
+
|
|
+ if (decrement) {
|
|
+ ld(t0, addr);
|
|
+ addi(t0, t0, -DataLayout::counter_increment);
|
|
+ Label L;
|
|
+ bltz(t0, L); // skip store if counter underflow
|
|
+ sd(t0, addr);
|
|
+ bind(L);
|
|
+ } else {
|
|
+ assert(DataLayout::counter_increment == 1,
|
|
+ "flow-free idiom only works with 1");
|
|
+ ld(t0, addr);
|
|
+ addi(t0, t0, DataLayout::counter_increment);
|
|
+ Label L;
|
|
+ blez(t0, L); // skip store if counter overflow
|
|
+ sd(t0, addr);
|
|
+ bind(L);
|
|
+ }
|
|
+}
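The increment/decrement paths above form a saturating counter: the new value is stored only while it keeps the expected sign, so a counter that would wrap simply sticks at its last good value. In plain C++ terms, a sketch of the idiom rather than the HotSpot code:

    #include <cstdint>

    static void bump_profile_counter(int64_t* cell, bool decrement) {
      if (decrement) {
        int64_t v = *cell - 1;          // DataLayout::counter_increment == 1
        if (v >= 0) *cell = v;          // bltz(t0, L) skips the store on underflow
      } else {
        int64_t v = *cell + 1;
        if (v > 0) *cell = v;           // blez(t0, L) skips the store on overflow
      }
    }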
|
|
+
|
|
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
|
|
+ int flag_byte_constant) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ int flags_offset = in_bytes(DataLayout::flags_offset());
|
|
+ // Set the flag
|
|
+ lbu(t1, Address(mdp_in, flags_offset));
|
|
+ ori(t1, t1, flag_byte_constant);
|
|
+ sb(t1, Address(mdp_in, flags_offset));
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
|
|
+ int offset,
|
|
+ Register value,
|
|
+ Register test_value_out,
|
|
+ Label& not_equal_continue) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ if (test_value_out == noreg) {
|
|
+ ld(t1, Address(mdp_in, offset));
|
|
+ bne(value, t1, not_equal_continue);
|
|
+ } else {
|
|
+ // Put the test value into a register, so caller can use it:
|
|
+ ld(test_value_out, Address(mdp_in, offset));
|
|
+ bne(value, test_value_out, not_equal_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
|
|
+ int offset_of_disp) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ ld(t1, Address(mdp_in, offset_of_disp));
|
|
+ add(mdp_in, mdp_in, t1);
|
|
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
|
|
+ Register reg,
|
|
+ int offset_of_disp) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ add(t1, mdp_in, reg);
|
|
+ ld(t1, Address(t1, offset_of_disp));
|
|
+ add(mdp_in, mdp_in, t1);
|
|
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
|
|
+ int constant) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+ addi(mdp_in, mdp_in, constant);
|
|
+ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
|
|
+ assert(ProfileInterpreter, "must be profiling interpreter");
|
|
+
|
|
+ // save/restore across call_VM
|
|
+ addi(sp, sp, -2 * wordSize);
|
|
+ sd(zr, Address(sp, 0));
|
|
+ sd(return_bci, Address(sp, wordSize));
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
|
|
+ return_bci);
|
|
+ ld(zr, Address(sp, 0));
|
|
+ ld(return_bci, Address(sp, wordSize));
|
|
+ addi(sp, sp, 2 * wordSize);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
|
|
+ Register bumped_count) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ // Otherwise, assign to mdp
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // We are taking a branch. Increment the taken count.
|
|
+ Address data(mdp, in_bytes(JumpData::taken_offset()));
|
|
+ ld(bumped_count, data);
|
|
+ assert(DataLayout::counter_increment == 1,
|
|
+ "flow-free idiom only works with 1");
|
|
+ addi(bumped_count, bumped_count, DataLayout::counter_increment);
|
|
+ Label L;
|
|
+ // e.g. bumped_count = 0x7fffffffffffffff: adding 1 overflows to a negative value, so we test <= 0
|
|
+ blez(bumped_count, L); // skip store if counter overflow,
|
|
+ sd(bumped_count, data);
|
|
+ bind(L);
|
|
+ // The method data pointer needs to be updated to reflect the new target.
|
|
+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // We are taking a branch. Increment the not taken count.
|
|
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
|
|
+
|
|
+ // The method data pointer needs to be updated to correspond to
|
|
+ // the next bytecode
|
|
+ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_call(Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // We are making a call. Increment the count.
|
|
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
|
|
+
|
|
+ // The method data pointer needs to be updated to reflect the new target.
|
|
+ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // We are making a call. Increment the count.
|
|
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
|
|
+
|
|
+ // The method data pointer needs to be updated to reflect the new target.
|
|
+ update_mdp_by_constant(mdp,
|
|
+ in_bytes(VirtualCallData::
|
|
+ virtual_call_data_size()));
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
|
|
+ Register mdp,
|
|
+ Register reg2,
|
|
+ bool receiver_can_be_null) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ Label skip_receiver_profile;
|
|
+ if (receiver_can_be_null) {
|
|
+ Label not_null;
|
|
+ // We are making a call. Increment the count for null receiver.
|
|
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
|
|
+ j(skip_receiver_profile);
|
|
+ bind(not_null);
|
|
+ }
|
|
+
|
|
+ // Record the receiver type.
|
|
+ record_klass_in_profile(receiver, mdp, reg2, true);
|
|
+ bind(skip_receiver_profile);
|
|
+
|
|
+ // The method data pointer needs to be updated to reflect the new target.
|
|
+
|
|
+ update_mdp_by_constant(mdp,
|
|
+ in_bytes(VirtualCallData::
|
|
+ virtual_call_data_size()));
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+// This routine creates a state machine for updating the multi-row
|
|
+// type profile at a virtual call site (or other type-sensitive bytecode).
|
|
+// The machine visits each row (of receiver/count) until the receiver type
|
|
+// is found, or until it runs out of rows. At the same time, it remembers
|
|
+// the location of the first empty row. (An empty row records null for its
|
|
+// receiver, and can be allocated for a newly-observed receiver type.)
|
|
+// Because there are two degrees of freedom in the state, a simple linear
|
|
+// search will not work; it must be a decision tree. Hence this helper
|
|
+// function is recursive, to generate the required tree structured code.
|
|
+// It's the interpreter, so we are trading off code space for speed.
|
|
+// See below for example code.
|
|
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
|
|
+ Register receiver, Register mdp,
|
|
+ Register reg2,
|
|
+ Label& done, bool is_virtual_call) {
|
|
+ if (TypeProfileWidth == 0) {
|
|
+ if (is_virtual_call) {
|
|
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
|
|
+ }
|
|
+
|
|
+ } else {
|
|
+ int non_profiled_offset = -1;
|
|
+ if (is_virtual_call) {
|
|
+ non_profiled_offset = in_bytes(CounterData::count_offset());
|
|
+ }
|
|
+
|
|
+ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
|
|
+ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::record_item_in_profile_helper(
|
|
+ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows,
|
|
+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) {
|
|
+ int last_row = total_rows - 1;
|
|
+ assert(start_row <= last_row, "must be work left to do");
|
|
+ // Test this row for both the item and for null.
|
|
+ // Take any of three different outcomes:
|
|
+ // 1. found item => increment count and goto done
|
|
+ // 2. found null => keep looking for case 1, maybe allocate this cell
|
|
+ // 3. found something else => keep looking for cases 1 and 2
|
|
+ // Case 3 is handled by a recursive call.
|
|
+ for (int row = start_row; row <= last_row; row++) {
|
|
+ Label next_test;
|
|
+ bool test_for_null_also = (row == start_row);
|
|
+
|
|
+ // See if the item is item[n].
|
|
+ int item_offset = in_bytes(item_offset_fn(row));
|
|
+ test_mdp_data_at(mdp, item_offset, item,
|
|
+ (test_for_null_also ? reg2 : noreg),
|
|
+ next_test);
|
|
+ // (Reg2 now contains the item from the CallData.)
|
|
+
|
|
+ // The item is item[n]. Increment count[n].
|
|
+ int count_offset = in_bytes(item_count_offset_fn(row));
|
|
+ increment_mdp_data_at(mdp, count_offset);
|
|
+ j(done);
|
|
+ bind(next_test);
|
|
+
|
|
+ if (test_for_null_also) {
|
|
+ Label found_null;
|
|
+ // Failed the equality check on item[n]... Test for null.
|
|
+ if (start_row == last_row) {
|
|
+ // The only thing left to do is handle the null case.
|
|
+ if (non_profiled_offset >= 0) {
|
|
+ beqz(reg2, found_null);
|
|
+ // Item did not match any saved item and there is no empty row for it.
|
|
+ // Increment total counter to indicate polymorphic case.
|
|
+ increment_mdp_data_at(mdp, non_profiled_offset);
|
|
+ j(done);
|
|
+ bind(found_null);
|
|
+ } else {
|
|
+ bnez(reg2, done);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ // Since null is rare, make it be the branch-taken case.
|
|
+ beqz(reg2, found_null);
|
|
+
|
|
+ // Put all the "Case 3" tests here.
|
|
+ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
|
|
+ item_offset_fn, item_count_offset_fn, non_profiled_offset);
|
|
+
|
|
+ // Found a null. Keep searching for a matching item,
|
|
+ // but remember that this is an empty (unused) slot.
|
|
+ bind(found_null);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // In the fall-through case, we found no matching item, but we
+ // observed that item[start_row] is NULL.
|
|
+ // Fill in the item field and increment the count.
|
|
+ int item_offset = in_bytes(item_offset_fn(start_row));
|
|
+ set_mdp_data_at(mdp, item_offset, item);
|
|
+ int count_offset = in_bytes(item_count_offset_fn(start_row));
|
|
+ mv(reg2, DataLayout::counter_increment);
|
|
+ set_mdp_data_at(mdp, count_offset, reg2);
|
|
+ if (start_row > 0) {
|
|
+ j(done);
|
|
+ }
|
|
+}
|
|
+
|
|
+// Example state machine code for three profile rows:
|
|
+// # main copy of decision tree, rooted at row[1]
|
|
+// if (row[0].rec == rec) then [
|
|
+// row[0].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// if (row[0].rec != NULL) then [
|
|
+// # inner copy of decision tree, rooted at row[1]
|
|
+// if (row[1].rec == rec) then [
|
|
+// row[1].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// if (row[1].rec != NULL) then [
|
|
+// # degenerate decision tree, rooted at row[2]
|
|
+// if (row[2].rec == rec) then [
|
|
+// row[2].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// if (row[2].rec != NULL) then [
|
|
+// count.incr()
|
|
+// goto done
|
|
+// ] # overflow
|
|
+// row[2].init(rec)
|
|
+// goto done
|
|
+// ] else [
|
|
+// # remember row[1] is empty
|
|
+// if (row[2].rec == rec) then [
|
|
+// row[2].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// row[1].init(rec)
|
|
+// goto done
|
|
+// ]
|
|
+// else [
|
|
+// # remember row[0] is empty
|
|
+// if (row[1].rec == rec) then [
|
|
+// row[1].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// if (row[2].rec == rec) then [
|
|
+// row[2].incr()
|
|
+// goto done
|
|
+// ]
|
|
+// row[0].init(rec)
|
|
+// goto done
|
|
+// ]
|
|
+// done:
|
|
+
|
|
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
|
|
+ Register mdp, Register reg2,
|
|
+ bool is_virtual_call) {
|
|
+ assert(ProfileInterpreter, "must be profiling");
|
|
+ Label done;
|
|
+
|
|
+ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call);
|
|
+
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // Update the total ret count.
|
|
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
|
|
+
|
|
+ for (uint row = 0; row < RetData::row_limit(); row++) {
|
|
+ Label next_test;
|
|
+
|
|
+ // See if return_bci is equal to bci[n]:
|
|
+ test_mdp_data_at(mdp,
|
|
+ in_bytes(RetData::bci_offset(row)),
|
|
+ return_bci, noreg,
|
|
+ next_test);
|
|
+
|
|
+ // return_bci is equal to bci[n]. Increment the count.
|
|
+ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
|
|
+
|
|
+ // The method data pointer needs to be updated to reflect the new target.
|
|
+ update_mdp_by_offset(mdp,
|
|
+ in_bytes(RetData::bci_displacement_offset(row)));
|
|
+ j(profile_continue);
|
|
+ bind(next_test);
|
|
+ }
|
|
+
|
|
+ update_mdp_for_ret(return_bci);
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
|
|
+
|
|
+ // The method data pointer needs to be updated.
|
|
+ int mdp_delta = in_bytes(BitData::bit_data_size());
|
|
+ if (TypeProfileCasts) {
|
|
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
|
|
+ }
|
|
+ update_mdp_by_constant(mdp, mdp_delta);
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
|
|
+ if (ProfileInterpreter && TypeProfileCasts) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ int count_offset = in_bytes(CounterData::count_offset());
|
|
+ // Back up the address, since we have already bumped the mdp.
|
|
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
|
|
+
|
|
+ // *Decrement* the counter. We expect to see zero or small negatives.
|
|
+ increment_mdp_data_at(mdp, count_offset, true);
|
|
+
|
|
+ bind (profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // The method data pointer needs to be updated.
|
|
+ int mdp_delta = in_bytes(BitData::bit_data_size());
|
|
+ if (TypeProfileCasts) {
|
|
+ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
|
|
+
|
|
+ // Record the object type.
|
|
+ record_klass_in_profile(klass, mdp, reg2, false);
|
|
+ }
|
|
+ update_mdp_by_constant(mdp, mdp_delta);
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // Update the default case count
|
|
+ increment_mdp_data_at(mdp,
|
|
+ in_bytes(MultiBranchData::default_count_offset()));
|
|
+
|
|
+ // The method data pointer needs to be updated.
|
|
+ update_mdp_by_offset(mdp,
|
|
+ in_bytes(MultiBranchData::
|
|
+ default_displacement_offset()));
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_switch_case(Register index,
|
|
+ Register mdp,
|
|
+ Register reg2) {
|
|
+ if (ProfileInterpreter) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ // If no method data exists, go to profile_continue.
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // Build the base (index * per_case_size_in_bytes()) +
|
|
+ // case_array_offset_in_bytes()
|
|
+ mvw(reg2, in_bytes(MultiBranchData::per_case_size()));
|
|
+ mvw(t0, in_bytes(MultiBranchData::case_array_offset()));
|
|
+ Assembler::mul(index, index, reg2);
|
|
+ Assembler::add(index, index, t0);
|
|
+
|
|
+ // Update the case count
|
|
+ increment_mdp_data_at(mdp,
|
|
+ index,
|
|
+ in_bytes(MultiBranchData::relative_count_offset()));
|
|
+
|
|
+ // The method data pointer needs to be updated.
|
|
+ update_mdp_by_offset(mdp,
|
|
+ index,
|
|
+ in_bytes(MultiBranchData::
|
|
+ relative_displacement_offset()));
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; }
|
|
+
|
|
+void InterpreterMacroAssembler::notify_method_entry() {
|
|
+ // Whenever JVMTI puts the thread in interp_only_mode, method entry/exit events are
+ // sent to track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
|
|
+ if (JvmtiExport::can_post_interpreter_events()) {
|
|
+ Label L;
|
|
+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
|
|
+ beqz(x13, L);
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::post_method_entry));
|
|
+ bind(L);
|
|
+ }
|
|
+
|
|
+ {
|
|
+ SkipIfEqual skip(this, &DTraceMethodProbes, false);
|
|
+ get_method(c_rarg1);
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
|
|
+ xthread, c_rarg1);
|
|
+ }
|
|
+
|
|
+ // RedefineClasses() tracing support for obsolete method entry
|
|
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
|
|
+ get_method(c_rarg1);
|
|
+ call_VM_leaf(
|
|
+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
|
|
+ xthread, c_rarg1);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void InterpreterMacroAssembler::notify_method_exit(
|
|
+ TosState state, NotifyMethodExitMode mode) {
|
|
+ // Whenever JVMTI puts the thread in interp_only_mode, method entry/exit events are
+ // sent to track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
|
|
+ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) {
|
|
+ Label L;
|
|
+ // Note: frame::interpreter_frame_result has a dependency on how the
|
|
+ // method result is saved across the call to post_method_exit. If this
|
|
+ // is changed then the interpreter_frame_result implementation will
|
|
+ // need to be updated too.
|
|
+
|
|
+ // template interpreter will leave the result on the top of the stack.
|
|
+ push(state);
|
|
+ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
|
|
+ beqz(x13, L);
|
|
+ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
|
|
+ bind(L);
|
|
+ pop(state);
|
|
+ }
|
|
+
|
|
+ {
|
|
+ SkipIfEqual skip(this, &DTraceMethodProbes, false);
|
|
+ push(state);
|
|
+ get_method(c_rarg1);
|
|
+ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
|
|
+ xthread, c_rarg1);
|
|
+ pop(state);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+// Jump to 'where' if ((*counter_addr += increment) & mask) == 0.
|
|
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
|
|
+ int increment, Address mask,
|
|
+ Register tmp1, Register tmp2,
|
|
+ bool preloaded, Label* where) {
|
|
+ Label done;
|
|
+ if (!preloaded) {
|
|
+ lwu(tmp1, counter_addr);
|
|
+ }
|
|
+ add(tmp1, tmp1, increment);
|
|
+ sw(tmp1, counter_addr);
|
|
+ lwu(tmp2, mask);
|
|
+ andr(tmp1, tmp1, tmp2);
|
|
+ bnez(tmp1, done);
|
|
+ j(*where); // offset is too large so we have to use j instead of beqz here
|
|
+ bind(done);
|
|
+}
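increment_mask_and_jump is how the interpreter notices that an invocation or backedge counter crossed a threshold: bump the counter, mask it, and take the jump only when the masked value is zero. A hedged sketch of the test it emits:

    #include <cstdint>

    static bool counter_crossed_threshold(uint32_t* counter, uint32_t increment, uint32_t mask) {
      *counter += increment;            // add(tmp1, tmp1, increment); sw(tmp1, counter_addr)
      return (*counter & mask) == 0;    // andr + bnez(done); only a zero result jumps to 'where'
    }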
|
|
+
|
|
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
|
|
+ int number_of_arguments) {
|
|
+ // interpreter specific
|
|
+ //
|
|
+ // Note: No need to save/restore rbcp & rlocals pointer since these
|
|
+ // are callee saved registers and no blocking/ GC can happen
|
|
+ // in leaf calls.
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ beqz(t0, L);
|
|
+ stop("InterpreterMacroAssembler::call_VM_leaf_base:"
|
|
+ " last_sp != NULL");
|
|
+ bind(L);
|
|
+ }
|
|
+#endif /* ASSERT */
|
|
+ // super call
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::call_VM_base(Register oop_result,
|
|
+ Register java_thread,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ int number_of_arguments,
|
|
+ bool check_exceptions) {
|
|
+ // interpreter specific
|
|
+ //
|
|
+ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't
|
|
+ // really make a difference for these runtime calls, since they are
|
|
+ // slow anyway. Btw., bcp must be saved/restored since it may change
|
|
+ // due to GC.
|
|
+ save_bcp();
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ beqz(t0, L);
|
|
+ stop("InterpreterMacroAssembler::call_VM_base:"
|
|
+ " last_sp != NULL");
|
|
+ bind(L);
|
|
+ }
|
|
+#endif /* ASSERT */
|
|
+ // super call
|
|
+ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
|
|
+ entry_point, number_of_arguments,
|
|
+ check_exceptions);
|
|
+// interpreter specific
|
|
+ restore_bcp();
|
|
+ restore_locals();
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
|
|
+ assert_different_registers(obj, tmp, t0, mdo_addr.base());
|
|
+ Label update, next, none;
|
|
+
|
|
+ verify_oop(obj);
|
|
+
|
|
+ bnez(obj, update);
|
|
+ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp);
|
|
+ j(next);
|
|
+
|
|
+ bind(update);
|
|
+ load_klass(obj, obj);
|
|
+
|
|
+ ld(t0, mdo_addr);
|
|
+ xorr(obj, obj, t0);
|
|
+ andi(t0, obj, TypeEntries::type_klass_mask);
|
|
+ beqz(t0, next); // klass seen before, nothing to
|
|
+ // do. The unknown bit may have been
|
|
+ // set already but no need to check.
|
|
+
|
|
+ andi(t0, obj, TypeEntries::type_unknown);
|
|
+ bnez(t0, next);
|
|
+ // already unknown. Nothing to do anymore.
|
|
+
|
|
+ ld(t0, mdo_addr);
|
|
+ beqz(t0, none);
|
|
+ mv(tmp, (u1)TypeEntries::null_seen);
|
|
+ beq(t0, tmp, none);
|
|
+ // There is a chance that the checks above (re-reading profiling
+ // data from memory) fail if another thread has just set the
+ // profile data to this obj's klass
|
|
+ ld(t0, mdo_addr);
|
|
+ xorr(obj, obj, t0);
|
|
+ andi(t0, obj, TypeEntries::type_klass_mask);
|
|
+ beqz(t0, next);
|
|
+
|
|
+ // different than before. Cannot keep accurate profile.
|
|
+ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp);
|
|
+ j(next);
|
|
+
|
|
+ bind(none);
|
|
+ // first time here. Set profile type.
|
|
+ sd(obj, mdo_addr);
|
|
+
|
|
+ bind(next);
|
|
+}
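profile_obj_type relies on the TypeEntries encoding where a single MDO cell stores a Klass pointer with a couple of low flag bits (null_seen, type_unknown). XOR-ing the observed klass against the cell and masking with type_klass_mask is therefore a cheap "same klass already recorded?" test. A simplified model follows; the flag values are placeholders, and the re-check against a racing writer is omitted:

    #include <cstdint>

    const uintptr_t NULL_SEEN    = 1;              // placeholder for TypeEntries::null_seen
    const uintptr_t TYPE_UNKNOWN = 2;              // placeholder for TypeEntries::type_unknown
    const uintptr_t KLASS_MASK   = ~(uintptr_t)3;  // placeholder for TypeEntries::type_klass_mask

    static void profile_obj_type_model(uintptr_t klass_or_null, uintptr_t* cell) {
      if (klass_or_null == 0) { *cell |= NULL_SEEN; return; }   // null receiver seen
      uintptr_t diff = klass_or_null ^ *cell;
      if ((diff & KLASS_MASK) == 0) return;        // same klass already recorded
      if (*cell & TYPE_UNKNOWN) return;            // already marked polymorphic
      if (*cell == 0 || *cell == NULL_SEEN) {
        *cell = klass_or_null | (*cell & NULL_SEEN);  // first klass observed here
        return;
      }
      *cell |= TYPE_UNKNOWN;                       // conflicting klass: give up on a precise type
    }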
|
|
+
|
|
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
|
|
+ if (!ProfileInterpreter) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (MethodData::profile_arguments() || MethodData::profile_return()) {
|
|
+ Label profile_continue;
|
|
+
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
|
|
+
|
|
+ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start));
|
|
+ if (is_virtual) {
|
|
+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag);
|
|
+ bne(t0, tmp, profile_continue);
|
|
+ } else {
|
|
+ mv(tmp, (u1)DataLayout::call_type_data_tag);
|
|
+ bne(t0, tmp, profile_continue);
|
|
+ }
|
|
+
|
|
+ // calculate slot step
|
|
+ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0));
|
|
+ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0;
|
|
+
|
|
+ // calculate type step
|
|
+ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0));
|
|
+ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0;
|
|
+
|
|
+ if (MethodData::profile_arguments()) {
|
|
+ Label done, loop, loopEnd, profileArgument, profileReturnType;
|
|
+ RegSet pushed_registers;
|
|
+ pushed_registers += x15;
|
|
+ pushed_registers += x16;
|
|
+ pushed_registers += x17;
|
|
+ Register mdo_addr = x15;
|
|
+ Register index = x16;
|
|
+ Register off_to_args = x17;
|
|
+ push_reg(pushed_registers, sp);
|
|
+
|
|
+ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset()));
|
|
+ mv(t0, TypeProfileArgsLimit);
|
|
+ beqz(t0, loopEnd);
|
|
+
|
|
+ mv(index, zr); // index < TypeProfileArgsLimit
|
|
+ bind(loop);
|
|
+ bgtz(index, profileReturnType);
|
|
+ mv(t0, (int)MethodData::profile_return());
|
|
+ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false
|
|
+ bind(profileReturnType);
|
|
+ // If return value type is profiled we may have no argument to profile
|
|
+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
|
|
+ mv(t1, - TypeStackSlotEntries::per_arg_count());
|
|
+ mul(t1, index, t1);
|
|
+ add(tmp, tmp, t1);
|
|
+ mv(t1, TypeStackSlotEntries::per_arg_count());
|
|
+ add(t0, mdp, off_to_args);
|
|
+ blt(tmp, t1, done);
|
|
+
|
|
+ bind(profileArgument);
|
|
+
|
|
+ ld(tmp, Address(callee, Method::const_offset()));
|
|
+ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
|
|
+ // stack offset o (zero based) from the start of the argument
|
|
+ // list, for n arguments translates into offset n - o - 1 from
|
|
+ // the end of the argument list
|
|
+ mv(t0, stack_slot_offset0);
|
|
+ mv(t1, slot_step);
|
|
+ mul(t1, index, t1);
|
|
+ add(t0, t0, t1);
|
|
+ add(t0, mdp, t0);
|
|
+ ld(t0, Address(t0));
|
|
+ sub(tmp, tmp, t0);
|
|
+ addi(tmp, tmp, -1);
|
|
+ Address arg_addr = argument_address(tmp);
|
|
+ ld(tmp, arg_addr);
|
|
+
|
|
+ mv(t0, argument_type_offset0);
|
|
+ mv(t1, type_step);
|
|
+ mul(t1, index, t1);
|
|
+ add(t0, t0, t1);
|
|
+ add(mdo_addr, mdp, t0);
|
|
+ Address mdo_arg_addr(mdo_addr, 0);
|
|
+ profile_obj_type(tmp, mdo_arg_addr, t1);
|
|
+
|
|
+ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
|
|
+ addi(off_to_args, off_to_args, to_add);
|
|
+
|
|
+ // increment index by 1
|
|
+ addi(index, index, 1);
|
|
+ mv(t1, TypeProfileArgsLimit);
|
|
+ blt(index, t1, loop);
|
|
+ bind(loopEnd);
|
|
+
|
|
+ if (MethodData::profile_return()) {
|
|
+ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
|
|
+ addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
|
|
+ }
|
|
+
|
|
+ add(t0, mdp, off_to_args);
|
|
+ bind(done);
|
|
+ mv(mdp, t0);
|
|
+
|
|
+ // unspill the clobbered registers
|
|
+ pop_reg(pushed_registers, sp);
|
|
+
|
|
+ if (MethodData::profile_return()) {
|
|
+ // We're right after the type profile for the last
|
|
+ // argument. tmp is the number of cells left in the
|
|
+ // CallTypeData/VirtualCallTypeData to reach its end. Non null
|
|
+ // if there's a return to profile.
|
|
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
|
|
+ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size));
|
|
+ }
|
|
+ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
|
|
+ } else {
|
|
+ assert(MethodData::profile_return(), "either profile call args or call ret");
|
|
+ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
|
|
+ }
|
|
+
|
|
+ // mdp points right after the end of the
|
|
+ // CallTypeData/VirtualCallTypeData, right after the cells for the
|
|
+ // return value type if there's one
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
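One piece of arithmetic in profile_arguments_type worth spelling out: a zero-based stack-slot offset o, for a call with n parameter slots, maps to index n - o - 1 counted from the end of the argument list, which is what argument_address expects. A trivial check of the formula:

    #include <cassert>

    int main() {
      int n = 3;                  // size_of_parameters
      assert(n - 0 - 1 == 2);     // first argument (o == 0) is the deepest slot
      assert(n - 2 - 1 == 0);     // last argument (o == n - 1) is on top of the list
      return 0;
    }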
|
|
+
|
|
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
|
|
+ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1);
|
|
+ if (ProfileInterpreter && MethodData::profile_return()) {
|
|
+ Label profile_continue, done;
|
|
+
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ if (MethodData::profile_return_jsr292_only()) {
|
|
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
|
|
+
|
|
+ // If we don't profile all invoke bytecodes we must make sure
+ // it's a bytecode we indeed profile. We can't go back to the
+ // beginning of the ProfileData we intend to update to check its
+ // type because we're right after it and we don't know its
+ // length.
|
|
+ Label do_profile;
|
|
+ lbu(t0, Address(xbcp, 0));
|
|
+ mv(tmp, (u1)Bytecodes::_invokedynamic);
|
|
+ beq(t0, tmp, do_profile);
|
|
+ mv(tmp, (u1)Bytecodes::_invokehandle);
|
|
+ beq(t0, tmp, do_profile);
|
|
+ get_method(tmp);
|
|
+ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
|
|
+ mv(t1, vmIntrinsics::_compiledLambdaForm);
|
|
+ bne(t0, t1, profile_continue);
|
|
+ bind(do_profile);
|
|
+ }
|
|
+
|
|
+ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
|
|
+ mv(tmp, ret);
|
|
+ profile_obj_type(tmp, mdo_ret_addr, t1);
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) {
|
|
+ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3);
|
|
+ if (ProfileInterpreter && MethodData::profile_parameters()) {
|
|
+ Label profile_continue, done;
|
|
+
|
|
+ test_method_data_pointer(mdp, profile_continue);
|
|
+
|
|
+ // Load the offset of the area within the MDO used for
|
|
+ // parameters. If it's negative we're not profiling any parameters
|
|
+ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())));
|
|
+ srli(tmp2, tmp1, 31);
|
|
+ bnez(tmp2, profile_continue); // i.e. sign bit set
|
|
+
|
|
+ // Compute a pointer to the area for parameters from the offset
|
|
+ // and move the pointer to the slot for the last
|
|
+ // parameters. Collect profiling from last parameter down.
|
|
+ // mdo start + parameters offset + array length - 1
|
|
+ add(mdp, mdp, tmp1);
|
|
+ ld(tmp1, Address(mdp, ArrayData::array_len_offset()));
|
|
+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
|
|
+
|
|
+ Label loop;
|
|
+ bind(loop);
|
|
+
|
|
+ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
|
|
+ int type_base = in_bytes(ParametersTypeData::type_offset(0));
|
|
+ int per_arg_scale = exact_log2(DataLayout::cell_size);
|
|
+ add(t0, mdp, off_base);
|
|
+ add(t1, mdp, type_base);
|
|
+
|
|
+
|
|
+ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale);
|
|
+ // load offset on the stack from the slot for this parameter
|
|
+ ld(tmp2, Address(tmp2, 0));
|
|
+ neg(tmp2, tmp2);
|
|
+
|
|
+ // read the parameter from the local area
|
|
+ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize);
|
|
+ ld(tmp2, Address(tmp2, 0));
|
|
+
|
|
+ // profile the parameter
|
|
+ shadd(t1, tmp1, t1, t0, per_arg_scale);
|
|
+ Address arg_type(t1, 0);
|
|
+ profile_obj_type(tmp2, arg_type, tmp3);
|
|
+
|
|
+ // go to next parameter
|
|
+ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
|
|
+ bgez(tmp1, loop);
|
|
+
|
|
+ bind(profile_continue);
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::get_method_counters(Register method,
|
|
+ Register mcs, Label& skip) {
|
|
+ Label has_counters;
|
|
+ ld(mcs, Address(method, Method::method_counters_offset()));
|
|
+ bnez(mcs, has_counters);
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::build_method_counters), method);
|
|
+ ld(mcs, Address(method, Method::method_counters_offset()));
|
|
+ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory
|
|
+ bind(has_counters);
|
|
+}
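get_method_counters is a lazy-allocation pattern: reuse the MethodCounters if the Method already has one, otherwise call into the runtime to build it, and let the caller bail out via 'skip' if allocation failed. A hedged stand-alone model in which the runtime call is faked:

    struct MethodCounters { long invocation_count = 0; };
    struct Method         { MethodCounters* counters = nullptr; };

    // Stand-in for the InterpreterRuntime::build_method_counters runtime call.
    static MethodCounters* build_method_counters(Method* m) {
      if (m->counters == nullptr) m->counters = new MethodCounters();
      return m->counters;                 // the VM may return null on allocation failure
    }

    static MethodCounters* get_method_counters(Method* m) {
      MethodCounters* mcs = m->counters;  // ld(mcs, method_counters_offset)
      if (mcs == nullptr) {               // bnez(mcs, has_counters)
        build_method_counters(m);         // call_VM(..., build_method_counters, method)
        mcs = m->counters;                // reload after the call
      }
      return mcs;                         // caller branches to 'skip' if this is still null
    }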
|
|
+
|
|
+#ifdef ASSERT
|
|
+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits,
|
|
+ const char* msg, bool stop_by_hit) {
|
|
+ Label L;
|
|
+ andi(t0, access_flags, flag_bits);
|
|
+ if (stop_by_hit) {
|
|
+ beqz(t0, L);
|
|
+ } else {
|
|
+ bnez(t0, L);
|
|
+ }
|
|
+ stop(msg);
|
|
+ bind(L);
|
|
+}
|
|
+
|
|
+void InterpreterMacroAssembler::verify_frame_setup() {
|
|
+ Label L;
|
|
+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
|
|
+ ld(t0, monitor_block_top);
|
|
+ beq(esp, t0, L);
|
|
+ stop("broken stack frame setup in interpreter");
|
|
+ bind(L);
|
|
+}
|
|
+#endif
|
|
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
new file mode 100644
index 000000000..042ee8280
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
@@ -0,0 +1,283 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP
|
|
+#define CPU_RISCV_INTERP_MASM_RISCV_HPP
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "interpreter/invocationCounter.hpp"
|
|
+#include "runtime/frame.hpp"
|
|
+
|
|
+// This file specializes the assembler with interpreter-specific macros
|
|
+
|
|
+typedef ByteSize (*OffsetFunction)(uint);
|
|
+
|
|
+class InterpreterMacroAssembler: public MacroAssembler {
|
|
+ protected:
|
|
+ // Interpreter specific version of call_VM_base
|
|
+ using MacroAssembler::call_VM_leaf_base;
|
|
+
|
|
+ virtual void call_VM_leaf_base(address entry_point,
|
|
+ int number_of_arguments);
|
|
+
|
|
+ virtual void call_VM_base(Register oop_result,
|
|
+ Register java_thread,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ int number_of_arguments,
|
|
+ bool check_exceptions);
|
|
+
|
|
+ // base routine for all dispatches
|
|
+ void dispatch_base(TosState state, address* table, bool verifyoop = true,
|
|
+ bool generate_poll = false, Register Rs = t0);
|
|
+
|
|
+ public:
|
|
+ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
|
|
+ virtual ~InterpreterMacroAssembler() {}
|
|
+
|
|
+ void load_earlyret_value(TosState state);
|
|
+
|
|
+ void jump_to_entry(address entry);
|
|
+
|
|
+ virtual void check_and_handle_popframe(Register java_thread);
|
|
+ virtual void check_and_handle_earlyret(Register java_thread);
|
|
+
|
|
+ // Interpreter-specific registers
|
|
+ void save_bcp() {
|
|
+ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ void restore_bcp() {
|
|
+ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ void restore_locals() {
|
|
+ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ void restore_constant_pool_cache() {
|
|
+ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ void get_dispatch();
|
|
+
|
|
+ // Helpers for runtime call arguments/results
|
|
+ void get_method(Register reg) {
|
|
+ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ void get_const(Register reg) {
|
|
+ get_method(reg);
|
|
+ ld(reg, Address(reg, in_bytes(Method::const_offset())));
|
|
+ }
|
|
+
|
|
+ void get_constant_pool(Register reg) {
|
|
+ get_const(reg);
|
|
+ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset())));
|
|
+ }
|
|
+
|
|
+ void get_constant_pool_cache(Register reg) {
|
|
+ get_constant_pool(reg);
|
|
+ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes()));
|
|
+ }
|
|
+
|
|
+ void get_cpool_and_tags(Register cpool, Register tags) {
|
|
+ get_constant_pool(cpool);
|
|
+ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes()));
|
|
+ }
|
|
+
|
|
+ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
|
|
+ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
|
|
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
|
|
+ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
|
|
+ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
|
|
+ void get_method_counters(Register method, Register mcs, Label& skip);
|
|
+
|
|
+ // Load cpool->resolved_references(index).
|
|
+ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15);
|
|
+
|
|
+ // Load cpool->resolved_klass_at(index).
|
|
+ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp);
|
|
+
|
|
+ void pop_ptr(Register r = x10);
|
|
+ void pop_i(Register r = x10);
|
|
+ void pop_l(Register r = x10);
|
|
+ void pop_f(FloatRegister r = f10);
|
|
+ void pop_d(FloatRegister r = f10);
|
|
+ void push_ptr(Register r = x10);
|
|
+ void push_i(Register r = x10);
|
|
+ void push_l(Register r = x10);
|
|
+ void push_f(FloatRegister r = f10);
|
|
+ void push_d(FloatRegister r = f10);
|
|
+
|
|
+ void pop(TosState state); // transition vtos -> state
|
|
+ void push(TosState state); // transition state -> vtos
|
|
+
|
|
+ void empty_expression_stack() {
|
|
+ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
|
|
+ // NULL last_sp until next java call
|
|
+ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ // Helpers for swap and dup
|
|
+ void load_ptr(int n, Register val);
|
|
+ void store_ptr(int n, Register val);
|
|
+
|
|
+ // Load float value from 'address'. The value is loaded into FPU register f10.
|
|
+ void load_float(Address src);
|
|
+ void load_double(Address src);
|
|
+
|
|
+ // Generate a subtype check: branch to ok_is_subtype if sub_klass is
|
|
+ // a subtype of super_klass.
|
|
+ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
|
|
+
|
|
+ // Dispatching
|
|
+ void dispatch_prolog(TosState state, int step = 0);
|
|
+ void dispatch_epilog(TosState state, int step = 0);
|
|
+ // dispatch via t0
|
|
+ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0);
|
|
+ // dispatch normal table via t0 (assume t0 is loaded already)
|
|
+ void dispatch_only_normal(TosState state, Register Rs = t0);
|
|
+ void dispatch_only_noverify(TosState state, Register Rs = t0);
|
|
+ // load t0 from [xbcp + step] and dispatch via t0
|
|
+ void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
|
|
+ // load t0 from [xbcp] and dispatch via t0 and table
|
|
+ void dispatch_via (TosState state, address* table);
|
|
+
|
|
+ // jump to an invoked target
|
|
+ void prepare_to_jump_from_interpreted();
|
|
+ void jump_from_interpreted(Register method);
|
|
+
|
|
+
|
|
+ // Returning from interpreted functions
|
|
+ //
|
|
+ // Removes the current activation (incl. unlocking of monitors)
|
|
+ // and sets up the return address. This code is also used for
|
|
+ // exception unwinding. In that case, we do not want to throw
|
|
+ // IllegalMonitorStateExceptions, since that might get us into an
|
|
+ // infinite rethrow exception loop.
|
|
+ // Additionally this code is used for popFrame and earlyReturn.
|
|
+ // In popFrame case we want to skip throwing an exception,
|
|
+ // installing an exception, and notifying jvmdi.
|
|
+ // In earlyReturn case we only want to skip throwing an exception
|
|
+ // and installing an exception.
|
|
+ void remove_activation(TosState state,
|
|
+ bool throw_monitor_exception = true,
|
|
+ bool install_monitor_exception = true,
|
|
+ bool notify_jvmdi = true);
|
|
+
|
|
+ // FIXME: Give us a valid frame at a null check.
|
|
+ virtual void null_check(Register reg, int offset = -1) {
|
|
+ MacroAssembler::null_check(reg, offset);
|
|
+ }
|
|
+
|
|
+ // Object locking
|
|
+ void lock_object (Register lock_reg);
|
|
+ void unlock_object(Register lock_reg);
|
|
+
|
|
+ // Interpreter profiling operations
|
|
+ void set_method_data_pointer_for_bcp();
|
|
+ void test_method_data_pointer(Register mdp, Label& zero_continue);
|
|
+ void verify_method_data_pointer();
|
|
+
|
|
+ void set_mdp_data_at(Register mdp_in, int constant, Register value);
|
|
+ void increment_mdp_data_at(Address data, bool decrement = false);
|
|
+ void increment_mdp_data_at(Register mdp_in, int constant,
|
|
+ bool decrement = false);
|
|
+ void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
|
|
+ bool decrement = false);
|
|
+ void increment_mask_and_jump(Address counter_addr,
|
|
+ int increment, Address mask,
|
|
+ Register tmp1, Register tmp2,
|
|
+ bool preloaded, Label* where);
|
|
+
|
|
+ void set_mdp_flag_at(Register mdp_in, int flag_constant);
|
|
+ void test_mdp_data_at(Register mdp_in, int offset, Register value,
|
|
+ Register test_value_out,
|
|
+ Label& not_equal_continue);
|
|
+
|
|
+ void record_klass_in_profile(Register receiver, Register mdp,
|
|
+ Register reg2, bool is_virtual_call);
|
|
+ void record_klass_in_profile_helper(Register receiver, Register mdp,
|
|
+ Register reg2,
|
|
+ Label& done, bool is_virtual_call);
|
|
+ void record_item_in_profile_helper(Register item, Register mdp,
|
|
+ Register reg2, int start_row, Label& done, int total_rows,
|
|
+ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
|
|
+ int non_profiled_offset);
|
|
+
|
|
+ void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
|
|
+ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
|
|
+ void update_mdp_by_constant(Register mdp_in, int constant);
|
|
+ void update_mdp_for_ret(Register return_bci);
|
|
+
|
|
+ // narrow int return value
|
|
+ void narrow(Register result);
|
|
+
|
|
+ void profile_taken_branch(Register mdp, Register bumped_count);
|
|
+ void profile_not_taken_branch(Register mdp);
|
|
+ void profile_call(Register mdp);
|
|
+ void profile_final_call(Register mdp);
|
|
+ void profile_virtual_call(Register receiver, Register mdp,
|
|
+ Register t1,
|
|
+ bool receiver_can_be_null = false);
|
|
+ void profile_ret(Register return_bci, Register mdp);
|
|
+ void profile_null_seen(Register mdp);
|
|
+ void profile_typecheck(Register mdp, Register klass, Register temp);
|
|
+ void profile_typecheck_failed(Register mdp);
|
|
+ void profile_switch_default(Register mdp);
|
|
+ void profile_switch_case(Register index_in_scratch, Register mdp,
|
|
+ Register temp);
|
|
+
|
|
+ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
|
|
+ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);
|
|
+ void profile_return_type(Register mdp, Register ret, Register tmp);
|
|
+ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3);
|
|
+
|
|
+ // Debugging
|
|
+ // only if +VerifyFPU && (state == ftos || state == dtos)
|
|
+ void verify_FPU(int stack_depth, TosState state = ftos);
|
|
+
|
|
+ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
|
|
+
|
|
+ // support for jvmti/dtrace
|
|
+ void notify_method_entry();
|
|
+ void notify_method_exit(TosState state, NotifyMethodExitMode mode);
|
|
+
|
|
+ virtual void _call_Unimplemented(address call_site) {
|
|
+ save_bcp();
|
|
+ set_last_Java_frame(esp, fp, (address) pc(), t0);
|
|
+ MacroAssembler::_call_Unimplemented(call_site);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ void verify_access_flags(Register access_flags, uint32_t flag_bits,
|
|
+ const char* msg, bool stop_by_hit = true);
|
|
+ void verify_frame_setup();
|
|
+#endif
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..777f326e3
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
|
|
@@ -0,0 +1,296 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interpreterRuntime.hpp"
|
|
+#include "memory/allocation.inline.hpp"
|
|
+#include "memory/universe.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "runtime/handles.inline.hpp"
|
|
+#include "runtime/icache.hpp"
|
|
+#include "runtime/interfaceSupport.inline.hpp"
|
|
+#include "runtime/signature.hpp"
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+// Implementation of SignatureHandlerGenerator
|
|
+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; }
|
|
+Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; }
|
|
+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; }
|
|
+
|
|
+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() {
|
|
+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) {
|
|
+ return g_INTArgReg[++_num_reg_int_args];
|
|
+ }
|
|
+ return noreg;
|
|
+}
|
|
+
|
|
+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() {
|
|
+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) {
|
|
+ return g_FPArgReg[_num_reg_fp_args++];
|
|
+ }
|
|
+ return fnoreg;
|
|
+}
|
|
+
|
|
+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() {
|
|
+ int ret = _stack_offset;
|
|
+ _stack_offset += wordSize;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
|
|
+ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
|
|
+  _masm = new MacroAssembler(buffer); // allocated on the resource area by default
|
|
+ _num_reg_int_args = (method->is_static() ? 1 : 0);
|
|
+ _num_reg_fp_args = 0;
|
|
+ _stack_offset = 0;
|
|
+}
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
|
|
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
|
|
+
|
|
+ Register reg = next_gpr();
|
|
+ if (reg != noreg) {
|
|
+ __ lw(reg, src);
|
|
+ } else {
|
|
+ __ lw(x10, src);
|
|
+ __ sw(x10, Address(to(), next_stack_offset()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
|
|
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
|
|
+
|
|
+ Register reg = next_gpr();
|
|
+ if (reg != noreg) {
|
|
+ __ ld(reg, src);
|
|
+ } else {
|
|
+ __ ld(x10, src);
|
|
+ __ sd(x10, Address(to(), next_stack_offset()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
|
|
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset()));
|
|
+
|
|
+ FloatRegister reg = next_fpr();
|
|
+ if (reg != fnoreg) {
|
|
+ __ flw(reg, src);
|
|
+ } else {
|
|
+ // a floating-point argument is passed according to the integer calling
|
|
+    // convention if no floating-point argument register is available
|
|
+ pass_int();
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
|
|
+ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1));
|
|
+
|
|
+ FloatRegister reg = next_fpr();
|
|
+ if (reg != fnoreg) {
|
|
+ __ fld(reg, src);
|
|
+ } else {
|
|
+ // a floating-point argument is passed according to the integer calling
|
|
+    // convention if no floating-point argument register is available
|
|
+ pass_long();
|
|
+ }
|
|
+}
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
|
|
+ Register reg = next_gpr();
|
|
+ if (reg == c_rarg1) {
|
|
+ assert(offset() == 0, "argument register 1 can only be (non-null) receiver");
|
|
+ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset()));
|
|
+ } else if (reg != noreg) {
|
|
+ // c_rarg2-c_rarg7
|
|
+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset()));
|
|
+    __ mv(reg, zr); // _num_reg_int_args -> c_rarg mapping: 1 -> c_rarg2, 2 -> c_rarg3, ...
|
|
+ __ ld(temp(), x10);
|
|
+ Label L;
|
|
+ __ beqz(temp(), L);
|
|
+ __ mv(reg, x10);
|
|
+ __ bind(L);
|
|
+ } else {
|
|
+    // to stack
|
|
+ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset()));
|
|
+ __ ld(temp(), x10);
|
|
+ Label L;
|
|
+ __ bnez(temp(), L);
|
|
+ __ mv(x10, zr);
|
|
+ __ bind(L);
|
|
+ assert(sizeof(jobject) == wordSize, "");
|
|
+ __ sd(x10, Address(to(), next_stack_offset()));
|
|
+ }
|
|
+}
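The object case above passes the address of the local slot (an implicit handle) and substitutes NULL only when the slot itself holds NULL; a standalone sketch of just that rule (names are made up for the illustration):

#include <cassert>
#include <cstdint>

// Mirrors the NULL test in pass_object: hand the callee the slot address, or 0.
static intptr_t handleize(intptr_t* slot) { return *slot == 0 ? 0 : (intptr_t)slot; }

int main() {
  intptr_t null_slot = 0, oop_slot = 0x1234;            // fake interpreter local slots
  assert(handleize(&null_slot) == 0);                   // NULL oop -> pass NULL, not a handle
  assert(handleize(&oop_slot) == (intptr_t)&oop_slot);  // non-NULL oop -> pass &slot
  return 0;
}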
|
|
+
|
|
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
|
|
+ // generate code to handle arguments
|
|
+ iterate(fingerprint);
|
|
+
|
|
+ // return result handler
|
|
+ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type())));
|
|
+ __ ret();
|
|
+
|
|
+ __ flush();
|
|
+}
|
|
+
|
|
+
|
|
+// Implementation of SignatureHandlerLibrary
|
|
+
|
|
+void SignatureHandlerLibrary::pd_set_handler(address handler) {}
|
|
+
|
|
+
|
|
+class SlowSignatureHandler
|
|
+ : public NativeSignatureIterator {
|
|
+ private:
|
|
+ address _from;
|
|
+ intptr_t* _to;
|
|
+ intptr_t* _int_args;
|
|
+ intptr_t* _fp_args;
|
|
+ intptr_t* _fp_identifiers;
|
|
+ unsigned int _num_reg_int_args;
|
|
+ unsigned int _num_reg_fp_args;
|
|
+
|
|
+
|
|
+ intptr_t* single_slot_addr() {
|
|
+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0));
|
|
+ _from -= Interpreter::stackElementSize;
|
|
+ return from_addr;
|
|
+ }
|
|
+
|
|
+ intptr_t* double_slot_addr() {
|
|
+ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1));
|
|
+ _from -= 2 * Interpreter::stackElementSize;
|
|
+ return from_addr;
|
|
+ }
|
|
+
|
|
+ int pass_gpr(intptr_t value) {
|
|
+ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) {
|
|
+ *_int_args++ = value;
|
|
+ return _num_reg_int_args++;
|
|
+ }
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ int pass_fpr(intptr_t value) {
|
|
+ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) {
|
|
+ *_fp_args++ = value;
|
|
+ return _num_reg_fp_args++;
|
|
+ }
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ void pass_stack(intptr_t value) {
|
|
+ *_to++ = value;
|
|
+ }
|
|
+
|
|
+
|
|
+ virtual void pass_int() {
|
|
+ jint value = *(jint*)single_slot_addr();
|
|
+ if (pass_gpr(value) < 0) {
|
|
+ pass_stack(value);
|
|
+ }
|
|
+ }
|
|
+
|
|
+
|
|
+ virtual void pass_long() {
|
|
+ intptr_t value = *double_slot_addr();
|
|
+ if (pass_gpr(value) < 0) {
|
|
+ pass_stack(value);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual void pass_object() {
|
|
+ intptr_t* addr = single_slot_addr();
|
|
+ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr;
|
|
+ if (pass_gpr(value) < 0) {
|
|
+ pass_stack(value);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual void pass_float() {
|
|
+ jint value = *(jint*) single_slot_addr();
|
|
+ // a floating-point argument is passed according to the integer calling
|
|
+    // convention if no floating-point argument register is available
|
|
+ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) {
|
|
+ pass_stack(value);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual void pass_double() {
|
|
+ intptr_t value = *double_slot_addr();
|
|
+ int arg = pass_fpr(value);
|
|
+ if (0 <= arg) {
|
|
+ *_fp_identifiers |= (1ull << arg); // mark as double
|
|
+ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack
|
|
+ pass_stack(value);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public:
|
|
+ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to)
|
|
+ : NativeSignatureIterator(method)
|
|
+ {
|
|
+ _from = from;
|
|
+ _to = to;
|
|
+
|
|
+ _int_args = to - (method->is_static() ? 16 : 17);
|
|
+ _fp_args = to - 8;
|
|
+ _fp_identifiers = to - 9;
|
|
+ *(int*) _fp_identifiers = 0;
|
|
+ _num_reg_int_args = (method->is_static() ? 1 : 0);
|
|
+ _num_reg_fp_args = 0;
|
|
+ }
|
|
+ ~SlowSignatureHandler()
|
|
+ {
|
|
+ _from = NULL;
|
|
+ _to = NULL;
|
|
+ _int_args = NULL;
|
|
+ _fp_args = NULL;
|
|
+ _fp_identifiers = NULL;
|
|
+ }
|
|
+};
|
|
+
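The register/stack fallback used by both signature handlers (FP arguments spill first to the integer path and only then to the stack) is easy to lose in the pointer bookkeeping; below is a minimal standalone sketch of just that decision logic, assuming 8 integer and 8 FP argument registers with the first integer register reserved for the JNIEnv (those counts are assumptions of the illustration, not restated from the patch):

#include <cstdio>

// Illustrative only: mirrors the pass_gpr/pass_fpr fallback in SlowSignatureHandler.
static const unsigned kIntRegs = 8;   // integer argument registers, first one carries JNIEnv
static const unsigned kFpRegs  = 8;   // floating-point argument registers

struct ArgPacker {
  unsigned ints = 1, fps = 0, stack = 0;        // ints starts past the JNIEnv slot
  bool pass_gpr()   { if (ints < kIntRegs) { ints++; return true; } return false; }
  bool pass_fpr()   { if (fps  < kFpRegs)  { fps++;  return true; } return false; }
  void pass_int()   { if (!pass_gpr()) stack++; }
  void pass_float() { if (!pass_fpr() && !pass_gpr()) stack++; }  // same order as above
};

int main() {
  ArgPacker p;
  for (int i = 0; i < 10; i++) p.pass_float();  // 8 go to FP regs, 2 fall back to GPRs
  std::printf("fp=%u int=%u stack=%u\n", p.fps, p.ints - 1, p.stack);
  return 0;
}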
|
|
+
|
|
+IRT_ENTRY(address,
|
|
+ InterpreterRuntime::slow_signature_handler(JavaThread* thread,
|
|
+ Method* method,
|
|
+ intptr_t* from,
|
|
+ intptr_t* to))
|
|
+ methodHandle m(thread, (Method*)method);
|
|
+ assert(m->is_native(), "sanity check");
|
|
+
|
|
+ // handle arguments
|
|
+ SlowSignatureHandler ssh(m, (address)from, to);
|
|
+ ssh.iterate((uint64_t)UCONST64(-1));
|
|
+
|
|
+ // return result handler
|
|
+ return Interpreter::result_handler(m->result_type());
|
|
+IRT_END
|
|
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..06342869f
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
|
|
@@ -0,0 +1,68 @@
|
|
+/*
|
|
+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP
|
|
+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP
|
|
+
|
|
+// This is included in the middle of class Interpreter.
|
|
+// Do not include files here.
|
|
+
|
|
+// native method calls
|
|
+
|
|
+class SignatureHandlerGenerator: public NativeSignatureIterator {
|
|
+ private:
|
|
+ MacroAssembler* _masm;
|
|
+ unsigned int _num_reg_fp_args;
|
|
+ unsigned int _num_reg_int_args;
|
|
+ int _stack_offset;
|
|
+
|
|
+ void pass_int();
|
|
+ void pass_long();
|
|
+ void pass_float();
|
|
+ void pass_double();
|
|
+ void pass_object();
|
|
+
|
|
+ Register next_gpr();
|
|
+ FloatRegister next_fpr();
|
|
+ int next_stack_offset();
|
|
+
|
|
+ public:
|
|
+ // Creation
|
|
+ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer);
|
|
+ virtual ~SignatureHandlerGenerator() {
|
|
+ _masm = NULL;
|
|
+ }
|
|
+
|
|
+ // Code generation
|
|
+ void generate(uint64_t fingerprint);
|
|
+
|
|
+ // Code generation support
|
|
+ static Register from();
|
|
+ static Register to();
|
|
+ static Register temp();
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..a169b8c5f
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
|
|
@@ -0,0 +1,89 @@
|
|
+/*
|
|
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
|
|
+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
|
|
+
|
|
+private:
|
|
+
|
|
+ // FP value associated with _last_Java_sp:
|
|
+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to
|
|
+
|
|
+public:
|
|
+ // Each arch must define reset, save, restore
|
|
+ // These are used by objects that only care about:
|
|
+ // 1 - initializing a new state (thread creation, javaCalls)
|
|
+ // 2 - saving a current state (javaCalls)
|
|
+ // 3 - restoring an old state (javaCalls)
|
|
+
|
|
+ void clear(void) {
|
|
+ // clearing _last_Java_sp must be first
|
|
+ _last_Java_sp = NULL;
|
|
+ OrderAccess::release();
|
|
+ _last_Java_fp = NULL;
|
|
+ _last_Java_pc = NULL;
|
|
+ }
|
|
+
|
|
+ void copy(JavaFrameAnchor* src) {
|
|
+ // In order to make sure the transition state is valid for "this"
|
|
+ // We must clear _last_Java_sp before copying the rest of the new data
|
|
+ //
|
|
+ // Hack Alert: Temporary bugfix for 4717480/4721647
|
|
+ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
|
|
+ // unless the value is changing
|
|
+ //
|
|
+ assert(src != NULL, "Src should not be NULL.");
|
|
+ if (_last_Java_sp != src->_last_Java_sp) {
|
|
+ _last_Java_sp = NULL;
|
|
+ OrderAccess::release();
|
|
+ }
|
|
+ _last_Java_fp = src->_last_Java_fp;
|
|
+ _last_Java_pc = src->_last_Java_pc;
|
|
+ // Must be last so profiler will always see valid frame if has_last_frame() is true
|
|
+ _last_Java_sp = src->_last_Java_sp;
|
|
+ }
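A minimal standalone sketch of the publication order that clear() and copy() above rely on, written with std::atomic in place of OrderAccess (field names and types are simplified; illustrative only):

#include <atomic>
#include <cstdint>

// sp is cleared first and republished last, so a concurrent stack walker that
// sees a non-NULL sp also sees a matching fp.
struct AnchorSketch {
  std::atomic<intptr_t*> sp{nullptr};
  intptr_t* fp = nullptr;

  void update(intptr_t* new_sp, intptr_t* new_fp) {
    sp.store(nullptr, std::memory_order_release);  // invalidate the anchor first
    fp = new_fp;                                   // update the rest while "not walkable"
    sp.store(new_sp, std::memory_order_release);   // publish last: walkable again
  }
};

int main() {
  intptr_t a = 0, b = 0;
  AnchorSketch anchor;
  anchor.update(&a, &b);
  return anchor.sp.load(std::memory_order_acquire) == &a ? 0 : 1;
}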
|
|
+
|
|
+ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; }
|
|
+ void make_walkable(JavaThread* thread);
|
|
+ void capture_last_Java_pc(void);
|
|
+
|
|
+ intptr_t* last_Java_sp(void) const { return _last_Java_sp; }
|
|
+
|
|
+ const address last_Java_pc(void) { return _last_Java_pc; }
|
|
+
|
|
+private:
|
|
+
|
|
+ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
|
|
+
|
|
+public:
|
|
+
|
|
+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); }
|
|
+
|
|
+ intptr_t* last_Java_fp(void) { return _last_Java_fp; }
|
|
+ // Assert (last_Java_sp == NULL || fp == NULL)
|
|
+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; }
|
|
+
|
|
+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..9bab8e78f
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
|
|
@@ -0,0 +1,193 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "prims/jniFastGetField.hpp"
|
|
+#include "prims/jvm_misc.hpp"
|
|
+#include "runtime/safepoint.hpp"
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+#define BUFFER_SIZE 30*wordSize
|
|
+
|
|
+// Instead of issuing a LoadLoad barrier we create an address
|
|
+// dependency between loads; this might be more efficient.
|
|
+
|
|
+// Common register usage:
|
|
+// x10/f10: result
|
|
+// c_rarg0: jni env
|
|
+// c_rarg1: obj
|
|
+// c_rarg2: jfield id
|
|
+
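The xor-twice idiom used in the generated code below leaves the object pointer bitwise unchanged while making its computation data-dependent on the just-loaded safepoint counter, which is what stands in for the LoadLoad barrier; a standalone arithmetic sketch with made-up values:

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t obj     = 0x0000003f12345678ULL;      // value of c_rarg1 in the stub
  uintptr_t counter = 42;                         // value just loaded from the safepoint counter
  uintptr_t robj    = (obj ^ counter) ^ counter;  // xorr robj, c_rarg1, rcounter; xorr robj, robj, rcounter
  assert(robj == obj);  // identical value, but its computation now depends on 'counter'
  return 0;
}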
|
|
+static const Register robj = x13;
|
|
+static const Register rcounter = x14;
|
|
+static const Register roffset = x15;
|
|
+static const Register rcounter_addr = x16;
|
|
+static const Register result = x17;
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
|
|
+ const char *name;
|
|
+ switch (type) {
|
|
+ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
|
|
+ case T_BYTE: name = "jni_fast_GetByteField"; break;
|
|
+ case T_CHAR: name = "jni_fast_GetCharField"; break;
|
|
+ case T_SHORT: name = "jni_fast_GetShortField"; break;
|
|
+ case T_INT: name = "jni_fast_GetIntField"; break;
|
|
+ case T_LONG: name = "jni_fast_GetLongField"; break;
|
|
+ case T_FLOAT: name = "jni_fast_GetFloatField"; break;
|
|
+ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
|
|
+ default: ShouldNotReachHere();
|
|
+ name = NULL; // unreachable
|
|
+ }
|
|
+ ResourceMark rm;
|
|
+ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
|
|
+ CodeBuffer cbuf(blob);
|
|
+ MacroAssembler* masm = new MacroAssembler(&cbuf);
|
|
+ address fast_entry = __ pc();
|
|
+
|
|
+ Label slow;
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset);
|
|
+ __ addi(rcounter_addr, rcounter_addr, offset);
|
|
+
|
|
+ Address safepoint_counter_addr(rcounter_addr, 0);
|
|
+ __ lwu(rcounter, safepoint_counter_addr);
|
|
+ // An even value means there are no ongoing safepoint operations
|
|
+ __ andi(t0, rcounter, 1);
|
|
+ __ bnez(t0, slow);
|
|
+ __ xorr(robj, c_rarg1, rcounter);
|
|
+ __ xorr(robj, robj, rcounter); // obj, since
|
|
+ // robj ^ rcounter ^ rcounter == robj
|
|
+ // robj is address dependent on rcounter.
|
|
+
|
|
+
|
|
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ assert_cond(bs != NULL);
|
|
+ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow);
|
|
+
|
|
+ __ srli(roffset, c_rarg2, 2); // offset
|
|
+
|
|
+ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
|
|
+ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler
|
|
+ __ add(roffset, robj, roffset);
|
|
+ switch (type) {
|
|
+ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break;
|
|
+ case T_BYTE: __ lb(result, Address(roffset, 0)); break;
|
|
+ case T_CHAR: __ lhu(result, Address(roffset, 0)); break;
|
|
+ case T_SHORT: __ lh(result, Address(roffset, 0)); break;
|
|
+ case T_INT: __ lw(result, Address(roffset, 0)); break;
|
|
+ case T_LONG: __ ld(result, Address(roffset, 0)); break;
|
|
+ case T_FLOAT: {
|
|
+      __ flw(f28, Address(roffset, 0)); // use f28 as a temporary
|
|
+ __ fmv_x_w(result, f28); // f{31--0}-->x
|
|
+ break;
|
|
+ }
|
|
+ case T_DOUBLE: {
|
|
+      __ fld(f28, Address(roffset, 0)); // use f28 as a temporary
|
|
+ __ fmv_x_d(result, f28); // d{63--0}-->x
|
|
+ break;
|
|
+ }
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // counter_addr is address dependent on result.
|
|
+ __ xorr(rcounter_addr, rcounter_addr, result);
|
|
+ __ xorr(rcounter_addr, rcounter_addr, result);
|
|
+ __ lw(t0, safepoint_counter_addr);
|
|
+ __ bne(rcounter, t0, slow);
|
|
+
|
|
+ switch (type) {
|
|
+ case T_FLOAT: __ fmv_w_x(f10, result); break;
|
|
+ case T_DOUBLE: __ fmv_d_x(f10, result); break;
|
|
+ default: __ mv(x10, result); break;
|
|
+ }
|
|
+ __ ret();
|
|
+
|
|
+ slowcase_entry_pclist[count++] = __ pc();
|
|
+ __ bind(slow);
|
|
+ address slow_case_addr;
|
|
+ switch (type) {
|
|
+ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
|
|
+ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break;
|
|
+ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break;
|
|
+ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break;
|
|
+ case T_INT: slow_case_addr = jni_GetIntField_addr(); break;
|
|
+ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break;
|
|
+ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break;
|
|
+ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ slow_case_addr = NULL; // unreachable
|
|
+ }
|
|
+
|
|
+ {
|
|
+ __ enter();
|
|
+ int32_t tmp_offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset);
|
|
+ __ jalr(x1, t0, tmp_offset);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ }
|
|
+ __ flush();
|
|
+
|
|
+ return fast_entry;
|
|
+}
|
|
+
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_boolean_field() {
|
|
+ return generate_fast_get_int_field0(T_BOOLEAN);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_byte_field() {
|
|
+ return generate_fast_get_int_field0(T_BYTE);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_char_field() {
|
|
+ return generate_fast_get_int_field0(T_CHAR);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_short_field() {
|
|
+ return generate_fast_get_int_field0(T_SHORT);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_int_field() {
|
|
+ return generate_fast_get_int_field0(T_INT);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_long_field() {
|
|
+ return generate_fast_get_int_field0(T_LONG);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_float_field() {
|
|
+ return generate_fast_get_int_field0(T_FLOAT);
|
|
+}
|
|
+
|
|
+address JNI_FastGetField::generate_fast_get_double_field() {
|
|
+ return generate_fast_get_int_field0(T_DOUBLE);
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..96775e0db
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
|
|
@@ -0,0 +1,108 @@
|
|
+/*
|
|
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP
|
|
+#define CPU_RISCV_JNITYPES_RISCV_HPP
|
|
+
|
|
+#include "jni.h"
|
|
+#include "memory/allocation.hpp"
|
|
+#include "oops/oop.hpp"
|
|
+
|
|
+// This file holds platform-dependent routines used to write primitive jni
|
|
+// types to the array of arguments passed into JavaCalls::call
|
|
+
|
|
+class JNITypes : private AllStatic {
|
|
+ // These functions write a java primitive type (in native format)
|
|
+ // to a java stack slot array to be passed as an argument to JavaCalls:calls.
|
|
+ // I.e., they are functionally 'push' operations if they have a 'pos'
|
|
+ // formal parameter. Note that jlong's and jdouble's are written
|
|
+ // _in reverse_ of the order in which they appear in the interpreter
|
|
+  // stack. This is because call stubs (see stubGenerator_riscv.cpp)
|
|
+ // reverse the argument list constructed by JavaCallArguments (see
|
|
+ // javaCalls.hpp).
|
|
+
|
|
+public:
|
|
+ // Ints are stored in native format in one JavaCallArgument slot at *to.
|
|
+ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; }
|
|
+ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; }
|
|
+ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
|
|
+
|
|
+ // Longs are stored in native format in one JavaCallArgument slot at
|
|
+ // *(to+1).
|
|
+ static inline void put_long(jlong from, intptr_t *to) {
|
|
+ *(jlong*) (to + 1) = from;
|
|
+ }
|
|
+
|
|
+ static inline void put_long(jlong from, intptr_t *to, int& pos) {
|
|
+ *(jlong*) (to + 1 + pos) = from;
|
|
+ pos += 2;
|
|
+ }
|
|
+
|
|
+ static inline void put_long(jlong *from, intptr_t *to, int& pos) {
|
|
+ *(jlong*) (to + 1 + pos) = *from;
|
|
+ pos += 2;
|
|
+ }
|
|
+
|
|
+ // Oops are stored in native format in one JavaCallArgument slot at *to.
|
|
+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; }
|
|
+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; }
|
|
+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
|
|
+
|
|
+ // Floats are stored in native format in one JavaCallArgument slot at *to.
|
|
+ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; }
|
|
+ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; }
|
|
+ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
|
|
+
|
|
+#undef _JNI_SLOT_OFFSET
|
|
+#define _JNI_SLOT_OFFSET 1
|
|
+ // Doubles are stored in native word format in one JavaCallArgument
|
|
+ // slot at *(to+1).
|
|
+ static inline void put_double(jdouble from, intptr_t *to) {
|
|
+ *(jdouble*) (to + 1) = from;
|
|
+ }
|
|
+
|
|
+ static inline void put_double(jdouble from, intptr_t *to, int& pos) {
|
|
+ *(jdouble*) (to + 1 + pos) = from;
|
|
+ pos += 2;
|
|
+ }
|
|
+
|
|
+ static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
|
|
+ *(jdouble*) (to + 1 + pos) = *from;
|
|
+ pos += 2;
|
|
+ }
|
|
+
|
|
+ // The get_xxx routines, on the other hand, actually _do_ fetch
|
|
+ // java primitive types from the interpreter stack.
|
|
+  // No need to worry about alignment on RISC-V.
|
|
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; }
|
|
+ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); }
|
|
+ static inline oop get_obj (intptr_t *from) { return *(oop *) from; }
|
|
+ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; }
|
|
+ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
|
|
+#undef _JNI_SLOT_OFFSET
|
|
+};
|
|
+
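A standalone sketch of the one-slot/two-slot convention encoded by put_int and put_long/put_double above: a long or double consumes two slots and its value lands in the second one, which is the same +1 that get_long/get_double apply via _JNI_SLOT_OFFSET (buffer size and values are invented for the example):

#include <cassert>
#include <cstdint>

int main() {
  intptr_t slots[4] = {0, 0, 0, 0};
  int pos = 0;

  *(int32_t*)(slots + pos) = 42;     // put_int: one slot, written at slots[pos]
  pos += 1;
  *(int64_t*)(slots + 1 + pos) = 7;  // put_long: written at slots[pos + 1] ...
  pos += 2;                          // ... and the argument advances by two slots

  assert(pos == 3);
  assert(*(int64_t*)(slots + 2) == 7);  // the second of the long's two slots
  return 0;
}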
|
|
+#endif // CPU_RISCV_JNITYPES_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..5d6078bb3
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
|
|
@@ -0,0 +1,5861 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "asm/assembler.inline.hpp"
|
|
+#include "compiler/disassembler.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "gc/shared/cardTable.hpp"
|
|
+#include "gc/shared/cardTableBarrierSet.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/accessDecorators.hpp"
|
|
+#include "oops/compressedOops.inline.hpp"
|
|
+#include "oops/klass.inline.hpp"
|
|
+#include "runtime/biasedLocking.hpp"
|
|
+#include "runtime/interfaceSupport.inline.hpp"
|
|
+#include "runtime/jniHandles.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/thread.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_LIRAssembler.hpp"
|
|
+#endif
|
|
+#ifdef COMPILER2
|
|
+#include "oops/oop.hpp"
|
|
+#include "opto/compile.hpp"
|
|
+#include "opto/intrinsicnode.hpp"
|
|
+#include "opto/subnode.hpp"
|
|
+#endif
|
|
+
|
|
+#ifdef PRODUCT
|
|
+#define BLOCK_COMMENT(str) /* nothing */
|
|
+#else
|
|
+#define BLOCK_COMMENT(str) block_comment(str)
|
|
+#endif
|
|
+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
|
|
+
|
|
+static void pass_arg0(MacroAssembler* masm, Register arg) {
|
|
+ if (c_rarg0 != arg) {
|
|
+ masm->mv(c_rarg0, arg);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pass_arg1(MacroAssembler* masm, Register arg) {
|
|
+ if (c_rarg1 != arg) {
|
|
+ masm->mv(c_rarg1, arg);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pass_arg2(MacroAssembler* masm, Register arg) {
|
|
+ if (c_rarg2 != arg) {
|
|
+ masm->mv(c_rarg2, arg);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pass_arg3(MacroAssembler* masm, Register arg) {
|
|
+ if (c_rarg3 != arg) {
|
|
+ masm->mv(c_rarg3, arg);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::align(int modulus) {
|
|
+ while (offset() % modulus != 0) { nop(); }
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
|
|
+ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
|
|
+}
|
|
+
|
|
+// Implementation of call_VM versions
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ bool check_exceptions) {
|
|
+ call_VM_helper(oop_result, entry_point, 0, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ bool check_exceptions) {
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM_helper(oop_result, entry_point, 1, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ Register arg_2,
|
|
+ bool check_exceptions) {
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM_helper(oop_result, entry_point, 2, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ Register arg_2,
|
|
+ Register arg_3,
|
|
+ bool check_exceptions) {
|
|
+ assert(arg_1 != c_rarg3, "smashed arg");
|
|
+ assert(arg_2 != c_rarg3, "smashed arg");
|
|
+ pass_arg3(this, arg_3);
|
|
+
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM_helper(oop_result, entry_point, 3, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ int number_of_arguments,
|
|
+ bool check_exceptions) {
|
|
+ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ bool check_exceptions) {
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ Register arg_2,
|
|
+ bool check_exceptions) {
|
|
+
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ Register arg_2,
|
|
+ Register arg_3,
|
|
+ bool check_exceptions) {
|
|
+ assert(arg_1 != c_rarg3, "smashed arg");
|
|
+ assert(arg_2 != c_rarg3, "smashed arg");
|
|
+ pass_arg3(this, arg_3);
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
|
|
+}
|
|
+
|
|
+// these are no-ops overridden by InterpreterMacroAssembler
|
|
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
|
|
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
|
|
+
|
|
+// Calls to C land
|
|
+//
|
|
+// When entering C land, the fp and esp of the last Java frame have to be recorded
|
|
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
|
|
+// has to be reset to 0. This is required to allow proper stack traversal.
|
|
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
|
|
+ Register last_java_fp,
|
|
+ Register last_java_pc,
|
|
+ Register tmp) {
|
|
+
|
|
+ if (last_java_pc->is_valid()) {
|
|
+ sd(last_java_pc, Address(xthread,
|
|
+ JavaThread::frame_anchor_offset() +
|
|
+ JavaFrameAnchor::last_Java_pc_offset()));
|
|
+ }
|
|
+
|
|
+ // determine last_java_sp register
|
|
+ if (last_java_sp == sp) {
|
|
+ mv(tmp, sp);
|
|
+ last_java_sp = tmp;
|
|
+ } else if (!last_java_sp->is_valid()) {
|
|
+ last_java_sp = esp;
|
|
+ }
|
|
+
|
|
+ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
|
|
+
|
|
+ // last_java_fp is optional
|
|
+ if (last_java_fp->is_valid()) {
|
|
+ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
|
|
+ Register last_java_fp,
|
|
+ address last_java_pc,
|
|
+ Register tmp) {
|
|
+ assert(last_java_pc != NULL, "must provide a valid PC");
|
|
+
|
|
+ la(tmp, last_java_pc);
|
|
+ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
|
|
+
|
|
+ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
|
|
+}
|
|
+
|
|
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
|
|
+ Register last_java_fp,
|
|
+ Label &L,
|
|
+ Register tmp) {
|
|
+ if (L.is_bound()) {
|
|
+ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
|
|
+ } else {
|
|
+ L.add_patch_at(code(), locator());
|
|
+ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
|
|
+ // we must set sp to zero to clear frame
|
|
+ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
|
|
+
|
|
+ // must clear fp, so that compiled frames are not confused; it is
|
|
+ // possible that we need it only for debugging
|
|
+ if (clear_fp) {
|
|
+ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
|
|
+ }
|
|
+
|
|
+ // Always clear the pc because it could have been set by make_walkable()
|
|
+ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_base(Register oop_result,
|
|
+ Register java_thread,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ int number_of_arguments,
|
|
+ bool check_exceptions) {
|
|
+ // determine java_thread register
|
|
+ if (!java_thread->is_valid()) {
|
|
+ java_thread = xthread;
|
|
+ }
|
|
+ // determine last_java_sp register
|
|
+ if (!last_java_sp->is_valid()) {
|
|
+ last_java_sp = esp;
|
|
+ }
|
|
+
|
|
+ // debugging support
|
|
+ assert(number_of_arguments >= 0 , "cannot have negative number of arguments");
|
|
+ assert(java_thread == xthread, "unexpected register");
|
|
+
|
|
+ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result");
|
|
+ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
|
|
+
|
|
+ // push java thread (becomes first argument of C function)
|
|
+ mv(c_rarg0, java_thread);
|
|
+
|
|
+ // set last Java frame before call
|
|
+ assert(last_java_sp != fp, "can't use fp");
|
|
+
|
|
+ Label l;
|
|
+ set_last_Java_frame(last_java_sp, fp, l, t0);
|
|
+
|
|
+ // do the call, remove parameters
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
|
|
+
|
|
+ // reset last Java frame
|
|
+ // Only interpreter should have to clear fp
|
|
+ reset_last_Java_frame(true);
|
|
+
|
|
+ // C++ interp handles this in the interpreter
|
|
+ check_and_handle_popframe(java_thread);
|
|
+ check_and_handle_earlyret(java_thread);
|
|
+
|
|
+ if (check_exceptions) {
|
|
+ // check for pending exceptions (java_thread is set upon return)
|
|
+ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
|
|
+ Label ok;
|
|
+ beqz(t0, ok);
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
|
|
+ jalr(x0, t0, offset);
|
|
+ bind(ok);
|
|
+ }
|
|
+
|
|
+ // get oop result if there is one and reset the value in the thread
|
|
+ if (oop_result->is_valid()) {
|
|
+ get_vm_result(oop_result, java_thread);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
|
|
+ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
|
|
+ sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
|
|
+ verify_oop(oop_result, "broken oop in call_VM_base");
|
|
+}
|
|
+
|
|
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
|
|
+ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
|
|
+ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
|
|
+}
|
|
+
|
|
+
|
|
+void MacroAssembler::verify_oop(Register reg, const char* s) {
|
|
+ if (!VerifyOops) { return; }
|
|
+
|
|
+ // Pass register number to verify_oop_subroutine
|
|
+ const char* b = NULL;
|
|
+ {
|
|
+ ResourceMark rm;
|
|
+ stringStream ss;
|
|
+ ss.print("verify_oop: %s: %s", reg->name(), s);
|
|
+ b = code_string(ss.as_string());
|
|
+ }
|
|
+ BLOCK_COMMENT("verify_oop {");
|
|
+
|
|
+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
|
|
+
|
|
+ mv(c_rarg0, reg); // c_rarg0 : x10
|
|
+  if (b != NULL) {
|
|
+ movptr(t0, (uintptr_t)(address)b);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // call indirectly to solve generation ordering problem
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
|
|
+ ld(t1, Address(t1, offset));
|
|
+ jalr(t1);
|
|
+
|
|
+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
|
|
+
|
|
+ BLOCK_COMMENT("} verify_oop");
|
|
+}
|
|
+
|
|
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
|
|
+ if (!VerifyOops) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ const char* b = NULL;
|
|
+ {
|
|
+ ResourceMark rm;
|
|
+ stringStream ss;
|
|
+ ss.print("verify_oop_addr: %s", s);
|
|
+ b = code_string(ss.as_string());
|
|
+ }
|
|
+ BLOCK_COMMENT("verify_oop_addr {");
|
|
+
|
|
+ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
|
|
+
|
|
+ if (addr.uses(sp)) {
|
|
+ la(x10, addr);
|
|
+ ld(x10, Address(x10, 4 * wordSize));
|
|
+ } else {
|
|
+ ld(x10, addr);
|
|
+ }
|
|
+  if (b != NULL) {
|
|
+ movptr(t0, (uintptr_t)(address)b);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // call indirectly to solve generation ordering problem
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
|
|
+ ld(t1, Address(t1, offset));
|
|
+ jalr(t1);
|
|
+
|
|
+ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
|
|
+
|
|
+ BLOCK_COMMENT("} verify_oop_addr");
|
|
+}
|
|
+
|
|
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
|
|
+ int extra_slot_offset) {
|
|
+ // cf. TemplateTable::prepare_invoke(), if (load_receiver).
|
|
+ int stackElementSize = Interpreter::stackElementSize;
|
|
+ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
|
|
+#ifdef ASSERT
|
|
+ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
|
|
+ assert(offset1 - offset == stackElementSize, "correct arithmetic");
|
|
+#endif
|
|
+ if (arg_slot.is_constant()) {
|
|
+ return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
|
|
+ } else {
|
|
+ assert_different_registers(t0, arg_slot.as_register());
|
|
+ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
|
|
+ return Address(t0, offset);
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+extern "C" void findpc(intptr_t x);
|
|
+#endif
|
|
+
|
|
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
|
|
+{
|
|
+  // In order to get locks to work, we need to fake an in_VM state
|
|
+ if (ShowMessageBoxOnError) {
|
|
+ JavaThread* thread = JavaThread::current();
|
|
+ JavaThreadState saved_state = thread->thread_state();
|
|
+ thread->set_thread_state(_thread_in_vm);
|
|
+#ifndef PRODUCT
|
|
+ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
|
|
+ ttyLocker ttyl;
|
|
+ BytecodeCounter::print();
|
|
+ }
|
|
+#endif
|
|
+ if (os::message_box(msg, "Execution stopped, print registers?")) {
|
|
+ ttyLocker ttyl;
|
|
+ tty->print_cr(" pc = 0x%016" PRIX64, pc);
|
|
+#ifndef PRODUCT
|
|
+ tty->cr();
|
|
+ findpc(pc);
|
|
+ tty->cr();
|
|
+#endif
|
|
+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]);
|
|
+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]);
|
|
+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]);
|
|
+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]);
|
|
+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]);
|
|
+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]);
|
|
+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]);
|
|
+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]);
|
|
+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]);
|
|
+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]);
|
|
+ tty->print_cr("x10 = 0x%016" PRIx64, regs[10]);
|
|
+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]);
|
|
+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]);
|
|
+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]);
|
|
+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]);
|
|
+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]);
|
|
+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]);
|
|
+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]);
|
|
+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]);
|
|
+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]);
|
|
+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]);
|
|
+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]);
|
|
+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]);
|
|
+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]);
|
|
+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]);
|
|
+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]);
|
|
+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]);
|
|
+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]);
|
|
+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]);
|
|
+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]);
|
|
+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]);
|
|
+ BREAKPOINT;
|
|
+ }
|
|
+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
|
|
+ } else {
|
|
+ ttyLocker ttyl;
|
|
+ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
|
|
+ assert(false, "DEBUG MESSAGE: %s", msg);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
|
|
+ Label done, not_weak;
|
|
+ beqz(value, done); // Use NULL as-is.
|
|
+
|
|
+ // Test for jweak tag.
|
|
+ andi(t0, value, JNIHandles::weak_tag_mask);
|
|
+ beqz(t0, not_weak);
|
|
+
|
|
+ // Resolve jweak.
|
|
+ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
|
|
+ Address(value, -JNIHandles::weak_tag_value), tmp, thread);
|
|
+ verify_oop(value);
|
|
+ j(done);
|
|
+
|
|
+ bind(not_weak);
|
|
+ // Resolve (untagged) jobject.
|
|
+ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
|
|
+ verify_oop(value);
|
|
+ bind(done);
|
|
+}
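The weak-handle branch above depends on jweak handles carrying a low tag bit; a standalone sketch of the tag test and of the untagged slot address, assuming the usual mask and tag value of 1 (that constant is an assumption of the illustration, not restated here):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t weak_tag_mask = 1, weak_tag_value = 1;  // assumed values
  uintptr_t handle = 0x7f0000001000ULL | weak_tag_value;  // a tagged jweak handle

  // mirrors: andi(t0, value, weak_tag_mask); beqz(t0, not_weak) -- tagged handles fall through
  assert((handle & weak_tag_mask) != 0);
  uintptr_t slot = handle - weak_tag_value;     // Address(value, -JNIHandles::weak_tag_value)
  assert(slot == 0x7f0000001000ULL);            // the load resolves through the untagged slot
  return 0;
}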
|
|
+
|
|
+void MacroAssembler::stop(const char* msg) {
|
|
+ address ip = pc();
|
|
+ push_reg(RegSet::range(x0, x31), sp);
|
|
+  if (msg != NULL && ip != NULL) {
|
|
+ mv(c_rarg0, (uintptr_t)(address)msg);
|
|
+ mv(c_rarg1, (uintptr_t)(address)ip);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ mv(c_rarg2, sp);
|
|
+ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
|
|
+ jalr(c_rarg3);
|
|
+ ebreak();
|
|
+}
|
|
+
|
|
+void MacroAssembler::unimplemented(const char* what) {
|
|
+ const char* buf = NULL;
|
|
+ {
|
|
+ ResourceMark rm;
|
|
+ stringStream ss;
|
|
+ ss.print("unimplemented: %s", what);
|
|
+ buf = code_string(ss.as_string());
|
|
+ }
|
|
+ stop(buf);
|
|
+}
|
|
+
|
|
+void MacroAssembler::emit_static_call_stub() {
|
|
+ // CompiledDirectStaticCall::set_to_interpreted knows the
|
|
+ // exact layout of this stub.
|
|
+
|
|
+ mov_metadata(xmethod, (Metadata*)NULL);
|
|
+
|
|
+ // Jump to the entry point of the i2c stub.
|
|
+ int32_t offset = 0;
|
|
+ movptr_with_offset(t0, 0, offset);
|
|
+ jalr(x0, t0, offset);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_leaf_base(address entry_point,
|
|
+ int number_of_arguments,
|
|
+ Label *retaddr) {
|
|
+ int32_t offset = 0;
|
|
+ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp
|
|
+ movptr_with_offset(t0, entry_point, offset);
|
|
+ jalr(x1, t0, offset);
|
|
+ if (retaddr != NULL) {
|
|
+ bind(*retaddr);
|
|
+ }
|
|
+ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
|
|
+ call_VM_leaf_base(entry_point, number_of_arguments);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
|
|
+ pass_arg0(this, arg_0);
|
|
+ call_VM_leaf_base(entry_point, 1);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
|
|
+ pass_arg0(this, arg_0);
|
|
+ pass_arg1(this, arg_1);
|
|
+ call_VM_leaf_base(entry_point, 2);
|
|
+}
|
|
+
|
|
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
|
|
+ Register arg_1, Register arg_2) {
|
|
+ pass_arg0(this, arg_0);
|
|
+ pass_arg1(this, arg_1);
|
|
+ pass_arg2(this, arg_2);
|
|
+ call_VM_leaf_base(entry_point, 3);
|
|
+}
|
|
+
|
|
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
|
|
+ pass_arg0(this, arg_0);
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, 1);
|
|
+}
|
|
+
|
|
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
|
|
+
|
|
+ assert(arg_0 != c_rarg1, "smashed arg");
|
|
+ pass_arg1(this, arg_1);
|
|
+ pass_arg0(this, arg_0);
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, 2);
|
|
+}
|
|
+
|
|
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
|
|
+ assert(arg_0 != c_rarg2, "smashed arg");
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+ assert(arg_0 != c_rarg1, "smashed arg");
|
|
+ pass_arg1(this, arg_1);
|
|
+ pass_arg0(this, arg_0);
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, 3);
|
|
+}
|
|
+
|
|
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
|
|
+ assert(arg_0 != c_rarg3, "smashed arg");
|
|
+ assert(arg_1 != c_rarg3, "smashed arg");
|
|
+ assert(arg_2 != c_rarg3, "smashed arg");
|
|
+ pass_arg3(this, arg_3);
|
|
+ assert(arg_0 != c_rarg2, "smashed arg");
|
|
+ assert(arg_1 != c_rarg2, "smashed arg");
|
|
+ pass_arg2(this, arg_2);
|
|
+ assert(arg_0 != c_rarg1, "smashed arg");
|
|
+ pass_arg1(this, arg_1);
|
|
+ pass_arg0(this, arg_0);
|
|
+ MacroAssembler::call_VM_leaf_base(entry_point, 4);
|
|
+}
|
|
+
|
|
+void MacroAssembler::nop() {
|
|
+ addi(x0, x0, 0);
|
|
+}
|
|
+
|
|
+void MacroAssembler::mv(Register Rd, Register Rs) {
|
|
+ if (Rd != Rs) {
|
|
+ addi(Rd, Rs, 0);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::notr(Register Rd, Register Rs) {
|
|
+ xori(Rd, Rs, -1);
|
|
+}
|
|
+
|
|
+void MacroAssembler::neg(Register Rd, Register Rs) {
|
|
+ sub(Rd, x0, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::negw(Register Rd, Register Rs) {
|
|
+ subw(Rd, x0, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::sext_w(Register Rd, Register Rs) {
|
|
+ addiw(Rd, Rs, 0);
|
|
+}
|
|
+
|
|
+void MacroAssembler::zext_b(Register Rd, Register Rs) {
|
|
+ andi(Rd, Rs, 0xFF);
|
|
+}
|
|
+
|
|
+void MacroAssembler::seqz(Register Rd, Register Rs) {
|
|
+ sltiu(Rd, Rs, 1);
|
|
+}
|
|
+
|
|
+void MacroAssembler::snez(Register Rd, Register Rs) {
|
|
+ sltu(Rd, x0, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::sltz(Register Rd, Register Rs) {
|
|
+ slt(Rd, Rs, x0);
|
|
+}
|
|
+
|
|
+void MacroAssembler::sgtz(Register Rd, Register Rs) {
|
|
+ slt(Rd, x0, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
|
|
+ if (Rd != Rs) {
|
|
+ fsgnj_s(Rd, Rs, Rs);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
|
|
+ fsgnjx_s(Rd, Rs, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
|
|
+ fsgnjn_s(Rd, Rs, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
|
|
+ if (Rd != Rs) {
|
|
+ fsgnj_d(Rd, Rs, Rs);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
|
|
+ fsgnjx_d(Rd, Rs, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
|
|
+ fsgnjn_d(Rd, Rs, Rs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
|
|
+ vmnand_mm(vd, vs, vs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
|
|
+ vnsrl_wx(vd, vs, x0, vm);
|
|
+}
|
|
+
|
|
+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
|
|
+ vfsgnjn_vv(vd, vs, vs);
|
|
+}
|
|
+
|
|
+void MacroAssembler::la(Register Rd, const address &dest) {
|
|
+ int64_t offset = dest - pc();
|
|
+ if (is_offset_in_range(offset, 32)) {
|
|
+ auipc(Rd, (int32_t)offset + 0x800); // 0x800 compensates for sign-extension of bit 11 in the 12-bit addi immediate below
|
|
+ addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
|
|
+ } else {
|
|
+ movptr(Rd, dest);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::la(Register Rd, const Address &adr) {
|
|
+ code_section()->relocate(pc(), adr.rspec());
|
|
+ relocInfo::relocType rtype = adr.rspec().reloc()->type();
|
|
+
|
|
+ switch(adr.getMode()) {
|
|
+ case Address::literal: {
|
|
+ if (rtype == relocInfo::none) {
|
|
+ mv(Rd, (intptr_t)(adr.target()));
|
|
+ } else {
|
|
+ movptr(Rd, adr.target());
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ case Address::base_plus_offset:{
|
|
+ Register base = adr.base();
|
|
+ int64_t offset = adr.offset();
|
|
+ if (offset == 0 && Rd != base) {
|
|
+ mv(Rd, base);
|
|
+ } else if (offset != 0 && Rd != base) {
|
|
+ add(Rd, base, offset, Rd);
|
|
+ } else if (offset != 0 && Rd == base) {
|
|
+ Register tmp = (Rd == t0) ? t1 : t0;
|
|
+ add(base, base, offset, tmp);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::la(Register Rd, Label &label) {
|
|
+ la(Rd, target(label));
|
|
+}
|
|
+
|
|
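+// Compare-with-zero branches: NAMEz(Rs, ...) expands to NAME(Rs, zr, ...), giving
+// beqz/bnez/bltz/blez/bgez/bgtz.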
+#define INSN(NAME) \
|
|
+ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \
|
|
+ NAME(Rs, zr, dest); \
|
|
+ } \
|
|
+ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \
|
|
+ NAME(Rs, zr, l, is_far); \
|
|
+ } \
|
|
+
|
|
+ INSN(beq);
|
|
+ INSN(bne);
|
|
+ INSN(blt);
|
|
+ INSN(ble);
|
|
+ INSN(bge);
|
|
+ INSN(bgt);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+// Float compare branch instructions
|
|
+
|
|
+#define INSN(NAME, FLOATCMP, BRANCH) \
|
|
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
|
|
+ FLOATCMP##_s(t0, Rs1, Rs2); \
|
|
+ BRANCH(t0, l, is_far); \
|
|
+ } \
|
|
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
|
|
+ FLOATCMP##_d(t0, Rs1, Rs2); \
|
|
+ BRANCH(t0, l, is_far); \
|
|
+ }
|
|
+
|
|
+ INSN(beq, feq, bnez);
|
|
+ INSN(bne, feq, beqz);
|
|
+#undef INSN
|
|
+
|
|
+
|
|
+#define INSN(NAME, FLOATCMP1, FLOATCMP2) \
|
|
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
|
|
+ bool is_far, bool is_unordered) { \
|
|
+ if (is_unordered) { \
|
|
+ FLOATCMP2##_s(t0, Rs2, Rs1); \
|
|
+ beqz(t0, l, is_far); \
|
|
+ } else { \
|
|
+ FLOATCMP1##_s(t0, Rs1, Rs2); \
|
|
+ bnez(t0, l, is_far); \
|
|
+ } \
|
|
+ } \
|
|
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
|
|
+ bool is_far, bool is_unordered) { \
|
|
+ if (is_unordered) { \
|
|
+ FLOATCMP2##_d(t0, Rs2, Rs1); \
|
|
+ beqz(t0, l, is_far); \
|
|
+ } else { \
|
|
+ FLOATCMP1##_d(t0, Rs1, Rs2); \
|
|
+ bnez(t0, l, is_far); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+ INSN(ble, fle, flt);
|
|
+ INSN(blt, flt, fle);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, CMP) \
|
|
+ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
|
|
+ bool is_far, bool is_unordered) { \
|
|
+ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
|
|
+ } \
|
|
+ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
|
|
+ bool is_far, bool is_unordered) { \
|
|
+ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \
|
|
+ }
|
|
+
|
|
+ INSN(bgt, blt);
|
|
+ INSN(bge, ble);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+
|
|
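+// CSR pseudo-instructions: counter/timer reads (rdinstret, rdcycle, rdtime) and floating-point
+// status accessors (frcsr, frrm, frflags, fscsr, fsrm, fsflags, ...) built on the csrr* base forms.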
+#define INSN(NAME, CSR) \
|
|
+ void MacroAssembler::NAME(Register Rd) { \
|
|
+ csrr(Rd, CSR); \
|
|
+ }
|
|
+
|
|
+ INSN(rdinstret, CSR_INSTERT);
|
|
+ INSN(rdcycle, CSR_CYCLE);
|
|
+ INSN(rdtime, CSR_TIME);
|
|
+ INSN(frcsr, CSR_FCSR);
|
|
+ INSN(frrm, CSR_FRM);
|
|
+ INSN(frflags, CSR_FFLAGS);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+void MacroAssembler::csrr(Register Rd, unsigned csr) {
|
|
+ csrrs(Rd, csr, x0);
|
|
+}
|
|
+
|
|
+#define INSN(NAME, OPFUN) \
|
|
+ void MacroAssembler::NAME(unsigned csr, Register Rs) { \
|
|
+ OPFUN(x0, csr, Rs); \
|
|
+ }
|
|
+
|
|
+ INSN(csrw, csrrw);
|
|
+ INSN(csrs, csrrs);
|
|
+ INSN(csrc, csrrc);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, OPFUN) \
|
|
+ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \
|
|
+ OPFUN(x0, csr, imm); \
|
|
+ }
|
|
+
|
|
+ INSN(csrwi, csrrwi);
|
|
+ INSN(csrsi, csrrsi);
|
|
+ INSN(csrci, csrrci);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME, CSR) \
|
|
+ void MacroAssembler::NAME(Register Rd, Register Rs) { \
|
|
+ csrrw(Rd, CSR, Rs); \
|
|
+ }
|
|
+
|
|
+ INSN(fscsr, CSR_FCSR);
|
|
+ INSN(fsrm, CSR_FRM);
|
|
+ INSN(fsflags, CSR_FFLAGS);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#define INSN(NAME) \
|
|
+ void MacroAssembler::NAME(Register Rs) { \
|
|
+ NAME(x0, Rs); \
|
|
+ }
|
|
+
|
|
+ INSN(fscsr);
|
|
+ INSN(fsrm);
|
|
+ INSN(fsflags);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
|
|
+ guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
|
|
+ csrrwi(Rd, CSR_FRM, imm);
|
|
+}
|
|
+
|
|
+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
|
|
+ csrrwi(Rd, CSR_FFLAGS, imm);
|
|
+}
|
|
+
|
|
+#define INSN(NAME) \
|
|
+ void MacroAssembler::NAME(unsigned imm) { \
|
|
+ NAME(x0, imm); \
|
|
+ }
|
|
+
|
|
+ INSN(fsrmi);
|
|
+ INSN(fsflagsi);
|
|
+
|
|
+#undef INSN
|
|
+
|
|
+#ifdef COMPILER2
|
|
+
|
|
+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
|
|
+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
|
|
+ bool is_far, bool is_unordered);
|
|
+
|
|
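+// Branch dispatch tables for C2, indexed by the BoolTest condition code; the second half of each
+// table holds the unsigned (integer table) or double (float table) variants, and overflow
+// conditions have no direct branch encoding, hence the NULL entries.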
+static conditional_branch_insn conditional_branches[] =
|
|
+{
|
|
+ /* SHORT branches */
|
|
+ (conditional_branch_insn)&Assembler::beq,
|
|
+ (conditional_branch_insn)&Assembler::bgt,
|
|
+ NULL, // BoolTest::overflow
|
|
+ (conditional_branch_insn)&Assembler::blt,
|
|
+ (conditional_branch_insn)&Assembler::bne,
|
|
+ (conditional_branch_insn)&Assembler::ble,
|
|
+ NULL, // BoolTest::no_overflow
|
|
+ (conditional_branch_insn)&Assembler::bge,
|
|
+
|
|
+ /* UNSIGNED branches */
|
|
+ (conditional_branch_insn)&Assembler::beq,
|
|
+ (conditional_branch_insn)&Assembler::bgtu,
|
|
+ NULL,
|
|
+ (conditional_branch_insn)&Assembler::bltu,
|
|
+ (conditional_branch_insn)&Assembler::bne,
|
|
+ (conditional_branch_insn)&Assembler::bleu,
|
|
+ NULL,
|
|
+ (conditional_branch_insn)&Assembler::bgeu
|
|
+};
|
|
+
|
|
+static float_conditional_branch_insn float_conditional_branches[] =
|
|
+{
|
|
+ /* FLOAT SHORT branches */
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_beq,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_bgt,
|
|
+ NULL, // BoolTest::overflow
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_blt,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_bne,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_ble,
|
|
+ NULL, // BoolTest::no_overflow
|
|
+ (float_conditional_branch_insn)&MacroAssembler::float_bge,
|
|
+
|
|
+ /* DOUBLE SHORT branches */
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_beq,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_bgt,
|
|
+ NULL,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_blt,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_bne,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_ble,
|
|
+ NULL,
|
|
+ (float_conditional_branch_insn)&MacroAssembler::double_bge
|
|
+};
|
|
+
|
|
+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) {
|
|
+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])),
|
|
+ "invalid conditional branch index");
|
|
+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far);
|
|
+}
|
|
+
|
|
+// This function should only be used by C2. For an unordered-greater comparison the unordered bit is
|
|
+// flipped: C2 uses unordered-lesser instead, and then commutes the result bits in do_one_bytecode().
|
|
+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) {
|
|
+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])),
|
|
+ "invalid float conditional branch index");
|
|
+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask);
|
|
+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far,
|
|
+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true);
|
|
+}
|
|
+
|
|
+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
|
|
+ switch (cmpFlag) {
|
|
+ case BoolTest::eq:
|
|
+ case BoolTest::le:
|
|
+ beqz(op1, L, is_far);
|
|
+ break;
|
|
+ case BoolTest::ne:
|
|
+ case BoolTest::gt:
|
|
+ bnez(op1, L, is_far);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
|
|
+ switch (cmpFlag) {
|
|
+ case BoolTest::eq:
|
|
+ beqz(op1, L, is_far);
|
|
+ break;
|
|
+ case BoolTest::ne:
|
|
+ bnez(op1, L, is_far);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) {
|
|
+ Label L;
|
|
+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L);
|
|
+ mv(dst, src);
|
|
+ bind(L);
|
|
+}
|
|
+#endif
|
|
+
|
|
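+// Single-register push/pop using esp; the multi-register push_reg/pop_reg variants below take the
+// stack register explicitly.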
+void MacroAssembler::push_reg(Register Rs)
|
|
+{
|
|
+ addi(esp, esp, 0 - wordSize);
|
|
+ sd(Rs, Address(esp, 0));
|
|
+}
|
|
+
|
|
+void MacroAssembler::pop_reg(Register Rd)
|
|
+{
|
|
+ ld(Rd, esp, 0);
|
|
+ addi(esp, esp, wordSize);
|
|
+}
|
|
+
|
|
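+// Decode a register bit set into an array of register encodings, highest-numbered first;
+// returns how many registers were found. Shared by the multi-register push_reg/pop_reg below.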
+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
|
|
+ int count = 0;
|
|
+ // Scan bitset to accumulate register pairs
|
|
+ for (int reg = 31; reg >= 0; reg --) {
|
|
+ if ((1U << 31) & bitset) {
|
|
+ regs[count++] = reg;
|
|
+ }
|
|
+ bitset <<= 1;
|
|
+ }
|
|
+ return count;
|
|
+}
|
|
+
|
|
+// Push lots of registers in the bit set supplied. Don't push sp.
|
|
+// Return the number of words pushed
|
|
+int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
|
|
+ DEBUG_ONLY(int words_pushed = 0;)
|
|
+
|
|
+ unsigned char regs[32];
|
|
+ int count = bitset_to_regs(bitset, regs);
|
|
+ // reserve one slot to align for odd count
|
|
+ int offset = is_even(count) ? 0 : wordSize;
|
|
+
|
|
+ if (count) {
|
|
+ addi(stack, stack, - count * wordSize - offset);
|
|
+ }
|
|
+ for (int i = count - 1; i >= 0; i--) {
|
|
+ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
|
|
+ DEBUG_ONLY(words_pushed ++;)
|
|
+ }
|
|
+
|
|
+ assert(words_pushed == count, "oops, pushed != count");
|
|
+
|
|
+ return count;
|
|
+}
|
|
+
|
|
+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
|
|
+ DEBUG_ONLY(int words_popped = 0;)
|
|
+
|
|
+ unsigned char regs[32];
|
|
+ int count = bitset_to_regs(bitset, regs);
|
|
+ // reserve one slot to align for odd count
|
|
+ int offset = is_even(count) ? 0 : wordSize;
|
|
+
|
|
+ for (int i = count - 1; i >= 0; i--) {
|
|
+ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
|
|
+ DEBUG_ONLY(words_popped ++;)
|
|
+ }
|
|
+
|
|
+ if (count) {
|
|
+ addi(stack, stack, count * wordSize + offset);
|
|
+ }
|
|
+ assert(words_popped == count, "oops, popped != count");
|
|
+
|
|
+ return count;
|
|
+}
|
|
+
|
|
+RegSet MacroAssembler::call_clobbered_registers() {
|
|
+ // Push integer registers x7, x10-x17, x28-x31.
|
|
+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31);
|
|
+}
|
|
+
|
|
+void MacroAssembler::push_call_clobbered_registers() {
|
|
+ push_reg(call_clobbered_registers(), sp);
|
|
+
|
|
+ // Push float registers f0-f7, f10-f17, f28-f31.
|
|
+ addi(sp, sp, - wordSize * 20);
|
|
+ int offset = 0;
|
|
+ for (int i = 0; i < 32; i++) {
|
|
+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
|
|
+ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::pop_call_clobbered_registers() {
|
|
+ int offset = 0;
|
|
+ for (int i = 0; i < 32; i++) {
|
|
+ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
|
|
+ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++)));
|
|
+ }
|
|
+ }
|
|
+ addi(sp, sp, wordSize * 20);
|
|
+
|
|
+ pop_reg(call_clobbered_registers(), sp);
|
|
+}
|
|
+
|
|
+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
|
|
+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
|
|
+ push_reg(RegSet::range(x5, x31), sp);
|
|
+
|
|
+ // float registers
|
|
+ addi(sp, sp, - 32 * wordSize);
|
|
+ for (int i = 0; i < 32; i++) {
|
|
+ fsd(as_FloatRegister(i), Address(sp, i * wordSize));
|
|
+ }
|
|
+
|
|
+ // vector registers
|
|
+ if (save_vectors) {
|
|
+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers);
|
|
+ vsetvli(t0, x0, Assembler::e64, Assembler::m8);
|
|
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
|
|
+ add(t0, sp, vector_size_in_bytes * i);
|
|
+ vse64_v(as_VectorRegister(i), t0);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
|
|
+ // vector registers
|
|
+ if (restore_vectors) {
|
|
+ vsetvli(t0, x0, Assembler::e64, Assembler::m8);
|
|
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
|
|
+ vle64_v(as_VectorRegister(i), sp);
|
|
+ add(sp, sp, vector_size_in_bytes * 8);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // float registers
|
|
+ for (int i = 0; i < 32; i++) {
|
|
+ fld(as_FloatRegister(i), Address(sp, i * wordSize));
|
|
+ }
|
|
+ addi(sp, sp, 32 * wordSize);
|
|
+
|
|
+ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
|
|
+ pop_reg(RegSet::range(x5, x31), sp);
|
|
+}
|
|
+
|
|
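+// Instruction patching. The patch_* helpers below rewrite the immediate fields of an already
+// emitted jal, conditional branch, auipc-based pc-relative pair, movptr, li64 or li32 sequence,
+// and return the length in bytes of the patched sequence.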
+static int patch_offset_in_jal(address branch, int64_t offset) {
|
|
+ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
|
|
+ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31]
|
|
+ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21]
|
|
+ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20]
|
|
+ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12]
|
|
+ return NativeInstruction::instruction_size; // only one instruction
|
|
+}
|
|
+
|
|
+static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
|
|
+ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
|
|
+ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31]
|
|
+ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25]
|
|
+ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7]
|
|
+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8]
|
|
+ return NativeInstruction::instruction_size; // only one instruction
|
|
+}
|
|
+
|
|
+static int patch_offset_in_pc_relative(address branch, int64_t offset) {
|
|
+ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load
|
|
+ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. offset[31:12] ==> branch[31:12]
|
|
+ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20]
|
|
+ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
+static int patch_addr_in_movptr(address branch, address target) {
|
|
+ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load
|
|
+ int32_t lower = ((intptr_t)target << 35) >> 35;
|
|
+ int64_t upper = ((intptr_t)target - lower) >> 29;
|
|
+ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12]
|
|
+ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20]
|
|
+ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20]
|
|
+ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20]
|
|
+ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
+static int patch_imm_in_li64(address branch, address target) {
|
|
+ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi
|
|
+ int64_t lower = (intptr_t)target & 0xffffffff;
|
|
+ lower = lower - ((lower << 44) >> 44);
|
|
+ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
|
|
+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32;
|
|
+ int64_t tmp_upper = upper, tmp_lower = upper;
|
|
+ tmp_lower = (tmp_lower << 52) >> 52;
|
|
+ tmp_upper -= tmp_lower;
|
|
+ tmp_upper >>= 12;
|
|
+ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1),
|
|
+ // upper = target[63:32] + 1.
|
|
+ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui.
|
|
+ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi.
|
|
+ // Load the rest 32 bits.
|
|
+ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi.
|
|
+ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi.
|
|
+ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi.
|
|
+ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
+static int patch_imm_in_li32(address branch, int32_t target) {
|
|
+ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw
|
|
+ int64_t upper = (intptr_t)target;
|
|
+ int32_t lower = (((int32_t)target) << 20) >> 20;
|
|
+ upper -= lower;
|
|
+ upper = (int32_t)upper;
|
|
+ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui.
|
|
+ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw.
|
|
+ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
|
|
+}
|
|
+
|
|
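+// The get_offset_of_* / get_target_of_* helpers are the inverses of the patch_* routines above:
+// they decode an emitted sequence back into its branch offset or target address.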
+static long get_offset_of_jal(address insn_addr) {
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ long offset = 0;
|
|
+ unsigned insn = *(unsigned*)insn_addr;
|
|
+ long val = (long)Assembler::sextract(insn, 31, 12);
|
|
+ offset |= ((val >> 19) & 0x1) << 20;
|
|
+ offset |= (val & 0xff) << 12;
|
|
+ offset |= ((val >> 8) & 0x1) << 11;
|
|
+ offset |= ((val >> 9) & 0x3ff) << 1;
|
|
+ offset = (offset << 43) >> 43;
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+static long get_offset_of_conditional_branch(address insn_addr) {
|
|
+ long offset = 0;
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ unsigned insn = *(unsigned*)insn_addr;
|
|
+ offset = (long)Assembler::sextract(insn, 31, 31);
|
|
+ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
|
|
+ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
|
|
+ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
|
|
+ offset = (offset << 41) >> 41;
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+static long get_offset_of_pc_relative(address insn_addr) {
|
|
+ long offset = 0;
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc.
|
|
+ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load.
|
|
+ offset = (offset << 32) >> 32;
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+static address get_target_of_movptr(address insn_addr) {
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load.
|
|
+ return (address) target_address;
|
|
+}
|
|
+
|
|
+static address get_target_of_li64(address insn_addr) {
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi.
|
|
+ return (address)target_address;
|
|
+}
|
|
+
|
|
+static address get_target_of_li32(address insn_addr) {
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui.
|
|
+ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw.
|
|
+ return (address)target_address;
|
|
+}
|
|
+
|
|
+// Patch any kind of instruction; there may be several instructions.
|
|
+// Return the total length (in bytes) of the instructions.
|
|
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
|
|
+ assert_cond(branch != NULL);
|
|
+ int64_t offset = target - branch;
|
|
+ if (NativeInstruction::is_jal_at(branch)) { // jal
|
|
+ return patch_offset_in_jal(branch, offset);
|
|
+ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne
|
|
+ return patch_offset_in_conditional_branch(branch, offset);
|
|
+ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load
|
|
+ return patch_offset_in_pc_relative(branch, offset);
|
|
+ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr
|
|
+ return patch_addr_in_movptr(branch, target);
|
|
+ } else if (NativeInstruction::is_li64_at(branch)) { // li64
|
|
+ return patch_imm_in_li64(branch, target);
|
|
+ } else if (NativeInstruction::is_li32_at(branch)) { // li32
|
|
+ int64_t imm = (intptr_t)target;
|
|
+ return patch_imm_in_li32(branch, (int32_t)imm);
|
|
+ } else {
|
|
+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch);
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+address MacroAssembler::target_addr_for_insn(address insn_addr) {
|
|
+ long offset = 0;
|
|
+ assert_cond(insn_addr != NULL);
|
|
+ if (NativeInstruction::is_jal_at(insn_addr)) { // jal
|
|
+ offset = get_offset_of_jal(insn_addr);
|
|
+ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne
|
|
+ offset = get_offset_of_conditional_branch(insn_addr);
|
|
+ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load
|
|
+ offset = get_offset_of_pc_relative(insn_addr);
|
|
+ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr
|
|
+ return get_target_of_movptr(insn_addr);
|
|
+ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64
|
|
+ return get_target_of_li64(insn_addr);
|
|
+ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32
|
|
+ return get_target_of_li32(insn_addr);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ return address(((uintptr_t)insn_addr + offset));
|
|
+}
|
|
+
|
|
+int MacroAssembler::patch_oop(address insn_addr, address o) {
|
|
+ // OOPs are either narrow (32 bits) or wide (48 bits). We encode
|
|
+ // narrow OOPs by setting the upper 16 bits in the first
|
|
+ // instruction.
|
|
+ if (NativeInstruction::is_li32_at(insn_addr)) {
|
|
+ // Move narrow OOP
|
|
+ narrowOop n = CompressedOops::encode((oop)o);
|
|
+ return patch_imm_in_li32(insn_addr, (int32_t)n);
|
|
+ } else if (NativeInstruction::is_movptr_at(insn_addr)) {
|
|
+ // Move wide OOP
|
|
+ return patch_addr_in_movptr(insn_addr, o);
|
|
+ }
|
|
+ ShouldNotReachHere();
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+void MacroAssembler::reinit_heapbase() {
|
|
+ if (UseCompressedOops) {
|
|
+ if (Universe::is_fully_initialized()) {
|
|
+ mv(xheapbase, Universe::narrow_ptrs_base());
|
|
+ } else {
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset);
|
|
+ ld(xheapbase, Address(xheapbase, offset));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::mv(Register Rd, Address dest) {
|
|
+ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
|
|
+ code_section()->relocate(pc(), dest.rspec());
|
|
+ movptr(Rd, dest.target());
|
|
+}
|
|
+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
|
|
+ if (src.is_register()) {
|
|
+ mv(Rd, src.as_register());
|
|
+ } else {
|
|
+ mv(Rd, src.as_constant());
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
|
|
+ andr(Rd, Rs1, Rs2);
|
|
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
|
|
+ // and the result is stored in Rd
|
|
+ addw(Rd, Rd, zr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
|
|
+ orr(Rd, Rs1, Rs2);
|
|
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
|
|
+ // and the result is stored in Rd
|
|
+ addw(Rd, Rd, zr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
|
|
+ xorr(Rd, Rs1, Rs2);
|
|
+ // addw: The result is clipped to 32 bits, then the sign bit is extended,
|
|
+ // and the result is stored in Rd
|
|
+ addw(Rd, Rd, zr);
|
|
+}
|
|
+
|
|
+// Note: load_unsigned_short used to be called load_unsigned_word.
|
|
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
|
|
+ int off = offset();
|
|
+ lhu(dst, src);
|
|
+ return off;
|
|
+}
|
|
+
|
|
+int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
|
|
+ int off = offset();
|
|
+ lbu(dst, src);
|
|
+ return off;
|
|
+}
|
|
+
|
|
+int MacroAssembler::load_signed_short(Register dst, Address src) {
|
|
+ int off = offset();
|
|
+ lh(dst, src);
|
|
+ return off;
|
|
+}
|
|
+
|
|
+int MacroAssembler::load_signed_byte(Register dst, Address src) {
|
|
+ int off = offset();
|
|
+ lb(dst, src);
|
|
+ return off;
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
|
|
+ switch (size_in_bytes) {
|
|
+ case 8: ld(dst, src); break;
|
|
+ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break;
|
|
+ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
|
|
+ case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
|
|
+ switch (size_in_bytes) {
|
|
+ case 8: sd(src, dst); break;
|
|
+ case 4: sw(src, dst); break;
|
|
+ case 2: sh(src, dst); break;
|
|
+ case 1: sb(src, dst); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+// rotate right with imm bits
|
|
+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
|
|
+{
|
|
+ if (UseZbb) {
|
|
+ rori(dst, src, shift);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ assert_different_registers(dst, tmp);
|
|
+ assert_different_registers(src, tmp);
|
|
+ assert(shift < 64, "shift amount must be < 64");
|
|
+ slli(tmp, src, 64 - shift);
|
|
+ srli(dst, src, shift);
|
|
+ orr(dst, dst, tmp);
|
|
+}
|
|
+
|
|
+// reverse bytes in halfword in lower 16 bits and sign-extend
|
|
+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
|
|
+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ srai(Rd, Rd, 48);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp);
|
|
+ assert_different_registers(Rd, tmp);
|
|
+ srli(tmp, Rs, 8);
|
|
+ andi(tmp, tmp, 0xFF);
|
|
+ slli(Rd, Rs, 56);
|
|
+ srai(Rd, Rd, 48); // sign-extend
|
|
+ orr(Rd, Rd, tmp);
|
|
+}
|
|
+
|
|
+// reverse bytes in lower word and sign-extend
|
|
+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
|
|
+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ srai(Rd, Rd, 32);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1, tmp2);
|
|
+ revb_h_w_u(Rd, Rs, tmp1, tmp2);
|
|
+ slli(tmp2, Rd, 48);
|
|
+ srai(tmp2, tmp2, 32); // sign-extend
|
|
+ srli(Rd, Rd, 16);
|
|
+ orr(Rd, Rd, tmp2);
|
|
+}
|
|
+
|
|
+// reverse bytes in halfword in lower 16 bits and zero-extend
|
|
+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
|
|
+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ srli(Rd, Rd, 48);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp);
|
|
+ assert_different_registers(Rd, tmp);
|
|
+ srli(tmp, Rs, 8);
|
|
+ andi(tmp, tmp, 0xFF);
|
|
+ andi(Rd, Rs, 0xFF);
|
|
+ slli(Rd, Rd, 8);
|
|
+ orr(Rd, Rd, tmp);
|
|
+}
|
|
+
|
|
+// reverse bytes in halfwords in lower 32 bits and zero-extend
|
|
+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
|
|
+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ rori(Rd, Rd, 32);
|
|
+ roriw(Rd, Rd, 16);
|
|
+ zero_extend(Rd, Rd, 32);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1, tmp2);
|
|
+ srli(tmp2, Rs, 16);
|
|
+ revb_h_h_u(tmp2, tmp2, tmp1);
|
|
+ revb_h_h_u(Rd, Rs, tmp1);
|
|
+ slli(tmp2, tmp2, 16);
|
|
+ orr(Rd, Rd, tmp2);
|
|
+}
|
|
+
|
|
+// This method is only used for revb_h
|
|
+// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
|
|
+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1);
|
|
+ srli(tmp1, Rs, 48);
|
|
+ andi(tmp2, tmp1, 0xFF);
|
|
+ slli(tmp2, tmp2, 8);
|
|
+ srli(tmp1, tmp1, 8);
|
|
+ orr(tmp1, tmp1, tmp2);
|
|
+ slli(Rd, Rs, 16);
|
|
+ orr(Rd, Rd, tmp1);
|
|
+}
|
|
+// reverse bytes in each halfword
|
|
+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
|
|
+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ if (UseZbb) {
|
|
+ assert_different_registers(Rs, tmp1);
|
|
+ assert_different_registers(Rd, tmp1);
|
|
+ rev8(Rd, Rs);
|
|
+ zero_extend(tmp1, Rd, 32);
|
|
+ roriw(tmp1, tmp1, 16);
|
|
+ slli(tmp1, tmp1, 32);
|
|
+ srli(Rd, Rd, 32);
|
|
+ roriw(Rd, Rd, 16);
|
|
+ zero_extend(Rd, Rd, 32);
|
|
+ orr(Rd, Rd, tmp1);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1, tmp2);
|
|
+ revb_h_helper(Rd, Rs, tmp1, tmp2);
|
|
+ for (int i = 0; i < 3; ++i) {
|
|
+ revb_h_helper(Rd, Rd, tmp1, tmp2);
|
|
+ }
|
|
+}
|
|
+
|
|
+// reverse bytes in each word
|
|
+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
|
|
+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ rori(Rd, Rd, 32);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1, tmp2);
|
|
+ revb(Rd, Rs, tmp1, tmp2);
|
|
+ ror_imm(Rd, Rd, 32);
|
|
+}
|
|
+
|
|
+// reverse bytes in doubleword
|
|
+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56]
|
|
+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
|
|
+ if (UseZbb) {
|
|
+ rev8(Rd, Rs);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rs, tmp1, tmp2);
|
|
+ assert_different_registers(Rd, tmp1, tmp2);
|
|
+ andi(tmp1, Rs, 0xFF);
|
|
+ slli(tmp1, tmp1, 8);
|
|
+ for (int step = 8; step < 56; step += 8) {
|
|
+ srli(tmp2, Rs, step);
|
|
+ andi(tmp2, tmp2, 0xFF);
|
|
+ orr(tmp1, tmp1, tmp2);
|
|
+ slli(tmp1, tmp1, 8);
|
|
+ }
|
|
+ srli(Rd, Rs, 56);
|
|
+ andi(Rd, Rd, 0xFF);
|
|
+ orr(Rd, tmp1, Rd);
|
|
+}
|
|
+
|
|
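+// andi with an arbitrary 64-bit immediate: uses the 12-bit immediate encoding when the value
+// fits, otherwise materializes the constant in tmp and performs a register-register and.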
+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
|
|
+ if (is_imm_in_range(imm, 12, 0)) {
|
|
+ and_imm12(Rd, Rn, imm);
|
|
+ } else {
|
|
+ assert_different_registers(Rn, tmp);
|
|
+ mv(tmp, imm);
|
|
+ andr(Rd, Rn, tmp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
|
|
+ ld(tmp1, adr);
|
|
+ if (src.is_register()) {
|
|
+ orr(tmp1, tmp1, src.as_register());
|
|
+ } else {
|
|
+ if (is_imm_in_range(src.as_constant(), 12, 0)) {
|
|
+ ori(tmp1, tmp1, src.as_constant());
|
|
+ } else {
|
|
+ assert_different_registers(tmp1, tmp2);
|
|
+ mv(tmp2, src.as_constant());
|
|
+ orr(tmp1, tmp1, tmp2);
|
|
+ }
|
|
+ }
|
|
+ sd(tmp1, adr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
|
|
+ if (Universe::narrow_klass_base() == NULL) {
|
|
+ slli(tmp, tmp, Universe::narrow_klass_shift());
|
|
+ beq(trial_klass, tmp, L);
|
|
+ return;
|
|
+ }
|
|
+ decode_klass_not_null(tmp);
|
|
+ } else {
|
|
+ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+ beq(trial_klass, tmp, L);
|
|
+}
|
|
+
|
|
+// Move an oop into a register. immediate is true if we want
|
|
+// immediate instructions, i.e. we are not going to patch this
|
|
+// instruction while the code is being executed by another thread. In
|
|
+// that case we can use move immediates rather than the constant pool.
|
|
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
|
|
+ int oop_index;
|
|
+ if (obj == NULL) {
|
|
+ oop_index = oop_recorder()->allocate_oop_index(obj);
|
|
+ } else {
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ ThreadInVMfromUnknown tiv;
|
|
+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
|
|
+ }
|
|
+#endif
|
|
+ oop_index = oop_recorder()->find_index(obj);
|
|
+ }
|
|
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
|
|
+ if (!immediate) {
|
|
+ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
|
|
+ ld_constant(dst, Address(dummy, rspec));
|
|
+ } else
|
|
+ mv(dst, Address((address)obj, rspec));
|
|
+}
|
|
+
|
|
+// Move a metadata address into a register.
|
|
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
|
|
+ int oop_index;
|
|
+ if (obj == NULL) {
|
|
+ oop_index = oop_recorder()->allocate_metadata_index(obj);
|
|
+ } else {
|
|
+ oop_index = oop_recorder()->find_index(obj);
|
|
+ }
|
|
+ RelocationHolder rspec = metadata_Relocation::spec(oop_index);
|
|
+ mv(dst, Address((address)obj, rspec));
|
|
+}
|
|
+
|
|
+// Writes to stack successive pages until offset reached to check for
|
|
+// stack overflow + shadow pages. This clobbers tmp.
|
|
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
|
|
+ assert_different_registers(tmp, size, t0);
|
|
+ // Bang stack for total size given plus shadow page size.
|
|
+ // Bang one page at a time because large size can bang beyond yellow and
|
|
+ // red zones.
|
|
+ mv(t0, os::vm_page_size());
|
|
+ Label loop;
|
|
+ bind(loop);
|
|
+ sub(tmp, sp, t0);
|
|
+ subw(size, size, t0);
|
|
+ sd(size, Address(tmp));
|
|
+ bgtz(size, loop);
|
|
+
|
|
+ // Bang down shadow pages too.
|
|
+ // At this point, (tmp-0) is the last address touched, so don't
|
|
+ // touch it again. (It was touched as (tmp-pagesize) but then tmp
|
|
+ // was post-decremented.) Skip this address by starting at i=1, and
|
|
+ // touch a few more pages below. N.B. It is important to touch all
|
|
+ // the way down to and including i=StackShadowPages.
|
|
+ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
|
|
+ // this could be any sized move but this can be a debugging crumb
|
|
+ // so the bigger the better.
|
|
+ sub(tmp, tmp, os::vm_page_size());
|
|
+ sd(size, Address(tmp, 0));
|
|
+ }
|
|
+}
|
|
+
|
|
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
|
|
+ int32_t offset = 0;
|
|
+ _masm = masm;
|
|
+ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
|
|
+ _masm->lbu(t0, Address(t0, offset));
|
|
+ _masm->beqz(t0, _label);
|
|
+}
|
|
+
|
|
+SkipIfEqual::~SkipIfEqual() {
|
|
+ _masm->bind(_label);
|
|
+ _masm = NULL;
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) {
|
|
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
|
+ ld(dst, Address(xmethod, Method::const_offset()));
|
|
+ ld(dst, Address(dst, ConstMethod::constants_offset()));
|
|
+ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
|
|
+ ld(dst, Address(dst, mirror_offset));
|
|
+ resolve_oop_handle(dst, tmp);
|
|
+}
|
|
+
|
|
+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
|
|
+ // OopHandle::resolve is an indirection.
|
|
+ assert_different_registers(result, tmp);
|
|
+ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg);
|
|
+}
|
|
+
|
|
+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
|
|
+ Register dst, Address src,
|
|
+ Register tmp1, Register thread_tmp) {
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ decorators = AccessInternal::decorator_fixup(decorators);
|
|
+ bool as_raw = (decorators & AS_RAW) != 0;
|
|
+ if (as_raw) {
|
|
+ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
|
|
+ } else {
|
|
+ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::null_check(Register reg, int offset) {
|
|
+ if (needs_explicit_null_check(offset)) {
|
|
+ // provoke OS NULL exception if reg = NULL by
|
|
+ // accessing M[reg] w/o changing any registers
|
|
+ // NOTE: this is plenty to provoke a segv
|
|
+ ld(zr, Address(reg, 0));
|
|
+ } else {
|
|
+ // nothing to do, (later) access of M[reg + offset]
|
|
+ // will provoke OS NULL exception if reg = NULL
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
|
|
+ Address dst, Register src,
|
|
+ Register tmp1, Register tmp2, Register tmp3) {
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ decorators = AccessInternal::decorator_fixup(decorators);
|
|
+ bool as_raw = (decorators & AS_RAW) != 0;
|
|
+ if (as_raw) {
|
|
+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
|
|
+ } else {
|
|
+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
|
|
+ }
|
|
+}
|
|
+
|
|
+// Algorithm must match CompressedOops::encode.
|
|
+void MacroAssembler::encode_heap_oop(Register d, Register s) {
|
|
+ verify_oop(s, "broken oop in encode_heap_oop");
|
|
+ if (Universe::narrow_oop_base() == NULL) {
|
|
+ if (Universe::narrow_oop_shift() != 0) {
|
|
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
|
|
+ srli(d, s, LogMinObjAlignmentInBytes);
|
|
+ } else {
|
|
+ mv(d, s);
|
|
+ }
|
|
+ } else {
|
|
+ Label notNull;
|
|
+ sub(d, s, xheapbase);
|
|
+ bgez(d, notNull);
|
|
+ mv(d, zr);
|
|
+ bind(notNull);
|
|
+ if (Universe::narrow_oop_shift() != 0) {
|
|
+ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
|
|
+ srli(d, d, Universe::narrow_oop_shift());
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
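+// Load the klass pointer of the object in 'src' into 'dst', decompressing it when compressed
+// class pointers are enabled.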
+void MacroAssembler::load_klass(Register dst, Register src) {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ decode_klass_not_null(dst);
|
|
+ } else {
|
|
+ ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::store_klass(Register dst, Register src) {
|
|
+ // FIXME: Should this be a store release? concurrent GCs assume
|
|
+ // klass length is valid if klass field is not null.
|
|
+ if (UseCompressedClassPointers) {
|
|
+ encode_klass_not_null(src);
|
|
+ sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ } else {
|
|
+ sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
|
|
+ if (UseCompressedClassPointers) {
|
|
+ // Store to klass gap in destination
|
|
+ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::decode_klass_not_null(Register r) {
|
|
+ decode_klass_not_null(r, r);
|
|
+}
|
|
+
|
|
+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
|
|
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
|
|
+
|
|
+ if (Universe::narrow_klass_base() == NULL) {
|
|
+ if (Universe::narrow_klass_shift() != 0) {
|
|
+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
|
|
+ slli(dst, src, LogKlassAlignmentInBytes);
|
|
+ } else {
|
|
+ mv(dst, src);
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ Register xbase = dst;
|
|
+ if (dst == src) {
|
|
+ xbase = tmp;
|
|
+ }
|
|
+
|
|
+ assert_different_registers(src, xbase);
|
|
+ mv(xbase, (uintptr_t)Universe::narrow_klass_base());
|
|
+ if (Universe::narrow_klass_shift() != 0) {
|
|
+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
|
|
+ assert_different_registers(t0, xbase);
|
|
+ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
|
|
+ } else {
|
|
+ add(dst, xbase, src);
|
|
+ }
|
|
+ if (xbase == xheapbase) { reinit_heapbase(); }
|
|
+
|
|
+}
|
|
+
|
|
+void MacroAssembler::encode_klass_not_null(Register r) {
|
|
+ encode_klass_not_null(r, r);
|
|
+}
|
|
+
|
|
+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
|
|
+ assert(UseCompressedClassPointers, "should only be used for compressed headers");
|
|
+
|
|
+ if (Universe::narrow_klass_base() == NULL) {
|
|
+ if (Universe::narrow_klass_shift() != 0) {
|
|
+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
|
|
+ srli(dst, src, LogKlassAlignmentInBytes);
|
|
+ } else {
|
|
+ mv(dst, src);
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 &&
|
|
+ Universe::narrow_klass_shift() == 0) {
|
|
+ zero_extend(dst, src, 32);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ Register xbase = dst;
|
|
+ if (dst == src) {
|
|
+ xbase = tmp;
|
|
+ }
|
|
+
|
|
+ assert_different_registers(src, xbase);
|
|
+ mv(xbase, (intptr_t)Universe::narrow_klass_base());
|
|
+ sub(dst, src, xbase);
|
|
+ if (Universe::narrow_klass_shift() != 0) {
|
|
+ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
|
|
+ srli(dst, dst, LogKlassAlignmentInBytes);
|
|
+ }
|
|
+ if (xbase == xheapbase) {
|
|
+ reinit_heapbase();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::decode_heap_oop_not_null(Register r) {
|
|
+ decode_heap_oop_not_null(r, r);
|
|
+}
|
|
+
|
|
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
|
|
+ assert(UseCompressedOops, "should only be used for compressed headers");
|
|
+ assert(Universe::heap() != NULL, "java heap should be initialized");
|
|
+ // Cannot assert, unverified entry point counts instructions (see .ad file)
|
|
+ // vtableStubs also counts instructions in pd_code_size_limit.
|
|
+ // Also do not verify_oop as this is called by verify_oop.
|
|
+ if (Universe::narrow_oop_shift() != 0) {
|
|
+ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
|
|
+ slli(dst, src, LogMinObjAlignmentInBytes);
|
|
+ if (Universe::narrow_oop_base() != NULL) {
|
|
+ add(dst, xheapbase, dst);
|
|
+ }
|
|
+ } else {
|
|
+ assert(Universe::narrow_oop_base() == NULL, "sanity");
|
|
+ mv(dst, src);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::decode_heap_oop(Register d, Register s) {
|
|
+ if (Universe::narrow_oop_base() == NULL) {
|
|
+ if (Universe::narrow_oop_shift() != 0 || d != s) {
|
|
+ slli(d, s, Universe::narrow_oop_shift());
|
|
+ }
|
|
+ } else {
|
|
+ Label done;
|
|
+ mv(d, s);
|
|
+ beqz(s, done);
|
|
+ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
|
|
+ bind(done);
|
|
+ }
|
|
+ verify_oop(d, "broken oop in decode_heap_oop");
|
|
+}
|
|
+
|
|
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
|
|
+ Register tmp2, Register tmp3, DecoratorSet decorators) {
|
|
+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
|
|
+ Register thread_tmp, DecoratorSet decorators) {
|
|
+ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
|
|
+ Register thread_tmp, DecoratorSet decorators) {
|
|
+ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp);
|
|
+}
|
|
+
|
|
+// Used for storing NULLs.
|
|
+void MacroAssembler::store_heap_oop_null(Address dst) {
|
|
+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
|
|
+}
|
|
+
|
|
+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
|
|
+ bool want_remainder)
|
|
+{
|
|
+ // Full implementation of Java idiv and irem. The function
|
|
+ // returns the (pc) offset of the div instruction - may be needed
|
|
+ // for implicit exceptions.
|
|
+ //
|
|
+ // input : rs1: dividend
|
|
+ // rs2: divisor
|
|
+ //
|
|
+ // result: either
|
|
+ // quotient (= rs1 idiv rs2)
|
|
+ // remainder (= rs1 irem rs2)
|
|
+
|
|
+
|
|
+ int idivl_offset = offset();
|
|
+ if (!want_remainder) {
|
|
+ divw(result, rs1, rs2);
|
|
+ } else {
|
|
+ remw(result, rs1, rs2); // result = rs1 % rs2;
|
|
+ }
|
|
+ return idivl_offset;
|
|
+}
|
|
+
|
|
+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
|
|
+ bool want_remainder)
|
|
+{
|
|
+ // Full implementation of Java ldiv and lrem. The function
|
|
+ // returns the (pc) offset of the div instruction - may be needed
|
|
+ // for implicit exceptions.
|
|
+ //
|
|
+ // input : rs1: dividend
|
|
+ // rs2: divisor
|
|
+ //
|
|
+ // result: either
|
|
+ // quotient (= rs1 idiv rs2)
|
|
+ // remainder (= rs1 irem rs2)
|
|
+
|
|
+ int idivq_offset = offset();
|
|
+ if (!want_remainder) {
|
|
+ div(result, rs1, rs2);
|
|
+ } else {
|
|
+ rem(result, rs1, rs2); // result = rs1 % rs2;
|
|
+ }
|
|
+ return idivq_offset;
|
|
+}
|
|
+
|
|
+// Look up the method for a megamorphic invokeinterface call.
|
|
+// The target method is determined by <intf_klass, itable_index>.
|
|
+// The receiver klass is in recv_klass.
|
|
+// On success, the result will be in method_result, and execution falls through.
|
|
+// On failure, execution transfers to the given label.
|
|
+void MacroAssembler::lookup_interface_method(Register recv_klass,
|
|
+ Register intf_klass,
|
|
+ RegisterOrConstant itable_index,
|
|
+ Register method_result,
|
|
+ Register scan_tmp,
|
|
+ Label& L_no_such_interface,
|
|
+ bool return_method) {
|
|
+ assert_different_registers(recv_klass, intf_klass, scan_tmp);
|
|
+ assert_different_registers(method_result, intf_klass, scan_tmp);
|
|
+ assert(recv_klass != method_result || !return_method,
|
|
+ "recv_klass can be destroyed when mehtid isn't needed");
|
|
+ assert(itable_index.is_constant() || itable_index.as_register() == method_result,
|
|
+ "caller must be same register for non-constant itable index as for method");
|
|
+
|
|
+ // Compute start of first itableOffsetEntry (which is at the end of the vtable).
|
|
+ int vtable_base = in_bytes(Klass::vtable_start_offset());
|
|
+ int itentry_off = itableMethodEntry::method_offset_in_bytes();
|
|
+ int scan_step = itableOffsetEntry::size() * wordSize;
|
|
+ int vte_size = vtableEntry::size_in_bytes();
|
|
+ assert(vte_size == wordSize, "else adjust times_vte_scale");
|
|
+
|
|
+ lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
|
|
+
|
|
+ // %%% Could store the aligned, prescaled offset in the klassoop.
|
|
+ shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
|
|
+ add(scan_tmp, scan_tmp, vtable_base);
|
|
+
|
|
+ if (return_method) {
|
|
+ // Adjust recv_klass by scaled itable_index, so we can free itable_index.
|
|
+ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
|
|
+ if (itable_index.is_register()) {
|
|
+ slli(t0, itable_index.as_register(), 3);
|
|
+ } else {
|
|
+ mv(t0, itable_index.as_constant() << 3);
|
|
+ }
|
|
+ add(recv_klass, recv_klass, t0);
|
|
+ if (itentry_off) {
|
|
+ add(recv_klass, recv_klass, itentry_off);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Label search, found_method;
|
|
+
|
|
+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
|
|
+ beq(intf_klass, method_result, found_method);
|
|
+ bind(search);
|
|
+ // Check that the previous entry is non-null. A null entry means that
|
|
+ // the receiver class doesn't implement the interface, and wasn't the
|
|
+ // same as when the caller was compiled.
|
|
+ beqz(method_result, L_no_such_interface, /* is_far */ true);
|
|
+ addi(scan_tmp, scan_tmp, scan_step);
|
|
+ ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
|
|
+ bne(intf_klass, method_result, search);
|
|
+
|
|
+ bind(found_method);
|
|
+
|
|
+ // Got a hit.
|
|
+ if (return_method) {
|
|
+ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
|
|
+ add(method_result, recv_klass, scan_tmp);
|
|
+ ld(method_result, Address(method_result));
|
|
+ }
|
|
+}
|
|
+
|
|
+// virtual method calling
|
|
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
|
|
+ RegisterOrConstant vtable_index,
|
|
+ Register method_result) {
|
|
+ const int base = in_bytes(Klass::vtable_start_offset());
|
|
+ assert(vtableEntry::size() * wordSize == 8,
|
|
+ "adjust the scaling in the code below");
|
|
+ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
|
|
+
|
|
+ if (vtable_index.is_register()) {
|
|
+ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
|
|
+ ld(method_result, Address(method_result, vtable_offset_in_bytes));
|
|
+ } else {
|
|
+ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
|
|
+ Address addr = form_address(recv_klass, /* base */
|
|
+ vtable_offset_in_bytes, /* offset */
|
|
+ 12, /* expect offset bits */
|
|
+ method_result); /* temp reg */
|
|
+ ld(method_result, addr);
|
|
+ }
|
|
+}
|
|
+
|
|
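+// Emit a memory fence for the given ordering constraint. Adjacent barriers are merged by OR-ing
+// the new constraint into the previously emitted fence.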
+void MacroAssembler::membar(uint32_t order_constraint) {
|
|
+ if (!os::is_MP()) { return; }
|
|
+
|
|
+ address prev = pc() - NativeMembar::instruction_size;
|
|
+ address last = code()->last_insn();
|
|
+
|
|
+ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
|
|
+ NativeMembar *bar = NativeMembar_at(prev);
|
|
+ // We are merging two memory barrier instructions. On RISCV we
|
|
+ // can do this simply by ORing them together.
|
|
+ bar->set_kind(bar->get_kind() | order_constraint);
|
|
+ BLOCK_COMMENT("merged membar");
|
|
+ } else {
|
|
+ code()->set_last_insn(pc());
|
|
+
|
|
+ uint32_t predecessor = 0;
|
|
+ uint32_t successor = 0;
|
|
+
|
|
+ membar_mask_to_pred_succ(order_constraint, predecessor, successor);
|
|
+ fence(predecessor, successor);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::check_klass_subtype(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Label& L_success) {
|
|
+ Label L_failure;
|
|
+ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
|
|
+ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
|
|
+ bind(L_failure);
|
|
+}
|
|
+
|
|
+// Write serialization page so VM thread can do a pseudo remote membar.
|
|
+// We use the current thread pointer to calculate a thread specific
|
|
+// offset to write to within the page. This minimizes bus traffic
|
|
+// due to cache line collision.
|
|
+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) {
|
|
+ srli(tmp2, thread, os::get_serialize_page_shift_count());
|
|
+
|
|
+ int mask = os::vm_page_size() - sizeof(int);
|
|
+ andi(tmp2, tmp2, mask, tmp1);
|
|
+
|
|
+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page());
|
|
+ membar(MacroAssembler::AnyAny);
|
|
+ sw(zr, Address(tmp1));
|
|
+}
|
|
+
|
|
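+// Poll for a pending safepoint and branch to slow_path if one is requested, using either the
+// thread-local polling word or the global SafepointSynchronize state.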
+void MacroAssembler::safepoint_poll(Label& slow_path) {
|
|
+ if (SafepointMechanism::uses_thread_local_poll()) {
|
|
+ ld(t1, Address(xthread, Thread::polling_page_offset()));
|
|
+ andi(t0, t1, SafepointMechanism::poll_bit());
|
|
+ bnez(t0, slow_path);
|
|
+ } else {
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
|
|
+ lwu(t0, Address(t0, offset));
|
|
+ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
|
|
+ bnez(t0, slow_path);
|
|
+ }
|
|
+}
|
|
+
|
|
+// Just like safepoint_poll, but use an acquiring load for thread-
|
|
+// local polling.
|
|
+//
|
|
+// We need an acquire here to ensure that any subsequent load of the
|
|
+// global SafepointSynchronize::_state flag is ordered after this load
|
|
+// of the local Thread::_polling page. We don't want this poll to
|
|
+// return false (i.e. not safepointing) and a later poll of the global
|
|
+// SafepointSynchronize::_state spuriously to return true.
|
|
+//
|
|
+// This is to avoid a race when we're in a native->Java transition
|
|
+// racing the code which wakes up from a safepoint.
|
|
+//
|
|
+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
|
|
+ if (SafepointMechanism::uses_thread_local_poll()) {
|
|
+ membar(MacroAssembler::AnyAny);
|
|
+ ld(t1, Address(xthread, Thread::polling_page_offset()));
|
|
+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ andi(t0, t1, SafepointMechanism::poll_bit());
|
|
+ bnez(t0, slow_path);
|
|
+ } else {
|
|
+ safepoint_poll(slow_path);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
|
|
+ Label &succeed, Label *fail) {
|
|
+ // oldv holds comparison value
|
|
+ // newv holds value to write in exchange
|
|
+ // addr identifies memory word to compare against/update
|
|
+ Label retry_load, nope;
|
|
+ bind(retry_load);
|
|
+ // flush and load exclusive from the memory location
|
|
+ // and fail if it is not what we expect
|
|
+ lr_d(tmp, addr, Assembler::aqrl);
|
|
+ bne(tmp, oldv, nope);
|
|
+ // if we store+flush with no intervening write tmp will be zero
|
|
+ sc_d(tmp, newv, addr, Assembler::rl);
|
|
+ beqz(tmp, succeed);
|
|
+ // retry so we only ever return after a load fails to compare;
|
|
+ // this ensures we don't return a stale value after a failed write.
|
|
+ j(retry_load);
|
|
+ // if the memory word differs we return it in oldv and signal a fail
|
|
+ bind(nope);
|
|
+ membar(AnyAny);
|
|
+ mv(oldv, tmp);
|
|
+ if (fail != NULL) {
|
|
+ j(*fail);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
|
|
+ Label &succeed, Label *fail) {
|
|
+ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
|
|
+ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
|
|
+}
|
|
+
|
|
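+// LR/SC primitives: load_reserved and store_conditional wrap lr.w/lr.d and sc.w/sc.d for the
+// given operand size, always using t0 for the loaded value / store-conditional result.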
+void MacroAssembler::load_reserved(Register addr,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire) {
|
|
+ switch (size) {
|
|
+ case int64:
|
|
+ lr_d(t0, addr, acquire);
|
|
+ break;
|
|
+ case int32:
|
|
+ lr_w(t0, addr, acquire);
|
|
+ break;
|
|
+ case uint32:
|
|
+ lr_w(t0, addr, acquire);
|
|
+ zero_extend(t0, t0, 32);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::store_conditional(Register addr,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl release) {
|
|
+ switch (size) {
|
|
+ case int64:
|
|
+ sc_d(t0, new_val, addr, release);
|
|
+ break;
|
|
+ case int32:
|
|
+ case uint32:
|
|
+ sc_w(t0, new_val, addr, release);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Register tmp1, Register tmp2, Register tmp3) {
|
|
+ assert(size == int8 || size == int16, "unsupported operand size");
|
|
+
|
|
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
|
|
+
|
|
+ andi(shift, addr, 3);
|
|
+ slli(shift, shift, 3);
|
|
+
|
|
+ andi(aligned_addr, addr, ~3);
|
|
+
|
|
+ if (size == int8) {
|
|
+ mv(mask, 0xff);
|
|
+ } else {
|
|
+ mv(mask, -1);
|
|
+ zero_extend(mask, mask, 16);
|
|
+ }
|
|
+ sll(mask, mask, shift);
|
|
+
|
|
+ xori(not_mask, mask, -1);
|
|
+
|
|
+ sll(expected, expected, shift);
|
|
+ andr(expected, expected, mask);
|
|
+
|
|
+ sll(new_val, new_val, shift);
|
|
+ andr(new_val, new_val, mask);
|
|
+}
|
|
+
|
|
+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
|
|
+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
|
|
+// which must operate on a 4-byte aligned address.
|
|
+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result, bool result_as_bool,
|
|
+ Register tmp1, Register tmp2, Register tmp3) {
|
|
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
|
|
+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
|
|
+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
|
|
+
|
|
+ Label retry, fail, done;
|
|
+
|
|
+ bind(retry);
|
|
+ lr_w(old, aligned_addr, acquire);
|
|
+ andr(tmp, old, mask);
|
|
+ bne(tmp, expected, fail);
|
|
+
|
|
+ andr(tmp, old, not_mask);
|
|
+ orr(tmp, tmp, new_val);
|
|
+ sc_w(tmp, tmp, aligned_addr, release);
|
|
+ bnez(tmp, retry);
|
|
+
|
|
+ if (result_as_bool) {
|
|
+ mv(result, 1);
|
|
+ j(done);
|
|
+
|
|
+ bind(fail);
|
|
+ mv(result, zr);
|
|
+
|
|
+ bind(done);
|
|
+ } else {
|
|
+ andr(tmp, old, mask);
|
|
+
|
|
+ bind(fail);
|
|
+ srl(result, tmp, shift);
|
|
+ }
|
|
+
|
|
+ if (size == int8) {
|
|
+ sign_extend(result, result, 8);
|
|
+ } else if (size == int16) {
|
|
+ sign_extend(result, result, 16);
|
|
+ }
|
|
+}
|
|
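// A minimal standalone sketch (not part of this patch) of the same technique in plain
// C++: emulate a byte-wide compare-and-swap with a CAS on the enclosing 4-byte aligned
// word, mirroring the shift/mask sequence built above around lr.w/sc.w. It assumes a
// little-endian target (as RISC-V is); the names and the cast to std::atomic are
// simplifications for illustration.
#include <atomic>
#include <cstdint>

bool cas_byte(std::uint8_t* addr, std::uint8_t expected, std::uint8_t desired) {
  std::uintptr_t a = reinterpret_cast<std::uintptr_t>(addr);
  auto* word = reinterpret_cast<std::atomic<std::uint32_t>*>(a & ~std::uintptr_t(3));
  unsigned shift = static_cast<unsigned>(a & 3) * 8;       // bit position of the byte
  std::uint32_t mask = std::uint32_t(0xff) << shift;

  std::uint32_t old_word = word->load(std::memory_order_relaxed);
  for (;;) {
    if (std::uint8_t((old_word & mask) >> shift) != expected) {
      return false;                                         // byte differs: fail, like the "fail" label
    }
    std::uint32_t new_word = (old_word & ~mask) | (std::uint32_t(desired) << shift);
    if (word->compare_exchange_weak(old_word, new_word,
                                    std::memory_order_acq_rel,
                                    std::memory_order_relaxed)) {
      return true;                                          // swapped the byte, neighbours untouched
    }
    // compare_exchange_weak refreshed old_word; retry, like the lr.w/sc.w loop
  }
}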
+
|
|
+// weak cmpxchg narrow value will kill t0, t1, expected, new_val and tmps.
|
|
+// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
|
|
+// the weak CAS. The major difference is that it simply fails when the store conditional
|
|
+// fails.
|
|
+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result,
|
|
+ Register tmp1, Register tmp2, Register tmp3) {
|
|
+ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
|
|
+ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
|
|
+ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
|
|
+
|
|
+ Label fail, done;
|
|
+
|
|
+ lr_w(old, aligned_addr, acquire);
|
|
+ andr(tmp, old, mask);
|
|
+ bne(tmp, expected, fail);
|
|
+
|
|
+ andr(tmp, old, not_mask);
|
|
+ orr(tmp, tmp, new_val);
|
|
+ sc_w(tmp, tmp, aligned_addr, release);
|
|
+ bnez(tmp, fail);
|
|
+
|
|
+ // Success
|
|
+ mv(result, 1);
|
|
+ j(done);
|
|
+
|
|
+ // Fail
|
|
+ bind(fail);
|
|
+ mv(result, zr);
|
|
+
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmpxchg(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result, bool result_as_bool) {
|
|
+ assert(size != int8 && size != int16, "unsupported operand size");
|
|
+
|
|
+ Label retry_load, done, ne_done;
|
|
+ bind(retry_load);
|
|
+ load_reserved(addr, size, acquire);
|
|
+ bne(t0, expected, ne_done);
|
|
+ store_conditional(addr, new_val, size, release);
|
|
+ bnez(t0, retry_load);
|
|
+
|
|
+ // equal, succeed
|
|
+ if (result_as_bool) {
|
|
+ mv(result, 1);
|
|
+ } else {
|
|
+ mv(result, expected);
|
|
+ }
|
|
+ j(done);
|
|
+
|
|
+ // not equal, failed
|
|
+ bind(ne_done);
|
|
+ if (result_as_bool) {
|
|
+ mv(result, zr);
|
|
+ } else {
|
|
+ mv(result, t0);
|
|
+ }
|
|
+
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result) {
|
|
+ assert(size != int8 && size != int16, "unsupported operand size");
|
|
+
|
|
+ Label fail, done;
|
|
+ load_reserved(addr, size, acquire);
|
|
+ bne(t0, expected, fail);
|
|
+ store_conditional(addr, new_val, size, release);
|
|
+ bnez(t0, fail);
|
|
+
|
|
+ // Success
|
|
+ mv(result, 1);
|
|
+ j(done);
|
|
+
|
|
+ // Fail
|
|
+ bind(fail);
|
|
+ mv(result, zr);
|
|
+
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \
|
|
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
|
|
+ prev = prev->is_valid() ? prev : zr; \
|
|
+ if (incr.is_register()) { \
|
|
+ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
|
|
+ } else { \
|
|
+ mv(t0, incr.as_constant()); \
|
|
+ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
|
|
+ } \
|
|
+ return; \
|
|
+}
|
|
+
|
|
+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
|
|
+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
|
|
+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
|
|
+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
|
|
+
|
|
+#undef ATOMIC_OP
|
|
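// Rough C++ equivalents (not part of this patch) of the AMO forms generated above:
// the plain variants are relaxed, the "al" variants are acquire+release, and the
// returned value is the prior memory contents, matching what amoadd writes to rd.
#include <atomic>
#include <cstdint>

std::int64_t atomic_add(std::atomic<std::int64_t>& v, std::int64_t incr) {
  return v.fetch_add(incr, std::memory_order_relaxed);      // ~ amoadd.d
}

std::int64_t atomic_addal(std::atomic<std::int64_t>& v, std::int64_t incr) {
  return v.fetch_add(incr, std::memory_order_acq_rel);      // ~ amoadd.d.aqrl
}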
+
|
|
+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \
|
|
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
|
|
+ prev = prev->is_valid() ? prev : zr; \
|
|
+ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \
|
|
+ return; \
|
|
+}
|
|
+
|
|
+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
|
|
+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
|
|
+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
|
|
+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
|
|
+
|
|
+#undef ATOMIC_XCHG
|
|
+
|
|
+#define ATOMIC_XCHGU(OP1, OP2) \
|
|
+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \
|
|
+ atomic_##OP2(prev, newv, addr); \
|
|
+ zero_extend(prev, prev, 32); \
|
|
+ return; \
|
|
+}
|
|
+
|
|
+ATOMIC_XCHGU(xchgwu, xchgw)
|
|
+ATOMIC_XCHGU(xchgalwu, xchgalw)
|
|
+
|
|
+#undef ATOMIC_XCHGU
|
|
+
|
|
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) {
|
|
+ assert(UseBiasedLocking, "why call this otherwise?");
|
|
+
|
|
+ // Check for biased locking unlock case, which is a no-op
|
|
+ // Note: we do not have to check the thread ID for two reasons.
|
|
+ // First, the interpreter checks for IllegalMonitorStateException at
|
|
+ // a higher level. Second, if the bias was revoked while we held the
|
|
+ // lock, the object could not be rebiased toward another thread, so
|
|
+ // the bias bit would be clear.
|
|
+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
|
|
+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // low 3 bits (lock + biased lock bits)
|
|
+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern);
|
|
+ if (flag->is_valid()) { mv(flag, tmp_reg); }
|
|
+ beqz(tmp_reg, done);
|
|
+}
|
|
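// A tiny C++ sketch (not part of this patch) of the mark-word test performed above.
// The constants mirror markOopDesc for illustration: the low three bits hold the lock
// state, and the biased-and-unlocked pattern is 0b101.
#include <cstdint>

constexpr std::uintptr_t kBiasedLockMaskInPlace = 0x7;  // biased_lock_mask_in_place
constexpr std::uintptr_t kBiasedLockPattern     = 0x5;  // biased_lock_pattern (0b101)

bool has_bias_pattern(std::uintptr_t mark_word) {
  // same test as the andi/sub pair above
  return (mark_word & kBiasedLockMaskInPlace) == kBiasedLockPattern;
}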
+
|
|
+void MacroAssembler::load_prototype_header(Register dst, Register src) {
|
|
+ load_klass(dst, src);
|
|
+ ld(dst, Address(dst, Klass::prototype_header_offset()));
|
|
+}
|
|
+
|
|
+int MacroAssembler::biased_locking_enter(Register lock_reg,
|
|
+ Register obj_reg,
|
|
+ Register swap_reg,
|
|
+ Register tmp_reg,
|
|
+ bool swap_reg_contains_mark,
|
|
+ Label& done,
|
|
+ Label* slow_case,
|
|
+ BiasedLockingCounters* counters,
|
|
+ Register flag) {
|
|
+ assert(UseBiasedLocking, "why call this otherwise?");
|
|
+ assert_different_registers(lock_reg, obj_reg, swap_reg);
|
|
+
|
|
+ if (PrintBiasedLockingStatistics && counters == NULL) {
|
|
+ counters = BiasedLocking::counters();
|
|
+ }
|
|
+
|
|
+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag);
|
|
+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
|
|
+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
|
|
+
|
|
+ // Biased locking
|
|
+ // See whether the lock is currently biased toward our thread and
|
|
+ // whether the epoch is still valid
|
|
+ // Note that the runtime guarantees sufficient alignment of JavaThread
|
|
+ // pointers to allow age to be placed into low bits
|
|
+ // First check to see whether biasing is even enabled for this object
|
|
+ Label cas_label;
|
|
+ int null_check_offset = -1;
|
|
+ if (!swap_reg_contains_mark) {
|
|
+ null_check_offset = offset();
|
|
+ ld(swap_reg, mark_addr);
|
|
+ }
|
|
+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place);
|
|
+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern);
|
|
+ bnez(t0, cas_label); // don't care flag unless jumping to done
|
|
+ // The bias pattern is present in the object's header. Need to check
|
|
+ // whether the bias owner and the epoch are both still current.
|
|
+ load_prototype_header(tmp_reg, obj_reg);
|
|
+ orr(tmp_reg, tmp_reg, xthread);
|
|
+ xorr(tmp_reg, swap_reg, tmp_reg);
|
|
+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place));
|
|
+ if (flag->is_valid()) {
|
|
+ mv(flag, tmp_reg);
|
|
+ }
|
|
+
|
|
+ if (counters != NULL) {
|
|
+ Label around;
|
|
+ bnez(tmp_reg, around);
|
|
+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0);
|
|
+ j(done);
|
|
+ bind(around);
|
|
+ } else {
|
|
+ beqz(tmp_reg, done);
|
|
+ }
|
|
+
|
|
+ Label try_revoke_bias;
|
|
+ Label try_rebias;
|
|
+
|
|
+ // At this point we know that the header has the bias pattern and
|
|
+ // that we are not the bias owner in the current epoch. We need to
|
|
+ // figure out more details about the state of the header in order to
|
|
+ // know what operations can be legally performed on the object's
|
|
+ // header.
|
|
+
|
|
+ // If the low three bits in the xor result aren't clear, that means
|
|
+ // the prototype header is no longer biased and we have to revoke
|
|
+ // the bias on this object.
|
|
+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place);
|
|
+ bnez(t0, try_revoke_bias);
|
|
+
|
|
+ // Biasing is still enabled for this data type. See whether the
|
|
+ // epoch of the current bias is still valid, meaning that the epoch
|
|
+ // bits of the mark word are equal to the epoch bits of the
|
|
+ // prototype header. (Note that the prototype header's epoch bits
|
|
+ // only change at a safepoint.) If not, attempt to rebias the object
|
|
+ // toward the current thread. Note that we must be absolutely sure
|
|
+ // that the current epoch is invalid in order to do this because
|
|
+ // otherwise the manipulations it performs on the mark word are
|
|
+ // illegal.
|
|
+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place);
|
|
+ bnez(t0, try_rebias);
|
|
+
|
|
+ // The epoch of the current bias is still valid but we know nothing
|
|
+ // about the owner; it might be set or it might be clear. Try to
|
|
+ // acquire the bias of the object using an atomic operation. If this
|
|
+ // fails we will go in to the runtime to revoke the object's bias.
|
|
+ // Note that we first construct the presumed unbiased header so we
|
|
+ // don't accidentally blow away another thread's valid bias.
|
|
+ {
|
|
+ Label cas_success;
|
|
+ Label counter;
|
|
+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
|
|
+ andr(swap_reg, swap_reg, t0);
|
|
+ orr(tmp_reg, swap_reg, xthread);
|
|
+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
|
|
+ // cas failed here if slow_case == NULL
|
|
+ if (flag->is_valid()) {
|
|
+ mv(flag, 1);
|
|
+ j(counter);
|
|
+ }
|
|
+
|
|
+ // If the biasing toward our thread failed, this means that
|
|
+ // another thread succeeded in biasing it toward itself and we
|
|
+ // need to revoke that bias. The revocation will occur in the
|
|
+ // interpreter runtime in the slow case.
|
|
+ bind(cas_success);
|
|
+ if (flag->is_valid()) {
|
|
+ mv(flag, 0);
|
|
+ bind(counter);
|
|
+ }
|
|
+
|
|
+ if (counters != NULL) {
|
|
+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
|
|
+ tmp_reg, t0);
|
|
+ }
|
|
+ }
|
|
+ j(done);
|
|
+
|
|
+ bind(try_rebias);
|
|
+ // At this point we know the epoch has expired, meaning that the
|
|
+ // current "bias owner", if any, is actually invalid. Under these
|
|
+ // circumstances _only_, we are allowed to use the current header's
|
|
+ // value as the comparison value when doing the cas to acquire the
|
|
+ // bias in the current epoch. In other words, we allow transfer of
|
|
+ // the bias from one thread to another directly in this situation.
|
|
+ //
|
|
+ // FIXME: due to a lack of registers we currently blow away the age
|
|
+ // bits in this situation. Should attempt to preserve them.
|
|
+ {
|
|
+ Label cas_success;
|
|
+ Label counter;
|
|
+ load_prototype_header(tmp_reg, obj_reg);
|
|
+ orr(tmp_reg, xthread, tmp_reg);
|
|
+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
|
|
+ // cas failed here if slow_case == NULL
|
|
+ if (flag->is_valid()) {
|
|
+ mv(flag, 1);
|
|
+ j(counter);
|
|
+ }
|
|
+
|
|
+ // If the biasing toward our thread failed, then another thread
|
|
+ // succeeded in biasing it toward itself and we need to revoke that
|
|
+ // bias. The revocation will occur in the runtime in the slow case.
|
|
+ bind(cas_success);
|
|
+ if (flag->is_valid()) {
|
|
+ mv(flag, 0);
|
|
+ bind(counter);
|
|
+ }
|
|
+
|
|
+ if (counters != NULL) {
|
|
+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
|
|
+ tmp_reg, t0);
|
|
+ }
|
|
+ }
|
|
+ j(done);
|
|
+
|
|
+ // don't care flag unless jumping to done
|
|
+ bind(try_revoke_bias);
|
|
+ // The prototype mark in the klass doesn't have the bias bit set any
|
|
+ // more, indicating that objects of this data type are not supposed
|
|
+ // to be biased any more. We are going to try to reset the mark of
|
|
+ // this object to the prototype value and fall through to the
|
|
+ // CAS-based locking scheme. Note that if our CAS fails, it means
|
|
+ // that another thread raced us for the privilege of revoking the
|
|
+ // bias of this particular object, so it's okay to continue in the
|
|
+ // normal locking code.
|
|
+ //
|
|
+ // FIXME: due to a lack of registers we currently blow away the age
|
|
+ // bits in this situation. Should attempt to preserve them.
|
|
+ {
|
|
+ Label cas_success, nope;
|
|
+ load_prototype_header(tmp_reg, obj_reg);
|
|
+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope);
|
|
+ bind(cas_success);
|
|
+
|
|
+ // Fall through to the normal CAS-based lock, because no matter what
|
|
+ // the result of the above CAS, some thread must have succeeded in
|
|
+ // removing the bias bit from the object's header.
|
|
+ if (counters != NULL) {
|
|
+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
|
|
+ t0);
|
|
+ }
|
|
+ bind(nope);
|
|
+ }
|
|
+
|
|
+ bind(cas_label);
|
|
+
|
|
+ return null_check_offset;
|
|
+}
|
|
+
|
|
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
|
|
+ Label retry_load;
|
|
+ bind(retry_load);
|
|
+ // load reserved from the memory location
|
|
+ lr_w(tmp, counter_addr);
|
|
+ addw(tmp, tmp, 1);
|
|
+ // if the store conditional succeeds with no intervening write, tmp will be zero
|
|
+ sc_w(tmp, tmp, counter_addr);
|
|
+ bnez(tmp, retry_load);
|
|
+}
|
|
+
|
|
+void MacroAssembler::far_jump(Address entry, Register tmp) {
|
|
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
|
|
+ assert(CodeCache::find_blob(entry.target()) != NULL,
|
|
+ "destination of far call not found in code cache");
|
|
+ int32_t offset = 0;
|
|
+ if (far_branches()) {
|
|
+ // We can use auipc + jalr here because we know that the total size of
|
|
+ // the code cache cannot exceed 2Gb.
|
|
+ la_patchable(tmp, entry, offset);
|
|
+ jalr(x0, tmp, offset);
|
|
+ } else {
|
|
+ j(entry);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::far_call(Address entry, Register tmp) {
|
|
+ assert(ReservedCodeCacheSize < 4*G, "branch out of range");
|
|
+ assert(CodeCache::find_blob(entry.target()) != NULL,
|
|
+ "destination of far call not found in code cache");
|
|
+ int32_t offset = 0;
|
|
+ if (far_branches()) {
|
|
+ // We can use auipc + jalr here because we know that the total size of
|
|
+ // the code cache cannot exceed 2Gb.
|
|
+ la_patchable(tmp, entry, offset);
|
|
+ jalr(x1, tmp, offset); // link
|
|
+ } else {
|
|
+ jal(entry); // link
|
|
+ }
|
|
+}
|
|
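// An illustrative C++ helper (not part of this patch) for the reachability reasoning
// behind far_branches(): a single jal reaches +/-1 MiB from the call site, while an
// auipc+jalr pair reaches roughly +/-2 GiB, which comfortably covers a code cache that
// stays below 2 GiB.
#include <cstdint>

bool fits_in_jal(std::int64_t distance) {
  return distance >= -(1LL << 20) && distance < (1LL << 20);
}

bool fits_in_auipc_jalr(std::int64_t distance) {
  // asymmetric reach, as noted for la_patchable below: [-(2G + 2K), 2G - 2K)
  return distance >= -((1LL << 31) + (1LL << 11)) && distance < (1LL << 31) - (1LL << 11);
}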
+
|
|
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Label* L_success,
|
|
+ Label* L_failure,
|
|
+ Label* L_slow_path,
|
|
+ Register super_check_offset) {
|
|
+ assert_different_registers(sub_klass, super_klass, tmp_reg);
|
|
+ bool must_load_sco = (super_check_offset == noreg);
|
|
+ if (must_load_sco) {
|
|
+ assert(tmp_reg != noreg, "supply either a tmp or a register offset");
|
|
+ } else {
|
|
+ assert_different_registers(sub_klass, super_klass, super_check_offset);
|
|
+ }
|
|
+
|
|
+ Label L_fallthrough;
|
|
+ int label_nulls = 0;
|
|
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
|
|
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
|
|
+ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
|
|
+ assert(label_nulls <= 1, "at most one NULL in batch");
|
|
+
|
|
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
|
|
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
|
+ Address super_check_offset_addr(super_klass, sco_offset);
|
|
+
|
|
+ // Hacked jmp, which may only be used just before L_fallthrough.
|
|
+#define final_jmp(label) \
|
|
+ if (&(label) == &L_fallthrough) { /*do nothing*/ } \
|
|
+ else j(label) /*omit semi*/
|
|
+
|
|
+ // If the pointers are equal, we are done (e.g., String[] elements).
|
|
+ // This self-check enables sharing of secondary supertype arrays among
|
|
+ // non-primary types such as array-of-interface. Otherwise, each such
|
|
+ // type would need its own customized SSA.
|
|
+ // We move this check to the front of the fast path because many
|
|
+ // type checks are in fact trivially successful in this manner,
|
|
+ // so we get a nicely predicted branch right at the start of the check.
|
|
+ beq(sub_klass, super_klass, *L_success);
|
|
+
|
|
+ // Check the supertype display:
|
|
+ if (must_load_sco) {
|
|
+ lwu(tmp_reg, super_check_offset_addr);
|
|
+ super_check_offset = tmp_reg;
|
|
+ }
|
|
+ add(t0, sub_klass, super_check_offset);
|
|
+ Address super_check_addr(t0);
|
|
+ ld(t0, super_check_addr); // load displayed supertype
|
|
+
|
|
+ // This check has worked decisively for primary supers.
|
|
+ // Secondary supers are sought in the super_cache ('super_cache_addr').
|
|
+ // (Secondary supers are interfaces and very deeply nested subtypes.)
|
|
+ // This works in the same check above because of a tricky aliasing
|
|
+ // between the super_cache and the primary super display elements.
|
|
+ // (The 'super_check_addr' can address either, as the case requires.)
|
|
+ // Note that the cache is updated below if it does not help us find
|
|
+ // what we need immediately.
|
|
+ // So if it was a primary super, we can just fail immediately.
|
|
+ // Otherwise, it's the slow path for us (no success at this point).
|
|
+
|
|
+ beq(super_klass, t0, *L_success);
|
|
+ mv(t1, sc_offset);
|
|
+ if (L_failure == &L_fallthrough) {
|
|
+ beq(super_check_offset, t1, *L_slow_path);
|
|
+ } else {
|
|
+ bne(super_check_offset, t1, *L_failure, /* is_far */ true);
|
|
+ final_jmp(*L_slow_path);
|
|
+ }
|
|
+
|
|
+ bind(L_fallthrough);
|
|
+
|
|
+#undef final_jmp
|
|
+}
|
|
+
|
|
+// Scans count pointer sized words at [addr] for occurrence of value,
|
|
+// generic
|
|
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
|
|
+ Register tmp) {
|
|
+ Label Lloop, Lexit;
|
|
+ beqz(count, Lexit);
|
|
+ bind(Lloop);
|
|
+ ld(tmp, addr);
|
|
+ beq(value, tmp, Lexit);
|
|
+ add(addr, addr, wordSize);
|
|
+ sub(count, count, 1);
|
|
+ bnez(count, Lloop);
|
|
+ bind(Lexit);
|
|
+}
|
|
+
|
|
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Register tmp2_reg,
|
|
+ Label* L_success,
|
|
+ Label* L_failure) {
|
|
+ assert_different_registers(sub_klass, super_klass, tmp_reg);
|
|
+ if (tmp2_reg != noreg) {
|
|
+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0);
|
|
+ }
|
|
+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg)
|
|
+
|
|
+ Label L_fallthrough;
|
|
+ int label_nulls = 0;
|
|
+ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
|
|
+ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
|
|
+
|
|
+ assert(label_nulls <= 1, "at most one NULL in the batch");
|
|
+
|
|
+ // A couple of useful fields in sub_klass:
|
|
+ int ss_offset = in_bytes(Klass::secondary_supers_offset());
|
|
+ int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
|
|
+ Address secondary_supers_addr(sub_klass, ss_offset);
|
|
+ Address super_cache_addr( sub_klass, sc_offset);
|
|
+
|
|
+ BLOCK_COMMENT("check_klass_subtype_slow_path");
|
|
+
|
|
+ // Do a linear scan of the secondary super-klass chain.
|
|
+ // This code is rarely used, so simplicity is a virtue here.
|
|
+ // The repne_scan instruction uses fixed registers, which we must spill.
|
|
+ // Don't worry too much about pre-existing connections with the input regs.
|
|
+
|
|
+ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
|
|
+ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
|
|
+
|
|
+ RegSet pushed_registers;
|
|
+ if (!IS_A_TEMP(x12)) {
|
|
+ pushed_registers += x12;
|
|
+ }
|
|
+ if (!IS_A_TEMP(x15)) {
|
|
+ pushed_registers += x15;
|
|
+ }
|
|
+
|
|
+ if (super_klass != x10 || UseCompressedOops) {
|
|
+ if (!IS_A_TEMP(x10)) {
|
|
+ pushed_registers += x10;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ push_reg(pushed_registers, sp);
|
|
+
|
|
+ // Get super_klass value into x10 (even if it was in x15 or x12)
|
|
+ mv(x10, super_klass);
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
|
|
+ Address pst_counter_addr(t1);
|
|
+ ld(t0, pst_counter_addr);
|
|
+ add(t0, t0, 1);
|
|
+ sd(t0, pst_counter_addr);
|
|
+#endif // PRODUCT
|
|
+
|
|
+ // We will consult the secondary-super array.
|
|
+ ld(x15, secondary_supers_addr);
|
|
+ // Load the array length.
|
|
+ lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
|
|
+ // Skip to start of data.
|
|
+ add(x15, x15, Array<Klass*>::base_offset_in_bytes());
|
|
+
|
|
+ // Set t0 to an obvious invalid value, falling through by default
|
|
+ mv(t0, -1);
|
|
+ // Scan X12 words at [X15] for an occurrence of X10.
|
|
+ repne_scan(x15, x10, x12, t0);
|
|
+
|
|
+ // pop will restore x10, so we should use a temp register to keep its value
|
|
+ mv(t1, x10);
|
|
+
|
|
+ // Unspill the temp. registers:
|
|
+ pop_reg(pushed_registers, sp);
|
|
+
|
|
+ bne(t1, t0, *L_failure);
|
|
+
|
|
+ // Success. Cache the super we found and proceed in triumph.
|
|
+ sd(super_klass, super_cache_addr);
|
|
+
|
|
+ if (L_success != &L_fallthrough) {
|
|
+ j(*L_success);
|
|
+ }
|
|
+
|
|
+#undef IS_A_TEMP
|
|
+
|
|
+ bind(L_fallthrough);
|
|
+}
|
|
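// A simplified C++ sketch (not part of this patch) of the slow-path strategy above:
// linearly scan the secondary supers array and, on a hit, cache the super so the next
// fast-path check succeeds immediately. The struct is illustrative, not HotSpot's Klass.
#include <cstddef>

struct KlassLike {
  KlassLike** secondary_supers;        // array of secondary supertypes
  std::size_t secondary_supers_len;    // its length
  KlassLike*  secondary_super_cache;   // updated on a successful slow-path lookup
};

bool slow_path_is_subtype(KlassLike* sub, KlassLike* super) {
  for (std::size_t i = 0; i < sub->secondary_supers_len; i++) {
    if (sub->secondary_supers[i] == super) {
      sub->secondary_super_cache = super;   // "Cache the super we found and proceed"
      return true;
    }
  }
  return false;
}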
+
|
|
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
|
|
+void MacroAssembler::tlab_allocate(Register obj,
|
|
+ Register var_size_in_bytes,
|
|
+ int con_size_in_bytes,
|
|
+ Register tmp1,
|
|
+ Register tmp2,
|
|
+ Label& slow_case,
|
|
+ bool is_far) {
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
|
|
+}
|
|
+
|
|
+// Defines obj, preserves var_size_in_bytes
|
|
+void MacroAssembler::eden_allocate(Register obj,
|
|
+ Register var_size_in_bytes,
|
|
+ int con_size_in_bytes,
|
|
+ Register tmp1,
|
|
+ Label& slow_case,
|
|
+ bool is_far) {
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far);
|
|
+}
|
|
+
|
|
+
|
|
+// get_thread() can be called anywhere inside generated code so we
|
|
+// need to save whatever non-callee save context might get clobbered
|
|
+// by the call to Thread::current() or, indeed, the call setup code
|
|
+void MacroAssembler::get_thread(Register thread) {
|
|
+ // save all call-clobbered regs except thread
|
|
+ RegSet saved_regs = RegSet::of(x10) + ra - thread;
|
|
+ push_reg(saved_regs, sp);
|
|
+
|
|
+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
|
|
+ jalr(ra);
|
|
+ if (thread != c_rarg0) {
|
|
+ mv(thread, c_rarg0);
|
|
+ }
|
|
+
|
|
+ // restore pushed registers
|
|
+ pop_reg(saved_regs, sp);
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_byte_map_base(Register reg) {
|
|
+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
|
|
+ mv(reg, (uint64_t)byte_map_base);
|
|
+}
|
|
+
|
|
+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
|
|
+ relocInfo::relocType rtype = dest.rspec().reloc()->type();
|
|
+ unsigned long low_address = (uintptr_t)CodeCache::low_bound();
|
|
+ unsigned long high_address = (uintptr_t)CodeCache::high_bound();
|
|
+ unsigned long dest_address = (uintptr_t)dest.target();
|
|
+ long offset_low = dest_address - low_address;
|
|
+ long offset_high = dest_address - high_address;
|
|
+
|
|
+ assert(is_valid_riscv64_address(dest.target()), "bad address");
|
|
+ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
|
|
+
|
|
+ code_section()->relocate(pc(), dest.rspec());
|
|
+ // RISC-V doesn't compute a page-aligned address, in order to partially
|
|
+ // compensate for the use of *signed* offsets in its base+disp12
|
|
+ // addressing mode (RISC-V's PC-relative reach remains asymmetric
|
|
+ // [-(2G + 2K), 2G - 2K)).
|
|
+ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
|
|
+ int64_t distance = dest.target() - pc();
|
|
+ auipc(reg1, (int32_t)distance + 0x800);
|
|
+ offset = ((int32_t)distance << 20) >> 20;
|
|
+ } else {
|
|
+ movptr_with_offset(reg1, dest.target(), offset);
|
|
+ }
|
|
+}
|
|
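// A small standalone check (not part of this patch) of the offset split used above:
// biasing the distance by 0x800 before taking the upper 20 bits makes the
// sign-extended low 12 bits recombine to the exact distance.
#include <cassert>
#include <cstdint>

int main() {
  for (std::int64_t d : {0LL, 2047LL, 2048LL, -2048LL, 123456LL, -123456LL}) {
    // low 12 bits, sign-extended; equivalent to "((int32_t)distance << 20) >> 20" above
    std::int32_t lo = static_cast<std::int32_t>(static_cast<std::uint64_t>(d) & 0xfff);
    if (lo >= 0x800) {
      lo -= 0x1000;
    }
    // what the auipc contributes after the +0x800 bias: a multiple of 0x1000
    std::int64_t hi = ((d + 0x800) >> 12) * 0x1000;
    assert(hi + lo == d);          // auipc value + 12-bit displacement == distance
  }
  return 0;
}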
+
|
|
+void MacroAssembler::build_frame(int framesize) {
|
|
+ assert(framesize > 0, "framesize must be > 0");
|
|
+ sub(sp, sp, framesize);
|
|
+ sd(fp, Address(sp, framesize - 2 * wordSize));
|
|
+ sd(ra, Address(sp, framesize - wordSize));
|
|
+ if (PreserveFramePointer) { add(fp, sp, framesize); }
|
|
+}
|
|
+
|
|
+void MacroAssembler::remove_frame(int framesize) {
|
|
+ assert(framesize > 0, "framesize must be > 0");
|
|
+ ld(fp, Address(sp, framesize - 2 * wordSize));
|
|
+ ld(ra, Address(sp, framesize - wordSize));
|
|
+ add(sp, sp, framesize);
|
|
+}
|
|
+
|
|
+void MacroAssembler::reserved_stack_check() {
|
|
+ // testing if reserved zone needs to be enabled
|
|
+ Label no_reserved_zone_enabling;
|
|
+
|
|
+ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
|
|
+ bltu(sp, t0, no_reserved_zone_enabling);
|
|
+
|
|
+ enter(); // RA and FP are live.
|
|
+ mv(c_rarg0, xthread);
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
|
|
+ jalr(x1, t0, offset);
|
|
+ leave();
|
|
+
|
|
+ // We have already removed our own frame.
|
|
+ // throw_delayed_StackOverflowError will think that it's been
|
|
+ // called by our caller.
|
|
+ offset = 0;
|
|
+ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
|
|
+ jalr(x0, t0, offset);
|
|
+ should_not_reach_here();
|
|
+
|
|
+ bind(no_reserved_zone_enabling);
|
|
+}
|
|
+
|
|
+// Move the address of the polling page into dest.
|
|
+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) {
|
|
+ if (SafepointMechanism::uses_thread_local_poll()) {
|
|
+ ld(dest, Address(xthread, Thread::polling_page_offset()));
|
|
+ } else {
|
|
+ uint64_t align = (uint64_t)page & 0xfff;
|
|
+ assert(align == 0, "polling page must be page aligned");
|
|
+ la_patchable(dest, Address(page, rtype), offset);
|
|
+ }
|
|
+}
|
|
+
|
|
+// Get the address of the polling page into dest and then read it.
|
|
+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) {
|
|
+ int32_t offset = 0;
|
|
+ get_polling_page(dest, page, offset, rtype);
|
|
+ read_polling_page(dest, offset, rtype);
|
|
+}
|
|
+
|
|
+// Read the polling page. The address of the polling page must
|
|
+// already be in r.
|
|
+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
|
|
+ code_section()->relocate(pc(), rtype);
|
|
+ lwu(zr, Address(r, offset));
|
|
+}
|
|
+
|
|
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ ThreadInVMfromUnknown tiv;
|
|
+ assert (UseCompressedOops, "should only be used for compressed oops");
|
|
+ assert (Universe::heap() != NULL, "java heap should be initialized");
|
|
+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
|
|
+ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
|
|
+ }
|
|
+#endif
|
|
+ int oop_index = oop_recorder()->find_index(obj);
|
|
+ RelocationHolder rspec = oop_Relocation::spec(oop_index);
|
|
+ code_section()->relocate(pc(), rspec);
|
|
+ li32(dst, 0xDEADBEEF);
|
|
+ zero_extend(dst, dst, 32);
|
|
+}
|
|
+
|
|
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
|
|
+ assert (UseCompressedClassPointers, "should only be used for compressed headers");
|
|
+ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
|
|
+ int index = oop_recorder()->find_index(k);
|
|
+ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop");
|
|
+
|
|
+ RelocationHolder rspec = metadata_Relocation::spec(index);
|
|
+ code_section()->relocate(pc(), rspec);
|
|
+ narrowKlass nk = Klass::encode_klass(k);
|
|
+ li32(dst, nk);
|
|
+ zero_extend(dst, dst, 32);
|
|
+}
|
|
+
|
|
+// Maybe emit a call via a trampoline. If the code cache is small
|
|
+// trampolines won't be emitted.
|
|
+address MacroAssembler::trampoline_call(Address entry) {
|
|
+ assert(JavaThread::current()->is_Compiler_thread(), "just checking");
|
|
+ assert(entry.rspec().type() == relocInfo::runtime_call_type ||
|
|
+ entry.rspec().type() == relocInfo::opt_virtual_call_type ||
|
|
+ entry.rspec().type() == relocInfo::static_call_type ||
|
|
+ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
|
|
+
|
|
+ // We need a trampoline if branches are far.
|
|
+ if (far_branches()) {
|
|
+ bool in_scratch_emit_size = false;
|
|
+#ifdef COMPILER2
|
|
+ // We don't want to emit a trampoline if C2 is generating dummy
|
|
+ // code during its branch shortening phase.
|
|
+ CompileTask* task = ciEnv::current()->task();
|
|
+ in_scratch_emit_size =
|
|
+ (task != NULL && is_c2_compile(task->comp_level()) &&
|
|
+ Compile::current()->in_scratch_emit_size());
|
|
+#endif
|
|
+ if (!in_scratch_emit_size) {
|
|
+ address stub = emit_trampoline_stub(offset(), entry.target());
|
|
+ if (stub == NULL) {
|
|
+ postcond(pc() == badAddress);
|
|
+ return NULL; // CodeCache is full
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ address call_pc = pc();
|
|
+ relocate(entry.rspec());
|
|
+ if (!far_branches()) {
|
|
+ jal(entry.target());
|
|
+ } else {
|
|
+ jal(pc());
|
|
+ }
|
|
+
|
|
+ postcond(pc() != badAddress);
|
|
+ return call_pc;
|
|
+}
|
|
+
|
|
+address MacroAssembler::ic_call(address entry, jint method_index) {
|
|
+ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
|
|
+ movptr(t1, (address)Universe::non_oop_word());
|
|
+ assert_cond(entry != NULL);
|
|
+ return trampoline_call(Address(entry, rh));
|
|
+}
|
|
+
|
|
+// Emit a trampoline stub for a call to a target which is too far away.
|
|
+//
|
|
+// code sequences:
|
|
+//
|
|
+// call-site:
|
|
+// branch-and-link to <destination> or <trampoline stub>
|
|
+//
|
|
+// Related trampoline stub for this call site in the stub section:
|
|
+// load the call target from the constant pool
|
|
+// branch (RA still points to the call site above)
|
|
+
|
|
+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
|
|
+ address dest) {
|
|
+ // Max stub size: alignment nop, TrampolineStub.
|
|
+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size);
|
|
+ if (stub == NULL) {
|
|
+ return NULL; // CodeBuffer::expand failed
|
|
+ }
|
|
+
|
|
+ // Create a trampoline stub relocation which relates this trampoline stub
|
|
+ // with the call instruction at insts_call_instruction_offset in the
|
|
+ // instructions code-section.
|
|
+
|
|
+ // make sure the offset here is 4 (mod 8), so that the destination address will be
|
|
+ // 8 byte aligned after 3 instructions
|
|
+ while (offset() % wordSize == 0) { nop(); }
|
|
+
|
|
+ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
|
|
+ insts_call_instruction_offset));
|
|
+ const int stub_start_offset = offset();
|
|
+
|
|
+ // Now, create the trampoline stub's code:
|
|
+ // - load the call
|
|
+ // - call
|
|
+ Label target;
|
|
+ ld(t0, target); // auipc + ld
|
|
+ jr(t0); // jalr
|
|
+ bind(target);
|
|
+ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
|
|
+ "should be");
|
|
+ emit_int64((intptr_t)dest);
|
|
+
|
|
+ const address stub_start_addr = addr_at(stub_start_offset);
|
|
+
|
|
+ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
|
|
+
|
|
+ end_a_stub();
|
|
+ return stub_start_addr;
|
|
+}
|
|
+
|
|
+Address MacroAssembler::add_memory_helper(const Address dst) {
|
|
+ switch (dst.getMode()) {
|
|
+ case Address::base_plus_offset:
|
|
+ // This is the expected mode, although we allow all the other
|
|
+ // forms below.
|
|
+ return form_address(dst.base(), dst.offset(), 12, t1);
|
|
+ default:
|
|
+ la(t1, dst);
|
|
+ return Address(t1);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::increment(const Address dst, int64_t value) {
|
|
+ assert(((dst.getMode() == Address::base_plus_offset &&
|
|
+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
|
|
+ "invalid value and address mode combination");
|
|
+ Address adr = add_memory_helper(dst);
|
|
+ assert(!adr.uses(t0), "invalid dst for address increment");
|
|
+ ld(t0, adr);
|
|
+ add(t0, t0, value, t1);
|
|
+ sd(t0, adr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::incrementw(const Address dst, int32_t value) {
|
|
+ assert(((dst.getMode() == Address::base_plus_offset &&
|
|
+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
|
|
+ "invalid value and address mode combination");
|
|
+ Address adr = add_memory_helper(dst);
|
|
+ assert(!adr.uses(t0), "invalid dst for address increment");
|
|
+ lwu(t0, adr);
|
|
+ addw(t0, t0, value, t1);
|
|
+ sw(t0, adr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::decrement(const Address dst, int64_t value) {
|
|
+ assert(((dst.getMode() == Address::base_plus_offset &&
|
|
+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
|
|
+ "invalid value and address mode combination");
|
|
+ Address adr = add_memory_helper(dst);
|
|
+ assert(!adr.uses(t0), "invalid dst for address decrement");
|
|
+ ld(t0, adr);
|
|
+ sub(t0, t0, value, t1);
|
|
+ sd(t0, adr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::decrementw(const Address dst, int32_t value) {
|
|
+ assert(((dst.getMode() == Address::base_plus_offset &&
|
|
+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
|
|
+ "invalid value and address mode combination");
|
|
+ Address adr = add_memory_helper(dst);
|
|
+ assert(!adr.uses(t0), "invalid dst for address decrement");
|
|
+ lwu(t0, adr);
|
|
+ subw(t0, t0, value, t1);
|
|
+ sw(t0, adr);
|
|
+}
|
|
+
|
|
+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
|
|
+ assert_different_registers(src1, t0);
|
|
+ int32_t offset;
|
|
+ la_patchable(t0, src2, offset);
|
|
+ ld(t0, Address(t0, offset));
|
|
+ beq(src1, t0, equal);
|
|
+}
|
|
+
|
|
+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) {
|
|
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->obj_equals(this, obj1, obj2, equal, is_far);
|
|
+}
|
|
+
|
|
+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) {
|
|
+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->obj_nequals(this, obj1, obj2, nequal, is_far);
|
|
+}
|
|
+
|
|
+#ifdef COMPILER2
|
|
+// Set dst NaN if either source is NaN.
|
|
+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2,
|
|
+ bool is_double, bool is_min) {
|
|
+ assert_different_registers(dst, src1, src2);
|
|
+ Label Ldone;
|
|
+ fsflags(zr);
|
|
+ if (is_double) {
|
|
+ if (is_min) {
|
|
+ fmin_d(dst, src1, src2);
|
|
+ } else {
|
|
+ fmax_d(dst, src1, src2);
|
|
+ }
|
|
+ // flt is only used to set the NV flag in fflags
|
|
+ flt_d(zr, src1, src2);
|
|
+ } else {
|
|
+ if (is_min) {
|
|
+ fmin_s(dst, src1, src2);
|
|
+ } else {
|
|
+ fmax_s(dst, src1, src2);
|
|
+ }
|
|
+ // flt is only used to set the NV flag in fflags
|
|
+ flt_s(zr, src1, src2);
|
|
+ }
|
|
+ frflags(t0);
|
|
+ beqz(t0, Ldone);
|
|
+
|
|
+ // Src1 or src2 must be NaN here. Set dst NaN.
|
|
+ if (is_double) {
|
|
+ fadd_d(dst, src1, src2);
|
|
+ } else {
|
|
+ fadd_s(dst, src1, src2);
|
|
+ }
|
|
+ bind(Ldone);
|
|
+}
|
|
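// A scalar C++ sketch (not part of this patch) of the NaN rule implemented above:
// fmin/fmax (like RISC-V fmin.d/fmax.d) would return the non-NaN operand, but Java's
// Math.min/max must return NaN when either input is NaN, hence the explicit NaN path.
#include <cmath>

double java_style_min(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return a + b;               // yields a quiet NaN, matching the fadd_d fallback above
  }
  return std::fmin(a, b);
}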
+
|
|
+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
|
|
+ Register tmp4, Register tmp5, Register tmp6, Register result,
|
|
+ Register cnt1, int elem_size) {
|
|
+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR;
|
|
+ Register tmp1 = t0;
|
|
+ Register tmp2 = t1;
|
|
+ Register cnt2 = tmp2; // cnt2 only used in array length compare
|
|
+ Register elem_per_word = tmp6;
|
|
+ int log_elem_size = exact_log2(elem_size);
|
|
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
|
|
+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
|
|
+
|
|
+ assert(elem_size == 1 || elem_size == 2, "must be char or byte");
|
|
+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6);
|
|
+ mv(elem_per_word, wordSize / elem_size);
|
|
+
|
|
+ BLOCK_COMMENT("arrays_equals {");
|
|
+
|
|
+ // if (a1 == a2), return true
|
|
+ oop_equal(a1, a2, SAME);
|
|
+
|
|
+ mv(result, false);
|
|
+ beqz(a1, DONE);
|
|
+ beqz(a2, DONE);
|
|
+ lwu(cnt1, Address(a1, length_offset));
|
|
+ lwu(cnt2, Address(a2, length_offset));
|
|
+ bne(cnt2, cnt1, DONE);
|
|
+ beqz(cnt1, SAME);
|
|
+
|
|
+ slli(tmp5, cnt1, 3 + log_elem_size);
|
|
+ sub(tmp5, zr, tmp5);
|
|
+ add(a1, a1, base_offset);
|
|
+ add(a2, a2, base_offset);
|
|
+ ld(tmp3, Address(a1, 0));
|
|
+ ld(tmp4, Address(a2, 0));
|
|
+ ble(cnt1, elem_per_word, SHORT); // short or same
|
|
+
|
|
+ // Main 16 byte comparison loop with 2 exits
|
|
+ bind(NEXT_DWORD); {
|
|
+ ld(tmp1, Address(a1, wordSize));
|
|
+ ld(tmp2, Address(a2, wordSize));
|
|
+ sub(cnt1, cnt1, 2 * wordSize / elem_size);
|
|
+ blez(cnt1, TAIL);
|
|
+ bne(tmp3, tmp4, DONE);
|
|
+ ld(tmp3, Address(a1, 2 * wordSize));
|
|
+ ld(tmp4, Address(a2, 2 * wordSize));
|
|
+ add(a1, a1, 2 * wordSize);
|
|
+ add(a2, a2, 2 * wordSize);
|
|
+ ble(cnt1, elem_per_word, TAIL2);
|
|
+ } beq(tmp1, tmp2, NEXT_DWORD);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(TAIL);
|
|
+ xorr(tmp4, tmp3, tmp4);
|
|
+ xorr(tmp2, tmp1, tmp2);
|
|
+ sll(tmp2, tmp2, tmp5);
|
|
+ orr(tmp5, tmp4, tmp2);
|
|
+ j(IS_TMP5_ZR);
|
|
+
|
|
+ bind(TAIL2);
|
|
+ bne(tmp1, tmp2, DONE);
|
|
+
|
|
+ bind(SHORT);
|
|
+ xorr(tmp4, tmp3, tmp4);
|
|
+ sll(tmp5, tmp4, tmp5);
|
|
+
|
|
+ bind(IS_TMP5_ZR);
|
|
+ bnez(tmp5, DONE);
|
|
+
|
|
+ bind(SAME);
|
|
+ mv(result, true);
|
|
+ // That's it.
|
|
+ bind(DONE);
|
|
+
|
|
+ BLOCK_COMMENT("} array_equals");
|
|
+ postcond(pc() != badAddress);
|
|
+ return pc();
|
|
+}
|
|
+
|
|
+// Compare Strings
|
|
+
|
|
+// For Strings we're passed the address of the first characters in a1
|
|
+// and a2 and the length in cnt1.
|
|
+// elem_size is the element size in bytes: either 1 or 2.
|
|
+// There are two implementations. For arrays >= 8 bytes, all
|
|
+// comparisons (including the final one, which may overlap) are
|
|
+// performed 8 bytes at a time. For strings < 8 bytes, we compare a
|
|
+// halfword, then a short, and then a byte.
|
|
+
|
|
+void MacroAssembler::string_equals(Register a1, Register a2,
|
|
+ Register result, Register cnt1, int elem_size)
|
|
+{
|
|
+ Label SAME, DONE, SHORT, NEXT_WORD;
|
|
+ Register tmp1 = t0;
|
|
+ Register tmp2 = t1;
|
|
+
|
|
+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
|
|
+ assert_different_registers(a1, a2, result, cnt1, t0, t1);
|
|
+
|
|
+ BLOCK_COMMENT("string_equals {");
|
|
+
|
|
+ beqz(cnt1, SAME);
|
|
+ mv(result, false);
|
|
+
|
|
+ // Check for short strings, i.e. smaller than wordSize.
|
|
+ sub(cnt1, cnt1, wordSize);
|
|
+ blez(cnt1, SHORT);
|
|
+
|
|
+ // Main 8 byte comparison loop.
|
|
+ bind(NEXT_WORD); {
|
|
+ ld(tmp1, Address(a1, 0));
|
|
+ add(a1, a1, wordSize);
|
|
+ ld(tmp2, Address(a2, 0));
|
|
+ add(a2, a2, wordSize);
|
|
+ sub(cnt1, cnt1, wordSize);
|
|
+ bne(tmp1, tmp2, DONE);
|
|
+ } bgtz(cnt1, NEXT_WORD);
|
|
+
|
|
+ if (!AvoidUnalignedAccesses) {
|
|
+ // Last longword. In the case where length == 4 we compare the
|
|
+ // same longword twice, but that's still faster than another
|
|
+ // conditional branch.
|
|
+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
|
|
+ // length == 4.
|
|
+ add(tmp1, a1, cnt1);
|
|
+ ld(tmp1, Address(tmp1, 0));
|
|
+ add(tmp2, a2, cnt1);
|
|
+ ld(tmp2, Address(tmp2, 0));
|
|
+ bne(tmp1, tmp2, DONE);
|
|
+ j(SAME);
|
|
+ }
|
|
+
|
|
+ bind(SHORT);
|
|
+ ld(tmp1, Address(a1));
|
|
+ ld(tmp2, Address(a2));
|
|
+ xorr(tmp1, tmp1, tmp2);
|
|
+ neg(cnt1, cnt1);
|
|
+ slli(cnt1, cnt1, LogBitsPerByte);
|
|
+ sll(tmp1, tmp1, cnt1);
|
|
+ bnez(tmp1, DONE);
|
|
+
|
|
+ // Arrays are equal.
|
|
+ bind(SAME);
|
|
+ mv(result, true);
|
|
+
|
|
+ // That's it.
|
|
+ bind(DONE);
|
|
+ BLOCK_COMMENT("} string_equals");
|
|
+}
|
|
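// A plain C++ sketch (not part of this patch) of the word-at-a-time strategy described
// above: compare 8 bytes per iteration and let the final 8-byte load overlap the
// previous one instead of falling back to a byte loop. Assumes len >= 8 and that
// unaligned 8-byte loads are acceptable; names are illustrative.
#include <cstdint>
#include <cstring>

bool bytes_equal_wordwise(const std::uint8_t* a, const std::uint8_t* b, std::size_t len) {
  std::size_t i = 0;
  for (; i + 8 <= len; i += 8) {
    std::uint64_t wa, wb;
    std::memcpy(&wa, a + i, 8);
    std::memcpy(&wb, b + i, 8);
    if (wa != wb) {
      return false;
    }
  }
  if (i < len) {                 // overlapping final word covers the tail bytes
    std::uint64_t wa, wb;
    std::memcpy(&wa, a + len - 8, 8);
    std::memcpy(&wb, b + len - 8, 8);
    if (wa != wb) {
      return false;
    }
  }
  return true;
}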
+
|
|
+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp);
|
|
+
|
|
+// Compare strings.
|
|
+void MacroAssembler::string_compare(Register str1, Register str2,
|
|
+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
|
|
+ Register tmp3, int ae)
|
|
+{
|
|
+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
|
|
+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
|
+ SHORT_LOOP_START, TAIL_CHECK, L;
|
|
+
|
|
+ const int STUB_THRESHOLD = 64 + 8;
|
|
+ bool isLL = ae == StrIntrinsicNode::LL;
|
|
+ bool isLU = ae == StrIntrinsicNode::LU;
|
|
+ bool isUL = ae == StrIntrinsicNode::UL;
|
|
+
|
|
+ bool str1_isL = isLL || isLU;
|
|
+ bool str2_isL = isLL || isUL;
|
|
+
|
|
+ // for L strings, 1 byte for 1 character
|
|
+ // for U strings, 2 bytes for 1 character
|
|
+ int str1_chr_size = str1_isL ? 1 : 2;
|
|
+ int str2_chr_size = str2_isL ? 1 : 2;
|
|
+ int minCharsInWord = isLL ? wordSize : wordSize / 2;
|
|
+
|
|
+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
|
+ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
|
|
+
|
|
+ BLOCK_COMMENT("string_compare {");
|
|
+
|
|
+ // Bizarrely, the counts are passed in bytes, regardless of whether they
|
|
+ // are L or U strings, however the result is always in characters.
|
|
+ if (!str1_isL) {
|
|
+ sraiw(cnt1, cnt1, 1);
|
|
+ }
|
|
+ if (!str2_isL) {
|
|
+ sraiw(cnt2, cnt2, 1);
|
|
+ }
|
|
+
|
|
+ // Compute the minimum of the string lengths and save the difference in result.
|
|
+ sub(result, cnt1, cnt2);
|
|
+ bgt(cnt1, cnt2, L);
|
|
+ mv(cnt2, cnt1);
|
|
+ bind(L);
|
|
+
|
|
+ // A very short string
|
|
+ mv(t0, minCharsInWord);
|
|
+ ble(cnt2, t0, SHORT_STRING);
|
|
+
|
|
+ // Compare longwords
|
|
+ // load first parts of strings and finish initialization while loading
|
|
+ {
|
|
+ if (str1_isL == str2_isL) { // LL or UU
|
|
+ // check if str1 and str2 are same string
|
|
+ beq(str1, str2, DONE);
|
|
+ // load 8 bytes once to compare
|
|
+ ld(tmp1, Address(str1));
|
|
+ ld(tmp2, Address(str2));
|
|
+ mv(t0, STUB_THRESHOLD);
|
|
+ bge(cnt2, t0, STUB);
|
|
+ sub(cnt2, cnt2, minCharsInWord);
|
|
+ beqz(cnt2, TAIL_CHECK);
|
|
+ // convert cnt2 from characters to bytes
|
|
+ if (!str1_isL) {
|
|
+ slli(cnt2, cnt2, 1);
|
|
+ }
|
|
+ add(str2, str2, cnt2);
|
|
+ add(str1, str1, cnt2);
|
|
+ sub(cnt2, zr, cnt2);
|
|
+ } else if (isLU) { // LU case
|
|
+ lwu(tmp1, Address(str1));
|
|
+ ld(tmp2, Address(str2));
|
|
+ mv(t0, STUB_THRESHOLD);
|
|
+ bge(cnt2, t0, STUB);
|
|
+ addi(cnt2, cnt2, -4);
|
|
+ add(str1, str1, cnt2);
|
|
+ sub(cnt1, zr, cnt2);
|
|
+ slli(cnt2, cnt2, 1);
|
|
+ add(str2, str2, cnt2);
|
|
+ inflate_lo32(tmp3, tmp1);
|
|
+ mv(tmp1, tmp3);
|
|
+ sub(cnt2, zr, cnt2);
|
|
+ addi(cnt1, cnt1, 4);
|
|
+ } else { // UL case
|
|
+ ld(tmp1, Address(str1));
|
|
+ lwu(tmp2, Address(str2));
|
|
+ mv(t0, STUB_THRESHOLD);
|
|
+ bge(cnt2, t0, STUB);
|
|
+ addi(cnt2, cnt2, -4);
|
|
+ slli(t0, cnt2, 1);
|
|
+ sub(cnt1, zr, t0);
|
|
+ add(str1, str1, t0);
|
|
+ add(str2, str2, cnt2);
|
|
+ inflate_lo32(tmp3, tmp2);
|
|
+ mv(tmp2, tmp3);
|
|
+ sub(cnt2, zr, cnt2);
|
|
+ addi(cnt1, cnt1, 8);
|
|
+ }
|
|
+ addi(cnt2, cnt2, isUL ? 4 : 8);
|
|
+ bgez(cnt2, TAIL);
|
|
+ xorr(tmp3, tmp1, tmp2);
|
|
+ bnez(tmp3, DIFFERENCE);
|
|
+
|
|
+ // main loop
|
|
+ bind(NEXT_WORD);
|
|
+ if (str1_isL == str2_isL) { // LL or UU
|
|
+ add(t0, str1, cnt2);
|
|
+ ld(tmp1, Address(t0));
|
|
+ add(t0, str2, cnt2);
|
|
+ ld(tmp2, Address(t0));
|
|
+ addi(cnt2, cnt2, 8);
|
|
+ } else if (isLU) { // LU case
|
|
+ add(t0, str1, cnt1);
|
|
+ lwu(tmp1, Address(t0));
|
|
+ add(t0, str2, cnt2);
|
|
+ ld(tmp2, Address(t0));
|
|
+ addi(cnt1, cnt1, 4);
|
|
+ inflate_lo32(tmp3, tmp1);
|
|
+ mv(tmp1, tmp3);
|
|
+ addi(cnt2, cnt2, 8);
|
|
+ } else { // UL case
|
|
+ add(t0, str2, cnt2);
|
|
+ lwu(tmp2, Address(t0));
|
|
+ add(t0, str1, cnt1);
|
|
+ ld(tmp1, Address(t0));
|
|
+ inflate_lo32(tmp3, tmp2);
|
|
+ mv(tmp2, tmp3);
|
|
+ addi(cnt1, cnt1, 8);
|
|
+ addi(cnt2, cnt2, 4);
|
|
+ }
|
|
+ bgez(cnt2, TAIL);
|
|
+
|
|
+ xorr(tmp3, tmp1, tmp2);
|
|
+ beqz(tmp3, NEXT_WORD);
|
|
+ j(DIFFERENCE);
|
|
+ bind(TAIL);
|
|
+ xorr(tmp3, tmp1, tmp2);
|
|
+ bnez(tmp3, DIFFERENCE);
|
|
+ // Last longword.
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ // Aligned access. Load bytes from byte-aligned address,
|
|
+ // which may contain invalid bytes when the number of remaining bytes is
|
|
+ // less than 4 (UL/LU) or 8 (LL/UU).
|
|
+ // Invalid bytes should be removed before comparison.
|
|
+ if (str1_isL == str2_isL) { // LL or UU
|
|
+ add(t0, str1, cnt2);
|
|
+ ld(tmp1, Address(t0));
|
|
+ add(t0, str2, cnt2);
|
|
+ ld(tmp2, Address(t0));
|
|
+ } else if (isLU) { // LU
|
|
+ add(t0, str1, cnt1);
|
|
+ lwu(tmp1, Address(t0));
|
|
+ add(t0, str2, cnt2);
|
|
+ ld(tmp2, Address(t0));
|
|
+ inflate_lo32(tmp3, tmp1);
|
|
+ mv(tmp1, tmp3);
|
|
+ } else { // UL
|
|
+ add(t0, str1, cnt1);
|
|
+ ld(tmp1, Address(t0));
|
|
+ add(t0, str2, cnt2);
|
|
+ lwu(tmp2, Address(t0));
|
|
+ inflate_lo32(tmp3, tmp2);
|
|
+ mv(tmp2, tmp3);
|
|
+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes
|
|
+ }
|
|
+ // remove invalid bytes
|
|
+ slli(t0, cnt2, LogBitsPerByte);
|
|
+ sll(tmp1, tmp1, t0);
|
|
+ sll(tmp2, tmp2, t0);
|
|
+ } else {
|
|
+ // Last longword. In the case where length == 4 we compare the
|
|
+ // same longword twice, but that's still faster than another
|
|
+ // conditional branch.
|
|
+ if (str1_isL == str2_isL) { // LL or UU
|
|
+ ld(tmp1, Address(str1));
|
|
+ ld(tmp2, Address(str2));
|
|
+ } else if (isLU) { // LU case
|
|
+ lwu(tmp1, Address(str1));
|
|
+ ld(tmp2, Address(str2));
|
|
+ inflate_lo32(tmp3, tmp1);
|
|
+ mv(tmp1, tmp3);
|
|
+ } else { // UL case
|
|
+ ld(tmp1, Address(str1));
|
|
+ lwu(tmp2, Address(str2));
|
|
+ inflate_lo32(tmp3, tmp2);
|
|
+ mv(tmp2, tmp3);
|
|
+ }
|
|
+ }
|
|
+ bind(TAIL_CHECK);
|
|
+ xorr(tmp3, tmp1, tmp2);
|
|
+ beqz(tmp3, DONE);
|
|
+
|
|
+ // Find the first different characters in the longwords and
|
|
+ // compute their difference.
|
|
+ bind(DIFFERENCE);
|
|
+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb
|
|
+ srl(tmp1, tmp1, result);
|
|
+ srl(tmp2, tmp2, result);
|
|
+ if (isLL) {
|
|
+ andi(tmp1, tmp1, 0xFF);
|
|
+ andi(tmp2, tmp2, 0xFF);
|
|
+ } else {
|
|
+ andi(tmp1, tmp1, 0xFFFF);
|
|
+ andi(tmp2, tmp2, 0xFFFF);
|
|
+ }
|
|
+ sub(result, tmp1, tmp2);
|
|
+ j(DONE);
|
|
+ }
|
|
+
|
|
+ bind(STUB);
|
|
+ RuntimeAddress stub = NULL;
|
|
+ switch (ae) {
|
|
+ case StrIntrinsicNode::LL:
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL());
|
|
+ break;
|
|
+ case StrIntrinsicNode::UU:
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU());
|
|
+ break;
|
|
+ case StrIntrinsicNode::LU:
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU());
|
|
+ break;
|
|
+ case StrIntrinsicNode::UL:
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL());
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ assert(stub.target() != NULL, "compare_long_string stub has not been generated");
|
|
+ trampoline_call(stub);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(SHORT_STRING);
|
|
+ // Is the minimum length zero?
|
|
+ beqz(cnt2, DONE);
|
|
+ // arrange the code so that most branching happens while loading, and the next characters
|
|
+ // are loaded while the previous ones are being compared
|
|
+ (this->*str1_load_chr)(tmp1, Address(str1), t0);
|
|
+ addi(str1, str1, str1_chr_size);
|
|
+ addi(cnt2, cnt2, -1);
|
|
+ beqz(cnt2, SHORT_LAST_INIT);
|
|
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
|
|
+ addi(str2, str2, str2_chr_size);
|
|
+ j(SHORT_LOOP_START);
|
|
+ bind(SHORT_LOOP);
|
|
+ addi(cnt2, cnt2, -1);
|
|
+ beqz(cnt2, SHORT_LAST);
|
|
+ bind(SHORT_LOOP_START);
|
|
+ (this->*str1_load_chr)(tmp2, Address(str1), t0);
|
|
+ addi(str1, str1, str1_chr_size);
|
|
+ (this->*str2_load_chr)(t0, Address(str2), t0);
|
|
+ addi(str2, str2, str2_chr_size);
|
|
+ bne(tmp1, cnt1, SHORT_LOOP_TAIL);
|
|
+ addi(cnt2, cnt2, -1);
|
|
+ beqz(cnt2, SHORT_LAST2);
|
|
+ (this->*str1_load_chr)(tmp1, Address(str1), t0);
|
|
+ addi(str1, str1, str1_chr_size);
|
|
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
|
|
+ addi(str2, str2, str2_chr_size);
|
|
+ beq(tmp2, t0, SHORT_LOOP);
|
|
+ sub(result, tmp2, t0);
|
|
+ j(DONE);
|
|
+ bind(SHORT_LOOP_TAIL);
|
|
+ sub(result, tmp1, cnt1);
|
|
+ j(DONE);
|
|
+ bind(SHORT_LAST2);
|
|
+ beq(tmp2, t0, DONE);
|
|
+ sub(result, tmp2, t0);
|
|
+
|
|
+ j(DONE);
|
|
+ bind(SHORT_LAST_INIT);
|
|
+ (this->*str2_load_chr)(cnt1, Address(str2), t0);
|
|
+ addi(str2, str2, str2_chr_size);
|
|
+ bind(SHORT_LAST);
|
|
+ beq(tmp1, cnt1, DONE);
|
|
+ sub(result, tmp1, cnt1);
|
|
+
|
|
+ bind(DONE);
|
|
+
|
|
+ BLOCK_COMMENT("} string_compare");
|
|
+}
|
|
+
|
|
+// short string
|
|
+// StringUTF16.indexOfChar
|
|
+// StringLatin1.indexOfChar
|
|
+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ bool isL)
|
|
+{
|
|
+ Register ch1 = t0;
|
|
+ Register index = t1;
|
|
+
|
|
+ BLOCK_COMMENT("string_indexof_char_short {");
|
|
+
|
|
+ Label LOOP, LOOP1, LOOP4, LOOP8;
|
|
+ Label MATCH, MATCH1, MATCH2, MATCH3,
|
|
+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH;
|
|
+
|
|
+ mv(result, -1);
|
|
+ mv(index, zr);
|
|
+
|
|
+ bind(LOOP);
|
|
+ addi(t0, index, 8);
|
|
+ ble(t0, cnt1, LOOP8);
|
|
+ addi(t0, index, 4);
|
|
+ ble(t0, cnt1, LOOP4);
|
|
+ j(LOOP1);
|
|
+
|
|
+ bind(LOOP8);
|
|
+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0));
|
|
+ beq(ch, ch1, MATCH);
|
|
+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2));
|
|
+ beq(ch, ch1, MATCH1);
|
|
+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4));
|
|
+ beq(ch, ch1, MATCH2);
|
|
+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6));
|
|
+ beq(ch, ch1, MATCH3);
|
|
+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8));
|
|
+ beq(ch, ch1, MATCH4);
|
|
+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10));
|
|
+ beq(ch, ch1, MATCH5);
|
|
+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12));
|
|
+ beq(ch, ch1, MATCH6);
|
|
+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14));
|
|
+ beq(ch, ch1, MATCH7);
|
|
+ addi(index, index, 8);
|
|
+ addi(str1, str1, isL ? 8 : 16);
|
|
+ blt(index, cnt1, LOOP);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(LOOP4);
|
|
+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0));
|
|
+ beq(ch, ch1, MATCH);
|
|
+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2));
|
|
+ beq(ch, ch1, MATCH1);
|
|
+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4));
|
|
+ beq(ch, ch1, MATCH2);
|
|
+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6));
|
|
+ beq(ch, ch1, MATCH3);
|
|
+ addi(index, index, 4);
|
|
+ addi(str1, str1, isL ? 4 : 8);
|
|
+ bge(index, cnt1, NOMATCH);
|
|
+
|
|
+ bind(LOOP1);
|
|
+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1));
|
|
+ beq(ch, ch1, MATCH);
|
|
+ addi(index, index, 1);
|
|
+ addi(str1, str1, isL ? 1 : 2);
|
|
+ blt(index, cnt1, LOOP1);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(MATCH1);
|
|
+ addi(index, index, 1);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH2);
|
|
+ addi(index, index, 2);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH3);
|
|
+ addi(index, index, 3);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH4);
|
|
+ addi(index, index, 4);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH5);
|
|
+ addi(index, index, 5);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH6);
|
|
+ addi(index, index, 6);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(MATCH7);
|
|
+ addi(index, index, 7);
|
|
+
|
|
+ bind(MATCH);
|
|
+ mv(result, index);
|
|
+ bind(NOMATCH);
|
|
+ BLOCK_COMMENT("} string_indexof_char_short");
|
|
+}
|
|
+
|
|
+// StringUTF16.indexOfChar
|
|
+// StringLatin1.indexOfChar
|
|
+void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ bool isL)
|
|
+{
|
|
+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG;
|
|
+ Register ch1 = t0;
|
|
+ Register orig_cnt = t1;
|
|
+ Register mask1 = tmp3;
|
|
+ Register mask2 = tmp2;
|
|
+ Register match_mask = tmp1;
|
|
+ Register trailing_char = tmp4;
|
|
+ Register unaligned_elems = tmp4;
|
|
+
|
|
+ BLOCK_COMMENT("string_indexof_char {");
|
|
+ beqz(cnt1, NOMATCH);
|
|
+
|
|
+ addi(t0, cnt1, isL ? -32 : -16);
|
|
+ bgtz(t0, DO_LONG);
|
|
+ string_indexof_char_short(str1, cnt1, ch, result, isL);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(DO_LONG);
|
|
+ mv(orig_cnt, cnt1);
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ Label ALIGNED;
|
|
+ andi(unaligned_elems, str1, 0x7);
|
|
+ beqz(unaligned_elems, ALIGNED);
|
|
+ sub(unaligned_elems, unaligned_elems, 8);
|
|
+ neg(unaligned_elems, unaligned_elems);
|
|
+ if (!isL) {
|
|
+ srli(unaligned_elems, unaligned_elems, 1);
|
|
+ }
|
|
+ // do unaligned part per element
|
|
+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL);
|
|
+ bgez(result, DONE);
|
|
+ mv(orig_cnt, cnt1);
|
|
+ sub(cnt1, cnt1, unaligned_elems);
|
|
+ bind(ALIGNED);
|
|
+ }
|
|
+
|
|
+ // duplicate ch
|
|
+ if (isL) {
|
|
+ slli(ch1, ch, 8);
|
|
+ orr(ch, ch1, ch);
|
|
+ }
|
|
+ slli(ch1, ch, 16);
|
|
+ orr(ch, ch1, ch);
|
|
+ slli(ch1, ch, 32);
|
|
+ orr(ch, ch1, ch);
|
|
+
|
|
+ if (!isL) {
|
|
+ slli(cnt1, cnt1, 1);
|
|
+ }
|
|
+
|
|
+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001);
|
|
+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
|
|
+
|
|
+ bind(CH1_LOOP);
|
|
+ ld(ch1, Address(str1));
|
|
+ addi(str1, str1, 8);
|
|
+ addi(cnt1, cnt1, -8);
|
|
+ compute_match_mask(ch1, ch, match_mask, mask1, mask2);
|
|
+ bnez(match_mask, HIT);
|
|
+ bgtz(cnt1, CH1_LOOP);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(HIT);
|
|
+ ctzc_bit(trailing_char, match_mask, isL, ch1, result);
|
|
+ srli(trailing_char, trailing_char, 3);
|
|
+ addi(cnt1, cnt1, 8);
|
|
+ ble(cnt1, trailing_char, NOMATCH);
|
|
+ // match case
|
|
+ if (!isL) {
|
|
+ srli(cnt1, cnt1, 1);
|
|
+ srli(trailing_char, trailing_char, 1);
|
|
+ }
|
|
+
|
|
+ sub(result, orig_cnt, cnt1);
|
|
+ add(result, result, trailing_char);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(NOMATCH);
|
|
+ mv(result, -1);
|
|
+
|
|
+ bind(DONE);
|
|
+ BLOCK_COMMENT("} string_indexof_char");
|
|
+}
|
|
+
|
|
+// Search for needle in haystack and return index or -1
|
|
+// x10: result
|
|
+// x11: haystack
|
|
+// x12: haystack_len
|
|
+// x13: needle
|
|
+// x14: needle_len
|
|
+void MacroAssembler::string_indexof(Register haystack, Register needle,
|
|
+ Register haystack_len, Register needle_len,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ Register tmp5, Register tmp6,
|
|
+ Register result, int ae)
|
|
+{
|
|
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
|
|
+
|
|
+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH;
|
|
+
|
|
+ Register ch1 = t0;
|
|
+ Register ch2 = t1;
|
|
+ Register nlen_tmp = tmp1; // needle len tmp
|
|
+ Register hlen_tmp = tmp2; // haystack len tmp
|
|
+ Register result_tmp = tmp4;
|
|
+
|
|
+ bool isLL = ae == StrIntrinsicNode::LL;
|
|
+
|
|
+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
|
|
+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
|
|
+ int needle_chr_shift = needle_isL ? 0 : 1;
|
|
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
|
|
+ int needle_chr_size = needle_isL ? 1 : 2;
|
|
+ int haystack_chr_size = haystack_isL ? 1 : 2;
|
|
+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu :
|
|
+ (load_chr_insn)&MacroAssembler::lhu;
|
|
+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu :
|
|
+ (load_chr_insn)&MacroAssembler::lhu;
|
|
+
|
|
+ BLOCK_COMMENT("string_indexof {");
|
|
+
|
|
+ // Note, inline_string_indexOf() generates checks:
|
|
+ // if (pattern.count > src.count) return -1;
|
|
+ // if (pattern.count == 0) return 0;
|
|
+
|
|
+ // We have two strings, a source string in haystack, haystack_len and a pattern string
|
|
+ // in needle, needle_len. Find the first occurrence of pattern in source or return -1.
|
|
+
|
|
+ // For a larger pattern and source we use a simplified Boyer-Moore algorithm.
|
|
+ // With a small pattern and source we use linear scan.
|
|
+
|
|
+ // needle_len >= 8 && needle_len < 256 && needle_len < haystack_len/4: use the BMH algorithm.
|
|
+ sub(result_tmp, haystack_len, needle_len);
|
|
+ // needle_len < 8, use linear scan
|
|
+ sub(t0, needle_len, 8);
|
|
+ bltz(t0, LINEARSEARCH);
|
|
+ // needle_len >= 256, use linear scan
|
|
+ sub(t0, needle_len, 256);
|
|
+ bgez(t0, LINEARSTUB);
|
|
+ // needle_len >= haystack_len/4, use linear scan
|
|
+ srli(t0, haystack_len, 2);
|
|
+ bge(needle_len, t0, LINEARSTUB);
|
|
+
|
|
+ // Boyer-Moore-Horspool introduction:
|
|
+ // The Boyer-Moore algorithm is based on the description here:
|
|
+ //
|
|
+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
|
|
+ //
|
|
+ // This describes an algorithm with 2 shift rules. The 'Bad Character' rule
|
|
+ // and the 'Good Suffix' rule.
|
|
+ //
|
|
+ // These rules are essentially heuristics for how far we can shift the
|
|
+ // pattern along the search string.
|
|
+ //
|
|
+ // The implementation here uses the 'Bad Character' rule only because of the
|
|
+ // complexity of initialisation for the 'Good Suffix' rule.
|
|
+ //
|
|
+ // This is also known as the Boyer-Moore-Horspool algorithm:
|
|
+ //
|
|
+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
|
|
+ //
|
|
+ // #define ASIZE 256
|
|
+ //
|
|
+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) {
|
|
+ // int i, j;
|
|
+ // unsigned c;
|
|
+ // unsigned char bc[ASIZE];
|
|
+ //
|
|
+ // /* Preprocessing */
|
|
+ // for (i = 0; i < ASIZE; ++i)
|
|
+ // bc[i] = m;
|
|
+ // for (i = 0; i < m - 1; ) {
|
|
+ // c = pattern[i];
|
|
+ // ++i;
|
|
+ // // c < 256 for Latin1 string, so, no need for branch
|
|
+ // #ifdef PATTERN_STRING_IS_LATIN1
|
|
+ // bc[c] = m - i;
|
|
+ // #else
|
|
+ // if (c < ASIZE) bc[c] = m - i;
|
|
+ // #endif
|
|
+ // }
|
|
+ //
|
|
+ // /* Searching */
|
|
+ // j = 0;
|
|
+ // while (j <= n - m) {
|
|
+ // c = src[i+j];
|
|
+ // if (pattern[m-1] == c)
|
|
+ // int k;
|
|
+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k);
|
|
+ // if (k < 0) return j;
|
|
+ // // c < 256 for Latin1 string, so, no need for branch
|
|
+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1
|
|
+ // // LL case: (c< 256) always true. Remove branch
|
|
+ // j += bc[pattern[j+m-1]];
|
|
+ // #endif
|
|
+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF
|
|
+ // // UU case: need if (c<ASIZE) check. Skip 1 character if not.
|
|
+ // if (c < ASIZE)
|
|
+ // j += bc[pattern[j+m-1]];
|
|
+ // else
|
|
+ // j += 1
|
|
+ // #endif
|
|
+ // #ifdef SOURCE_IS_UTF_AND_PATTERN_IS_LATIN1
|
|
+ // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
|
|
+ // if (c < ASIZE)
|
|
+ // j += bc[pattern[j+m-1]];
|
|
+ // else
|
|
+ // j += m
|
|
+ // #endif
|
|
+ // }
|
|
+ // return -1;
|
|
+ // }
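+ //
+ // A small worked example of the bad-character shift (LL, pattern "abc", m = 3):
+ // after preprocessing, bc['a'] = 2, bc['b'] = 1, and every other entry keeps the
+ // default m = 3 (including 'c', since the last pattern character is skipped).
+ // If the source character aligned with the end of the pattern is 'x', the pattern
+ // shifts by bc['x'] = 3; if it is 'a', the shift is 2; if it is 'b', only 1.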
+
|
|
+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result
|
|
+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
|
|
+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
|
|
+
|
|
+ Register haystack_end = haystack_len;
|
|
+ Register skipch = tmp2;
|
|
+
|
|
+ // The pattern length is >= 8, so we can read at least one full register for the
+ // cases where no UTF->Latin1 conversion is needed (8 chars for LL, 4 for UU) and
+ // half a register for the UL case. We re-read the last character in the inner
+ // pre-loop code so that a single outer pre-loop load suffices.
|
|
+ const int firstStep = isLL ? 7 : 3;
|
|
+
|
|
+ const int ASIZE = 256;
|
|
+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd)
|
|
+
|
|
+ sub(sp, sp, ASIZE);
|
|
+
|
|
+ // init BC offset table with default value: needle_len
|
|
+ slli(t0, needle_len, 8);
|
|
+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len]
|
|
+ slli(tmp1, t0, 16);
|
|
+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len]
|
|
+ slli(tmp1, t0, 32);
|
|
+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len]
|
|
+
|
|
+ mv(ch1, sp); // ch1 is t0
|
|
+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations
|
|
+
|
|
+ bind(BM_INIT_LOOP);
|
|
+ // for (i = 0; i < ASIZE; ++i)
|
|
+ // bc[i] = m;
|
|
+ for (int i = 0; i < 4; i++) {
|
|
+ sd(tmp5, Address(ch1, i * wordSize));
|
|
+ }
|
|
+ add(ch1, ch1, 32);
|
|
+ sub(tmp6, tmp6, 4);
|
|
+ bgtz(tmp6, BM_INIT_LOOP);
|
|
+
|
|
+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern
|
|
+ Register orig_haystack = tmp5;
|
|
+ mv(orig_haystack, haystack);
|
|
+ // result_tmp = tmp4
|
|
+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift);
|
|
+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1
|
|
+ mv(tmp3, needle);
|
|
+
|
|
+ // for (i = 0; i < m - 1; ) {
|
|
+ // c = pattern[i];
|
|
+ // ++i;
|
|
+ // // c < 256 for Latin1 string, so, no need for branch
|
|
+ // #ifdef PATTERN_STRING_IS_LATIN1
|
|
+ // bc[c] = m - i;
|
|
+ // #else
|
|
+ // if (c < ASIZE) bc[c] = m - i;
|
|
+ // #endif
|
|
+ // }
|
|
+ bind(BCLOOP);
|
|
+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg);
|
|
+ add(tmp3, tmp3, needle_chr_size);
|
|
+ if (!needle_isL) {
|
|
+ // ae == StrIntrinsicNode::UU
|
|
+ mv(tmp6, ASIZE);
|
|
+ bgeu(ch1, tmp6, BCSKIP);
|
|
+ }
|
|
+ add(tmp4, sp, ch1);
|
|
+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table
|
|
+
|
|
+ bind(BCSKIP);
|
|
+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1
|
|
+ bgtz(ch2, BCLOOP);
|
|
+
|
|
+ // tmp6: pattern end, address after needle
|
|
+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift);
|
|
+ if (needle_isL == haystack_isL) {
|
|
+ // load last 8 bytes (8LL/4UU symbols)
|
|
+ ld(tmp6, Address(tmp6, -wordSize));
|
|
+ } else {
|
|
+ // UL: from UTF-16(source) search Latin1(pattern)
|
|
+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols)
|
|
+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d
|
|
+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks
|
|
+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a
|
|
+ slli(ch2, tmp6, XLEN - 24);
|
|
+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
|
|
+ slli(ch1, tmp6, XLEN - 16);
|
|
+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
|
|
+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
|
|
+ slli(ch2, ch2, 16);
|
|
+ orr(ch2, ch2, ch1); // 0x00000b0c
|
|
+ slli(result, tmp3, 48); // use result as temp register
|
|
+ orr(tmp6, tmp6, result); // 0x0a00000d
|
|
+ slli(result, ch2, 16);
|
|
+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d
|
|
+ }
|
|
+
|
|
+ // i = m - 1;
|
|
+ // skipch = j + i;
|
|
+ // if (skipch == pattern[m - 1]
|
|
+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k);
|
|
+ // else
|
|
+ // move j with bad char offset table
|
|
+ bind(BMLOOPSTR2);
|
|
+ // compare pattern to source string backward
|
|
+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift);
|
|
+ (this->*haystack_load_1chr)(skipch, Address(result), noreg);
|
|
+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8
|
|
+ if (needle_isL == haystack_isL) {
|
|
+ // re-init tmp3. It's for free because it's executed in parallel with
|
|
+ // load above. Alternative is to initialize it before loop, but it'll
|
|
+ // affect performance on in-order systems with 2 or more ld/st pipelines
|
|
+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1]
|
|
+ }
|
|
+ if (!isLL) { // UU/UL case
|
|
+ slli(ch2, nlen_tmp, 1); // offsets in bytes
|
|
+ }
|
|
+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
|
|
+ add(result, haystack, isLL ? nlen_tmp : ch2);
|
|
+ ld(ch2, Address(result)); // load 8 bytes from source string
|
|
+ mv(ch1, tmp6);
|
|
+ if (isLL) {
|
|
+ j(BMLOOPSTR1_AFTER_LOAD);
|
|
+ } else {
|
|
+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
|
|
+ j(BMLOOPSTR1_CMP);
|
|
+ }
|
|
+
|
|
+ bind(BMLOOPSTR1);
|
|
+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift);
|
|
+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift);
|
|
+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
|
|
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
|
|
+
|
|
+ bind(BMLOOPSTR1_AFTER_LOAD);
|
|
+ sub(nlen_tmp, nlen_tmp, 1);
|
|
+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP);
|
|
+
|
|
+ bind(BMLOOPSTR1_CMP);
|
|
+ beq(ch1, ch2, BMLOOPSTR1);
|
|
+
|
|
+ bind(BMSKIP);
|
|
+ if (!isLL) {
|
|
+ // if we've met UTF symbol while searching Latin1 pattern, then we can
|
|
+ // skip needle_len symbols
|
|
+ if (needle_isL != haystack_isL) {
|
|
+ mv(result_tmp, needle_len);
|
|
+ } else {
|
|
+ mv(result_tmp, 1);
|
|
+ }
|
|
+ mv(t0, ASIZE);
|
|
+ bgeu(skipch, t0, BMADV);
|
|
+ }
|
|
+ add(result_tmp, sp, skipch);
|
|
+ lbu(result_tmp, Address(result_tmp)); // load skip offset
|
|
+
|
|
+ bind(BMADV);
|
|
+ sub(nlen_tmp, needle_len, 1);
|
|
+ // move haystack after bad char skip offset
|
|
+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift);
|
|
+ ble(haystack, haystack_end, BMLOOPSTR2);
|
|
+ add(sp, sp, ASIZE);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(BMLOOPSTR1_LASTCMP);
|
|
+ bne(ch1, ch2, BMSKIP);
|
|
+
|
|
+ bind(BMMATCH);
|
|
+ sub(result, haystack, orig_haystack);
|
|
+ if (!haystack_isL) {
|
|
+ srli(result, result, 1);
|
|
+ }
|
|
+ add(sp, sp, ASIZE);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(LINEARSTUB);
|
|
+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm
|
|
+ bltz(t0, LINEARSEARCH);
|
|
+ mv(result, zr);
|
|
+ RuntimeAddress stub = NULL;
|
|
+ if (isLL) {
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll());
|
|
+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
|
|
+ } else if (needle_isL) {
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul());
|
|
+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
|
|
+ } else {
|
|
+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu());
|
|
+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
|
|
+ }
|
|
+ trampoline_call(stub);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(NOMATCH);
|
|
+ mv(result, -1);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(LINEARSEARCH);
|
|
+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae);
|
|
+
|
|
+ bind(DONE);
|
|
+ BLOCK_COMMENT("} string_indexof");
|
|
+}
|
|
+
|
|
+// string_indexof
|
|
+// result: x10
|
|
+// src: x11
|
|
+// src_count: x12
|
|
+// pattern: x13
|
|
+// pattern_count: x14 or 1/2/3/4
|
|
+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle,
|
|
+ Register haystack_len, Register needle_len,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ int needle_con_cnt, Register result, int ae)
|
|
+{
|
|
+ // Note:
|
|
+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant
|
|
+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1
|
|
+ assert(needle_con_cnt <= 4, "Invalid needle constant count");
|
|
+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
|
|
+
|
|
+ Register ch1 = t0;
|
|
+ Register ch2 = t1;
|
|
+ Register hlen_neg = haystack_len, nlen_neg = needle_len;
|
|
+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4;
|
|
+
|
|
+ bool isLL = ae == StrIntrinsicNode::LL;
|
|
+
|
|
+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
|
|
+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
|
|
+ int needle_chr_shift = needle_isL ? 0 : 1;
|
|
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
|
|
+ int needle_chr_size = needle_isL ? 1 : 2;
|
|
+ int haystack_chr_size = haystack_isL ? 1 : 2;
|
|
+
|
|
+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu :
|
|
+ (load_chr_insn)&MacroAssembler::lhu;
|
|
+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu :
|
|
+ (load_chr_insn)&MacroAssembler::lhu;
|
|
+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu;
|
|
+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld;
|
|
+
|
|
+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE;
|
|
+
|
|
+ Register first = tmp3;
|
|
+
|
|
+ if (needle_con_cnt == -1) {
|
|
+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
|
|
+
|
|
+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2);
|
|
+ bltz(t0, DOSHORT);
|
|
+
|
|
+ (this->*needle_load_1chr)(first, Address(needle), noreg);
|
|
+ slli(t0, needle_len, needle_chr_shift);
|
|
+ add(needle, needle, t0);
|
|
+ neg(nlen_neg, t0);
|
|
+ slli(t0, result_tmp, haystack_chr_shift);
|
|
+ add(haystack, haystack, t0);
|
|
+ neg(hlen_neg, t0);
|
|
+
|
|
+ bind(FIRST_LOOP);
|
|
+ add(t0, haystack, hlen_neg);
|
|
+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg);
|
|
+ beq(first, ch2, STR1_LOOP);
|
|
+
|
|
+ bind(STR2_NEXT);
|
|
+ add(hlen_neg, hlen_neg, haystack_chr_size);
|
|
+ blez(hlen_neg, FIRST_LOOP);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(STR1_LOOP);
|
|
+ add(nlen_tmp, nlen_neg, needle_chr_size);
|
|
+ add(hlen_tmp, hlen_neg, haystack_chr_size);
|
|
+ bgez(nlen_tmp, MATCH);
|
|
+
|
|
+ bind(STR1_NEXT);
|
|
+ add(ch1, needle, nlen_tmp);
|
|
+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
|
|
+ add(ch2, haystack, hlen_tmp);
|
|
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
|
|
+ bne(ch1, ch2, STR2_NEXT);
|
|
+ add(nlen_tmp, nlen_tmp, needle_chr_size);
|
|
+ add(hlen_tmp, hlen_tmp, haystack_chr_size);
|
|
+ bltz(nlen_tmp, STR1_NEXT);
|
|
+ j(MATCH);
|
|
+
|
|
+ bind(DOSHORT);
|
|
+ if (needle_isL == haystack_isL) {
|
|
+ sub(t0, needle_len, 2);
|
|
+ bltz(t0, DO1);
|
|
+ bgtz(t0, DO3);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (needle_con_cnt == 4) {
|
|
+ Label CH1_LOOP;
|
|
+ (this->*load_4chr)(ch1, Address(needle), noreg);
|
|
+ sub(result_tmp, haystack_len, 4);
|
|
+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp
|
|
+ add(haystack, haystack, tmp3);
|
|
+ neg(hlen_neg, tmp3);
|
|
+
|
|
+ bind(CH1_LOOP);
|
|
+ add(ch2, haystack, hlen_neg);
|
|
+ (this->*load_4chr)(ch2, Address(ch2), noreg);
|
|
+ beq(ch1, ch2, MATCH);
|
|
+ add(hlen_neg, hlen_neg, haystack_chr_size);
|
|
+ blez(hlen_neg, CH1_LOOP);
|
|
+ j(NOMATCH);
|
|
+ }
|
|
+
|
|
+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) {
|
|
+ Label CH1_LOOP;
|
|
+ BLOCK_COMMENT("string_indexof DO2 {");
|
|
+ bind(DO2);
|
|
+ (this->*load_2chr)(ch1, Address(needle), noreg);
|
|
+ if (needle_con_cnt == 2) {
|
|
+ sub(result_tmp, haystack_len, 2);
|
|
+ }
|
|
+ slli(tmp3, result_tmp, haystack_chr_shift);
|
|
+ add(haystack, haystack, tmp3);
|
|
+ neg(hlen_neg, tmp3);
|
|
+
|
|
+ bind(CH1_LOOP);
|
|
+ add(tmp3, haystack, hlen_neg);
|
|
+ (this->*load_2chr)(ch2, Address(tmp3), noreg);
|
|
+ beq(ch1, ch2, MATCH);
|
|
+ add(hlen_neg, hlen_neg, haystack_chr_size);
|
|
+ blez(hlen_neg, CH1_LOOP);
|
|
+ j(NOMATCH);
|
|
+ BLOCK_COMMENT("} string_indexof DO2");
|
|
+ }
|
|
+
|
|
+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) {
|
|
+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
|
|
+ BLOCK_COMMENT("string_indexof DO3 {");
|
|
+
|
|
+ bind(DO3);
|
|
+ (this->*load_2chr)(first, Address(needle), noreg);
|
|
+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg);
|
|
+ if (needle_con_cnt == 3) {
|
|
+ sub(result_tmp, haystack_len, 3);
|
|
+ }
|
|
+ slli(hlen_tmp, result_tmp, haystack_chr_shift);
|
|
+ add(haystack, haystack, hlen_tmp);
|
|
+ neg(hlen_neg, hlen_tmp);
|
|
+
|
|
+ bind(FIRST_LOOP);
|
|
+ add(ch2, haystack, hlen_neg);
|
|
+ (this->*load_2chr)(ch2, Address(ch2), noreg);
|
|
+ beq(first, ch2, STR1_LOOP);
|
|
+
|
|
+ bind(STR2_NEXT);
|
|
+ add(hlen_neg, hlen_neg, haystack_chr_size);
|
|
+ blez(hlen_neg, FIRST_LOOP);
|
|
+ j(NOMATCH);
|
|
+
|
|
+ bind(STR1_LOOP);
|
|
+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size);
|
|
+ add(ch2, haystack, hlen_tmp);
|
|
+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
|
|
+ bne(ch1, ch2, STR2_NEXT);
|
|
+ j(MATCH);
|
|
+ BLOCK_COMMENT("} string_indexof DO3");
|
|
+ }
|
|
+
|
|
+ if (needle_con_cnt == -1 || needle_con_cnt == 1) {
|
|
+ Label DO1_LOOP;
|
|
+
|
|
+ BLOCK_COMMENT("string_indexof DO1 {");
|
|
+ bind(DO1);
|
|
+ (this->*needle_load_1chr)(ch1, Address(needle), noreg);
|
|
+ sub(result_tmp, haystack_len, 1);
|
|
+ mv(tmp3, result_tmp);
|
|
+ if (haystack_chr_shift) {
|
|
+ slli(tmp3, result_tmp, haystack_chr_shift);
|
|
+ }
|
|
+ add(haystack, haystack, tmp3);
|
|
+ neg(hlen_neg, tmp3);
|
|
+
|
|
+ bind(DO1_LOOP);
|
|
+ add(tmp3, haystack, hlen_neg);
|
|
+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
|
|
+ beq(ch1, ch2, MATCH);
|
|
+ add(hlen_neg, hlen_neg, haystack_chr_size);
|
|
+ blez(hlen_neg, DO1_LOOP);
|
|
+ BLOCK_COMMENT("} string_indexof DO1");
|
|
+ }
|
|
+
|
|
+ bind(NOMATCH);
|
|
+ mv(result, -1);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(MATCH);
|
|
+ srai(t0, hlen_neg, haystack_chr_shift);
|
|
+ add(result, result_tmp, t0);
|
|
+
|
|
+ bind(DONE);
|
|
+}
|
|
+
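+// Compare cnt elements of a1 and a2 with vector instructions, processing vl
+// elements per iteration as chosen by vsetvli. On the first mismatch it branches
+// to DONE with tmp2 holding the index of the differing element within the current
+// group; otherwise it sets result to true and falls through.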
+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
|
|
+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) {
|
|
+ Label loop;
|
|
+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16;
|
|
+
|
|
+ bind(loop);
|
|
+ vsetvli(tmp1, cnt, sew, Assembler::m2);
|
|
+ vlex_v(vr1, a1, sew);
|
|
+ vlex_v(vr2, a2, sew);
|
|
+ vmsne_vv(vrs, vr1, vr2);
|
|
+ vfirst_m(tmp2, vrs);
|
|
+ bgez(tmp2, DONE);
|
|
+ sub(cnt, cnt, tmp1);
|
|
+ if (!islatin) {
|
|
+ slli(tmp1, tmp1, 1); // get byte counts
|
|
+ }
|
|
+ add(a1, a1, tmp1);
|
|
+ add(a2, a2, tmp1);
|
|
+ bnez(cnt, loop);
|
|
+
|
|
+ mv(result, true);
|
|
+}
|
|
+
|
|
+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) {
|
|
+ Label DONE;
|
|
+ Register tmp1 = t0;
|
|
+ Register tmp2 = t1;
|
|
+
|
|
+ BLOCK_COMMENT("string_equals_v {");
|
|
+
|
|
+ mv(result, false);
|
|
+
|
|
+ if (elem_size == 2) {
|
|
+ srli(cnt, cnt, 1);
|
|
+ }
|
|
+
|
|
+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
|
|
+
|
|
+ bind(DONE);
|
|
+ BLOCK_COMMENT("} string_equals_v");
|
|
+}
|
|
+
|
|
+// used by C2 ClearArray patterns.
|
|
+// base: Address of a buffer to be zeroed
|
|
+// cnt: Count in HeapWords
|
|
+//
|
|
+// base, cnt, v0, v1 and t0 are clobbered.
|
|
+void MacroAssembler::clear_array_v(Register base, Register cnt) {
|
|
+ Label loop;
|
|
+
|
|
+ // making zero words
|
|
+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
|
|
+ vxor_vv(v0, v0, v0);
|
|
+
|
|
+ bind(loop);
|
|
+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
|
|
+ vse64_v(v0, base);
|
|
+ sub(cnt, cnt, t0);
|
|
+ shadd(base, t0, base, t0, 3);
|
|
+ bnez(cnt, loop);
|
|
+}
|
|
+
|
|
+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result,
|
|
+ Register cnt1, int elem_size) {
|
|
+ Label DONE;
|
|
+ Register tmp1 = t0;
|
|
+ Register tmp2 = t1;
|
|
+ Register cnt2 = tmp2;
|
|
+ int length_offset = arrayOopDesc::length_offset_in_bytes();
|
|
+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
|
|
+
|
|
+ BLOCK_COMMENT("arrays_equals_v {");
|
|
+
|
|
+ // if (a1 == a2), return true
|
|
+ mv(result, true);
|
|
+ oop_equal(a1, a2, DONE);
|
|
+
|
|
+ mv(result, false);
|
|
+ // if a1 == null or a2 == null, return false
|
|
+ beqz(a1, DONE);
|
|
+ beqz(a2, DONE);
|
|
+ // if (a1.length != a2.length), return false
|
|
+ lwu(cnt1, Address(a1, length_offset));
|
|
+ lwu(cnt2, Address(a2, length_offset));
|
|
+ bne(cnt1, cnt2, DONE);
|
|
+
|
|
+ la(a1, Address(a1, base_offset));
|
|
+ la(a2, Address(a2, base_offset));
|
|
+
|
|
+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE);
|
|
+
|
|
+ bind(DONE);
|
|
+
|
|
+ BLOCK_COMMENT("} arrays_equals_v");
|
|
+}
|
|
+
|
|
+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2,
|
|
+ Register result, Register tmp1, Register tmp2, int encForm) {
|
|
+ Label DIFFERENCE, DONE, L, loop;
|
|
+ bool encLL = encForm == StrIntrinsicNode::LL;
|
|
+ bool encLU = encForm == StrIntrinsicNode::LU;
|
|
+ bool encUL = encForm == StrIntrinsicNode::UL;
|
|
+
|
|
+ bool str1_isL = encLL || encLU;
|
|
+ bool str2_isL = encLL || encUL;
|
|
+
|
|
+ int minCharsInWord = encLL ? wordSize : wordSize / 2;
|
|
+
|
|
+ BLOCK_COMMENT("string_compare {");
|
|
+
|
|
+ // for Latin1 strings, 1 byte per character
+ // for UTF16 strings, 2 bytes per character
|
|
+ if (!str1_isL)
|
|
+ sraiw(cnt1, cnt1, 1);
|
|
+ if (!str2_isL)
|
|
+ sraiw(cnt2, cnt2, 1);
|
|
+
|
|
+ // if str1 == str2, return the difference
|
|
+ // save the minimum of the string lengths in cnt2.
|
|
+ sub(result, cnt1, cnt2);
|
|
+ bgt(cnt1, cnt2, L);
|
|
+ mv(cnt2, cnt1);
|
|
+ bind(L);
|
|
+
|
|
+ if (str1_isL == str2_isL) { // LL or UU
|
|
+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE);
|
|
+ j(DONE);
|
|
+ } else { // LU or UL
|
|
+ Register strL = encLU ? str1 : str2;
|
|
+ Register strU = encLU ? str2 : str1;
|
|
+ VectorRegister vstr1 = encLU ? v4 : v0;
|
|
+ VectorRegister vstr2 = encLU ? v0 : v4;
|
|
+
|
|
+ bind(loop);
|
|
+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2);
|
|
+ vle8_v(vstr1, strL);
|
|
+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4);
|
|
+ vzext_vf2(vstr2, vstr1);
|
|
+ vle16_v(vstr1, strU);
|
|
+ vmsne_vv(v0, vstr2, vstr1);
|
|
+ vfirst_m(tmp2, v0);
|
|
+ bgez(tmp2, DIFFERENCE);
|
|
+ sub(cnt2, cnt2, tmp1);
|
|
+ add(strL, strL, tmp1);
|
|
+ shadd(strU, tmp1, strU, tmp1, 1);
|
|
+ bnez(cnt2, loop);
|
|
+ j(DONE);
|
|
+ }
|
|
+ bind(DIFFERENCE);
|
|
+ slli(tmp1, tmp2, 1);
|
|
+ add(str1, str1, str1_isL ? tmp2 : tmp1);
|
|
+ add(str2, str2, str2_isL ? tmp2 : tmp1);
|
|
+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0));
|
|
+ str2_isL ? lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0));
|
|
+ sub(result, tmp1, tmp2);
|
|
+
|
|
+ bind(DONE);
|
|
+}
|
|
+
|
|
+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) {
|
|
+ Label loop;
|
|
+ assert_different_registers(src, dst, len, tmp, t0);
|
|
+
|
|
+ BLOCK_COMMENT("byte_array_inflate_v {");
|
|
+ bind(loop);
|
|
+ vsetvli(tmp, len, Assembler::e8, Assembler::m2);
|
|
+ vle8_v(v2, src);
|
|
+ vsetvli(t0, len, Assembler::e16, Assembler::m4);
|
|
+ vzext_vf2(v0, v2);
|
|
+ vse16_v(v0, dst);
|
|
+ sub(len, len, tmp);
|
|
+ add(src, src, tmp);
|
|
+ shadd(dst, tmp, dst, tmp, 1);
|
|
+ bnez(len, loop);
|
|
+ BLOCK_COMMENT("} byte_array_inflate_v");
|
|
+ postcond(pc() != badAddress);
|
|
+ return pc();
|
|
+}
|
|
+
|
|
+// Compress char[] array to byte[].
|
|
+// result: the array length if every element in array can be encoded; 0, otherwise.
|
|
+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) {
|
|
+ Label done;
|
|
+ encode_iso_array_v(src, dst, len, result, tmp);
|
|
+ beqz(len, done);
|
|
+ mv(result, zr);
|
|
+ bind(done);
|
|
+}
|
|
+
|
|
+// result: the number of elements that have been encoded.
|
|
+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) {
|
|
+ Label loop, DIFFERENCE, DONE;
|
|
+
|
|
+ BLOCK_COMMENT("encode_iso_array_v {");
|
|
+ mv(result, 0);
|
|
+
|
|
+ bind(loop);
|
|
+ mv(tmp, 0xff);
|
|
+ vsetvli(t0, len, Assembler::e16, Assembler::m2);
|
|
+ vle16_v(v2, src);
|
|
+ // if element > 0xff, stop
|
|
+ vmsgtu_vx(v1, v2, tmp);
|
|
+ vfirst_m(tmp, v1);
|
|
+ vmsbf_m(v0, v1);
|
|
+ // compress char to byte
|
|
+ vsetvli(t0, len, Assembler::e8);
|
|
+ vncvt_x_x_w(v1, v2, Assembler::v0_t);
|
|
+ vse8_v(v1, dst, Assembler::v0_t);
|
|
+
|
|
+ bgez(tmp, DIFFERENCE);
|
|
+ add(result, result, t0);
|
|
+ add(dst, dst, t0);
|
|
+ sub(len, len, t0);
|
|
+ shadd(src, t0, src, t0, 1);
|
|
+ bnez(len, loop);
|
|
+ j(DONE);
|
|
+
|
|
+ bind(DIFFERENCE);
|
|
+ add(result, result, tmp);
|
|
+
|
|
+ bind(DONE);
|
|
+ BLOCK_COMMENT("} encode_iso_array_v");
|
|
+}
|
|
+
|
|
+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) {
|
|
+ Label loop, DONE;
|
|
+
|
|
+ mv(result, true);
|
|
+
|
|
+ bind(loop);
|
|
+ vsetvli(t0, len, Assembler::e8, Assembler::m4);
|
|
+ vle8_v(v0, ary);
|
|
+ // if element highest bit is set, return true
|
|
+ vmslt_vx(v0, v0, zr);
|
|
+ vfirst_m(tmp, v0);
|
|
+ bgez(tmp, DONE);
|
|
+
|
|
+ sub(len, len, t0);
|
|
+ add(ary, ary, t0);
|
|
+ bnez(len, loop);
|
|
+ mv(result, false);
|
|
+
|
|
+ bind(DONE);
|
|
+ postcond(pc() != badAddress);
|
|
+ return pc();
|
|
+}
|
|
+
|
|
+// string indexof
|
|
+// compute index by trailing zeros
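+// trailing_zero is the bit position of the 0x80/0x8000 marker found by
+// compute_match_mask. Shifting it right by 3 gives the byte offset of the match
+// inside the current 8-byte group; haystack and result are advanced by that
+// offset (scaled back to characters for UTF16), and match_mask is shifted past
+// the consumed marker so that the caller can continue scanning.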
+void MacroAssembler::compute_index(Register haystack, Register trailing_zero,
|
|
+ Register match_mask, Register result,
|
|
+ Register ch2, Register tmp,
|
|
+ bool haystack_isL)
|
|
+{
|
|
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
|
|
+ srl(match_mask, match_mask, trailing_zero);
|
|
+ srli(match_mask, match_mask, 1);
|
|
+ srli(tmp, trailing_zero, LogBitsPerByte);
|
|
+ if (!haystack_isL) andi(tmp, tmp, 0xE);
|
|
+ add(haystack, haystack, tmp);
|
|
+ ld(ch2, Address(haystack));
|
|
+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
|
|
+ add(result, result, tmp);
|
|
+}
|
|
+
|
|
+// string indexof
|
|
+// Find pattern element in src, compute match mask,
|
|
+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index
|
|
+// match mask patterns would be like:
|
|
+// - 0x8080808080808080 (Latin1)
|
|
+// - 0x8000800080008000 (UTF16)
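+//
+// Per-byte sketch for the Latin1 case: let t = src ^ pattern, so a byte of t is
+// 0x00 exactly where src matches the pattern byte. Then:
+//   0x00: (0x00 - 0x01) & ~(0x00 | 0x7f) = 0xff & 0x80 = 0x80  -> marked
+//   0x5a: (0x5a - 0x01) & ~(0x5a | 0x7f) = 0x59 & 0x80 = 0x00  -> not marked
+// Borrows from the 64-bit subtraction can leak into higher bytes, which is why
+// only the lowest set 0x80/0x8000 is trusted (see the note above).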
+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
|
|
+ Register mask1, Register mask2)
|
|
+{
|
|
+ xorr(src, pattern, src);
|
|
+ sub(match_mask, src, mask1);
|
|
+ orr(src, src, mask2);
|
|
+ notr(src, src);
|
|
+ andr(match_mask, match_mask, src);
|
|
+}
|
|
+
|
|
+// Add two unsigned inputs and output the carry.
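+// (dst, carry) <- src1 + src2; carry becomes 1 iff the unsigned addition wrapped,
+// detected by checking dst < src2 after the add (valid because dst != src2).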
+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
|
|
+{
|
|
+ assert_different_registers(dst, carry);
|
|
+ assert_different_registers(dst, src2);
|
|
+ add(dst, src1, src2);
|
|
+ sltu(carry, dst, src2);
|
|
+}
|
|
+
|
|
+// Add two inputs plus the incoming carry.
|
|
+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
|
|
+{
|
|
+ assert_different_registers(dst, carry);
|
|
+ add(dst, src1, src2);
|
|
+ add(dst, dst, carry);
|
|
+}
|
|
+
|
|
+// Add two unsigned inputs plus the incoming carry, and output the new carry.
|
|
+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
|
|
+{
|
|
+ assert_different_registers(dst, src2);
|
|
+ adc(dst, src1, src2, carry);
|
|
+ sltu(carry, dst, src2);
|
|
+}
|
|
+
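+// (final_dest_hi : dest_lo) <- (dest_hi : dest_lo) + src1 + src2, i.e. a 128-bit
+// accumulation of two 64-bit addends; carry is used as a scratch register.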
+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
|
|
+ Register src1, Register src2, Register carry)
|
|
+{
|
|
+ cad(dest_lo, dest_lo, src1, carry);
|
|
+ add(dest_hi, dest_hi, carry);
|
|
+ cad(dest_lo, dest_lo, src2, carry);
|
|
+ add(final_dest_hi, dest_hi, carry);
|
|
+}
|
|
+
|
|
+// Code for BigInteger::mulAdd intrinsic
|
|
+// out = x10
|
|
+// in = x11
|
|
+// offset = x12 (already out.length-offset)
|
|
+// len = x13
|
|
+// k = x14
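+//
+// For reference, the Java loop this intrinsic stands in for is roughly (a sketch,
+// mirroring BigInteger.implMulAdd):
+//   long carry = 0;
+//   for (int j = len - 1; j >= 0; j--) {
+//     long p = (in[j] & 0xffffffffL) * (k & 0xffffffffL) + (out[offset] & 0xffffffffL) + carry;
+//     out[offset--] = (int)p;
+//     carry = p >>> 32;
+//   }
+//   return (int)carry;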
+void MacroAssembler::mul_add(Register out, Register in, Register offset,
|
|
+ Register len, Register k, Register tmp1, Register tmp2) {
|
|
+ Label L_loop_1, L_loop_2, L_end, L_not_zero;
|
|
+ bnez(len, L_not_zero);
|
|
+ mv(out, zr);
|
|
+ j(L_end);
|
|
+ bind(L_not_zero);
|
|
+ zero_extend(k, k, 32);
|
|
+ shadd(offset, offset, out, t0, LogBytesPerInt);
|
|
+ shadd(in, len, in, t0, LogBytesPerInt);
|
|
+ mv(out, zr);
|
|
+
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ // take the 8-byte loop below only if in and offset are both 8-byte aligned.
|
|
+ orr(t0, in, offset);
|
|
+ andi(t0, t0, 0x7);
|
|
+ beqz(t0, L_loop_2);
|
|
+ } else {
|
|
+ j(L_loop_2);
|
|
+ }
|
|
+
|
|
+ bind(L_loop_1);
|
|
+ sub(in, in, 4);
|
|
+ lwu(t0, Address(in, 0));
|
|
+ mul(t1, t0, k);
|
|
+ add(t0, t1, out);
|
|
+ sub(offset, offset, 4);
|
|
+ lwu(t1, Address(offset, 0));
|
|
+ add(t0, t0, t1);
|
|
+ sw(t0, Address(offset));
|
|
+ srli(out, t0, 32);
|
|
+ sub(len, len, 1);
|
|
+ beqz(len, L_end);
|
|
+ j(L_loop_1);
|
|
+
|
|
+
|
|
+ bind(L_loop_2);
|
|
+ Label L_one;
|
|
+ sub(len, len, 1);
|
|
+ bltz(len, L_end);
|
|
+ sub(len, len, 1);
|
|
+ bltz(len, L_one);
|
|
+
|
|
+ sub(in, in, 8);
|
|
+ ld(tmp1, Address(in, 0));
|
|
+ ror_imm(tmp1, tmp1, 32); // convert to little-endian
|
|
+
|
|
+ const Register carry = out;
|
|
+ const Register src1_hi = t0;
|
|
+ const Register src1_lo = tmp2;
|
|
+ const Register src2 = t1;
|
|
+
|
|
+ mulhu(src1_hi, k, tmp1);
|
|
+ mul(src1_lo, k, tmp1);
|
|
+ sub(offset, offset, 8);
|
|
+ ld(src2, Address(offset, 0));
|
|
+ ror_imm(src2, src2, 32, tmp1);
|
|
+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1);
|
|
+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian
|
|
+ sd(src1_lo, Address(offset, 0));
|
|
+ j(L_loop_2);
|
|
+
|
|
+ bind(L_one);
|
|
+ sub(in, in, 4);
|
|
+ lwu(t0, Address(in, 0));
|
|
+ mul(t1, t0, k);
|
|
+ add(t0, t1, out);
|
|
+ sub(offset, offset, 4);
|
|
+ lwu(t1, Address(offset, 0));
|
|
+ add(t0, t0, t1);
|
|
+ sw(t0, Address(offset));
|
|
+ srli(out, t0, 32);
|
|
+
|
|
+ bind(L_end);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Multiply 32 bit by 32 bit first loop.
|
|
+ */
|
|
+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
|
|
+ Register y, Register y_idx, Register z,
|
|
+ Register carry, Register product,
|
|
+ Register idx, Register kdx)
|
|
+{
|
|
+ // long carry = 0;
|
|
+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) {
|
|
+ // long product = (y[j] & LONG_MASK) *
|
|
+ // (x[xstart] & LONG_MASK) + carry;
|
|
+ // z[k] = (int)product;
|
|
+ // carry = product >>> 32;
|
|
+ // }
|
|
+ // z[xstart] = (int)carry;
|
|
+
|
|
+ Label L_first_loop, L_first_loop_exit;
|
|
+
|
|
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
|
|
+ lwu(x_xstart, Address(t0, 0));
|
|
+
|
|
+ bind(L_first_loop);
|
|
+ sub(idx, idx, 1);
|
|
+ bltz(idx, L_first_loop_exit);
|
|
+
|
|
+ shadd(t0, idx, y, t0, LogBytesPerInt);
|
|
+ lwu(y_idx, Address(t0, 0));
|
|
+ mul(product, x_xstart, y_idx);
|
|
+ add(product, product, carry);
|
|
+ srli(carry, product, 32);
|
|
+ sub(kdx, kdx, 1);
|
|
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
|
|
+ sw(product, Address(t0, 0));
|
|
+ j(L_first_loop);
|
|
+
|
|
+ bind(L_first_loop_exit);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Multiply 64 bit by 64 bit first loop.
|
|
+ */
|
|
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
|
|
+ Register y, Register y_idx, Register z,
|
|
+ Register carry, Register product,
|
|
+ Register idx, Register kdx)
|
|
+{
|
|
+ //
|
|
+ // jlong carry, x[], y[], z[];
|
|
+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
|
|
+ // huge_128 product = y[idx] * x[xstart] + carry;
|
|
+ // z[kdx] = (jlong)product;
|
|
+ // carry = (jlong)(product >>> 64);
|
|
+ // }
|
|
+ // z[xstart] = carry;
|
|
+ //
|
|
+
|
|
+ Label L_first_loop, L_first_loop_exit;
|
|
+ Label L_one_x, L_one_y, L_multiply;
|
|
+
|
|
+ sub(xstart, xstart, 1);
|
|
+ bltz(xstart, L_one_x);
|
|
+
|
|
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
|
|
+ ld(x_xstart, Address(t0, 0));
|
|
+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
|
|
+
|
|
+ bind(L_first_loop);
|
|
+ sub(idx, idx, 1);
|
|
+ bltz(idx, L_first_loop_exit);
|
|
+ sub(idx, idx, 1);
|
|
+ bltz(idx, L_one_y);
|
|
+
|
|
+ shadd(t0, idx, y, t0, LogBytesPerInt);
|
|
+ ld(y_idx, Address(t0, 0));
|
|
+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
|
|
+ bind(L_multiply);
|
|
+
|
|
+ mulhu(t0, x_xstart, y_idx);
|
|
+ mul(product, x_xstart, y_idx);
|
|
+ cad(product, product, carry, t1);
|
|
+ adc(carry, t0, zr, t1);
|
|
+
|
|
+ sub(kdx, kdx, 2);
|
|
+ ror_imm(product, product, 32); // back to big-endian
|
|
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
|
|
+ sd(product, Address(t0, 0));
|
|
+
|
|
+ j(L_first_loop);
|
|
+
|
|
+ bind(L_one_y);
|
|
+ lwu(y_idx, Address(y, 0));
|
|
+ j(L_multiply);
|
|
+
|
|
+ bind(L_one_x);
|
|
+ lwu(x_xstart, Address(x, 0));
|
|
+ j(L_first_loop);
|
|
+
|
|
+ bind(L_first_loop_exit);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Multiply 128 bit by 128. Unrolled inner loop.
|
|
+ *
|
|
+ */
|
|
+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
|
|
+ Register carry, Register carry2,
|
|
+ Register idx, Register jdx,
|
|
+ Register yz_idx1, Register yz_idx2,
|
|
+ Register tmp, Register tmp3, Register tmp4,
|
|
+ Register tmp6, Register product_hi)
|
|
+{
|
|
+ // jlong carry, x[], y[], z[];
|
|
+ // int kdx = xstart+1;
|
|
+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
|
|
+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
|
|
+ // jlong carry2 = (jlong)(tmp3 >>> 64);
|
|
+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
|
|
+ // carry = (jlong)(tmp4 >>> 64);
|
|
+ // z[kdx+idx+1] = (jlong)tmp3;
|
|
+ // z[kdx+idx] = (jlong)tmp4;
|
|
+ // }
|
|
+ // idx += 2;
|
|
+ // if (idx > 0) {
|
|
+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
|
|
+ // z[kdx+idx] = (jlong)yz_idx1;
|
|
+ // carry = (jlong)(yz_idx1 >>> 64);
|
|
+ // }
|
|
+ //
|
|
+
|
|
+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
|
|
+
|
|
+ srli(jdx, idx, 2);
|
|
+
|
|
+ bind(L_third_loop);
|
|
+
|
|
+ sub(jdx, jdx, 1);
|
|
+ bltz(jdx, L_third_loop_exit);
|
|
+ sub(idx, idx, 4);
|
|
+
|
|
+ shadd(t0, idx, y, t0, LogBytesPerInt);
|
|
+ ld(yz_idx2, Address(t0, 0));
|
|
+ ld(yz_idx1, Address(t0, wordSize));
|
|
+
|
|
+ shadd(tmp6, idx, z, t0, LogBytesPerInt);
|
|
+
|
|
+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
|
|
+ ror_imm(yz_idx2, yz_idx2, 32);
|
|
+
|
|
+ ld(t1, Address(tmp6, 0));
|
|
+ ld(t0, Address(tmp6, wordSize));
|
|
+
|
|
+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
|
|
+ mulhu(tmp4, product_hi, yz_idx1);
|
|
+
|
|
+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
|
|
+ ror_imm(t1, t1, 32, tmp);
|
|
+
|
|
+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp
|
|
+ mulhu(carry2, product_hi, yz_idx2);
|
|
+
|
|
+ cad(tmp3, tmp3, carry, carry);
|
|
+ adc(tmp4, tmp4, zr, carry);
|
|
+ cad(tmp3, tmp3, t0, t0);
|
|
+ cadc(tmp4, tmp4, tmp, t0);
|
|
+ adc(carry, carry2, zr, t0);
|
|
+ cad(tmp4, tmp4, t1, carry2);
|
|
+ adc(carry, carry, zr, carry2);
|
|
+
|
|
+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
|
|
+ ror_imm(tmp4, tmp4, 32);
|
|
+ sd(tmp4, Address(tmp6, 0));
|
|
+ sd(tmp3, Address(tmp6, wordSize));
|
|
+
|
|
+ j(L_third_loop);
|
|
+
|
|
+ bind(L_third_loop_exit);
|
|
+
|
|
+ andi(idx, idx, 0x3);
|
|
+ beqz(idx, L_post_third_loop_done);
|
|
+
|
|
+ Label L_check_1;
|
|
+ sub(idx, idx, 2);
|
|
+ bltz(idx, L_check_1);
|
|
+
|
|
+ shadd(t0, idx, y, t0, LogBytesPerInt);
|
|
+ ld(yz_idx1, Address(t0, 0));
|
|
+ ror_imm(yz_idx1, yz_idx1, 32);
|
|
+
|
|
+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
|
|
+ mulhu(tmp4, product_hi, yz_idx1);
|
|
+
|
|
+ shadd(t0, idx, z, t0, LogBytesPerInt);
|
|
+ ld(yz_idx2, Address(t0, 0));
|
|
+ ror_imm(yz_idx2, yz_idx2, 32, tmp);
|
|
+
|
|
+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
|
|
+
|
|
+ ror_imm(tmp3, tmp3, 32, tmp);
|
|
+ sd(tmp3, Address(t0, 0));
|
|
+
|
|
+ bind(L_check_1);
|
|
+
|
|
+ andi(idx, idx, 0x1);
|
|
+ sub(idx, idx, 1);
|
|
+ bltz(idx, L_post_third_loop_done);
|
|
+ shadd(t0, idx, y, t0, LogBytesPerInt);
|
|
+ lwu(tmp4, Address(t0, 0));
|
|
+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3
|
|
+ mulhu(carry2, tmp4, product_hi);
|
|
+
|
|
+ shadd(t0, idx, z, t0, LogBytesPerInt);
|
|
+ lwu(tmp4, Address(t0, 0));
|
|
+
|
|
+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry);
|
|
+
|
|
+ shadd(t0, idx, z, t0, LogBytesPerInt);
|
|
+ sw(tmp3, Address(t0, 0));
|
|
+ slli(t0, carry2, 32);
|
|
+ srli(carry, tmp3, 32);
|
|
+ orr(carry, carry, t0);
|
|
+
|
|
+ bind(L_post_third_loop_done);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Code for BigInteger::multiplyToLen() intrinsic.
|
|
+ *
|
|
+ * x10: x
|
|
+ * x11: xlen
|
|
+ * x12: y
|
|
+ * x13: ylen
|
|
+ * x14: z
|
|
+ * x15: zlen
|
|
+ * x16: tmp1
|
|
+ * x17: tmp2
|
|
+ * x7: tmp3
|
|
+ * x28: tmp4
|
|
+ * x29: tmp5
|
|
+ * x30: tmp6
|
|
+ * x31: tmp7
|
|
+ */
|
|
+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
|
+ Register z, Register zlen,
|
|
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
|
+ Register tmp5, Register tmp6, Register product_hi)
|
|
+{
|
|
+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
|
|
+
|
|
+ const Register idx = tmp1;
|
|
+ const Register kdx = tmp2;
|
|
+ const Register xstart = tmp3;
|
|
+
|
|
+ const Register y_idx = tmp4;
|
|
+ const Register carry = tmp5;
|
|
+ const Register product = xlen;
|
|
+ const Register x_xstart = zlen; // reuse register
|
|
+
|
|
+ mv(idx, ylen); // idx = ylen;
|
|
+ mv(kdx, zlen); // kdx = xlen+ylen;
|
|
+ mv(carry, zr); // carry = 0;
|
|
+
|
|
+ Label L_multiply_64_or_128, L_done;
|
|
+
|
|
+ sub(xstart, xlen, 1);
|
|
+ bltz(xstart, L_done);
|
|
+
|
|
+ const Register jdx = tmp1;
|
|
+
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ // if x and y are both 8-byte aligned.
|
|
+ orr(t0, xlen, ylen);
|
|
+ andi(t0, t0, 0x1);
|
|
+ beqz(t0, L_multiply_64_or_128);
|
|
+ } else {
|
|
+ j(L_multiply_64_or_128);
|
|
+ }
|
|
+
|
|
+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
|
|
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+
|
|
+ Label L_second_loop_1;
|
|
+ bind(L_second_loop_1);
|
|
+ mv(carry, zr);
|
|
+ mv(jdx, ylen);
|
|
+ sub(xstart, xstart, 1);
|
|
+ bltz(xstart, L_done);
|
|
+ sub(sp, sp, 2 * wordSize);
|
|
+ sd(z, Address(sp, 0));
|
|
+ sd(zr, Address(sp, wordSize));
|
|
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
|
|
+ addi(z, t0, 4);
|
|
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
|
|
+ lwu(product, Address(t0, 0));
|
|
+ Label L_third_loop, L_third_loop_exit;
|
|
+
|
|
+ bind(L_third_loop);
|
|
+ sub(jdx, jdx, 1);
|
|
+ bltz(jdx, L_third_loop_exit);
|
|
+
|
|
+ shadd(t0, jdx, y, t0, LogBytesPerInt);
|
|
+ lwu(t0, Address(t0, 0));
|
|
+ mul(t1, t0, product);
|
|
+ add(t0, t1, carry);
|
|
+ shadd(tmp6, jdx, z, t1, LogBytesPerInt);
|
|
+ lwu(t1, Address(tmp6, 0));
|
|
+ add(t0, t0, t1);
|
|
+ sw(t0, Address(tmp6, 0));
|
|
+ srli(carry, t0, 32);
|
|
+ j(L_third_loop);
|
|
+
|
|
+ bind(L_third_loop_exit);
|
|
+ ld(z, Address(sp, 0));
|
|
+ addi(sp, sp, 2 * wordSize);
|
|
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+
|
|
+ j(L_second_loop_1);
|
|
+
|
|
+ bind(L_multiply_64_or_128);
|
|
+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
|
|
+
|
|
+ Label L_second_loop_2;
|
|
+ beqz(kdx, L_second_loop_2);
|
|
+
|
|
+ Label L_carry;
|
|
+ sub(kdx, kdx, 1);
|
|
+ beqz(kdx, L_carry);
|
|
+
|
|
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+ srli(carry, carry, 32);
|
|
+ sub(kdx, kdx, 1);
|
|
+
|
|
+ bind(L_carry);
|
|
+ shadd(t0, kdx, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+
|
|
+ // Second and third (nested) loops.
|
|
+ //
|
|
+ // for (int i = xstart-1; i >= 0; i--) { // Second loop
|
|
+ // carry = 0;
|
|
+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
|
|
+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
|
|
+ // (z[k] & LONG_MASK) + carry;
|
|
+ // z[k] = (int)product;
|
|
+ // carry = product >>> 32;
|
|
+ // }
|
|
+ // z[i] = (int)carry;
|
|
+ // }
|
|
+ //
|
|
+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
|
|
+
|
|
+ bind(L_second_loop_2);
|
|
+ mv(carry, zr); // carry = 0;
|
|
+ mv(jdx, ylen); // j = ystart+1
|
|
+
|
|
+ sub(xstart, xstart, 1); // i = xstart-1;
|
|
+ bltz(xstart, L_done);
|
|
+
|
|
+ sub(sp, sp, 4 * wordSize);
|
|
+ sd(z, Address(sp, 0));
|
|
+
|
|
+ Label L_last_x;
|
|
+ shadd(t0, xstart, z, t0, LogBytesPerInt);
|
|
+ addi(z, t0, 4);
|
|
+ sub(xstart, xstart, 1); // i = xstart-1;
|
|
+ bltz(xstart, L_last_x);
|
|
+
|
|
+ shadd(t0, xstart, x, t0, LogBytesPerInt);
|
|
+ ld(product_hi, Address(t0, 0));
|
|
+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
|
|
+
|
|
+ Label L_third_loop_prologue;
|
|
+ bind(L_third_loop_prologue);
|
|
+
|
|
+ sd(ylen, Address(sp, wordSize));
|
|
+ sd(x, Address(sp, 2 * wordSize));
|
|
+ sd(xstart, Address(sp, 3 * wordSize));
|
|
+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
|
|
+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
|
|
+ ld(z, Address(sp, 0));
|
|
+ ld(ylen, Address(sp, wordSize));
|
|
+ ld(x, Address(sp, 2 * wordSize));
|
|
+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
|
|
+ addi(sp, sp, 4 * wordSize);
|
|
+
|
|
+ addi(tmp3, xlen, 1);
|
|
+ shadd(t0, tmp3, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+
|
|
+ sub(tmp3, tmp3, 1);
|
|
+ bltz(tmp3, L_done);
|
|
+
|
|
+ // z[i] = (int) carry;
|
|
+ srli(carry, carry, 32);
|
|
+ shadd(t0, tmp3, z, t0, LogBytesPerInt);
|
|
+ sw(carry, Address(t0, 0));
|
|
+ j(L_second_loop_2);
|
|
+
|
|
+ // Next infrequent code is moved outside loops.
|
|
+ bind(L_last_x);
|
|
+ lwu(product_hi, Address(x, 0));
|
|
+ j(L_third_loop_prologue);
|
|
+
|
|
+ bind(L_done);
|
|
+}
|
|
+#endif // COMPILER2
|
|
+
|
|
+// Count bits of trailing zero chars from lsb to msb until first non-zero element.
|
|
+// For the LL case each element is one byte, so we step 8 bits at a time;
+// otherwise we step 16 bits at a time.
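+// In other words the result is ctz(Rs) rounded down to a multiple of the element
+// width, e.g. for LL: Rs = 0x00800000 -> ctz = 23 -> Rd = 16 (two whole zero bytes).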
+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
|
|
+{
|
|
+ if (UseZbb) {
|
|
+ assert_different_registers(Rd, Rs, tmp1);
|
|
+ int step = isLL ? 8 : 16;
|
|
+ ctz(Rd, Rs);
|
|
+ andi(tmp1, Rd, step - 1);
|
|
+ sub(Rd, Rd, tmp1);
|
|
+ return;
|
|
+ }
|
|
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
|
|
+ Label Loop;
|
|
+ int step = isLL ? 8 : 16;
|
|
+ mv(Rd, -step);
|
|
+ mv(tmp2, Rs);
|
|
+
|
|
+ bind(Loop);
|
|
+ addi(Rd, Rd, step);
|
|
+ andi(tmp1, tmp2, ((1 << step) - 1));
|
|
+ srli(tmp2, tmp2, step);
|
|
+ beqz(tmp1, Loop);
|
|
+}
|
|
+
|
|
+// This routine reads the adjacent 4 bytes from the lower half of the source
+// register and inflates them into the destination register, for example:
|
|
+// Rs: A7A6A5A4A3A2A1A0
|
|
+// Rd: 00A300A200A100A0
|
|
+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
|
|
+{
|
|
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
|
|
+ mv(tmp1, 0xFF000000); // first byte mask at lower word
|
|
+ andr(Rd, Rs, tmp1);
|
|
+ for (int i = 0; i < 2; i++) {
|
|
+ slli(Rd, Rd, wordSize);
|
|
+ srli(tmp1, tmp1, wordSize);
|
|
+ andr(tmp2, Rs, tmp1);
|
|
+ orr(Rd, Rd, tmp2);
|
|
+ }
|
|
+ slli(Rd, Rd, wordSize);
|
|
+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word
|
|
+ orr(Rd, Rd, tmp2);
|
|
+}
|
|
+
|
|
+// This routine reads the adjacent 4 bytes from the upper half of the source
+// register and inflates them into the destination register, for example:
|
|
+// Rs: A7A6A5A4A3A2A1A0
|
|
+// Rd: 00A700A600A500A4
|
|
+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
|
|
+{
|
|
+ assert_different_registers(Rd, Rs, tmp1, tmp2);
|
|
+ srli(Rs, Rs, 32); // only upper 32 bits are needed
|
|
+ inflate_lo32(Rd, Rs, tmp1, tmp2);
|
|
+}
|
|
+
|
|
+// The size of the blocks erased by the zero_blocks stub. We must
|
|
+// handle anything smaller than this ourselves in zero_words().
|
|
+const int MacroAssembler::zero_words_block_size = 8;
|
|
+
|
|
+// zero_words() is used by C2 ClearArray patterns. It is as small as
|
|
+// possible, handling small word counts locally and delegating
|
|
+// anything larger to the zero_blocks stub. It is expanded many times
|
|
+// in compiled code, so it is important to keep it short.
|
|
+
|
|
+// ptr: Address of a buffer to be zeroed.
|
|
+// cnt: Count in HeapWords.
|
|
+//
|
|
+// ptr, cnt, and t0 are clobbered.
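+//
+// Counts of at least zero_words_block_size are delegated to the zero_blocks stub;
+// the remaining tail (fewer than 8 words) is stored here with a short sequence
+// that peels off 4, 2 and 1 words according to the bits of cnt.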
+address MacroAssembler::zero_words(Register ptr, Register cnt)
|
|
+{
|
|
+ assert(is_power_of_2(zero_words_block_size), "adjust this");
|
|
+ assert(ptr == x28 && cnt == x29, "mismatch in register usage");
|
|
+ assert_different_registers(cnt, t0);
|
|
+
|
|
+ BLOCK_COMMENT("zero_words {");
|
|
+ mv(t0, zero_words_block_size);
|
|
+ Label around, done, done16;
|
|
+ bltu(cnt, t0, around);
|
|
+ {
|
|
+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
|
|
+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
|
|
+ if (StubRoutines::riscv::complete()) {
|
|
+ address tpc = trampoline_call(zero_blocks);
|
|
+ if (tpc == NULL) {
|
|
+ DEBUG_ONLY(reset_labels1(around));
|
|
+ postcond(pc() == badAddress);
|
|
+ return NULL;
|
|
+ }
|
|
+ } else {
|
|
+ jal(zero_blocks);
|
|
+ }
|
|
+ }
|
|
+ bind(around);
|
|
+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
|
|
+ Label l;
|
|
+ andi(t0, cnt, i);
|
|
+ beqz(t0, l);
|
|
+ for (int j = 0; j < i; j++) {
|
|
+ sd(zr, Address(ptr, 0));
|
|
+ addi(ptr, ptr, 8);
|
|
+ }
|
|
+ bind(l);
|
|
+ }
|
|
+ {
|
|
+ Label l;
|
|
+ andi(t0, cnt, 1);
|
|
+ beqz(t0, l);
|
|
+ sd(zr, Address(ptr, 0));
|
|
+ bind(l);
|
|
+ }
|
|
+ BLOCK_COMMENT("} zero_words");
|
|
+ postcond(pc() != badAddress);
|
|
+ return pc();
|
|
+}
|
|
+
|
|
+// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
|
+// cnt: Immediate count in HeapWords.
|
|
+#define SmallArraySize (18 * BytesPerLong)
|
|
+void MacroAssembler::zero_words(Register base, uint64_t cnt)
|
|
+{
|
|
+ assert_different_registers(base, t0, t1);
|
|
+
|
|
+ BLOCK_COMMENT("zero_words {");
|
|
+
|
|
+ if (cnt <= SmallArraySize / BytesPerLong) {
|
|
+ for (int i = 0; i < (int)cnt; i++) {
|
|
+ sd(zr, Address(base, i * wordSize));
|
|
+ }
|
|
+ } else {
|
|
+ const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
|
|
+ int remainder = cnt % unroll;
|
|
+ for (int i = 0; i < remainder; i++) {
|
|
+ sd(zr, Address(base, i * wordSize));
|
|
+ }
|
|
+
|
|
+ Label loop;
|
|
+ Register cnt_reg = t0;
|
|
+ Register loop_base = t1;
|
|
+ cnt = cnt - remainder;
|
|
+ mv(cnt_reg, cnt);
|
|
+ add(loop_base, base, remainder * wordSize);
|
|
+ bind(loop);
|
|
+ sub(cnt_reg, cnt_reg, unroll);
|
|
+ for (int i = 0; i < unroll; i++) {
|
|
+ sd(zr, Address(loop_base, i * wordSize));
|
|
+ }
|
|
+ add(loop_base, loop_base, unroll * wordSize);
|
|
+ bnez(cnt_reg, loop);
|
|
+ }
|
|
+ BLOCK_COMMENT("} zero_words");
|
|
+}
|
|
+
|
|
+// base: Address of a buffer to be filled, 8 bytes aligned.
|
|
+// cnt: Count in 8-byte unit.
|
|
+// value: Value to be filled with.
|
|
+// base will point to the end of the buffer after filling.
|
|
+void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
|
+{
|
|
+// Algorithm:
|
|
+//
|
|
+// t0 = cnt & 7
|
|
+// cnt -= t0
|
|
+// p += t0
|
|
+// switch (t0):
|
|
+// switch start:
|
|
+// do while cnt
|
|
+// cnt -= 8
|
|
+// p[-8] = value
|
|
+// case 7:
|
|
+// p[-7] = value
|
|
+// case 6:
|
|
+// p[-6] = value
|
|
+// // ...
|
|
+// case 1:
|
|
+// p[-1] = value
|
|
+// case 0:
|
|
+// p += 8
|
|
+// do-while end
|
|
+// switch end
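+//
+// The 'switch' is realised as a computed jump: t1 is loaded with the address of
+// 'entry' and we jump (cnt % 8) * 4 bytes before it, so exactly that many of the
+// unrolled 4-byte sd instructions execute on the first pass through the loop body.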
+
|
|
+ assert_different_registers(base, cnt, value, t0, t1);
|
|
+
|
|
+ Label fini, skip, entry, loop;
|
|
+ const int unroll = 8; // Number of sd instructions we'll unroll
|
|
+
|
|
+ beqz(cnt, fini);
|
|
+
|
|
+ andi(t0, cnt, unroll - 1);
|
|
+ sub(cnt, cnt, t0);
|
|
+ // Store the first cnt % 8 words via the computed jump, then the loop stores 8 words per iteration.
|
|
+ shadd(base, t0, base, t1, 3);
|
|
+ la(t1, entry);
|
|
+ slli(t0, t0, 2); // each unrolled sd is a 4-byte instruction, so jump back (cnt % 8) * 4 bytes from 'entry'
|
|
+ sub(t1, t1, t0);
|
|
+ jr(t1);
|
|
+
|
|
+ bind(loop);
|
|
+ add(base, base, unroll * 8);
|
|
+ for (int i = -unroll; i < 0; i++) {
|
|
+ sd(value, Address(base, i * 8));
|
|
+ }
|
|
+ bind(entry);
|
|
+ sub(cnt, cnt, unroll);
|
|
+ bgez(cnt, loop);
|
|
+
|
|
+ bind(fini);
|
|
+}
|
|
+
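+// The *_safe conversions below give Java semantics for float/double -> int/long:
+// the fcsr flags are cleared, the conversion is done, and if any exception flag
+// other than inexact was raised for a NaN input the result is forced to zero;
+// otherwise the (possibly saturated) fcvt result is kept.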
+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \
|
|
+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
|
|
+ Label L_Okay; \
|
|
+ fscsr(zr); \
|
|
+ FLOATCVT(dst, src); \
|
|
+ frcsr(tmp); \
|
|
+ andi(tmp, tmp, 0x1E); \
|
|
+ beqz(tmp, L_Okay); \
|
|
+ FLOATEQ(tmp, src, src); \
|
|
+ bnez(tmp, L_Okay); \
|
|
+ mv(dst, zr); \
|
|
+ bind(L_Okay); \
|
|
+}
|
|
+
|
|
+FCVT_SAFE(fcvt_w_s, feq_s)
|
|
+FCVT_SAFE(fcvt_l_s, feq_s)
|
|
+FCVT_SAFE(fcvt_w_d, feq_d)
|
|
+FCVT_SAFE(fcvt_l_d, feq_d)
|
|
+
|
|
+#undef FCVT_SAFE
|
|
+
|
|
+#define FCMP(FLOATTYPE, FLOATSIG) \
|
|
+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \
|
|
+ FloatRegister Rs2, int unordered_result) { \
|
|
+ Label Ldone; \
|
|
+ if (unordered_result < 0) { \
|
|
+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \
|
|
+ /* installs 1 if gt else 0 */ \
|
|
+ flt_##FLOATSIG(result, Rs2, Rs1); \
|
|
+ /* Rs1 > Rs2, install 1 */ \
|
|
+ bgtz(result, Ldone); \
|
|
+ feq_##FLOATSIG(result, Rs1, Rs2); \
|
|
+ addi(result, result, -1); \
|
|
+ /* Rs1 = Rs2, install 0 */ \
|
|
+ /* NaN or Rs1 < Rs2, install -1 */ \
|
|
+ bind(Ldone); \
|
|
+ } else { \
|
|
+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \
|
|
+ /* installs 1 if gt or unordered else 0 */ \
|
|
+ flt_##FLOATSIG(result, Rs1, Rs2); \
|
|
+ /* Rs1 < Rs2, install -1 */ \
|
|
+ bgtz(result, Ldone); \
|
|
+ feq_##FLOATSIG(result, Rs1, Rs2); \
|
|
+ addi(result, result, -1); \
|
|
+ /* Rs1 = Rs2, install 0 */ \
|
|
+ /* NaN or Rs1 > Rs2, install 1 */ \
|
|
+ bind(Ldone); \
|
|
+ neg(result, result); \
|
|
+ } \
|
|
+}
|
|
+
|
|
+FCMP(float, s);
|
|
+FCMP(double, d);
|
|
+
|
|
+#undef FCMP
|
|
+
|
|
+// Zero words; len is in bytes
|
|
+// Destroys all registers except addr
|
|
+// len must be a nonzero multiple of wordSize
|
|
+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) {
|
|
+ assert_different_registers(addr, len, tmp1, t0, t1);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ andi(t0, len, BytesPerWord - 1);
|
|
+ beqz(t0, L);
|
|
+ stop("len is not a multiple of BytesPerWord");
|
|
+ bind(L);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ block_comment("zero memory");
|
|
+#endif // PRODUCT
|
|
+
|
|
+ Label loop;
|
|
+ Label entry;
|
|
+
|
|
+ // Algorithm:
|
|
+ //
|
|
+ // t0 = cnt & 7
|
|
+ // cnt -= t0
|
|
+ // p += t0
|
|
+ // switch (t0) {
|
|
+ // do {
|
|
+ // cnt -= 8
|
|
+ // p[-8] = 0
|
|
+ // case 7:
|
|
+ // p[-7] = 0
|
|
+ // case 6:
|
|
+ // p[-6] = 0
|
|
+ // ...
|
|
+ // case 1:
|
|
+ // p[-1] = 0
|
|
+ // case 0:
|
|
+ // p += 8
|
|
+ // } while (cnt)
|
|
+ // }
|
|
+
|
|
+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll
|
|
+
|
|
+ srli(len, len, LogBytesPerWord);
|
|
+ andi(t0, len, unroll - 1); // t0 = cnt % unroll
|
|
+ sub(len, len, t0); // cnt -= unroll
|
|
+ // tmp1 always points to the end of the region we're about to zero
|
|
+ shadd(tmp1, t0, addr, t1, LogBytesPerWord);
|
|
+ la(t1, entry);
|
|
+ slli(t0, t0, 2);
|
|
+ sub(t1, t1, t0);
|
|
+ jr(t1);
|
|
+ bind(loop);
|
|
+ sub(len, len, unroll);
|
|
+ for (int i = -unroll; i < 0; i++) {
|
|
+ Assembler::sd(zr, Address(tmp1, i * wordSize));
|
|
+ }
|
|
+ bind(entry);
|
|
+ add(tmp1, tmp1, unroll * wordSize);
|
|
+ bnez(len, loop);
|
|
+}
|
|
+
|
|
+// shift left by shamt and add
|
|
+// Rd = (Rs1 << shamt) + Rs2
|
|
+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
|
|
+ if (UseZba) {
|
|
+ if (shamt == 1) {
|
|
+ sh1add(Rd, Rs1, Rs2);
|
|
+ return;
|
|
+ } else if (shamt == 2) {
|
|
+ sh2add(Rd, Rs1, Rs2);
|
|
+ return;
|
|
+ } else if (shamt == 3) {
|
|
+ sh3add(Rd, Rs1, Rs2);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (shamt != 0) {
|
|
+ slli(tmp, Rs1, shamt);
|
|
+ add(Rd, Rs2, tmp);
|
|
+ } else {
|
|
+ add(Rd, Rs1, Rs2);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
|
|
+ if (UseZba && bits == 32) {
|
|
+ zext_w(dst, src);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (UseZbb && bits == 16) {
|
|
+ zext_h(dst, src);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (bits == 8) {
|
|
+ zext_b(dst, src);
|
|
+ } else {
|
|
+ slli(dst, src, XLEN - bits);
|
|
+ srli(dst, dst, XLEN - bits);
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
|
|
+ if (UseZbb) {
|
|
+ if (bits == 8) {
|
|
+ sext_b(dst, src);
|
|
+ return;
|
|
+ } else if (bits == 16) {
|
|
+ sext_h(dst, src);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (bits == 32) {
|
|
+ sext_w(dst, src);
|
|
+ } else {
|
|
+ slli(dst, src, XLEN - bits);
|
|
+ srai(dst, dst, XLEN - bits);
|
|
+ }
|
|
+}
|
|
+
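+// Three-way compare of two long values, as in Long.compare: dst = -1, 0 or 1
+// for src1 <, == or > src2. tmp is used to preserve an input that aliases dst.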
+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
|
|
+{
|
|
+ if (src1 == src2) {
|
|
+ mv(dst, zr);
|
|
+ return;
|
|
+ }
|
|
+ Label done;
|
|
+ Register left = src1;
|
|
+ Register right = src2;
|
|
+ if (dst == src1) {
|
|
+ assert_different_registers(dst, src2, tmp);
|
|
+ mv(tmp, src1);
|
|
+ left = tmp;
|
|
+ } else if (dst == src2) {
|
|
+ assert_different_registers(dst, src1, tmp);
|
|
+ mv(tmp, src2);
|
|
+ right = tmp;
|
|
+ }
|
|
+
|
|
+ // installs 1 if gt else 0
|
|
+ slt(dst, right, left);
|
|
+ bnez(dst, done);
|
|
+ slt(dst, left, right);
|
|
+ // dst = -1 if lt; else if eq, dst = 0
|
|
+ neg(dst, dst);
|
|
+ bind(done);
|
|
+}
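The two slt instructions implement an ordinary three-way compare. A C++ sketch of the value dst ends up holding; cmp_l2i_sketch is an illustrative name, not part of the patch:

  #include <cstdint>

  // 1 if src1 > src2, 0 if equal, -1 if src1 < src2.
  static inline int cmp_l2i_sketch(int64_t src1, int64_t src2) {
    if (src2 < src1) return 1;    // first slt: dst = 1 when src1 > src2
    return -(int)(src1 < src2);   // second slt, negated: -1 when less, 0 when equal
  }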
|
|
+
|
|
+void MacroAssembler::load_constant_pool_cache(Register cpool, Register method)
|
|
+{
|
|
+ ld(cpool, Address(method, Method::const_offset()));
|
|
+ ld(cpool, Address(cpool, ConstMethod::constants_offset()));
|
|
+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes()));
|
|
+}
|
|
+
|
|
+void MacroAssembler::load_max_stack(Register dst, Register method)
|
|
+{
|
|
+ ld(dst, Address(method, Method::const_offset()));
|
|
+ lhu(dst, Address(dst, ConstMethod::max_stack_offset()));
|
|
+}
|
|
+
|
|
+// The java_calling_convention describes stack locations as ideal slots on
|
|
+// a frame with no abi restrictions. Since we must observe abi restrictions
|
|
+// (like the placement of the register window) the slots must be biased by
|
|
+// the following value.
|
|
+static int reg2offset_in(VMReg r) {
|
|
+ // Account for saved fp and ra
|
|
+ // This should really be in_preserve_stack_slots
|
|
+ return r->reg2stack() * VMRegImpl::stack_slot_size;
|
|
+}
|
|
+
|
|
+static int reg2offset_out(VMReg r) {
|
|
+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
|
|
+}
|
|
+
|
|
+// On 64 bit we will store integer-like items to the stack as
+// 64-bit items (riscv64 ABI) even though Java would only store
+// 32 bits for a parameter. On 32 bit it will simply be 32 bits,
+// so this routine will do 32->32 on 32 bit and 32->64 on 64 bit.
|
|
+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
|
|
+ if (src.first()->is_stack()) {
|
|
+ if (dst.first()->is_stack()) {
|
|
+ // stack to stack
|
|
+ ld(tmp, Address(fp, reg2offset_in(src.first())));
|
|
+ sd(tmp, Address(sp, reg2offset_out(dst.first())));
|
|
+ } else {
|
|
+ // stack to reg
|
|
+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
|
|
+ }
|
|
+ } else if (dst.first()->is_stack()) {
|
|
+ // reg to stack
|
|
+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
|
|
+ } else {
|
|
+ if (dst.first() != src.first()) {
|
|
+ // sign-extend the low 32 bits
|
|
+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
|
|
+ }
|
|
+ }
|
|
+}
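In the register-to-register case the addw with zr both moves the value and sign-extends its low 32 bits. A one-line C++ sketch of that transformation (illustrative name only):

  #include <cstdint>

  // Keep the low 32 bits and sign-extend them to 64 bits, as addw(dst, src, zr) does.
  static inline int64_t widen32_sketch(int64_t v) {
    return (int64_t)(int32_t)v;
  }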
|
|
+
|
|
+// An oop arg. Must pass a handle not the oop itself
|
|
+void MacroAssembler::object_move(OopMap* map,
|
|
+ int oop_handle_offset,
|
|
+ int framesize_in_slots,
|
|
+ VMRegPair src,
|
|
+ VMRegPair dst,
|
|
+ bool is_receiver,
|
|
+ int* receiver_offset) {
|
|
+ assert_cond(map != NULL && receiver_offset != NULL);
|
|
+ // must pass a handle. First figure out the location we use as a handle
|
|
+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
|
|
+
|
|
+ // See if oop is NULL; if it is we need no handle
|
|
+
|
|
+ if (src.first()->is_stack()) {
|
|
+ // Oop is already on the stack as an argument
|
|
+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
|
|
+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
|
|
+ if (is_receiver) {
|
|
+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
|
|
+ }
|
|
+
|
|
+ ld(t0, Address(fp, reg2offset_in(src.first())));
|
|
+ la(rHandle, Address(fp, reg2offset_in(src.first())));
|
|
+ // conditionally move a NULL
|
|
+ Label notZero1;
|
|
+ bnez(t0, notZero1);
|
|
+ mv(rHandle, zr);
|
|
+ bind(notZero1);
|
|
+ } else {
|
|
+
|
|
+ // Oop is in a register we must store it to the space we reserve
|
|
+ // on the stack for oop_handles and pass a handle if oop is non-NULL
|
|
+
|
|
+ const Register rOop = src.first()->as_Register();
|
|
+ int oop_slot = -1;
|
|
+ if (rOop == j_rarg0) {
|
|
+ oop_slot = 0;
|
|
+ } else if (rOop == j_rarg1) {
|
|
+ oop_slot = 1;
|
|
+ } else if (rOop == j_rarg2) {
|
|
+ oop_slot = 2;
|
|
+ } else if (rOop == j_rarg3) {
|
|
+ oop_slot = 3;
|
|
+ } else if (rOop == j_rarg4) {
|
|
+ oop_slot = 4;
|
|
+ } else if (rOop == j_rarg5) {
|
|
+ oop_slot = 5;
|
|
+ } else if (rOop == j_rarg6) {
|
|
+ oop_slot = 6;
|
|
+ } else {
|
|
+ assert(rOop == j_rarg7, "wrong register");
|
|
+ oop_slot = 7;
|
|
+ }
|
|
+
|
|
+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
|
|
+ int offset = oop_slot * VMRegImpl::stack_slot_size;
|
|
+
|
|
+ map->set_oop(VMRegImpl::stack2reg(oop_slot));
|
|
+ // Store oop in handle area, may be NULL
|
|
+ sd(rOop, Address(sp, offset));
|
|
+ if (is_receiver) {
|
|
+ *receiver_offset = offset;
|
|
+ }
|
|
+
|
|
+ // rOop may be the same as rHandle
|
|
+ if (rOop == rHandle) {
|
|
+ Label isZero;
|
|
+ beqz(rOop, isZero);
|
|
+ la(rHandle, Address(sp, offset));
|
|
+ bind(isZero);
|
|
+ } else {
|
|
+ Label notZero2;
|
|
+ la(rHandle, Address(sp, offset));
|
|
+ bnez(rOop, notZero2);
|
|
+ mv(rHandle, zr);
|
|
+ bind(notZero2);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // If arg is on the stack then place it, otherwise it is already in the correct reg.
|
|
+ if (dst.first()->is_stack()) {
|
|
+ sd(rHandle, Address(sp, reg2offset_out(dst.first())));
|
|
+ }
|
|
+}
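The branches around rHandle implement the usual JNI convention: an oop argument is passed either as NULL or as the address of a slot that holds the oop, never as the raw oop. A C++ sketch under that assumption; to_handle_sketch and slot are hypothetical names:

  #include <cstddef>

  // Store the oop into its reserved slot, then pass the slot's address as the
  // handle, or NULL when the oop itself is NULL.
  static void* to_handle_sketch(void* oop_value, void** slot) {
    *slot = oop_value;
    return (oop_value == NULL) ? NULL : (void*)slot;
  }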
|
|
+
|
|
+// A float arg may have to do float reg to int reg conversion
|
|
+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
|
|
+ assert(src.first()->is_stack() && dst.first()->is_stack() ||
|
|
+ src.first()->is_reg() && dst.first()->is_reg() ||
|
|
+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
|
|
+ if (src.first()->is_stack()) {
|
|
+ if (dst.first()->is_stack()) {
|
|
+ lwu(tmp, Address(fp, reg2offset_in(src.first())));
|
|
+ sw(tmp, Address(sp, reg2offset_out(dst.first())));
|
|
+ } else if (dst.first()->is_Register()) {
|
|
+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (src.first() != dst.first()) {
|
|
+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
|
|
+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// A long move
|
|
+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
|
|
+ if (src.first()->is_stack()) {
|
|
+ if (dst.first()->is_stack()) {
|
|
+ // stack to stack
|
|
+ ld(tmp, Address(fp, reg2offset_in(src.first())));
|
|
+ sd(tmp, Address(sp, reg2offset_out(dst.first())));
|
|
+ } else {
|
|
+ // stack to reg
|
|
+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
|
|
+ }
|
|
+ } else if (dst.first()->is_stack()) {
|
|
+ // reg to stack
|
|
+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
|
|
+ } else {
|
|
+ if (dst.first() != src.first()) {
|
|
+ mv(dst.first()->as_Register(), src.first()->as_Register());
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// A double move
|
|
+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
|
|
+ assert(src.first()->is_stack() && dst.first()->is_stack() ||
|
|
+ src.first()->is_reg() && dst.first()->is_reg() ||
|
|
+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
|
|
+ if (src.first()->is_stack()) {
|
|
+ if (dst.first()->is_stack()) {
|
|
+ ld(tmp, Address(fp, reg2offset_in(src.first())));
|
|
+ sd(tmp, Address(sp, reg2offset_out(dst.first())));
|
|
+ } else if (dst.first()->is_Register()) {
|
|
+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (src.first() != dst.first()) {
|
|
+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
|
|
+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void MacroAssembler::rt_call(address dest, Register tmp) {
|
|
+ CodeBlob *cb = CodeCache::find_blob(dest);
|
|
+ if (cb) {
|
|
+ far_call(RuntimeAddress(dest));
|
|
+ } else {
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(tmp, RuntimeAddress(dest), offset);
|
|
+ jalr(x1, tmp, offset);
|
|
+ }
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..a4d5ce0e0
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
|
|
@@ -0,0 +1,975 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
|
|
+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
|
|
+
|
|
+#include "asm/assembler.inline.hpp"
|
|
+#include "code/vmreg.hpp"
|
|
+// MacroAssembler extends Assembler by frequently used macros.
|
|
+//
|
|
+// Instructions for which a 'better' code sequence exists depending
|
|
+// on arguments should also go in here.
|
|
+
|
|
+class MacroAssembler: public Assembler {
|
|
+
|
|
+ public:
|
|
+ MacroAssembler(CodeBuffer* code) : Assembler(code) {
|
|
+ }
|
|
+ virtual ~MacroAssembler() {}
|
|
+
|
|
+ void safepoint_poll(Label& slow_path);
|
|
+ void safepoint_poll_acquire(Label& slow_path);
|
|
+
|
|
+ // Alignment
|
|
+ void align(int modulus);
|
|
+
|
|
+ // Stack frame creation/removal
|
|
+ // Note that SP must be updated to the right place before saving/restoring RA and FP
|
|
+ // because signal-based thread suspend/resume could happen asynchronously
|
|
+ void enter() {
|
|
+ addi(sp, sp, - 2 * wordSize);
|
|
+ sd(ra, Address(sp, wordSize));
|
|
+ sd(fp, Address(sp));
|
|
+ addi(fp, sp, 2 * wordSize);
|
|
+ }
|
|
+
|
|
+ void leave() {
|
|
+ addi(sp, fp, - 2 * wordSize);
|
|
+ ld(fp, Address(sp));
|
|
+ ld(ra, Address(sp, wordSize));
|
|
+ addi(sp, sp, 2 * wordSize);
|
|
+ }
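For orientation, the frame that enter() leaves behind looks as follows (a sketch of this port's layout, not text from the patch); fp points just above the two saved slots:

  // fp - 1 * wordSize : saved ra
  // fp - 2 * wordSize : saved fp   <-- sp right after enter()
  // locals / spill area grow downwards from sp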
|
|
+
|
|
+
|
|
+ // Support for getting the JavaThread pointer (i.e. a reference to thread-local information)
|
|
+ // The pointer will be loaded into the thread register.
|
|
+ void get_thread(Register thread);
|
|
+
|
|
+ // Support for VM calls
|
|
+ //
|
|
+ // It is imperative that all calls into the VM are handled via the call_VM macros.
|
|
+ // They make sure that the stack linkage is setup correctly. call_VM's correspond
|
|
+ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
|
|
+
|
|
+ void call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1, Register arg_2,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ address entry_point,
|
|
+ Register arg_1, Register arg_2, Register arg_3,
|
|
+ bool check_exceptions = true);
|
|
+
|
|
+ // Overloadings with last_Java_sp
|
|
+ void call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ int number_of_arguments = 0,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1, Register arg_2,
|
|
+ bool check_exceptions = true);
|
|
+ void call_VM(Register oop_result,
|
|
+ Register last_java_sp,
|
|
+ address entry_point,
|
|
+ Register arg_1, Register arg_2, Register arg_3,
|
|
+ bool check_exceptions = true);
|
|
+
|
|
+ void get_vm_result(Register oop_result, Register java_thread);
|
|
+ void get_vm_result_2(Register metadata_result, Register java_thread);
|
|
+
|
|
+ // These always tightly bind to MacroAssembler::call_VM_leaf_base
|
|
+ // bypassing the virtual implementation
|
|
+ void call_VM_leaf(address entry_point,
|
|
+ int number_of_arguments = 0);
|
|
+ void call_VM_leaf(address entry_point,
|
|
+ Register arg_0);
|
|
+ void call_VM_leaf(address entry_point,
|
|
+ Register arg_0, Register arg_1);
|
|
+ void call_VM_leaf(address entry_point,
|
|
+ Register arg_0, Register arg_1, Register arg_2);
|
|
+
|
|
+ // These always tightly bind to MacroAssembler::call_VM_base
|
|
+ // bypassing the virtual implementation
|
|
+ void super_call_VM_leaf(address entry_point, Register arg_0);
|
|
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
|
|
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
|
|
+ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
|
|
+
|
|
+ // last Java Frame (fills frame anchor)
|
|
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
|
|
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
|
|
+ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
|
|
+
|
|
+ // thread in the default location (xthread)
|
|
+ void reset_last_Java_frame(bool clear_fp);
|
|
+
|
|
+ virtual void call_VM_leaf_base(
|
|
+ address entry_point, // the entry point
|
|
+ int number_of_arguments, // the number of arguments to pop after the call
|
|
+ Label* retaddr = NULL
|
|
+ );
|
|
+
|
|
+ virtual void call_VM_leaf_base(
|
|
+ address entry_point, // the entry point
|
|
+ int number_of_arguments, // the number of arguments to pop after the call
|
|
+ Label& retaddr) {
|
|
+ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
|
|
+ }
|
|
+
|
|
+ virtual void call_VM_base( // returns the register containing the thread upon return
|
|
+ Register oop_result, // where an oop-result ends up if any; use noreg otherwise
|
|
+ Register java_thread, // the thread if computed before ; use noreg otherwise
|
|
+ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
|
|
+ address entry_point, // the entry point
|
|
+ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
|
|
+ bool check_exceptions // whether to check for pending exceptions after return
|
|
+ );
|
|
+
|
|
+ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
|
|
+
|
|
+ virtual void check_and_handle_earlyret(Register java_thread);
|
|
+ virtual void check_and_handle_popframe(Register java_thread);
|
|
+
|
|
+ void resolve_oop_handle(Register result, Register tmp = x15);
|
|
+ void resolve_jobject(Register value, Register thread, Register tmp);
|
|
+
|
|
+ void movoop(Register dst, jobject obj, bool immediate = false);
|
|
+ void mov_metadata(Register dst, Metadata* obj);
|
|
+ void bang_stack_size(Register size, Register tmp);
|
|
+ void set_narrow_oop(Register dst, jobject obj);
|
|
+ void set_narrow_klass(Register dst, Klass* k);
|
|
+
|
|
+ void load_mirror(Register dst, Register method, Register tmp = x15);
|
|
+ void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
|
|
+ Address src, Register tmp1, Register thread_tmp);
|
|
+ void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
|
|
+ Register src, Register tmp1, Register tmp2, Register tmp3);
|
|
+ void load_klass(Register dst, Register src);
|
|
+ void store_klass(Register dst, Register src);
|
|
+ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);
|
|
+
|
|
+ void encode_klass_not_null(Register r);
|
|
+ void decode_klass_not_null(Register r);
|
|
+ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
|
|
+ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
|
|
+ void decode_heap_oop_not_null(Register r);
|
|
+ void decode_heap_oop_not_null(Register dst, Register src);
|
|
+ void decode_heap_oop(Register d, Register s);
|
|
+ void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
|
|
+ void encode_heap_oop(Register d, Register s);
|
|
+ void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
|
|
+ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
|
|
+ Register thread_tmp = noreg, DecoratorSet decorators = 0);
|
|
+ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
|
|
+ Register thread_tmp = noreg, DecoratorSet decorators = 0);
|
|
+ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
|
|
+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);
|
|
+
|
|
+ void store_klass_gap(Register dst, Register src);
|
|
+
|
|
+ // currently unimplemented
|
|
+ // Used for storing NULL. All other oop constants should be
|
|
+ // stored using routines that take a jobject.
|
|
+ void store_heap_oop_null(Address dst);
|
|
+
|
|
+ // This dummy is to prevent a call to store_heap_oop from
|
|
+ // converting a zero (like NULL) into a Register by giving
|
|
+ // the compiler two choices it can't resolve
|
|
+
|
|
+ void store_heap_oop(Address dst, void* dummy);
|
|
+
|
|
+ // Support for NULL-checks
|
|
+ //
|
|
+ // Generates code that causes a NULL OS exception if the content of reg is NULL.
|
|
+ // If the accessed location is M[reg + offset] and the offset is known, provide the
|
|
+ // offset. No explicit code generation is needed if the offset is within a certain
|
|
+ // range (0 <= offset <= page_size).
|
|
+
|
|
+ virtual void null_check(Register reg, int offset = -1);
|
|
+ static bool needs_explicit_null_check(intptr_t offset);
|
|
+
|
|
+ // idiv variant which deals with MINLONG as dividend and -1 as divisor
|
|
+ int corrected_idivl(Register result, Register rs1, Register rs2,
|
|
+ bool want_remainder);
|
|
+ int corrected_idivq(Register result, Register rs1, Register rs2,
|
|
+ bool want_remainder);
|
|
+
|
|
+ // interface method calling
|
|
+ void lookup_interface_method(Register recv_klass,
|
|
+ Register intf_klass,
|
|
+ RegisterOrConstant itable_index,
|
|
+ Register method_result,
|
|
+ Register scan_tmp,
|
|
+ Label& no_such_interface,
|
|
+ bool return_method = true);
|
|
+
|
|
+ // virtual method calling
|
|
+ // n.b. x86 allows RegisterOrConstant for vtable_index
|
|
+ void lookup_virtual_method(Register recv_klass,
|
|
+ RegisterOrConstant vtable_index,
|
|
+ Register method_result);
|
|
+
|
|
+ // allocation
|
|
+ void eden_allocate(
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Label& slow_case, // continuation point if fast allocation fails
|
|
+ bool is_far = false
|
|
+ );
|
|
+ void tlab_allocate(
|
|
+ Register obj, // result: pointer to object after successful allocation
|
|
+ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
|
|
+ int con_size_in_bytes, // object size in bytes if known at compile time
|
|
+ Register tmp1, // temp register
|
|
+ Register tmp2, // temp register
|
|
+ Label& slow_case, // continuation point if fast allocation fails
|
|
+ bool is_far = false
|
|
+ );
|
|
+
|
|
+ // Test sub_klass against super_klass, with fast and slow paths.
|
|
+
|
|
+ // The fast path produces a tri-state answer: yes / no / maybe-slow.
|
|
+ // One of the three labels can be NULL, meaning take the fall-through.
|
|
+ // If super_check_offset is -1, the value is loaded up from super_klass.
|
|
+ // No registers are killed, except tmp_reg
|
|
+ void check_klass_subtype_fast_path(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Label* L_success,
|
|
+ Label* L_failure,
|
|
+ Label* L_slow_path,
|
|
+ Register super_check_offset = noreg);
|
|
+
|
|
+ // The rest of the type check; must be wired to a corresponding fast path.
|
|
+ // It does not repeat the fast path logic, so don't use it standalone.
|
|
+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are available.
|
|
+ // Updates the sub's secondary super cache as necessary.
|
|
+ void check_klass_subtype_slow_path(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Register tmp2_reg,
|
|
+ Label* L_success,
|
|
+ Label* L_failure);
|
|
+
|
|
+ void check_klass_subtype(Register sub_klass,
|
|
+ Register super_klass,
|
|
+ Register tmp_reg,
|
|
+ Label& L_success);
|
|
+
|
|
+ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
|
|
+
|
|
+ // only if +VerifyOops
|
|
+ void verify_oop(Register reg, const char* s = "broken oop");
|
|
+ void verify_oop_addr(Address addr, const char* s = "broken oop addr");
|
|
+
|
|
+ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
|
|
+ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
|
|
+
|
|
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
|
|
+#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
|
|
+
|
|
+ // A more convenient access to fence for our purposes
|
|
+ // We use four bits to indicate the read and write bits in the predecessors and successors,
|
|
+ // and widen r to ir and w to ow when UseConservativeFence is enabled.
|
|
+ enum Membar_mask_bits {
|
|
+ StoreStore = 0b0101, // (pred = ow + succ = ow)
|
|
+ LoadStore = 0b1001, // (pred = ir + succ = ow)
|
|
+ StoreLoad = 0b0110, // (pred = ow + succ = ir)
|
|
+ LoadLoad = 0b1010, // (pred = ir + succ = ir)
|
|
+ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
|
|
+ };
|
|
+
|
|
+ void membar(uint32_t order_constraint);
|
|
+
|
|
+ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
|
|
+ predecessor = (order_constraint >> 2) & 0x3;
|
|
+ successor = order_constraint & 0x3;
|
|
+
|
|
+ // extend rw -> iorw:
|
|
+ // 01(w) -> 0101(ow)
|
|
+ // 10(r) -> 1010(ir)
|
|
+ // 11(rw)-> 1111(iorw)
|
|
+ if (UseConservativeFence) {
|
|
+ predecessor |= predecessor << 2;
|
|
+ successor |= successor << 2;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
|
|
+ return ((predecessor & 0x3) << 2) | (successor & 0x3);
|
|
+ }
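A worked example of the encoding, written as it would appear in HotSpot code that includes this header (pred and succ are illustrative local names; the widening is applied inside the helper only when UseConservativeFence is enabled):

  uint32_t pred = 0, succ = 0;
  MacroAssembler::membar_mask_to_pred_succ(MacroAssembler::StoreLoad, pred, succ);
  // StoreLoad == 0b0110: without UseConservativeFence, pred == 0b01 (w) and succ == 0b10 (r);
  // with it, they come back widened to 0b0101 (ow) and 0b1010 (ir),
  // i.e. the emitted barrier is "fence ow, ir".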
|
|
+
|
|
+ // prints msg, dumps registers and stops execution
|
|
+ void stop(const char* msg);
|
|
+
|
|
+ static void debug64(char* msg, int64_t pc, int64_t regs[]);
|
|
+
|
|
+ void unimplemented(const char* what = "");
|
|
+
|
|
+ void should_not_reach_here() { stop("should not reach here"); }
|
|
+
|
|
+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
|
|
+ Register tmp,
|
|
+ int offset) {
|
|
+ return RegisterOrConstant(tmp);
|
|
+ }
|
|
+
|
|
+ static address target_addr_for_insn(address insn_addr);
|
|
+
|
|
+ // Required platform-specific helpers for Label::patch_instructions.
|
|
+ // They _shadow_ the declarations in AbstractAssembler, which are undefined.
|
|
+ static int pd_patch_instruction_size(address branch, address target) ;
|
|
+ void pd_patch_instruction(address branch, address target) {
|
|
+ pd_patch_instruction_size(branch, target);
|
|
+ }
|
|
+ static address pd_call_destination(address branch) {
|
|
+ return target_addr_for_insn(branch);
|
|
+ }
|
|
+
|
|
+ static int patch_oop(address insn_addr, address o);
|
|
+ address emit_trampoline_stub(int insts_call_instruction_offset, address target);
|
|
+ void emit_static_call_stub();
|
|
+
|
|
+ // The following 4 methods return the offset of the appropriate move instruction
|
|
+
|
|
+ // Support for fast byte/short loading with zero extension (depending on particular CPU)
|
|
+ int load_unsigned_byte(Register dst, Address src);
|
|
+ int load_unsigned_short(Register dst, Address src);
|
|
+
|
|
+ // Support for fast byte/short loading with sign extension (depending on particular CPU)
|
|
+ int load_signed_byte(Register dst, Address src);
|
|
+ int load_signed_short(Register dst, Address src);
|
|
+
|
|
+ // Load and store values by size and signed-ness
|
|
+ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
|
|
+ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
|
|
+
|
|
+ public:
|
|
+ // enum used for riscv--x86 linkage to define return type of x86 function
|
|
+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double};
|
|
+
|
|
+ // Standard pseudoinstruction
|
|
+ void nop();
|
|
+ void mv(Register Rd, Register Rs) ;
|
|
+ void notr(Register Rd, Register Rs);
|
|
+ void neg(Register Rd, Register Rs);
|
|
+ void negw(Register Rd, Register Rs);
|
|
+ void sext_w(Register Rd, Register Rs);
|
|
+ void zext_b(Register Rd, Register Rs);
|
|
+ void seqz(Register Rd, Register Rs); // set if = zero
|
|
+ void snez(Register Rd, Register Rs); // set if != zero
|
|
+ void sltz(Register Rd, Register Rs); // set if < zero
|
|
+ void sgtz(Register Rd, Register Rs); // set if > zero
|
|
+
|
|
+ // Float pseudoinstruction
|
|
+ void fmv_s(FloatRegister Rd, FloatRegister Rs);
|
|
+ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value
|
|
+ void fneg_s(FloatRegister Rd, FloatRegister Rs);
|
|
+
|
|
+ // Double pseudoinstruction
|
|
+ void fmv_d(FloatRegister Rd, FloatRegister Rs);
|
|
+ void fabs_d(FloatRegister Rd, FloatRegister Rs);
|
|
+ void fneg_d(FloatRegister Rd, FloatRegister Rs);
|
|
+
|
|
+ // Pseudoinstruction for control and status register
|
|
+ void rdinstret(Register Rd); // read instruction-retired counter
|
|
+ void rdcycle(Register Rd); // read cycle counter
|
|
+ void rdtime(Register Rd); // read time
|
|
+ void csrr(Register Rd, unsigned csr); // read csr
|
|
+ void csrw(unsigned csr, Register Rs); // write csr
|
|
+ void csrs(unsigned csr, Register Rs); // set bits in csr
|
|
+ void csrc(unsigned csr, Register Rs); // clear bits in csr
|
|
+ void csrwi(unsigned csr, unsigned imm);
|
|
+ void csrsi(unsigned csr, unsigned imm);
|
|
+ void csrci(unsigned csr, unsigned imm);
|
|
+ void frcsr(Register Rd); // read floating-point csr
+ void fscsr(Register Rd, Register Rs); // swap floating-point csr
+ void fscsr(Register Rs); // write floating-point csr
+ void frrm(Register Rd); // read floating-point rounding mode
+ void fsrm(Register Rd, Register Rs); // swap floating-point rounding mode
+ void fsrm(Register Rs); // write floating-point rounding mode
+ void fsrmi(Register Rd, unsigned imm);
+ void fsrmi(unsigned imm);
+ void frflags(Register Rd); // read floating-point exception flags
+ void fsflags(Register Rd, Register Rs); // swap floating-point exception flags
+ void fsflags(Register Rs); // write floating-point exception flags
|
|
+ void fsflagsi(Register Rd, unsigned imm);
|
|
+ void fsflagsi(unsigned imm);
|
|
+
|
|
+ void beqz(Register Rs, const address &dest);
|
|
+ void blez(Register Rs, const address &dest);
|
|
+ void bgez(Register Rs, const address &dest);
|
|
+ void bltz(Register Rs, const address &dest);
|
|
+ void bgtz(Register Rs, const address &dest);
|
|
+ void bnez(Register Rs, const address &dest);
|
|
+ void la(Register Rd, Label &label);
|
|
+ void la(Register Rd, const address &dest);
|
|
+ void la(Register Rd, const Address &adr);
|
|
+ //label
|
|
+ void beqz(Register Rs, Label &l, bool is_far = false);
|
|
+ void bnez(Register Rs, Label &l, bool is_far = false);
|
|
+ void blez(Register Rs, Label &l, bool is_far = false);
|
|
+ void bgez(Register Rs, Label &l, bool is_far = false);
|
|
+ void bltz(Register Rs, Label &l, bool is_far = false);
|
|
+ void bgtz(Register Rs, Label &l, bool is_far = false);
|
|
+ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
|
|
+
|
|
+ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
|
|
+ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
|
|
+ void push_reg(Register Rs);
|
|
+ void pop_reg(Register Rd);
|
|
+ int push_reg(unsigned int bitset, Register stack);
|
|
+ int pop_reg(unsigned int bitset, Register stack);
|
|
+ static RegSet call_clobbered_registers();
|
|
+ void push_call_clobbered_registers();
|
|
+ void pop_call_clobbered_registers();
|
|
+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
|
|
+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
|
|
+
|
|
+ // if heap base register is used - reinit it with the correct value
|
|
+ void reinit_heapbase();
|
|
+
|
|
+ void bind(Label& L) {
|
|
+ Assembler::bind(L);
|
|
+ // fences across basic blocks should not be merged
|
|
+ code()->clear_last_insn();
|
|
+ }
|
|
+
|
|
+ // mv
|
|
+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
|
|
+
|
|
+ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); }
|
|
+ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); }
|
|
+ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); }
|
|
+ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); }
|
|
+ inline void mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); }
|
|
+ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); }
|
|
+
|
|
+ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }
|
|
+
|
|
+ void mv(Register Rd, Address dest);
|
|
+ void mv(Register Rd, RegisterOrConstant src);
|
|
+
|
|
+ // logic
|
|
+ void andrw(Register Rd, Register Rs1, Register Rs2);
|
|
+ void orrw(Register Rd, Register Rs1, Register Rs2);
|
|
+ void xorrw(Register Rd, Register Rs1, Register Rs2);
|
|
+
|
|
+ // vext
|
|
+ void vmnot_m(VectorRegister vd, VectorRegister vs);
|
|
+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
|
|
+ void vfneg_v(VectorRegister vd, VectorRegister vs);
|
|
+
|
|
+ // support for argument shuffling
|
|
+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
|
|
+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
|
|
+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
|
|
+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
|
|
+ void object_move(OopMap* map,
|
|
+ int oop_handle_offset,
|
|
+ int framesize_in_slots,
|
|
+ VMRegPair src,
|
|
+ VMRegPair dst,
|
|
+ bool is_receiver,
|
|
+ int* receiver_offset);
|
|
+
|
|
+ void rt_call(address dest, Register tmp = t0);
|
|
+
|
|
+ // revb
|
|
+ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
|
|
+ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
|
|
+ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
|
|
+ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
|
|
+ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
|
|
+ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
|
|
+ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
|
|
+ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
|
|
+
|
|
+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
|
|
+ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
|
|
+
|
|
+ // Support for serializing memory accesses between threads
|
|
+ void serialize_memory(Register thread, Register tmp1, Register tmp2);
|
|
+
|
|
+ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
|
|
+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ;
|
|
+ void cmpxchg(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result, bool result_as_bool = false);
|
|
+ void cmpxchg_weak(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result);
|
|
+ void cmpxchg_narrow_value_helper(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Register tmp1, Register tmp2, Register tmp3);
|
|
+ void cmpxchg_narrow_value(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result, bool result_as_bool,
|
|
+ Register tmp1, Register tmp2, Register tmp3);
|
|
+ void weak_cmpxchg_narrow_value(Register addr, Register expected,
|
|
+ Register new_val,
|
|
+ enum operand_size size,
|
|
+ Assembler::Aqrl acquire, Assembler::Aqrl release,
|
|
+ Register result,
|
|
+ Register tmp1, Register tmp2, Register tmp3);
|
|
+
|
|
+ void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
|
|
+ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
|
|
+ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
|
|
+ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
|
|
+
|
|
+ void atomic_xchg(Register prev, Register newv, Register addr);
|
|
+ void atomic_xchgw(Register prev, Register newv, Register addr);
|
|
+ void atomic_xchgal(Register prev, Register newv, Register addr);
|
|
+ void atomic_xchgalw(Register prev, Register newv, Register addr);
|
|
+ void atomic_xchgwu(Register prev, Register newv, Register addr);
|
|
+ void atomic_xchgalwu(Register prev, Register newv, Register addr);
|
|
+
|
|
+ // Biased locking support
|
|
+ // lock_reg and obj_reg must be loaded up with the appropriate values.
|
|
+ // swap_reg is killed.
|
|
+ // tmp_reg must be supplied and must not be t0 or t1
|
|
+ // Optional slow case is for implementations (interpreter and C1) which branch to
|
|
+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
|
|
+ // Returns offset of first potentially-faulting instruction for null
|
|
+ // check info (currently consumed only by C1). If
|
|
+ // swap_reg_contains_mark is true then returns -1 as it is assumed
|
|
+ // the calling code has already passed any potential faults.
|
|
+ int biased_locking_enter(Register lock_reg, Register obj_reg,
|
|
+ Register swap_reg, Register tmp_reg,
|
|
+ bool swap_reg_contains_mark,
|
|
+ Label& done, Label* slow_case = NULL,
|
|
+ BiasedLockingCounters* counters = NULL,
|
|
+ Register flag = noreg);
|
|
+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg);
|
|
+
|
|
+ static bool far_branches() {
|
|
+ return ReservedCodeCacheSize > branch_range;
|
|
+ }
|
|
+
|
|
+ //atomic
|
|
+ void atomic_incw(Register counter_addr, Register tmp1);
|
|
+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
|
|
+ la(tmp1, counter_addr);
|
|
+ atomic_incw(tmp1, tmp2);
|
|
+ }
|
|
+
|
|
+ // Jumps that can reach anywhere in the code cache.
|
|
+ // Trashes tmp.
|
|
+ void far_call(Address entry, Register tmp = t0);
|
|
+ void far_jump(Address entry, Register tmp = t0);
|
|
+
|
|
+ static int far_branch_size() {
|
|
+ if (far_branches()) {
|
|
+ return 2 * 4; // auipc + jalr, see far_call() & far_jump()
|
|
+ } else {
|
|
+ return 4;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void load_byte_map_base(Register reg);
|
|
+
|
|
+ void bang_stack_with_offset(int offset) {
|
|
+ // stack grows down, caller passes positive offset
|
|
+ assert(offset > 0, "must bang with negative offset");
|
|
+ sub(t1, sp, offset);
|
|
+ sd(zr, Address(t1));
|
|
+ }
|
|
+
|
|
+ void la_patchable(Register reg1, const Address &dest, int32_t &offset);
|
|
+
|
|
+ virtual void _call_Unimplemented(address call_site) {
|
|
+ mv(t1, call_site);
|
|
+ }
|
|
+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ void spill(Register Rx, bool is64, int offset) {
|
|
+ is64 ? sd(Rx, Address(sp, offset))
|
|
+ : sw(Rx, Address(sp, offset));
|
|
+ }
|
|
+
|
|
+ void spill(FloatRegister Rx, bool is64, int offset) {
|
|
+ is64 ? fsd(Rx, Address(sp, offset))
|
|
+ : fsw(Rx, Address(sp, offset));
|
|
+ }
|
|
+
|
|
+ void spill(VectorRegister Vx, int offset) {
|
|
+ add(t0, sp, offset);
|
|
+ vs1r_v(Vx, t0);
|
|
+ }
|
|
+
|
|
+ void unspill(Register Rx, bool is64, int offset) {
|
|
+ is64 ? ld(Rx, Address(sp, offset))
|
|
+ : lw(Rx, Address(sp, offset));
|
|
+ }
|
|
+
|
|
+ void unspillu(Register Rx, bool is64, int offset) {
|
|
+ is64 ? ld(Rx, Address(sp, offset))
|
|
+ : lwu(Rx, Address(sp, offset));
|
|
+ }
|
|
+
|
|
+ void unspill(FloatRegister Rx, bool is64, int offset) {
|
|
+ is64 ? fld(Rx, Address(sp, offset))
|
|
+ : flw(Rx, Address(sp, offset));
|
|
+ }
|
|
+
|
|
+ void unspill(VectorRegister Vx, int offset) {
|
|
+ add(t0, sp, offset);
|
|
+ vl1r_v(Vx, t0);
|
|
+ }
|
|
+
|
|
+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset,
|
|
+ int vec_reg_size_in_bytes) {
|
|
+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size");
|
|
+ unspill(v0, src_offset);
|
|
+ spill(v0, dst_offset);
|
|
+ }
|
|
+
|
|
+#endif // COMPILER2
|
|
+
|
|
+ // Frame creation and destruction shared between JITs.
|
|
+ void build_frame(int framesize);
|
|
+ void remove_frame(int framesize);
|
|
+
|
|
+ void reserved_stack_check();
|
|
+ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype);
|
|
+ void read_polling_page(Register r, address page, relocInfo::relocType rtype);
|
|
+ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
|
|
+ // Return: the call PC
|
|
+ address trampoline_call(Address entry);
|
|
+ address ic_call(address entry, jint method_index = 0);
|
|
+ // Support for memory inc/dec
|
|
+ // n.b. increment/decrement calls with an Address destination will
|
|
+ // need to use a scratch register to load the value to be
|
|
+ // incremented. increment/decrement calls which add or subtract a
|
|
+ // constant value other than sign-extended 12-bit immediate will need
|
|
+ // to use a 2nd scratch register to hold the constant. so, an address
|
|
+ // increment/decrement may trash both t0 and t1.
|
|
+
|
|
+ void increment(const Address dst, int64_t value = 1);
|
|
+ void incrementw(const Address dst, int32_t value = 1);
|
|
+
|
|
+ void decrement(const Address dst, int64_t value = 1);
|
|
+ void decrementw(const Address dst, int32_t value = 1);
|
|
+ void cmpptr(Register src1, Address src2, Label& equal);
|
|
+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop
|
|
+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false);
|
|
+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
|
|
+#ifdef COMPILER2
|
|
+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min);
|
|
+
|
|
+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4,
|
|
+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size);
|
|
+
|
|
+ void string_equals(Register a1, Register a2, Register result, Register cnt1,
|
|
+ int elem_size);
|
|
+ void string_compare(Register str1, Register str2,
|
|
+ Register cnt1, Register cnt2, Register result,
|
|
+ Register tmp1, Register tmp2, Register tmp3, int ae);
|
|
+ void string_indexof_char_short(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ bool isL);
|
|
+ void string_indexof_char(Register str1, Register cnt1,
|
|
+ Register ch, Register result,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ bool isL);
|
|
+ void string_indexof(Register str1, Register str2,
|
|
+ Register cnt1, Register cnt2,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ Register tmp5, Register tmp6,
|
|
+ Register result, int ae);
|
|
+ void string_indexof_linearscan(Register haystack, Register needle,
|
|
+ Register haystack_len, Register needle_len,
|
|
+ Register tmp1, Register tmp2,
|
|
+ Register tmp3, Register tmp4,
|
|
+ int needle_con_cnt, Register result, int ae);
|
|
+ void compute_index(Register str1, Register trailing_zero, Register match_mask,
|
|
+ Register result, Register char_tmp, Register tmp,
|
|
+ bool haystack_isL);
|
|
+ void compute_match_mask(Register src, Register pattern, Register match_mask,
|
|
+ Register mask1, Register mask2);
|
|
+ void cad(Register dst, Register src1, Register src2, Register carry);
|
|
+ void cadc(Register dst, Register src1, Register src2, Register carry);
|
|
+ void adc(Register dst, Register src1, Register src2, Register carry);
|
|
+ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
|
|
+ Register src1, Register src2, Register carry = t0);
|
|
+ void mul_add(Register out, Register in, Register offset,
|
|
+ Register len, Register k, Register tmp1, Register tmp2);
|
|
+ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
|
|
+ Register y, Register y_idx, Register z,
|
|
+ Register carry, Register product,
|
|
+ Register idx, Register kdx);
|
|
+ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
|
|
+ Register y, Register y_idx, Register z,
|
|
+ Register carry, Register product,
|
|
+ Register idx, Register kdx);
|
|
+ void multiply_128_x_128_loop(Register y, Register z,
|
|
+ Register carry, Register carry2,
|
|
+ Register idx, Register jdx,
|
|
+ Register yz_idx1, Register yz_idx2,
|
|
+ Register tmp, Register tmp3, Register tmp4,
|
|
+ Register tmp6, Register product_hi);
|
|
+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
|
|
+ Register z, Register zlen,
|
|
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4,
|
|
+ Register tmp5, Register tmp6, Register product_hi);
|
|
+#endif // COMPILER2
|
|
+ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
|
|
+ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
|
|
+
|
|
+ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
|
|
+ void zero_words(Register base, uint64_t cnt);
|
|
+ address zero_words(Register ptr, Register cnt);
|
|
+ void fill_words(Register base, Register cnt, Register value);
|
|
+ void zero_memory(Register addr, Register len, Register tmp1);
|
|
+
|
|
+ // shift left by shamt and add
|
|
+ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ // refer to conditional_branches and float_conditional_branches
|
|
+ static const int bool_test_bits = 3;
|
|
+ static const int neg_cond_bits = 2;
|
|
+ static const int unsigned_branch_mask = 1 << bool_test_bits;
|
|
+ static const int double_branch_mask = 1 << bool_test_bits;
|
|
+
|
|
+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src);
|
|
+
|
|
+ // cmp
|
|
+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false);
|
|
+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false);
|
|
+
|
|
+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false);
|
|
+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false);
|
|
+
|
|
+ // intrinsic methods implemented by vector instructions
|
|
+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size);
|
|
+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size);
|
|
+ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2,
|
|
+ Register result, Register tmp1, Register tmp2, int encForm);
|
|
+
|
|
+ void clear_array_v(Register base, Register cnt);
|
|
+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp);
|
|
+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp);
|
|
+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp);
|
|
+
|
|
+ address has_negatives_v(Register ary, Register len, Register result, Register tmp);
|
|
+#endif
|
|
+
|
|
+ // The float conversion instructions with the _safe suffix deal with some special inputs:
+ // e.g. converting NaN, +Inf or -Inf to int or long
+ // does not yield the expected result directly, so we need to deal with these situations
+ // to get correct results.
|
|
+ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
|
|
+ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
|
|
+ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
|
+ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
|
|
+
|
|
+ // vector load/store unit-stride instructions
|
|
+ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
|
|
+ switch (sew) {
|
|
+ case Assembler::e64:
|
|
+ vle64_v(vd, base, vm);
|
|
+ break;
|
|
+ case Assembler::e32:
|
|
+ vle32_v(vd, base, vm);
|
|
+ break;
|
|
+ case Assembler::e16:
|
|
+ vle16_v(vd, base, vm);
|
|
+ break;
|
|
+ case Assembler::e8: // fall through
|
|
+ default:
|
|
+ vle8_v(vd, base, vm);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
|
|
+ switch (sew) {
|
|
+ case Assembler::e64:
|
|
+ vse64_v(store_data, base, vm);
|
|
+ break;
|
|
+ case Assembler::e32:
|
|
+ vse32_v(store_data, base, vm);
|
|
+ break;
|
|
+ case Assembler::e16:
|
|
+ vse16_v(store_data, base, vm);
|
|
+ break;
|
|
+ case Assembler::e8: // fall through
|
|
+ default:
|
|
+ vse8_v(store_data, base, vm);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ static const int zero_words_block_size;
|
|
+
|
|
+ void cast_primitive_type(BasicType type, Register Rt) {
|
|
+ switch (type) {
|
|
+ case T_BOOLEAN:
|
|
+ sltu(Rt, zr, Rt);
|
|
+ break;
|
|
+ case T_CHAR :
|
|
+ zero_extend(Rt, Rt, 16);
|
|
+ break;
|
|
+ case T_BYTE :
|
|
+ sign_extend(Rt, Rt, 8);
|
|
+ break;
|
|
+ case T_SHORT :
|
|
+ sign_extend(Rt, Rt, 16);
|
|
+ break;
|
|
+ case T_INT :
|
|
+ addw(Rt, Rt, zr);
|
|
+ break;
|
|
+ case T_LONG : /* nothing to do */ break;
|
|
+ case T_VOID : /* nothing to do */ break;
|
|
+ case T_FLOAT : /* nothing to do */ break;
|
|
+ case T_DOUBLE : /* nothing to do */ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // float cmp with unordered_result
|
|
+ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
|
|
+ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
|
|
+
|
|
+ // Zero/Sign-extend
|
|
+ void zero_extend(Register dst, Register src, int bits);
|
|
+ void sign_extend(Register dst, Register src, int bits);
|
|
+
|
|
+ // compare src1 and src2 and get -1/0/1 in dst.
|
|
+ // if [src1 > src2], dst = 1;
|
|
+ // if [src1 == src2], dst = 0;
|
|
+ // if [src1 < src2], dst = -1;
|
|
+ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
|
|
+
|
|
+ void load_constant_pool_cache(Register cpool, Register method);
|
|
+
|
|
+ void load_max_stack(Register dst, Register method);
|
|
+
|
|
+private:
|
|
+ void load_prototype_header(Register dst, Register src);
|
|
+ void repne_scan(Register addr, Register value, Register count, Register tmp);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // Macro short-hand support to clean-up after a failed call to trampoline
|
|
+ // call generation (see trampoline_call() below), when a set of Labels must
|
|
+ // be reset (before returning).
|
|
+#define reset_labels1(L1) L1.reset()
|
|
+#define reset_labels2(L1, L2) L1.reset(); L2.reset()
|
|
+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3)
|
|
+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5)
|
|
+#endif
|
|
+
|
|
+ // Return true if an address is within the 48-bit RISCV64 address space.
|
|
+ bool is_valid_riscv64_address(address addr) {
|
|
+ // sv48: must have bits 63-48 all equal to bit 47
|
|
+ return ((uintptr_t)addr >> 47) == 0;
|
|
+ }
|
|
+
|
|
+ void ld_constant(Register dest, const Address &const_addr) {
|
|
+ if (NearCpool) {
|
|
+ ld(dest, const_addr);
|
|
+ } else {
|
|
+ int32_t offset = 0;
|
|
+ la_patchable(dest, InternalAddress(const_addr.target()), offset);
|
|
+ ld(dest, Address(dest, offset));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ int bitset_to_regs(unsigned int bitset, unsigned char* regs);
|
|
+ Address add_memory_helper(const Address dst);
|
|
+
|
|
+ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
|
|
+ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
|
|
+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE);
|
|
+#endif // COMPILER2
|
|
+};
|
|
+
|
|
+#ifdef ASSERT
|
|
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
|
|
+#endif
|
|
+
|
|
+/**
|
|
+ * class SkipIfEqual:
|
|
+ *
|
|
+ * Instantiating this class will result in assembly code being output that will
|
|
+ * jump around any code emitted between the creation of the instance and its
|
|
+ * automatic destruction at the end of a scope block, depending on the value of
|
|
+ * the flag passed to the constructor, which will be checked at run-time.
|
|
+ */
|
|
+class SkipIfEqual {
|
|
+ private:
|
|
+ MacroAssembler* _masm;
|
|
+ Label _label;
|
|
+
|
|
+ public:
|
|
+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
|
|
+ ~SkipIfEqual();
|
|
+};
|
|
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..fc2b191c0
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
|
|
@@ -0,0 +1,30 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
|
|
+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
|
|
+
|
|
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..d049193d4
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
|
|
@@ -0,0 +1,440 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "classfile/javaClasses.inline.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interpreterRuntime.hpp"
|
|
+#include "memory/allocation.inline.hpp"
|
|
+#include "prims/methodHandles.hpp"
|
|
+#include "runtime/flags/flagSetting.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+#ifdef PRODUCT
|
|
+#define BLOCK_COMMENT(str) /* nothing */
|
|
+#else
|
|
+#define BLOCK_COMMENT(str) __ block_comment(str)
|
|
+#endif
|
|
+
|
|
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
|
+
|
|
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
|
|
+ if (VerifyMethodHandles) {
|
|
+ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class),
|
|
+ "MH argument is a Class");
|
|
+ }
|
|
+ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes()));
|
|
+}
|
|
+
|
|
+#ifdef ASSERT
|
|
+static int check_nonzero(const char* xname, int x) {
|
|
+ assert(x != 0, "%s should be nonzero", xname);
|
|
+ return x;
|
|
+}
|
|
+#define NONZERO(x) check_nonzero(#x, x)
|
|
+#else //ASSERT
|
|
+#define NONZERO(x) (x)
|
|
+#endif //ASSERT
|
|
+
|
|
+#ifdef ASSERT
|
|
+void MethodHandles::verify_klass(MacroAssembler* _masm,
|
|
+ Register obj, SystemDictionary::WKID klass_id,
|
|
+ const char* error_message) {
|
|
+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
|
|
+ Klass* klass = SystemDictionary::well_known_klass(klass_id);
|
|
+ Register temp = t1;
|
|
+ Register temp2 = t0; // used by MacroAssembler::cmpptr
|
|
+ Label L_ok, L_bad;
|
|
+ BLOCK_COMMENT("verify_klass {");
|
|
+ __ verify_oop(obj);
|
|
+ __ beqz(obj, L_bad);
|
|
+ __ push_reg(RegSet::of(temp, temp2), sp);
|
|
+ __ load_klass(temp, obj);
|
|
+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);
|
|
+ intptr_t super_check_offset = klass->super_check_offset();
|
|
+ __ ld(temp, Address(temp, super_check_offset));
|
|
+ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);
|
|
+ __ pop_reg(RegSet::of(temp, temp2), sp);
|
|
+ __ bind(L_bad);
|
|
+ __ stop(error_message);
|
|
+ __ BIND(L_ok);
|
|
+ __ pop_reg(RegSet::of(temp, temp2), sp);
|
|
+ BLOCK_COMMENT("} verify_klass");
|
|
+}
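// Editorial sketch, not part of the patch: the shape of the check emitted above,
// written as plain C++. The object is accepted if its klass is the well-known
// klass itself, or if that klass already records the well-known klass in the slot
// at its super_check_offset. Struct and function names here are illustrative only.
#include <cstddef>

struct Klass;   // opaque stand-in for the VM's Klass

static bool klass_is_or_caches(Klass* obj_klass, Klass* expected,
                               size_t super_check_offset) {
  if (obj_klass == expected) {
    return true;                                      // first cmpptr takes L_ok
  }
  Klass* cached = *(Klass**)((char*)obj_klass + super_check_offset);
  return cached == expected;                          // second cmpptr takes L_ok
}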
+
|
|
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { }
|
|
+
|
|
+#endif //ASSERT
|
|
+
|
|
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
|
|
+ bool for_compiler_entry) {
|
|
+ assert(method == xmethod, "interpreter calling convention");
|
|
+ Label L_no_such_method;
|
|
+ __ beqz(xmethod, L_no_such_method);
|
|
+ __ verify_method_ptr(method);
|
|
+
|
|
+ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
|
|
+ Label run_compiled_code;
|
|
+ // JVMTI events, such as single-stepping, are implemented partly by avoiding running
|
|
+ // compiled code in threads for which the event is enabled. Check here for
|
|
+ // interp_only_mode if these events CAN be enabled.
|
|
+
|
|
+ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
|
|
+ __ beqz(t0, run_compiled_code);
|
|
+ __ ld(t0, Address(method, Method::interpreter_entry_offset()));
|
|
+ __ jr(t0);
|
|
+ __ BIND(run_compiled_code);
|
|
+ }
|
|
+
|
|
+ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
|
|
+ Method::from_interpreted_offset();
|
|
+  __ ld(t0, Address(method, entry_offset));
|
|
+ __ jr(t0);
|
|
+ __ bind(L_no_such_method);
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
|
|
+}
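// Editorial sketch, not part of the patch: which Method entry the code above ends
// up jumping through, restated as plain C++. The enumerators mirror the Method
// offsets used above; the names are illustrative assumptions, not VM identifiers.
enum MethodEntry { FROM_COMPILED, FROM_INTERPRETED, INTERPRETER_ENTRY };

static MethodEntry pick_entry(bool for_compiler_entry, bool interp_only_mode) {
  if (!for_compiler_entry && interp_only_mode) {
    // JVMTI (e.g. single stepping) pins this thread to the interpreter.
    return INTERPRETER_ENTRY;
  }
  return for_compiler_entry ? FROM_COMPILED : FROM_INTERPRETED;
}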
+
|
|
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
|
|
+ Register recv, Register method_temp,
|
|
+ Register temp2,
|
|
+ bool for_compiler_entry) {
|
|
+ BLOCK_COMMENT("jump_to_lambda_form {");
|
|
+ // This is the initial entry point of a lazy method handle.
|
|
+ // After type checking, it picks up the invoker from the LambdaForm.
|
|
+ assert_different_registers(recv, method_temp, temp2);
|
|
+ assert(recv != noreg, "required register");
|
|
+ assert(method_temp == xmethod, "required register for loading method");
|
|
+
|
|
+ // Load the invoker, as MH -> MH.form -> LF.vmentry
|
|
+ __ verify_oop(recv);
|
|
+ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2);
|
|
+ __ verify_oop(method_temp);
|
|
+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2);
|
|
+ __ verify_oop(method_temp);
|
|
+ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2);
|
|
+ __ verify_oop(method_temp);
|
|
+ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg);
|
|
+
|
|
+ if (VerifyMethodHandles && !for_compiler_entry) {
|
|
+ // make sure recv is already on stack
|
|
+ __ ld(temp2, Address(method_temp, Method::const_offset()));
|
|
+ __ load_sized_value(temp2,
|
|
+ Address(temp2, ConstMethod::size_of_parameters_offset()),
|
|
+ sizeof(u2), /*is_signed*/ false);
|
|
+ Label L;
|
|
+ __ ld(t0, __ argument_address(temp2, -1));
|
|
+ __ oop_equal(recv, t0, L);
|
|
+ __ ld(x10, __ argument_address(temp2, -1));
|
|
+ __ ebreak();
|
|
+ __ BIND(L);
|
|
+ }
|
|
+
|
|
+ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);
|
|
+ BLOCK_COMMENT("} jump_to_lambda_form");
|
|
+}
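// Editorial sketch, not part of the patch: a tiny standalone model of the pointer
// chain the load_heap_oop/access_load_at sequence above follows. The struct and
// field names mimic the java.lang.invoke objects but are illustrative only.
#include <cstdio>

struct Method        { const char* name; };
struct ResolvedName  { Method* vmtarget; };    // ResolvedMethodName.vmtarget
struct MemberName    { ResolvedName* method; };
struct LambdaForm    { MemberName* vmentry; };
struct MethodHandle  { LambdaForm* form; };

static Method* target_of(MethodHandle* mh) {
  // Same dereference order as above: MH -> MH.form -> LF.vmentry -> MemberName.method -> vmtarget
  return mh->form->vmentry->method->vmtarget;
}

int main() {
  Method m{"invokeBasic target"};
  ResolvedName rn{&m};
  MemberName mn{&rn};
  LambdaForm lf{&mn};
  MethodHandle mh{&lf};
  printf("%s\n", target_of(&mh)->name);
  return 0;
}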
+
|
|
+// Code generation
|
|
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
|
|
+ vmIntrinsics::ID iid) {
|
|
+ const bool not_for_compiler_entry = false; // this is the interpreter entry
|
|
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
|
|
+ if (iid == vmIntrinsics::_invokeGeneric ||
|
|
+ iid == vmIntrinsics::_compiledLambdaForm) {
|
|
+ // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
|
|
+ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
|
|
+ // They all allow an appendix argument.
|
|
+ __ ebreak(); // empty stubs make SG sick
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted)
|
|
+ // xmethod: Method*
|
|
+ // x13: argument locator (parameter slot count, added to sp)
|
|
+ // x11: used as temp to hold mh or receiver
|
|
+ Register argp = x13; // argument list ptr, live on error paths
|
|
+ Register mh = x11; // MH receiver; dies quickly and is recycled
|
|
+
|
|
+ // here's where control starts out:
|
|
+ __ align(CodeEntryAlignment);
|
|
+ address entry_point = __ pc();
|
|
+
|
|
+ if (VerifyMethodHandles) {
|
|
+ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
|
|
+
|
|
+ Label L;
|
|
+ BLOCK_COMMENT("verify_intrinsic_id {");
|
|
+ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes()));
|
|
+ __ mv(t1, (int) iid);
|
|
+ __ beq(t0, t1, L);
|
|
+ if (iid == vmIntrinsics::_linkToVirtual ||
|
|
+ iid == vmIntrinsics::_linkToSpecial) {
|
|
+ // could do this for all kinds, but would explode assembly code size
|
|
+ trace_method_handle(_masm, "bad Method*::intrinsic_id");
|
|
+ }
|
|
+ __ ebreak();
|
|
+ __ bind(L);
|
|
+ BLOCK_COMMENT("} verify_intrinsic_id");
|
|
+ }
|
|
+
|
|
+ // First task: Find out how big the argument list is.
|
|
+ Address x13_first_arg_addr;
|
|
+ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
|
|
+ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
|
|
+ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
|
|
+ __ ld(argp, Address(xmethod, Method::const_offset()));
|
|
+ __ load_sized_value(argp,
|
|
+ Address(argp, ConstMethod::size_of_parameters_offset()),
|
|
+ sizeof(u2), /*is_signed*/ false);
|
|
+ x13_first_arg_addr = __ argument_address(argp, -1);
|
|
+ } else {
|
|
+ DEBUG_ONLY(argp = noreg);
|
|
+ }
|
|
+
|
|
+ if (!is_signature_polymorphic_static(iid)) {
|
|
+ __ ld(mh, x13_first_arg_addr);
|
|
+ DEBUG_ONLY(argp = noreg);
|
|
+ }
|
|
+
|
|
+ // x13_first_arg_addr is live!
|
|
+
|
|
+ trace_method_handle_interpreter_entry(_masm, iid);
|
|
+ if (iid == vmIntrinsics::_invokeBasic) {
|
|
+ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry);
|
|
+
|
|
+ } else {
|
|
+ // Adjust argument list by popping the trailing MemberName argument.
|
|
+ Register recv = noreg;
|
|
+ if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
|
|
+ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
|
|
+ __ ld(recv = x12, x13_first_arg_addr);
|
|
+ }
|
|
+ DEBUG_ONLY(argp = noreg);
|
|
+ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now
|
|
+ __ pop_reg(xmember); // extract last argument
|
|
+ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry);
|
|
+ }
|
|
+
|
|
+ return entry_point;
|
|
+}
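// Editorial sketch, not part of the patch: the JVMS MethodHandle reference-kind
// numbering that signature_polymorphic_intrinsic_ref_kind() hands back above.
// The kinds that carry a receiver are exactly the odd-numbered ones, which is why
// only those paths load a first argument; the helper below is an illustrative
// restatement of that property, not HotSpot's ref_kind_has_receiver() itself.
#include <cstdio>

enum {
  JVM_REF_getField = 1, JVM_REF_getStatic = 2,
  JVM_REF_putField = 3, JVM_REF_putStatic = 4,
  JVM_REF_invokeVirtual = 5, JVM_REF_invokeStatic = 6,
  JVM_REF_invokeSpecial = 7, JVM_REF_newInvokeSpecial = 8,
  JVM_REF_invokeInterface = 9
};

static bool has_receiver(int ref_kind) { return (ref_kind & 1) != 0; }

int main() {
  for (int k = JVM_REF_getField; k <= JVM_REF_invokeInterface; k++) {
    printf("ref_kind %d: %s\n", k, has_receiver(k) ? "receiver" : "no receiver");
  }
  return 0;
}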
+
|
|
+
|
|
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
|
|
+ vmIntrinsics::ID iid,
|
|
+ Register receiver_reg,
|
|
+ Register member_reg,
|
|
+ bool for_compiler_entry) {
|
|
+ assert(is_signature_polymorphic(iid), "expected invoke iid");
|
|
+ // temps used in this code are not used in *either* compiled or interpreted calling sequences
|
|
+ Register temp1 = x7;
|
|
+ Register temp2 = x28;
|
|
+ Register temp3 = x29; // x30 is live by this point: it contains the sender SP
|
|
+ if (for_compiler_entry) {
|
|
+ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
|
|
+ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
|
|
+ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
|
|
+ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
|
|
+ }
|
|
+
|
|
+ assert_different_registers(temp1, temp2, temp3, receiver_reg);
|
|
+ assert_different_registers(temp1, temp2, temp3, member_reg);
|
|
+
|
|
+ if (iid == vmIntrinsics::_invokeBasic) {
|
|
+ // indirect through MH.form.vmentry.vmtarget
|
|
+ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry);
|
|
+ } else {
|
|
+ // The method is a member invoker used by direct method handles.
|
|
+ if (VerifyMethodHandles) {
|
|
+ // make sure the trailing argument really is a MemberName (caller responsibility)
|
|
+ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName),
|
|
+ "MemberName required for invokeVirtual etc.");
|
|
+ }
|
|
+
|
|
+ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
|
|
+ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
|
|
+ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()));
|
|
+ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes()));
|
|
+
|
|
+ Register temp1_recv_klass = temp1;
|
|
+ if (iid != vmIntrinsics::_linkToStatic) {
|
|
+ __ verify_oop(receiver_reg);
|
|
+ if (iid == vmIntrinsics::_linkToSpecial) {
|
|
+ // Don't actually load the klass; just null-check the receiver.
|
|
+ __ null_check(receiver_reg);
|
|
+ } else {
|
|
+ // load receiver klass itself
|
|
+ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
|
|
+ __ load_klass(temp1_recv_klass, receiver_reg);
|
|
+ __ verify_klass_ptr(temp1_recv_klass);
|
|
+ }
|
|
+ BLOCK_COMMENT("check_receiver {");
|
|
+ // The receiver for the MemberName must be in receiver_reg.
|
|
+ // Check the receiver against the MemberName.clazz
|
|
+ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
|
|
+ // Did not load it above...
|
|
+ __ load_klass(temp1_recv_klass, receiver_reg);
|
|
+ __ verify_klass_ptr(temp1_recv_klass);
|
|
+ }
|
|
+ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
|
|
+ Label L_ok;
|
|
+ Register temp2_defc = temp2;
|
|
+ __ load_heap_oop(temp2_defc, member_clazz, temp3);
|
|
+ load_klass_from_Class(_masm, temp2_defc);
|
|
+ __ verify_klass_ptr(temp2_defc);
|
|
+ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
|
|
+ // If we get here, the type check failed!
|
|
+ __ ebreak();
|
|
+ __ bind(L_ok);
|
|
+ }
|
|
+ BLOCK_COMMENT("} check_receiver");
|
|
+ }
|
|
+ if (iid == vmIntrinsics::_linkToSpecial ||
|
|
+ iid == vmIntrinsics::_linkToStatic) {
|
|
+ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass
|
|
+ }
|
|
+
|
|
+ // Live registers at this point:
|
|
+ // member_reg - MemberName that was the trailing argument
|
|
+ // temp1_recv_klass - klass of stacked receiver, if needed
|
|
+ // x30 - interpreter linkage (if interpreted)
|
|
+ // x11 ... x10 - compiler arguments (if compiled)
|
|
+
|
|
+ Label L_incompatible_class_change_error;
|
|
+ switch (iid) {
|
|
+ case vmIntrinsics::_linkToSpecial:
|
|
+ if (VerifyMethodHandles) {
|
|
+ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
|
|
+ }
|
|
+ __ load_heap_oop(xmethod, member_vmtarget);
|
|
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
|
|
+ break;
|
|
+
|
|
+ case vmIntrinsics::_linkToStatic:
|
|
+ if (VerifyMethodHandles) {
|
|
+ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
|
|
+ }
|
|
+ __ load_heap_oop(xmethod, member_vmtarget);
|
|
+ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
|
|
+ break;
|
|
+
|
|
+ case vmIntrinsics::_linkToVirtual: {
|
|
+ // same as TemplateTable::invokevirtual,
|
|
+ // minus the CP setup and profiling:
|
|
+
|
|
+ if (VerifyMethodHandles) {
|
|
+ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
|
|
+ }
|
|
+
|
|
+ // pick out the vtable index from the MemberName, and then we can discard it:
|
|
+ Register temp2_index = temp2;
|
|
+ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg);
|
|
+
|
|
+ if (VerifyMethodHandles) {
|
|
+ Label L_index_ok;
|
|
+ __ bgez(temp2_index, L_index_ok);
|
|
+ __ ebreak();
|
|
+ __ BIND(L_index_ok);
|
|
+ }
|
|
+
|
|
+ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget
|
|
+ // at this point. And VerifyMethodHandles has already checked clazz, if needed.
|
|
+
|
|
+ // get target Method* & entry point
|
|
+ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case vmIntrinsics::_linkToInterface: {
|
|
+ // same as TemplateTable::invokeinterface
|
|
+ // (minus the CP setup and profiling, with different argument motion)
|
|
+ if (VerifyMethodHandles) {
|
|
+ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
|
|
+ }
|
|
+
|
|
+ Register temp3_intf = temp3;
|
|
+ __ load_heap_oop(temp3_intf, member_clazz);
|
|
+ load_klass_from_Class(_masm, temp3_intf);
|
|
+ __ verify_klass_ptr(temp3_intf);
|
|
+
|
|
+ Register rindex = xmethod;
|
|
+ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg);
|
|
+ if (VerifyMethodHandles) {
|
|
+ Label L;
|
|
+ __ bgez(rindex, L);
|
|
+ __ ebreak();
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // given intf, index, and recv klass, dispatch to the implementation method
|
|
+ __ lookup_interface_method(temp1_recv_klass, temp3_intf,
|
|
+ // note: next two args must be the same:
|
|
+ rindex, xmethod,
|
|
+ temp2,
|
|
+ L_incompatible_class_change_error);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ default:
|
|
+ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ // live at this point: xmethod, x30 (if interpreted)
|
|
+
|
|
+ // After figuring out which concrete method to call, jump into it.
|
|
+ // Note that this works in the interpreter with no data motion.
|
|
+  // But the compiled version will require that the receiver register be shifted out.
|
|
+ __ verify_method_ptr(xmethod);
|
|
+ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry);
|
|
+ if (iid == vmIntrinsics::_linkToInterface) {
|
|
+ __ bind(L_incompatible_class_change_error);
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
|
|
+ }
|
|
+ }
|
|
+
|
|
+}
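// Editorial sketch, not part of the patch: a summary of where each linker
// intrinsic handled in the switch above finds its target Method*. The enum and
// the strings are descriptive only.
#include <cstdio>

enum LinkerIntrinsic { linkToSpecial, linkToStatic, linkToVirtual, linkToInterface };

static const char* target_source(LinkerIntrinsic iid) {
  switch (iid) {
    case linkToSpecial:
    case linkToStatic:    return "MemberName.method.vmtarget (direct Method*)";
    case linkToVirtual:   return "vtable of the receiver klass, indexed by MemberName.vmindex";
    case linkToInterface: return "itable lookup via MemberName.clazz and vmindex";
    default:              return "unexpected intrinsic";
  }
}

int main() {
  printf("linkToVirtual resolves through: %s\n", target_source(linkToVirtual));
  return 0;
}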
+
|
|
+#ifndef PRODUCT
|
|
+void trace_method_handle_stub(const char* adaptername,
|
|
+ oop mh,
|
|
+ intptr_t* saved_regs,
|
|
+ intptr_t* entry_sp) { }
|
|
+
|
|
+// The stub wraps the arguments in a struct on the stack to avoid
|
|
+// dealing with the different calling conventions for passing 6
|
|
+// arguments.
|
|
+struct MethodHandleStubArguments {
|
|
+ const char* adaptername;
|
|
+ oopDesc* mh;
|
|
+ intptr_t* saved_regs;
|
|
+ intptr_t* entry_sp;
|
|
+};
|
|
+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { }
|
|
+
|
|
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { }
|
|
+#endif //PRODUCT
|
|
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..8ed69efe8
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
|
|
@@ -0,0 +1,58 @@
|
|
+/*
|
|
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+// Platform-specific definitions for method handles.
|
|
+// These definitions are inlined into class MethodHandles.
|
|
+
|
|
+// Adapters
|
|
+enum /* platform_dependent_constants */ {
|
|
+ adapter_code_size = 32000 DEBUG_ONLY(+ 120000)
|
|
+};
|
|
+
|
|
+public:
|
|
+
|
|
+ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
|
|
+
|
|
+ static void verify_klass(MacroAssembler* _masm,
|
|
+ Register obj, SystemDictionary::WKID klass_id,
|
|
+ const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
|
|
+
|
|
+ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) {
|
|
+ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
|
|
+ "reference is a MH");
|
|
+ }
|
|
+
|
|
+ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
|
|
+
|
|
+ // Similar to InterpreterMacroAssembler::jump_from_interpreted.
|
|
+ // Takes care of special dispatch from single stepping too.
|
|
+ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
|
|
+ bool for_compiler_entry);
|
|
+
|
|
+ static void jump_to_lambda_form(MacroAssembler* _masm,
|
|
+ Register recv, Register method_temp,
|
|
+ Register temp2,
|
|
+ bool for_compiler_entry);
|
|
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..4b1573130
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
|
|
@@ -0,0 +1,404 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "runtime/handles.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "utilities/ostream.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#endif
|
|
+
|
|
+Register NativeInstruction::extract_rs1(address instr) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15));
|
|
+}
|
|
+
|
|
+Register NativeInstruction::extract_rs2(address instr) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20));
|
|
+}
|
|
+
|
|
+Register NativeInstruction::extract_rd(address instr) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7));
|
|
+}
|
|
+
|
|
+uint32_t NativeInstruction::extract_opcode(address instr) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return Assembler::extract(((unsigned*)instr)[0], 6, 0);
|
|
+}
|
|
+
|
|
+uint32_t NativeInstruction::extract_funct3(address instr) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return Assembler::extract(((unsigned*)instr)[0], 14, 12);
|
|
+}
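// Editorial sketch, not part of the patch: the field extraction above on a
// concrete instruction word, assuming Assembler::extract(insn, msb, lsb) returns
// the inclusive bit range [msb:lsb]. Field positions follow the base RV32/RV64
// instruction formats (opcode 6..0, rd 11..7, funct3 14..12, rs1 19..15, rs2 24..20).
#include <cstdint>
#include <cstdio>

static uint32_t extract_bits(uint32_t insn, int msb, int lsb) {
  return (insn >> lsb) & ((1u << (msb - lsb + 1)) - 1);
}

int main() {
  uint32_t insn = 0x00C58533;                                // add a0, a1, a2
  printf("opcode = 0x%02x\n", extract_bits(insn, 6, 0));     // 0x33 (OP)
  printf("rd     = x%u\n",    extract_bits(insn, 11, 7));    // x10 (a0)
  printf("funct3 = %u\n",     extract_bits(insn, 14, 12));   // 0
  printf("rs1    = x%u\n",    extract_bits(insn, 19, 15));   // x11 (a1)
  printf("rs2    = x%u\n",    extract_bits(insn, 24, 20));   // x12 (a2)
  return 0;
}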
+
|
|
+bool NativeInstruction::is_pc_relative_at(address instr) {
|
|
+ // auipc + jalr
|
|
+ // auipc + addi
|
|
+ // auipc + load
|
|
+  // auipc + float_load
|
|
+ return (is_auipc_at(instr)) &&
|
|
+ (is_addi_at(instr + instruction_size) ||
|
|
+ is_jalr_at(instr + instruction_size) ||
|
|
+ is_load_at(instr + instruction_size) ||
|
|
+ is_float_load_at(instr + instruction_size)) &&
|
|
+ check_pc_relative_data_dependency(instr);
|
|
+}
|
|
+
|
|
+// i.e. ld(Rd, Label)
|
|
+bool NativeInstruction::is_load_pc_relative_at(address instr) {
|
|
+ return is_auipc_at(instr) && // auipc
|
|
+ is_ld_at(instr + instruction_size) && // ld
|
|
+ check_load_pc_relative_data_dependency(instr);
|
|
+}
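// Editorial sketch, not part of the patch: how an auipc + (addi/ld/jalr) pair
// covers a signed pc-relative offset. Because the second instruction's 12-bit
// immediate is signed, the auipc contribution is rounded up whenever the low
// 12 bits look negative; split_pcrel() below is an illustrative helper.
#include <cstdint>
#include <cassert>
#include <cstdio>

static void split_pcrel(int64_t offset, int64_t* hi20, int64_t* lo12) {
  *lo12 = ((offset & 0xfff) ^ 0x800) - 0x800;   // sign-extended low 12 bits
  *hi20 = (offset - *lo12) >> 12;               // what auipc contributes (<< 12)
  assert(*hi20 == ((offset + 0x800) >> 12));    // the usual closed form
}

int main() {
  int64_t pc = 0x40000000, target = 0x40000ffc;
  int64_t hi = 0, lo = 0;
  split_pcrel(target - pc, &hi, &lo);
  printf("auipc hi20=%lld, lo12=%lld -> 0x%llx\n",
         (long long) hi, (long long) lo,
         (unsigned long long)(pc + (hi << 12) + lo));
  return 0;
}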
+
|
|
+bool NativeInstruction::is_movptr_at(address instr) {
|
|
+ return is_lui_at(instr) && // Lui
|
|
+ is_addi_at(instr + instruction_size) && // Addi
|
|
+ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11
|
|
+ is_addi_at(instr + instruction_size * 3) && // Addi
|
|
+ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6
|
|
+ (is_addi_at(instr + instruction_size * 5) ||
|
|
+ is_jalr_at(instr + instruction_size * 5) ||
|
|
+ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load
|
|
+ check_movptr_data_dependency(instr);
|
|
+}
|
|
+
|
|
+bool NativeInstruction::is_li32_at(address instr) {
|
|
+ return is_lui_at(instr) && // lui
|
|
+ is_addiw_at(instr + instruction_size) && // addiw
|
|
+ check_li32_data_dependency(instr);
|
|
+}
|
|
+
|
|
+bool NativeInstruction::is_li64_at(address instr) {
|
|
+ return is_lui_at(instr) && // lui
|
|
+ is_addi_at(instr + instruction_size) && // addi
|
|
+ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12
|
|
+ is_addi_at(instr + instruction_size * 3) && // addi
|
|
+ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12
|
|
+ is_addi_at(instr + instruction_size * 5) && // addi
|
|
+ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8
|
|
+ is_addi_at(instr + instruction_size * 7) && // addi
|
|
+ check_li64_data_dependency(instr);
|
|
+}
|
|
+
|
|
+void NativeCall::verify() {
|
|
+ assert(NativeCall::is_call_at((address)this), "unexpected code at call site");
|
|
+}
|
|
+
|
|
+address NativeCall::destination() const {
|
|
+ address addr = (address)this;
|
|
+ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal.");
|
|
+ address destination = MacroAssembler::target_addr_for_insn(instruction_address());
|
|
+
|
|
+ // Do we use a trampoline stub for this call?
|
|
+ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
|
|
+ assert(cb && cb->is_nmethod(), "sanity");
|
|
+ nmethod *nm = (nmethod *)cb;
|
|
+ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
|
|
+ // Yes we do, so get the destination from the trampoline stub.
|
|
+ const address trampoline_stub_addr = destination;
|
|
+ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
|
|
+ }
|
|
+
|
|
+ return destination;
|
|
+}
|
|
+
|
|
+// Similar to replace_mt_safe, but just changes the destination. The
|
|
+// important thing is that free-running threads are able to execute this
|
|
+// call instruction at all times.
|
|
+//
|
|
+// Used in the runtime linkage of calls; see class CompiledIC.
|
|
+//
|
|
+// Add parameter assert_lock to switch off assertion
|
|
+// during code generation, where no patching lock is needed.
|
|
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
|
|
+ assert(!assert_lock ||
|
|
+ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
|
|
+ "concurrent code patching");
|
|
+
|
|
+ ResourceMark rm;
|
|
+ address addr_call = addr_at(0);
|
|
+ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
|
|
+
|
|
+ // Patch the constant in the call's trampoline stub.
|
|
+ address trampoline_stub_addr = get_trampoline();
|
|
+ if (trampoline_stub_addr != NULL) {
|
|
+ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines");
|
|
+ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
|
|
+ }
|
|
+
|
|
+ // Patch the call.
|
|
+ if (Assembler::reachable_from_branch_at(addr_call, dest)) {
|
|
+ set_destination(dest);
|
|
+ } else {
|
|
+ assert (trampoline_stub_addr != NULL, "we need a trampoline");
|
|
+ set_destination(trampoline_stub_addr);
|
|
+ }
|
|
+
|
|
+ ICache::invalidate_range(addr_call, instruction_size);
|
|
+}
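// Editorial sketch, not part of the patch: the reach test that decides between
// patching the jal directly and routing the call through its trampoline stub,
// assuming the usual jal range of a signed 21-bit, 2-byte-aligned offset (+/-1 MiB).
// This mirrors what Assembler::reachable_from_branch_at is expected to check.
#include <cstdint>

static bool jal_reachable(intptr_t from, intptr_t to) {
  intptr_t offset = to - from;
  return (offset & 1) == 0 &&                     // 2-byte aligned
         offset >= -(intptr_t(1) << 20) &&        // -1 MiB
         offset <   (intptr_t(1) << 20);          // +1 MiB (exclusive)
}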
+
|
|
+address NativeCall::get_trampoline() {
|
|
+ address call_addr = addr_at(0);
|
|
+
|
|
+ CodeBlob *code = CodeCache::find_blob(call_addr);
|
|
+ assert(code != NULL, "Could not find the containing code blob");
|
|
+
|
|
+ address jal_destination = MacroAssembler::pd_call_destination(call_addr);
|
|
+ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) {
|
|
+ return jal_destination;
|
|
+ }
|
|
+
|
|
+ if (code != NULL && code->is_nmethod()) {
|
|
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+// Inserts a native call instruction at a given pc
|
|
+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
|
|
+
|
|
+//-------------------------------------------------------------------
|
|
+
|
|
+void NativeMovConstReg::verify() {
|
|
+ if (!(nativeInstruction_at(instruction_address())->is_movptr() ||
|
|
+ is_auipc_at(instruction_address()))) {
|
|
+ fatal("should be MOVPTR or AUIPC");
|
|
+ }
|
|
+}
|
|
+
|
|
+intptr_t NativeMovConstReg::data() const {
|
|
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
|
|
+ if (maybe_cpool_ref(instruction_address())) {
|
|
+ return *(intptr_t*)addr;
|
|
+ } else {
|
|
+ return (intptr_t)addr;
|
|
+ }
|
|
+}
|
|
+
|
|
+void NativeMovConstReg::set_data(intptr_t x) {
|
|
+ if (maybe_cpool_ref(instruction_address())) {
|
|
+ address addr = MacroAssembler::target_addr_for_insn(instruction_address());
|
|
+ *(intptr_t*)addr = x;
|
|
+ } else {
|
|
+ // Store x into the instruction stream.
|
|
+ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x);
|
|
+ ICache::invalidate_range(instruction_address(), movptr_instruction_size);
|
|
+ }
|
|
+
|
|
+ // Find and replace the oop/metadata corresponding to this
|
|
+ // instruction in oops section.
|
|
+ CodeBlob* cb = CodeCache::find_blob(instruction_address());
|
|
+ if(cb != NULL) {
|
|
+ nmethod* nm = cb->as_nmethod_or_null();
|
|
+ if (nm != NULL) {
|
|
+ RelocIterator iter(nm, instruction_address(), next_instruction_address());
|
|
+ while (iter.next()) {
|
|
+ if (iter.type() == relocInfo::oop_type) {
|
|
+ oop* oop_addr = iter.oop_reloc()->oop_addr();
|
|
+ *oop_addr = cast_to_oop(x);
|
|
+ break;
|
|
+ } else if (iter.type() == relocInfo::metadata_type) {
|
|
+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr();
|
|
+ *metadata_addr = (Metadata*)x;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void NativeMovConstReg::print() {
|
|
+ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
|
|
+ p2i(instruction_address()), data());
|
|
+}
|
|
+
|
|
+//-------------------------------------------------------------------
|
|
+
|
|
+int NativeMovRegMem::offset() const {
|
|
+ Unimplemented();
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void NativeMovRegMem::set_offset(int x) { Unimplemented(); }
|
|
+
|
|
+void NativeMovRegMem::verify() {
|
|
+ Unimplemented();
|
|
+}
|
|
+
|
|
+//--------------------------------------------------------------------------------
|
|
+
|
|
+void NativeJump::verify() { }
|
|
+
|
|
+
|
|
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
|
|
+}
|
|
+
|
|
+
|
|
+address NativeJump::jump_destination() const {
|
|
+ address dest = MacroAssembler::target_addr_for_insn(instruction_address());
|
|
+
|
|
+ // We use jump to self as the unresolved address which the inline
|
|
+ // cache code (and relocs) know about
|
|
+
|
|
+ // return -1 if jump to self
|
|
+ dest = (dest == (address) this) ? (address) -1 : dest;
|
|
+ return dest;
|
|
+};
|
|
+
|
|
+//-------------------------------------------------------------------
|
|
+
|
|
+address NativeGeneralJump::jump_destination() const {
|
|
+ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address());
|
|
+ address dest = (address) move->data();
|
|
+
|
|
+ // We use jump to self as the unresolved address which the inline
|
|
+ // cache code (and relocs) know about
|
|
+
|
|
+ // return -1 if jump to self
|
|
+ dest = (dest == (address) this) ? (address) -1 : dest;
|
|
+ return dest;
|
|
+}
|
|
+
|
|
+//-------------------------------------------------------------------
|
|
+
|
|
+bool NativeInstruction::is_safepoint_poll() {
|
|
+ return is_lwu_to_zr(address(this));
|
|
+}
|
|
+
|
|
+bool NativeInstruction::is_lwu_to_zr(address instr) {
|
|
+ return (extract_opcode(instr) == 0b0000011 &&
|
|
+ extract_funct3(instr) == 0b110 &&
|
|
+ extract_rd(instr) == zr); // zr
|
|
+}
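// Editorial sketch, not part of the patch: the poll recognized above is a
// load-word-unsigned whose destination is the zero register, e.g.
// "lwu zero, off(base)". Reading the armed polling page through such a load is
// what produces the safepoint trap; the loaded value itself is discarded.
#include <cstdint>

static bool looks_like_poll(uint32_t insn) {
  uint32_t opcode = insn & 0x7f;           // bits 6..0
  uint32_t rd     = (insn >> 7) & 0x1f;    // bits 11..7
  uint32_t funct3 = (insn >> 12) & 0x7;    // bits 14..12
  return opcode == 0b0000011 && funct3 == 0b110 && rd == 0;   // LOAD, lwu, rd == x0
}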
+
|
|
+// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction.
|
|
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
|
|
+ // jvmci
|
|
+ return uint_at(0) == 0xffffffff;
|
|
+}
|
|
+
|
|
+void NativeIllegalInstruction::insert(address code_pos) {
|
|
+ assert_cond(code_pos != NULL);
|
|
+ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction
|
|
+}
|
|
+
|
|
+//-------------------------------------------------------------------
|
|
+
|
|
+// MT-safe inserting of a jump over a jump or a nop (used by
|
|
+// nmethod::make_not_entrant_or_zombie)
|
|
+
|
|
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
|
|
+
|
|
+ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
|
|
+
|
|
+ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() ||
|
|
+ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
|
|
+ "riscv cannot replace non-jump with jump");
|
|
+
|
|
+ // Patch this nmethod atomically.
|
|
+ if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
|
|
+ ptrdiff_t offset = dest - verified_entry;
|
|
+    guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction."); // 1M
|
|
+
|
|
+ uint32_t insn = 0;
|
|
+ address pInsn = (address)&insn;
|
|
+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
|
|
+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
|
|
+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
|
|
+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
|
|
+ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump
|
|
+ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset)
|
|
+ *(unsigned int*)verified_entry = insn;
|
|
+ } else {
|
|
+ // We use an illegal instruction for marking a method as
|
|
+ // not_entrant or zombie.
|
|
+ NativeIllegalInstruction::insert(verified_entry);
|
|
+ }
|
|
+
|
|
+ ICache::invalidate_range(verified_entry, instruction_size);
|
|
+}
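// Editorial sketch, not part of the patch: the J-type immediate packing performed
// by the Assembler::patch calls above, gathered into one standalone helper. A
// 2-byte-aligned, +/-1 MiB offset is scattered as imm[20|10:1|11|19:12] across
// bits 31..12 of the encoding; leaving rd as x0 makes it a plain "j offset".
#include <cstdint>
#include <cstdio>

static uint32_t encode_jal_x0(int32_t offset) {
  uint32_t imm  = (uint32_t) offset;
  uint32_t insn = 0b1101111;                      // jal opcode
  insn |= ((imm >> 12) & 0xff)  << 12;            // imm[19:12]
  insn |= ((imm >> 11) & 0x1)   << 20;            // imm[11]
  insn |= ((imm >> 1)  & 0x3ff) << 21;            // imm[10:1]
  insn |= ((imm >> 20) & 0x1)   << 31;            // imm[20] (sign bit)
  return insn;                                    // rd field left as x0
}

int main() {
  printf("j +0x800 -> 0x%08x\n", encode_jal_x0(0x800));   // 0x0010006f
  printf("j -4     -> 0x%08x\n", encode_jal_x0(-4));      // sign bit set
  return 0;
}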
+
|
|
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
|
|
+ CodeBuffer cb(code_pos, instruction_size);
|
|
+ MacroAssembler a(&cb);
|
|
+
|
|
+ int32_t offset = 0;
|
|
+ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli
|
|
+ a.jalr(x0, t0, offset); // jalr
|
|
+
|
|
+ ICache::invalidate_range(code_pos, instruction_size);
|
|
+}
|
|
+
|
|
+// MT-safe patching of a long jump instruction.
|
|
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
|
|
+ ShouldNotCallThis();
|
|
+}
|
|
+
|
|
+
|
|
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
|
|
+ return ptr_at(data_offset);
|
|
+}
|
|
+
|
|
+void NativeCallTrampolineStub::set_destination(address new_destination) {
|
|
+ set_ptr_at(data_offset, new_destination);
|
|
+ OrderAccess::release();
|
|
+}
|
|
+
|
|
+uint32_t NativeMembar::get_kind() {
|
|
+ uint32_t insn = uint_at(0);
|
|
+
|
|
+ uint32_t predecessor = Assembler::extract(insn, 27, 24);
|
|
+ uint32_t successor = Assembler::extract(insn, 23, 20);
|
|
+
|
|
+ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
|
|
+}
|
|
+
|
|
+void NativeMembar::set_kind(uint32_t order_kind) {
|
|
+ uint32_t predecessor = 0;
|
|
+ uint32_t successor = 0;
|
|
+
|
|
+ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor);
|
|
+
|
|
+ uint32_t insn = uint_at(0);
|
|
+ address pInsn = (address) &insn;
|
|
+ Assembler::patch(pInsn, 27, 24, predecessor);
|
|
+ Assembler::patch(pInsn, 23, 20, successor);
|
|
+
|
|
+ address membar = addr_at(0);
|
|
+ *(unsigned int*) membar = insn;
|
|
+}
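// Editorial sketch, not part of the patch: the FENCE fields patched above. The
// predecessor set lives in bits 27..24 and the successor set in bits 23..20, each
// a 4-bit I/O/R/W mask (I=8, O=4, R=2, W=1); 0x0ff0000f is the full "fence iorw, iorw".
#include <cstdint>
#include <cstdio>

static uint32_t fence(uint32_t pred, uint32_t succ) {   // pred/succ: 4-bit masks
  return 0b0001111 | (succ << 20) | (pred << 24);       // MISC-MEM opcode, funct3=0, rd=rs1=x0
}

int main() {
  const uint32_t IORW = 0b1111, RW = 0b0011;
  printf("fence iorw,iorw = 0x%08x\n", fence(IORW, IORW)); // 0x0ff0000f
  printf("fence rw,rw     = 0x%08x\n", fence(RW, RW));     // 0x0330000f
  return 0;
}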
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..e8a4e0a46
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
|
|
@@ -0,0 +1,561 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP
|
|
+#define CPU_RISCV_NATIVEINST_RISCV_HPP
|
|
+
|
|
+#include "asm/assembler.hpp"
|
|
+#include "runtime/icache.hpp"
|
|
+#include "runtime/os.hpp"
|
|
+
|
|
+// We have interfaces for the following instructions:
|
|
+// - NativeInstruction
|
|
+// - - NativeCall
|
|
+// - - NativeMovConstReg
|
|
+// - - NativeMovRegMem
|
|
+// - - NativeJump
|
|
+// - - NativeGeneralJump
|
|
+// - - NativeIllegalInstruction
|
|
+// - - NativeCallTrampolineStub
|
|
+// - - NativeMembar
|
|
+
|
|
+// The base class for different kinds of native instruction abstractions.
|
|
+// Provides the primitive operations to manipulate code relative to this.
|
|
+
|
|
+class NativeInstruction {
|
|
+ friend class Relocation;
|
|
+ friend bool is_NativeCallTrampolineStub_at(address);
|
|
+ public:
|
|
+ enum {
|
|
+ instruction_size = 4
|
|
+ };
|
|
+
|
|
+ juint encoding() const {
|
|
+ return uint_at(0);
|
|
+ }
|
|
+
|
|
+ bool is_jal() const { return is_jal_at(addr_at(0)); }
|
|
+ bool is_movptr() const { return is_movptr_at(addr_at(0)); }
|
|
+ bool is_call() const { return is_call_at(addr_at(0)); }
|
|
+ bool is_jump() const { return is_jump_at(addr_at(0)); }
|
|
+
|
|
+ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; }
|
|
+ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
|
|
+ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; }
|
|
+ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
|
|
+ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; }
|
|
+ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; }
|
|
+ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; }
|
|
+ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
|
|
+ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
|
|
+ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
|
|
+ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; }
|
|
+ static bool is_slli_shift_at(address instr, uint32_t shift) {
|
|
+ assert_cond(instr != NULL);
|
|
+ return (extract_opcode(instr) == 0b0010011 && // opcode field
|
|
+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation
|
|
+ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field
|
|
+ }
|
|
+
|
|
+ static Register extract_rs1(address instr);
|
|
+ static Register extract_rs2(address instr);
|
|
+ static Register extract_rd(address instr);
|
|
+ static uint32_t extract_opcode(address instr);
|
|
+ static uint32_t extract_funct3(address instr);
|
|
+
|
|
+ // the instruction sequence of movptr is as below:
|
|
+ // lui
|
|
+ // addi
|
|
+ // slli
|
|
+ // addi
|
|
+ // slli
|
|
+ // addi/jalr/load
|
|
+ static bool check_movptr_data_dependency(address instr) {
|
|
+ address lui = instr;
|
|
+ address addi1 = lui + instruction_size;
|
|
+ address slli1 = addi1 + instruction_size;
|
|
+ address addi2 = slli1 + instruction_size;
|
|
+ address slli2 = addi2 + instruction_size;
|
|
+ address last_instr = slli2 + instruction_size;
|
|
+ return extract_rs1(addi1) == extract_rd(lui) &&
|
|
+ extract_rs1(addi1) == extract_rd(addi1) &&
|
|
+ extract_rs1(slli1) == extract_rd(addi1) &&
|
|
+ extract_rs1(slli1) == extract_rd(slli1) &&
|
|
+ extract_rs1(addi2) == extract_rd(slli1) &&
|
|
+ extract_rs1(addi2) == extract_rd(addi2) &&
|
|
+ extract_rs1(slli2) == extract_rd(addi2) &&
|
|
+ extract_rs1(slli2) == extract_rd(slli2) &&
|
|
+ extract_rs1(last_instr) == extract_rd(slli2);
|
|
+ }
|
|
+
|
|
+ // the instruction sequence of li64 is as below:
|
|
+ // lui
|
|
+ // addi
|
|
+ // slli
|
|
+ // addi
|
|
+ // slli
|
|
+ // addi
|
|
+ // slli
|
|
+ // addi
|
|
+ static bool check_li64_data_dependency(address instr) {
|
|
+ address lui = instr;
|
|
+ address addi1 = lui + instruction_size;
|
|
+ address slli1 = addi1 + instruction_size;
|
|
+ address addi2 = slli1 + instruction_size;
|
|
+ address slli2 = addi2 + instruction_size;
|
|
+ address addi3 = slli2 + instruction_size;
|
|
+ address slli3 = addi3 + instruction_size;
|
|
+ address addi4 = slli3 + instruction_size;
|
|
+ return extract_rs1(addi1) == extract_rd(lui) &&
|
|
+ extract_rs1(addi1) == extract_rd(addi1) &&
|
|
+ extract_rs1(slli1) == extract_rd(addi1) &&
|
|
+ extract_rs1(slli1) == extract_rd(slli1) &&
|
|
+ extract_rs1(addi2) == extract_rd(slli1) &&
|
|
+ extract_rs1(addi2) == extract_rd(addi2) &&
|
|
+ extract_rs1(slli2) == extract_rd(addi2) &&
|
|
+ extract_rs1(slli2) == extract_rd(slli2) &&
|
|
+ extract_rs1(addi3) == extract_rd(slli2) &&
|
|
+ extract_rs1(addi3) == extract_rd(addi3) &&
|
|
+ extract_rs1(slli3) == extract_rd(addi3) &&
|
|
+ extract_rs1(slli3) == extract_rd(slli3) &&
|
|
+ extract_rs1(addi4) == extract_rd(slli3) &&
|
|
+ extract_rs1(addi4) == extract_rd(addi4);
|
|
+ }
|
|
+
|
|
+ // the instruction sequence of li32 is as below:
|
|
+ // lui
|
|
+ // addiw
|
|
+ static bool check_li32_data_dependency(address instr) {
|
|
+ address lui = instr;
|
|
+ address addiw = lui + instruction_size;
|
|
+
|
|
+ return extract_rs1(addiw) == extract_rd(lui) &&
|
|
+ extract_rs1(addiw) == extract_rd(addiw);
|
|
+ }
|
|
+
|
|
+ // the instruction sequence of pc-relative is as below:
|
|
+ // auipc
|
|
+ // jalr/addi/load/float_load
|
|
+ static bool check_pc_relative_data_dependency(address instr) {
|
|
+ address auipc = instr;
|
|
+ address last_instr = auipc + instruction_size;
|
|
+
|
|
+ return extract_rs1(last_instr) == extract_rd(auipc);
|
|
+ }
|
|
+
|
|
+ // the instruction sequence of load_label is as below:
|
|
+ // auipc
|
|
+ // load
|
|
+ static bool check_load_pc_relative_data_dependency(address instr) {
|
|
+ address auipc = instr;
|
|
+ address last_instr = auipc + instruction_size;
|
|
+
|
|
+ return extract_rs1(last_instr) == extract_rd(auipc);
|
|
+ }
|
|
+
|
|
+ static bool is_movptr_at(address instr);
|
|
+ static bool is_li32_at(address instr);
|
|
+ static bool is_li64_at(address instr);
|
|
+ static bool is_pc_relative_at(address branch);
|
|
+ static bool is_load_pc_relative_at(address branch);
|
|
+
|
|
+ static bool is_call_at(address instr) {
|
|
+ if (is_jal_at(instr) || is_jalr_at(instr)) {
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+ }
|
|
+ static bool is_lwu_to_zr(address instr);
|
|
+
|
|
+ inline bool is_nop();
|
|
+ inline bool is_jump_or_nop();
|
|
+ bool is_safepoint_poll();
|
|
+ bool is_sigill_zombie_not_entrant();
|
|
+
|
|
+ protected:
|
|
+ address addr_at(int offset) const { return address(this) + offset; }
|
|
+
|
|
+ jint int_at(int offset) const { return *(jint*) addr_at(offset); }
|
|
+ juint uint_at(int offset) const { return *(juint*) addr_at(offset); }
|
|
+
|
|
+ address ptr_at(int offset) const { return *(address*) addr_at(offset); }
|
|
+
|
|
+ oop oop_at (int offset) const { return *(oop*) addr_at(offset); }
|
|
+
|
|
+
|
|
+ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; }
|
|
+ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; }
|
|
+ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; }
|
|
+ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; }
|
|
+
|
|
+ public:
|
|
+
|
|
+ inline friend NativeInstruction* nativeInstruction_at(address addr);
|
|
+
|
|
+ static bool maybe_cpool_ref(address instr) {
|
|
+ return is_auipc_at(instr);
|
|
+ }
|
|
+
|
|
+ bool is_membar() {
|
|
+ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0;
|
|
+ }
|
|
+};
|
|
+
|
|
+inline NativeInstruction* nativeInstruction_at(address addr) {
|
|
+ return (NativeInstruction*)addr;
|
|
+}
|
|
+
|
|
+// The natural type of a RISCV instruction is uint32_t
|
|
+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) {
|
|
+ return (NativeInstruction*)addr;
|
|
+}
|
|
+
|
|
+inline NativeCall* nativeCall_at(address addr);
|
|
+// The NativeCall is an abstraction for accessing/manipulating native
|
|
+// call instructions (used to manipulate inline caches, primitive &
|
|
+// DSO calls, etc.).
|
|
+
|
|
+class NativeCall: public NativeInstruction {
|
|
+ public:
|
|
+ enum RISCV_specific_constants {
|
|
+ instruction_size = 4,
|
|
+ instruction_offset = 0,
|
|
+ displacement_offset = 0,
|
|
+ return_address_offset = 4
|
|
+ };
|
|
+
|
|
+ address instruction_address() const { return addr_at(instruction_offset); }
|
|
+ address next_instruction_address() const { return addr_at(return_address_offset); }
|
|
+ address return_address() const { return addr_at(return_address_offset); }
|
|
+ address destination() const;
|
|
+
|
|
+ void set_destination(address dest) {
|
|
+ if (is_jal()) {
|
|
+ intptr_t offset = (intptr_t)(dest - instruction_address());
|
|
+ assert((offset & 0x1) == 0, "should be aligned");
|
|
+      assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal instruction\n");
|
|
+ unsigned int insn = 0b1101111; // jal
|
|
+ address pInsn = (address)(&insn);
|
|
+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
|
|
+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
|
|
+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
|
|
+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
|
|
+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra
|
|
+ set_int_at(displacement_offset, insn);
|
|
+ return;
|
|
+ }
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ void verify_alignment() { ; }
|
|
+ void verify();
|
|
+ void print();
|
|
+
|
|
+ // Creation
|
|
+ inline friend NativeCall* nativeCall_at(address addr);
|
|
+ inline friend NativeCall* nativeCall_before(address return_address);
|
|
+
|
|
+ static bool is_call_before(address return_address) {
|
|
+ return is_call_at(return_address - NativeCall::return_address_offset);
|
|
+ }
|
|
+
|
|
+ // MT-safe patching of a call instruction.
|
|
+ static void insert(address code_pos, address entry);
|
|
+
|
|
+ static void replace_mt_safe(address instr_addr, address code_buffer);
|
|
+
|
|
+ // Similar to replace_mt_safe, but just changes the destination. The
|
|
+ // important thing is that free-running threads are able to execute
|
|
+ // this call instruction at all times. If the call is an immediate BL
|
|
+ // instruction we can simply rely on atomicity of 32-bit writes to
|
|
+ // make sure other threads will see no intermediate states.
|
|
+
|
|
+ // We cannot rely on locks here, since the free-running threads must run at
|
|
+ // full speed.
|
|
+ //
|
|
+ // Used in the runtime linkage of calls; see class CompiledIC.
|
|
+ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
|
|
+
|
|
+ // The parameter assert_lock disables the assertion during code generation.
|
|
+ void set_destination_mt_safe(address dest, bool assert_lock = true);
|
|
+
|
|
+ address get_trampoline();
|
|
+};
|
|
+
|
|
+inline NativeCall* nativeCall_at(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset);
|
|
+#ifdef ASSERT
|
|
+ call->verify();
|
|
+#endif
|
|
+ return call;
|
|
+}
|
|
+
|
|
+inline NativeCall* nativeCall_before(address return_address) {
|
|
+ assert_cond(return_address != NULL);
|
|
+ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset);
|
|
+#ifdef ASSERT
|
|
+ call->verify();
|
|
+#endif
|
|
+ return call;
|
|
+}
|
|
+
|
|
+// An interface for accessing/manipulating native mov reg, imm instructions.
|
|
+// (used to manipulate inlined 64-bit data calls, etc.)
|
|
+class NativeMovConstReg: public NativeInstruction {
|
|
+ public:
|
|
+ enum RISCV_specific_constants {
|
|
+ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr().
|
|
+ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset().
|
|
+ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld
|
|
+ instruction_offset = 0,
|
|
+ displacement_offset = 0
|
|
+ };
|
|
+
|
|
+ address instruction_address() const { return addr_at(instruction_offset); }
|
|
+ address next_instruction_address() const {
|
|
+ // if the instruction at 5 * instruction_size is addi,
|
|
+ // it means a lui + addi + slli + addi + slli + addi instruction sequence,
|
|
+ // and the next instruction address should be addr_at(6 * instruction_size).
|
|
+ // However, when the instruction at 5 * instruction_size isn't addi,
|
|
+ // the next instruction address should be addr_at(5 * instruction_size)
|
|
+ if (nativeInstruction_at(instruction_address())->is_movptr()) {
|
|
+ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) {
|
|
+ // Assume: lui, addi, slli, addi, slli, addi
|
|
+ return addr_at(movptr_instruction_size);
|
|
+ } else {
|
|
+ // Assume: lui, addi, slli, addi, slli
|
|
+ return addr_at(movptr_with_offset_instruction_size);
|
|
+ }
|
|
+ } else if (is_load_pc_relative_at(instruction_address())) {
|
|
+ // Assume: auipc, ld
|
|
+ return addr_at(load_pc_relative_instruction_size);
|
|
+ }
|
|
+ guarantee(false, "Unknown instruction in NativeMovConstReg");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ intptr_t data() const;
|
|
+ void set_data(intptr_t x);
|
|
+
|
|
+ void flush() {
|
|
+ if (!maybe_cpool_ref(instruction_address())) {
|
|
+ ICache::invalidate_range(instruction_address(), movptr_instruction_size);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void verify();
|
|
+ void print();
|
|
+
|
|
+ // Creation
|
|
+ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr);
|
|
+ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr);
|
|
+};
|
|
+
|
|
+inline NativeMovConstReg* nativeMovConstReg_at(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset);
|
|
+#ifdef ASSERT
|
|
+ test->verify();
|
|
+#endif
|
|
+ return test;
|
|
+}
|
|
+
|
|
+inline NativeMovConstReg* nativeMovConstReg_before(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
|
|
+#ifdef ASSERT
|
|
+ test->verify();
|
|
+#endif
|
|
+ return test;
|
|
+}
|
|
+
|
|
+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented.
|
|
+class NativeMovRegMem: public NativeInstruction {
|
|
+ public:
|
|
+ int instruction_start() const {
|
|
+ Unimplemented();
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ address instruction_address() const {
|
|
+ Unimplemented();
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ int num_bytes_to_end_of_patch() const {
|
|
+ Unimplemented();
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ int offset() const;
|
|
+
|
|
+ void set_offset(int x);
|
|
+
|
|
+ void add_offset_in_bytes(int add_offset) { Unimplemented(); }
|
|
+
|
|
+ void verify();
|
|
+ void print();
|
|
+
|
|
+ private:
|
|
+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr);
|
|
+};
|
|
+
|
|
+inline NativeMovRegMem* nativeMovRegMem_at (address addr) {
|
|
+ Unimplemented();
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+class NativeJump: public NativeInstruction {
|
|
+ public:
|
|
+ enum RISCV_specific_constants {
|
|
+ instruction_size = NativeInstruction::instruction_size,
|
|
+ instruction_offset = 0,
|
|
+ data_offset = 0,
|
|
+ next_instruction_offset = NativeInstruction::instruction_size
|
|
+ };
|
|
+
|
|
+ address instruction_address() const { return addr_at(instruction_offset); }
|
|
+ address next_instruction_address() const { return addr_at(instruction_size); }
|
|
+ address jump_destination() const;
|
|
+
|
|
+ // Creation
|
|
+ inline friend NativeJump* nativeJump_at(address address);
|
|
+
|
|
+ void verify();
|
|
+
|
|
+ // Unit testing stuff
|
|
+ static void test() {}
|
|
+
|
|
+ // Insertion of native jump instruction
|
|
+ static void insert(address code_pos, address entry);
|
|
+ // MT-safe insertion of native jump at verified method entry
|
|
+ static void check_verified_entry_alignment(address entry, address verified_entry);
|
|
+ static void patch_verified_entry(address entry, address verified_entry, address dest);
|
|
+};
|
|
+
|
|
+inline NativeJump* nativeJump_at(address addr) {
|
|
+ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset);
|
|
+#ifdef ASSERT
|
|
+ jump->verify();
|
|
+#endif
|
|
+ return jump;
|
|
+}
|
|
+
|
|
+class NativeGeneralJump: public NativeJump {
|
|
+public:
|
|
+ enum RISCV_specific_constants {
|
|
+ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr
|
|
+ instruction_offset = 0,
|
|
+ data_offset = 0,
|
|
+ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr
|
|
+ };
|
|
+
|
|
+ address jump_destination() const;
|
|
+
|
|
+ static void insert_unconditional(address code_pos, address entry);
|
|
+ static void replace_mt_safe(address instr_addr, address code_buffer);
|
|
+};
|
|
+
|
|
+inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ NativeGeneralJump* jump = (NativeGeneralJump*)(addr);
|
|
+ debug_only(jump->verify();)
|
|
+ return jump;
|
|
+}
|
|
+
|
|
+class NativeIllegalInstruction: public NativeInstruction {
|
|
+ public:
|
|
+  // Insert an illegal opcode at the specified address
|
|
+ static void insert(address code_pos);
|
|
+};
|
|
+
|
|
+inline bool NativeInstruction::is_nop() {
|
|
+ uint32_t insn = *(uint32_t*)addr_at(0);
|
|
+ return insn == 0x13;
|
|
+}
|
|
+
|
|
+inline bool NativeInstruction::is_jump_or_nop() {
|
|
+ return is_nop() || is_jump();
|
|
+}
|
|
+
|
|
+// Call trampoline stubs.
|
|
+class NativeCallTrampolineStub : public NativeInstruction {
|
|
+ public:
|
|
+
|
|
+ enum RISCV_specific_constants {
|
|
+ // Refer to function emit_trampoline_stub.
|
|
+ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address
|
|
+ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr
|
|
+ };
|
|
+
|
|
+ address destination(nmethod *nm = NULL) const;
|
|
+ void set_destination(address new_destination);
|
|
+ ptrdiff_t destination_offset() const;
|
|
+};
|
|
+
|
|
+inline bool is_NativeCallTrampolineStub_at(address addr) {
|
|
+ // Ensure that the stub is exactly
|
|
+ // ld t0, L--->auipc + ld
|
|
+ // jr t0
|
|
+ // L:
|
|
+
|
|
+  // Check the instructions, registers and immediate:
|
|
+ // 1). check the instructions: auipc + ld + jalr
|
|
+ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0
|
|
+ // 3). check if the offset in ld[31:20] equals the data_offset
|
|
+ assert_cond(addr != NULL);
|
|
+ const int instr_size = NativeInstruction::instruction_size;
|
|
+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) &&
|
|
+ (NativeInstruction::extract_rd(addr) == x5) &&
|
|
+ (NativeInstruction::extract_rd(addr + instr_size) == x5) &&
|
|
+ (NativeInstruction::extract_rs1(addr + instr_size) == x5) &&
|
|
+ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) &&
|
|
+ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) {
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
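Given the layout checked above (auipc + ld + jr followed by a 64-bit target word at data_offset), the stub's destination is simply the word stored after the three instructions. A standalone model of that layout, not the patch's NativeCallTrampolineStub implementation:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const int insn_size   = 4;                 // assumed 4-byte instructions
  const int data_offset = 3 * insn_size;     // auipc + ld + jr, as in the enum above
  unsigned char stub[data_offset + sizeof(uint64_t)] = {0};

  uint64_t target = 0x0000123456789abcull;   // hypothetical call target
  std::memcpy(stub + data_offset, &target, sizeof(target));        // what the stub emitter stores
  uint64_t read_back = 0;
  std::memcpy(&read_back, stub + data_offset, sizeof(read_back));  // what destination() must load
  assert(read_back == target);
  return 0;
}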
|
|
+
|
|
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found");
|
|
+ return (NativeCallTrampolineStub*)addr;
|
|
+}
|
|
+
|
|
+class NativeMembar : public NativeInstruction {
|
|
+public:
|
|
+ uint32_t get_kind();
|
|
+ void set_kind(uint32_t order_kind);
|
|
+};
|
|
+
|
|
+inline NativeMembar *NativeMembar_at(address addr) {
|
|
+ assert_cond(addr != NULL);
|
|
+ assert(nativeInstruction_at(addr)->is_membar(), "no membar found");
|
|
+ return (NativeMembar*)addr;
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..04a36c1c7
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
|
|
@@ -0,0 +1,46 @@
|
|
+/*
|
|
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP
|
|
+#define CPU_RISCV_REGISTERMAP_RISCV_HPP
|
|
+
|
|
+// machine-dependent implementation for register maps
|
|
+ friend class frame;
|
|
+
|
|
+ private:
|
|
+  // This is the hook for finding a register in a "well-known" location,
|
|
+ // such as a register block of a predetermined format.
|
|
+ // Since there is none, we just return NULL.
|
|
+  // See the registerMap implementation of another platform for an example
|
|
+  // of grabbing registers from register save areas of a standard layout.
|
|
+ address pd_location(VMReg reg) const {return NULL;}
|
|
+
|
|
+ // no PD state to clear or copy:
|
|
+ void pd_clear() {}
|
|
+ void pd_initialize() {}
|
|
+ void pd_initialize_from(const RegisterMap* map) {}
|
|
+
|
|
+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..b30c1b107
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
|
|
@@ -0,0 +1,193 @@
|
|
+/*
|
|
+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "asm/register.hpp"
|
|
+#include "interp_masm_riscv.hpp"
|
|
+#include "register_riscv.hpp"
|
|
+
|
|
+REGISTER_DEFINITION(Register, noreg);
|
|
+
|
|
+REGISTER_DEFINITION(Register, x0);
|
|
+REGISTER_DEFINITION(Register, x1);
|
|
+REGISTER_DEFINITION(Register, x2);
|
|
+REGISTER_DEFINITION(Register, x3);
|
|
+REGISTER_DEFINITION(Register, x4);
|
|
+REGISTER_DEFINITION(Register, x5);
|
|
+REGISTER_DEFINITION(Register, x6);
|
|
+REGISTER_DEFINITION(Register, x7);
|
|
+REGISTER_DEFINITION(Register, x8);
|
|
+REGISTER_DEFINITION(Register, x9);
|
|
+REGISTER_DEFINITION(Register, x10);
|
|
+REGISTER_DEFINITION(Register, x11);
|
|
+REGISTER_DEFINITION(Register, x12);
|
|
+REGISTER_DEFINITION(Register, x13);
|
|
+REGISTER_DEFINITION(Register, x14);
|
|
+REGISTER_DEFINITION(Register, x15);
|
|
+REGISTER_DEFINITION(Register, x16);
|
|
+REGISTER_DEFINITION(Register, x17);
|
|
+REGISTER_DEFINITION(Register, x18);
|
|
+REGISTER_DEFINITION(Register, x19);
|
|
+REGISTER_DEFINITION(Register, x20);
|
|
+REGISTER_DEFINITION(Register, x21);
|
|
+REGISTER_DEFINITION(Register, x22);
|
|
+REGISTER_DEFINITION(Register, x23);
|
|
+REGISTER_DEFINITION(Register, x24);
|
|
+REGISTER_DEFINITION(Register, x25);
|
|
+REGISTER_DEFINITION(Register, x26);
|
|
+REGISTER_DEFINITION(Register, x27);
|
|
+REGISTER_DEFINITION(Register, x28);
|
|
+REGISTER_DEFINITION(Register, x29);
|
|
+REGISTER_DEFINITION(Register, x30);
|
|
+REGISTER_DEFINITION(Register, x31);
|
|
+
|
|
+REGISTER_DEFINITION(FloatRegister, fnoreg);
|
|
+
|
|
+REGISTER_DEFINITION(FloatRegister, f0);
|
|
+REGISTER_DEFINITION(FloatRegister, f1);
|
|
+REGISTER_DEFINITION(FloatRegister, f2);
|
|
+REGISTER_DEFINITION(FloatRegister, f3);
|
|
+REGISTER_DEFINITION(FloatRegister, f4);
|
|
+REGISTER_DEFINITION(FloatRegister, f5);
|
|
+REGISTER_DEFINITION(FloatRegister, f6);
|
|
+REGISTER_DEFINITION(FloatRegister, f7);
|
|
+REGISTER_DEFINITION(FloatRegister, f8);
|
|
+REGISTER_DEFINITION(FloatRegister, f9);
|
|
+REGISTER_DEFINITION(FloatRegister, f10);
|
|
+REGISTER_DEFINITION(FloatRegister, f11);
|
|
+REGISTER_DEFINITION(FloatRegister, f12);
|
|
+REGISTER_DEFINITION(FloatRegister, f13);
|
|
+REGISTER_DEFINITION(FloatRegister, f14);
|
|
+REGISTER_DEFINITION(FloatRegister, f15);
|
|
+REGISTER_DEFINITION(FloatRegister, f16);
|
|
+REGISTER_DEFINITION(FloatRegister, f17);
|
|
+REGISTER_DEFINITION(FloatRegister, f18);
|
|
+REGISTER_DEFINITION(FloatRegister, f19);
|
|
+REGISTER_DEFINITION(FloatRegister, f20);
|
|
+REGISTER_DEFINITION(FloatRegister, f21);
|
|
+REGISTER_DEFINITION(FloatRegister, f22);
|
|
+REGISTER_DEFINITION(FloatRegister, f23);
|
|
+REGISTER_DEFINITION(FloatRegister, f24);
|
|
+REGISTER_DEFINITION(FloatRegister, f25);
|
|
+REGISTER_DEFINITION(FloatRegister, f26);
|
|
+REGISTER_DEFINITION(FloatRegister, f27);
|
|
+REGISTER_DEFINITION(FloatRegister, f28);
|
|
+REGISTER_DEFINITION(FloatRegister, f29);
|
|
+REGISTER_DEFINITION(FloatRegister, f30);
|
|
+REGISTER_DEFINITION(FloatRegister, f31);
|
|
+
|
|
+REGISTER_DEFINITION(VectorRegister, vnoreg);
|
|
+
|
|
+REGISTER_DEFINITION(VectorRegister, v0);
|
|
+REGISTER_DEFINITION(VectorRegister, v1);
|
|
+REGISTER_DEFINITION(VectorRegister, v2);
|
|
+REGISTER_DEFINITION(VectorRegister, v3);
|
|
+REGISTER_DEFINITION(VectorRegister, v4);
|
|
+REGISTER_DEFINITION(VectorRegister, v5);
|
|
+REGISTER_DEFINITION(VectorRegister, v6);
|
|
+REGISTER_DEFINITION(VectorRegister, v7);
|
|
+REGISTER_DEFINITION(VectorRegister, v8);
|
|
+REGISTER_DEFINITION(VectorRegister, v9);
|
|
+REGISTER_DEFINITION(VectorRegister, v10);
|
|
+REGISTER_DEFINITION(VectorRegister, v11);
|
|
+REGISTER_DEFINITION(VectorRegister, v12);
|
|
+REGISTER_DEFINITION(VectorRegister, v13);
|
|
+REGISTER_DEFINITION(VectorRegister, v14);
|
|
+REGISTER_DEFINITION(VectorRegister, v15);
|
|
+REGISTER_DEFINITION(VectorRegister, v16);
|
|
+REGISTER_DEFINITION(VectorRegister, v17);
|
|
+REGISTER_DEFINITION(VectorRegister, v18);
|
|
+REGISTER_DEFINITION(VectorRegister, v19);
|
|
+REGISTER_DEFINITION(VectorRegister, v20);
|
|
+REGISTER_DEFINITION(VectorRegister, v21);
|
|
+REGISTER_DEFINITION(VectorRegister, v22);
|
|
+REGISTER_DEFINITION(VectorRegister, v23);
|
|
+REGISTER_DEFINITION(VectorRegister, v24);
|
|
+REGISTER_DEFINITION(VectorRegister, v25);
|
|
+REGISTER_DEFINITION(VectorRegister, v26);
|
|
+REGISTER_DEFINITION(VectorRegister, v27);
|
|
+REGISTER_DEFINITION(VectorRegister, v28);
|
|
+REGISTER_DEFINITION(VectorRegister, v29);
|
|
+REGISTER_DEFINITION(VectorRegister, v30);
|
|
+REGISTER_DEFINITION(VectorRegister, v31);
|
|
+
|
|
+REGISTER_DEFINITION(Register, c_rarg0);
|
|
+REGISTER_DEFINITION(Register, c_rarg1);
|
|
+REGISTER_DEFINITION(Register, c_rarg2);
|
|
+REGISTER_DEFINITION(Register, c_rarg3);
|
|
+REGISTER_DEFINITION(Register, c_rarg4);
|
|
+REGISTER_DEFINITION(Register, c_rarg5);
|
|
+REGISTER_DEFINITION(Register, c_rarg6);
|
|
+REGISTER_DEFINITION(Register, c_rarg7);
|
|
+
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg0);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg1);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg2);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg3);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg4);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg5);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg6);
|
|
+REGISTER_DEFINITION(FloatRegister, c_farg7);
|
|
+
|
|
+REGISTER_DEFINITION(Register, j_rarg0);
|
|
+REGISTER_DEFINITION(Register, j_rarg1);
|
|
+REGISTER_DEFINITION(Register, j_rarg2);
|
|
+REGISTER_DEFINITION(Register, j_rarg3);
|
|
+REGISTER_DEFINITION(Register, j_rarg4);
|
|
+REGISTER_DEFINITION(Register, j_rarg5);
|
|
+REGISTER_DEFINITION(Register, j_rarg6);
|
|
+REGISTER_DEFINITION(Register, j_rarg7);
|
|
+
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg0);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg1);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg2);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg3);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg4);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg5);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg6);
|
|
+REGISTER_DEFINITION(FloatRegister, j_farg7);
|
|
+
|
|
+REGISTER_DEFINITION(Register, zr);
|
|
+REGISTER_DEFINITION(Register, gp);
|
|
+REGISTER_DEFINITION(Register, tp);
|
|
+REGISTER_DEFINITION(Register, xmethod);
|
|
+REGISTER_DEFINITION(Register, ra);
|
|
+REGISTER_DEFINITION(Register, sp);
|
|
+REGISTER_DEFINITION(Register, fp);
|
|
+REGISTER_DEFINITION(Register, xheapbase);
|
|
+REGISTER_DEFINITION(Register, xcpool);
|
|
+REGISTER_DEFINITION(Register, xmonitors);
|
|
+REGISTER_DEFINITION(Register, xlocals);
|
|
+REGISTER_DEFINITION(Register, xthread);
|
|
+REGISTER_DEFINITION(Register, xbcp);
|
|
+REGISTER_DEFINITION(Register, xdispatch);
|
|
+REGISTER_DEFINITION(Register, esp);
|
|
+
|
|
+REGISTER_DEFINITION(Register, t0);
|
|
+REGISTER_DEFINITION(Register, t1);
|
|
+REGISTER_DEFINITION(Register, t2);
|
|
diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..76215ef2a
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/register_riscv.cpp
|
|
@@ -0,0 +1,69 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "register_riscv.hpp"
|
|
+
|
|
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers *
|
|
+ RegisterImpl::max_slots_per_register;
|
|
+const int ConcreteRegisterImpl::max_fpr =
|
|
+ ConcreteRegisterImpl::max_gpr +
|
|
+ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
|
|
+
|
|
+const int ConcreteRegisterImpl::max_vpr =
|
|
+ ConcreteRegisterImpl::max_fpr +
|
|
+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register;
|
|
+
|
|
+
|
|
+const char* RegisterImpl::name() const {
|
|
+ const char* names[number_of_registers] = {
|
|
+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9",
|
|
+ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
|
|
+ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals",
|
|
+ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod"
|
|
+ };
|
|
+ return is_valid() ? names[encoding()] : "noreg";
|
|
+}
|
|
+
|
|
+const char* FloatRegisterImpl::name() const {
|
|
+ const char* names[number_of_registers] = {
|
|
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
|
|
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
|
|
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
|
|
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
|
|
+ };
|
|
+ return is_valid() ? names[encoding()] : "noreg";
|
|
+}
|
|
+
|
|
+const char* VectorRegisterImpl::name() const {
|
|
+ const char* names[number_of_registers] = {
|
|
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
|
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
|
|
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
|
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
|
|
+ };
|
|
+ return is_valid() ? names[encoding()] : "noreg";
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..8beba6776
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/register_riscv.hpp
|
|
@@ -0,0 +1,337 @@
|
|
+/*
|
|
+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_REGISTER_RISCV_HPP
|
|
+#define CPU_RISCV_REGISTER_RISCV_HPP
|
|
+
|
|
+#include "asm/register.hpp"
|
|
+
|
|
+#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions.
|
|
+#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode.
|
|
+#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags).
|
|
+#define CSR_VSTART 0x008 // Vector start position
|
|
+#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag
|
|
+#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode
|
|
+#define CSR_VCSR 0x00F // Vector control and status register
|
|
+#define CSR_VL 0xC20 // Vector length
|
|
+#define CSR_VTYPE 0xC21 // Vector data type register
|
|
+#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes)
|
|
+#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction.
|
|
+#define CSR_TIME 0xc01 // Timer for RDTIME instruction.
|
|
+#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction.
|
|
+
|
|
+class VMRegImpl;
|
|
+typedef VMRegImpl* VMReg;
|
|
+
|
|
+// Use Register as shortcut
|
|
+class RegisterImpl;
|
|
+typedef RegisterImpl* Register;
|
|
+
|
|
+inline Register as_Register(int encoding) {
|
|
+ return (Register)(intptr_t) encoding;
|
|
+}
|
|
+
|
|
+class RegisterImpl: public AbstractRegisterImpl {
|
|
+ public:
|
|
+ enum {
|
|
+ number_of_registers = 32,
|
|
+ number_of_byte_registers = 32,
|
|
+ max_slots_per_register = 2
|
|
+ };
|
|
+
|
|
+ // derived registers, offsets, and addresses
|
|
+ Register successor() const { return as_Register(encoding() + 1); }
|
|
+
|
|
+ // construction
|
|
+ inline friend Register as_Register(int encoding);
|
|
+
|
|
+ VMReg as_VMReg();
|
|
+
|
|
+ // accessors
|
|
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
|
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
|
+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; }
|
|
+ const char* name() const;
|
|
+ int encoding_nocheck() const { return (intptr_t)this; }
|
|
+
|
|
+ // Return the bit which represents this register. This is intended
|
|
+ // to be ORed into a bitmask: for usage see class RegSet below.
|
|
+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; }
|
|
+};
|
|
+
|
|
+// The integer registers of the riscv architecture
|
|
+
|
|
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
|
|
+
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x0, (0));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x1, (1));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x2, (2));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x3, (3));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x4, (4));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x5, (5));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x6, (6));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x7, (7));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x8, (8));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x9, (9));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x10, (10));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x11, (11));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x12, (12));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x13, (13));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x14, (14));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x15, (15));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x16, (16));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x17, (17));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x18, (18));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x19, (19));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x20, (20));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x21, (21));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x22, (22));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x23, (23));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x24, (24));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x25, (25));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x26, (26));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x27, (27));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x28, (28));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x29, (29));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x30, (30));
|
|
+CONSTANT_REGISTER_DECLARATION(Register, x31, (31));
|
|
+
|
|
+// Use FloatRegister as shortcut
|
|
+class FloatRegisterImpl;
|
|
+typedef FloatRegisterImpl* FloatRegister;
|
|
+
|
|
+inline FloatRegister as_FloatRegister(int encoding) {
|
|
+ return (FloatRegister)(intptr_t) encoding;
|
|
+}
|
|
+
|
|
+// The implementation of floating point registers for the architecture
|
|
+class FloatRegisterImpl: public AbstractRegisterImpl {
|
|
+ public:
|
|
+ enum {
|
|
+ number_of_registers = 32,
|
|
+ max_slots_per_register = 2
|
|
+ };
|
|
+
|
|
+ // construction
|
|
+ inline friend FloatRegister as_FloatRegister(int encoding);
|
|
+
|
|
+ VMReg as_VMReg();
|
|
+
|
|
+ // derived registers, offsets, and addresses
|
|
+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
|
|
+
|
|
+ // accessors
|
|
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
|
+ int encoding_nocheck() const { return (intptr_t)this; }
|
|
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
|
+ const char* name() const;
|
|
+
|
|
+};
|
|
+
|
|
+// The float registers of the RISCV architecture
|
|
+
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1));
|
|
+
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30));
|
|
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31));
|
|
+
|
|
+// Use VectorRegister as shortcut
|
|
+class VectorRegisterImpl;
|
|
+typedef VectorRegisterImpl* VectorRegister;
|
|
+
|
|
+inline VectorRegister as_VectorRegister(int encoding) {
|
|
+ return (VectorRegister)(intptr_t) encoding;
|
|
+}
|
|
+
|
|
+// The implementation of vector registers for riscv-v
|
|
+class VectorRegisterImpl: public AbstractRegisterImpl {
|
|
+ public:
|
|
+ enum {
|
|
+ number_of_registers = 32,
|
|
+ max_slots_per_register = 4
|
|
+ };
|
|
+
|
|
+ // construction
|
|
+ inline friend VectorRegister as_VectorRegister(int encoding);
|
|
+
|
|
+ VMReg as_VMReg();
|
|
+
|
|
+ // derived registers, offsets, and addresses
|
|
+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
|
|
+
|
|
+ // accessors
|
|
+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
|
+ int encoding_nocheck() const { return (intptr_t)this; }
|
|
+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
|
+ const char* name() const;
|
|
+
|
|
+};
|
|
+
|
|
+// The vector registers of RVV
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1));
|
|
+
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30));
|
|
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31));
|
|
+
|
|
+
|
|
+// Need to know the total number of registers of all sorts for SharedInfo.
|
|
+// Define a class that exports it.
|
|
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
|
+ public:
|
|
+ enum {
|
|
+ // A big enough number for C2: all the registers plus flags
|
|
+ // This number must be large enough to cover REG_COUNT (defined by c2) registers.
|
|
+ // There is no requirement that any ordering here matches any ordering c2 gives
|
|
+    // its optoregs.
|
|
+
|
|
+ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
|
|
+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
|
|
+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers)
|
|
+ };
|
|
+
|
|
+ // added to make it compile
|
|
+ static const int max_gpr;
|
|
+ static const int max_fpr;
|
|
+ static const int max_vpr;
|
|
+};
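With the per-register slot counts used above (2 slots for each of the 32 general and 32 float registers, 4 slots for each of the 32 vector registers), the cumulative bounds defined in register_riscv.cpp work out to 64, 128 and 256 slots. A quick arithmetic check (illustrative only):

#include <cassert>

int main() {
  const int max_gpr = 32 * 2;             // general-purpose register slots
  const int max_fpr = max_gpr + 32 * 2;   // plus float register slots
  const int max_vpr = max_fpr + 32 * 4;   // plus vector register slots
  assert(max_gpr == 64 && max_fpr == 128 && max_vpr == 256);
  assert(max_vpr == 256);                 // equals ConcreteRegisterImpl::number_of_registers
  return 0;
}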
|
|
+
|
|
+// A set of registers
|
|
+class RegSet {
|
|
+ uint32_t _bitset;
|
|
+
|
|
+public:
|
|
+ RegSet(uint32_t bitset) : _bitset(bitset) { }
|
|
+
|
|
+ RegSet() : _bitset(0) { }
|
|
+
|
|
+ RegSet(Register r1) : _bitset(r1->bit()) { }
|
|
+
|
|
+ ~RegSet() {}
|
|
+
|
|
+ RegSet operator+(const RegSet aSet) const {
|
|
+ RegSet result(_bitset | aSet._bitset);
|
|
+ return result;
|
|
+ }
|
|
+
|
|
+ RegSet operator-(const RegSet aSet) const {
|
|
+ RegSet result(_bitset & ~aSet._bitset);
|
|
+ return result;
|
|
+ }
|
|
+
|
|
+ RegSet &operator+=(const RegSet aSet) {
|
|
+ *this = *this + aSet;
|
|
+ return *this;
|
|
+ }
|
|
+
|
|
+ static RegSet of(Register r1) {
|
|
+ return RegSet(r1);
|
|
+ }
|
|
+
|
|
+ static RegSet of(Register r1, Register r2) {
|
|
+ return of(r1) + r2;
|
|
+ }
|
|
+
|
|
+ static RegSet of(Register r1, Register r2, Register r3) {
|
|
+ return of(r1, r2) + r3;
|
|
+ }
|
|
+
|
|
+ static RegSet of(Register r1, Register r2, Register r3, Register r4) {
|
|
+ return of(r1, r2, r3) + r4;
|
|
+ }
|
|
+
|
|
+ static RegSet range(Register start, Register end) {
|
|
+ uint32_t bits = ~0;
|
|
+ bits <<= start->encoding();
|
|
+ bits <<= (31 - end->encoding());
|
|
+ bits >>= (31 - end->encoding());
|
|
+
|
|
+ return RegSet(bits);
|
|
+ }
|
|
+
|
|
+ uint32_t bits() const { return _bitset; }
|
|
+};
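As a worked example of the shift arithmetic in RegSet::range above: shifting an all-ones word left by start's encoding clears the bits below start, and the left/right shift pair by 31 - end clears everything above end. A standalone check with illustrative encodings (not tied to any particular registers in the patch):

#include <cassert>
#include <cstdint>

static uint32_t range_mask(int start_enc, int end_enc) {
  uint32_t bits = ~0u;
  bits <<= start_enc;        // drop bits below 'start'
  bits <<= (31 - end_enc);   // push bits above 'end' out the top...
  bits >>= (31 - end_enc);   // ...then shift back, leaving them cleared
  return bits;
}

int main() {
  assert(range_mask(5, 7)  == 0xe0u);        // encodings 5..7 -> bits 5, 6 and 7
  assert(range_mask(0, 31) == 0xffffffffu);  // the whole 32-register file
  return 0;
}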
|
|
+
|
|
+#endif // CPU_RISCV_REGISTER_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..f49fd6439
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
|
|
@@ -0,0 +1,113 @@
|
|
+/*
|
|
+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "code/relocInfo.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "runtime/safepoint.hpp"
|
|
+
|
|
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
|
|
+ if (verify_only) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ int bytes;
|
|
+
|
|
+ switch(type()) {
|
|
+ case relocInfo::oop_type: {
|
|
+ oop_Relocation *reloc = (oop_Relocation *)this;
|
|
+ // in movoop when immediate == false
|
|
+ if (NativeInstruction::is_load_pc_relative_at(addr())) {
|
|
+ address constptr = (address)code()->oop_addr_at(reloc->oop_index());
|
|
+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr);
|
|
+ assert(*(address*)constptr == x, "error in oop relocation");
|
|
+ } else {
|
|
+ bytes = MacroAssembler::patch_oop(addr(), x);
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
|
|
+ break;
|
|
+ }
|
|
+ ICache::invalidate_range(addr(), bytes);
|
|
+}
|
|
+
|
|
+address Relocation::pd_call_destination(address orig_addr) {
|
|
+ assert(is_call(), "should be an address instruction here");
|
|
+ if (NativeCall::is_call_at(addr())) {
|
|
+ address trampoline = nativeCall_at(addr())->get_trampoline();
|
|
+ if (trampoline != NULL) {
|
|
+ return nativeCallTrampolineStub_at(trampoline)->destination();
|
|
+ }
|
|
+ }
|
|
+ if (orig_addr != NULL) {
|
|
+    // Extract the target address encoded by the instructions at orig_addr.
|
|
+ address new_addr = MacroAssembler::pd_call_destination(orig_addr);
|
|
+ // If call is branch to self, don't try to relocate it, just leave it
|
|
+ // as branch to self. This happens during code generation if the code
|
|
+ // buffer expands. It will be relocated to the trampoline above once
|
|
+ // code generation is complete.
|
|
+ new_addr = (new_addr == orig_addr) ? addr() : new_addr;
|
|
+ return new_addr;
|
|
+ }
|
|
+ return MacroAssembler::pd_call_destination(addr());
|
|
+}
|
|
+
|
|
+void Relocation::pd_set_call_destination(address x) {
|
|
+ assert(is_call(), "should be an address instruction here");
|
|
+ if (NativeCall::is_call_at(addr())) {
|
|
+ address trampoline = nativeCall_at(addr())->get_trampoline();
|
|
+ if (trampoline != NULL) {
|
|
+ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ MacroAssembler::pd_patch_instruction_size(addr(), x);
|
|
+ address pd_call = pd_call_destination(addr());
|
|
+ assert(pd_call == x, "fail in reloc");
|
|
+}
|
|
+
|
|
+address* Relocation::pd_address_in_code() {
|
|
+ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!");
|
|
+ return (address*)(MacroAssembler::target_addr_for_insn(addr()));
|
|
+}
|
|
+
|
|
+address Relocation::pd_get_address_from_code() {
|
|
+ return MacroAssembler::pd_call_destination(addr());
|
|
+}
|
|
+
|
|
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
|
|
+ if (NativeInstruction::maybe_cpool_ref(addr())) {
|
|
+ address old_addr = old_addr_for(addr(), src, dest);
|
|
+ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr));
|
|
+ }
|
|
+}
|
|
+
|
|
+void metadata_Relocation::pd_fix_value(address x) {
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..c30150e0a
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
|
|
@@ -0,0 +1,45 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP
|
|
+#define CPU_RISCV_RELOCINFO_RISCV_HPP
|
|
+
|
|
+ // machine-dependent parts of class relocInfo
|
|
+ private:
|
|
+ enum {
|
|
+ // Relocations are byte-aligned.
|
|
+ offset_unit = 1,
|
|
+ // We don't use format().
|
|
+ format_width = 0
|
|
+ };
|
|
+
|
|
+ public:
|
|
+
|
|
+ // This platform has no oops in the code that are not also
|
|
+ // listed in the oop section.
|
|
+ static bool mustIterateImmediateOopsInCode() { return false; }
|
|
+
|
|
+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
|
|
new file mode 100644
|
|
index 000000000..137e9b7c7
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/riscv.ad
|
|
@@ -0,0 +1,10685 @@
|
|
+//
|
|
+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+//
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
+// published by the Free Software Foundation.
|
|
+//
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
+// accompanied this code).
|
|
+//
|
|
+// You should have received a copy of the GNU General Public License version
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+//
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
+// questions.
|
|
+//
|
|
+//
|
|
+
|
|
+// RISCV Architecture Description File
|
|
+
|
|
+//----------REGISTER DEFINITION BLOCK------------------------------------------
|
|
+// This information is used by the matcher and the register allocator to
|
|
+// describe individual registers and classes of registers within the target
|
|
+// archtecture.
|
|
+
|
|
+register %{
|
|
+//----------Architecture Description Register Definitions----------------------
|
|
+// General Registers
|
|
+// "reg_def" name ( register save type, C convention save type,
|
|
+// ideal register type, encoding );
|
|
+// Register Save Types:
|
|
+//
|
|
+// NS = No-Save: The register allocator assumes that these registers
|
|
+// can be used without saving upon entry to the method, &
|
|
+// that they do not need to be saved at call sites.
|
|
+//
|
|
+// SOC = Save-On-Call: The register allocator assumes that these registers
|
|
+// can be used without saving upon entry to the method,
|
|
+// but that they must be saved at call sites.
|
|
+//
|
|
+// SOE = Save-On-Entry: The register allocator assumes that these registers
|
|
+// must be saved before using them upon entry to the
|
|
+// method, but they do not need to be saved at call
|
|
+// sites.
|
|
+//
|
|
+// AS = Always-Save: The register allocator assumes that these registers
|
|
+// must be saved before using them upon entry to the
|
|
+// method, & that they must be saved at call sites.
|
|
+//
|
|
+// Ideal Register Type is used to determine how to save & restore a
|
|
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
|
|
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
|
|
+//
|
|
+// The encoding number is the actual bit-pattern placed into the opcodes.
|
|
+
|
|
+// We must define the 64 bit int registers in two 32 bit halves, the
|
|
+// real lower register and a virtual upper half register. upper halves
|
|
+// are used by the register allocator but are not actually supplied as
|
|
+// operands to memory ops.
|
|
+//
|
|
+// follow the C1 compiler in making registers
|
|
+//
|
|
+// x7, x9-x17, x28-x31 volatile (caller save)
|
|
+// x0-x4, x8, x27 system (no save, no allocate)
|
|
+// x5-x6 non-allocatable (so we can use them as temporary regs)
|
|
+
|
|
+//
|
|
+// As regards Java usage, we don't use any callee save registers
|
|
+// because this makes it difficult to de-optimise a frame (see comment
|
|
+// in x86 implementation of Deoptimization::unwind_callee_save_values)
|
|
+//
|
|
+
|
|
+// General Registers
|
|
+
|
|
+reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr
|
|
+reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() );
|
|
+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra
|
|
+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() );
|
|
+reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp
|
|
+reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() );
|
|
+reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp
|
|
+reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() );
|
|
+reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp
|
|
+reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() );
|
|
+reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() );
|
|
+reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() );
|
|
+reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp
|
|
+reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() );
|
|
+reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() );
|
|
+reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() );
|
|
+reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() );
|
|
+reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());
|
|
+reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() );
|
|
+reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next());
|
|
+reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() );
|
|
+reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next());
|
|
+reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() );
|
|
+reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next());
|
|
+reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() );
|
|
+reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next());
|
|
+reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() );
|
|
+reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next());
|
|
+reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() );
|
|
+reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next());
|
|
+reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() );
|
|
+reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next());
|
|
+reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() );
|
|
+reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next());
|
|
+reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() );
|
|
+reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next());
|
|
+reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp
|
|
+reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next());
|
|
+reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() );
|
|
+reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next());
|
|
+reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() );
|
|
+reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next());
|
|
+reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread
|
|
+reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next());
|
|
+reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() );
|
|
+reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next());
|
|
+reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() );
|
|
+reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next());
|
|
+reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() );
|
|
+reg_def R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next());
|
|
+reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase
|
|
+reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next());
|
|
+reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() );
|
|
+reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next());
|
|
+reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() );
|
|
+reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next());
|
|
+reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() );
|
|
+reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next());
|
|
+reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() );
|
|
+reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next());
|
|
+
|
|
+// ----------------------------
|
|
+// Float/Double Registers
|
|
+// ----------------------------
|
|
+
|
|
+// Double Registers
|
|
+
|
|
+// The rules of ADL require that double registers be defined in pairs.
|
|
+// Each pair must be two 32-bit values, but not necessarily a pair of
|
|
+// single float registers. In each pair, ADLC-assigned register numbers
|
|
+// must be adjacent, with the lower number even. Finally, when the
|
|
+// CPU stores such a register pair to memory, the word associated with
|
|
+// the lower ADLC-assigned number must be stored to the lower address.
|
|
+
|
|
+// RISCV has 32 floating-point registers. Each can store a single
|
|
+// or double precision floating-point value.
|
|
+
|
|
+// For Java use, float registers f0-f31 are always save-on-call, whereas
|
|
+// the platform ABI treats f8-f9 and f18-f27 as callee save. Other
|
|
+// float registers are SOC as per the platform spec.
|
|
+
|
|
+reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() );
|
|
+reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() );
|
|
+reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() );
|
|
+reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() );
|
|
+reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() );
|
|
+reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() );
|
|
+reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() );
|
|
+reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() );
|
|
+reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() );
|
|
+reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() );
|
|
+reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() );
|
|
+reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() );
|
|
+reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() );
|
|
+reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() );
|
|
+reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() );
|
|
+reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() );
|
|
+reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() );
|
|
+reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() );
|
|
+reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() );
|
|
+reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() );
|
|
+reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() );
|
|
+reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() );
|
|
+reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() );
|
|
+reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() );
|
|
+reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() );
|
|
+reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() );
|
|
+reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() );
|
|
+reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() );
|
|
+reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() );
|
|
+reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() );
|
|
+reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() );
|
|
+reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() );
|
|
+reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg() );
|
|
+reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() );
|
|
+reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() );
|
|
+reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() );
|
|
+reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() );
|
|
+reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() );
|
|
+reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() );
|
|
+reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() );
|
|
+reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() );
|
|
+reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() );
|
|
+reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() );
|
|
+reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() );
|
|
+reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() );
|
|
+reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() );
|
|
+reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() );
|
|
+reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() );
|
|
+reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() );
|
|
+reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() );
|
|
+reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() );
|
|
+reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() );
|
|
+reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() );
|
|
+reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() );
|
|
+reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() );
|
|
+reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() );
|
|
+reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() );
|
|
+reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() );
|
|
+reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() );
|
|
+reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() );
|
|
+reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() );
|
|
+reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() );
|
|
+reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() );
|
|
+reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() );
|
|
+
|
|
+// ----------------------------
|
|
+// Vector Registers
|
|
+// ----------------------------
|
|
+
|
|
+// For RVV vector registers, we simply extend vector register size to 4
|
|
+// 'logical' slots. This is nominally 128 bits but it actually covers
|
|
+// all possible 'physical' RVV vector register lengths from 128 ~ 1024
|
|
+// bits. The 'physical' RVV vector register length is detected during
|
|
+// startup, so the register allocator is able to identify the correct
|
|
+// number of bytes needed for an RVV spill/unspill.
|
|
+// For Java use, vector registers v0-v31 are always save-on-call, just
|
|
+// as the platform ABI treats v0-v31 as caller save.
|
|
+
|
|
+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() );
|
|
+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() );
|
|
+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) );
|
|
+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() );
|
|
+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() );
|
|
+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) );
|
|
+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() );
|
|
+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() );
|
|
+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) );
|
|
+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() );
|
|
+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() );
|
|
+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) );
|
|
+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() );
|
|
+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() );
|
|
+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) );
|
|
+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() );
|
|
+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() );
|
|
+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) );
|
|
+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() );
|
|
+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() );
|
|
+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) );
|
|
+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() );
|
|
+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() );
|
|
+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) );
|
|
+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() );
|
|
+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() );
|
|
+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) );
|
|
+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() );
|
|
+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() );
|
|
+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) );
|
|
+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() );
|
|
+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() );
|
|
+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) );
|
|
+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() );
|
|
+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() );
|
|
+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) );
|
|
+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() );
|
|
+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() );
|
|
+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) );
|
|
+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() );
|
|
+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() );
|
|
+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) );
|
|
+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() );
|
|
+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() );
|
|
+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) );
|
|
+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() );
|
|
+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() );
|
|
+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) );
|
|
+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() );
|
|
+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() );
|
|
+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) );
|
|
+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() );
|
|
+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() );
|
|
+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) );
|
|
+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() );
|
|
+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() );
|
|
+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) );
|
|
+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() );
|
|
+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() );
|
|
+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) );
|
|
+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() );
|
|
+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() );
|
|
+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) );
|
|
+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() );
|
|
+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() );
|
|
+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) );
|
|
+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() );
|
|
+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() );
|
|
+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) );
|
|
+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() );
|
|
+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() );
|
|
+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) );
|
|
+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() );
|
|
+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() );
|
|
+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) );
|
|
+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() );
|
|
+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() );
|
|
+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) );
|
|
+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() );
|
|
+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() );
|
|
+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) );
|
|
+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() );
|
|
+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() );
|
|
+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) );
|
|
+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() );
|
|
+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() );
|
|
+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) );
|
|
+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() );
|
|
+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() );
|
|
+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) );
|
|
+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() );
|
|
+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() );
|
|
+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) );
|
|
+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) );
|
|
+
|
|
+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() );
|
|
+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() );
|
|
+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) );
|
|
+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) );
|
|
+
|
|
+// ----------------------------
|
|
+// Special Registers
|
|
+// ----------------------------
|
|
+
|
|
+// On riscv, the physical flag register is missing, so we use t1 instead,
|
|
+// to bridge the RegFlag semantics in share/opto
|
|
+
|
|
+reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() );
|
|
+
|
|
+// Specify priority of register selection within phases of register
|
|
+// allocation. Highest priority is first. A useful heuristic is to
|
|
+// give registers a low priority when they are required by machine
|
|
+// instructions, like EAX and EDX on I486, and choose no-save registers
|
|
+// before save-on-call, & save-on-call before save-on-entry. Registers
|
|
+// which participate in fixed calling sequences should come last.
|
|
+// Registers which are used as pairs must fall on an even boundary.
|
|
+
|
|
+alloc_class chunk0(
|
|
+ // volatiles
|
|
+ R7, R7_H,
|
|
+ R28, R28_H,
|
|
+ R29, R29_H,
|
|
+ R30, R30_H,
|
|
+ R31, R31_H,
|
|
+
|
|
+ // arg registers
|
|
+ R10, R10_H,
|
|
+ R11, R11_H,
|
|
+ R12, R12_H,
|
|
+ R13, R13_H,
|
|
+ R14, R14_H,
|
|
+ R15, R15_H,
|
|
+ R16, R16_H,
|
|
+ R17, R17_H,
|
|
+
|
|
+ // non-volatiles
|
|
+ R9, R9_H,
|
|
+ R18, R18_H,
|
|
+ R19, R19_H,
|
|
+ R20, R20_H,
|
|
+ R21, R21_H,
|
|
+ R22, R22_H,
|
|
+ R24, R24_H,
|
|
+ R25, R25_H,
|
|
+ R26, R26_H,
|
|
+
|
|
+ // non-allocatable registers
|
|
+ R23, R23_H, // java thread
|
|
+ R27, R27_H, // heapbase
|
|
+ R4, R4_H, // thread
|
|
+ R8, R8_H, // fp
|
|
+ R0, R0_H, // zero
|
|
+ R1, R1_H, // ra
|
|
+ R2, R2_H, // sp
|
|
+ R3, R3_H, // gp
|
|
+);
|
|
+
|
|
+alloc_class chunk1(
|
|
+
|
|
+ // no save
|
|
+ F0, F0_H,
|
|
+ F1, F1_H,
|
|
+ F2, F2_H,
|
|
+ F3, F3_H,
|
|
+ F4, F4_H,
|
|
+ F5, F5_H,
|
|
+ F6, F6_H,
|
|
+ F7, F7_H,
|
|
+ F28, F28_H,
|
|
+ F29, F29_H,
|
|
+ F30, F30_H,
|
|
+ F31, F31_H,
|
|
+
|
|
+ // arg registers
|
|
+ F10, F10_H,
|
|
+ F11, F11_H,
|
|
+ F12, F12_H,
|
|
+ F13, F13_H,
|
|
+ F14, F14_H,
|
|
+ F15, F15_H,
|
|
+ F16, F16_H,
|
|
+ F17, F17_H,
|
|
+
|
|
+ // non-volatiles
|
|
+ F8, F8_H,
|
|
+ F9, F9_H,
|
|
+ F18, F18_H,
|
|
+ F19, F19_H,
|
|
+ F20, F20_H,
|
|
+ F21, F21_H,
|
|
+ F22, F22_H,
|
|
+ F23, F23_H,
|
|
+ F24, F24_H,
|
|
+ F25, F25_H,
|
|
+ F26, F26_H,
|
|
+ F27, F27_H,
|
|
+);
|
|
+
|
|
+alloc_class chunk2(
|
|
+ V0, V0_H, V0_J, V0_K,
|
|
+ V1, V1_H, V1_J, V1_K,
|
|
+ V2, V2_H, V2_J, V2_K,
|
|
+ V3, V3_H, V3_J, V3_K,
|
|
+ V4, V4_H, V4_J, V4_K,
|
|
+ V5, V5_H, V5_J, V5_K,
|
|
+ V6, V6_H, V6_J, V6_K,
|
|
+ V7, V7_H, V7_J, V7_K,
|
|
+ V8, V8_H, V8_J, V8_K,
|
|
+ V9, V9_H, V9_J, V9_K,
|
|
+ V10, V10_H, V10_J, V10_K,
|
|
+ V11, V11_H, V11_J, V11_K,
|
|
+ V12, V12_H, V12_J, V12_K,
|
|
+ V13, V13_H, V13_J, V13_K,
|
|
+ V14, V14_H, V14_J, V14_K,
|
|
+ V15, V15_H, V15_J, V15_K,
|
|
+ V16, V16_H, V16_J, V16_K,
|
|
+ V17, V17_H, V17_J, V17_K,
|
|
+ V18, V18_H, V18_J, V18_K,
|
|
+ V19, V19_H, V19_J, V19_K,
|
|
+ V20, V20_H, V20_J, V20_K,
|
|
+ V21, V21_H, V21_J, V21_K,
|
|
+ V22, V22_H, V22_J, V22_K,
|
|
+ V23, V23_H, V23_J, V23_K,
|
|
+ V24, V24_H, V24_J, V24_K,
|
|
+ V25, V25_H, V25_J, V25_K,
|
|
+ V26, V26_H, V26_J, V26_K,
|
|
+ V27, V27_H, V27_J, V27_K,
|
|
+ V28, V28_H, V28_J, V28_K,
|
|
+ V29, V29_H, V29_J, V29_K,
|
|
+ V30, V30_H, V30_J, V30_K,
|
|
+ V31, V31_H, V31_J, V31_K,
|
|
+);
|
|
+
|
|
+alloc_class chunk3(RFLAGS);
|
|
+
|
|
+//----------Architecture Description Register Classes--------------------------
|
|
+// Several register classes are automatically defined based upon information in
|
|
+// this architecture description.
|
|
+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
|
|
+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
|
|
+// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
|
|
+// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
|
|
+//
|
|
+
|
|
+// Class for all 32 bit general purpose registers
|
|
+reg_class all_reg32(
|
|
+ R0,
|
|
+ R1,
|
|
+ R2,
|
|
+ R3,
|
|
+ R4,
|
|
+ R7,
|
|
+ R8,
|
|
+ R9,
|
|
+ R10,
|
|
+ R11,
|
|
+ R12,
|
|
+ R13,
|
|
+ R14,
|
|
+ R15,
|
|
+ R16,
|
|
+ R17,
|
|
+ R18,
|
|
+ R19,
|
|
+ R20,
|
|
+ R21,
|
|
+ R22,
|
|
+ R23,
|
|
+ R24,
|
|
+ R25,
|
|
+ R26,
|
|
+ R27,
|
|
+ R28,
|
|
+ R29,
|
|
+ R30,
|
|
+ R31
|
|
+);
|
|
+
|
|
+// Class for any 32 bit integer registers (excluding zr)
|
|
+reg_class any_reg32 %{
|
|
+ return _ANY_REG32_mask;
|
|
+%}
|
|
+
|
|
+// Singleton class for R10 int register
|
|
+reg_class int_r10_reg(R10);
|
|
+
|
|
+// Singleton class for R12 int register
|
|
+reg_class int_r12_reg(R12);
|
|
+
|
|
+// Singleton class for R13 int register
|
|
+reg_class int_r13_reg(R13);
|
|
+
|
|
+// Singleton class for R14 int register
|
|
+reg_class int_r14_reg(R14);
|
|
+
|
|
+// Class for all long integer registers
|
|
+reg_class all_reg(
|
|
+ R0, R0_H,
|
|
+ R1, R1_H,
|
|
+ R2, R2_H,
|
|
+ R3, R3_H,
|
|
+ R4, R4_H,
|
|
+ R7, R7_H,
|
|
+ R8, R8_H,
|
|
+ R9, R9_H,
|
|
+ R10, R10_H,
|
|
+ R11, R11_H,
|
|
+ R12, R12_H,
|
|
+ R13, R13_H,
|
|
+ R14, R14_H,
|
|
+ R15, R15_H,
|
|
+ R16, R16_H,
|
|
+ R17, R17_H,
|
|
+ R18, R18_H,
|
|
+ R19, R19_H,
|
|
+ R20, R20_H,
|
|
+ R21, R21_H,
|
|
+ R22, R22_H,
|
|
+ R23, R23_H,
|
|
+ R24, R24_H,
|
|
+ R25, R25_H,
|
|
+ R26, R26_H,
|
|
+ R27, R27_H,
|
|
+ R28, R28_H,
|
|
+ R29, R29_H,
|
|
+ R30, R30_H,
|
|
+ R31, R31_H
|
|
+);
|
|
+
|
|
+// Class for all long integer registers (excluding zr)
|
|
+reg_class any_reg %{
|
|
+ return _ANY_REG_mask;
|
|
+%}
|
|
+
|
|
+// Class for non-allocatable 32 bit registers
|
|
+reg_class non_allocatable_reg32(
|
|
+ R0, // zr
|
|
+ R1, // ra
|
|
+ R2, // sp
|
|
+ R3, // gp
|
|
+ R4, // tp
|
|
+ R23 // java thread
|
|
+);
|
|
+
|
|
+// Class for non-allocatable 64 bit registers
|
|
+reg_class non_allocatable_reg(
|
|
+ R0, R0_H, // zr
|
|
+ R1, R1_H, // ra
|
|
+ R2, R2_H, // sp
|
|
+ R3, R3_H, // gp
|
|
+ R4, R4_H, // tp
|
|
+ R23, R23_H // java thread
|
|
+);
|
|
+
|
|
+reg_class no_special_reg32 %{
|
|
+ return _NO_SPECIAL_REG32_mask;
|
|
+%}
|
|
+
|
|
+reg_class no_special_reg %{
|
|
+ return _NO_SPECIAL_REG_mask;
|
|
+%}
|
|
+
|
|
+reg_class ptr_reg %{
|
|
+ return _PTR_REG_mask;
|
|
+%}
|
|
+
|
|
+reg_class no_special_ptr_reg %{
|
|
+ return _NO_SPECIAL_PTR_REG_mask;
|
|
+%}
|
|
+
|
|
+// Class for 64 bit register r10
|
|
+reg_class r10_reg(
|
|
+ R10, R10_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r11
|
|
+reg_class r11_reg(
|
|
+ R11, R11_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r12
|
|
+reg_class r12_reg(
|
|
+ R12, R12_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r13
|
|
+reg_class r13_reg(
|
|
+ R13, R13_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r14
|
|
+reg_class r14_reg(
|
|
+ R14, R14_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r15
|
|
+reg_class r15_reg(
|
|
+ R15, R15_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register r16
|
|
+reg_class r16_reg(
|
|
+ R16, R16_H
|
|
+);
|
|
+
|
|
+// Class for method register
|
|
+reg_class method_reg(
|
|
+ R31, R31_H
|
|
+);
|
|
+
|
|
+// Class for heapbase register
|
|
+reg_class heapbase_reg(
|
|
+ R27, R27_H
|
|
+);
|
|
+
|
|
+// Class for java thread register
|
|
+reg_class java_thread_reg(
|
|
+ R23, R23_H
|
|
+);
|
|
+
|
|
+reg_class r28_reg(
|
|
+ R28, R28_H
|
|
+);
|
|
+
|
|
+reg_class r29_reg(
|
|
+ R29, R29_H
|
|
+);
|
|
+
|
|
+reg_class r30_reg(
|
|
+ R30, R30_H
|
|
+);
|
|
+
|
|
+// Class for zero register
|
|
+reg_class zr_reg(
|
|
+ R0, R0_H
|
|
+);
|
|
+
|
|
+// Class for thread register
|
|
+reg_class thread_reg(
|
|
+ R4, R4_H
|
|
+);
|
|
+
|
|
+// Class for frame pointer register
|
|
+reg_class fp_reg(
|
|
+ R8, R8_H
|
|
+);
|
|
+
|
|
+// Class for link register
|
|
+reg_class lr_reg(
|
|
+ R1, R1_H
|
|
+);
|
|
+
|
|
+// Class for long sp register
|
|
+reg_class sp_reg(
|
|
+ R2, R2_H
|
|
+);
|
|
+
|
|
+// Class for all float registers
|
|
+reg_class float_reg(
|
|
+ F0,
|
|
+ F1,
|
|
+ F2,
|
|
+ F3,
|
|
+ F4,
|
|
+ F5,
|
|
+ F6,
|
|
+ F7,
|
|
+ F8,
|
|
+ F9,
|
|
+ F10,
|
|
+ F11,
|
|
+ F12,
|
|
+ F13,
|
|
+ F14,
|
|
+ F15,
|
|
+ F16,
|
|
+ F17,
|
|
+ F18,
|
|
+ F19,
|
|
+ F20,
|
|
+ F21,
|
|
+ F22,
|
|
+ F23,
|
|
+ F24,
|
|
+ F25,
|
|
+ F26,
|
|
+ F27,
|
|
+ F28,
|
|
+ F29,
|
|
+ F30,
|
|
+ F31
|
|
+);
|
|
+
|
|
+// Double precision float registers have virtual `high halves' that
|
|
+// are needed by the allocator.
|
|
+// Class for all double registers
|
|
+reg_class double_reg(
|
|
+ F0, F0_H,
|
|
+ F1, F1_H,
|
|
+ F2, F2_H,
|
|
+ F3, F3_H,
|
|
+ F4, F4_H,
|
|
+ F5, F5_H,
|
|
+ F6, F6_H,
|
|
+ F7, F7_H,
|
|
+ F8, F8_H,
|
|
+ F9, F9_H,
|
|
+ F10, F10_H,
|
|
+ F11, F11_H,
|
|
+ F12, F12_H,
|
|
+ F13, F13_H,
|
|
+ F14, F14_H,
|
|
+ F15, F15_H,
|
|
+ F16, F16_H,
|
|
+ F17, F17_H,
|
|
+ F18, F18_H,
|
|
+ F19, F19_H,
|
|
+ F20, F20_H,
|
|
+ F21, F21_H,
|
|
+ F22, F22_H,
|
|
+ F23, F23_H,
|
|
+ F24, F24_H,
|
|
+ F25, F25_H,
|
|
+ F26, F26_H,
|
|
+ F27, F27_H,
|
|
+ F28, F28_H,
|
|
+ F29, F29_H,
|
|
+ F30, F30_H,
|
|
+ F31, F31_H
|
|
+);
|
|
+
|
|
+// Class for all RVV vector registers
|
|
+reg_class vectora_reg(
|
|
+ V1, V1_H, V1_J, V1_K,
|
|
+ V2, V2_H, V2_J, V2_K,
|
|
+ V3, V3_H, V3_J, V3_K,
|
|
+ V4, V4_H, V4_J, V4_K,
|
|
+ V5, V5_H, V5_J, V5_K,
|
|
+ V6, V6_H, V6_J, V6_K,
|
|
+ V7, V7_H, V7_J, V7_K,
|
|
+ V8, V8_H, V8_J, V8_K,
|
|
+ V9, V9_H, V9_J, V9_K,
|
|
+ V10, V10_H, V10_J, V10_K,
|
|
+ V11, V11_H, V11_J, V11_K,
|
|
+ V12, V12_H, V12_J, V12_K,
|
|
+ V13, V13_H, V13_J, V13_K,
|
|
+ V14, V14_H, V14_J, V14_K,
|
|
+ V15, V15_H, V15_J, V15_K,
|
|
+ V16, V16_H, V16_J, V16_K,
|
|
+ V17, V17_H, V17_J, V17_K,
|
|
+ V18, V18_H, V18_J, V18_K,
|
|
+ V19, V19_H, V19_J, V19_K,
|
|
+ V20, V20_H, V20_J, V20_K,
|
|
+ V21, V21_H, V21_J, V21_K,
|
|
+ V22, V22_H, V22_J, V22_K,
|
|
+ V23, V23_H, V23_J, V23_K,
|
|
+ V24, V24_H, V24_J, V24_K,
|
|
+ V25, V25_H, V25_J, V25_K,
|
|
+ V26, V26_H, V26_J, V26_K,
|
|
+ V27, V27_H, V27_J, V27_K,
|
|
+ V28, V28_H, V28_J, V28_K,
|
|
+ V29, V29_H, V29_J, V29_K,
|
|
+ V30, V30_H, V30_J, V30_K,
|
|
+ V31, V31_H, V31_J, V31_K
|
|
+);
|
|
+
|
|
+// Class for 64 bit register f0
|
|
+reg_class f0_reg(
|
|
+ F0, F0_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register f1
|
|
+reg_class f1_reg(
|
|
+ F1, F1_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register f2
|
|
+reg_class f2_reg(
|
|
+ F2, F2_H
|
|
+);
|
|
+
|
|
+// Class for 64 bit register f3
|
|
+reg_class f3_reg(
|
|
+ F3, F3_H
|
|
+);
|
|
+
|
|
+// class for vector register v1
|
|
+reg_class v1_reg(
|
|
+ V1, V1_H, V1_J, V1_K
|
|
+);
|
|
+
|
|
+// class for vector register v2
|
|
+reg_class v2_reg(
|
|
+ V2, V2_H, V2_J, V2_K
|
|
+);
|
|
+
|
|
+// class for vector register v3
|
|
+reg_class v3_reg(
|
|
+ V3, V3_H, V3_J, V3_K
|
|
+);
|
|
+
|
|
+// class for vector register v4
|
|
+reg_class v4_reg(
|
|
+ V4, V4_H, V4_J, V4_K
|
|
+);
|
|
+
|
|
+// class for vector register v5
|
|
+reg_class v5_reg(
|
|
+ V5, V5_H, V5_J, V5_K
|
|
+);
|
|
+
|
|
+// class for condition codes
|
|
+reg_class reg_flags(RFLAGS);
|
|
+%}
|
|
+
|
|
+//----------DEFINITION BLOCK---------------------------------------------------
|
|
+// Define name --> value mappings to inform the ADLC of an integer valued name
|
|
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
|
|
+// Format:
|
|
+// int_def <name> ( <int_value>, <expression>);
|
|
+// Generated Code in ad_<arch>.hpp
|
|
+// #define <name> (<expression>)
|
|
+// // value == <int_value>
|
|
+// Generated code in ad_<arch>.cpp adlc_verification()
|
|
+// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
|
|
+//
|
|
+
|
|
+// we follow the ppc-aix port in using a simple cost model which ranks
|
|
+// register operations as cheap, memory ops as more expensive and
|
|
+// branches as most expensive. the first two have a low as well as a
|
|
+// normal cost. huge cost appears to be a way of saying don't do
|
|
+// something
|
|
+
|
|
+definitions %{
|
|
+ // The default cost (of a register move instruction).
|
|
+ int_def DEFAULT_COST ( 100, 100);
|
|
+ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt,
|
|
+ // multi, auipc, nop, logical, move
|
|
+ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload
|
|
+ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore
|
|
+ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp
|
|
+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call
|
|
+ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul
|
|
+  int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivsi
|
|
+  int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivdi
|
|
+ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd
|
|
+ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd
|
|
+ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv
|
|
+ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt
|
|
+%}
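+
+// Illustration only (assuming the int_def expansion described in the
+// DEFINITION BLOCK comment above): for DEFAULT_COST the ADLC would emit
+// roughly
+//
+//   #define DEFAULT_COST (100)                        // in ad_riscv.hpp
+//   assert(DEFAULT_COST == 100,
+//          "Expect (100) to equal 100");              // in adlc_verification()
+//
+// so the names above can be used like ordinary integer constants in the
+// ins_cost() clauses of the instruction rules later in this file.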
|
|
+
|
|
+
|
|
+
|
|
+//----------SOURCE BLOCK-------------------------------------------------------
|
|
+// This is a block of C++ code which provides values, functions, and
|
|
+// definitions necessary in the rest of the architecture description
|
|
+
|
|
+source_hpp %{
|
|
+
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "gc/shared/cardTable.hpp"
|
|
+#include "gc/shared/cardTableBarrierSet.hpp"
|
|
+#include "gc/shared/collectedHeap.hpp"
|
|
+#include "opto/addnode.hpp"
|
|
+#include "opto/convertnode.hpp"
|
|
+
|
|
+extern RegMask _ANY_REG32_mask;
|
|
+extern RegMask _ANY_REG_mask;
|
|
+extern RegMask _PTR_REG_mask;
|
|
+extern RegMask _NO_SPECIAL_REG32_mask;
|
|
+extern RegMask _NO_SPECIAL_REG_mask;
|
|
+extern RegMask _NO_SPECIAL_PTR_REG_mask;
|
|
+
|
|
+class CallStubImpl {
|
|
+
|
|
+ //--------------------------------------------------------------
|
|
+ //---< Used for optimization in Compile::shorten_branches >---
|
|
+ //--------------------------------------------------------------
|
|
+
|
|
+ public:
|
|
+ // Size of call trampoline stub.
|
|
+ static uint size_call_trampoline() {
|
|
+ return 0; // no call trampolines on this platform
|
|
+ }
|
|
+
|
|
+ // number of relocations needed by a call trampoline stub
|
|
+ static uint reloc_call_trampoline() {
|
|
+ return 0; // no call trampolines on this platform
|
|
+ }
|
|
+};
|
|
+
|
|
+class HandlerImpl {
|
|
+
|
|
+ public:
|
|
+
|
|
+ static int emit_exception_handler(CodeBuffer &cbuf);
|
|
+ static int emit_deopt_handler(CodeBuffer& cbuf);
|
|
+
|
|
+ static uint size_exception_handler() {
|
|
+ return MacroAssembler::far_branch_size();
|
|
+ }
|
|
+
|
|
+ static uint size_deopt_handler() {
|
|
+ // count auipc + far branch
|
|
+ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
|
|
+ }
|
|
+};
|
|
+
|
|
+// predicate controlling translation of StoreCM
|
|
+bool unnecessary_storestore(const Node *storecm);
|
|
+
|
|
+bool is_CAS(int opcode, bool maybe_volatile);
|
|
+
|
|
+// predicate controlling translation of CompareAndSwapX
|
|
+bool needs_acquiring_load_exclusive(const Node *load);
|
|
+
|
|
+
|
|
+// predicate controlling whether a temp register is needed for decoding a klass
|
|
+bool maybe_use_tmp_register_decoding_klass();
|
|
+%}
|
|
+
|
|
+source %{
|
|
+
|
|
+ // Derived RegMask with conditionally allocatable registers
|
|
+
|
|
+ RegMask _ANY_REG32_mask;
|
|
+ RegMask _ANY_REG_mask;
|
|
+ RegMask _PTR_REG_mask;
|
|
+ RegMask _NO_SPECIAL_REG32_mask;
|
|
+ RegMask _NO_SPECIAL_REG_mask;
|
|
+ RegMask _NO_SPECIAL_PTR_REG_mask;
|
|
+
|
|
+ void reg_mask_init() {
|
|
+
|
|
+ _ANY_REG32_mask = _ALL_REG32_mask;
|
|
+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg()));
|
|
+
|
|
+ _ANY_REG_mask = _ALL_REG_mask;
|
|
+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask);
|
|
+
|
|
+ _PTR_REG_mask = _ALL_REG_mask;
|
|
+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask);
|
|
+
|
|
+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
|
|
+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
|
|
+
|
|
+ _NO_SPECIAL_REG_mask = _ALL_REG_mask;
|
|
+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
|
|
+
|
|
+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
|
|
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
|
|
+
|
|
+ // x27 is not allocatable when compressed oops is on
|
|
+ if (UseCompressedOops) {
|
|
+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
|
|
+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
|
|
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
|
|
+ }
|
|
+
|
|
+ // x8 is not allocatable when PreserveFramePointer is on
|
|
+ if (PreserveFramePointer) {
|
|
+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
|
|
+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
|
|
+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
|
|
+ }
|
|
+ }
|
|
+
|
|
+
|
|
+// predicate controlling translation of StoreCM
|
|
+//
|
|
+// returns true if no StoreStore barrier is needed before the card write
|
|
+// (i.e. the StoreStore is unnecessary), false otherwise
|
|
+bool unnecessary_storestore(const Node *storecm)
|
|
+{
|
|
+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");
|
|
+
|
|
+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore)
|
|
+ // between an object put and the associated card mark when we are using
|
|
+ // CMS without conditional card marking
|
|
+
|
|
+ if (UseConcMarkSweepGC && !UseCondCardMark) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+  // a storestore is unnecessary in all other cases
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+// is_CAS(int opcode, bool maybe_volatile)
|
|
+//
|
|
+// return true if opcode is one of the possible CompareAndSwapX
|
|
+// values otherwise false.
|
|
+bool is_CAS(int opcode, bool maybe_volatile)
|
|
+{
|
|
+ switch(opcode) {
|
|
+ // We handle these
|
|
+ case Op_CompareAndSwapI:
|
|
+ case Op_CompareAndSwapL:
|
|
+ case Op_CompareAndSwapP:
|
|
+ case Op_CompareAndSwapN:
|
|
+ case Op_CompareAndSwapB:
|
|
+ case Op_CompareAndSwapS:
|
|
+ case Op_GetAndSetI:
|
|
+ case Op_GetAndSetL:
|
|
+ case Op_GetAndSetP:
|
|
+ case Op_GetAndSetN:
|
|
+ case Op_GetAndAddI:
|
|
+ case Op_GetAndAddL:
|
|
+#if INCLUDE_SHENANDOAHGC
|
|
+ case Op_ShenandoahCompareAndSwapP:
|
|
+ case Op_ShenandoahCompareAndSwapN:
|
|
+#endif
|
|
+ return true;
|
|
+ case Op_CompareAndExchangeI:
|
|
+ case Op_CompareAndExchangeN:
|
|
+ case Op_CompareAndExchangeB:
|
|
+ case Op_CompareAndExchangeS:
|
|
+ case Op_CompareAndExchangeL:
|
|
+ case Op_CompareAndExchangeP:
|
|
+ case Op_WeakCompareAndSwapB:
|
|
+ case Op_WeakCompareAndSwapS:
|
|
+ case Op_WeakCompareAndSwapI:
|
|
+ case Op_WeakCompareAndSwapL:
|
|
+ case Op_WeakCompareAndSwapP:
|
|
+ case Op_WeakCompareAndSwapN:
|
|
+ return maybe_volatile;
|
|
+ default:
|
|
+ return false;
|
|
+ }
|
|
+}
|
|
+
|
|
+// predicate controlling translation of CAS
|
|
+//
|
|
+// returns true if CAS needs to use an acquiring load otherwise false
|
|
+bool needs_acquiring_load_exclusive(const Node *n)
|
|
+{
|
|
+ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap");
|
|
+ if (UseBarriersForVolatile) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ LoadStoreNode* ldst = n->as_LoadStore();
|
|
+ if (n != NULL && is_CAS(n->Opcode(), false)) {
|
|
+ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar");
|
|
+ } else {
|
|
+ return ldst != NULL && ldst->trailing_membar() != NULL;
|
|
+ }
|
|
+ // so we can just return true here
|
|
+ return true;
|
|
+}
|
|
+
|
|
+bool maybe_use_tmp_register_decoding_klass() {
|
|
+ return !UseCompressedOops &&
|
|
+ Universe::narrow_klass_base() != NULL &&
|
|
+ Universe::narrow_klass_shift() != 0;
|
|
+}
|
|
+#define __ _masm.
|
|
+
|
|
+// advance declarations for helper functions to convert register
|
|
+// indices to register objects
|
|
+
|
|
+// the ad file has to provide implementations of certain methods
|
|
+// expected by the generic code
|
|
+//
|
|
+// REQUIRED FUNCTIONALITY
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+// !!!!! Special hack to get all types of calls to specify the byte offset
|
|
+// from the start of the call to the point where the return address
|
|
+// will point.
|
|
+
|
|
+int MachCallStaticJavaNode::ret_addr_offset()
|
|
+{
|
|
+ // call should be a simple jal
|
|
+ int off = 4;
|
|
+ return off;
|
|
+}
|
|
+
|
|
+int MachCallDynamicJavaNode::ret_addr_offset()
|
|
+{
|
|
+ return 28; // movptr, jal
|
|
+}
|
|
+
|
|
+int MachCallRuntimeNode::ret_addr_offset() {
|
|
+ // for generated stubs the call will be
|
|
+ // jal(addr)
|
|
+ // or with far branches
|
|
+ // jal(trampoline_stub)
|
|
+  // for real runtime callouts it will be the following sequence
|
|
+ // see riscv_enc_java_to_runtime
|
|
+ // la(t1, retaddr)
|
|
+ // la(t0, RuntimeAddress(addr))
|
|
+ // addi(sp, sp, -2 * wordSize)
|
|
+ // sd(zr, Address(sp))
|
|
+ // sd(t1, Address(sp, wordSize))
|
|
+ // jalr(t0)
|
|
+ CodeBlob *cb = CodeCache::find_blob(_entry_point);
|
|
+ if (cb != NULL) {
|
|
+ return 1 * NativeInstruction::instruction_size;
|
|
+ } else {
|
|
+ return 11 * NativeInstruction::instruction_size;
|
|
+ }
|
|
+}
|
|
+
|
|
+// Indicate if the safepoint node needs the polling page as an input
|
|
+
|
|
+// the shared code plants the oop data at the start of the generated
|
|
+// code for the safepoint node and that needs to be at the load
|
|
+// instruction itself. so we cannot plant a mov of the safepoint poll
|
|
+// address followed by a load. setting this to true means the mov is
|
|
+// scheduled as a prior instruction. that's better for scheduling
|
|
+// anyway.
|
|
+
|
|
+bool SafePointNode::needs_polling_address_input()
|
|
+{
|
|
+ return true;
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
|
+ assert_cond(st != NULL);
|
|
+ st->print("BREAKPOINT");
|
|
+}
|
|
+#endif
|
|
+
|
|
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ ebreak();
|
|
+}
|
|
+
|
|
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
|
|
+ return MachNode::size(ra_);
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
|
|
+ st->print("nop \t# %d bytes pad for loops and calls", _count);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ for (int i = 0; i < _count; i++) {
|
|
+ __ nop();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ uint MachNopNode::size(PhaseRegAlloc*) const {
|
|
+ return _count * NativeInstruction::instruction_size;
|
|
+ }
|
|
+
|
|
+//=============================================================================
|
|
+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
|
|
+
|
|
+int Compile::ConstantTable::calculate_table_base_offset() const {
|
|
+ return 0; // absolute addressing, no offset
|
|
+}
|
|
+
|
|
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
|
|
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
|
|
+ ShouldNotReachHere();
|
|
+}
|
|
+
|
|
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
|
|
+ // Empty encoding
|
|
+}
|
|
+
|
|
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
|
|
+ assert_cond(st != NULL);
|
|
+ st->print("-- \t// MachConstantBaseNode (empty encoding)");
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
|
+ assert_cond(st != NULL && ra_ != NULL);
|
|
+ Compile* C = ra_->C;
|
|
+
|
|
+ int framesize = C->frame_slots() << LogBytesPerInt;
|
|
+
|
|
+ if (C->need_stack_bang(framesize)) {
|
|
+ st->print("# stack bang size=%d\n\t", framesize);
|
|
+ }
|
|
+
|
|
+ st->print("sub sp, sp, #%d\n\t", framesize);
|
|
+ st->print("sd fp, [sp, #%d]", - 2 * wordSize);
|
|
+ st->print("sd ra, [sp, #%d]", - wordSize);
|
|
+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); }
|
|
+}
|
|
+#endif
|
|
+
|
|
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
|
+ assert_cond(ra_ != NULL);
|
|
+ Compile* C = ra_->C;
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ // n.b. frame size includes space for return pc and fp
|
|
+ const int framesize = C->frame_size_in_bytes();
|
|
+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment");
|
|
+
|
|
+ // insert a nop at the start of the prolog so we can patch in a
|
|
+ // branch if we need to invalidate the method later
|
|
+ __ nop();
|
|
+
|
|
+ assert_cond(C != NULL);
|
|
+ int bangsize = C->bang_size_in_bytes();
|
|
+ if (C->need_stack_bang(bangsize) && UseStackBanging) {
|
|
+ __ generate_stack_overflow_check(bangsize);
|
|
+ }
|
|
+
|
|
+ __ build_frame(framesize);
|
|
+
|
|
+ if (VerifyStackAtCalls) {
|
|
+ Unimplemented();
|
|
+ }
|
|
+
|
|
+ C->set_frame_complete(cbuf.insts_size());
|
|
+
|
|
+ if (C->has_mach_constant_base_node()) {
|
|
+ // NOTE: We set the table base offset here because users might be
|
|
+ // emitted before MachConstantBaseNode.
|
|
+ Compile::ConstantTable& constant_table = C->constant_table();
|
|
+ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
|
|
+ }
|
|
+}
|
|
+
|
|
+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
|
|
+{
|
|
+ assert_cond(ra_ != NULL);
|
|
+ return MachNode::size(ra_); // too many variables; just compute it
|
|
+ // the hard way
|
|
+}
|
|
+
|
|
+int MachPrologNode::reloc() const
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
|
+ assert_cond(st != NULL && ra_ != NULL);
|
|
+ Compile* C = ra_->C;
|
|
+ assert_cond(C != NULL);
|
|
+ int framesize = C->frame_size_in_bytes();
|
|
+
|
|
+ st->print("# pop frame %d\n\t", framesize);
|
|
+
|
|
+ if (framesize == 0) {
|
|
+ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize));
|
|
+ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize));
|
|
+ st->print("add sp, sp, #%d\n\t", (2 * wordSize));
|
|
+ } else {
|
|
+ st->print("add sp, sp, #%d\n\t", framesize);
|
|
+ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize);
|
|
+ st->print("ld fp, [sp,#%d]\n\t", - wordSize);
|
|
+ }
|
|
+
|
|
+ if (do_polling() && C->is_method_compilation()) {
|
|
+ st->print("# touch polling page\n\t");
|
|
+ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page()));
|
|
+ st->print("ld zr, [t0]");
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
|
+ assert_cond(ra_ != NULL);
|
|
+ Compile* C = ra_->C;
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ assert_cond(C != NULL);
|
|
+ int framesize = C->frame_size_in_bytes();
|
|
+
|
|
+ __ remove_frame(framesize);
|
|
+
|
|
+ if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
|
|
+ __ reserved_stack_check();
|
|
+ }
|
|
+
|
|
+ if (do_polling() && C->is_method_compilation()) {
|
|
+ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type);
|
|
+ }
|
|
+}
|
|
+
|
|
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
|
|
+ assert_cond(ra_ != NULL);
|
|
+ // Variable size. Determine dynamically.
|
|
+ return MachNode::size(ra_);
|
|
+}
|
|
+
|
|
+int MachEpilogNode::reloc() const {
|
|
+ // Return number of relocatable values contained in this instruction.
|
|
+ return 1; // 1 for polling page.
|
|
+}
|
|
+const Pipeline * MachEpilogNode::pipeline() const {
|
|
+ return MachNode::pipeline_class();
|
|
+}
|
|
+
|
|
+int MachEpilogNode::safepoint_offset() const {
|
|
+ assert(do_polling(), "no return for this epilog node");
|
|
+ return 4;
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+// Figure out which register class each belongs in: rc_int, rc_float or
|
|
+// rc_stack.
|
|
+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack };
|
|
+
|
|
+static enum RC rc_class(OptoReg::Name reg) {
|
|
+
|
|
+ if (reg == OptoReg::Bad) {
|
|
+ return rc_bad;
|
|
+ }
|
|
+
|
|
+ // we have 30 int registers * 2 halves
|
|
+ // (t0 and t1 are omitted)
|
|
+ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
|
|
+ if (reg < slots_of_int_registers) {
|
|
+ return rc_int;
|
|
+ }
|
|
+
|
|
+  // we have 32 float registers * 2 halves
|
|
+ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
|
|
+ if (reg < slots_of_int_registers + slots_of_float_registers) {
|
|
+ return rc_float;
|
|
+ }
|
|
+
|
|
+  // we have 32 vector registers * 4 slots
|
|
+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers;
|
|
+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) {
|
|
+ return rc_vector;
|
|
+ }
|
|
+
|
|
+  // Between the vector regs and the stack are the flags regs.
|
|
+ assert(OptoReg::is_stack(reg), "blow up if spilling flags");
|
|
+
|
|
+ return rc_stack;
|
|
+}
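+
+// Worked example of the ranges computed above (a sketch only, assuming
+// 2 slots per scalar register and 4 slots per vector register as the
+// comments in rc_class() state): integer registers then occupy OptoReg
+// names [0, 60), float registers [60, 124) and vector registers [124, 252);
+// anything past that is expected to be a stack slot, and spilling the
+// flags register would trip the assert above.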
|
|
+
|
|
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
|
|
+ assert_cond(ra_ != NULL);
|
|
+ Compile* C = ra_->C;
|
|
+
|
|
+ // Get registers to move.
|
|
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1));
|
|
+ OptoReg::Name src_lo = ra_->get_reg_first(in(1));
|
|
+ OptoReg::Name dst_hi = ra_->get_reg_second(this);
|
|
+ OptoReg::Name dst_lo = ra_->get_reg_first(this);
|
|
+
|
|
+ enum RC src_hi_rc = rc_class(src_hi);
|
|
+ enum RC src_lo_rc = rc_class(src_lo);
|
|
+ enum RC dst_hi_rc = rc_class(dst_hi);
|
|
+ enum RC dst_lo_rc = rc_class(dst_lo);
|
|
+
|
|
+ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
|
|
+
|
|
+ if (src_hi != OptoReg::Bad) {
|
|
+ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi &&
|
|
+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi,
|
|
+ "expected aligned-adjacent pairs");
|
|
+ }
|
|
+
|
|
+ if (src_lo == dst_lo && src_hi == dst_hi) {
|
|
+ return 0; // Self copy, no move.
|
|
+ }
|
|
+
|
|
+ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
|
|
+ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
|
|
+ int src_offset = ra_->reg2offset(src_lo);
|
|
+ int dst_offset = ra_->reg2offset(dst_lo);
|
|
+
|
|
+ if (bottom_type() == NULL) {
|
|
+ ShouldNotReachHere();
|
|
+ } else if (bottom_type()->isa_vect() != NULL) {
|
|
+ uint ireg = ideal_reg();
|
|
+ if (ireg == Op_VecA && cbuf) {
|
|
+ MacroAssembler _masm(cbuf);
|
|
+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
|
+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
|
|
+ // stack to stack
|
|
+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset,
|
|
+ vector_reg_size_in_bytes);
|
|
+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) {
|
|
+ // vpr to stack
|
|
+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo));
|
|
+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) {
|
|
+ // stack to vpr
|
|
+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo));
|
|
+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) {
|
|
+ // vpr to vpr
|
|
+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo]));
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+ } else if (cbuf != NULL) {
|
|
+ MacroAssembler _masm(cbuf);
|
|
+ switch (src_lo_rc) {
|
|
+ case rc_int:
|
|
+ if (dst_lo_rc == rc_int) { // gpr --> gpr copy
|
|
+ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass
|
|
+ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32);
|
|
+ } else {
|
|
+ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]));
|
|
+ }
|
|
+ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
|
|
+ if (is64) {
|
|
+ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
+ as_Register(Matcher::_regEncode[src_lo]));
|
|
+ } else {
|
|
+ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
+ as_Register(Matcher::_regEncode[src_lo]));
|
|
+ }
|
|
+ } else { // gpr --> stack spill
|
|
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
|
|
+ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
|
|
+ }
|
|
+ break;
|
|
+ case rc_float:
|
|
+ if (dst_lo_rc == rc_int) { // fpr --> gpr copy
|
|
+ if (is64) {
|
|
+ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]),
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
+ } else {
|
|
+ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]),
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
+ }
|
|
+ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
|
|
+ if (is64) {
|
|
+ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
+ } else {
|
|
+ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
+ as_FloatRegister(Matcher::_regEncode[src_lo]));
|
|
+ }
|
|
+ } else { // fpr --> stack spill
|
|
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
|
|
+ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
|
|
+ is64, dst_offset);
|
|
+ }
|
|
+ break;
|
|
+ case rc_stack:
|
|
+ if (dst_lo_rc == rc_int) { // stack --> gpr load
|
|
+ if (this->ideal_reg() == Op_RegI) {
|
|
+ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
|
|
+        } else { // zero extended for narrow oop or klass
|
|
+ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
|
|
+ }
|
|
+ } else if (dst_lo_rc == rc_float) { // stack --> fpr load
|
|
+ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
|
|
+ is64, src_offset);
|
|
+ } else { // stack --> stack copy
|
|
+ assert(dst_lo_rc == rc_stack, "spill to bad register class");
|
|
+ if (this->ideal_reg() == Op_RegI) {
|
|
+ __ unspill(t0, is64, src_offset);
|
|
+ } else { // zero extended for narrow oop or klass
|
|
+ __ unspillu(t0, is64, src_offset);
|
|
+ }
|
|
+ __ spill(t0, is64, dst_offset);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (st != NULL) {
|
|
+ st->print("spill ");
|
|
+ if (src_lo_rc == rc_stack) {
|
|
+ st->print("[sp, #%d] -> ", src_offset);
|
|
+ } else {
|
|
+ st->print("%s -> ", Matcher::regName[src_lo]);
|
|
+ }
|
|
+ if (dst_lo_rc == rc_stack) {
|
|
+ st->print("[sp, #%d]", dst_offset);
|
|
+ } else {
|
|
+ st->print("%s", Matcher::regName[dst_lo]);
|
|
+ }
|
|
+ if (bottom_type()->isa_vect() != NULL) {
|
|
+ int vsize = 0;
|
|
+ if (ideal_reg() == Op_VecA) {
|
|
+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ st->print("\t# vector spill size = %d", vsize);
|
|
+ } else {
|
|
+ st->print("\t# spill size = %d", is64 ? 64 : 32);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
|
+ if (ra_ == NULL) {
|
|
+ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
|
|
+ } else {
|
|
+ implementation(NULL, ra_, false, st);
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
|
+ implementation(&cbuf, ra_, false, NULL);
|
|
+}
|
|
+
|
|
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
|
|
+ return MachNode::size(ra_);
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
|
|
+ assert_cond(ra_ != NULL && st != NULL);
|
|
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
|
+ int reg = ra_->get_reg_first(this);
|
|
+ st->print("add %s, sp, #%d\t# box lock",
|
|
+ Matcher::regName[reg], offset);
|
|
+}
|
|
+#endif
|
|
+
|
|
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ assert_cond(ra_ != NULL);
|
|
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
|
+ int reg = ra_->get_encode(this);
|
|
+
|
|
+ if (is_imm_in_range(offset, 12, 0)) {
|
|
+ __ addi(as_Register(reg), sp, offset);
|
|
+ } else if (is_imm_in_range(offset, 32, 0)) {
|
|
+ __ li32(t0, offset);
|
|
+ __ add(as_Register(reg), sp, t0);
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
|
|
+ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
|
|
+ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
|
|
+
|
|
+ if (is_imm_in_range(offset, 12, 0)) {
|
|
+ return NativeInstruction::instruction_size;
|
|
+ } else {
|
|
+ return 3 * NativeInstruction::instruction_size; // lui + addiw + add;
|
|
+ }
|
|
+}
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
|
|
+{
|
|
+ assert_cond(st != NULL);
|
|
+ st->print_cr("# MachUEPNode");
|
|
+ if (UseCompressedClassPointers) {
|
|
+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
|
|
+ if (Universe::narrow_klass_shift() != 0) {
|
|
+ st->print_cr("\tdecode_klass_not_null t0, t0");
|
|
+ }
|
|
+ } else {
|
|
+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
|
|
+ }
|
|
+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check");
|
|
+}
|
|
+#endif
|
|
+
|
|
+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
|
|
+{
|
|
+ // This is the unverified entry point.
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ Label skip;
|
|
+ __ cmp_klass(j_rarg0, t1, t0, skip);
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
|
|
+ __ bind(skip);
|
|
+}
|
|
+
|
|
+uint MachUEPNode::size(PhaseRegAlloc* ra_) const
|
|
+{
|
|
+ assert_cond(ra_ != NULL);
|
|
+ return MachNode::size(ra_);
|
|
+}
|
|
+
|
|
+// REQUIRED EMIT CODE
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+// Emit exception handler code.
|
|
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
|
|
+{
|
|
+ // la_patchable t0, #exception_blob_entry_point
|
|
+ // jr (offset)t0
|
|
+ // or
|
|
+ // j #exception_blob_entry_point
|
|
+ // Note that the code buffer's insts_mark is always relative to insts.
|
|
+ // That's why we must use the macroassembler to generate a handler.
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ address base = __ start_a_stub(size_exception_handler());
|
|
+ if (base == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return 0; // CodeBuffer::expand failed
|
|
+ }
|
|
+ int offset = __ offset();
|
|
+ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
|
|
+ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
|
|
+ __ end_a_stub();
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+// Emit deopt handler code.
|
|
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
|
|
+{
|
|
+ // Note that the code buffer's insts_mark is always relative to insts.
|
|
+ // That's why we must use the macroassembler to generate a handler.
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ address base = __ start_a_stub(size_deopt_handler());
|
|
+ if (base == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return 0; // CodeBuffer::expand failed
|
|
+ }
|
|
+ int offset = __ offset();
|
|
+
|
|
+ __ auipc(ra, 0);
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
|
|
+
|
|
+ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
|
|
+ __ end_a_stub();
|
|
+ return offset;
|
|
+
|
|
+}
|
|
+// REQUIRED MATCHER CODE
|
|
+
|
|
+//=============================================================================
|
|
+
|
|
+const bool Matcher::match_rule_supported(int opcode) {
|
|
+ if (!has_match_rule(opcode)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ switch (opcode) {
|
|
+ case Op_StrCompressedCopy: // fall through
|
|
+ case Op_StrInflatedCopy: // fall through
|
|
+ case Op_HasNegatives:
|
|
+ return UseRVV;
|
|
+ case Op_EncodeISOArray:
|
|
+ return UseRVV && SpecialEncodeISOArray;
|
|
+ case Op_PopCountI:
|
|
+ case Op_PopCountL:
|
|
+ return UsePopCountInstruction;
|
|
+ case Op_CountLeadingZerosI:
|
|
+ case Op_CountLeadingZerosL:
|
|
+ case Op_CountTrailingZerosI:
|
|
+ case Op_CountTrailingZerosL:
|
|
+ return UseZbb;
|
|
+ }
|
|
+
|
|
+ return true; // Per default match rules are supported.
|
|
+}
|
|
+
|
|
+// Identify extra cases that we might want to provide match rules for vector nodes and
|
|
+// other intrinsics guarded with vector length (vlen).
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return op_vec_supported(opcode);
|
|
+}
|
|
+
|
|
+const bool Matcher::has_predicated_vectors(void) {
|
|
+ return false; // not supported
|
|
+
|
|
+}
|
|
+
|
|
+const int Matcher::float_pressure(int default_pressure_threshold) {
|
|
+ return default_pressure_threshold;
|
|
+}
|
|
+
|
|
+int Matcher::regnum_to_fpu_offset(int regnum)
|
|
+{
|
|
+ Unimplemented();
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+// Is this branch offset short enough that a short branch can be used?
|
|
+//
|
|
+// NOTE: If the platform does not provide any short branch variants, then
|
|
+// this method should return false for offset 0.
|
|
+// |---label(L1)-----|
|
|
+// |-----------------|
|
|
+// |-----------------|----------eq: float-------------------
|
|
+// |-----------------| // far_cmpD_branch | cmpD_branch
|
|
+// |------- ---------| feq; | feq;
|
|
+// |-far_cmpD_branch-| beqz done; | bnez L;
|
|
+// |-----------------| j L; |
|
|
+// |-----------------| bind(done); |
|
|
+// |-----------------|--------------------------------------
|
|
+// |-----------------| // so shortBrSize = br_size - 4;
|
|
+// |-----------------| // so offs = offset - shortBrSize + 4;
|
|
+// |---label(L2)-----|
|
|
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
|
|
+ // The passed offset is relative to address of the branch.
|
|
+ int shortBrSize = br_size - 4;
|
|
+ int offs = offset - shortBrSize + 4;
|
|
+ return (-4096 <= offs && offs < 4096);
|
|
+}
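+
+// Worked example with illustrative numbers: for br_size = 12 and
+// offset = 2000 the code above computes shortBrSize = 8 and
+// offs = 2000 - 8 + 4 = 1996, which lies inside [-4096, 4096), so the
+// short branch variant (a single conditional branch with +/-4KiB reach)
+// can be used.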
|
|
+
|
|
+const bool Matcher::isSimpleConstant64(jlong value) {
|
|
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
|
|
+ // Probably always true, even if a temp register is required.
|
|
+ return true;
|
|
+}
|
|
+
|
|
+// true just means we have fast l2f conversion
|
|
+const bool Matcher::convL2FSupported(void) {
|
|
+ return true;
|
|
+}
|
|
+
|
|
+// Vector width in bytes.
|
|
+const int Matcher::vector_width_in_bytes(BasicType bt) {
|
|
+ if (UseRVV) {
|
|
+    // The MaxVectorSize should have been set by detecting the RVV max vector register size when checking UseRVV.
|
|
+ // MaxVectorSize == VM_Version::_initial_vector_length
|
|
+ return MaxVectorSize;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+// Limits on vector size (number of elements) loaded into vector.
|
|
+const int Matcher::max_vector_size(const BasicType bt) {
|
|
+ return vector_width_in_bytes(bt) / type2aelembytes(bt);
|
|
+}
|
|
+const int Matcher::min_vector_size(const BasicType bt) {
|
|
+ return max_vector_size(bt);
|
|
+}
|
|
+
|
|
+// Vector ideal reg.
|
|
+const uint Matcher::vector_ideal_reg(int len) {
|
|
+ assert(MaxVectorSize >= len, "");
|
|
+ if (UseRVV) {
|
|
+ return Op_VecA;
|
|
+ }
|
|
+
|
|
+ ShouldNotReachHere();
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+const uint Matcher::vector_shift_count_ideal_reg(int size) {
|
|
+ switch(size) {
|
|
+ case 8: return Op_VecD;
|
|
+ case 16: return Op_VecX;
|
|
+ default:
|
|
+ if (size == vector_width_in_bytes(T_BYTE)) {
|
|
+ return Op_VecA;
|
|
+ }
|
|
+ }
|
|
+ ShouldNotReachHere();
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return UseRVV;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return Matcher::max_vector_size(bt);
|
|
+}
|
|
+
|
|
+// AES support not yet implemented
|
|
+const bool Matcher::pass_original_key_for_aes() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+// riscv supports misaligned vector loads and stores.
|
|
+const bool Matcher::misaligned_vectors_ok() {
|
|
+ return true;
|
|
+}
|
|
+
|
|
+// false => size gets scaled to BytesPerLong, ok.
|
|
+const bool Matcher::init_array_count_is_in_bytes = false;
|
|
+
|
|
+// Use conditional move (CMOVL)
|
|
+const int Matcher::long_cmove_cost() {
|
|
+ // long cmoves are no more expensive than int cmoves
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+const int Matcher::float_cmove_cost() {
|
|
+ // float cmoves are no more expensive than int cmoves
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+// Does the CPU require late expand (see block.cpp for description of late expand)?
|
|
+const bool Matcher::require_postalloc_expand = false;
|
|
+
|
|
+// Do we need to mask the count passed to shift instructions or does
|
|
+// the cpu only look at the lower 5/6 bits anyway?
|
|
+const bool Matcher::need_masked_shift_count = false;
|
|
+
|
|
+// This affects two different things:
|
|
+// - how Decode nodes are matched
|
|
+// - how ImplicitNullCheck opportunities are recognized
|
|
+// If true, the matcher will try to remove all Decodes and match them
|
|
+// (as operands) into nodes. NullChecks are not prepared to deal with
|
|
+// Decodes by final_graph_reshaping().
|
|
+// If false, final_graph_reshaping() forces the decode behind the Cmp
|
|
+// for a NullCheck. The matcher matches the Decode node into a register.
|
|
+// Implicit_null_check optimization moves the Decode along with the
|
|
+// memory operation back up before the NullCheck.
|
|
+bool Matcher::narrow_oop_use_complex_address() {
|
|
+ return Universe::narrow_oop_shift() == 0;
|
|
+}
|
|
+
|
|
+bool Matcher::narrow_klass_use_complex_address() {
|
|
+// TODO
|
|
+// decide whether we need to set this to true
|
|
+ return false;
|
|
+}
|
|
+
|
|
+bool Matcher::const_oop_prefer_decode() {
|
|
+ // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
|
|
+ return Universe::narrow_oop_base() == NULL;
|
|
+}
|
|
+
|
|
+bool Matcher::const_klass_prefer_decode() {
|
|
+ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
|
|
+ return Universe::narrow_klass_base() == NULL;
|
|
+}
|
|
+
|
|
+// Is it better to copy float constants, or load them directly from
|
|
+// memory? Intel can load a float constant from a direct address,
|
|
+// requiring no extra registers. Most RISCs will have to materialize
|
|
+// an address into a register first, so they would do better to copy
|
|
+// the constant from stack.
|
|
+const bool Matcher::rematerialize_float_constants = false;
|
|
+
|
|
+// If CPU can load and store mis-aligned doubles directly then no
|
|
+// fixup is needed. Else we split the double into 2 integer pieces
|
|
+// and move it piece-by-piece. Only happens when passing doubles into
|
|
+// C code as the Java calling convention forces doubles to be aligned.
|
|
+const bool Matcher::misaligned_doubles_ok = true;
|
|
+
|
|
+// Not implemented on riscv.
|
|
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
|
|
+ Unimplemented();
|
|
+}
|
|
+
|
|
+// Advertise here if the CPU requires explicit rounding operations to
|
|
+// implement the UseStrictFP mode.
|
|
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
|
|
+
|
|
+// Are floats converted to double when stored to stack during
|
|
+// deoptimization?
|
|
+bool Matcher::float_in_double() { return false; }
|
|
+
|
|
+// Do ints take an entire long register or just half?
|
|
+// The relevant question is how the int is callee-saved:
|
|
+// the whole long is written but de-opt'ing will have to extract
|
|
+// the relevant 32 bits.
|
|
+const bool Matcher::int_in_long = true;
|
|
+
|
|
+// Return whether or not this register is ever used as an argument.
|
|
+// This function is used on startup to build the trampoline stubs in
|
|
+// generateOptoStub. Registers not mentioned will be killed by the VM
|
|
+// call in the trampoline, and arguments in those registers will not be
|
|
+// available to the callee.
|
|
+bool Matcher::can_be_java_arg(int reg)
|
|
+{
|
|
+ return
|
|
+ reg == R10_num || reg == R10_H_num ||
|
|
+ reg == R11_num || reg == R11_H_num ||
|
|
+ reg == R12_num || reg == R12_H_num ||
|
|
+ reg == R13_num || reg == R13_H_num ||
|
|
+ reg == R14_num || reg == R14_H_num ||
|
|
+ reg == R15_num || reg == R15_H_num ||
|
|
+ reg == R16_num || reg == R16_H_num ||
|
|
+ reg == R17_num || reg == R17_H_num ||
|
|
+ reg == F10_num || reg == F10_H_num ||
|
|
+ reg == F11_num || reg == F11_H_num ||
|
|
+ reg == F12_num || reg == F12_H_num ||
|
|
+ reg == F13_num || reg == F13_H_num ||
|
|
+ reg == F14_num || reg == F14_H_num ||
|
|
+ reg == F15_num || reg == F15_H_num ||
|
|
+ reg == F16_num || reg == F16_H_num ||
|
|
+ reg == F17_num || reg == F17_H_num;
|
|
+}
|
|
+
|
|
+bool Matcher::is_spillable_arg(int reg)
|
|
+{
|
|
+ return can_be_java_arg(reg);
|
|
+}
|
|
+
|
|
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+RegMask Matcher::divI_proj_mask() {
|
|
+ ShouldNotReachHere();
|
|
+ return RegMask();
|
|
+}
|
|
+
|
|
+// Register for MODI projection of divmodI.
|
|
+RegMask Matcher::modI_proj_mask() {
|
|
+ ShouldNotReachHere();
|
|
+ return RegMask();
|
|
+}
|
|
+
|
|
+// Register for DIVL projection of divmodL.
|
|
+RegMask Matcher::divL_proj_mask() {
|
|
+ ShouldNotReachHere();
|
|
+ return RegMask();
|
|
+}
|
|
+
|
|
+// Register for MODL projection of divmodL.
|
|
+RegMask Matcher::modL_proj_mask() {
|
|
+ ShouldNotReachHere();
|
|
+ return RegMask();
|
|
+}
|
|
+
|
|
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
|
|
+ return FP_REG_mask();
|
|
+}
|
|
+
|
|
+bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
|
|
+ assert_cond(addp != NULL);
|
|
+ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
|
|
+ Node* u = addp->fast_out(i);
|
|
+ if (u != NULL && u->is_Mem()) {
|
|
+ int opsize = u->as_Mem()->memory_size();
|
|
+ assert(opsize > 0, "unexpected memory operand size");
|
|
+ if (u->as_Mem()->memory_size() != (1 << shift)) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+const bool Matcher::convi2l_type_required = false;
|
|
+
|
|
+// Should the Matcher clone shifts on addressing modes, expecting them
|
|
+// to be subsumed into complex addressing expressions or compute them
|
|
+// into registers?
|
|
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
|
|
+ assert_cond(m != NULL);
|
|
+ if (clone_base_plus_offset_address(m, mstack, address_visited)) {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ Node *off = m->in(AddPNode::Offset);
|
|
+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
|
|
+ size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
|
|
+ // Are there other uses besides address expressions?
|
|
+ !is_visited(off)) {
|
|
+ address_visited.set(off->_idx); // Flag as address_visited
|
|
+ mstack.push(off->in(2), Visit);
|
|
+ Node *conv = off->in(1);
|
|
+ if (conv->Opcode() == Op_ConvI2L &&
|
|
+ // Are there other uses besides address expressions?
|
|
+ !is_visited(conv)) {
|
|
+ address_visited.set(conv->_idx); // Flag as address_visited
|
|
+ mstack.push(conv->in(1), Pre_Visit);
|
|
+ } else {
|
|
+ mstack.push(conv, Pre_Visit);
|
|
+ }
|
|
+ address_visited.test_set(m->_idx); // Flag as address_visited
|
|
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
|
|
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
|
|
+ return true;
|
|
+ } else if (off != NULL && off->Opcode() == Op_ConvI2L &&
|
|
+ // Are there other uses besides address expressions?
|
|
+ !is_visited(off)) {
|
|
+ address_visited.test_set(m->_idx); // Flag as address_visited
|
|
+ address_visited.set(off->_idx); // Flag as address_visited
|
|
+ mstack.push(off->in(1), Pre_Visit);
|
|
+ mstack.push(m->in(AddPNode::Address), Pre_Visit);
|
|
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+void Compile::reshape_address(AddPNode* addp) {
|
|
+}
|
|
+
|
|
+%}
|
|
+
|
|
+
|
|
+
|
|
+//----------ENCODING BLOCK-----------------------------------------------------
|
|
+// This block specifies the encoding classes used by the compiler to
|
|
+// output byte streams. Encoding classes are parameterized macros
|
|
+// used by Machine Instruction Nodes in order to generate the bit
|
|
+// encoding of the instruction. Operands specify their base encoding
|
|
+// interface with the interface keyword. There are currently four
|
|
+// supported interfaces: REG_INTER, CONST_INTER, MEMORY_INTER, and
|
|
+// COND_INTER. REG_INTER causes an operand to generate a function
|
|
+// which returns its register number when queried. CONST_INTER causes
|
|
+// an operand to generate a function which returns the value of the
|
|
+// constant when queried. MEMORY_INTER causes an operand to generate
|
|
+// four functions which return the Base Register, the Index Register,
|
|
+// the Scale Value, and the Offset Value of the operand when queried.
|
|
+// COND_INTER causes an operand to generate six functions which return
|
|
+// the encoding code (ie - encoding bits for the instruction)
|
|
+// associated with each basic boolean condition for a conditional
|
|
+// instruction.
|
|
+//
|
|
+// Instructions specify two basic values for encoding. Again, a
|
|
+// function is available to check if the constant displacement is an
|
|
+// oop. They use the ins_encode keyword to specify their encoding
|
|
+// classes (which must be a sequence of enc_class names, and their
|
|
+// parameters, specified in the encoding block), and they use the
|
|
+// opcode keyword to specify, in order, their primary, secondary, and
|
|
+// tertiary opcode. Only the opcode sections which a particular
|
|
+// instruction needs for encoding need to be specified.
|
|
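+// As an illustrative sketch (the instruct and pipe class names here are
+// assumed, not defined at this point in the file), an instruction refers to
+// an encoding class through ins_encode, e.g.:
+//
+//   instruct loadConL(iRegLNoSp dst, immL src) %{
+//     match(Set dst src);
+//     ins_encode(riscv_enc_li_imm(dst, src));
+//     ins_pipe(ialu_imm);   // pipe class name assumed
+//   %}
+//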
+encode %{
|
|
+ // BEGIN Non-volatile memory access
|
|
+
|
|
+ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ int64_t con = (int64_t)$src$$constant;
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ mv(dst_reg, con);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ address con = (address)$src$$constant;
|
|
+ if (con == NULL || con == (address)1) {
|
|
+ ShouldNotReachHere();
|
|
+ } else {
|
|
+ relocInfo::relocType rtype = $src->constant_reloc();
|
|
+ if (rtype == relocInfo::oop_type) {
|
|
+ __ movoop(dst_reg, (jobject)con, /*immediate*/true);
|
|
+ } else if (rtype == relocInfo::metadata_type) {
|
|
+ __ mov_metadata(dst_reg, (Metadata*)con);
|
|
+ } else {
|
|
+ assert(rtype == relocInfo::none, "unexpected reloc type");
|
|
+ __ mv(dst_reg, $src$$constant);
|
|
+ }
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_p1(iRegP dst) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ mv(dst_reg, 1);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ int32_t offset = 0;
|
|
+ address page = (address)$src$$constant;
|
|
+ unsigned long align = (unsigned long)page & 0xfff;
|
|
+ assert(align == 0, "polling page must be page aligned");
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset);
|
|
+ __ addi(dst_reg, dst_reg, offset);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ load_byte_map_base($dst$$Register);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ address con = (address)$src$$constant;
|
|
+ if (con == NULL) {
|
|
+ ShouldNotReachHere();
|
|
+ } else {
|
|
+ relocInfo::relocType rtype = $src->constant_reloc();
|
|
+ assert(rtype == relocInfo::oop_type, "unexpected reloc type");
|
|
+ __ set_narrow_oop(dst_reg, (jobject)con);
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_zero(iRegNorP dst) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ mv(dst_reg, zr);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ address con = (address)$src$$constant;
|
|
+ if (con == NULL) {
|
|
+ ShouldNotReachHere();
|
|
+ } else {
|
|
+ relocInfo::relocType rtype = $src->constant_reloc();
|
|
+ assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
|
|
+ __ set_narrow_klass(dst_reg, (Klass *)con);
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result as bool*/ true);
|
|
+ %}
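+
+ // The *_acq variants above differ from the plain cmpxchg encodings only in
+ // the acquire argument passed to MacroAssembler::cmpxchg (Assembler::aq
+ // instead of Assembler::relaxed); release ordering (Assembler::rl) is
+ // requested in both flavours.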
|
|
+
|
|
+ // compare and branch instruction encodings
|
|
+
|
|
+ enc_class riscv_enc_j(label lbl) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Label* L = $lbl$$label;
|
|
+ __ j(*L);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Label* L = $lbl$$label;
|
|
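+ // op1 is compared (unsigned) against immediate zero: "op1 >= 0" is always
+ // true, so BoolTest::ge becomes an unconditional jump, while "op1 < 0" is
+ // always false, so BoolTest::lt emits nothing.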
+ switch($cmp$$cmpcode) {
|
|
+ case(BoolTest::ge):
|
|
+ __ j(*L);
|
|
+ break;
|
|
+ case(BoolTest::lt):
|
|
+ break;
|
|
+ default:
|
|
+ Unimplemented();
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ // call instruction encodings
|
|
+
|
|
+ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{
|
|
+ Register sub_reg = as_Register($sub$$reg);
|
|
+ Register super_reg = as_Register($super$$reg);
|
|
+ Register temp_reg = as_Register($temp$$reg);
|
|
+ Register result_reg = as_Register($result$$reg);
|
|
+ Register cr_reg = t1;
|
|
+
|
|
+ Label miss;
|
|
+ Label done;
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
|
|
+ NULL, &miss);
|
|
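+ // $primary selects between the two users of this encoding: when it is set
+ // the hit result is reported in result_reg (zero on success); otherwise the
+ // t1 flag register (cr_reg) is used, zero meaning a hit and 1 a miss.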
+ if ($primary) {
|
|
+ __ mv(result_reg, zr);
|
|
+ } else {
|
|
+ __ mv(cr_reg, zr);
|
|
+ __ j(done);
|
|
+ }
|
|
+
|
|
+ __ bind(miss);
|
|
+ if (!$primary) {
|
|
+ __ mv(cr_reg, 1);
|
|
+ }
|
|
+
|
|
+ __ bind(done);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_java_static_call(method meth) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ address addr = (address)$meth$$method;
|
|
+ address call = NULL;
|
|
+ assert_cond(addr != NULL);
|
|
+ if (!_method) {
|
|
+ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
|
|
+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type));
|
|
+ if (call == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ } else {
|
|
+ int method_index = resolved_method_index(cbuf);
|
|
+ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
|
|
+ : static_call_Relocation::spec(method_index);
|
|
+ call = __ trampoline_call(Address(addr, rspec));
|
|
+ if (call == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ // Emit stub for static call
|
|
+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call);
|
|
+ if (stub == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_java_dynamic_call(method meth) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ int method_index = resolved_method_index(cbuf);
|
|
+ address call = __ ic_call((address)$meth$$method, method_index);
|
|
+ if (call == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_call_epilog() %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ if (VerifyStackAtCalls) {
|
|
+ // Check that stack depth is unchanged: find majik cookie on stack
|
|
+ __ call_Unimplemented();
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_java_to_runtime(method meth) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+
|
|
+ // Some calls to generated routines (arraycopy code) are scheduled by
+ // C2 as runtime calls. If so, we can call them using a jr (they will
+ // be in a reachable segment); otherwise we have to use a jalr, which
+ // loads the absolute address into a register.
|
|
+ address entry = (address)$meth$$method;
|
|
+ CodeBlob *cb = CodeCache::find_blob(entry);
|
|
+ if (cb != NULL) {
|
|
+ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
|
|
+ if (call == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ } else {
|
|
+ Label retaddr;
|
|
+ __ la(t1, retaddr);
|
|
+ __ la(t0, RuntimeAddress(entry));
|
|
+ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ sd(t1, Address(sp, wordSize));
|
|
+ __ jalr(t0);
|
|
+ __ bind(retaddr);
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ // using the cr register (t1) as the bool result: 0 for success; non-zero for failure.
|
|
+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register flag = t1;
|
|
+ Register oop = as_Register($object$$reg);
|
|
+ Register box = as_Register($box$$reg);
|
|
+ Register disp_hdr = as_Register($tmp$$reg);
|
|
+ Register tmp = as_Register($tmp2$$reg);
|
|
+ Label cont;
|
|
+ Label object_has_monitor;
|
|
+
|
|
+ assert_different_registers(oop, box, tmp, disp_hdr, t0);
|
|
+
|
|
+ // Load markOop from object into displaced_header.
|
|
+ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
|
|
+
|
|
+ // Always do locking in runtime.
|
|
+ if (EmitSync & 0x01) {
|
|
+ __ mv(flag, 1);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
|
|
+ // ignore slow case here
|
|
+ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag);
|
|
+ }
|
|
+
|
|
+ // Check for existing monitor
|
|
+ if ((EmitSync & 0x02) == 0) {
|
|
+ __ andi(t0, disp_hdr, markOopDesc::monitor_value);
|
|
+ __ bnez(t0, object_has_monitor);
|
|
+ }
|
|
+
|
|
+ // Set tmp to be (markOop of object | UNLOCK_VALUE).
|
|
+ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value);
|
|
+
|
|
+ // Initialize the box. (Must happen before we update the object mark!)
|
|
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
|
|
+
|
|
+ // Compare object markOop with an unlocked value (tmp) and if
|
|
+ // equal exchange the stack address of our box with object markOop.
|
|
+ // On failure disp_hdr contains the possibly locked markOop.
|
|
+ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq,
|
|
+ Assembler::rl, /*result*/disp_hdr);
|
|
+ __ mv(flag, zr);
|
|
+ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas
|
|
+
|
|
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
|
|
+
|
|
+ // If the compare-and-exchange succeeded, then we found an unlocked
+ // object and have now locked it; execution continues at label cont.
+ // Otherwise we did not see an unlocked object, so try the fast recursive case.
|
|
+
|
|
+ // Check if the owner is self by comparing the value in the
|
|
+ // markOop of object (disp_hdr) with the stack pointer.
|
|
+ __ sub(disp_hdr, disp_hdr, sp);
|
|
+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
|
|
+ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont,
|
|
+ // hence we can store 0 as the displaced header in the box, which indicates that it is a
|
|
+ // recursive lock.
|
|
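+ // That is, tmp = (mark - sp) & (~(page_size - 1) | lock_mask_in_place); the
+ // result is zero exactly when mark - sp is a non-negative value smaller than
+ // a page and the low lock bits of the mark are clear.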
+ __ andr(tmp/*==0?*/, disp_hdr, tmp);
|
|
+ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
|
|
+ __ mv(flag, tmp); // we can use the value of tmp as the result here
|
|
+
|
|
+ if ((EmitSync & 0x02) == 0) {
|
|
+ __ j(cont);
|
|
+
|
|
+ // Handle existing monitor.
|
|
+ __ bind(object_has_monitor);
|
|
+ // The object's monitor m is unlocked iff m->owner == NULL,
|
|
+ // otherwise m->owner may contain a thread or a stack address.
|
|
+ //
|
|
+ // Try to CAS m->owner from NULL to current thread.
|
|
+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value));
|
|
+ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq,
|
|
+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected)
|
|
+
|
|
+ // Store a non-null value into the box to avoid looking like a re-entrant
|
|
+ // lock. The fast-path monitor unlock code checks for
|
|
+ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
|
|
+ // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
|
|
+ __ mv(tmp, (address)markOopDesc::unused_mark());
|
|
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
|
|
+ }
|
|
+
|
|
+ __ bind(cont);
|
|
+ %}
|
|
+
|
|
+ // using the cr flag (t1) to indicate the fast_unlock result: 0 for success; non-zero for failure.
|
|
+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register flag = t1;
|
|
+ Register oop = as_Register($object$$reg);
|
|
+ Register box = as_Register($box$$reg);
|
|
+ Register disp_hdr = as_Register($tmp$$reg);
|
|
+ Register tmp = as_Register($tmp2$$reg);
|
|
+ Label cont;
|
|
+ Label object_has_monitor;
|
|
+
|
|
+ assert_different_registers(oop, box, tmp, disp_hdr, flag);
|
|
+
|
|
+ // Always do locking in runtime.
|
|
+ if (EmitSync & 0x01) {
|
|
+ __ mv(flag, 1);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
|
|
+ __ biased_locking_exit(oop, tmp, cont, flag);
|
|
+ }
|
|
+
|
|
+ // Find the lock address and load the displaced header from the stack.
|
|
+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
|
|
+
|
|
+ // If the displaced header is 0, we have a recursive unlock.
|
|
+ __ mv(flag, disp_hdr);
|
|
+ __ beqz(disp_hdr, cont);
|
|
+
|
|
+ // Handle existing monitor.
|
|
+ if ((EmitSync & 0x02) == 0) {
|
|
+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
|
|
+ __ andi(t0, disp_hdr, markOopDesc::monitor_value);
|
|
+ __ bnez(t0, object_has_monitor);
|
|
+ }
|
|
+
|
|
+ // Check if it is still a lightweight lock; this is true if we
+ // see the stack address of the basicLock in the markOop of the
+ // object.
|
|
+
|
|
+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed,
|
|
+ Assembler::rl, /*result*/tmp);
|
|
+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds
|
|
+ __ j(cont);
|
|
+
|
|
+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
|
|
+
|
|
+ // Handle existing monitor.
|
|
+ if ((EmitSync & 0x02) == 0) {
|
|
+ __ bind(object_has_monitor);
|
|
+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
|
|
+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
|
|
+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
|
|
+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner.
|
|
+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions
|
|
+ __ bnez(flag, cont);
|
|
+
|
|
+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
|
|
+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
|
|
+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0.
|
|
+ __ bnez(flag, cont);
|
|
+ // need a release store here
|
|
+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sd(zr, Address(tmp)); // set unowned
|
|
+ }
|
|
+
|
|
+ __ bind(cont);
|
|
+ %}
|
|
+
|
|
+ // arithmetic encodings
|
|
+
|
|
+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ Register src1_reg = as_Register($src1$$reg);
|
|
+ Register src2_reg = as_Register($src2$$reg);
|
|
+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ Register src1_reg = as_Register($src1$$reg);
|
|
+ Register src2_reg = as_Register($src2$$reg);
|
|
+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ Register src1_reg = as_Register($src1$$reg);
|
|
+ Register src2_reg = as_Register($src2$$reg);
|
|
+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ Register src1_reg = as_Register($src1$$reg);
|
|
+ Register src2_reg = as_Register($src2$$reg);
|
|
+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_tail_call(iRegP jump_target) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register target_reg = as_Register($jump_target$$reg);
|
|
+ __ jr(target_reg);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ Register target_reg = as_Register($jump_target$$reg);
|
|
+ // exception oop should be in x10
|
|
+ // ret addr has been popped into ra
|
|
+ // callee expects it in x13
|
|
+ __ mv(x13, ra);
|
|
+ __ jr(target_reg);
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_rethrow() %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
|
|
+ %}
|
|
+
|
|
+ enc_class riscv_enc_ret() %{
|
|
+ MacroAssembler _masm(&cbuf);
|
|
+ __ ret();
|
|
+ %}
|
|
+
|
|
+%}
|
|
+
|
|
+//----------FRAME--------------------------------------------------------------
|
|
+// Definition of frame structure and management information.
|
|
+//
|
|
+// S T A C K L A Y O U T Allocators stack-slot number
|
|
+// | (to get allocators register number
|
|
+// G Owned by | | v add OptoReg::stack0())
|
|
+// r CALLER | |
|
|
+// o | +--------+ pad to even-align allocators stack-slot
|
|
+// w V | pad0 | numbers; owned by CALLER
|
|
+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
|
|
+// h ^ | in | 5
|
|
+// | | args | 4 Holes in incoming args owned by SELF
|
|
+// | | | | 3
|
|
+// | | +--------+
|
|
+// V | | old out| Empty on Intel, window on Sparc
|
|
+// | old |preserve| Must be even aligned.
|
|
+// | SP-+--------+----> Matcher::_old_SP, even aligned
|
|
+// | | in | 3 area for Intel ret address
|
|
+// Owned by |preserve| Empty on Sparc.
|
|
+// SELF +--------+
|
|
+// | | pad2 | 2 pad to align old SP
|
|
+// | +--------+ 1
|
|
+// | | locks | 0
|
|
+// | +--------+----> OptoReg::stack0(), even aligned
|
|
+// | | pad1 | 11 pad to align new SP
|
|
+// | +--------+
|
|
+// | | | 10
|
|
+// | | spills | 9 spills
|
|
+// V | | 8 (pad0 slot for callee)
|
|
+// -----------+--------+----> Matcher::_out_arg_limit, unaligned
|
|
+// ^ | out | 7
|
|
+// | | args | 6 Holes in outgoing args owned by CALLEE
|
|
+// Owned by +--------+
|
|
+// CALLEE | new out| 6 Empty on Intel, window on Sparc
|
|
+// | new |preserve| Must be even-aligned.
|
|
+// | SP-+--------+----> Matcher::_new_SP, even aligned
|
|
+// | | |
|
|
+//
|
|
+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
|
|
+// known from SELF's arguments and the Java calling convention.
|
|
+// Region 6-7 is determined per call site.
|
|
+// Note 2: If the calling convention leaves holes in the incoming argument
|
|
+// area, those holes are owned by SELF. Holes in the outgoing area
|
|
+// are owned by the CALLEE. Holes should not be necessary in the
+// incoming area, as the Java calling convention is completely under
+// the control of the AD file. Doubles can be sorted and packed to
+// avoid holes. Holes in the outgoing arguments may be necessary for
|
|
+// varargs C calling conventions.
|
|
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
|
|
+// even aligned with pad0 as needed.
|
|
+// Region 6 is even aligned. Region 6-7 is NOT even aligned;
|
|
+// (the latter is true on Intel but is it false on RISCV?)
|
|
+// region 6-11 is even aligned; it may be padded out more so that
|
|
+// the region from SP to FP meets the minimum stack alignment.
|
|
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
|
|
+// alignment. Region 11, pad1, may be dynamically extended so that
|
|
+// SP meets the minimum alignment.
|
|
+
|
|
+frame %{
|
|
+ // What direction does stack grow in (assumed to be same for C & Java)
|
|
+ stack_direction(TOWARDS_LOW);
|
|
+
|
|
+ // These three registers define part of the calling convention
|
|
+ // between compiled code and the interpreter.
|
|
+
|
|
+ // Inline Cache Register or methodOop for I2C.
|
|
+ inline_cache_reg(R31);
|
|
+
|
|
+ // Method Oop Register when calling interpreter.
|
|
+ interpreter_method_oop_reg(R31);
|
|
+
|
|
+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
|
|
+ cisc_spilling_operand_name(indOffset);
|
|
+
|
|
+ // Number of stack slots consumed by locking an object
|
|
+ // generate Compile::sync_stack_slots
|
|
+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
|
|
+ sync_stack_slots(1 * VMRegImpl::slots_per_word);
|
|
+
|
|
+ // Compiled code's Frame Pointer
|
|
+ frame_pointer(R2);
|
|
+
|
|
+ // Interpreter stores its frame pointer in a register which is
|
|
+ // stored to the stack by I2CAdaptors.
|
|
+ // I2CAdaptors convert from interpreted java to compiled java.
|
|
+ interpreter_frame_pointer(R8);
|
|
+
|
|
+ // Stack alignment requirement
|
|
+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
|
|
+
|
|
+ // Number of stack slots between incoming argument block and the start of
|
|
+ // a new frame. The PROLOG must add this many slots to the stack. The
|
|
+ // EPILOG must remove this many slots.
|
|
+ // RISCV needs two words for RA (return address) and FP (frame pointer).
|
|
+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word);
|
|
+
|
|
+ // Number of outgoing stack slots killed above the out_preserve_stack_slots
|
|
+ // for calls to C. Supports the var-args backing area for register parms.
|
|
+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt);
|
|
+
|
|
+ // The after-PROLOG location of the return address. Location of
|
|
+ // return address specifies a type (REG or STACK) and a number
|
|
+ // representing the register number (i.e. - use a register name) or
|
|
+ // stack slot.
|
|
+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
|
|
+ // Otherwise, it is above the locks and verification slot and alignment word
|
|
+ // TODO this may well be correct but need to check why that - 2 is there
|
|
+ // ppc port uses 0 but we definitely need to allow for fixed_slots
|
|
+ // which folds in the space used for monitors
|
|
+ return_addr(STACK - 2 +
|
|
+ align_up((Compile::current()->in_preserve_stack_slots() +
|
|
+ Compile::current()->fixed_slots()),
|
|
+ stack_alignment_in_slots()));
|
|
+
|
|
+ // Body of function which returns an integer array locating
|
|
+ // arguments either in registers or in stack slots. Passed an array
|
|
+ // of ideal registers called "sig" and a "length" count. Stack-slot
|
|
+ // offsets are based on outgoing arguments, i.e. a CALLER setting up
|
|
+ // arguments for a CALLEE. Incoming stack arguments are
|
|
+ // automatically biased by the preserve_stack_slots field above.
|
|
+
|
|
+ calling_convention
|
|
+ %{
|
|
+ // No difference between incoming/outgoing, just pass false
|
|
+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
|
|
+ %}
|
|
+
|
|
+ c_calling_convention
|
|
+ %{
|
|
+ // This is obviously always outgoing
|
|
+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
|
|
+ %}
|
|
+
|
|
+ // Location of compiled Java return values. Same as C for now.
|
|
+ return_value
|
|
+ %{
|
|
+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
|
|
+ "only return normal values");
|
|
+
|
|
+ static const int lo[Op_RegL + 1] = { // enum name
|
|
+ 0, // Op_Node
|
|
+ 0, // Op_Set
|
|
+ R10_num, // Op_RegN
|
|
+ R10_num, // Op_RegI
|
|
+ R10_num, // Op_RegP
|
|
+ F10_num, // Op_RegF
|
|
+ F10_num, // Op_RegD
|
|
+ R10_num // Op_RegL
|
|
+ };
|
|
+
|
|
+ static const int hi[Op_RegL + 1] = { // enum name
|
|
+ 0, // Op_Node
|
|
+ 0, // Op_Set
|
|
+ OptoReg::Bad, // Op_RegN
|
|
+ OptoReg::Bad, // Op_RegI
|
|
+ R10_H_num, // Op_RegP
|
|
+ OptoReg::Bad, // Op_RegF
|
|
+ F10_H_num, // Op_RegD
|
|
+ R10_H_num // Op_RegL
|
|
+ };
|
|
+
|
|
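+ // e.g. int, long and pointer results come back in R10 (x10), with R10_H
+ // naming the upper half of a long/pointer pair, while float and double
+ // results come back in F10 (f10) / F10_H.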
+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+//----------ATTRIBUTES---------------------------------------------------------
|
|
+//----------Operand Attributes-------------------------------------------------
|
|
+op_attrib op_cost(1); // Required cost attribute
|
|
+
|
|
+//----------Instruction Attributes---------------------------------------------
|
|
+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
|
|
+ins_attrib ins_size(32); // Required size attribute (in bits)
|
|
+ins_attrib ins_short_branch(0); // Required flag: is this instruction
|
|
+ // a non-matching short branch variant
|
|
+ // of some long branch?
|
|
+ins_attrib ins_alignment(4); // Required alignment attribute (must
|
|
+ // be a power of 2) specifies the
|
|
+ // alignment that some part of the
|
|
+ // instruction (not necessarily the
|
|
+ // start) requires. If > 1, a
|
|
+ // compute_padding() function must be
|
|
+ // provided for the instruction
|
|
+
|
|
+//----------OPERANDS-----------------------------------------------------------
|
|
+// Operand definitions must precede instruction definitions for correct parsing
|
|
+// in the ADLC because operands constitute user defined types which are used in
|
|
+// instruction definitions.
|
|
+
|
|
+//----------Simple Operands----------------------------------------------------
|
|
+
|
|
+// Integer operands 32 bit
|
|
+// 32 bit immediate
|
|
+operand immI()
|
|
+%{
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 32 bit zero
|
|
+operand immI0()
|
|
+%{
|
|
+ predicate(n->get_int() == 0);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 32 bit unit increment
|
|
+operand immI_1()
|
|
+%{
|
|
+ predicate(n->get_int() == 1);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 32 bit unit decrement
|
|
+operand immI_M1()
|
|
+%{
|
|
+ predicate(n->get_int() == -1);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Unsigned Integer Immediate: 6-bit unsigned int, in the range [32, 64)
|
|
+operand uimmI6_ge32() %{
|
|
+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32));
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_le_4()
|
|
+%{
|
|
+ predicate(n->get_int() <= 4);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_16()
|
|
+%{
|
|
+ predicate(n->get_int() == 16);
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_24()
|
|
+%{
|
|
+ predicate(n->get_int() == 24);
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_31()
|
|
+%{
|
|
+ predicate(n->get_int() == 31);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_32()
|
|
+%{
|
|
+ predicate(n->get_int() == 32);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_63()
|
|
+%{
|
|
+ predicate(n->get_int() == 63);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immI_64()
|
|
+%{
|
|
+ predicate(n->get_int() == 64);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 32 bit integer valid for add immediate
|
|
+operand immIAdd()
|
|
+%{
|
|
+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int()));
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 32 bit integer valid for sub immediate
|
|
+operand immISub()
|
|
+%{
|
|
+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int()));
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 5 bit signed value.
|
|
+operand immI5()
|
|
+%{
|
|
+ predicate(n->get_int() <= 15 && n->get_int() >= -16);
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 5 bit signed value (simm5)
|
|
+operand immL5()
|
|
+%{
|
|
+ predicate(n->get_long() <= 15 && n->get_long() >= -16);
|
|
+ match(ConL);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Integer operands 64 bit
|
|
+// 64 bit immediate
|
|
+operand immL()
|
|
+%{
|
|
+ match(ConL);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 64 bit zero
|
|
+operand immL0()
|
|
+%{
|
|
+ predicate(n->get_long() == 0);
|
|
+ match(ConL);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer operands
|
|
+// Pointer Immediate
|
|
+operand immP()
|
|
+%{
|
|
+ match(ConP);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// NULL Pointer Immediate
|
|
+operand immP0()
|
|
+%{
|
|
+ predicate(n->get_ptr() == 0);
|
|
+ match(ConP);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer Immediate One
|
|
+// this is used in object initialization (initial object header)
|
|
+operand immP_1()
|
|
+%{
|
|
+ predicate(n->get_ptr() == 1);
|
|
+ match(ConP);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Polling Page Pointer Immediate
|
|
+operand immPollPage()
|
|
+%{
|
|
+ predicate((address)n->get_ptr() == os::get_polling_page());
|
|
+ match(ConP);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Card Table Byte Map Base
|
|
+operand immByteMapBase()
|
|
+%{
|
|
+ // Get base of card map
|
|
+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
|
|
+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
|
|
+ match(ConP);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Int Immediate: low 16-bit mask
|
|
+operand immI_16bits()
|
|
+%{
|
|
+ predicate(n->get_int() == 0xFFFF);
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Long Immediate: low 32-bit mask
|
|
+operand immL_32bits()
|
|
+%{
|
|
+ predicate(n->get_long() == 0xFFFFFFFFL);
|
|
+ match(ConL);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 64 bit unit decrement
|
|
+operand immL_M1()
|
|
+%{
|
|
+ predicate(n->get_long() == -1);
|
|
+ match(ConL);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+
|
|
+// 32 bit offset of pc in thread anchor
|
|
+
|
|
+operand immL_pc_off()
|
|
+%{
|
|
+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
|
|
+ in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
|
|
+ match(ConL);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 64 bit integer valid for add immediate
|
|
+operand immLAdd()
|
|
+%{
|
|
+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long()));
|
|
+ match(ConL);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// 64 bit integer valid for sub immediate
|
|
+operand immLSub()
|
|
+%{
|
|
+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long())));
|
|
+ match(ConL);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Narrow pointer operands
|
|
+// Narrow Pointer Immediate
|
|
+operand immN()
|
|
+%{
|
|
+ match(ConN);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Narrow NULL Pointer Immediate
|
|
+operand immN0()
|
|
+%{
|
|
+ predicate(n->get_narrowcon() == 0);
|
|
+ match(ConN);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immNKlass()
|
|
+%{
|
|
+ match(ConNKlass);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Float and Double operands
|
|
+// Double Immediate
|
|
+operand immD()
|
|
+%{
|
|
+ match(ConD);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Double Immediate: +0.0d
|
|
+operand immD0()
|
|
+%{
|
|
+ predicate(jlong_cast(n->getd()) == 0);
|
|
+ match(ConD);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Float Immediate
|
|
+operand immF()
|
|
+%{
|
|
+ match(ConF);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Float Immediate: +0.0f.
|
|
+operand immF0()
|
|
+%{
|
|
+ predicate(jint_cast(n->getf()) == 0);
|
|
+ match(ConF);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immIOffset()
|
|
+%{
|
|
+ predicate(is_imm_in_range(n->get_int(), 12, 0));
|
|
+ match(ConI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+operand immLOffset()
|
|
+%{
|
|
+ predicate(is_imm_in_range(n->get_long(), 12, 0));
|
|
+ match(ConL);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
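+
+// Both offset operands above accept exactly the values that fit in the signed
+// 12-bit immediate field of RISC-V load/store instructions.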
|
|
+
|
|
+// Scale values
|
|
+operand immIScale()
|
|
+%{
|
|
+ predicate(1 <= n->get_int() && (n->get_int() <= 3));
|
|
+ match(ConI);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(CONST_INTER);
|
|
+%}
|
|
+
|
|
+// Integer 32 bit Register Operands
|
|
+operand iRegI()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(any_reg32));
|
|
+ match(RegI);
|
|
+ match(iRegINoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Integer 32 bit Register not Special
|
|
+operand iRegINoSp()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(no_special_reg32));
|
|
+ match(RegI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Register R10 only
|
|
+operand iRegI_R10()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(int_r10_reg));
|
|
+ match(RegI);
|
|
+ match(iRegINoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Register R12 only
|
|
+operand iRegI_R12()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(int_r12_reg));
|
|
+ match(RegI);
|
|
+ match(iRegINoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Register R13 only
|
|
+operand iRegI_R13()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(int_r13_reg));
|
|
+ match(RegI);
|
|
+ match(iRegINoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Register R14 only
|
|
+operand iRegI_R14()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(int_r14_reg));
|
|
+ match(RegI);
|
|
+ match(iRegINoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Integer 64 bit Register Operands
|
|
+operand iRegL()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(any_reg));
|
|
+ match(RegL);
|
|
+ match(iRegLNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Integer 64 bit Register not Special
|
|
+operand iRegLNoSp()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(no_special_reg));
|
|
+ match(RegL);
|
|
+ match(iRegL_R10);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Long 64 bit Register R28 only
|
|
+operand iRegL_R28()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r28_reg));
|
|
+ match(RegL);
|
|
+ match(iRegLNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Long 64 bit Register R29 only
|
|
+operand iRegL_R29()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r29_reg));
|
|
+ match(RegL);
|
|
+ match(iRegLNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Long 64 bit Register R30 only
|
|
+operand iRegL_R30()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r30_reg));
|
|
+ match(RegL);
|
|
+ match(iRegLNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer Register Operands
|
|
+// Pointer Register
|
|
+operand iRegP()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(RegP);
|
|
+ match(iRegPNoSp);
|
|
+ match(iRegP_R10);
|
|
+ match(javaThread_RegP);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer 64 bit Register not Special
|
|
+operand iRegPNoSp()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(no_special_ptr_reg));
|
|
+ match(RegP);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand iRegP_R10()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r10_reg));
|
|
+ match(RegP);
|
|
+ // match(iRegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer 64 bit Register R11 only
|
|
+operand iRegP_R11()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r11_reg));
|
|
+ match(RegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand iRegP_R12()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r12_reg));
|
|
+ match(RegP);
|
|
+ // match(iRegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer 64 bit Register R13 only
|
|
+operand iRegP_R13()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r13_reg));
|
|
+ match(RegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand iRegP_R14()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r14_reg));
|
|
+ match(RegP);
|
|
+ // match(iRegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand iRegP_R15()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r15_reg));
|
|
+ match(RegP);
|
|
+ // match(iRegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand iRegP_R16()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r16_reg));
|
|
+ match(RegP);
|
|
+ // match(iRegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer 64 bit Register R28 only
|
|
+operand iRegP_R28()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r28_reg));
|
|
+ match(RegP);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Pointer Register Operands
|
|
+// Narrow Pointer Register
|
|
+operand iRegN()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(any_reg32));
|
|
+ match(RegN);
|
|
+ match(iRegNNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Integer 64 bit Register not Special
|
|
+operand iRegNNoSp()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(no_special_reg32));
|
|
+ match(RegN);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// heap base register -- used for encoding immN0
|
|
+operand iRegIHeapbase()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(heapbase_reg));
|
|
+ match(RegI);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Long 64 bit Register R10 only
|
|
+operand iRegL_R10()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(r10_reg));
|
|
+ match(RegL);
|
|
+ match(iRegLNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Float Register
|
|
+// Float register operands
|
|
+operand fRegF()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(float_reg));
|
|
+ match(RegF);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Double Register
|
|
+// Double register operands
|
|
+operand fRegD()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(double_reg));
|
|
+ match(RegD);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Generic vector class. This will be used for
|
|
+// all vector operands.
|
|
+operand vReg()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(vectora_reg));
|
|
+ match(VecA);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand vReg_V1()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(v1_reg));
|
|
+ match(VecA);
|
|
+ match(vReg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand vReg_V2()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(v2_reg));
|
|
+ match(VecA);
|
|
+ match(vReg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand vReg_V3()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(v3_reg));
|
|
+ match(VecA);
|
|
+ match(vReg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand vReg_V4()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(v4_reg));
|
|
+ match(VecA);
|
|
+ match(vReg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+operand vReg_V5()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(v5_reg));
|
|
+ match(VecA);
|
|
+ match(vReg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Java Thread Register
|
|
+operand javaThread_RegP(iRegP reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg
|
|
+ match(reg);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+//----------Memory Operands----------------------------------------------------
|
|
+// RISCV has only base_plus_offset and literal address mode, so no need to use
|
|
+// index and scale. Here set index as 0xffffffff and scale as 0x0.
|
|
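+// For example, a base-plus-offset access such as "ld t0, 16(s2)" is described
+// by indOffI/indOffL below with base = s2, disp = 16, and the dummy
+// index/scale values.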
+operand indirect(iRegP reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(reg);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp(0x0);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand indOffI(iRegP reg, immIOffset off)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(AddP reg off);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg, $off]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp($off);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand indOffL(iRegP reg, immLOffset off)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(AddP reg off);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg, $off]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp($off);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand indirectN(iRegN reg)
|
|
+%{
|
|
+ predicate(Universe::narrow_oop_shift() == 0);
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(DecodeN reg);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg]\t# narrow" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp(0x0);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand indOffIN(iRegN reg, immIOffset off)
|
|
+%{
|
|
+ predicate(Universe::narrow_oop_shift() == 0);
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(AddP (DecodeN reg) off);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg, $off]\t# narrow" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp($off);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand indOffLN(iRegN reg, immLOffset off)
|
|
+%{
|
|
+ predicate(Universe::narrow_oop_shift() == 0);
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(AddP (DecodeN reg) off);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg, $off]\t# narrow" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp($off);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// RISCV opto stubs need to write to the pc slot in the thread anchor
|
|
+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(ptr_reg));
|
|
+ match(AddP reg off);
|
|
+ op_cost(0);
|
|
+ format %{ "[$reg, $off]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base($reg);
|
|
+ index(0xffffffff);
|
|
+ scale(0x0);
|
|
+ disp($off);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+
|
|
+//----------Special Memory Operands--------------------------------------------
|
|
+// Stack Slot Operand - This operand is used for loading and storing temporary
|
|
+// values on the stack where a match requires a value to
|
|
+// flow through memory.
|
|
+operand stackSlotI(sRegI reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(stack_slots));
|
|
+ // No match rule because this operand is only generated in matching
|
|
+ // match(RegI);
|
|
+ format %{ "[$reg]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base(0x02); // sp
|
|
+ index(0xffffffff); // No Index
|
|
+ scale(0x0); // No Scale
|
|
+ disp($reg); // Stack Offset
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand stackSlotF(sRegF reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(stack_slots));
|
|
+ // No match rule because this operand is only generated in matching
|
|
+ // match(RegF);
|
|
+ format %{ "[$reg]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base(0x02); // sp
|
|
+ index(0xffffffff); // No Index
|
|
+ scale(0x0); // No Scale
|
|
+ disp($reg); // Stack Offset
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand stackSlotD(sRegD reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(stack_slots));
|
|
+ // No match rule because this operand is only generated in matching
|
|
+ // match(RegD);
|
|
+ format %{ "[$reg]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base(0x02); // sp
|
|
+ index(0xffffffff); // No Index
|
|
+ scale(0x0); // No Scale
|
|
+ disp($reg); // Stack Offset
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand stackSlotL(sRegL reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(stack_slots));
|
|
+ // No match rule because this operand is only generated in matching
|
|
+ // match(RegL);
|
|
+ format %{ "[$reg]" %}
|
|
+ interface(MEMORY_INTER) %{
|
|
+ base(0x02); // sp
|
|
+ index(0xffffffff); // No Index
|
|
+ scale(0x0); // No Scale
|
|
+ disp($reg); // Stack Offset
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// Special operand allowing long args to int ops to be truncated for free
|
|
+
|
|
+operand iRegL2I(iRegL reg) %{
|
|
+
|
|
+ op_cost(0);
|
|
+
|
|
+ match(ConvL2I reg);
|
|
+
|
|
+ format %{ "l2i($reg)" %}
|
|
+
|
|
+ interface(REG_INTER)
|
|
+%}
|
|
+
|
|
+
|
|
+// Comparison Operands
|
|
+// NOTE: Label is a predefined operand which should not be redefined in
|
|
+// the AD file. It is generically handled within the ADLC.
|
|
+
|
|
+//----------Conditional Branch Operands----------------------------------------
|
|
+// Comparison Op - This is the operation of the comparison, and is limited to
|
|
+// the following set of codes:
|
|
+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
|
|
+//
|
|
+// Other attributes of the comparison, such as unsignedness, are specified
|
|
+// by the comparison instruction that sets a condition code flags register.
|
|
+// That result is represented by a flags operand whose subtype is appropriate
|
|
+// to the unsignedness (etc.) of the comparison.
|
|
+//
|
|
+// Later, the instruction which matches both the Comparison Op (a Bool) and
|
|
+// the flags (produced by the Cmp) specifies the coding of the comparison op
|
|
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
|
|
+
|
|
+
|
|
+// used for signed integral comparisons and fp comparisons
|
|
+operand cmpOp()
|
|
+%{
|
|
+ match(Bool);
|
|
+
|
|
+ format %{ "" %}
|
|
+
|
|
+ // the values in interface derive from struct BoolTest::mask
|
|
+ interface(COND_INTER) %{
|
|
+ equal(0x0, "eq");
|
|
+ greater(0x1, "gt");
|
|
+ overflow(0x2, "overflow");
|
|
+ less(0x3, "lt");
|
|
+ not_equal(0x4, "ne");
|
|
+ less_equal(0x5, "le");
|
|
+ no_overflow(0x6, "no_overflow");
|
|
+ greater_equal(0x7, "ge");
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// used for unsigned integral comparisons
|
|
+operand cmpOpU()
|
|
+%{
|
|
+ match(Bool);
|
|
+
|
|
+ format %{ "" %}
|
|
+ // the values in interface derive from struct BoolTest::mask
|
|
+ interface(COND_INTER) %{
|
|
+ equal(0x0, "eq");
|
|
+ greater(0x1, "gtu");
|
|
+ overflow(0x2, "overflow");
|
|
+ less(0x3, "ltu");
|
|
+ not_equal(0x4, "ne");
|
|
+ less_equal(0x5, "leu");
|
|
+ no_overflow(0x6, "no_overflow");
|
|
+ greater_equal(0x7, "geu");
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// used for certain integral comparisons which can be
|
|
+// converted to bxx instructions
|
|
+operand cmpOpEqNe()
|
|
+%{
|
|
+ match(Bool);
|
|
+ op_cost(0);
|
|
+ predicate(n->as_Bool()->_test._test == BoolTest::ne ||
|
|
+ n->as_Bool()->_test._test == BoolTest::eq);
|
|
+
|
|
+ format %{ "" %}
|
|
+ interface(COND_INTER) %{
|
|
+ equal(0x0, "eq");
|
|
+ greater(0x1, "gt");
|
|
+ overflow(0x2, "overflow");
|
|
+ less(0x3, "lt");
|
|
+ not_equal(0x4, "ne");
|
|
+ less_equal(0x5, "le");
|
|
+ no_overflow(0x6, "no_overflow");
|
|
+ greater_equal(0x7, "ge");
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand cmpOpULtGe()
|
|
+%{
|
|
+ match(Bool);
|
|
+ op_cost(0);
|
|
+ predicate(n->as_Bool()->_test._test == BoolTest::lt ||
|
|
+ n->as_Bool()->_test._test == BoolTest::ge);
|
|
+
|
|
+ format %{ "" %}
|
|
+ interface(COND_INTER) %{
|
|
+ equal(0x0, "eq");
|
|
+ greater(0x1, "gt");
|
|
+ overflow(0x2, "overflow");
|
|
+ less(0x3, "lt");
|
|
+ not_equal(0x4, "ne");
|
|
+ less_equal(0x5, "le");
|
|
+ no_overflow(0x6, "no_overflow");
|
|
+ greater_equal(0x7, "ge");
|
|
+ %}
|
|
+%}
|
|
+
|
|
+operand cmpOpUEqNeLeGt()
|
|
+%{
|
|
+ match(Bool);
|
|
+ op_cost(0);
|
|
+ predicate(n->as_Bool()->_test._test == BoolTest::ne ||
|
|
+ n->as_Bool()->_test._test == BoolTest::eq ||
|
|
+ n->as_Bool()->_test._test == BoolTest::le ||
|
|
+ n->as_Bool()->_test._test == BoolTest::gt);
|
|
+
|
|
+ format %{ "" %}
|
|
+ interface(COND_INTER) %{
|
|
+ equal(0x0, "eq");
|
|
+ greater(0x1, "gt");
|
|
+ overflow(0x2, "overflow");
|
|
+ less(0x3, "lt");
|
|
+ not_equal(0x4, "ne");
|
|
+ less_equal(0x5, "le");
|
|
+ no_overflow(0x6, "no_overflow");
|
|
+ greater_equal(0x7, "ge");
|
|
+ %}
|
|
+%}
|
|
+
|
|
+
|
|
+// Flags register, used as output of compare logic
|
|
+operand rFlagsReg()
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(reg_flags));
|
|
+ match(RegFlags);
|
|
+
|
|
+ op_cost(0);
|
|
+ format %{ "RFLAGS" %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+// Special Registers
|
|
+
|
|
+// Method Register
|
|
+operand inline_cache_RegP(iRegP reg)
|
|
+%{
|
|
+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
|
|
+ match(reg);
|
|
+ match(iRegPNoSp);
|
|
+ op_cost(0);
|
|
+ format %{ %}
|
|
+ interface(REG_INTER);
|
|
+%}
|
|
+
|
|
+//----------OPERAND CLASSES----------------------------------------------------
|
|
+// Operand Classes are groups of operands that are used to simplify
|
|
+// instruction definitions by not requiring the AD writer to specify
|
|
+// separate instructions for every form of operand when the
|
|
+// instruction accepts multiple operand types with the same basic
|
|
+// encoding and format. The classic case of this is memory operands.
|
|
+
|
|
+// memory is used to define read/write location for load/store
|
|
+// instruction defs. We can turn a memory op into an Address.
|
|
+
|
|
+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN);
|
|
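+// A single load/store instruct written against the "memory" opclass therefore
+// matches any of the addressing forms above: plain register-indirect,
+// register plus 12-bit immediate offset, and the equivalent narrow-oop forms.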
+
|
|
+// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
|
|
+// operations. it allows the src to be either an iRegI or a (ConvL2I
|
|
+// iRegL). in the latter case the l2i normally planted for a ConvL2I
|
|
+// can be elided because the 32-bit instruction will just employ the
|
|
+// lower 32 bits anyway.
|
|
+//
|
|
+// n.b. this does not elide all L2I conversions. if the truncated
|
|
+// value is consumed by more than one operation then the ConvL2I
|
|
+// cannot be bundled into the consuming nodes so an l2i gets planted
|
|
+// (actually a mvw $dst $src) and the downstream instructions consume
|
|
+// the result of the l2i as an iRegI input. That's a shame since the
|
|
+// mvw is actually redundant but it's not too costly.
|
|
+
|
|
+opclass iRegIorL2I(iRegI, iRegL2I);
|
|
+opclass iRegIorL(iRegI, iRegL);
|
|
+opclass iRegNorP(iRegN, iRegP);
|
|
+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP);
|
|
+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp);
|
|
+opclass immIorL(immI, immL);
|
|
+
|
|
+//----------PIPELINE-----------------------------------------------------------
|
|
+// Rules which define the behavior of the target architectures pipeline.
|
|
+
|
|
+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline
|
|
+//pipe_desc(ID, EX, MEM, WR);
|
|
+#define ID S0
|
|
+#define EX S1
|
|
+#define MEM S2
|
|
+#define WR S3
|
|
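+// These defines map the symbolic stage names used by the pipe classes below
+// (ID, EX, MEM, WR) onto the generic stages S0..S5 declared by pipe_desc
+// further down.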
+
|
|
+// Integer ALU reg operation
|
|
+pipeline %{
|
|
+
|
|
+attributes %{
|
|
+ // RISC-V instructions are of fixed length
|
|
+ fixed_size_instructions; // Fixed size instructions TODO does
|
|
+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2
|
|
+ // RISC-V instructions come in 32-bit word units
|
|
+ instruction_unit_size = 4; // An instruction is 4 bytes long
|
|
+ instruction_fetch_unit_size = 64; // The processor fetches one line
|
|
+ instruction_fetch_units = 1; // of 64 bytes
|
|
+
|
|
+ // List of nop instructions
|
|
+ nops( MachNop );
|
|
+%}
|
|
+
|
|
+// We don't use an actual pipeline model, so we don't care about resources
+// or their description. We do use pipeline classes to introduce fixed
+// latencies.
|
|
+
|
|
+//----------RESOURCES----------------------------------------------------------
|
|
+// Resources are the functional units available to the machine
|
|
+
|
|
+// Generic RISC-V pipeline
|
|
+// 1 decoder
|
|
+// 1 instruction decoded per cycle
|
|
+// 1 load/store op per cycle, 1 branch, 1 FPU
|
|
+// 1 mul, 1 div
|
|
+
|
|
+resources ( DECODE,
|
|
+ ALU,
|
|
+ MUL,
|
|
+ DIV,
|
|
+ BRANCH,
|
|
+ LDST,
|
|
+ FPU);
|
|
+
|
|
+//----------PIPELINE DESCRIPTION-----------------------------------------------
|
|
+// Pipeline Description specifies the stages in the machine's pipeline
|
|
+
|
|
+// Define the pipeline as a generic 6 stage pipeline
|
|
+pipe_desc(S0, S1, S2, S3, S4, S5);
|
|
+
|
|
+//----------PIPELINE CLASSES---------------------------------------------------
|
|
+// Pipeline Classes describe the stages in which input and output are
|
|
+// referenced by the hardware pipeline.
|
|
+
|
|
+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2)
|
|
+%{
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_uop_s(fRegF dst, fRegF src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_uop_d(fRegD dst, fRegD src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_d2f(fRegF dst, fRegD src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_f2d(fRegD dst, fRegF src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_f2i(iRegINoSp dst, fRegF src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_f2l(iRegLNoSp dst, fRegF src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_i2f(fRegF dst, iRegIorL2I src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_l2f(fRegF dst, iRegL src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_d2i(iRegINoSp dst, fRegD src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_d2l(iRegLNoSp dst, fRegD src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_i2d(fRegD dst, iRegIorL2I src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_l2d(fRegD dst, iRegIorL2I src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : S1(read);
|
|
+ src2 : S2(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_load_constant_s(fRegF dst)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_load_constant_d(fRegD dst)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ FPU : S5;
|
|
+%}
|
|
+
|
|
+pipe_class fp_load_mem_s(fRegF dst, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ mem : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+pipe_class fp_load_mem_d(fRegD dst, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ mem : S1(read);
|
|
+ dst : S5(write);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+pipe_class fp_store_reg_s(fRegF src, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ mem : S5(write);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+pipe_class fp_store_reg_d(fRegD src, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : S1(read);
|
|
+ mem : S5(write);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+//------- Integer ALU operations --------------------------
|
|
+
|
|
+// Integer ALU reg-reg operation
|
|
+// Operands needed in ID, result generated in EX
|
|
+// E.g. ADD Rd, Rs1, Rs2
|
|
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+// Integer ALU reg operation with constant shift
|
|
+// E.g. SLLI Rd, Rs1, #shift
|
|
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ src1 : ID(read);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+// Integer ALU reg-reg operation with variable shift
|
|
+// both operands must be available in ID
|
|
+// E.g. SLL Rd, Rs1, Rs2
|
|
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+// Integer ALU reg operation
|
|
+// E.g. NEG Rd, Rs2
|
|
+pipe_class ialu_reg(iRegI dst, iRegI src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ src : ID(read);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+// Integer ALU reg immediate operation
|
|
+// E.g. ADDI Rd, Rs1, #imm
|
|
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ src1 : ID(read);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+// Integer ALU immediate operation (no source operands)
|
|
+// E.g. LI Rd, #imm
|
|
+pipe_class ialu_imm(iRegI dst)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : EX(write);
|
|
+ DECODE : ID;
|
|
+ ALU : EX;
|
|
+%}
|
|
+
|
|
+//------- Multiply pipeline operations --------------------
|
|
+
|
|
+// Multiply reg-reg
|
|
+// E.g. MULW Rd, Rs1, Rs2
|
|
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : WR(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ MUL : WR;
|
|
+%}
|
|
+
|
|
+// E.g. MUL Rd, Rs1, Rs2
|
|
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(3); // Maximum latency for 64 bit mul
|
|
+ dst : WR(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ MUL : WR;
|
|
+%}
|
|
+
|
|
+//------- Divide pipeline operations --------------------
|
|
+
|
|
+// E.g. DIVW Rd, Rs1, Rs2
|
|
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(8); // Maximum latency for 32 bit divide
|
|
+ dst : WR(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ DIV : WR;
|
|
+%}
|
|
+
|
|
+// E.g. DIV Rd, Rs1, Rs2
|
|
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(16); // Maximum latency for 64 bit divide
|
|
+ dst : WR(write);
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ DIV : WR;
|
|
+%}
|
|
+
|
|
+//------- Load pipeline operations ------------------------
|
|
+
|
|
+// Load - reg, mem
|
|
+// E.g. LA Rd, mem
|
|
+pipe_class iload_reg_mem(iRegI dst, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : WR(write);
|
|
+ mem : ID(read);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+// Load - reg, reg
|
|
+// E.g. LD Rd, Rs
|
|
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : WR(write);
|
|
+ src : ID(read);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+//------- Store pipeline operations -----------------------
|
|
+
|
|
+// Store - zr, mem
|
|
+// E.g. SD zr, mem
|
|
+pipe_class istore_mem(memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ mem : ID(read);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+// Store - reg, mem
|
|
+// E.g. SD Rs, mem
|
|
+pipe_class istore_reg_mem(iRegI src, memory mem)
|
|
+%{
|
|
+ single_instruction;
|
|
+ mem : ID(read);
|
|
+ src : EX(read);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+// Store - reg, reg
|
|
+// E.g. SD Rs2, Rs1
|
|
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ dst : ID(read);
|
|
+ src : EX(read);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+//------- Control transfer operations ----------------------
|
|
+
|
|
+// Branch
|
|
+pipe_class pipe_branch()
|
|
+%{
|
|
+ single_instruction;
|
|
+ DECODE : ID;
|
|
+ BRANCH : EX;
|
|
+%}
|
|
+
|
|
+// Branch
|
|
+pipe_class pipe_branch_reg(iRegI src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : ID(read);
|
|
+ DECODE : ID;
|
|
+ BRANCH : EX;
|
|
+%}
|
|
+
|
|
+// Compare & Branch
|
|
+// E.g. BEQ Rs1, Rs2, L
|
|
+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src1 : ID(read);
|
|
+ src2 : ID(read);
|
|
+ DECODE : ID;
|
|
+ BRANCH : EX;
|
|
+%}
|
|
+
|
|
+// E.g. BEQZ Rs, L
|
|
+pipe_class pipe_cmpz_branch(iRegI src)
|
|
+%{
|
|
+ single_instruction;
|
|
+ src : ID(read);
|
|
+ DECODE : ID;
|
|
+ BRANCH : EX;
|
|
+%}
|
|
+
|
|
+//------- Synchronisation operations ----------------------
|
|
+// Any operation requiring serialization
|
|
+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release
|
|
+pipe_class pipe_serial()
|
|
+%{
|
|
+ single_instruction;
|
|
+ force_serialization;
|
|
+ fixed_latency(16);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+pipe_class pipe_slow()
|
|
+%{
|
|
+ instruction_count(10);
|
|
+ multiple_bundles;
|
|
+ force_serialization;
|
|
+ fixed_latency(16);
|
|
+ DECODE : ID;
|
|
+ LDST : MEM;
|
|
+%}
|
|
+
|
|
+// Empty pipeline class
|
|
+pipe_class pipe_class_empty()
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(0);
|
|
+%}
|
|
+
|
|
+// Default pipeline class.
|
|
+pipe_class pipe_class_default()
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(2);
|
|
+%}
|
|
+
|
|
+// Pipeline class for compares.
|
|
+pipe_class pipe_class_compare()
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(16);
|
|
+%}
|
|
+
|
|
+// Pipeline class for memory operations.
|
|
+pipe_class pipe_class_memory()
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(16);
|
|
+%}
|
|
+
|
|
+// Pipeline class for call.
|
|
+pipe_class pipe_class_call()
|
|
+%{
|
|
+ single_instruction;
|
|
+ fixed_latency(100);
|
|
+%}
|
|
+
|
|
+// Define the class for the Nop node.
|
|
+define %{
|
|
+ MachNop = pipe_class_empty;
|
|
+%}
|
|
+%}
|
|
+//----------INSTRUCTIONS-------------------------------------------------------
|
|
+//
|
|
+// match -- States which machine-independent subtree may be replaced
|
|
+// by this instruction.
|
|
+// ins_cost -- The estimated cost of this instruction is used by instruction
|
|
+// selection to identify a minimum cost tree of machine
|
|
+// instructions that matches a tree of machine-independent
|
|
+// instructions.
|
|
+// format -- A string providing the disassembly for this instruction.
|
|
+// The value of an instruction's operand may be inserted
|
|
+// by referring to it with a '$' prefix.
|
|
+// opcode -- Three instruction opcodes may be provided. These are referred
|
|
+// to within an encode class as $primary, $secondary, and $tertiary
|
|
+// respectively. The primary opcode is commonly used to
|
|
+// indicate the type of machine instruction, while secondary
|
|
+// and tertiary are often used for prefix options or addressing
|
|
+// modes.
|
|
+// ins_encode -- A list of encode classes with parameters. The encode class
|
|
+// name must have been defined in an 'enc_class' specification
|
|
+// in the encode section of the architecture description.
|
|
+
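+// As an illustrative sketch only (the rule name below is hypothetical and is
+// not one of the rules defined in this file), these pieces typically combine
+// as follows for a simple integer add:
+//
+//   instruct sketchAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+//     match(Set dst (AddI src1 src2));     // ideal subtree this rule may replace
+//     ins_cost(ALU_COST);                  // relative cost used during selection
+//     format %{ "addw  $dst, $src1, $src2\t#@sketchAddI" %}
+//     ins_encode %{
+//       __ addw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg));
+//     %}
+//     ins_pipe(ialu_reg_reg);              // pipeline class declared above
+//   %}
+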
|
|
+// ============================================================================
|
|
+// Memory (Load/Store) Instructions
|
|
+
|
|
+// Load Instructions
|
|
+
|
|
+// Load Byte (8 bit signed)
|
|
+instruct loadB(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadB mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lb $dst, $mem\t# byte, #@loadB" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Byte (8 bit signed) into long
|
|
+instruct loadB2L(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (ConvI2L (LoadB mem)));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Byte (8 bit unsigned)
|
|
+instruct loadUB(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadUB mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Byte (8 bit unsigned) into long
|
|
+instruct loadUB2L(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (ConvI2L (LoadUB mem)));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Short (16 bit signed)
|
|
+instruct loadS(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadS mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lh $dst, $mem\t# short, #@loadS" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Short (16 bit signed) into long
|
|
+instruct loadS2L(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (ConvI2L (LoadS mem)));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Char (16 bit unsigned)
|
|
+instruct loadUS(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadUS mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Short/Char (16 bit unsigned) into long
|
|
+instruct loadUS2L(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (ConvI2L (LoadUS mem)));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Integer (32 bit signed)
|
|
+instruct loadI(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadI mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lw $dst, $mem\t# int, #@loadI" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Integer (32 bit signed) into long
|
|
+instruct loadI2L(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (ConvI2L (LoadI mem)));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Integer (32 bit unsigned) into long
|
|
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
|
|
+%{
|
|
+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Long (64 bit signed)
|
|
+instruct loadL(iRegLNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadL mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "ld $dst, $mem\t# int, #@loadL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Range
|
|
+instruct loadRange(iRegINoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadRange mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Pointer
|
|
+instruct loadP(iRegPNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadP mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Compressed Pointer
|
|
+instruct loadN(iRegNNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadN mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Klass Pointer
|
|
+instruct loadKlass(iRegPNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadKlass mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Narrow Klass Pointer
|
|
+instruct loadNKlass(iRegNNoSp dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadNKlass mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// Load Float
|
|
+instruct loadF(fRegF dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadF mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "flw $dst, $mem\t# float, #@loadF" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_mem_s);
|
|
+%}
|
|
+
|
|
+// Load Double
|
|
+instruct loadD(fRegD dst, memory mem)
|
|
+%{
|
|
+ match(Set dst (LoadD mem));
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{ "fld $dst, $mem\t# double, #@loadD" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_mem_d);
|
|
+%}
|
|
+
|
|
+// Load Int Constant
|
|
+instruct loadConI(iRegINoSp dst, immI src)
|
|
+%{
|
|
+ match(Set dst src);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "li $dst, $src\t# int, #@loadConI" %}
|
|
+
|
|
+ ins_encode(riscv_enc_li_imm(dst, src));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Long Constant
|
|
+instruct loadConL(iRegLNoSp dst, immL src)
|
|
+%{
|
|
+ match(Set dst src);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "li $dst, $src\t# long, #@loadConL" %}
|
|
+
|
|
+ ins_encode(riscv_enc_li_imm(dst, src));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Pointer Constant
|
|
+instruct loadConP(iRegPNoSp dst, immP con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_p(dst, con));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Null Pointer Constant
|
|
+instruct loadConP0(iRegPNoSp dst, immP0 con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_zero(dst));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Pointer Constant One
|
|
+instruct loadConP1(iRegPNoSp dst, immP_1 con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_p1(dst));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Poll Page Constant
|
|
+instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST * 6);
|
|
+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_poll_page(dst, con));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Byte Map Base Constant
|
|
+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_byte_map_base(dst));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Narrow Pointer Constant
|
|
+instruct loadConN(iRegNNoSp dst, immN con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST * 4);
|
|
+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_n(dst, con));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Narrow Null Pointer Constant
|
|
+instruct loadConN0(iRegNNoSp dst, immN0 con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_zero(dst));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Narrow Klass Constant
|
|
+instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
|
|
+%{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(ALU_COST * 6);
|
|
+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mov_nk(dst, con));
|
|
+
|
|
+ ins_pipe(ialu_imm);
|
|
+%}
|
|
+
|
|
+// Load Float Constant
|
|
+instruct loadConF(fRegF dst, immF con) %{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{
|
|
+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_constant_s);
|
|
+%}
|
|
+
|
|
+instruct loadConF0(fRegF dst, immF0 con) %{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_constant_s);
|
|
+%}
|
|
+
|
|
+// Load Double Constant
|
|
+instruct loadConD(fRegD dst, immD con) %{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+ format %{
|
|
+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_constant_d);
|
|
+%}
|
|
+
|
|
+instruct loadConD0(fRegD dst, immD0 con) %{
|
|
+ match(Set dst con);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_load_constant_d);
|
|
+%}
|
|
+
|
|
+// Store Instructions
|
|
+// Store CMS card-mark Immediate
|
|
+instruct storeimmCM0(immI0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreCM mem zero));
|
|
+ predicate(unnecessary_storestore(n));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "storestore (elided)\n\t"
|
|
+ "sb zr, $mem\t# byte, #@storeimmCM0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store CMS card-mark Immediate with intervening StoreStore
|
|
+// needed when using CMS with no conditional card marking
|
|
+instruct storeimmCM0_ordered(immI0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreCM mem zero));
|
|
+
|
|
+ ins_cost(ALU_COST + STORE_COST);
|
|
+ format %{ "membar(StoreStore)\n\t"
|
|
+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Byte
|
|
+instruct storeB(iRegIorL2I src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreB mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sb $src, $mem\t# byte, #@storeB" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+instruct storeimmB0(immI0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreB mem zero));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Char/Short
|
|
+instruct storeC(iRegIorL2I src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreC mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sh $src, $mem\t# short, #@storeC" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+instruct storeimmC0(immI0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreC mem zero));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Integer
|
|
+instruct storeI(iRegIorL2I src, memory mem)
|
|
+%{
|
|
+  match(Set mem (StoreI mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sw $src, $mem\t# int, #@storeI" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+instruct storeimmI0(immI0 zero, memory mem)
|
|
+%{
|
|
+  match(Set mem (StoreI mem zero));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Long (64 bit signed)
|
|
+instruct storeL(iRegL src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreL mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sd $src, $mem\t# long, #@storeL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+// Store Long (64 bit signed)
|
|
+instruct storeimmL0(immL0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreL mem zero));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Pointer
|
|
+instruct storeP(iRegP src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreP mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sd $src, $mem\t# ptr, #@storeP" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+// Store Pointer
|
|
+instruct storeimmP0(immP0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreP mem zero));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_mem);
|
|
+%}
|
|
+
|
|
+// Store Compressed Pointer
|
|
+instruct storeN(iRegN src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreN mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreN mem zero));
|
|
+ predicate(Universe::narrow_oop_base() == NULL &&
|
|
+ Universe::narrow_klass_base() == NULL);
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+// Store Float
|
|
+instruct storeF(fRegF src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreF mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "fsw $src, $mem\t# float, #@storeF" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_store_reg_s);
|
|
+%}
|
|
+
|
|
+// Store Double
|
|
+instruct storeD(fRegD src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreD mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "fsd $src, $mem\t# double, #@storeD" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_store_reg_d);
|
|
+%}
|
|
+
|
|
+// Store Compressed Klass Pointer
|
|
+instruct storeNKlass(iRegN src, memory mem)
|
|
+%{
|
|
+ match(Set mem (StoreNKlass mem src));
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_mem);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Atomic operation instructions
|
|
+//
|
|
+// Intel and SPARC both implement Ideal Node LoadPLocked and
|
|
+// Store{PIL}Conditional instructions using a normal load for the
|
|
+// LoadPLocked and a CAS for the Store{PIL}Conditional.
|
|
+//
|
|
+// The ideal code appears only to use LoadPLocked/storePConditional as a
|
|
+// pair to lock object allocations from Eden space when not using
|
|
+// TLABs.
|
|
+//
|
|
+// There does not appear to be a Load{IL}Locked Ideal Node and the
|
|
+// Ideal code appears to use Store{IL}Conditional as an alias for CAS
|
|
+// and to use StoreIConditional only for 32-bit and StoreLConditional
|
|
+// only for 64-bit.
|
|
+//
|
|
+// We implement LoadPLocked and storePConditional instructions using,
|
|
+// respectively, the RISCV hw load-reserve and store-conditional
|
|
+// instructions, whereas we must implement each of
|
|
+// Store{IL}Conditional using a CAS which employs a pair of
|
|
+// instructions comprising a load-reserve followed by a
|
|
+// store-conditional.
|
|
+
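+// As a rough sketch (illustration only; the register and label names below
+// are arbitrary), the CAS used for Store{IL}Conditional expands to a
+// load-reserve/store-conditional retry loop of this shape:
+//
+//   retry: lr.d   t0, (addr)           # load-reserve the current value
+//          bne    t0, expected, fail   # value changed, give up
+//          sc.d   t1, newval, (addr)   # store-conditional; t1 == 0 on success
+//          bnez   t1, retry            # reservation lost, try again
+//   fail:  ...
+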
|
|
+
|
|
+// Locked-load (load reserved) of the current heap-top
|
|
+// used when updating the eden heap top
|
|
+// implemented using lr_d on RISCV64
|
|
+instruct loadPLocked(iRegPNoSp dst, indirect mem)
|
|
+%{
|
|
+ match(Set dst (LoadPLocked mem));
|
|
+
|
|
+ ins_cost(ALU_COST * 2 + LOAD_COST);
|
|
+
|
|
+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp));
|
|
+ __ lr_d($dst$$Register, t0, Assembler::aq);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+// Conditional-store of the updated heap-top.
|
|
+// Used during allocation of the shared heap.
|
|
+// implemented using sc_d on RISCV.
|
|
+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(ALU_COST * 2 + STORE_COST);
|
|
+
|
|
+ format %{
|
|
+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp));
|
|
+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+// storeLConditional is used by PhaseMacroExpand::expand_lock_node
|
|
+// when attempting to rebias a lock towards the current thread.
|
|
+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set cr (StoreLConditional mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
|
|
+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
|
|
+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// storeIConditional also has acquire semantics, for no better reason
|
|
+// than matching storeLConditional.
|
|
+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set cr (StoreIConditional mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
|
|
+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
|
|
+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// standard CompareAndSwapX when we are using barriers
|
|
+// these have higher priority than the rules selected by a predicate
|
|
+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
|
|
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
|
|
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval)
|
|
+%{
|
|
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// alternative CompareAndSwapX when we are eliding barriers
|
|
+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
|
|
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
|
|
+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndSwapN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval));
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// Sundry CAS operations. Note that release is always true,
|
|
+// regardless of the memory ordering of the CAS. This is because we
|
|
+// need the volatile case to be sequentially consistent but there is
|
|
+// no trailing StoreLoad barrier emitted by C2. Unfortunately we
|
|
+// can't check the type of memory ordering here, so we always emit a
|
|
+// sc_d(w) with rl bit set.
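+// For example (an illustration of the point above, not additional code, and
+// with placeholder register names), the plain int exchange below passes
+// Assembler::rl as the release argument even though its acquire argument is
+// Assembler::relaxed:
+//
+//   __ cmpxchg(addr, oldval, newval, Assembler::int32,
+//              /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, result);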
|
|
+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
|
|
+
|
|
+ effect(TEMP_DEF res);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapB"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapS"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapI"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapL"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapN"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapP"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapBAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
|
|
+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
|
|
+
|
|
+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapSAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapIAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapLAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapNAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
|
|
+
|
|
+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
|
|
+
|
|
+ format %{
|
|
+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
|
|
+ "# $res == 1 when success, #@weakCompareAndSwapPAcq"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
|
|
+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
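+// The *Acq flavours above are selected by needs_acquiring_load_exclusive()
+// when the CAS node requires acquire semantics; they differ from the plain
+// forms only in passing Assembler::aq instead of Assembler::relaxed.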
|
|
+
|
|
+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev)
|
|
+%{
|
|
+ match(Set prev (GetAndSetI mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
|
|
+%{
|
|
+ match(Set prev (GetAndSetL mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
|
|
+%{
|
|
+ match(Set prev (GetAndSetN mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev)
|
|
+%{
|
|
+ match(Set prev (GetAndSetP mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set prev (GetAndSetI mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set prev (GetAndSetL mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set prev (GetAndSetN mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set prev (GetAndSetP mem newv));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
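+// GetAndSet maps onto the AMO swap forms; the atomic_xchgal* variants used
+// by the *Acq rules add the required acquire ordering to the amoswap.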
|
|
+
|
|
+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr)
|
|
+%{
|
|
+ match(Set newval (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used());
|
|
+
|
|
+ match(Set dummy (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
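+// The *_no_res forms match when result_not_used() proves the old value is
+// dead, so the destination is passed as noreg and the fetched value is discarded.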
|
|
+
|
|
+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr)
|
|
+%{
|
|
+ match(Set newval (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used());
|
|
+
|
|
+ match(Set dummy (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
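+// AMO instructions take only register operands, so the immediate (immIAdd/immLAdd)
+// forms still have to materialize $incr into a register before the amoadd.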
|
|
+
|
|
+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr)
|
|
+%{
|
|
+ match(Set newval (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used());
|
|
+
|
|
+ match(Set dummy (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr)
|
|
+%{
|
|
+ match(Set newval (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used());
|
|
+
|
|
+ match(Set dummy (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set newval (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
|
|
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set dummy (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set newval (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set dummy (GetAndAddL mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set newval (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set dummy (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr)
|
|
+%{
|
|
+ predicate(needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set newval (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr)
|
|
+%{
|
|
+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
|
|
+
|
|
+ match(Set dummy (GetAndAddI mem incr));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Arithmetic Instructions
|
|
+//
|
|
+
|
|
+// Integer Addition
|
|
+
|
|
+// TODO
|
|
+// these currently employ operations which do not set CR and hence are
|
|
+// not flagged as killing CR but we would like to isolate the cases
|
|
+// where we want to set flags from those where we don't. We need to work
|
|
+// out how to do that.
|
|
+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (AddI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ addw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{
|
|
+ match(Set dst (AddI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ int32_t con = (int32_t)$src2$$constant;
|
|
+ __ addiw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{
|
|
+ match(Set dst (AddI (ConvL2I src1) src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ addiw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Pointer Addition
|
|
+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
|
|
+ match(Set dst (AddP src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ add(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// If we shift by 32 bits or more, we need not convert I2L.
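+// Shifting left by >= 32 pushes every bit that ConvI2L would have sign-extended
+// past bit 63, so the result depends only on the low 32 bits of src.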
|
|
+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{
|
|
+ match(Set dst (LShiftL (ConvI2L src) scale));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Pointer Immediate Addition
|
|
+// n.b. this needs to be more expensive than using an indirect memory
|
|
+// operand
|
|
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{
|
|
+ match(Set dst (AddP src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // src2 is an immediate, so the macro assembler emits addi
|
|
+ __ add(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Long Addition
|
|
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (AddL src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ add(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// No constant pool entries required. Long Immediate Addition.
|
|
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
|
|
+ match(Set dst (AddL src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // src2 is an immediate, so the macro assembler emits addi
|
|
+ __ add(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Integer Subtraction
|
|
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (SubI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ subw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate Subtraction
|
|
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{
|
|
+ match(Set dst (SubI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // src2 is an immediate, so the macro assembler emits addiw with the negated constant
|
|
+ __ subw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Long Subtraction
|
|
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (SubL src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sub(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// No constant pool entries required. Long Immediate Subtraction.
|
|
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{
|
|
+ match(Set dst (SubL src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // src2 is an immediate, so the macro assembler emits addi with the negated constant
|
|
+ __ sub(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ $src2$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Integer Negation (special case for sub)
|
|
+
|
|
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
|
|
+ match(Set dst (SubI zero src));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // negw is a pseudo-instruction that expands to subw dst, x0, src
|
|
+ __ negw(as_Register($dst$$reg),
|
|
+ as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Long Negation
|
|
+
|
|
+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{
|
|
+ match(Set dst (SubL zero src));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // neg is a pseudo-instruction that expands to sub dst, x0, src
|
|
+ __ neg(as_Register($dst$$reg),
|
|
+ as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Integer Multiply
|
|
+
|
|
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (MulI src1 src2));
|
|
+ ins_cost(IMUL_COST);
|
|
+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %}
|
|
+
|
|
+  // 32-bit word multiply; only the low 32 bits of the operands are used
|
|
+ ins_encode %{
|
|
+    // riscv64 mulw sign-extends the 32-bit result into the high 32 bits of dst
|
|
+ __ mulw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(imul_reg_reg);
|
|
+%}
|
|
+
|
|
+// Long Multiply
|
|
+
|
|
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (MulL src1 src2));
|
|
+ ins_cost(IMUL_COST);
|
|
+ format %{ "mul $dst, $src1, $src2\t#@mulL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ mul(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(lmul_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2)
|
|
+%{
|
|
+ match(Set dst (MulHiL src1 src2));
|
|
+ ins_cost(IMUL_COST);
|
|
+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ mulh(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(lmul_reg_reg);
|
|
+%}
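+// MulHiL wants the high 64 bits of the signed 128-bit product, which is
+// exactly what mulh returns.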
|
|
+
|
|
+// Integer Divide
|
|
+
|
|
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (DivI src1 src2));
|
|
+ ins_cost(IDIVSI_COST);
|
|
+ format %{ "divw $dst, $src1, $src2\t#@divI"%}
|
|
+
|
|
+ ins_encode(riscv_enc_divw(dst, src1, src2));
|
|
+ ins_pipe(idiv_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
|
|
+ match(Set dst (URShiftI (RShiftI src1 div1) div2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
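+// (src >> 31) >>> 31 is just src >>> 31: both leave only the sign bit, so a
+// single srliw suffices.  C2 typically produces this shape when lowering
+// signed division by a power of two.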
|
|
+
|
|
+// Long Divide
|
|
+
|
|
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (DivL src1 src2));
|
|
+ ins_cost(IDIVDI_COST);
|
|
+ format %{ "div $dst, $src1, $src2\t#@divL" %}
|
|
+
|
|
+ ins_encode(riscv_enc_div(dst, src1, src2));
|
|
+ ins_pipe(ldiv_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
|
|
+ match(Set dst (URShiftL (RShiftL src1 div1) div2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Integer Remainder
|
|
+
|
|
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (ModI src1 src2));
|
|
+ ins_cost(IDIVSI_COST);
|
|
+ format %{ "remw $dst, $src1, $src2\t#@modI" %}
|
|
+
|
|
+ ins_encode(riscv_enc_modw(dst, src1, src2));
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Long Remainder
|
|
+
|
|
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (ModL src1 src2));
|
|
+ ins_cost(IDIVDI_COST);
|
|
+ format %{ "rem $dst, $src1, $src2\t#@modL" %}
|
|
+
|
|
+ ins_encode(riscv_enc_mod(dst, src1, src2));
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Integer Shifts
|
|
+
|
|
+// Shift Left Register
|
|
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
|
|
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (LShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sllw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Left Immediate
|
|
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
|
|
+ match(Set dst (LShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+ // 5 bits of the I-immediate field for RV32I
|
|
+ __ slliw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x1f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Shift Right Logical Register
|
|
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
|
|
+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (URShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ srlw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Right Logical Immediate
|
|
+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
|
|
+ match(Set dst (URShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+    // 5 bits of the I-immediate field (srliw takes a 5-bit shamt)
|
|
+ __ srliw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x1f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Shift Right Arithmetic Register
|
|
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
|
|
+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (RShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // sraw sign-extends the 32-bit result into the high 32 bits of dst
|
|
+ __ sraw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Right Arithmetic Immediate
|
|
+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
|
|
+ match(Set dst (RShiftI src1 src2));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+    // sraiw sign-extends the 32-bit result into the high 32 bits of dst
|
|
+ __ sraiw(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x1f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Long Shifts
|
|
+
|
|
+// Shift Left Register
|
|
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
|
|
+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (LShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sll(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Left Immediate
|
|
+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
|
|
+ match(Set dst (LShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+ // 6 bits of the I-immediate field for RV64I
|
|
+ __ slli(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x3f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Shift Right Logical Register
|
|
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
|
|
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (URShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ srl(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Right Logical Immediate
|
|
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
|
|
+ match(Set dst (URShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+ // 6 bits of the I-immediate field for RV64I
|
|
+ __ srli(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x3f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// A special-case pattern for card table stores.
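+// The card-table post barrier shifts the raw oop address right by the card
+// shift; matching CastP2X under URShiftL lets that be a single srli.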
|
|
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
|
|
+ match(Set dst (URShiftL (CastP2X src1) src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+ // 6 bits of the I-immediate field for RV64I
|
|
+ __ srli(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x3f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// Shift Right Arithmetic Register
|
|
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
|
|
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
|
|
+ match(Set dst (RShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sra(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg_vshift);
|
|
+%}
|
|
+
|
|
+// Shift Right Arithmetic Immediate
|
|
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
|
|
+ match(Set dst (RShiftL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // the shift amount is encoded in the lower
|
|
+ // 6 bits of the I-immediate field for RV64I
|
|
+ __ srai(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (unsigned) $src2$$constant & 0x3f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{
|
|
+ match(Set dst (XorI src1 m1));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{
|
|
+ match(Set dst (XorL src1 m1));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+
|
|
+// ============================================================================
|
|
+// Floating Point Arithmetic Instructions
|
|
+
|
|
+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (AddF src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fadd_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_s);
|
|
+%}
|
|
+
|
|
+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (AddD src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fadd_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_d);
|
|
+%}
|
|
+
|
|
+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (SubF src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsub_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_s);
|
|
+%}
|
|
+
|
|
+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (SubD src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsub_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_d);
|
|
+%}
|
|
+
|
|
+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (MulF src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmul_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_s);
|
|
+%}
|
|
+
|
|
+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (MulD src1 src2));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmul_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_d);
|
|
+%}
|
|
+
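+// The fused multiply-add patterns below only match under UseFMA, since
+// fmadd/fmsub round once and are not bit-for-bit equivalent to a separate
+// multiply and add.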
|
|
+// src1 * src2 + src3
|
|
+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaF src3 (Binary src1 src2)));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmadd_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// src1 * src2 + src3
|
|
+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaD src3 (Binary src1 src2)));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmadd_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// src1 * src2 - src3
|
|
+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmsub_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// src1 * src2 - src3
|
|
+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmsub_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// -src1 * src2 + src3
|
|
+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
|
|
+ match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fnmsub_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// -src1 * src2 + src3
|
|
+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
|
|
+ match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fnmsub_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// -src1 * src2 - src3
|
|
+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
|
|
+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
|
|
+
|
|
+ ins_cost(FMUL_SINGLE_COST);
|
|
+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fnmadd_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+// -src1 * src2 - src3
|
|
+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
|
|
+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
|
|
+
|
|
+ ins_cost(FMUL_DOUBLE_COST);
|
|
+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fnmadd_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg),
|
|
+ as_FloatRegister($src3$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
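+// Java Math.max/min must return NaN when either input is NaN, but fmax/fmin
+// return the non-NaN operand.  The expansions below therefore clear fflags,
+// use a signaling flt comparison to detect a NaN input via the invalid flag,
+// and fall back to an fadd (which propagates NaN) when the flag is set.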
|
|
+// Math.max(FF)F
|
|
+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (MaxF src1 src2));
|
|
+ effect(TEMP_DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST);
|
|
+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t"
|
|
+ "fmax.s $dst, $src1, $src2\n\t"
|
|
+ "flt.s zr, $src1, $src2\n\t"
|
|
+ "frflags t0\n\t"
|
|
+ "beqz t0, Ldone\n\t"
|
|
+ "fadd.s $dst, $src1, $src2" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_s);
|
|
+%}
|
|
+
|
|
+// Math.min(FF)F
|
|
+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (MinF src1 src2));
|
|
+ effect(TEMP_DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST);
|
|
+ format %{ "fsflags zr\t#@minF_reg_reg\n\t"
|
|
+ "fmin.s $dst, $src1, $src2\n\t"
|
|
+ "flt.s zr, $src1, $src2\n\t"
|
|
+ "frflags t0\n\t"
|
|
+ "beqz t0, Ldone\n\t"
|
|
+ "fadd.s $dst, $src1, $src2" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_s);
|
|
+%}
|
|
+
|
|
+// Math.max(DD)D
|
|
+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (MaxD src1 src2));
|
|
+ effect(TEMP_DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST);
|
|
+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t"
|
|
+ "fmax.d $dst, $src1, $src2\n\t"
|
|
+ "flt.d zr, $src1, $src2\n\t"
|
|
+ "frflags t0\n\t"
|
|
+ "beqz t0, Ldone\n\t"
|
|
+ "fadd.d $dst, $src1, $src2" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_d);
|
|
+%}
|
|
+
|
|
+// Math.min(DD)D
|
|
+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (MinD src1 src2));
|
|
+ effect(TEMP_DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST);
|
|
+ format %{ "fsflags zr\t#@minD_reg_reg\n\t"
|
|
+ "fmin.d $dst, $src1, $src2\n\t"
|
|
+ "flt.d zr, $src1, $src2\n\t"
|
|
+ "frflags t0\n\t"
|
|
+ "beqz t0, Ldone\n\t"
|
|
+ "fadd.d $dst, $src1, $src2" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_dop_reg_reg_d);
|
|
+%}
|
|
+
|
|
+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
|
|
+ match(Set dst (DivF src1 src2));
|
|
+
|
|
+ ins_cost(FDIV_COST);
|
|
+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fdiv_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_div_s);
|
|
+%}
|
|
+
|
|
+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
|
|
+ match(Set dst (DivD src1 src2));
|
|
+
|
|
+ ins_cost(FDIV_COST);
|
|
+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fdiv_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src1$$reg),
|
|
+ as_FloatRegister($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_div_d);
|
|
+%}
|
|
+
|
|
+instruct negF_reg_reg(fRegF dst, fRegF src) %{
|
|
+ match(Set dst (NegF src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fneg_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_uop_s);
|
|
+%}
|
|
+
|
|
+instruct negD_reg_reg(fRegD dst, fRegD src) %{
|
|
+ match(Set dst (NegD src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fneg_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_uop_d);
|
|
+%}
|
|
+
|
|
+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ match(Set dst (AbsI src));
|
|
+
|
|
+ ins_cost(ALU_COST * 3);
|
|
+ format %{ "sraiw t0, $src, 0x1f\n\t"
|
|
+ "xorr $dst, $src, t0\n\t"
|
|
+ "subw $dst, $dst, t0\t#@absI_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sraiw(t0, as_Register($src$$reg), 0x1f);
|
|
+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0);
|
|
+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ConvI2L (AbsI src)));
|
|
+
|
|
+ ins_cost(ALU_COST * 3);
|
|
+ format %{ "sraiw t0, $src, 0x1f\n\t"
|
|
+ "xorr $dst, $src, t0\n\t"
|
|
+ "subw $dst, $dst, t0\t#@absI2L_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sraiw(t0, as_Register($src$$reg), 0x1f);
|
|
+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0);
|
|
+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct absL_reg(iRegLNoSp dst, iRegL src) %{
|
|
+ match(Set dst (AbsL src));
|
|
+
|
|
+ ins_cost(ALU_COST * 3);
|
|
+ format %{ "srai t0, $src, 0x3f\n\t"
|
|
+ "xorr $dst, $src, t0\n\t"
|
|
+            "sub  $dst, $dst, t0\t#@absL_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ srai(t0, as_Register($src$$reg), 0x3f);
|
|
+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0);
|
|
+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct absF_reg(fRegF dst, fRegF src) %{
|
|
+ match(Set dst (AbsF src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %}
|
|
+ ins_encode %{
|
|
+ __ fabs_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_uop_s);
|
|
+%}
|
|
+
|
|
+instruct absD_reg(fRegD dst, fRegD src) %{
|
|
+ match(Set dst (AbsD src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %}
|
|
+ ins_encode %{
|
|
+ __ fabs_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_uop_d);
|
|
+%}
|
|
+
|
|
+instruct sqrtF_reg(fRegF dst, fRegF src) %{
|
|
+ match(Set dst (SqrtF src));
|
|
+
|
|
+ ins_cost(FSQRT_COST);
|
|
+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %}
|
|
+ ins_encode %{
|
|
+ __ fsqrt_s(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_sqrt_s);
|
|
+%}
|
|
+
|
|
+instruct sqrtD_reg(fRegD dst, fRegD src) %{
|
|
+ match(Set dst (SqrtD src));
|
|
+
|
|
+ ins_cost(FSQRT_COST);
|
|
+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %}
|
|
+ ins_encode %{
|
|
+ __ fsqrt_d(as_FloatRegister($dst$$reg),
|
|
+ as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_sqrt_d);
|
|
+%}
|
|
+
|
|
+// Arithmetic Instructions End
|
|
+
|
|
+// ============================================================================
|
|
+// Logical Instructions
|
|
+
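+// andi/ori/xori take the same 12-bit sign-extended immediate as addi, so the
+// immediate forms below can reuse the immIAdd/immLAdd operands.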
|
|
+// Register And
|
|
+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
|
|
+ match(Set dst (AndI src1 src2));
|
|
+
|
|
+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ andr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate And
|
|
+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
|
|
+ match(Set dst (AndI src1 src2));
|
|
+
|
|
+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ andi(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Register Or
|
|
+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
|
|
+ match(Set dst (OrI src1 src2));
|
|
+
|
|
+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ orr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate Or
|
|
+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
|
|
+ match(Set dst (OrI src1 src2));
|
|
+
|
|
+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ ori(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Register Xor
|
|
+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
|
|
+ match(Set dst (XorI src1 src2));
|
|
+
|
|
+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ xorr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate Xor
|
|
+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
|
|
+ match(Set dst (XorI src1 src2));
|
|
+
|
|
+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ xori(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Register And Long
|
|
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (AndL src1 src2));
|
|
+
|
|
+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ andr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate And Long
|
|
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
|
|
+ match(Set dst (AndL src1 src2));
|
|
+
|
|
+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ andi(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Register Or Long
|
|
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (OrL src1 src2));
|
|
+
|
|
+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ orr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate Or Long
|
|
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
|
|
+ match(Set dst (OrL src1 src2));
|
|
+
|
|
+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ ori(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// Register Xor Long
|
|
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
|
|
+ match(Set dst (XorL src1 src2));
|
|
+
|
|
+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ xorr(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Immediate Xor Long
|
|
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
|
|
+ match(Set dst (XorL src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ xori(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ (int32_t)($src2$$constant));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_imm);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// BSWAP Instructions
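+// There is no byte-reverse instruction in the base ISA (rev8 is part of Zbb),
+// so the revb* macros expand to shift/mask sequences; the large ALU_COST
+// multiples below reflect those expansions.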
|
|
+
|
|
+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReverseBytesI src));
|
|
+ effect(TEMP cr);
|
|
+
|
|
+ ins_cost(ALU_COST * 13);
|
|
+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{
|
|
+ match(Set dst (ReverseBytesL src));
|
|
+ effect(TEMP cr);
|
|
+
|
|
+ ins_cost(ALU_COST * 29);
|
|
+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReverseBytesUS src));
|
|
+
|
|
+ ins_cost(ALU_COST * 5);
|
|
+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReverseBytesS src));
|
|
+
|
|
+ ins_cost(ALU_COST * 5);
|
|
+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// MemBar Instruction
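+// MacroAssembler::membar translates the LoadLoad/LoadStore/StoreStore/StoreLoad
+// bits into the predecessor/successor sets of a single fence instruction, as
+// the format strings below indicate.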
|
|
+
|
|
+instruct load_fence() %{
|
|
+ match(LoadFence);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "#@load_fence" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ %}
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_acquire() %{
|
|
+ match(MemBarAcquire);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "#@membar_acquire\n\t"
|
|
+ "fence ir iorw" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ block_comment("membar_acquire");
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_acquire_lock() %{
|
|
+ match(MemBarAcquireLock);
|
|
+ ins_cost(0);
|
|
+
|
|
+ format %{ "#@membar_acquire_lock (elided)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ block_comment("membar_acquire_lock (elided)");
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct store_fence() %{
|
|
+ match(StoreFence);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "#@store_fence" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ %}
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_release() %{
|
|
+ match(MemBarRelease);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "#@membar_release\n\t"
|
|
+ "fence iorw ow" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ block_comment("membar_release");
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ %}
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_storestore() %{
|
|
+ match(MemBarStoreStore);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "MEMBAR-store-store\t#@membar_storestore" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+ %}
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_release_lock() %{
|
|
+ match(MemBarReleaseLock);
|
|
+ ins_cost(0);
|
|
+
|
|
+ format %{ "#@membar_release_lock (elided)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ block_comment("membar_release_lock (elided)");
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+instruct membar_volatile() %{
|
|
+ match(MemBarVolatile);
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "#@membar_volatile\n\t"
|
|
+ "fence iorw iorw"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ block_comment("membar_volatile");
|
|
+ __ membar(MacroAssembler::StoreLoad);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
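Taken together, the MemBar rules above boil down to three fence shapes, as their format strings indicate: LoadLoad|LoadStore for the acquire-style barriers ("fence ir iorw"), LoadStore|StoreStore for the release-style barriers ("fence iorw ow"), a full fence for membar_volatile ("fence iorw iorw"), and no code at all for the elided lock variants. The sketch below merely restates that mapping in C++; it is illustrative and is not the port's MacroAssembler::membar().

  // Hypothetical restatement of the mapping spelled out by the formats above;
  // names and bit values are illustrative, not HotSpot's.
  enum BarrierBits { LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8 };

  static const char* fence_for(int bits) {
    if (bits == (LoadLoad | LoadStore))   return "fence ir, iorw";   // load_fence / membar_acquire
    if (bits == (LoadStore | StoreStore)) return "fence iorw, ow";   // store_fence / membar_release
    if (bits & StoreLoad)                 return "fence iorw, iorw"; // membar_volatile (full fence)
    return "?";  // other combinations (e.g. StoreStore alone) are not spelled out in this hunk
  }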
|
|
+
|
|
+// ============================================================================
|
|
+// Cast Instructions (Java-level type cast)
|
|
+
|
|
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
|
|
+ match(Set dst (CastX2P src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ if ($dst$$reg != $src$$reg) {
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
|
|
+ match(Set dst (CastP2X src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ if ($dst$$reg != $src$$reg) {
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct castPP(iRegPNoSp dst)
|
|
+%{
|
|
+ match(Set dst (CastPP dst));
|
|
+ ins_cost(0);
|
|
+
|
|
+ size(0);
|
|
+ format %{ "# castPP of $dst, #@castPP" %}
|
|
+ ins_encode(/* empty encoding */);
|
|
+ ins_pipe(pipe_class_empty);
|
|
+%}
|
|
+
|
|
+instruct castII(iRegI dst)
|
|
+%{
|
|
+ match(Set dst (CastII dst));
|
|
+
|
|
+ size(0);
|
|
+ format %{ "# castII of $dst, #@castII" %}
|
|
+ ins_encode(/* empty encoding */);
|
|
+ ins_cost(0);
|
|
+ ins_pipe(pipe_class_empty);
|
|
+%}
|
|
+
|
|
+instruct checkCastPP(iRegPNoSp dst)
|
|
+%{
|
|
+ match(Set dst (CheckCastPP dst));
|
|
+
|
|
+ size(0);
|
|
+ ins_cost(0);
|
|
+ format %{ "# checkcastPP of $dst, #@checkCastPP" %}
|
|
+ ins_encode(/* empty encoding */);
|
|
+ ins_pipe(pipe_class_empty);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Convert Instructions
|
|
+
|
|
+// int to bool
|
|
+instruct convI2Bool(iRegINoSp dst, iRegI src)
|
|
+%{
|
|
+ match(Set dst (Conv2B src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "snez $dst, $src\t#@convI2Bool" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ snez(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// pointer to bool
|
|
+instruct convP2Bool(iRegINoSp dst, iRegP src)
|
|
+%{
|
|
+ match(Set dst (Conv2B src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "snez $dst, $src\t#@convP2Bool" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ snez(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// int <-> long
|
|
+
|
|
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
|
|
+%{
|
|
+ match(Set dst (ConvI2L src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %}
|
|
+ ins_encode %{
|
|
+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
|
|
+ match(Set dst (ConvL2I src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// int to unsigned long (Zero-extend)
|
|
+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
|
|
+%{
|
|
+ match(Set dst (AndL (ConvI2L src) mask));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
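The match rule above, (AndL (ConvI2L src) mask) with a 32-bit all-ones mask, is the ideal-graph shape of the usual Java idiom for treating an int as unsigned, and the rule folds it into a single zero_extend. A minimal C++ equivalent of that idiom, for reference only:

  #include <cstdint>

  // Java "long u = x & 0xFFFFFFFFL;" reaches C2 as AndL(ConvI2L(x), 0xFFFFFFFF),
  // which convI2UL_reg_reg implements as one 32-bit zero-extension.
  static int64_t int_to_unsigned_long(int32_t x) {
    return static_cast<int64_t>(x) & 0xFFFFFFFFLL;  // == zero-extend of the low 32 bits
  }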
|
|
+
|
|
+// float <-> double
|
|
+
|
|
+instruct convF2D_reg(fRegD dst, fRegF src) %{
|
|
+ match(Set dst (ConvF2D src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_f2d);
|
|
+%}
|
|
+
|
|
+instruct convD2F_reg(fRegF dst, fRegD src) %{
|
|
+ match(Set dst (ConvD2F src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_d2f);
|
|
+%}
|
|
+
|
|
+// float <-> int
|
|
+
|
|
+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
|
|
+ match(Set dst (ConvF2I src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_f2i);
|
|
+%}
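The encoding above calls fcvt_w_s_safe rather than a bare fcvt.w.s, presumably because the raw conversion does not match Java's required float-to-int behaviour: NaN must become 0 and out-of-range values must saturate. That is an assumption about the helper, which is defined elsewhere in the port; the required semantics themselves are sketched below.

  #include <cmath>
  #include <cstdint>
  #include <limits>

  // Java semantics of (int)f, which the *_safe conversion helpers presumably guard.
  static int32_t java_f2i(float f) {
    if (std::isnan(f))       return 0;                                    // NaN -> 0
    if (f >= 2147483648.0f)  return std::numeric_limits<int32_t>::max();  // saturate high
    if (f <= -2147483648.0f) return std::numeric_limits<int32_t>::min();  // saturate low
    return static_cast<int32_t>(f);                                       // in range: truncate toward zero
  }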
|
|
+
|
|
+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ConvI2F src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_i2f);
|
|
+%}
|
|
+
|
|
+// float <-> long
|
|
+
|
|
+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{
|
|
+ match(Set dst (ConvF2L src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_f2l);
|
|
+%}
|
|
+
|
|
+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
|
|
+ match(Set dst (ConvL2F src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_l2f);
|
|
+%}
|
|
+
|
|
+// double <-> int
|
|
+
|
|
+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
|
|
+ match(Set dst (ConvD2I src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_d2i);
|
|
+%}
|
|
+
|
|
+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ConvI2D src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_i2d);
|
|
+%}
|
|
+
|
|
+// double <-> long
|
|
+
|
|
+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
|
|
+ match(Set dst (ConvD2L src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_d2l);
|
|
+%}
|
|
+
|
|
+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{
|
|
+ match(Set dst (ConvL2D src));
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_l2d);
|
|
+%}
|
|
+
|
|
+// Convert oop into int for vectors alignment masking
|
|
+instruct convP2I(iRegINoSp dst, iRegP src) %{
|
|
+ match(Set dst (ConvL2I (CastP2X src)));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ zero_extend($dst$$Register, $src$$Register, 32);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Convert compressed oop into int for vectors alignment masking
|
|
+// in case of 32bit oops (heap < 4Gb).
|
|
+instruct convN2I(iRegINoSp dst, iRegN src)
|
|
+%{
|
|
+ predicate(Universe::narrow_oop_shift() == 0);
|
|
+ match(Set dst (ConvL2I (CastP2X (DecodeN src))));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ mv($dst$$Register, $src$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Convert oop pointer into compressed form
|
|
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
|
|
+ match(Set dst (EncodeP src));
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %}
|
|
+ ins_encode %{
|
|
+ Register s = $src$$Register;
|
|
+ Register d = $dst$$Register;
|
|
+ __ encode_heap_oop(d, s);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{
|
|
+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
|
|
+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
|
|
+ match(Set dst (DecodeN src));
|
|
+
|
|
+ ins_cost(0);
|
|
+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %}
|
|
+ ins_encode %{
|
|
+ Register s = $src$$Register;
|
|
+ Register d = $dst$$Register;
|
|
+ __ decode_heap_oop(d, s);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{
|
|
+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
|
|
+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
|
|
+ match(Set dst (DecodeN src));
|
|
+
|
|
+ ins_cost(0);
|
|
+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %}
|
|
+ ins_encode %{
|
|
+ Register s = $src$$Register;
|
|
+ Register d = $dst$$Register;
|
|
+ __ decode_heap_oop_not_null(d, s);
|
|
+ %}
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
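The encode/decode rules above (and the klass variants that follow) implement the usual compressed-oops arithmetic: a 64-bit heap pointer is stored as a 32-bit offset from the heap base, optionally shifted. A rough C++ sketch of that arithmetic with illustrative parameter names; the real helpers also special-case null and the zero-base / zero-shift configurations:

  #include <cstdint>

  // Illustrative compressed-oops arithmetic, not the MacroAssembler code.
  static uint32_t encode_oop(uint64_t oop, uint64_t heap_base, int shift) {
    return static_cast<uint32_t>((oop - heap_base) >> shift);
  }
  static uint64_t decode_oop(uint32_t narrow, uint64_t heap_base, int shift) {
    return heap_base + (static_cast<uint64_t>(narrow) << shift);
  }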
|
|
+
|
|
+// Convert klass pointer into compressed form.
|
|
+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
|
|
+ match(Set dst (EncodePKlass src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register src_reg = as_Register($src$$reg);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ encode_klass_not_null(dst_reg, src_reg, t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
|
|
+ predicate(!maybe_use_tmp_register_decoding_klass());
|
|
+
|
|
+ match(Set dst (DecodeNKlass src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register src_reg = as_Register($src$$reg);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{
|
|
+ predicate(maybe_use_tmp_register_decoding_klass());
|
|
+
|
|
+ match(Set dst (DecodeNKlass src));
|
|
+
|
|
+ effect(TEMP tmp);
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Register src_reg = as_Register($src$$reg);
|
|
+ Register dst_reg = as_Register($dst$$reg);
|
|
+ Register tmp_reg = as_Register($tmp$$reg);
|
|
+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// stack <-> reg and reg <-> reg shuffles with no conversion
|
|
+
|
|
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
|
|
+
|
|
+ match(Set dst (MoveF2I src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+
|
|
+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_reg);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{
|
|
+
|
|
+ match(Set dst (MoveI2F src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+
|
|
+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
|
|
+
|
|
+ match(Set dst (MoveD2L src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+
|
|
+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(iload_reg_reg);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{
|
|
+
|
|
+ match(Set dst (MoveL2D src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(LOAD_COST);
|
|
+
|
|
+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{
|
|
+
|
|
+ match(Set dst (MoveF2I src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+
|
|
+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
|
|
+
|
|
+ match(Set dst (MoveI2F src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+
|
|
+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_reg);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{
|
|
+
|
|
+ match(Set dst (MoveD2L src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+
|
|
+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
|
|
+
|
|
+ match(Set dst (MoveL2D src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(STORE_COST);
|
|
+
|
|
+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(istore_reg_reg);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{
|
|
+
|
|
+ match(Set dst (MoveF2I src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_f2i);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{
|
|
+
|
|
+ match(Set dst (MoveI2F src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_i2f);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
|
|
+
|
|
+ match(Set dst (MoveD2L src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_d2l);
|
|
+
|
|
+%}
|
|
+
|
|
+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
|
|
+
|
|
+ match(Set dst (MoveL2D src));
|
|
+
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ ins_cost(XFER_COST);
|
|
+
|
|
+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(fp_l2d);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Compare Instructions which set the result of float comparisons in dest register.
|
|
+
|
|
+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
|
|
+%{
|
|
+ match(Set dst (CmpF3 op1 op2));
|
|
+
|
|
+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
|
|
+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
|
|
+ "bgtz $dst, done\n\t"
|
|
+ "feq.s $dst, $op1, $op2\n\t"
|
|
+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
|
|
+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
|
|
+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
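The comment inside the encoding states the CmpF3 contract: -1 for unordered or less-than, 0 for equal, +1 for greater-than, which matches Java's fcmpl-style three-way compare. A tiny reference implementation of that contract (illustrative, not the float_compare helper itself):

  // Reference semantics of CmpF3 with unordered_result < 0.
  static int cmp_f3(float op1, float op2) {
    if (op1 > op2)  return 1;
    if (op1 == op2) return 0;
    return -1;                // op1 < op2, or unordered (either input is NaN)
  }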
|
|
+
|
|
+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
|
|
+%{
|
|
+ match(Set dst (CmpD3 op1 op2));
|
|
+
|
|
+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
|
|
+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
|
|
+ "bgtz $dst, done\n\t"
|
|
+ "feq.d $dst, $op1, $op2\n\t"
|
|
+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
|
|
+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2)
|
|
+%{
|
|
+ match(Set dst (CmpL3 op1 op2));
|
|
+
|
|
+ ins_cost(ALU_COST * 3 + BRANCH_COST);
|
|
+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t"
|
|
+ "bnez $dst, done\n\t"
|
|
+ "slt $dst, $op1, $op2\n\t"
|
|
+ "neg $dst, $dst\t#@cmpL3_reg_reg"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg));
|
|
+ __ mv(as_Register($dst$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q)
|
|
+%{
|
|
+ match(Set dst (CmpLTMask p q));
|
|
+
|
|
+ ins_cost(2 * ALU_COST);
|
|
+
|
|
+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t"
|
|
+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg));
|
|
+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero)
|
|
+%{
|
|
+ match(Set dst (CmpLTMask op zero));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+
|
|
+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+
|
|
+// ============================================================================
|
|
+// Max and Min
|
|
+
|
|
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ match(Set dst (MinI src1 src2));
|
|
+
|
|
+ effect(DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(BRANCH_COST + ALU_COST * 2);
|
|
+ format %{
|
|
+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t"
|
|
+ "mv $dst, $src2\n\t"
|
|
+ "j Ldone\n\t"
|
|
+ "bind Lsrc1\n\t"
|
|
+ "mv $dst, $src1\n\t"
|
|
+ "bind\t#@minI_rReg"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Label Lsrc1, Ldone;
|
|
+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
|
|
+ __ j(Ldone);
|
|
+ __ bind(Lsrc1);
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
|
|
+ __ bind(Ldone);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
|
|
+%{
|
|
+ match(Set dst (MaxI src1 src2));
|
|
+
|
|
+ effect(DEF dst, USE src1, USE src2);
|
|
+
|
|
+ ins_cost(BRANCH_COST + ALU_COST * 2);
|
|
+ format %{
|
|
+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t"
|
|
+ "mv $dst, $src2\n\t"
|
|
+ "j Ldone\n\t"
|
|
+ "bind Lsrc1\n\t"
|
|
+ "mv $dst, $src1\n\t"
|
|
+ "bind\t#@maxI_rReg"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ Label Lsrc1, Ldone;
|
|
+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1);
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg));
|
|
+ __ j(Ldone);
|
|
+ __ bind(Lsrc1);
|
|
+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
|
|
+ __ bind(Ldone);
|
|
+
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Branch Instructions
|
|
+// Direct Branch.
|
|
+instruct branch(label lbl)
|
|
+%{
|
|
+ match(Goto);
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "j $lbl\t#@branch" %}
|
|
+
|
|
+ ins_encode(riscv_enc_j(lbl));
|
|
+
|
|
+ ins_pipe(pipe_branch);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Compare and Branch Instructions
|
|
+
|
|
+// Patterns for short (< 12KiB) variants
|
|
+
|
|
+// Compare flags and branch near instructions.
|
|
+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{
|
|
+ match(If cmp cr);
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label));
|
|
+ %}
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare signed int and branch near instructions
|
|
+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpI_branch'.
|
|
+ match(If cmp (CmpI op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpI_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpI op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare unsigned int and branch near instructions
|
|
+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpU_branch'.
|
|
+ match(If cmp (CmpU op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpU_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpU op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare signed long and branch near instructions
|
|
+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpL_branch'.
|
|
+ match(If cmp (CmpL op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpL_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpL op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare unsigned long and branch near instructions
|
|
+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpUL_branch'.
|
|
+ match(If cmp (CmpUL op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpUL_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpUL op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare pointer and branch near instructions
|
|
+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpP_branch'.
|
|
+ match(If cmp (CmpP op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpP_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpP op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare narrow pointer and branch near instructions
|
|
+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpN_branch'.
|
|
+ match(If cmp (CmpN op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpN_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpN op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare float and branch near instructions
|
|
+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpF_branch'.
|
|
+ match(If cmp (CmpF op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST);
|
|
+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpF_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpF op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST);
|
|
+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare double and branch near instructions
|
|
+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpD_branch'.
|
|
+ match(If cmp (CmpD op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST);
|
|
+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
|
|
+ as_FloatRegister($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpD_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpD op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST);
|
|
+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
|
|
+ as_FloatRegister($op2$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare signed int with zero and branch near instructions
|
|
+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpI_reg_imm0_branch'.
|
|
+ match(If cmp (CmpI op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpI_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpI op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare unsigned int with zero and branch near instructions
|
|
+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'.
|
|
+ match(If cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %}
|
|
+
|
|
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare signed long with zero and branch near instructions
|
|
+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpL_reg_imm0_branch'.
|
|
+ match(If cmp (CmpL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpL_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare unsigned long with zero and branch near instructions
|
|
+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'.
|
|
+ match(If cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare pointer with zero and branch near instructions
|
|
+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpP_reg_imm0_branch'.
|
|
+ match(If cmp (CmpP op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpP_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpP op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare narrow pointer with zero and branch near instructions
|
|
+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpN_reg_imm0_branch'.
|
|
+ match(If cmp (CmpN op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpN_reg_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpN op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Compare narrow pointer with pointer zero and branch near instructions
|
|
+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'.
|
|
+ match(If cmp (CmpP (DecodeN op1) zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
|
|
+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'.
|
|
+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+ ins_short_branch(1);
|
|
+%}
|
|
+
|
|
+// Patterns for far (20KiB) variants
|
|
+
|
|
+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{
|
|
+ match(If cmp cr);
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+// Compare signed int and branch far instructions
|
|
+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
|
|
+ match(If cmp (CmpI op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ // the format instruction [far_b$cmp] here is used as two instructions
|
|
+ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done); see the sketch after this instruct
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
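As the comment above says, a far branch is really two instructions: the condition is inverted and used for a short branch around an unconditional jump, which is why the far variants carry ins_cost(BRANCH_COST * 2). A small illustrative sketch of that expansion, which only collects instruction text and is not the MacroAssembler's cmp_branch:

  #include <string>
  #include <vector>

  // Illustrative expansion of a far compare-and-branch.
  static std::vector<std::string> far_cmp_branch(const std::string& bcond,
                                                 const std::string& bcond_neg,
                                                 const std::string& ops,
                                                 const std::string& target,
                                                 bool is_far) {
    if (!is_far) {
      return { bcond + " " + ops + ", " + target };   // single short-range branch
    }
    return { bcond_neg + " " + ops + ", done",        // skip the jump when the test fails
             "j " + target,                           // jal reaches targets beyond branch range
             "done:" };
  }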
|
|
+
|
|
+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpI op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
|
|
+ match(If cmp (CmpU op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpU op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
|
|
+ match(If cmp (CmpL op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpL op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
|
|
+ match(If cmp (CmpUL op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpUL op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpP op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpP op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpN op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpN op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
|
|
+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmp_branch);
|
|
+%}
|
|
+
|
|
+// Float compare and branch instructions
|
|
+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpF op1 op2));
|
|
+
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST * 2);
|
|
+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
|
+ *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+%}
|
|
+
|
|
+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpF op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST * 2);
|
|
+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
|
|
+ *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+%}
|
|
+
|
|
+// Double compare and branch instructions
|
|
+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpD op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST * 2);
|
|
+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
|
|
+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+%}
|
|
+
|
|
+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpD op1 op2));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(XFER_COST + BRANCH_COST * 2);
|
|
+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
|
|
+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_compare);
|
|
+%}
|
|
+
|
|
+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpI op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpI op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %}
|
|
+
|
|
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+// unsigned compare lt/ge instructions have no short variant with the same match rule (see the note below)
|
|
+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
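These lt/ge-against-zero rules exist only in a far form and cost a single BRANCH_COST because the comparison is decided statically: an unsigned value is never below zero and is always at or above it, so the encoder emits either nothing or a bare jump, exactly as the "j $lbl if $cmp == ge" format says. For reference:

  #include <cstdint>

  // "u < 0" and "u >= 0" are constant for any unsigned u, so the branch
  // degenerates to fall-through (lt) or an unconditional jump (ge).
  static bool unsigned_lt_zero(uint32_t u) { (void)u; return false; }  // never taken
  static bool unsigned_ge_zero(uint32_t u) { (void)u; return true;  }  // always taken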
|
|
+
|
|
+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpU op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+// unsigned compare lt/ge instructions have no short variant with the same match rule
|
|
+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(If cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %}
|
|
+
|
|
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
|
|
+%{
|
|
+ match(CountedLoopEnd cmp (CmpUL op1 zero));
|
|
+
|
|
+ effect(USE op1, USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %}
|
|
+
|
|
+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
|
|
+ match(If cmp (CmpP op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpP op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
|
|
+ match(If cmp (CmpN op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpN op1 zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
|
|
+ match(If cmp (CmpP (DecodeN op1) zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
|
|
+
|
|
+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
|
|
+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));
|
|
+ effect(USE lbl);
|
|
+
|
|
+ ins_cost(BRANCH_COST * 2);
|
|
+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_cmpz_branch);
|
|
+%}
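+// Note on the far_* variants above: a RISC-V conditional branch (beq/bne/
+// blt[u]/bge[u]) only reaches about +/-4 KiB, while jal reaches about +/-1 MiB.
+// When the label may be out of conditional-branch range, the enc_* helpers are
+// expected to invert the condition and branch over an unconditional jump,
+// roughly (illustrative sketch only):
+//
+//     bnez  op1, skip      // inverted condition
+//     j     lbl            // far target, reached via jal
+//   skip:
+//
+// which is why most far variants are costed at BRANCH_COST * 2; the unsigned
+// lt/ge forms against zero degenerate to a plain jump (or nothing) and keep
+// BRANCH_COST.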
|
|
+
|
|
+// ============================================================================
|
|
+// Conditional Move Instructions
|
|
+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{
|
|
+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
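+// There is no conditional-move instruction in base RV64, so, as the format
+// string above documents, enc_cmove is expected to branch on the negated
+// condition over a single register move, roughly (illustrative sketch only,
+// for $cop == lt):
+//
+//     bge   op1, op2, skip   // negated condition
+//     mv    dst, src
+//   skip:
+//
+// hence the ALU_COST + BRANCH_COST costing of these rules.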
|
|
+
|
|
+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{
|
|
+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{
|
|
+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
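+// Assumption worth noting: OR-ing MacroAssembler::unsigned_branch_mask into the
+// condition code presumably makes enc_cmove pick the unsigned branch forms
+// (bltu/bgeu) rather than the signed ones when skipping the move.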
|
|
+
|
|
+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
|
+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpUL\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{
|
|
+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{
|
|
+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src)));
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+
|
|
+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t"
|
|
+ "mv $dst, $src\n\t"
|
|
+ "skip:"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
|
|
+ as_Register($op1$$reg), as_Register($op2$$reg),
|
|
+ as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+
|
|
+// ============================================================================
|
|
+// Procedure Call/Return Instructions
|
|
+
|
|
+// Call Java Static Instruction
|
|
+
|
|
+instruct CallStaticJavaDirect(method meth)
|
|
+%{
|
|
+ match(CallStaticJava);
|
|
+
|
|
+ effect(USE meth);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %}
|
|
+
|
|
+ ins_encode( riscv_enc_java_static_call(meth),
|
|
+ riscv_enc_call_epilog );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// TO HERE
|
|
+
|
|
+// Call Java Dynamic Instruction
|
|
+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr)
|
|
+%{
|
|
+ match(CallDynamicJava);
|
|
+
|
|
+ effect(USE meth, KILL cr);
|
|
+
|
|
+ ins_cost(BRANCH_COST + ALU_COST * 6);
|
|
+
|
|
+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %}
|
|
+
|
|
+ ins_encode( riscv_enc_java_dynamic_call(meth),
|
|
+ riscv_enc_call_epilog );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// Call Runtime Instruction
|
|
+
|
|
+instruct CallRuntimeDirect(method meth, rFlagsReg cr)
|
|
+%{
|
|
+ match(CallRuntime);
|
|
+
|
|
+ effect(USE meth, KILL cr);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %}
|
|
+
|
|
+ ins_encode( riscv_enc_java_to_runtime(meth) );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// Call Runtime Instruction
|
|
+
|
|
+instruct CallLeafDirect(method meth, rFlagsReg cr)
|
|
+%{
|
|
+ match(CallLeaf);
|
|
+
|
|
+ effect(USE meth, KILL cr);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %}
|
|
+
|
|
+ ins_encode( riscv_enc_java_to_runtime(meth) );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// Call Runtime Instruction
|
|
+
|
|
+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
|
|
+%{
|
|
+ match(CallLeafNoFP);
|
|
+
|
|
+ effect(USE meth, KILL cr);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %}
|
|
+
|
|
+ ins_encode( riscv_enc_java_to_runtime(meth) );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Partial Subtype Check
|
|
+//
|
|
+// superklass array for an instance of the superklass. Set a hidden
|
|
+// internal cache on a hit (cache is checked with exposed code in
|
|
+// gen_subtype_check()). Return zero for a hit. The encoding
|
|
+// ALSO sets flags.
|
|
+
|
|
+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result)
|
|
+%{
|
|
+ match(Set result (PartialSubtypeCheck sub super));
|
|
+ effect(KILL temp, KILL cr);
|
|
+
|
|
+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
|
|
+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %}
|
|
+
|
|
+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result));
|
|
+
|
|
+ opcode(0x1); // Force zero of result reg on hit
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result,
|
|
+ immP0 zero, rFlagsReg cr)
|
|
+%{
|
|
+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
|
|
+ effect(KILL temp, KILL result);
|
|
+
|
|
+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
|
|
+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %}
|
|
+
|
|
+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result));
|
|
+
|
|
+ opcode(0x0); // Don't zero result reg on hit
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_compare($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
|
+ StrIntrinsicNode::UU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
|
+ StrIntrinsicNode::LL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
|
|
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
|
+ StrIntrinsicNode::UL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3,
|
|
+ rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
|
|
+ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
|
|
+ StrIntrinsicNode::LU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
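+// In the four string_compare rules above the UU/LL/UL/LU suffix is the
+// StrIntrinsicNode encoding of the inputs: U = UTF-16, L = Latin-1 (first
+// letter $str1, second $str2), so the mixed variants compare strings stored
+// with different compact-string encodings.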
|
|
+
|
|
+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
|
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ string_indexof($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ $tmp5$$Register, $tmp6$$Register,
|
|
+ $result$$Register, StrIntrinsicNode::UU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
|
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ string_indexof($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ $tmp5$$Register, $tmp6$$Register,
|
|
+ $result$$Register, StrIntrinsicNode::LL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
|
|
+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ string_indexof($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ $tmp5$$Register, $tmp6$$Register,
|
|
+ $result$$Register, StrIntrinsicNode::UL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
|
|
+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ int icnt2 = (int)$int_cnt2$$constant;
|
|
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, zr,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ icnt2, $result$$Register, StrIntrinsicNode::UU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
|
|
+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ int icnt2 = (int)$int_cnt2$$constant;
|
|
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, zr,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ icnt2, $result$$Register, StrIntrinsicNode::LL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
|
|
+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp)
|
|
+%{
|
|
+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
|
|
+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp);
|
|
+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ int icnt2 = (int)$int_cnt2$$constant;
|
|
+ __ string_indexof_linearscan($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, zr,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register,
|
|
+ icnt2, $result$$Register, StrIntrinsicNode::UL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
|
|
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp)
|
|
+%{
|
|
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
|
|
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp);
|
|
+
|
|
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
|
|
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register, false /* isL */);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+
|
|
+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
|
|
+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
|
|
+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp)
|
|
+%{
|
|
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
|
|
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
|
|
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp);
|
|
+
|
|
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
|
|
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
|
|
+ $tmp3$$Register, $tmp4$$Register, true /* isL */);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+// clearing of an array
|
|
+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV);
|
|
+ match(Set dummy (ClearArray cnt base));
|
|
+ effect(USE_KILL cnt, USE_KILL base, KILL cr);
|
|
+
|
|
+ ins_cost(4 * DEFAULT_COST);
|
|
+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ address tpc = __ zero_words($base$$Register, $cnt$$Register);
|
|
+ if (tpc == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long()
|
|
+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
|
|
+ match(Set dummy (ClearArray cnt base));
|
|
+ effect(USE_KILL base, KILL cr);
|
|
+
|
|
+ ins_cost(4 * DEFAULT_COST);
|
|
+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
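+// Note on the predicate above: the constant ClearArray length is presumably a
+// word count (it is passed straight to zero_words), while BlockZeroingLowLimit
+// is a byte threshold, hence the '>> LogBytesPerWord' before the unsigned
+// comparison.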
|
|
+
|
|
+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
|
|
+ iRegI_R10 result, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (StrEquals (Binary str1 str2) cnt));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
|
+
|
|
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_equals($str1$$Register, $str2$$Register,
|
|
+ $result$$Register, $cnt$$Register, 1);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
|
|
+ iRegI_R10 result, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (StrEquals (Binary str1 str2) cnt));
|
|
+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
|
|
+
|
|
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_equals($str1$$Register, $str2$$Register,
|
|
+ $result$$Register, $cnt$$Register, 2);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
|
+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
|
|
+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (AryEq ary1 ary2));
|
|
+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
|
+
|
|
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
|
|
+ ins_encode %{
|
|
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
|
|
+ $result$$Register, $tmp$$Register, 1);
|
|
+ if (tpc == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
|
+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
|
|
+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (AryEq ary1 ary2));
|
|
+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
|
|
+
|
|
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
|
|
+ ins_encode %{
|
|
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
|
|
+ $result$$Register, $tmp$$Register, 2);
|
|
+ if (tpc == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// Safepoint Instructions
|
|
+
|
|
+instruct safePoint(iRegP poll)
|
|
+%{
|
|
+ match(SafePoint poll);
|
|
+
|
|
+ ins_cost(2 * LOAD_COST);
|
|
+ format %{
|
|
+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint"
|
|
+ %}
|
|
+ ins_encode %{
|
|
+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type);
|
|
+ %}
|
|
+ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
|
|
+%}
|
|
+
|
|
+// ============================================================================
|
|
+// This name is KNOWN by the ADLC and cannot be changed.
|
|
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
|
|
+// for this guy.
|
|
+instruct tlsLoadP(javaThread_RegP dst)
|
|
+%{
|
|
+ match(Set dst (ThreadLocal));
|
|
+
|
|
+ ins_cost(0);
|
|
+
|
|
+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %}
|
|
+
|
|
+ size(0);
|
|
+
|
|
+ ins_encode( /*empty*/ );
|
|
+
|
|
+ ins_pipe(pipe_class_empty);
|
|
+%}
|
|
+
|
|
+// inlined locking and unlocking
|
|
+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
|
|
+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
|
|
+%{
|
|
+ match(Set cr (FastLock object box));
|
|
+ effect(TEMP tmp, TEMP tmp2);
|
|
+
|
|
+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3);
|
|
+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %}
|
|
+
|
|
+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2));
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers
|
|
+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
|
|
+%{
|
|
+ match(Set cr (FastUnlock object box));
|
|
+ effect(TEMP tmp, TEMP tmp2);
|
|
+
|
|
+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4);
|
|
+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %}
|
|
+
|
|
+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2));
|
|
+
|
|
+ ins_pipe(pipe_serial);
|
|
+%}
|
|
+
|
|
+// Tail Call; Jump from runtime stub to Java code.
|
|
+// Also known as an 'interprocedural jump'.
|
|
+// Target of jump will eventually return to caller.
|
|
+// TailJump below removes the return address.
|
|
+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
|
|
+%{
|
|
+ match(TailCall jump_target method_oop);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %}
|
|
+
|
|
+ ins_encode(riscv_enc_tail_call(jump_target));
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop)
|
|
+%{
|
|
+ match(TailJump jump_target ex_oop);
|
|
+
|
|
+ ins_cost(ALU_COST + BRANCH_COST);
|
|
+
|
|
+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %}
|
|
+
|
|
+ ins_encode(riscv_enc_tail_jmp(jump_target));
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// Create exception oop: created by stack-crawling runtime code.
|
|
+// Created exception is now available to this handler, and is setup
|
|
+// just prior to jumping to this handler. No code emitted.
|
|
+instruct CreateException(iRegP_R10 ex_oop)
|
|
+%{
|
|
+ match(Set ex_oop (CreateEx));
|
|
+
|
|
+ ins_cost(0);
|
|
+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %}
|
|
+
|
|
+ size(0);
|
|
+
|
|
+ ins_encode( /*empty*/ );
|
|
+
|
|
+ ins_pipe(pipe_class_empty);
|
|
+%}
|
|
+
|
|
+// Rethrow exception: The exception oop will come in the first
|
|
+// argument position. Then JUMP (not call) to the rethrow stub code.
|
|
+instruct RethrowException()
|
|
+%{
|
|
+ match(Rethrow);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "j rethrow_stub\t#@RethrowException" %}
|
|
+
|
|
+ ins_encode( riscv_enc_rethrow() );
|
|
+
|
|
+ ins_pipe(pipe_class_call);
|
|
+%}
|
|
+
|
|
+// Return Instruction
|
|
+// epilog node loads ret address into ra as part of frame pop
|
|
+instruct Ret()
|
|
+%{
|
|
+ match(Return);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+ format %{ "ret\t// return register, #@Ret" %}
|
|
+
|
|
+ ins_encode(riscv_enc_ret());
|
|
+
|
|
+ ins_pipe(pipe_branch);
|
|
+%}
|
|
+
|
|
+// Die now.
|
|
+instruct ShouldNotReachHere() %{
|
|
+ match(Halt);
|
|
+
|
|
+ ins_cost(BRANCH_COST);
|
|
+
|
|
+ format %{ "#@ShouldNotReachHere" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ if (is_reachable()) {
|
|
+ __ halt();
|
|
+ }
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_default);
|
|
+%}
|
|
+
|
|
+
|
|
+//----------PEEPHOLE RULES-----------------------------------------------------
|
|
+// These must follow all instruction definitions as they use the names
|
|
+// defined in the instructions definitions.
|
|
+//
|
|
+// peepmatch ( root_instr_name [preceding_instruction]* );
|
|
+//
|
|
+// peepconstraint %{
|
|
+// (instruction_number.operand_name relational_op instruction_number.operand_name
|
|
+// [, ...] );
|
|
+// // instruction numbers are zero-based using left to right order in peepmatch
|
|
+//
|
|
+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
|
|
+// // provide an instruction_number.operand_name for each operand that appears
|
|
+// // in the replacement instruction's match rule
|
|
+//
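+// An illustrative, commented-out example of the syntax (the instruction names
+// here are hypothetical and are not rules defined in this file):
+//
+// peephole %{
+//   // an add whose first input is produced by the preceding register move
+//   peepmatch ( addI_reg_reg movI );
+//   peepconstraint ( 0.src1 == 1.dst );
+//   // rewrite the add to read the move's source directly
+//   peepreplace ( addI_reg_reg( 0.dst 1.src 0.src2 ) );
+// %}
+//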
|
|
+// ---------VM FLAGS---------------------------------------------------------
|
|
+//
|
|
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
|
|
+//
|
|
+// Each peephole rule is given an identifying number starting with zero and
|
|
+// increasing by one in the order seen by the parser. An individual peephole
|
|
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
|
|
+// on the command-line.
|
|
+//
|
|
+// ---------CURRENT LIMITATIONS----------------------------------------------
|
|
+//
|
|
+// Only match adjacent instructions in same basic block
|
|
+// Only equality constraints
|
|
+// Only constraints between operands, not (0.dest_reg == RAX_enc)
|
|
+// Only one replacement instruction
|
|
+//
|
|
+//----------SMARTSPILL RULES---------------------------------------------------
|
|
+// These must follow all instruction definitions as they use the names
|
|
+// defined in the instructions definitions.
|
|
+
|
|
+// Local Variables:
|
|
+// mode: c++
|
|
+// End:
|
|
diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad
|
|
new file mode 100644
|
|
index 000000000..6f7055a39
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/riscv_b.ad
|
|
@@ -0,0 +1,605 @@
|
|
+//
|
|
+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+//
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
+// published by the Free Software Foundation.
|
|
+//
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
+// accompanied this code).
|
|
+//
|
|
+// You should have received a copy of the GNU General Public License version
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+//
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
+// questions.
|
|
+//
|
|
+//
|
|
+
|
|
+// RISCV Bit-Manipulation Extension Architecture Description File
|
|
+
|
|
+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{
|
|
+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
|
|
+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32));
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{
|
|
+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift)));
|
|
+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64));
|
|
+ effect(DEF dst, USE src);
|
|
+
|
|
+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
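+// The two rules above match the shift/or idiom used for constant-distance
+// rotates, e.g. (x >>> r) | (x << (32 - r)); the predicate checks that the two
+// shift amounts really sum to the operand width before emitting a single
+// roriw/rori from Zbb.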
|
|
+
|
|
+// ror expander
|
|
+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{
|
|
+ effect(DEF dst, USE src, USE shift);
|
|
+
|
|
+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %}
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// ror expander
|
|
+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{
|
|
+ effect(DEF dst, USE src, USE shift);
|
|
+
|
|
+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %}
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+
|
|
+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift))));
|
|
+
|
|
+ expand %{
|
|
+ rorI_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift))));
|
|
+
|
|
+ expand %{
|
|
+ rorI_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift))));
|
|
+
|
|
+ expand %{
|
|
+ rorL_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift))));
|
|
+
|
|
+ expand %{
|
|
+ rorL_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// rol expander
|
|
+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{
|
|
+ effect(DEF dst, USE src, USE shift);
|
|
+
|
|
+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %}
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// rol expander
|
|
+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{
|
|
+ effect(DEF dst, USE src, USE shift);
|
|
+
|
|
+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %}
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift))));
|
|
+
|
|
+ expand %{
|
|
+ rolI_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift))));
|
|
+
|
|
+ expand %{
|
|
+ rolI_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift))));
|
|
+
|
|
+ expand %{
|
|
+ rolL_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift))));
|
|
+
|
|
+ expand %{
|
|
+ rolL_reg_b(dst, src, shift);
|
|
+ %}
|
|
+%}
|
|
+
|
|
+// Convert oop into int for vectors alignment masking
|
|
+instruct convP2I_b(iRegINoSp dst, iRegP src) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (ConvL2I (CastP2X src)));
|
|
+
|
|
+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// byte to int
|
|
+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (RShiftI (LShiftI src lshift) rshift));
|
|
+
|
|
+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// int to short
|
|
+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (RShiftI (LShiftI src lshift) rshift));
|
|
+
|
|
+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// short to unsigned int
|
|
+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (AndI src mask));
|
|
+
|
|
+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// int to unsigned long (zero extend)
|
|
+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (AndL (ConvI2L src) mask));
|
|
+
|
|
+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %}
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ ins_encode %{
|
|
+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_shift);
|
|
+%}
|
|
+
|
|
+// BSWAP instructions
|
|
+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (ReverseBytesI src));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (ReverseBytesL src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (ReverseBytesUS src));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (ReverseBytesS src));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Shift Add Pointer
|
|
+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (AddP src1 (LShiftL src2 imm)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ shadd(as_Register($dst$$reg),
|
|
+ as_Register($src2$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ t0,
|
|
+ $imm$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
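+// Zba sh1add/sh2add/sh3add compute rs1 + (rs2 << {1,2,3}) in one instruction,
+// the classic scaled-index address computation. The shadd macro is assumed to
+// pick the variant from $imm, with t0 passed as a scratch register, presumably
+// for a slli+add fallback when the scale is out of the 1..3 range.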
|
|
+
|
|
+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ shadd(as_Register($dst$$reg),
|
|
+ as_Register($src2$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ t0,
|
|
+ $imm$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Shift Add Long
|
|
+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (AddL src1 (LShiftL src2 imm)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ shadd(as_Register($dst$$reg),
|
|
+ as_Register($src2$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ t0,
|
|
+ $imm$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{
|
|
+ predicate(UseZba);
|
|
+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ shadd(as_Register($dst$$reg),
|
|
+ as_Register($src2$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ t0,
|
|
+ $imm$$constant);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Zeros Count instructions
|
|
+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (CountLeadingZerosI src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (CountLeadingZerosL src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ clz(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (CountTrailingZerosI src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (CountTrailingZerosL src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Population Count instructions
|
|
+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{
|
|
+ predicate(UsePopCountInstruction);
|
|
+ match(Set dst (PopCountI src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "cpopw $dst, $src\t#@popCountI_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Note: Long.bitCount(long) returns an int.
|
|
+instruct popCountL_b(iRegINoSp dst, iRegL src) %{
|
|
+ predicate(UsePopCountInstruction);
|
|
+ match(Set dst (PopCountL src));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "cpop $dst, $src\t#@popCountL_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg);
|
|
+%}
|
|
+
|
|
+// Max and Min
|
|
+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (MinI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (MaxI src1 src2));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Abs
|
|
+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (AbsI src));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{
|
|
+ "negw t0, $src\n\t"
|
|
+ "max $dst, $src, t0\t#@absI_reg_b"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ negw(t0, as_Register($src$$reg));
|
|
+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (AbsL src));
|
|
+
|
|
+ ins_cost(ALU_COST * 2);
|
|
+ format %{
|
|
+ "neg t0, $src\n\t"
|
|
+ "max $dst, $src, t0\t#@absL_reg_b"
|
|
+ %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ neg(t0, as_Register($src$$reg));
|
|
+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// And Not
|
|
+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (AndI src1 (XorI src2 m1)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ andn(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (AndL src1 (XorL src2 m1)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ andn(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+// Or Not
|
|
+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrI src1 (XorI src2 m1)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ orn(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
+
|
|
+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
|
|
+ predicate(UseZbb);
|
|
+ match(Set dst (OrL src1 (XorL src2 m1)));
|
|
+
|
|
+ ins_cost(ALU_COST);
|
|
+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ orn(as_Register($dst$$reg),
|
|
+ as_Register($src1$$reg),
|
|
+ as_Register($src2$$reg));
|
|
+ %}
|
|
+
|
|
+ ins_pipe(ialu_reg_reg);
|
|
+%}
|
|
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
|
|
new file mode 100644
|
|
index 000000000..905041890
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
|
|
@@ -0,0 +1,1723 @@
|
|
+//
|
|
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+// Copyright (c) 2020, Arm Limited. All rights reserved.
|
|
+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+//
|
|
+// This code is free software; you can redistribute it and/or modify it
|
|
+// under the terms of the GNU General Public License version 2 only, as
|
|
+// published by the Free Software Foundation.
|
|
+//
|
|
+// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+// version 2 for more details (a copy is included in the LICENSE file that
|
|
+// accompanied this code).
|
|
+//
|
|
+// You should have received a copy of the GNU General Public License version
|
|
+// 2 along with this work; if not, write to the Free Software Foundation,
|
|
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+//
|
|
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+// or visit www.oracle.com if you need additional information or have any
|
|
+// questions.
|
|
+//
|
|
+//
|
|
+
|
|
+// RISCV VEC Architecture Description File
|
|
+
|
|
+opclass vmemA(indirect);
|
|
+
|
|
+source_hpp %{
|
|
+ bool op_vec_supported(int opcode);
|
|
+%}
|
|
+
|
|
+source %{
|
|
+
|
|
+ static inline BasicType vector_element_basic_type(const MachNode* n) {
|
|
+ const TypeVect* vt = n->bottom_type()->is_vect();
|
|
+ return vt->element_basic_type();
|
|
+ }
|
|
+
|
|
+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
|
|
+ int def_idx = use->operand_index(opnd);
|
|
+ Node* def = use->in(def_idx);
|
|
+ const TypeVect* vt = def->bottom_type()->is_vect();
|
|
+ return vt->element_basic_type();
|
|
+ }
|
|
+
|
|
+ static void loadStore(MacroAssembler masm, bool is_store,
|
|
+ VectorRegister reg, BasicType bt, Register base) {
|
|
+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt);
|
|
+ masm.vsetvli(t0, x0, sew);
|
|
+ if (is_store) {
|
|
+ masm.vsex_v(reg, base, sew);
|
|
+ } else {
|
|
+ masm.vlex_v(reg, base, sew);
|
|
+ }
|
|
+ }
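+  // The helper above first programs SEW/VL for the element type via vsetvli
+  // (x0 as the AVL operand with a non-x0 destination requests the maximum
+  // vector length), then issues a single unit-stride vector load or store
+  // through the port's vlex_v/vsex_v wrappers.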
|
|
+
|
|
+ bool op_vec_supported(int opcode) {
|
|
+ switch (opcode) {
|
|
+ // No multiply reduction instructions
|
|
+ case Op_MulReductionVD:
|
|
+ case Op_MulReductionVF:
|
|
+ case Op_MulReductionVI:
|
|
+ case Op_MulReductionVL:
|
|
+ // Others
|
|
+ case Op_Extract:
|
|
+ case Op_ExtractB:
|
|
+ case Op_ExtractC:
|
|
+ case Op_ExtractD:
|
|
+ case Op_ExtractF:
|
|
+ case Op_ExtractI:
|
|
+ case Op_ExtractL:
|
|
+ case Op_ExtractS:
|
|
+ case Op_ExtractUB:
|
|
+ return false;
|
|
+ default:
|
|
+ return UseRVV;
|
|
+ }
|
|
+ }
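+  // op_vec_supported() rejects the opcodes listed above (multiply reductions
+  // and scalar Extract* nodes) and otherwise gates vectorization on UseRVV;
+  // it is presumably consulted from Matcher::match_rule_supported over in
+  // riscv.ad.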
|
|
+
|
|
+%}
|
|
+
|
|
+definitions %{
|
|
+ int_def VEC_COST (200, 200);
|
|
+%}
|
|
+
|
|
+// All VEC instructions
|
|
+
|
|
+// vector load/store
|
|
+instruct loadV(vReg dst, vmemA mem) %{
|
|
+ match(Set dst (LoadVector mem));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vle $dst, $mem\t#@loadV" %}
|
|
+ ins_encode %{
|
|
+ VectorRegister dst_reg = as_VectorRegister($dst$$reg);
|
|
+ loadStore(MacroAssembler(&cbuf), false, dst_reg,
|
|
+ vector_element_basic_type(this), as_Register($mem$$base));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct storeV(vReg src, vmemA mem) %{
|
|
+ match(Set mem (StoreVector mem src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vse $src, $mem\t#@storeV" %}
|
|
+ ins_encode %{
|
|
+ VectorRegister src_reg = as_VectorRegister($src$$reg);
|
|
+ loadStore(MacroAssembler(&cbuf), true, src_reg,
|
|
+ vector_element_basic_type(this, $src), as_Register($mem$$base));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector abs
|
|
+
|
|
+instruct vabsB(vReg dst, vReg src, vReg tmp) %{
|
|
+ match(Set dst (AbsVB src));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t"
|
|
+ "vmax.vv $dst, $tmp, $src" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
|
|
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
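+// The rule above (and the vabsS/I/L variants that follow) computes abs(x) as
+// max(x, 0 - x): vrsub with immediate 0 negates each element into $tmp, then
+// vmax.vv keeps the non-negative value per lane.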
|
|
+
|
|
+instruct vabsS(vReg dst, vReg src, vReg tmp) %{
|
|
+ match(Set dst (AbsVS src));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t"
|
|
+ "vmax.vv $dst, $tmp, $src" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
|
|
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vabsI(vReg dst, vReg src, vReg tmp) %{
|
|
+ match(Set dst (AbsVI src));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t"
|
|
+ "vmax.vv $dst, $tmp, $src" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
|
|
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vabsL(vReg dst, vReg src, vReg tmp) %{
|
|
+ match(Set dst (AbsVL src));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t"
|
|
+ "vmax.vv $dst, $tmp, $src" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
|
|
+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vabsF(vReg dst, vReg src) %{
|
|
+ match(Set dst (AbsVF src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vabsD(vReg dst, vReg src) %{
|
|
+ match(Set dst (AbsVD src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector add
|
|
+
|
|
+instruct vaddB(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVB src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vaddS(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVS src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vaddI(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVI src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vaddL(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVL src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vaddF(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVF src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vaddD(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AddVD src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfadd_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector and
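+// Bitwise AND/OR/XOR do not depend on the element size, so a single pattern
+// per operation using SEW = e64 covers every lane type.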
+
|
|
+instruct vand(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (AndV src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vand_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector or
|
|
+
|
|
+instruct vor(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (OrV src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector xor
|
|
+
|
|
+instruct vxor(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (XorV src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector float div
|
|
+
|
|
+instruct vdivF(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (DivVF src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfdiv_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vdivD(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (DivVD src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfdiv_vv(as_VectorRegister($dst$$reg),
|
|
+ as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector fmla
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
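+// vfmacc.vv accumulates in place (vd = vs1 * vs2 + vd), so the pattern ties
+// the result and the addend to the same operand, dst_src1.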
+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector fmls
|
|
+
|
|
+// dst_src1 = dst_src1 + -src2 * src3
|
|
+// dst_src1 = dst_src1 + src2 * -src3
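+// vfnmsac.vv computes vd = -(vs1 * vs2) + vd, which covers both negated-operand
+// forms, hence the two match rules.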
+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
|
|
+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 + -src2 * src3
|
|
+// dst_src1 = dst_src1 + src2 * -src3
|
|
+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
|
|
+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector fnmla
|
|
+
|
|
+// dst_src1 = -dst_src1 + -src2 * src3
|
|
+// dst_src1 = -dst_src1 + src2 * -src3
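+// vfnmacc.vv computes vd = -(vs1 * vs2) - vd: both the product and the
+// accumulator are negated, matching the two NegV forms.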
+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = -dst_src1 + -src2 * src3
|
|
+// dst_src1 = -dst_src1 + src2 * -src3
|
|
+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector fnmls
|
|
+
|
|
+// dst_src1 = -dst_src1 + src2 * src3
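+// vfmsac.vv computes vd = (vs1 * vs2) - vd: only the accumulator is negated.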
+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = -dst_src1 + src2 * src3
|
|
+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ predicate(UseFMA);
|
|
+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector mla
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
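+// Integer multiply-add maps to vmacc.vv (vd = vs1 * vs2 + vd); the four
+// patterns below differ only in the SEW selected by vsetvli.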
+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 + src2 * src3
|
|
+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector mls
|
|
+
|
|
+// dst_src1 = dst_src1 - src2 * src3
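+// Integer multiply-subtract maps to vnmsac.vv (vd = -(vs1 * vs2) + vd).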
+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 - src2 * src3
|
|
+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 - src2 * src3
|
|
+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// dst_src1 = dst_src1 - src2 * src3
|
|
+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
|
|
+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
|
|
+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector mul
|
|
+
|
|
+instruct vmulB(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVB src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vmulS(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVS src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vmulI(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVI src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vmulL(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVL src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vmulF(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVF src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vmulD(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (MulVD src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector fneg
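+// vfneg.v is the standard pseudo for vfsgnjn.vv vd, vs, vs: each element's
+// sign bit is replaced by its negation.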
+
|
|
+instruct vnegF(vReg dst, vReg src) %{
|
|
+ match(Set dst (NegVF src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vnegD(vReg dst, vReg src) %{
|
|
+ match(Set dst (NegVD src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// popcount vector
|
|
+
|
|
+instruct vpopcountI(iRegINoSp dst, vReg src) %{
|
|
+ match(Set dst (PopCountVI src));
|
|
+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector add reduction
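+// Reductions seed element 0 of a temporary with the scalar input (vmv.s.x /
+// vfmv.s.f), fold every element of src2 plus that seed into element 0 with
+// vredsum.vs / vfredosum.vs, and move the result back to a scalar register.
+// The ordered vfredosum.vs is used for float/double so that the summation
+// order matches sequential Java code.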
+
|
|
+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
|
|
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
|
|
+ match(Set dst (AddReductionVI src1 src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t"
|
|
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vmv.x.s $dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
|
|
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
|
|
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
|
|
+ match(Set dst (AddReductionVI src1 src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t"
|
|
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vmv.x.s $dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
|
|
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
|
|
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
|
|
+ match(Set dst (AddReductionVI src1 src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t"
|
|
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vmv.x.s $dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
|
|
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
|
|
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
|
|
+ match(Set dst (AddReductionVL src1 src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t"
|
|
+ "vredsum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vmv.x.s $dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
|
|
+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{
|
|
+ match(Set src1_dst (AddReductionVF src1_dst src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t"
|
|
+ "vfredosum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vfmv.f.s $src1_dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
|
|
+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{
|
|
+ match(Set src1_dst (AddReductionVD src1_dst src2));
|
|
+ effect(TEMP tmp);
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t"
|
|
+ "vfredosum.vs $tmp, $src2, $tmp\n\t"
|
|
+ "vfmv.f.s $src1_dst, $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
|
|
+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
|
|
+ as_VectorRegister($tmp$$reg));
|
|
+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector replicate
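+// Broadcasts: vmv.v.x splats an x-register, vmv.v.i splats a 5-bit signed
+// immediate (hence the immI5/immL5 operands), and vfmv.v.f splats an
+// f-register.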
+
|
|
+instruct replicateB(vReg dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReplicateB src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateS(vReg dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReplicateS src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateI(vReg dst, iRegIorL2I src) %{
|
|
+ match(Set dst (ReplicateI src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateL(vReg dst, iRegL src) %{
|
|
+ match(Set dst (ReplicateL src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateB_imm5(vReg dst, immI5 con) %{
|
|
+ match(Set dst (ReplicateB con));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateS_imm5(vReg dst, immI5 con) %{
|
|
+ match(Set dst (ReplicateS con));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateI_imm5(vReg dst, immI5 con) %{
|
|
+ match(Set dst (ReplicateI con));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateL_imm5(vReg dst, immL5 con) %{
|
|
+ match(Set dst (ReplicateL con));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateF(vReg dst, fRegF src) %{
|
|
+ match(Set dst (ReplicateF src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct replicateD(vReg dst, fRegD src) %{
|
|
+ match(Set dst (ReplicateD src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector shift
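+// RVV vector shifts use only the low log2(SEW) bits of each per-element shift
+// amount, but Java defines shifts of byte/short operands after promotion to
+// int, so counts in [lane_bits, 31] still have meaning. The sub-int
+// variable-shift patterns below therefore split the lanes with a v0 mask
+// (vmsgtu.vi): for counts > lane_bits - 1, arithmetic right shifts are clamped
+// to lane_bits - 1 and left / logical right shifts produce zero (vxor of src
+// with itself); the mask is then inverted (vmnot.m) and the remaining lanes
+// take the ordinary vector-vector shift.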
+
|
|
+instruct vasrB(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (RShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t"
|
|
+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0\n\t"
|
|
+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ // if shift > BitsPerByte - 1, use a shift of BitsPerByte - 1 (result is all sign bits)
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ BitsPerByte - 1, Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrS(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (RShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t"
|
|
+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0\n\t"
|
|
+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ // if shift > BitsPerShort - 1, use a shift of BitsPerShort - 1 (result is all sign bits)
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ BitsPerShort - 1, Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrI(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (RShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrL(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (RShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslB(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (LShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t"
|
|
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0\n\t"
|
|
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ // if shift > BitsPerByte - 1, clear the element
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg), Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslS(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (LShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t"
|
|
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0\n\t"
|
|
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ // if shift > BitsPerShort - 1, clear the element
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg), Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslI(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (LShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslL(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (LShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrB(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (URShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t"
|
|
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0, v0\n\t"
|
|
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ // if shift > BitsPerByte - 1, clear the element
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg), Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrS(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (URShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP_DEF dst);
|
|
+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t"
|
|
+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
|
|
+ "vmnot.m v0, v0\n\t"
|
|
+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ // if shift > BitsPerShort - 1, clear the element
|
|
+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg), Assembler::v0_t);
|
|
+ // otherwise, shift
|
|
+ __ vmnot_m(v0, v0);
|
|
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg), Assembler::v0_t);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+
|
|
+instruct vlsrI(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (URShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+
|
|
+instruct vlsrL(vReg dst, vReg src, vReg shift) %{
|
|
+ match(Set dst (URShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($shift$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
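+// Immediate-shift variants: in the right-shift forms a count of 0 degenerates
+// to a plain register move (vor.vv of src with itself), out-of-range counts
+// are clamped or zeroed as above, and because the .vi shift forms encode only
+// a 5-bit unsigned immediate, 64-bit shifts by 32..63 fall back to
+// broadcasting the count (li + vmv.v.x) and using the .vv form.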
+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (RShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con >= BitsPerByte) con = BitsPerByte - 1;
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (RShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con >= BitsPerShort) con = BitsPerShort - 1;
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (RShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{
|
|
+ predicate((n->in(2)->get_int() & 0x3f) < 64);
|
|
+ match(Set dst (RShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x3f;
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con < 32) {
|
|
+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ } else {
|
|
+ __ li(t0, con);
|
|
+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0);
|
|
+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg));
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (URShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con >= BitsPerByte) {
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (URShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con >= BitsPerShort) {
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (URShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{
|
|
+ predicate((n->in(2)->get_int() & 0x3f) < 64);
|
|
+ match(Set dst (URShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x3f;
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ if (con == 0) {
|
|
+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ if (con < 32) {
|
|
+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ } else {
|
|
+ __ li(t0, con);
|
|
+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0);
|
|
+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg));
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (LShiftVB src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ if (con >= BitsPerByte) {
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (LShiftVS src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ if (con >= BitsPerShort) {
|
|
+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
|
|
+ as_VectorRegister($src$$reg));
|
|
+ return;
|
|
+ }
|
|
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
|
|
+ match(Set dst (LShiftVI src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x1f;
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{
|
|
+ predicate((n->in(2)->get_int() & 0x3f) < 64);
|
|
+ match(Set dst (LShiftVL src shift));
|
|
+ ins_cost(VEC_COST);
|
|
+ effect(TEMP tmp);
|
|
+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %}
|
|
+ ins_encode %{
|
|
+ uint32_t con = (unsigned)$shift$$constant & 0x3f;
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ if (con < 32) {
|
|
+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
|
|
+ } else {
|
|
+ __ li(t0, con);
|
|
+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0);
|
|
+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg));
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
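+// LShiftCntV/RShiftCntV broadcast a scalar shift count into a vector register
+// for the variable-shift patterns above; the element-type predicate only
+// selects the SEW used for the splat.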
+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
|
|
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
|
|
+ match(Set dst (LShiftCntV cnt));
|
|
+ match(Set dst (RShiftCntV cnt));
|
|
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
|
|
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
|
|
+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR);
|
|
+ match(Set dst (LShiftCntV cnt));
|
|
+ match(Set dst (RShiftCntV cnt));
|
|
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
|
|
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
|
|
+ match(Set dst (LShiftCntV cnt));
|
|
+ match(Set dst (RShiftCntV cnt));
|
|
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
|
|
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
|
|
+ match(Set dst (LShiftCntV cnt));
|
|
+ match(Set dst (RShiftCntV cnt));
|
|
+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector sqrt
|
|
+
|
|
+instruct vsqrtF(vReg dst, vReg src) %{
|
|
+ match(Set dst (SqrtVF src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsqrtD(vReg dst, vReg src) %{
|
|
+ match(Set dst (SqrtVD src));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+// vector sub
|
|
+
|
|
+instruct vsubB(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVB src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e8);
|
|
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsubS(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVS src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e16);
|
|
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsubI(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVI src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsubL(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVL src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsubF(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVF src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e32);
|
|
+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
|
|
+instruct vsubD(vReg dst, vReg src1, vReg src2) %{
|
|
+ match(Set dst (SubVD src1 src2));
|
|
+ ins_cost(VEC_COST);
|
|
+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %}
|
|
+ ins_encode %{
|
|
+ __ vsetvli(t0, x0, Assembler::e64);
|
|
+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
|
|
+ as_VectorRegister($src2$$reg));
|
|
+ %}
|
|
+ ins_pipe(pipe_slow);
|
|
+%}
|
|
+
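+// The string/array intrinsics below delegate to the vectorized helpers in the
+// RISC-V macro assembler (string_equals_v, arrays_equals_v, string_compare_v,
+// byte_array_inflate_v, ...). The fixed-register operand classes (R10-R14,
+// R28/R29, V1-V5) pin the inputs to the registers those helpers expect, and
+// USE_KILL/TEMP/KILL tell the register allocator what gets clobbered. The
+// trailing 1 or 2 passed to the equals helpers is the element size in bytes
+// (Latin-1 vs UTF-16).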
+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
|
|
+ iRegI_R10 result, vReg_V1 v1,
|
|
+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (StrEquals (Binary str1 str2) cnt));
|
|
+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3);
|
|
+
|
|
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_equals_v($str1$$Register, $str2$$Register,
|
|
+ $result$$Register, $cnt$$Register, 1);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
|
|
+ iRegI_R10 result, vReg_V1 v1,
|
|
+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (StrEquals (Binary str1 str2) cnt));
|
|
+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3);
|
|
+
|
|
+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_equals_v($str1$$Register, $str2$$Register,
|
|
+ $result$$Register, $cnt$$Register, 2);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
|
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6)
|
|
+%{
|
|
+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result (AryEq ary1 ary2));
|
|
+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6);
|
|
+
|
|
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ arrays_equals_v($ary1$$Register, $ary2$$Register,
|
|
+ $result$$Register, $tmp$$Register, 1);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
|
|
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6)
|
|
+%{
|
|
+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result (AryEq ary1 ary2));
|
|
+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6);
|
|
+
|
|
+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
|
|
+ ins_encode %{
|
|
+ __ arrays_equals_v($ary1$$Register, $ary2$$Register,
|
|
+ $result$$Register, $tmp$$Register, 2);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
|
|
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);
|
|
+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
|
|
+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
|
|
+ ins_encode %{
|
|
+ // Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
|
+ __ string_compare_v($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ StrIntrinsicNode::UU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
|
|
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
|
|
+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
|
|
+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare_v($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ StrIntrinsicNode::LL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
|
|
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
|
|
+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
|
|
+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
|
|
+
|
|
+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare_v($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ StrIntrinsicNode::UL);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
|
|
+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
|
|
+ iRegP_R28 tmp1, iRegL_R29 tmp2)
|
|
+%{
|
|
+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
|
|
+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
|
|
+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);
|
|
+
|
|
+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
|
|
+ ins_encode %{
|
|
+ __ string_compare_v($str1$$Register, $str2$$Register,
|
|
+ $cnt1$$Register, $cnt2$$Register, $result$$Register,
|
|
+ $tmp1$$Register, $tmp2$$Register,
|
|
+ StrIntrinsicNode::LU);
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+// fast byte[] to char[] inflation
|
|
+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len,
|
|
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
|
|
+%{
|
|
+ predicate(UseRVV);
|
|
+ match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
|
+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);
|
|
+
|
|
+ format %{ "String Inflate $src,$dst" %}
|
|
+ ins_encode %{
|
|
+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
|
|
+ if (tpc == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
+
|
|
+// encode char[] to byte[] in ISO_8859_1
|
|
+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
|
|
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
|
|
+%{
|
|
+ predicate(UseRVV);
|
|
+ match(Set result (EncodeISOArray src (Binary dst len)));
|
|
+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
|
|
+
|
|
+ format %{ "Encode array $src,$dst,$len -> $result" %}
|
|
+ ins_encode %{
|
|
+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register,
|
|
+ $result$$Register, $tmp$$Register);
|
|
+ %}
|
|
+ ins_pipe( pipe_class_memory );
|
|
+%}
|
|
+
|
|
+// fast char[] to byte[] compression
|
|
+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
|
|
+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp)
|
|
+%{
|
|
+ predicate(UseRVV);
|
|
+ match(Set result (StrCompressedCopy src (Binary dst len)));
|
|
+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
|
|
+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp);
|
|
+
|
|
+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %}
|
|
+ ins_encode %{
|
|
+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register,
|
|
+ $result$$Register, $tmp$$Register);
|
|
+ %}
|
|
+ ins_pipe( pipe_slow );
|
|
+%}
|
|
+
|
|
+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp)
|
|
+%{
|
|
+ predicate(UseRVV);
|
|
+ match(Set result (HasNegatives ary1 len));
|
|
+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp);
|
|
+ format %{ "has negatives byte[] $ary1,$len -> $result" %}
|
|
+ ins_encode %{
|
|
+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register);
|
|
+ if (tpc == NULL) {
|
|
+ ciEnv::current()->record_failure("CodeCache is full");
|
|
+ return;
|
|
+ }
|
|
+ %}
|
|
+ ins_pipe( pipe_slow );
|
|
+%}
|
|
+
|
|
+// clearing of an array
|
|
+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
|
|
+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3)
|
|
+%{
|
|
+ predicate(UseRVV);
|
|
+ match(Set dummy (ClearArray cnt base));
|
|
+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3);
|
|
+
|
|
+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
|
|
+
|
|
+ ins_encode %{
|
|
+ __ clear_array_v($base$$Register, $cnt$$Register);
|
|
+ %}
|
|
+
|
|
+ ins_pipe(pipe_class_memory);
|
|
+%}
|
|
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..9922ff4cf
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
|
|
@@ -0,0 +1,2738 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "code/debugInfoRec.hpp"
|
|
+#include "code/icBuffer.hpp"
|
|
+#include "code/vtableStubs.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "logging/log.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "oops/compiledICHolder.hpp"
|
|
+#include "runtime/safepointMechanism.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/vframeArray.hpp"
|
|
+#include "utilities/align.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+#ifdef COMPILER1
|
|
+#include "c1/c1_Runtime1.hpp"
|
|
+#endif
|
|
+#ifdef COMPILER2
|
|
+#include "adfiles/ad_riscv.hpp"
|
|
+#include "opto/runtime.hpp"
|
|
+#endif
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
|
|
+
|
|
+class SimpleRuntimeFrame {
|
|
+public:
|
|
+
|
|
+ // Most of the runtime stubs have this simple frame layout.
|
|
+ // This class exists to make the layout shared in one place.
|
|
+ // Offsets are for compiler stack slots, which are jints.
|
|
+ enum layout {
|
|
+ // The frame sender code expects that fp will be in the "natural" place and
|
|
+ // will override any oopMap setting for it. We must therefore force the layout
|
|
+ // so that it agrees with the frame sender code.
|
|
+ // we don't expect any arg reg save area so riscv asserts that
|
|
+ // frame::arg_reg_save_area_bytes == 0
|
|
+ fp_off = 0, fp_off2,
|
|
+ return_off, return_off2,
|
|
+ framesize
|
|
+ };
|
|
+};
|
|
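For orientation, a small worked note (an illustration added here, not part of the patch): the enum counts 4-byte compiler slots, so with the 8-byte word size used throughout this file the frame body is just the saved fp and return address.

    //   fp occupies slots {fp_off, fp_off2}         -> bytes [0, 8)
    //   ra occupies slots {return_off, return_off2} -> bytes [8, 16)
    //   framesize == 4 slots == 16 bytes == 2 words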
+
|
|
+class RegisterSaver {
|
|
+ const bool _save_vectors;
|
|
+ public:
|
|
+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {}
|
|
+ ~RegisterSaver() {}
|
|
+ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
|
|
+ void restore_live_registers(MacroAssembler* masm);
|
|
+
|
|
+ // Offsets into the register save area
|
|
+ // Used by deoptimization when it is managing result register
|
|
+ // values on its own
|
|
+ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & x3(gp) & x4(tp)
|
|
+ // |---v0---|<---SP
|
|
+ // |---v1---|save vectors only in generate_handler_blob
|
|
+ // |-- .. --|
|
|
+ // |---v31--|-----
|
|
+ // |---f0---|
|
|
+ // |---f1---|
|
|
+ // | .. |
|
|
+ // |---f31--|
|
|
+ // |---reserved slot for stack alignment---|
|
|
+ // |---x5---|
|
|
+ // | x6 |
|
|
+ // |---.. --|
|
|
+ // |---x31--|
|
|
+ // |---fp---|
|
|
+ // |---ra---|
|
|
+ int v0_offset_in_bytes(void) { return 0; }
|
|
+ int f0_offset_in_bytes(void) {
|
|
+ int f0_offset = 0;
|
|
+#ifdef COMPILER2
|
|
+ if (_save_vectors) {
|
|
+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers *
|
|
+ BytesPerInt;
|
|
+ }
|
|
+#endif
|
|
+ return f0_offset;
|
|
+ }
|
|
+ int reserved_slot_offset_in_bytes(void) {
|
|
+ return f0_offset_in_bytes() +
|
|
+ FloatRegisterImpl::max_slots_per_register *
|
|
+ FloatRegisterImpl::number_of_registers *
|
|
+ BytesPerInt;
|
|
+ }
|
|
+
|
|
+ int reg_offset_in_bytes(Register r) {
|
|
+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved");
|
|
+ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize;
|
|
+ }
|
|
+
|
|
+ int freg_offset_in_bytes(FloatRegister f) {
|
|
+ return f0_offset_in_bytes() + f->encoding() * wordSize;
|
|
+ }
|
|
+
|
|
+ int ra_offset_in_bytes(void) {
|
|
+ return reserved_slot_offset_in_bytes() +
|
|
+ (RegisterImpl::number_of_registers - 3) *
|
|
+ RegisterImpl::max_slots_per_register *
|
|
+ BytesPerInt;
|
|
+ }
|
|
+
|
|
+ // During deoptimization only the result registers need to be restored,
|
|
+ // all the other values have already been extracted.
|
|
+ void restore_result_registers(MacroAssembler* masm);
|
|
+};
|
|
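To see how these helpers compose, here is a standalone sketch of the arithmetic (not HotSpot code; it assumes 32 integer and 32 float registers, two 4-byte slots per 64-bit register, an 8-byte word, and no vector state saved, i.e. _save_vectors is false):

    #include <cstdio>

    int main() {
      const int BytesPerInt = 4, wordSize = 8;
      const int fpr_count = 32, gpr_count = 32, slots_per_reg = 2;

      int f0_offset       = 0;                                        // no vectors saved
      int reserved_offset = f0_offset + slots_per_reg * fpr_count * BytesPerInt;
      int x10_offset      = reserved_offset + (10 - 4) * wordSize;    // x1..x4 not saved
      int ra_offset       = reserved_offset +
                            (gpr_count - 3) * slots_per_reg * BytesPerInt;

      std::printf("f0=%d reserved=%d x10=%d ra=%d\n",
                  f0_offset, reserved_offset, x10_offset, ra_offset); // 0 256 304 488
    }

Under those assumptions, restore_result_registers() reloads the integer result from sp + 304 and pops align_up(488, 16) == 496 bytes of save area.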
+
|
|
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
|
|
+ int vector_size_in_bytes = 0;
|
|
+ int vector_size_in_slots = 0;
|
|
+#ifdef COMPILER2
|
|
+ if (_save_vectors) {
|
|
+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE);
|
|
+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16);
|
|
+ // OopMap frame size is in compiler stack slots (jint's) not bytes or words
|
|
+ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
|
|
+ // The caller will allocate additional_frame_words
|
|
+ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
|
|
+ // CodeBlob frame size is in words.
|
|
+ int frame_size_in_words = frame_size_in_bytes / wordSize;
|
|
+ *total_frame_words = frame_size_in_words;
|
|
+
|
|
+ // Save Integer, Float and Vector registers.
|
|
+ __ enter();
|
|
+ __ push_CPU_state(_save_vectors, vector_size_in_bytes);
|
|
+
|
|
+ // Set an oopmap for the call site. This oopmap will map all
|
|
+ // oop-registers and debug-info registers as callee-saved. This
|
|
+ // will allow deoptimization at this safepoint to find all possible
|
|
+ // debug-info recordings, as well as let GC find all oops.
|
|
+
|
|
+ OopMapSet *oop_maps = new OopMapSet();
|
|
+ OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
|
|
+ assert_cond(oop_maps != NULL && oop_map != NULL);
|
|
+
|
|
+ int sp_offset_in_slots = 0;
|
|
+ int step_in_slots = 0;
|
|
+ if (_save_vectors) {
|
|
+ step_in_slots = vector_size_in_slots;
|
|
+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
|
|
+ VectorRegister r = as_VectorRegister(i);
|
|
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
|
|
+ }
|
|
+ }
|
|
+
|
|
+ step_in_slots = FloatRegisterImpl::max_slots_per_register;
|
|
+ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
|
|
+ FloatRegister r = as_FloatRegister(i);
|
|
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
|
|
+ }
|
|
+
|
|
+ step_in_slots = RegisterImpl::max_slots_per_register;
|
|
+ // skip the slot reserved for alignment, see MacroAssembler::push_reg;
|
|
+ // also skip x5 ~ x6 on the stack because they are caller-saved registers.
|
|
+ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3;
|
|
+ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack.
|
|
+ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
|
|
+ Register r = as_Register(i);
|
|
+ if (r != xthread) {
|
|
+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg());
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return oop_map;
|
|
+}
|
|
+
|
|
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
|
|
+#ifdef COMPILER2
|
|
+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE));
|
|
+#else
|
|
+ __ pop_CPU_state(_save_vectors);
|
|
+#endif
|
|
+ __ leave();
|
|
+}
|
|
+
|
|
+void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
|
|
+ // Just restore result register. Only used by deoptimization. By
|
|
+ // now any callee save register that needs to be restored to a c2
|
|
+ // caller of the deoptee has been extracted into the vframeArray
|
|
+ // and will be stuffed into the c2i adapter we create for later
|
|
+ // restoration so only result registers need to be restored here.
|
|
+ // Restore fp result register
|
|
+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10)));
|
|
+ // Restore integer result register
|
|
+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10)));
|
|
+
|
|
+ // Pop all of the register save area off the stack
|
|
+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16));
|
|
+}
|
|
+
|
|
+// Is vector's size (in bytes) bigger than a size saved by default?
|
|
+// riscv does not overlay the floating-point registers on vector registers like aarch64.
|
|
+bool SharedRuntime::is_wide_vector(int size) {
|
|
+ return UseRVV;
|
|
+}
|
|
+
|
|
+size_t SharedRuntime::trampoline_size() {
|
|
+ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instruction
|
|
+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr
|
|
+}
|
|
+
|
|
+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
|
|
+ int32_t offset = 0;
|
|
+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli
|
|
+ __ jalr(x0, t0, offset);
|
|
+}
|
|
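A quick arithmetic note (an illustration, not part of the patch, assuming the 4-byte NativeInstruction::instruction_size that applies to uncompressed RISC-V instructions):

    //   trampoline_size() == 6 * 4 == 24 bytes
    //   == lui + addi + slli + addi + slli   (movptr_with_offset, 5 instructions)
    //    + jalr                              (the indirect jump itself)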
+
|
|
+// ---------------------------------------------------------------------------
|
|
+// Read the array of BasicTypes from a signature, and compute where the
|
|
+// arguments should go. Values in the VMRegPair regs array refer to 4-byte
|
|
+// quantities. Values less than VMRegImpl::stack0 are registers, those above
|
|
+// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
|
|
+// as framesizes are fixed.
|
|
+// VMRegImpl::stack0 refers to the first slot 0(sp).
|
|
+// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
+// values (up to RegisterImpl::number_of_registers) are the 64-bit
|
|
+// integer registers.
|
|
+
|
|
+// Note: the INPUTS in sig_bt are in units of Java argument words,
|
|
+// which are 64-bit. The OUTPUTS are in 32-bit units.
|
|
+
|
|
+// The Java calling convention is a "shifted" version of the C ABI.
|
|
+// By skipping the first C ABI register we can call non-static jni
|
|
+// methods with small numbers of arguments without having to shuffle
|
|
+// the arguments at all. Since we control the java ABI we ought to at
|
|
+// least get some advantage out of it.
|
|
+
|
|
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
|
|
+ VMRegPair *regs,
|
|
+ int total_args_passed,
|
|
+ int is_outgoing) {
|
|
+ assert_cond(sig_bt != NULL && regs != NULL);
|
|
+ // Create the mapping between argument positions and
|
|
+ // registers.
|
|
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
|
|
+ j_rarg0, j_rarg1, j_rarg2, j_rarg3,
|
|
+ j_rarg4, j_rarg5, j_rarg6, j_rarg7
|
|
+ };
|
|
+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
|
|
+ j_farg0, j_farg1, j_farg2, j_farg3,
|
|
+ j_farg4, j_farg5, j_farg6, j_farg7
|
|
+ };
|
|
+
|
|
+ uint int_args = 0;
|
|
+ uint fp_args = 0;
|
|
+ uint stk_args = 0; // inc by 2 each time
|
|
+
|
|
+ for (int i = 0; i < total_args_passed; i++) {
|
|
+ switch (sig_bt[i]) {
|
|
+ case T_BOOLEAN: // fall through
|
|
+ case T_CHAR: // fall through
|
|
+ case T_BYTE: // fall through
|
|
+ case T_SHORT: // fall through
|
|
+ case T_INT:
|
|
+ if (int_args < Argument::n_int_register_parameters_j) {
|
|
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_VOID:
|
|
+ // halves of T_LONG or T_DOUBLE
|
|
+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
|
|
+ regs[i].set_bad();
|
|
+ break;
|
|
+ case T_LONG: // fall through
|
|
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ARRAY: // fall through
|
|
+ case T_ADDRESS:
|
|
+ if (int_args < Argument::n_int_register_parameters_j) {
|
|
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_FLOAT:
|
|
+ if (fp_args < Argument::n_float_register_parameters_j) {
|
|
+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_DOUBLE:
|
|
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
|
|
+ if (fp_args < Argument::n_float_register_parameters_j) {
|
|
+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return align_up(stk_args, 2);
|
|
+}
|
|
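To make the assignment loop concrete, here is a hand trace for a hypothetical Java signature (an illustration only; the signature and the resulting mapping are not part of the patch):

    // Java signature (I J Ljava/lang/Object; D) arrives as
    //   sig_bt = { T_INT, T_LONG, T_VOID, T_OBJECT, T_DOUBLE, T_VOID }
    // and the loop above assigns
    //   regs[0].set1(j_rarg0)   // T_INT
    //   regs[1].set2(j_rarg1)   // T_LONG
    //   regs[2] = bad           // high half of the long
    //   regs[3].set2(j_rarg2)   // T_OBJECT
    //   regs[4].set2(j_farg0)   // T_DOUBLE
    //   regs[5] = bad           // high half of the double
    // Nothing spills, so the function returns align_up(0, 2) == 0 stack slots.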
+
|
|
+// Patch the callers callsite with entry to compiled code if it exists.
|
|
+static void patch_callers_callsite(MacroAssembler *masm) {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
|
|
+ __ beqz(t0, L);
|
|
+
|
|
+ __ enter();
|
|
+ __ push_CPU_state();
|
|
+
|
|
+ // VM needs caller's callsite
|
|
+ // VM needs target method
|
|
+ // This needs to be a long call since we will relocate this adapter to
|
|
+ // the codeBuffer and it may not reach
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
+#endif
|
|
+
|
|
+ __ mv(c_rarg0, xmethod);
|
|
+ __ mv(c_rarg1, ra);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ __ pop_CPU_state();
|
|
+ // restore sp
|
|
+ __ leave();
|
|
+ __ bind(L);
|
|
+}
|
|
+
|
|
+static void gen_c2i_adapter(MacroAssembler *masm,
|
|
+ int total_args_passed,
|
|
+ int comp_args_on_stack,
|
|
+ const BasicType *sig_bt,
|
|
+ const VMRegPair *regs,
|
|
+ Label& skip_fixup) {
|
|
+ // Before we get into the guts of the C2I adapter, see if we should be here
|
|
+ // at all. We've come from compiled code and are attempting to jump to the
|
|
+ // interpreter, which means the caller made a static call to get here
|
|
+ // (vcalls always get a compiled target if there is one). Check for a
|
|
+ // compiled target. If there is one, we need to patch the caller's call.
|
|
+ patch_callers_callsite(masm);
|
|
+
|
|
+ __ bind(skip_fixup);
|
|
+
|
|
+ int words_pushed = 0;
|
|
+
|
|
+ // Since all args are passed on the stack, total_args_passed *
|
|
+ // Interpreter::stackElementSize is the space we need.
|
|
+
|
|
+ int extraspace = total_args_passed * Interpreter::stackElementSize;
|
|
+
|
|
+ __ mv(x30, sp);
|
|
+
|
|
+ // stack is aligned, keep it that way
|
|
+ extraspace = align_up(extraspace, 2 * wordSize);
|
|
+
|
|
+ if (extraspace) {
|
|
+ __ sub(sp, sp, extraspace);
|
|
+ }
|
|
+
|
|
+ // Now write the args into the outgoing interpreter space
|
|
+ for (int i = 0; i < total_args_passed; i++) {
|
|
+ if (sig_bt[i] == T_VOID) {
|
|
+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ // offset to start parameters
|
|
+ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
|
|
+ int next_off = st_off - Interpreter::stackElementSize;
|
|
+
|
|
+ // Say 4 args:
|
|
+ // i st_off
|
|
+ // 0 32 T_LONG
|
|
+ // 1 24 T_VOID
|
|
+ // 2 16 T_OBJECT
|
|
+ // 3 8 T_BOOL
|
|
+ // - 0 return address
|
|
+ //
|
|
+ // However, to make things extra confusing: because we can fit a Java long/double in
+ // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
+ // leaves one slot empty and only stores to a single slot. In this case the
+ // slot that is occupied is the T_VOID slot. See, I said it was confusing.
|
|
+
|
|
+ VMReg r_1 = regs[i].first();
|
|
+ VMReg r_2 = regs[i].second();
|
|
+ if (!r_1->is_valid()) {
|
|
+ assert(!r_2->is_valid(), "");
|
|
+ continue;
|
|
+ }
|
|
+ if (r_1->is_stack()) {
|
|
+ // memory to memory use t0
|
|
+ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
|
|
+ + extraspace
|
|
+ + words_pushed * wordSize);
|
|
+ if (!r_2->is_valid()) {
|
|
+ __ lwu(t0, Address(sp, ld_off));
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
|
|
+ } else {
|
|
+ __ ld(t0, Address(sp, ld_off), /*temp register*/esp);
|
|
+
|
|
+ // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
|
|
+ // T_DOUBLE and T_LONG use two slots in the interpreter
|
|
+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
|
|
+ // ld_off == LSW, ld_off+wordSize == MSW
|
|
+ // st_off == MSW, next_off == LSW
|
|
+ __ sd(t0, Address(sp, next_off), /*temp register*/esp);
|
|
+#ifdef ASSERT
|
|
+ // Overwrite the unused slot with known junk
|
|
+ __ mv(t0, 0xdeadffffdeadaaaaul);
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
|
|
+#endif /* ASSERT */
|
|
+ } else {
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
|
|
+ }
|
|
+ }
|
|
+ } else if (r_1->is_Register()) {
|
|
+ Register r = r_1->as_Register();
|
|
+ if (!r_2->is_valid()) {
|
|
+ // must be only an int (or less), so move only 32 bits to the slot
|
|
+ __ sd(r, Address(sp, st_off));
|
|
+ } else {
|
|
+ // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
|
|
+ // T_DOUBLE and T_LONG use two slots in the interpreter
|
|
+ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
|
|
+ // jlong/double in gpr
|
|
+#ifdef ASSERT
|
|
+ // Overwrite the unused slot with known junk
|
|
+ __ mv(t0, 0xdeadffffdeadaaabul);
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
|
|
+#endif /* ASSERT */
|
|
+ __ sd(r, Address(sp, next_off));
|
|
+ } else {
|
|
+ __ sd(r, Address(sp, st_off));
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ assert(r_1->is_FloatRegister(), "");
|
|
+ if (!r_2->is_valid()) {
|
|
+ // only a float, so use just part of the slot
|
|
+ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off));
|
|
+ } else {
|
|
+#ifdef ASSERT
|
|
+ // Overwrite the unused slot with known junk
|
|
+ __ mv(t0, 0xdeadffffdeadaaacul);
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/esp);
|
|
+#endif /* ASSERT */
|
|
+ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ __ mv(esp, sp); // Interp expects args on caller's expression stack
|
|
+
|
|
+ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));
|
|
+ __ jr(t0);
|
|
+}
|
|
+
|
|
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
|
|
+ int total_args_passed,
|
|
+ int comp_args_on_stack,
|
|
+ const BasicType *sig_bt,
|
|
+ const VMRegPair *regs) {
|
|
+ // Cut-out for having no stack args.
|
|
+ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
|
|
+ if (comp_args_on_stack != 0) {
|
|
+ __ sub(t0, sp, comp_words_on_stack * wordSize);
|
|
+ __ andi(sp, t0, -16);
|
|
+ }
|
|
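A small worked example of the alignment above (an illustration, not part of the patch, using the usual 4-byte VMRegImpl stack slot and 8-byte word):

    //   comp_args_on_stack == 3  ->  comp_words_on_stack == align_up(3 * 4, 8) >> 3 == 2
    // so sp is lowered by 2 words (16 bytes) and then rounded down to a
    // 16-byte boundary by the andi with -16.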
+
|
|
+ // Will jump to the compiled code just as if compiled code was doing it.
|
|
+ // Pre-load the register-jump target early, to schedule it better.
|
|
+ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset())));
|
|
+
|
|
+ // Now generate the shuffle code.
|
|
+ for (int i = 0; i < total_args_passed; i++) {
|
|
+ if (sig_bt[i] == T_VOID) {
|
|
+ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ // Pick up 0, 1 or 2 words from SP+offset.
|
|
+
|
|
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
|
|
+ "scrambled load targets?");
|
|
+ // Load in argument order going down.
|
|
+ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
|
|
+ // Point to interpreter value (vs. tag)
|
|
+ int next_off = ld_off - Interpreter::stackElementSize;
|
|
+
|
|
+ VMReg r_1 = regs[i].first();
|
|
+ VMReg r_2 = regs[i].second();
|
|
+ if (!r_1->is_valid()) {
|
|
+ assert(!r_2->is_valid(), "");
|
|
+ continue;
|
|
+ }
|
|
+ if (r_1->is_stack()) {
|
|
+ // Convert stack slot to an SP offset (+ wordSize to account for return address)
|
|
+ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
|
|
+ if (!r_2->is_valid()) {
|
|
+ __ lw(t0, Address(esp, ld_off));
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/t2);
|
|
+ } else {
|
|
+ //
|
|
+ // We are using two optoregs. This can be either T_OBJECT,
|
|
+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
|
|
+ // two slots but only uses one for the T_LONG or T_DOUBLE case
|
|
+ // So we must adjust where to pick up the data to match the
|
|
+ // interpreter.
|
|
+ //
|
|
+ // Interpreter local[n] == MSW, local[n+1] == LSW however locals
|
|
+ // are accessed as negative so LSW is at LOW address
|
|
+
|
|
+ // ld_off is MSW so get LSW
|
|
+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
|
|
+ next_off : ld_off;
|
|
+ __ ld(t0, Address(esp, offset));
|
|
+ // st_off is LSW (i.e. reg.first())
|
|
+ __ sd(t0, Address(sp, st_off), /*temp register*/t2);
|
|
+ }
|
|
+ } else if (r_1->is_Register()) { // Register argument
|
|
+ Register r = r_1->as_Register();
|
|
+ if (r_2->is_valid()) {
|
|
+ //
|
|
+ // We are using two VMRegs. This can be either T_OBJECT,
|
|
+ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates
|
|
+ // two slots but only uses one for the T_LONG or T_DOUBLE case
|
|
+ // So we must adjust where to pick up the data to match the
|
|
+ // interpreter.
|
|
+
|
|
+ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
|
|
+ next_off : ld_off;
|
|
+
|
|
+ // this can be a misaligned move
|
|
+ __ ld(r, Address(esp, offset));
|
|
+ } else {
|
|
+ // sign extend and use a full word?
|
|
+ __ lw(r, Address(esp, ld_off));
|
|
+ }
|
|
+ } else {
|
|
+ if (!r_2->is_valid()) {
|
|
+ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off));
|
|
+ } else {
|
|
+ __ fld(r_1->as_FloatRegister(), Address(esp, next_off));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // 6243940 We might end up in handle_wrong_method if
|
|
+ // the callee is deoptimized as we race thru here. If that
|
|
+ // happens we don't want to take a safepoint because the
|
|
+ // caller frame will look interpreted and arguments are now
|
|
+ // "compiled" so it is much better to make this transition
|
|
+ // invisible to the stack walking code. Unfortunately if
|
|
+ // we try and find the callee by normal means a safepoint
|
|
+ // is possible. So we stash the desired callee in the thread
|
|
+ // and the vm will find it there should this case occur.
|
|
+
|
|
+ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset()));
|
|
+
|
|
+ __ jr(t1);
|
|
+}
|
|
+
|
|
+// ---------------------------------------------------------------
|
|
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
|
|
+ int total_args_passed,
|
|
+ int comp_args_on_stack,
|
|
+ const BasicType *sig_bt,
|
|
+ const VMRegPair *regs,
|
|
+ AdapterFingerPrint* fingerprint) {
|
|
+ address i2c_entry = __ pc();
|
|
+ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
|
|
+
|
|
+ address c2i_unverified_entry = __ pc();
|
|
+ Label skip_fixup;
|
|
+
|
|
+ Label ok;
|
|
+
|
|
+ const Register holder = t1;
|
|
+ const Register receiver = j_rarg0;
|
|
+ const Register tmp = t2; // A call-clobbered register not used for arg passing
|
|
+
|
|
+ // -------------------------------------------------------------------------
|
|
+ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls
|
|
+ // to the interpreter. The args start out packed in the compiled layout. They
|
|
+ // need to be unpacked into the interpreter layout. This will almost always
|
|
+ // require some stack space. We grow the current (compiled) stack, then repack
|
|
+ // the args. We finally end in a jump to the generic interpreter entry point.
|
|
+ // On exit from the interpreter, the interpreter will restore our SP (lest the
|
|
+ // compiled code, which relies solely on SP and not FP, get sick).
|
|
+
|
|
+ {
|
|
+ __ block_comment("c2i_unverified_entry {");
|
|
+ __ load_klass(t0, receiver);
|
|
+ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
|
|
+ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
|
|
+ __ beq(t0, tmp, ok);
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
|
|
+
|
|
+ __ bind(ok);
|
|
+ // Method might have been compiled since the call site was patched to
|
|
+ // interpreted; if that is the case treat it as a miss so we can get
|
|
+ // the call site corrected.
|
|
+ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
|
|
+ __ beqz(t0, skip_fixup);
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
|
|
+ __ block_comment("} c2i_unverified_entry");
|
|
+ }
|
|
+
|
|
+ address c2i_entry = __ pc();
|
|
+
|
|
+ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
|
|
+
|
|
+ __ flush();
|
|
+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
|
|
+}
|
|
+
|
|
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
|
|
+ VMRegPair *regs,
|
|
+ VMRegPair *regs2,
|
|
+ int total_args_passed) {
|
|
+ assert(regs2 == NULL, "not needed on riscv");
|
|
+ assert_cond(sig_bt != NULL && regs != NULL);
|
|
+
|
|
+ // We return the amount of VMRegImpl stack slots we need to reserve for all
|
|
+ // the arguments NOT counting out_preserve_stack_slots.
|
|
+
|
|
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
|
|
+ c_rarg0, c_rarg1, c_rarg2, c_rarg3,
|
|
+ c_rarg4, c_rarg5, c_rarg6, c_rarg7
|
|
+ };
|
|
+ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
|
|
+ c_farg0, c_farg1, c_farg2, c_farg3,
|
|
+ c_farg4, c_farg5, c_farg6, c_farg7
|
|
+ };
|
|
+
|
|
+ uint int_args = 0;
|
|
+ uint fp_args = 0;
|
|
+ uint stk_args = 0; // inc by 2 each time
|
|
+
|
|
+ for (int i = 0; i < total_args_passed; i++) {
|
|
+ switch (sig_bt[i]) {
|
|
+ case T_BOOLEAN: // fall through
|
|
+ case T_CHAR: // fall through
|
|
+ case T_BYTE: // fall through
|
|
+ case T_SHORT: // fall through
|
|
+ case T_INT:
|
|
+ if (int_args < Argument::n_int_register_parameters_c) {
|
|
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_LONG: // fall through
|
|
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
|
|
+ case T_OBJECT: // fall through
|
|
+ case T_ARRAY: // fall through
|
|
+ case T_ADDRESS: // fall through
|
|
+ case T_METADATA:
|
|
+ if (int_args < Argument::n_int_register_parameters_c) {
|
|
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_FLOAT:
|
|
+ if (fp_args < Argument::n_float_register_parameters_c) {
|
|
+ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
|
|
+ } else if (int_args < Argument::n_int_register_parameters_c) {
|
|
+ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set1(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_DOUBLE:
|
|
+ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
|
|
+ if (fp_args < Argument::n_float_register_parameters_c) {
|
|
+ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
|
|
+ } else if (int_args < Argument::n_int_register_parameters_c) {
|
|
+ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
|
|
+ } else {
|
|
+ regs[i].set2(VMRegImpl::stack2reg(stk_args));
|
|
+ stk_args += 2;
|
|
+ }
|
|
+ break;
|
|
+ case T_VOID: // Halves of longs and doubles
|
|
+ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
|
|
+ regs[i].set_bad();
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return stk_args;
|
|
+}
|
|
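The notable difference from java_calling_convention above is the fallback path: once the eight float argument registers are used up, a T_FLOAT or T_DOUBLE is placed in an integer argument register before anything spills to the stack. A hand trace for a hypothetical static native taking nine floats (an illustration, not part of the patch):

    // out_sig_bt = { T_ADDRESS /* JNIEnv* */, T_OBJECT /* jclass */, 9 x T_FLOAT }
    //   c_rarg0           <- JNIEnv*          (int_args = 1)
    //   c_rarg1           <- jclass           (int_args = 2)
    //   c_farg0..c_farg7  <- floats 1..8      (fp_args  = 8)
    //   c_rarg2           <- float 9          (fp regs exhausted, int_args = 3)
    // stk_args stays 0, so no outgoing stack slots are reserved.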
+
|
|
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
|
|
+ // We always ignore the frame_slots arg and just use the space just below the frame pointer,
|
|
+ // which by this time is free to use
|
|
+ switch (ret_type) {
|
|
+ case T_FLOAT:
|
|
+ __ fsw(f10, Address(fp, -3 * wordSize));
|
|
+ break;
|
|
+ case T_DOUBLE:
|
|
+ __ fsd(f10, Address(fp, -3 * wordSize));
|
|
+ break;
|
|
+ case T_VOID: break;
|
|
+ default: {
|
|
+ __ sd(x10, Address(fp, -3 * wordSize));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
|
|
+ // We always ignore the frame_slots arg and just use the space just below the frame pointer,
|
|
+ // which by this time is free to use
|
|
+ switch (ret_type) {
|
|
+ case T_FLOAT:
|
|
+ __ flw(f10, Address(fp, -3 * wordSize));
|
|
+ break;
|
|
+ case T_DOUBLE:
|
|
+ __ fld(f10, Address(fp, -3 * wordSize));
|
|
+ break;
|
|
+ case T_VOID: break;
|
|
+ default: {
|
|
+ __ ld(x10, Address(fp, -3 * wordSize));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
|
|
+ RegSet x;
|
|
+ for ( int i = first_arg ; i < arg_count ; i++ ) {
|
|
+ if (args[i].first()->is_Register()) {
|
|
+ x = x + args[i].first()->as_Register();
|
|
+ } else if (args[i].first()->is_FloatRegister()) {
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0));
|
|
+ }
|
|
+ }
|
|
+ __ push_reg(x, sp);
|
|
+}
|
|
+
|
|
+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
|
|
+ RegSet x;
|
|
+ for ( int i = first_arg ; i < arg_count ; i++ ) {
|
|
+ if (args[i].first()->is_Register()) {
|
|
+ x = x + args[i].first()->as_Register();
|
|
+ } else {
|
|
+ ;
|
|
+ }
|
|
+ }
|
|
+ __ pop_reg(x, sp);
|
|
+ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
|
|
+ if (args[i].first()->is_Register()) {
|
|
+ ;
|
|
+ } else if (args[i].first()->is_FloatRegister()) {
|
|
+ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0));
|
|
+ __ add(sp, sp, 2 * wordSize);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// Check GCLocker::needs_gc and enter the runtime if it's true. This
|
|
+// keeps a new JNI critical region from starting until a GC has been
|
|
+// forced. Save down any oops in registers and describe them in an
|
|
+// OopMap.
|
|
+static void check_needs_gc_for_critical_native(MacroAssembler* masm,
|
|
+ int stack_slots,
|
|
+ int total_c_args,
|
|
+ int total_in_args,
|
|
+ int arg_save_area,
|
|
+ OopMapSet* oop_maps,
|
|
+ VMRegPair* in_regs,
|
|
+ BasicType* in_sig_bt) { Unimplemented(); }
|
|
+
|
|
+// Unpack an array argument into a pointer to the body and the length
|
|
+// if the array is non-null, otherwise pass 0 for both.
|
|
+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); }
|
|
+
|
|
+class ComputeMoveOrder: public StackObj {
|
|
+ class MoveOperation: public ResourceObj {
|
|
+ friend class ComputeMoveOrder;
|
|
+ private:
|
|
+ VMRegPair _src;
|
|
+ VMRegPair _dst;
|
|
+ int _src_index;
|
|
+ int _dst_index;
|
|
+ bool _processed;
|
|
+ MoveOperation* _next;
|
|
+ MoveOperation* _prev;
|
|
+
|
|
+ static int get_id(VMRegPair r) { Unimplemented(); return 0; }
|
|
+
|
|
+ public:
|
|
+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst):
|
|
+ _src(src)
|
|
+ , _dst(dst)
|
|
+ , _src_index(src_index)
|
|
+ , _dst_index(dst_index)
|
|
+ , _processed(false)
|
|
+ , _next(NULL)
|
|
+ , _prev(NULL) { Unimplemented(); }
|
|
+
|
|
+ ~MoveOperation() {
|
|
+ _next = NULL;
|
|
+ _prev = NULL;
|
|
+ }
|
|
+
|
|
+ VMRegPair src() const { Unimplemented(); return _src; }
|
|
+ int src_id() const { Unimplemented(); return 0; }
|
|
+ int src_index() const { Unimplemented(); return 0; }
|
|
+ VMRegPair dst() const { Unimplemented(); return _src; }
|
|
+ void set_dst(int i, VMRegPair dst) { Unimplemented(); }
|
|
+ int dst_index() const { Unimplemented(); return 0; }
|
|
+ int dst_id() const { Unimplemented(); return 0; }
|
|
+ MoveOperation* next() const { Unimplemented(); return 0; }
|
|
+ MoveOperation* prev() const { Unimplemented(); return 0; }
|
|
+ void set_processed() { Unimplemented(); }
|
|
+ bool is_processed() const { Unimplemented(); return 0; }
|
|
+
|
|
+ // insert
|
|
+ void break_cycle(VMRegPair temp_register) { Unimplemented(); }
|
|
+
|
|
+ void link(GrowableArray<MoveOperation*>& killer) { Unimplemented(); }
|
|
+ };
|
|
+
|
|
+ private:
|
|
+ GrowableArray<MoveOperation*> edges;
|
|
+
|
|
+ public:
|
|
+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs,
|
|
+ BasicType* in_sig_bt, GrowableArray<int>& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); }
|
|
+
|
|
+ ~ComputeMoveOrder() {}
|
|
+ // Collected all the move operations
|
|
+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); }
|
|
+
|
|
+ // Walk the edges breaking cycles between moves. The result list
|
|
+ // can be walked in order to produce the proper set of loads
|
|
+ GrowableArray<MoveOperation*>* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; }
|
|
+};
|
|
+
|
|
+static void verify_oop_args(MacroAssembler* masm,
|
|
+ const methodHandle& method,
|
|
+ const BasicType* sig_bt,
|
|
+ const VMRegPair* regs) {
|
|
+ const Register temp_reg = x9; // not part of any compiled calling seq
|
|
+ if (VerifyOops) {
|
|
+ for (int i = 0; i < method->size_of_parameters(); i++) {
|
|
+ if (sig_bt[i] == T_OBJECT ||
|
|
+ sig_bt[i] == T_ARRAY) {
|
|
+ VMReg r = regs[i].first();
|
|
+ assert(r->is_valid(), "bad oop arg");
|
|
+ if (r->is_stack()) {
|
|
+ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
|
|
+ __ verify_oop(temp_reg);
|
|
+ } else {
|
|
+ __ verify_oop(r->as_Register());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void gen_special_dispatch(MacroAssembler* masm,
|
|
+ const methodHandle& method,
|
|
+ const BasicType* sig_bt,
|
|
+ const VMRegPair* regs) {
|
|
+ verify_oop_args(masm, method, sig_bt, regs);
|
|
+ vmIntrinsics::ID iid = method->intrinsic_id();
|
|
+
|
|
+ // Now write the args into the outgoing interpreter space
|
|
+ bool has_receiver = false;
|
|
+ Register receiver_reg = noreg;
|
|
+ int member_arg_pos = -1;
|
|
+ Register member_reg = noreg;
|
|
+ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
|
|
+ if (ref_kind != 0) {
|
|
+ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument
|
|
+ member_reg = x9; // known to be free at this point
|
|
+ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
|
|
+ } else if (iid == vmIntrinsics::_invokeBasic) {
|
|
+ has_receiver = true;
|
|
+ } else {
|
|
+ fatal("unexpected intrinsic id %d", iid);
|
|
+ }
|
|
+
|
|
+ if (member_reg != noreg) {
|
|
+ // Load the member_arg into register, if necessary.
|
|
+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
|
|
+ VMReg r = regs[member_arg_pos].first();
|
|
+ if (r->is_stack()) {
|
|
+ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
|
|
+ } else {
|
|
+ // no data motion is needed
|
|
+ member_reg = r->as_Register();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (has_receiver) {
|
|
+ // Make sure the receiver is loaded into a register.
|
|
+ assert(method->size_of_parameters() > 0, "oob");
|
|
+ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
|
|
+ VMReg r = regs[0].first();
|
|
+ assert(r->is_valid(), "bad receiver arg");
|
|
+ if (r->is_stack()) {
|
|
+ // Porting note: This assumes that compiled calling conventions always
|
|
+ // pass the receiver oop in a register. If this is not true on some
|
|
+ // platform, pick a temp and load the receiver from stack.
|
|
+ fatal("receiver always in a register");
|
|
+ receiver_reg = x12; // known to be free at this point
|
|
+ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
|
|
+ } else {
|
|
+ // no data motion is needed
|
|
+ receiver_reg = r->as_Register();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Figure out which address we are really jumping to:
|
|
+ MethodHandles::generate_method_handle_dispatch(masm, iid,
|
|
+ receiver_reg, member_reg, /*for_compiler_entry:*/ true);
|
|
+}
|
|
+
|
|
+// ---------------------------------------------------------------------------
|
|
+// Generate a native wrapper for a given method. The method takes arguments
|
|
+// in the Java compiled code convention, marshals them to the native
|
|
+// convention (handlizes oops, etc), transitions to native, makes the call,
|
|
+// returns to java state (possibly blocking), unhandlizes any result and
|
|
+// returns.
|
|
+//
|
|
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions. The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GCLocker
+// lock_critical/unlock_critical semantics are followed. Some other
+// parts of JNI setup are skipped, like the tear down of the JNI handle
+// block and the check for pending exceptions, since it is impossible for them
+// to be thrown.
+//
+// They are roughly structured like this:
+//   if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
+//   transition to thread_in_native
+//   unpack array arguments and call native entry point
+//   check for safepoint in progress
+//   check if any thread suspend flags are set
+//   call into JVM and possibly unlock the JNI critical
+//   if a GC was suppressed while in the critical native.
+//   transition back to thread_in_Java
+//   return to caller
+//
|
|
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
|
+ const methodHandle& method,
|
|
+ int compile_id,
|
|
+ BasicType* in_sig_bt,
|
|
+ VMRegPair* in_regs,
|
|
+ BasicType ret_type,
|
|
+ address critical_entry) {
|
|
+ if (method->is_method_handle_intrinsic()) {
|
|
+ vmIntrinsics::ID iid = method->intrinsic_id();
|
|
+ intptr_t start = (intptr_t)__ pc();
|
|
+ int vep_offset = ((intptr_t)__ pc()) - start;
|
|
+
|
|
+ // First instruction must be a nop as it may need to be patched on deoptimisation
|
|
+ __ nop();
|
|
+ gen_special_dispatch(masm,
|
|
+ method,
|
|
+ in_sig_bt,
|
|
+ in_regs);
|
|
+ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period
|
|
+ __ flush();
|
|
+ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually
|
|
+ return nmethod::new_native_nmethod(method,
|
|
+ compile_id,
|
|
+ masm->code(),
|
|
+ vep_offset,
|
|
+ frame_complete,
|
|
+ stack_slots / VMRegImpl::slots_per_word,
|
|
+ in_ByteSize(-1),
|
|
+ in_ByteSize(-1),
|
|
+ (OopMapSet*)NULL);
|
|
+ }
|
|
+ bool is_critical_native = true;
|
|
+ address native_func = critical_entry;
|
|
+ if (native_func == NULL) {
|
|
+ native_func = method->native_function();
|
|
+ is_critical_native = false;
|
|
+ }
|
|
+ assert(native_func != NULL, "must have function");
|
|
+
|
|
+ // An OopMap for lock (and class if static)
|
|
+ OopMapSet *oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ intptr_t start = (intptr_t)__ pc();
|
|
+
|
|
+ // We have received a description of where all the java arg are located
|
|
+ // on entry to the wrapper. We need to convert these args to where
|
|
+ // the jni function will expect them. To figure out where they go
|
|
+ // we convert the java signature to a C signature by inserting
|
|
+ // the hidden arguments as arg[0] and possibly arg[1] (static method)
|
|
+
|
|
+ const int total_in_args = method->size_of_parameters();
|
|
+ int total_c_args = total_in_args;
|
|
+ if (!is_critical_native) {
|
|
+ total_c_args += 1;
|
|
+ if (method->is_static()) {
|
|
+ total_c_args++;
|
|
+ }
|
|
+ } else {
|
|
+ for (int i = 0; i < total_in_args; i++) {
|
|
+ if (in_sig_bt[i] == T_ARRAY) {
|
|
+ total_c_args++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
|
|
+ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
|
|
+ assert_cond(out_sig_bt != NULL && out_regs != NULL);
|
|
+ BasicType* in_elem_bt = NULL;
|
|
+
|
|
+ int argc = 0;
|
|
+ if (!is_critical_native) {
|
|
+ out_sig_bt[argc++] = T_ADDRESS;
|
|
+ if (method->is_static()) {
|
|
+ out_sig_bt[argc++] = T_OBJECT;
|
|
+ }
|
|
+
|
|
+ for (int i = 0; i < total_in_args ; i++) {
|
|
+ out_sig_bt[argc++] = in_sig_bt[i];
|
|
+ }
|
|
+ } else {
|
|
+ Thread* THREAD = Thread::current();
|
|
+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
|
|
+ assert_cond(in_elem_bt != NULL);
|
|
+ SignatureStream ss(method->signature());
|
|
+ for (int i = 0; i < total_in_args ; i++) {
|
|
+ if (in_sig_bt[i] == T_ARRAY) {
|
|
+ // Arrays are passed as int, elem* pair
|
|
+ out_sig_bt[argc++] = T_INT;
|
|
+ out_sig_bt[argc++] = T_ADDRESS;
|
|
+ Symbol* atype = ss.as_symbol(CHECK_NULL);
|
|
+ const char* at = atype->as_C_string();
|
|
+ if (strlen(at) == 2) {
|
|
+ assert(at[0] == '[', "must be");
|
|
+ switch (at[1]) {
|
|
+ case 'B': in_elem_bt[i] = T_BYTE; break;
|
|
+ case 'C': in_elem_bt[i] = T_CHAR; break;
|
|
+ case 'D': in_elem_bt[i] = T_DOUBLE; break;
|
|
+ case 'F': in_elem_bt[i] = T_FLOAT; break;
|
|
+ case 'I': in_elem_bt[i] = T_INT; break;
|
|
+ case 'J': in_elem_bt[i] = T_LONG; break;
|
|
+ case 'S': in_elem_bt[i] = T_SHORT; break;
|
|
+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ out_sig_bt[argc++] = in_sig_bt[i];
|
|
+ in_elem_bt[i] = T_VOID;
|
|
+ }
|
|
+ if (in_sig_bt[i] != T_VOID) {
|
|
+ assert(in_sig_bt[i] == ss.type(), "must match");
|
|
+ ss.next();
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Now figure out where the args must be stored and how much stack space
|
|
+ // they require.
|
|
+ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
|
|
+
|
|
+ // Compute framesize for the wrapper. We need to handlize all oops in
|
|
+ // incoming registers
|
|
+
|
|
+ // Calculate the total number of stack slots we will need.
|
|
+
|
|
+ // First count the abi requirement plus all of the outgoing args
|
|
+ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
|
|
+
|
|
+ // Now the space for the inbound oop handle area
|
|
+ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers
|
|
+ if (is_critical_native) {
|
|
+ // Critical natives may have to call out so they need a save area
|
|
+ // for register arguments.
|
|
+ int double_slots = 0;
|
|
+ int single_slots = 0;
|
|
+ for ( int i = 0; i < total_in_args; i++) {
|
|
+ if (in_regs[i].first()->is_Register()) {
|
|
+ const Register reg = in_regs[i].first()->as_Register();
|
|
+ switch (in_sig_bt[i]) {
|
|
+ case T_BOOLEAN:
|
|
+ case T_BYTE:
|
|
+ case T_SHORT:
|
|
+ case T_CHAR:
|
|
+ case T_INT: single_slots++; break;
|
|
+ case T_ARRAY: // specific to LP64 (7145024)
|
|
+ case T_LONG: double_slots++; break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ } else if (in_regs[i].first()->is_FloatRegister()) {
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+ total_save_slots = double_slots * 2 + single_slots;
|
|
+ // align the save area
|
|
+ if (double_slots != 0) {
|
|
+ stack_slots = align_up(stack_slots, 2);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ int oop_handle_offset = stack_slots;
|
|
+ stack_slots += total_save_slots;
|
|
+
|
|
+ // Now any space we need for handlizing a klass if static method
|
|
+
|
|
+ int klass_slot_offset = 0;
|
|
+ int klass_offset = -1;
|
|
+ int lock_slot_offset = 0;
|
|
+ bool is_static = false;
|
|
+
|
|
+ if (method->is_static()) {
|
|
+ klass_slot_offset = stack_slots;
|
|
+ stack_slots += VMRegImpl::slots_per_word;
|
|
+ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
|
|
+ is_static = true;
|
|
+ }
|
|
+
|
|
+ // Plus a lock if needed
|
|
+
|
|
+ if (method->is_synchronized()) {
|
|
+ lock_slot_offset = stack_slots;
|
|
+ stack_slots += VMRegImpl::slots_per_word;
|
|
+ }
|
|
+
|
|
+ // Now a place (+2) to save return values or temp during shuffling
|
|
+ // + 4 for return address (which we own) and saved fp
|
|
+ stack_slots += 6;
|
|
+
|
|
+ // OK, the space we have allocated will look like:
|
|
+ //
|
|
+ //
|
|
+ // FP-> | |
|
|
+ // | 2 slots (ra) |
|
|
+ // | 2 slots (fp) |
|
|
+ // |---------------------|
|
|
+ // | 2 slots for moves |
|
|
+ // |---------------------|
|
|
+ // | lock box (if sync) |
|
|
+ // |---------------------| <- lock_slot_offset
|
|
+ // | klass (if static) |
|
|
+ // |---------------------| <- klass_slot_offset
|
|
+ // | oopHandle area |
|
|
+ // |---------------------| <- oop_handle_offset (8 java arg registers)
|
|
+ // | outbound memory |
|
|
+ // | based arguments |
|
|
+ // | |
|
|
+ // |---------------------|
|
|
+ // | |
|
|
+ // SP-> | out_preserved_slots |
|
|
+ //
|
|
+ //
|
|
+
|
|
+
|
|
+ // Now compute actual number of stack words we need rounding to make
|
|
+ // stack properly aligned.
|
|
+ stack_slots = align_up(stack_slots, StackAlignmentInSlots);
|
|
+
|
|
+ int stack_size = stack_slots * VMRegImpl::stack_slot_size;
|
|
+
|
|
+ // First thing make an ic check to see if we should even be here
|
|
+
|
|
+ // We are free to use all registers as temps without saving them and
|
|
+ // restoring them except fp. fp is the only callee save register
|
|
+ // as far as the interpreter and the compiler(s) are concerned.
|
|
+
|
|
+
|
|
+ const Register ic_reg = t1;
|
|
+ const Register receiver = j_rarg0;
|
|
+
|
|
+ Label hit;
|
|
+ Label exception_pending;
|
|
+
|
|
+ assert_different_registers(ic_reg, receiver, t0);
|
|
+ __ verify_oop(receiver);
|
|
+ __ cmp_klass(receiver, ic_reg, t0, hit);
|
|
+
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
|
|
+
|
|
+ // Verified entry point must be aligned
|
|
+ __ align(8);
|
|
+
|
|
+ __ bind(hit);
|
|
+
|
|
+ int vep_offset = ((intptr_t)__ pc()) - start;
|
|
+
|
|
+ // If we have to make this method not-entrant we'll overwrite its
|
|
+ // first instruction with a jump.
|
|
+ __ nop();
|
|
+
|
|
+ // Generate stack overflow check
|
|
+ if (UseStackBanging) {
|
|
+ __ bang_stack_with_offset(checked_cast<int>(JavaThread::stack_shadow_zone_size()));
|
|
+ } else {
|
|
+ Unimplemented();
|
|
+ }
|
|
+
|
|
+ // Generate a new frame for the wrapper.
|
|
+ __ enter();
|
|
+ // -2 because return address is already present and so is saved fp
|
|
+ __ sub(sp, sp, stack_size - 2 * wordSize);
|
|
+
|
|
+ // Frame is now completed as far as size and linkage.
|
|
+ int frame_complete = ((intptr_t)__ pc()) - start;
|
|
+
|
|
+ // We use x18 as the oop handle for the receiver/klass
|
|
+ // It is callee save so it survives the call to native
|
|
+
|
|
+ const Register oop_handle_reg = x18;
|
|
+
|
|
+ if (is_critical_native) {
|
|
+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
|
|
+ oop_handle_offset, oop_maps, in_regs, in_sig_bt);
|
|
+ }
|
|
+
|
|
+ //
|
|
+ // We immediately shuffle the arguments so that for any vm call we have to
|
|
+ // make from here on out (sync slow path, jvmti, etc.) we will have
|
|
+ // captured the oops from our caller and have a valid oopMap for
|
|
+ // them.
|
|
+
|
|
+ // -----------------
|
|
+ // The Grand Shuffle
|
|
+
|
|
+ // The Java calling convention is either equal (linux) or denser (win64) than the
|
|
+ // c calling convention. However, because of the jni_env argument, the c calling
|
|
+ // convention always has at least one more (and two for static) arguments than Java.
|
|
+ // Therefore if we move the args from java -> c backwards then we will never have
|
|
+ // a register->register conflict and we don't have to build a dependency graph
|
|
+ // and figure out how to break any cycles.
|
|
+ //
|
|
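A concrete way to see why the backwards walk is enough in the non-critical case (an illustration, not part of the patch, using the shifted register mapping described earlier in this file):

    // Non-static native with Java signature (II)V:
    //   incoming:  j_rarg0 = receiver, j_rarg1 = int 1, j_rarg2 = int 2
    //   outgoing:  c_rarg0 = JNIEnv*,  c_rarg1 = receiver handle,
    //              c_rarg2 = int 1,    c_rarg3 = int 2
    // Walking i from the last argument down to the first, every source
    // register is read before the loop ever stores into it as a destination,
    // so no dependency graph or cycle breaking is needed here.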
+
|
|
+ // Record esp-based slot for receiver on stack for non-static methods
|
|
+ int receiver_offset = -1;
|
|
+
|
|
+ // This is a trick. We double the stack slots so we can claim
|
|
+ // the oops in the caller's frame. Since we are sure to have
|
|
+ // more args than the caller, doubling is enough to make
|
|
+ // sure we can capture all the incoming oop args from the
|
|
+ // caller.
|
|
+ //
|
|
+ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ int float_args = 0;
|
|
+ int int_args = 0;
|
|
+
|
|
+#ifdef ASSERT
|
|
+ bool reg_destroyed[RegisterImpl::number_of_registers];
|
|
+ bool freg_destroyed[FloatRegisterImpl::number_of_registers];
|
|
+ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
|
|
+ reg_destroyed[r] = false;
|
|
+ }
|
|
+ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
|
|
+ freg_destroyed[f] = false;
|
|
+ }
|
|
+
|
|
+#endif /* ASSERT */
|
|
+
|
|
+ // This may iterate in two different directions depending on the
|
|
+ // kind of native it is. The reason is that for regular JNI natives
|
|
+ // the incoming and outgoing registers are offset upwards and for
|
|
+ // critical natives they are offset down.
|
|
+ GrowableArray<int> arg_order(2 * total_in_args);
|
|
+ VMRegPair tmp_vmreg;
|
|
+ tmp_vmreg.set2(x9->as_VMReg());
|
|
+
|
|
+ if (!is_critical_native) {
|
|
+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
|
|
+ arg_order.push(i);
|
|
+ arg_order.push(c_arg);
|
|
+ }
|
|
+ } else {
|
|
+ // Compute a valid move order, using tmp_vmreg to break any cycles
|
|
+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
|
|
+ }
|
|
+
|
|
+ int temploc = -1;
|
|
+ for (int ai = 0; ai < arg_order.length(); ai += 2) {
|
|
+ int i = arg_order.at(ai);
|
|
+ int c_arg = arg_order.at(ai + 1);
|
|
+ __ block_comment(err_msg("mv %d -> %d", i, c_arg));
|
|
+ if (c_arg == -1) {
|
|
+ assert(is_critical_native, "should only be required for critical natives");
|
|
+ // This arg needs to be moved to a temporary
|
|
+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
|
|
+ in_regs[i] = tmp_vmreg;
|
|
+ temploc = i;
|
|
+ continue;
|
|
+ } else if (i == -1) {
|
|
+ assert(is_critical_native, "should only be required for critical natives");
|
|
+ // Read from the temporary location
|
|
+ assert(temploc != -1, "must be valid");
|
|
+ i = temploc;
|
|
+ temploc = -1;
|
|
+ }
|
|
+#ifdef ASSERT
|
|
+ if (in_regs[i].first()->is_Register()) {
|
|
+ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
|
|
+ } else if (in_regs[i].first()->is_FloatRegister()) {
|
|
+ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
|
|
+ }
|
|
+ if (out_regs[c_arg].first()->is_Register()) {
|
|
+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
|
|
+ } else if (out_regs[c_arg].first()->is_FloatRegister()) {
|
|
+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
|
|
+ }
|
|
+#endif /* ASSERT */
|
|
+ switch (in_sig_bt[i]) {
|
|
+ case T_ARRAY:
|
|
+ if (is_critical_native) {
|
|
+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
|
|
+ c_arg++;
|
|
+#ifdef ASSERT
|
|
+ if (out_regs[c_arg].first()->is_Register()) {
|
|
+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
|
|
+ } else if (out_regs[c_arg].first()->is_FloatRegister()) {
|
|
+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
|
|
+ }
|
|
+#endif
|
|
+ int_args++;
|
|
+ break;
|
|
+ }
|
|
+ // no break
|
|
+ case T_OBJECT:
|
|
+ assert(!is_critical_native, "no oop arguments");
|
|
+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
|
|
+ ((i == 0) && (!is_static)),
|
|
+ &receiver_offset);
|
|
+ int_args++;
|
|
+ break;
|
|
+ case T_VOID:
|
|
+ break;
|
|
+
|
|
+ case T_FLOAT:
|
|
+ __ float_move(in_regs[i], out_regs[c_arg]);
|
|
+ float_args++;
|
|
+ break;
|
|
+
|
|
+ case T_DOUBLE:
|
|
+ assert( i + 1 < total_in_args &&
|
|
+ in_sig_bt[i + 1] == T_VOID &&
|
|
+ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list");
|
|
+ __ double_move(in_regs[i], out_regs[c_arg]);
|
|
+ float_args++;
|
|
+ break;
|
|
+
|
|
+ case T_LONG :
|
|
+ __ long_move(in_regs[i], out_regs[c_arg]);
|
|
+ int_args++;
|
|
+ break;
|
|
+
|
|
+ case T_ADDRESS:
|
|
+ assert(false, "found T_ADDRESS in java args");
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ __ move32_64(in_regs[i], out_regs[c_arg]);
|
|
+ int_args++;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // point c_arg at the first arg that is already loaded in case we
|
|
+ // need to spill before we call out
|
|
+ int c_arg = total_c_args - total_in_args;
|
|
+
|
|
+ // Pre-load a static method's oop into c_rarg1.
|
|
+ if (method->is_static() && !is_critical_native) {
|
|
+
|
|
+ // load oop into a register
|
|
+ __ movoop(c_rarg1,
|
|
+ JNIHandles::make_local(method->method_holder()->java_mirror()),
|
|
+ /*immediate*/true);
|
|
+
|
|
+ // Now handlize the static class mirror it's known not-null.
|
|
+ __ sd(c_rarg1, Address(sp, klass_offset));
|
|
+ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
|
|
+
|
|
+ // Now get the handle
|
|
+ __ la(c_rarg1, Address(sp, klass_offset));
|
|
+ // and protect the arg if we must spill
|
|
+ c_arg--;
|
|
+ }
|
|
+
|
|
+ // Change state to native (we save the return address in the thread, since it might not
|
|
+ // be pushed on the stack when we do a stack traversal).
|
|
+ // We use the same pc/oopMap repeatedly when we call out
|
|
+
|
|
+ Label native_return;
|
|
+ __ set_last_Java_frame(sp, noreg, native_return, t0);
|
|
+
|
|
+ Label dtrace_method_entry, dtrace_method_entry_done;
|
|
+ {
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
|
|
+ __ lbu(t0, Address(t0, offset));
|
|
+ __ addw(t0, t0, zr);
|
|
+ __ bnez(t0, dtrace_method_entry);
|
|
+ __ bind(dtrace_method_entry_done);
|
|
+ }
|
|
+
|
|
+ // RedefineClasses() tracing support for obsolete method entry
|
|
+ if (log_is_enabled(Trace, redefine, class, obsolete)) {
|
|
+ // protect the args we've loaded
|
|
+ save_args(masm, total_c_args, c_arg, out_regs);
|
|
+ __ mov_metadata(c_rarg1, method());
|
|
+ __ call_VM_leaf(
|
|
+ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
|
|
+ xthread, c_rarg1);
|
|
+ restore_args(masm, total_c_args, c_arg, out_regs);
|
|
+ }
|
|
+
|
|
+ // Lock a synchronized method
|
|
+
|
|
+ // Register definitions used by locking and unlocking
|
|
+
|
|
+ const Register swap_reg = x10;
|
|
+ const Register obj_reg = x9; // Will contain the oop
|
|
+ const Register lock_reg = x30; // Address of compiler lock object (BasicLock)
|
|
+ const Register old_hdr = x30; // value of old header at unlock time
|
|
+ const Register tmp = ra;
|
|
+
|
|
+ Label slow_path_lock;
|
|
+ Label lock_done;
|
|
+
|
|
+ if (method->is_synchronized()) {
|
|
+ assert(!is_critical_native, "unhandled");
|
|
+
|
|
+ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
|
|
+
|
|
+ // Get the handle (the 2nd argument)
|
|
+ __ mv(oop_handle_reg, c_rarg1);
|
|
+
|
|
+ // Get address of the box
|
|
+
|
|
+ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
|
|
+
|
|
+ // Load the oop from the handle
|
|
+ __ ld(obj_reg, Address(oop_handle_reg, 0));
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock);
|
|
+ }
|
|
+
|
|
+ // Load (object->mark() | 1) into swap_reg % x10
|
|
+ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
|
|
+ __ ori(swap_reg, t0, 1);
|
|
+
|
|
+ // Save (object->mark() | 1) into BasicLock's displaced header
|
|
+ __ sd(swap_reg, Address(lock_reg, mark_word_offset));
|
|
+
|
|
+ // src -> dest if dest == x10 else x10 <- dest
|
|
+ {
|
|
+ Label here;
|
|
+ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL);
|
|
+ }
|
|
+
|
|
+ // Test if the oopMark is an obvious stack pointer, i.e.,
|
|
+ // 1) (mark & 3) == 0, and
|
|
+ // 2) sp <= mark < mark + os::pagesize()
|
|
+ // These 3 tests can be done by evaluating the following
|
|
+ // expression: ((mark - sp) & (3 - os::vm_page_size())),
|
|
+ // assuming both stack pointer and pagesize have their
|
|
+ // least significant 2 bits clear.
|
|
+ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg
|
|
+
|
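To make the mask trick concrete (an illustration, not part of the patch, assuming a 4 KiB page size):

    //   3 - os::vm_page_size() == 3 - 4096 == 0xFFFFFFFFFFFFF003 as a 64-bit value,
    // so the and keeps the two low bits plus every bit at or above bit 12.
    // The result is zero exactly when (mark - sp) has its low two bits clear
    // and 0 <= mark - sp < 4096, i.e. the displaced header points within one
    // page above our own sp: the recursive-lock case, which stores 0 into the
    // BasicLock.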
|
+ __ sub(swap_reg, swap_reg, sp);
|
|
+ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size());
|
|
+
|
|
+ // Save the test result, for recursive case, the result is zero
|
|
+ __ sd(swap_reg, Address(lock_reg, mark_word_offset));
|
|
+ __ bnez(swap_reg, slow_path_lock);
|
|
+
|
|
+ // Slow path will re-enter here
|
|
+
|
|
+ __ bind(lock_done);
|
|
+ }
|
|
+
|
|
+
|
|
+ // Finally just about ready to make the JNI call
|
|
+
|
|
+ // get JNIEnv* which is first argument to native
|
|
+ if (!is_critical_native) {
|
|
+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset())));
|
|
+ }
|
|
+
|
|
+ // Now set thread in native
|
|
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ __ mv(t0, _thread_in_native);
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sw(t0, Address(t1));
|
|
+
|
|
+ __ rt_call(native_func);
|
|
+
|
|
+ __ bind(native_return);
|
|
+
|
|
+ intptr_t return_pc = (intptr_t) __ pc();
|
|
+ oop_maps->add_gc_map(return_pc - start, map);
|
|
+
|
|
+ // Unpack native results.
|
|
+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) {
|
|
+ __ cast_primitive_type(ret_type, x10);
|
|
+ }
|
|
+
|
|
+ // Switch thread to "native transition" state before reading the synchronization state.
|
|
+ // This additional state is necessary because reading and testing the synchronization
|
|
+ // state is not atomic w.r.t. GC, as this scenario demonstrates:
|
|
+ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
|
|
+ // VM thread changes sync state to synchronizing and suspends threads for GC.
|
|
+ // Thread A is resumed to finish this native method, but doesn't block here since it
|
|
+ // didn't see any synchronization in progress, and escapes.
|
|
+ __ mv(t0, _thread_in_native_trans);
|
|
+
|
|
+ if(os::is_MP()) {
|
|
+ if (UseMembar) {
|
|
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
|
|
+
|
|
+ // Force this write out before the read below
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ } else {
|
|
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sw(t0, Address(t1));
|
|
+
|
|
+ // Write serialization page so VM thread can do a pseudo remote membar.
|
|
+ // We use the current thread pointer to calculate a thread specific
|
|
+ // offset to write to within the page. This minimizes bus traffic
|
|
+ // due to cache line collision.
|
|
+ __ serialize_memory(xthread, x12, t0);
|
|
+ }
|
|
+ } else {
|
|
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ }
|
|
+
|
|
+ // check for safepoint operation in progress and/or pending suspend requests
|
|
+ Label safepoint_in_progress, safepoint_in_progress_done;
|
|
+ {
|
|
+ __ safepoint_poll_acquire(safepoint_in_progress);
|
|
+ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset()));
|
|
+ __ bnez(t0, safepoint_in_progress);
|
|
+ __ bind(safepoint_in_progress_done);
|
|
+ }
|
|
+
|
|
+ // change thread state
|
|
+ Label after_transition;
|
|
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ __ mv(t0, _thread_in_Java);
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sw(t0, Address(t1));
|
|
+ __ bind(after_transition);
|
|
+
|
|
+ Label reguard;
|
|
+ Label reguard_done;
|
|
+ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));
|
|
+ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled);
|
|
+ __ beq(t0, t1, reguard);
|
|
+ __ bind(reguard_done);
|
|
+
|
|
+ // The native result, if any, is live here
|
|
+
|
|
+ // Unlock
|
|
+ Label unlock_done;
|
|
+ Label slow_path_unlock;
|
|
+ if (method->is_synchronized()) {
|
|
+
|
|
+ // Get locked oop from the handle we passed to jni
|
|
+ __ ld(obj_reg, Address(oop_handle_reg, 0));
|
|
+
|
|
+ Label done;
|
|
+
|
|
+ if (UseBiasedLocking) {
|
|
+ __ biased_locking_exit(obj_reg, old_hdr, done);
|
|
+ }
|
|
+
|
|
+ // Simple recursive lock?
|
|
+
|
|
+ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
|
|
+ __ beqz(t0, done);
|
|
+
|
|
+ // Must save x10 if it is live now because cmpxchg must use it
|
|
+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
|
|
+ save_native_result(masm, ret_type, stack_slots);
|
|
+ }
|
|
+
|
|
+
|
|
+ // get address of the stack lock
|
|
+ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
|
|
+ // get old displaced header
|
|
+ __ ld(old_hdr, Address(x10, 0));
|
|
+
|
|
+ // Atomic swap old header if oop still contains the stack lock
|
|
+ Label succeed;
|
|
+ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock);
|
|
+ __ bind(succeed);
|
|
+
|
|
+ // slow path re-enters here
|
|
+ __ bind(unlock_done);
|
|
+ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
|
|
+ restore_native_result(masm, ret_type, stack_slots);
|
|
+ }
|
|
+
|
|
+ __ bind(done);
|
|
+ }
|
|
+
|
|
+ Label dtrace_method_exit, dtrace_method_exit_done;
|
|
+ {
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
|
|
+ __ lbu(t0, Address(t0, offset));
|
|
+ __ bnez(t0, dtrace_method_exit);
|
|
+ __ bind(dtrace_method_exit_done);
|
|
+ }
|
|
+
|
|
+ __ reset_last_Java_frame(false);
|
|
+
|
|
+ // Unbox oop result, e.g. JNIHandles::resolve result.
|
|
+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
|
|
+ __ resolve_jobject(x10, xthread, t1);
|
|
+ }
|
|
+
|
|
+ if (CheckJNICalls) {
|
|
+ // clear_pending_jni_exception_check
|
|
+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
|
|
+ }
|
|
+
|
|
+ if (!is_critical_native) {
|
|
+ // reset handle block
|
|
+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset()));
|
|
+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes()));
|
|
+ }
|
|
+
|
|
+ __ leave();
|
|
+
|
|
+ if (!is_critical_native) {
|
|
+ // Any exception pending?
|
|
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ __ bnez(t0, exception_pending);
|
|
+ }
|
|
+
|
|
+ // We're done
|
|
+ __ ret();
|
|
+
|
|
+ // Unexpected paths are out of line and go here
|
|
+
|
|
+ if (!is_critical_native) {
|
|
+ // forward the exception
|
|
+ __ bind(exception_pending);
|
|
+
|
|
+ // and forward the exception
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
|
+ }
|
|
+
|
|
+ // Slow path locking & unlocking
|
|
+ if (method->is_synchronized()) {
|
|
+
|
|
+ __ block_comment("Slow path lock {");
|
|
+ __ bind(slow_path_lock);
|
|
+
|
|
+ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
|
|
+ // args are (oop obj, BasicLock* lock, JavaThread* thread)
|
|
+
|
|
+ // protect the args we've loaded
|
|
+ save_args(masm, total_c_args, c_arg, out_regs);
|
|
+
|
|
+ __ mv(c_rarg0, obj_reg);
|
|
+ __ mv(c_rarg1, lock_reg);
|
|
+ __ mv(c_rarg2, xthread);
|
|
+
|
|
+ // Not a leaf but we have last_Java_frame setup as we want
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
|
|
+ restore_args(masm, total_c_args, c_arg, out_regs);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ { Label L;
|
|
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ __ beqz(t0, L);
|
|
+ __ stop("no pending exception allowed on exit from monitorenter");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+ __ j(lock_done);
|
|
+
|
|
+ __ block_comment("} Slow path lock");
|
|
+
|
|
+ __ block_comment("Slow path unlock {");
|
|
+ __ bind(slow_path_unlock);
|
|
+
|
|
+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
|
|
+ save_native_result(masm, ret_type, stack_slots);
|
|
+ }
|
|
+
|
|
+ __ mv(c_rarg2, xthread);
|
|
+ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
|
|
+ __ mv(c_rarg0, obj_reg);
|
|
+
|
|
+ // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
|
|
+ // NOTE that obj_reg == x9 currently
|
|
+ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+
|
|
+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
|
|
+
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ __ beqz(t0, L);
|
|
+ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif /* ASSERT */
|
|
+
|
|
+ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+
|
|
+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
|
|
+ restore_native_result(masm, ret_type, stack_slots);
|
|
+ }
|
|
+ __ j(unlock_done);
|
|
+
|
|
+ __ block_comment("} Slow path unlock");
|
|
+
|
|
+ } // synchronized
|
|
+
|
|
+ // SLOW PATH Reguard the stack if needed
|
|
+
|
|
+ __ bind(reguard);
|
|
+ save_native_result(masm, ret_type, stack_slots);
|
|
+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
|
|
+ restore_native_result(masm, ret_type, stack_slots);
|
|
+ // and continue
|
|
+ __ j(reguard_done);
|
|
+
|
|
+ // SLOW PATH safepoint
|
|
+ {
|
|
+ __ block_comment("safepoint {");
|
|
+ __ bind(safepoint_in_progress);
|
|
+
|
|
+ // Don't use call_VM as it will see a possible pending exception and forward it
|
|
+ // and never return here preventing us from clearing _last_native_pc down below.
|
|
+ //
|
|
+ save_native_result(masm, ret_type, stack_slots);
|
|
+ __ mv(c_rarg0, xthread);
|
|
+#ifndef PRODUCT
|
|
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
+#endif
|
|
+ int32_t offset = 0;
|
|
+ if (!is_critical_native) {
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset);
|
|
+ } else {
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset);
|
|
+ }
|
|
+ __ jalr(x1, t0, offset);
|
|
+ // Restore any method result value
|
|
+ restore_native_result(masm, ret_type, stack_slots);
|
|
+
|
|
+ if (is_critical_native) {
|
|
+ // The call above performed the transition to thread_in_Java so
|
|
+ // skip the transition logic above.
|
|
+ __ j(after_transition);
|
|
+ }
|
|
+
|
|
+ __ j(safepoint_in_progress_done);
|
|
+ __ block_comment("} safepoint");
|
|
+ }
|
|
+
|
|
+ // SLOW PATH dtrace support
|
|
+ {
|
|
+ __ block_comment("dtrace entry {");
|
|
+ __ bind(dtrace_method_entry);
|
|
+
|
|
+ // We have all of the arguments set up at this point. We must not clobber any of the
|
|
+ // register arguments; save_args/restore_args below preserve them around the VM call.
|
|
+
|
|
+ save_args(masm, total_c_args, c_arg, out_regs);
|
|
+ __ mov_metadata(c_rarg1, method());
|
|
+ __ call_VM_leaf(
|
|
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
|
|
+ xthread, c_rarg1);
|
|
+ restore_args(masm, total_c_args, c_arg, out_regs);
|
|
+ __ j(dtrace_method_entry_done);
|
|
+ __ block_comment("} dtrace entry");
|
|
+ }
|
|
+
|
|
+ {
|
|
+ __ block_comment("dtrace exit {");
|
|
+ __ bind(dtrace_method_exit);
|
|
+ save_native_result(masm, ret_type, stack_slots);
|
|
+ __ mov_metadata(c_rarg1, method());
|
|
+ __ call_VM_leaf(
|
|
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
|
|
+ xthread, c_rarg1);
|
|
+ restore_native_result(masm, ret_type, stack_slots);
|
|
+ __ j(dtrace_method_exit_done);
|
|
+ __ block_comment("} dtrace exit");
|
|
+ }
|
|
+
|
|
+ __ flush();
|
|
+
|
|
+ nmethod *nm = nmethod::new_native_nmethod(method,
|
|
+ compile_id,
|
|
+ masm->code(),
|
|
+ vep_offset,
|
|
+ frame_complete,
|
|
+ stack_slots / VMRegImpl::slots_per_word,
|
|
+ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
|
|
+ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
|
|
+ oop_maps);
|
|
+ assert(nm != NULL, "create native nmethod fail!");
|
|
+ if (is_critical_native) {
|
|
+ nm->set_lazy_critical_native(true);
|
|
+ }
|
|
+
|
|
+ return nm;
|
|
+}
|
|
+
|
|
+// This function returns the adjustment size (in number of words) to a c2i adapter
|
|
+// activation for use during deoptimization
|
|
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
|
|
+ assert(callee_locals >= callee_parameters,
|
|
+ "test and remove; got more parms than locals");
|
|
+ if (callee_locals < callee_parameters) {
|
|
+ return 0; // No adjustment for negative locals
|
|
+ }
|
|
+ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
|
|
+ // diff is counted in stack words
|
|
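+ // e.g. 2 parameters and 5 locals give diff == 3 (Interpreter::stackElementWords is 1
+ // on 64-bit), which align_up rounds to 4 so the extension stays 16-byte aligned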
+ return align_up(diff, 2);
|
|
+}
|
|
+
|
|
+//------------------------------generate_deopt_blob----------------------------
|
|
+void SharedRuntime::generate_deopt_blob() {
|
|
+ // Allocate space for the code
|
|
+ ResourceMark rm;
|
|
+ // Setup code generation tools
|
|
+ int pad = 0;
|
|
+ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024);
|
|
+ MacroAssembler* masm = new MacroAssembler(&buffer);
|
|
+ int frame_size_in_words = -1;
|
|
+ OopMap* map = NULL;
|
|
+ OopMapSet *oop_maps = new OopMapSet();
|
|
+ assert_cond(masm != NULL && oop_maps != NULL);
|
|
+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0);
|
|
+
|
|
+ // -------------
|
|
+ // This code enters when returning to a de-optimized nmethod. A return
|
|
+ // address has been pushed on the stack, and return values are in
|
|
+ // registers.
|
|
+ // If we are doing a normal deopt then we were called from the patched
|
|
+ // nmethod from the point we returned to the nmethod. So the return
|
|
+ // address on the stack is wrong by NativeCall::instruction_size
|
|
+ // We will adjust the value so it looks like we have the original return
|
|
+ // address on the stack (like when we eagerly deoptimized).
|
|
+ // In the case of an exception pending when deoptimizing, we enter
|
|
+ // with a return address on the stack that points after the call we patched
|
|
+ // into the exception handler. We have the following register state from,
|
|
+ // e.g., the forward exception stub (see stubGenerator_riscv.cpp).
|
|
+ // x10: exception oop
|
|
+ // x9: exception handler
|
|
+ // x13: throwing pc
|
|
+ // So in this case we simply jam x13 into the useless return address and
|
|
+ // the stack looks just like we want.
|
|
+ //
|
|
+ // At this point we need to de-opt. We save the argument return
|
|
+ // registers. We call the first C routine, fetch_unroll_info(). This
|
|
+ // routine captures the return values and returns a structure which
|
|
+ // describes the current frame size and the sizes of all replacement frames.
|
|
+ // The current frame is compiled code and may contain many inlined
|
|
+ // functions, each with their own JVM state. We pop the current frame, then
|
|
+ // push all the new frames. Then we call the C routine unpack_frames() to
|
|
+ // populate these frames. Finally unpack_frames() returns us the new target
|
|
+ // address. Notice that callee-save registers are BLOWN here; they have
|
|
+ // already been captured in the vframeArray at the time the return PC was
|
|
+ // patched.
|
|
+ address start = __ pc();
|
|
+ Label cont;
|
|
+
|
|
+ // Prolog for the non-exception case
|
|
+
|
|
+ // Save everything in sight.
|
|
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
|
|
+
|
|
+ // Normal deoptimization. Save exec mode for unpack_frames.
|
|
+ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved
|
|
+ __ j(cont);
|
|
+
|
|
+ int reexecute_offset = __ pc() - start;
|
|
+
|
|
+ // Reexecute case
|
|
+ // the return address is the pc that describes which bci to re-execute at
|
|
+
|
|
+ // No need to update map as each call to save_live_registers will produce identical oopmap
|
|
+ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
|
|
+
|
|
+ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
|
|
+ __ j(cont);
|
|
+
|
|
+ int exception_offset = __ pc() - start;
|
|
+
|
|
+ // Prolog for exception case
|
|
+
|
|
+ // all registers are dead at this entry point, except for x10, and
|
|
+ // x13 which contain the exception oop and exception pc
|
|
+ // respectively. Set them in TLS and fall thru to the
|
|
+ // unpack_with_exception_in_tls entry point.
|
|
+
|
|
+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+
|
|
+ int exception_in_tls_offset = __ pc() - start;
|
|
+
|
|
+ // new implementation because exception oop is now passed in JavaThread
|
|
+
|
|
+ // Prolog for exception case
|
|
+ // All registers must be preserved because they might be used by LinearScan
|
|
+ // Exception oop and throwing PC are passed in JavaThread
|
|
+ // tos: stack at point of call to method that threw the exception (i.e. only
|
|
+ // args are on the stack, no return address)
|
|
+
|
|
+ // The return address pushed by save_live_registers will be patched
|
|
+ // later with the throwing pc. The correct value is not available
|
|
+ // now because loading it from memory would destroy registers.
|
|
+
|
|
+ // NB: The SP at this point must be the SP of the method that is
|
|
+ // being deoptimized. Deoptimization assumes that the frame created
|
|
+ // here by save_live_registers is immediately below the method's SP.
|
|
+ // This is a somewhat fragile mechanism.
|
|
+
|
|
+ // Save everything in sight.
|
|
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
|
|
+
|
|
+ // Now it is safe to overwrite any register
|
|
+
|
|
+ // Deopt during an exception. Save exec mode for unpack_frames.
|
|
+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved
|
|
+
|
|
+ // load throwing pc from JavaThread and patch it as the return address
|
|
+ // of the current frame. Then clear the field in JavaThread
|
|
+
|
|
+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // verify that there is really an exception oop in JavaThread
|
|
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ verify_oop(x10);
|
|
+
|
|
+ // verify that there is no pending exception
|
|
+ Label no_pending_exception;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, no_pending_exception);
|
|
+ __ stop("must not have pending exception here");
|
|
+ __ bind(no_pending_exception);
|
|
+#endif
|
|
+
|
|
+ __ bind(cont);
|
|
+
|
|
+ // Call C code. Need thread and this frame, but NOT official VM entry
|
|
+ // crud. We cannot block on this call, no GC can happen.
|
|
+ //
|
|
+ // UnrollBlock* fetch_unroll_info(JavaThread* thread)
|
|
+
|
|
+ // fetch_unroll_info needs to call last_java_frame().
|
|
+
|
|
+ Label retaddr;
|
|
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread,
|
|
+ JavaThread::last_Java_fp_offset()));
|
|
+ __ beqz(t0, L);
|
|
+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mv(c_rarg1, xcpool);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ __ bind(retaddr);
|
|
+
|
|
+ // Need to have an oopmap that tells fetch_unroll_info where to
|
|
+ // find any register it might need.
|
|
+ oop_maps->add_gc_map(__ pc() - start, map);
|
|
+
|
|
+ __ reset_last_Java_frame(false);
|
|
+
|
|
+ // Load UnrollBlock* into x15
|
|
+ __ mv(x15, x10);
|
|
+
|
|
+ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
|
|
+ Label noException;
|
|
+ __ mv(t0, Deoptimization::Unpack_exception);
|
|
+ __ bne(xcpool, t0, noException); // Was exception pending?
|
|
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+ __ verify_oop(x10);
|
|
+
|
|
+ // Overwrite the result registers with the exception results.
|
|
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
|
|
+
|
|
+ __ bind(noException);
|
|
+
|
|
+ // Only register save data is on the stack.
|
|
+ // Now restore the result registers. Everything else is either dead
|
|
+ // or captured in the vframeArray.
|
|
+ reg_saver.restore_result_registers(masm);
|
|
+
|
|
+ // All of the register save area has been popped off the stack. Only the
|
|
+ // return address remains.
|
|
+
|
|
+ // Pop all the frames we must move/replace.
|
|
+ //
|
|
+ // Frame picture (youngest to oldest)
|
|
+ // 1: self-frame (no frame link)
|
|
+ // 2: deopting frame (no frame link)
|
|
+ // 3: caller of deopting frame (could be compiled/interpreted).
|
|
+ //
|
|
+ // Note: by leaving the return address of self-frame on the stack
|
|
+ // and using the size of frame 2 to adjust the stack
|
|
+ // when we are done, the return address to frame 3 will still be on the stack.
|
|
+
|
|
+ // Pop deoptimized frame
|
|
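+ // size_of_deoptimized_frame includes the fp/ra pair saved at the top of the frame;
+ // we add size - 2*wordSize here, reload fp and ra by hand, then pop those two words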
+ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
|
|
+ __ sub(x12, x12, 2 * wordSize);
|
|
+ __ add(sp, sp, x12);
|
|
+ __ ld(fp, Address(sp, 0));
|
|
+ __ ld(ra, Address(sp, wordSize));
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+ // RA should now be the return address to the caller (3)
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // Compilers generate code that bangs the stack by as much as the
|
|
+ // interpreter would need. So this stack banging should never
|
|
+ // trigger a fault. Verify that it does not on non product builds.
|
|
+ if (UseStackBanging) {
|
|
+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
|
|
+ __ bang_stack_size(x9, x12);
|
|
+ }
|
|
+#endif
|
|
+ // Load address of array of frame pcs into x12
|
|
+ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
|
|
+
|
|
+ // Load address of array of frame sizes into x14
|
|
+ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
|
|
+
|
|
+ // Load counter into x13
|
|
+ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
|
|
+
|
|
+ // Now adjust the caller's stack to make up for the extra locals
|
|
+ // but record the original sp so that we can save it in the skeletal interpreter
|
|
+ // frame and the stack walking of interpreter_sender will get the unextended sp
|
|
+ // value and not the "real" sp value.
|
|
+
|
|
+ const Register sender_sp = x16;
|
|
+
|
|
+ __ mv(sender_sp, sp);
|
|
+ __ lwu(x9, Address(x15,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ caller_adjustment_offset_in_bytes()));
|
|
+ __ sub(sp, sp, x9);
|
|
+
|
|
+ // Push interpreter frames in a loop
|
|
+ __ mv(t0, (uint64_t)0xDEADDEAD); // Make a recognizable pattern
|
|
+ __ mv(t1, t0);
|
|
+ Label loop;
|
|
+ __ bind(loop);
|
|
+ __ ld(x9, Address(x14, 0)); // Load frame size
|
|
+ __ addi(x14, x14, wordSize);
|
|
+ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand
|
|
+ __ ld(ra, Address(x12, 0)); // Load pc
|
|
+ __ addi(x12, x12, wordSize);
|
|
+ __ enter(); // Save old & set new fp
|
|
+ __ sub(sp, sp, x9); // Prolog
|
|
+ // This value is corrected by layout_activation_impl
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
|
|
+ __ mv(sender_sp, sp); // Pass sender_sp to next frame
|
|
+ __ addi(x13, x13, -1); // Decrement counter
|
|
+ __ bnez(x13, loop);
|
|
+
|
|
+ // Re-push self-frame
|
|
+ __ ld(ra, Address(x12));
|
|
+ __ enter();
|
|
+
|
|
+ // Allocate a full sized register save area. We subtract 2 because
|
|
+ // enter() just pushed 2 words
|
|
+ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
|
|
+
|
|
+ // Restore frame locals after moving the frame
|
|
+ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
|
|
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
|
|
+
|
|
+ // Call C code. Need thread but NOT official VM entry
|
|
+ // crud. We cannot block on this call, no GC can happen. Call should
|
|
+ // restore return values to their stack-slots with the new SP.
|
|
+ //
|
|
+ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
|
|
+
|
|
+ // Use fp because the frames look interpreted now
|
|
+ // Don't need the precise return PC here, just precise enough to point into this code blob.
|
|
+ address the_pc = __ pc();
|
|
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
|
|
+
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mv(c_rarg1, xcpool); // second arg: exec_mode
|
|
+ offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+
|
|
+ // Set an oopmap for the call site
|
|
+ // Use the same PC we used for the last java frame
|
|
+ oop_maps->add_gc_map(the_pc - start,
|
|
+ new OopMap( frame_size_in_words, 0 ));
|
|
+
|
|
+ // Clear fp AND pc
|
|
+ __ reset_last_Java_frame(true);
|
|
+
|
|
+ // Collect return values
|
|
+ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
|
|
+ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
|
|
+
|
|
+ // Pop self-frame.
|
|
+ __ leave(); // Epilog
|
|
+
|
|
+ // Jump to interpreter
|
|
+ __ ret();
|
|
+
|
|
+ // Make sure all code is generated
|
|
+ masm->flush();
|
|
+
|
|
+ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
|
|
+ assert(_deopt_blob != NULL, "create deoptimization blob fail!");
|
|
+ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
|
|
+}
|
|
+
|
|
+uint SharedRuntime::out_preserve_stack_slots() {
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifdef COMPILER2
|
|
+//------------------------------generate_uncommon_trap_blob--------------------
|
|
+void SharedRuntime::generate_uncommon_trap_blob() {
|
|
+ // Allocate space for the code
|
|
+ ResourceMark rm;
|
|
+ // Setup code generation tools
|
|
+ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
|
|
+ MacroAssembler* masm = new MacroAssembler(&buffer);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
|
|
+
|
|
+ address start = __ pc();
|
|
+
|
|
+ // Push self-frame. We get here with a return address in RA
|
|
+ // and sp should be 16 byte aligned
|
|
+ // push fp and retaddr by hand
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ sd(ra, Address(sp, wordSize));
|
|
+ __ sd(fp, Address(sp, 0));
|
|
+ // we don't expect an arg reg save area
|
|
+#ifndef PRODUCT
|
|
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
+#endif
|
|
+ // The compiler left unloaded_class_index in j_rarg0; move it to where the
|
|
+ // runtime expects it.
|
|
+ __ addiw(c_rarg1, j_rarg0, 0);
|
|
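+ // addiw with an immediate of 0 is sext.w: it sign-extends the low 32 bits of
+ // j_rarg0 into c_rarg1, which is what the runtime expects for the jint index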
+
|
|
+ // we need to set the past SP to the stack pointer of the stub frame
|
|
+ // and the pc to the address where this runtime call will return
|
|
+ // (although actually any pc in this code blob will do).
|
|
+ Label retaddr;
|
|
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
|
|
+
|
|
+ // Call C code. Need thread but NOT official VM entry
|
|
+ // crud. We cannot block on this call, no GC can happen. Call should
|
|
+ // capture callee-saved registers as well as return values.
|
|
+ //
|
|
+ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
|
|
+ //
|
|
+ // n.b. 3 gp args, 0 fp args, integral return type
|
|
+
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0,
|
|
+ RuntimeAddress(CAST_FROM_FN_PTR(address,
|
|
+ Deoptimization::uncommon_trap)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ __ bind(retaddr);
|
|
+
|
|
+ // Set an oopmap for the call site
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
|
|
+ assert_cond(oop_maps != NULL && map != NULL);
|
|
+
|
|
+ // location of fp is known implicitly by the frame sender code
|
|
+
|
|
+ oop_maps->add_gc_map(__ pc() - start, map);
|
|
+
|
|
+ __ reset_last_Java_frame(false);
|
|
+
|
|
+ // move UnrollBlock* into x14
|
|
+ __ mv(x14, x10);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ { Label L;
|
|
+ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
|
|
+ __ mvw(t1, Deoptimization::Unpack_uncommon_trap);
|
|
+ __ beq(t0, t1, L);
|
|
+ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // Pop all the frames we must move/replace.
|
|
+ //
|
|
+ // Frame picture (youngest to oldest)
|
|
+ // 1: self-frame (no frame link)
|
|
+ // 2: deopting frame (no frame link)
|
|
+ // 3: caller of deopting frame (could be compiled/interpreted).
|
|
+
|
|
+ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
|
|
+
|
|
+ // Pop deoptimized frame (int)
|
|
+ __ lwu(x12, Address(x14,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ size_of_deoptimized_frame_offset_in_bytes()));
|
|
+ __ sub(x12, x12, 2 * wordSize);
|
|
+ __ add(sp, sp, x12);
|
|
+ __ ld(fp, sp, 0);
|
|
+ __ ld(ra, sp, wordSize);
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+ // RA should now be the return address to the caller (3) frame
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // Compilers generate code that bangs the stack by as much as the
|
|
+ // interpreter would need. So this stack banging should never
|
|
+ // trigger a fault. Verify that it does not on non product builds.
|
|
+ if (UseStackBanging) {
|
|
+ __ lwu(x11, Address(x14,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ total_frame_sizes_offset_in_bytes()));
|
|
+ __ bang_stack_size(x11, x12);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // Load address of array of frame pcs into x12 (address*)
|
|
+ __ ld(x12, Address(x14,
|
|
+ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
|
|
+
|
|
+ // Load address of array of frame sizes into x15 (intptr_t*)
|
|
+ __ ld(x15, Address(x14,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ frame_sizes_offset_in_bytes()));
|
|
+
|
|
+ // Counter
|
|
+ __ lwu(x13, Address(x14,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ number_of_frames_offset_in_bytes())); // (int)
|
|
+
|
|
+ // Now adjust the caller's stack to make up for the extra locals but
|
|
+ // record the original sp so that we can save it in the skeletal
|
|
+ // interpreter frame and the stack walking of interpreter_sender
|
|
+ // will get the unextended sp value and not the "real" sp value.
|
|
+
|
|
+ const Register sender_sp = t1; // temporary register
|
|
+
|
|
+ __ lwu(x11, Address(x14,
|
|
+ Deoptimization::UnrollBlock::
|
|
+ caller_adjustment_offset_in_bytes())); // (int)
|
|
+ __ mv(sender_sp, sp);
|
|
+ __ sub(sp, sp, x11);
|
|
+
|
|
+ // Push interpreter frames in a loop
|
|
+ Label loop;
|
|
+ __ bind(loop);
|
|
+ __ ld(x11, Address(x15, 0)); // Load frame size
|
|
+ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand
|
|
+ __ ld(ra, Address(x12, 0)); // Save return address
|
|
+ __ enter(); // and old fp & set new fp
|
|
+ __ sub(sp, sp, x11); // Prolog
|
|
+ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
|
|
+ // This value is corrected by layout_activation_impl
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ __ mv(sender_sp, sp); // Pass sender_sp to next frame
|
|
+ __ add(x15, x15, wordSize); // Bump array pointer (sizes)
|
|
+ __ add(x12, x12, wordSize); // Bump array pointer (pcs)
|
|
+ __ subw(x13, x13, 1); // Decrement counter
|
|
+ __ bgtz(x13, loop);
|
|
+ __ ld(ra, Address(x12, 0)); // save final return address
|
|
+ // Re-push self-frame
|
|
+ __ enter(); // & old fp & set new fp
|
|
+
|
|
+ // Use fp because the frames look interpreted now
|
|
+ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
|
|
+ // Don't need the precise return PC here, just precise enough to point into this code blob.
|
|
+ address the_pc = __ pc();
|
|
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
|
|
+
|
|
+ // Call C code. Need thread but NOT official VM entry
|
|
+ // crud. We cannot block on this call, no GC can happen. Call should
|
|
+ // restore return values to their stack-slots with the new SP.
|
|
+ //
|
|
+ // BasicType unpack_frames(JavaThread* thread, int exec_mode)
|
|
+ //
|
|
+
|
|
+ // n.b. 2 gp args, 0 fp args, integral return type
|
|
+
|
|
+ // sp should already be aligned
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
|
|
+ offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+
|
|
+ // Set an oopmap for the call site
|
|
+ // Use the same PC we used for the last java frame
|
|
+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
|
|
+
|
|
+ // Clear fp AND pc
|
|
+ __ reset_last_Java_frame(true);
|
|
+
|
|
+ // Pop self-frame.
|
|
+ __ leave(); // Epilog
|
|
+
|
|
+ // Jump to interpreter
|
|
+ __ ret();
|
|
+
|
|
+ // Make sure all code is generated
|
|
+ masm->flush();
|
|
+
|
|
+ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
|
|
+ SimpleRuntimeFrame::framesize >> 1);
|
|
+}
|
|
+#endif // COMPILER2
|
|
+
|
|
+//------------------------------generate_handler_blob------
|
|
+//
|
|
+// Generate a special Compile2Runtime blob that saves all registers,
|
|
+// and setup oopmap.
|
|
+//
|
|
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
|
|
+ ResourceMark rm;
|
|
+ OopMapSet *oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ OopMap* map = NULL;
|
|
+
|
|
+ // Allocate space for the code. Setup code generation tools.
|
|
+ CodeBuffer buffer("handler_blob", 2048, 1024);
|
|
+ MacroAssembler* masm = new MacroAssembler(&buffer);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+ address start = __ pc();
|
|
+ address call_pc = NULL;
|
|
+ int frame_size_in_words = -1;
|
|
+ bool cause_return = (poll_type == POLL_AT_RETURN);
|
|
+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
|
|
+
|
|
+ // Save Integer and Float registers.
|
|
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
|
|
+
|
|
+ // The following is basically a call_VM. However, we need the precise
|
|
+ // address of the call in order to generate an oopmap. Hence, we do all the
|
|
+ // work ourselves.
|
|
+
|
|
+ Label retaddr;
|
|
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
|
|
+
|
|
+ // The return address must always be correct so that frame constructor never
|
|
+ // sees an invalid pc.
|
|
+
|
|
+ if (!cause_return) {
|
|
+ // overwrite the return address pushed by save_live_registers
|
|
+ // Additionally, x18 is a callee-saved register so we can look at
|
|
+ // it later to determine if someone changed the return address for
|
|
+ // us!
|
|
+ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset()));
|
|
+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ // Do the call
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(call_ptr), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ __ bind(retaddr);
|
|
+
|
|
+ // Set an oopmap for the call site. This oopmap will map all
|
|
+ // oop-registers and debug-info registers as callee-saved. This
|
|
+ // will allow deoptimization at this safepoint to find all possible
|
|
+ // debug-info recordings, as well as let GC find all oops.
|
|
+
|
|
+ oop_maps->add_gc_map( __ pc() - start, map);
|
|
+
|
|
+ Label noException;
|
|
+
|
|
+ __ reset_last_Java_frame(false);
|
|
+
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, noException);
|
|
+
|
|
+ // Exception pending
|
|
+
|
|
+ reg_saver.restore_live_registers(masm);
|
|
+
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
|
+
|
|
+ // No exception case
|
|
+ __ bind(noException);
|
|
+
|
|
+ Label no_adjust, bail;
|
|
+ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
|
|
+ // If our stashed return pc was modified by the runtime we avoid touching it
|
|
+ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize));
|
|
+ __ bne(x18, t0, no_adjust);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // Verify the correct encoding of the poll we're about to skip.
|
|
+ // See NativeInstruction::is_lwu_to_zr()
|
|
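+ // A lwu into zr encodes opcode 0b0000011 in bits 0-6, rd == 0b00000 in bits 7-11
+ // and funct3 == 0b110 in bits 12-14; the checks below test those three fields.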
+ __ lwu(t0, Address(x18));
|
|
+ __ andi(t1, t0, 0b0000011);
|
|
+ __ mv(t2, 0b0000011);
|
|
+ __ bne(t1, t2, bail); // 0-6:0b0000011
|
|
+ __ srli(t1, t0, 7);
|
|
+ __ andi(t1, t1, 0b11111); // isolate rd; it must be 0b00000 (zr)
|
|
+ __ bnez(t1, bail); // 7-11:0b00000
|
|
+ __ srli(t1, t0, 12);
|
|
+ __ andi(t1, t1, 0b110);
|
|
+ __ mv(t2, 0b110);
|
|
+ __ bne(t1, t2, bail); // 12-14:0b110
|
|
+#endif
|
|
+ // Adjust return pc forward to step over the safepoint poll instruction
|
|
+ __ add(x18, x18, NativeInstruction::instruction_size);
|
|
+ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
|
|
+ }
|
|
+
|
|
+ __ bind(no_adjust);
|
|
+ // Normal exit, restore registers and exit.
|
|
+
|
|
+ reg_saver.restore_live_registers(masm);
|
|
+ __ ret();
|
|
+
|
|
+#ifdef ASSERT
|
|
+ __ bind(bail);
|
|
+ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
|
|
+#endif
|
|
+
|
|
+ // Make sure all code is generated
|
|
+ masm->flush();
|
|
+
|
|
+ // Fill-out other meta info
|
|
+ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
|
|
+}
|
|
+
|
|
+//
|
|
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
|
|
+//
|
|
+// Generate a stub that calls into vm to find out the proper destination
|
|
+// of a java call. All the argument registers are live at this point
|
|
+// but since this is generic code we don't know what they are and the caller
|
|
+// must do any gc of the args.
|
|
+//
|
|
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
|
|
+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
|
|
+
|
|
+ // allocate space for the code
|
|
+ ResourceMark rm;
|
|
+
|
|
+ CodeBuffer buffer(name, 1000, 512);
|
|
+ MacroAssembler* masm = new MacroAssembler(&buffer);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+ int frame_size_in_words = -1;
|
|
+ RegisterSaver reg_saver(false /* save_vectors */);
|
|
+
|
|
+ OopMapSet *oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+ OopMap* map = NULL;
|
|
+
|
|
+ int start = __ offset();
|
|
+
|
|
+ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
|
|
+
|
|
+ int frame_complete = __ offset();
|
|
+
|
|
+ {
|
|
+ Label retaddr;
|
|
+ __ set_last_Java_frame(sp, noreg, retaddr, t0);
|
|
+
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(destination), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+ __ bind(retaddr);
|
|
+ }
|
|
+
|
|
+ // Set an oopmap for the call site.
|
|
+ // We need this not only for callee-saved registers, but also for volatile
|
|
+ // registers that the compiler might be keeping live across a safepoint.
|
|
+
|
|
+ oop_maps->add_gc_map( __ offset() - start, map);
|
|
+
|
|
+ // x10 contains the address we are going to jump to assuming no exception got installed
|
|
+
|
|
+ // clear last_Java_sp
|
|
+ __ reset_last_Java_frame(false);
|
|
+ // check for pending exceptions
|
|
+ Label pending;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ bnez(t0, pending);
|
|
+
|
|
+ // get the returned Method*
|
|
+ __ get_vm_result_2(xmethod, xthread);
|
|
+ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod)));
|
|
+
|
|
+ // x10 is where we want to jump, overwrite t0 which is saved and temporary
|
|
+ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0)));
|
|
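+ // Storing the target into t0's save slot means restore_live_registers below
+ // reloads it into t0, so the jr(t0) that follows lands on the resolved entry.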
+ reg_saver.restore_live_registers(masm);
|
|
+
|
|
+ // We are back to the original state on entry and ready to go.
|
|
+
|
|
+ __ jr(t0);
|
|
+
|
|
+ // Pending exception after the safepoint
|
|
+
|
|
+ __ bind(pending);
|
|
+
|
|
+ reg_saver.restore_live_registers(masm);
|
|
+
|
|
+ // exception pending => remove activation and forward to exception handler
|
|
+
|
|
+ __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
|
|
+
|
|
+ __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
|
+
|
|
+ // -------------
|
|
+ // make sure all code is generated
|
|
+ masm->flush();
|
|
+
|
|
+ // return the blob
|
|
+ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
|
|
+}
|
|
+
|
|
+#ifdef COMPILER2
|
|
+//------------------------------generate_exception_blob---------------------------
|
|
+// creates exception blob at the end
|
|
+// Using the exception blob, this code is jumped to from a compiled method.
|
|
+// (see emit_exception_handler in riscv.ad file)
|
|
+//
|
|
+// Given an exception pc at a call we call into the runtime for the
|
|
+// handler in this method. This handler might merely restore state
|
|
+// (i.e. callee save registers), unwind the frame, and jump to the
|
|
+// exception handler for the nmethod if there is no Java level handler
|
|
+// for the nmethod.
|
|
+//
|
|
+// This code is entered with a jump.
|
|
+//
|
|
+// Arguments:
|
|
+// x10: exception oop
|
|
+// x13: exception pc
|
|
+//
|
|
+// Results:
|
|
+// x10: exception oop
|
|
+// x13: exception pc in caller
|
|
+// destination: exception handler of caller
|
|
+//
|
|
+// Note: the exception pc MUST be at a call (precise debug information)
|
|
+// Registers x10, x13, x12, x14, x15, t0 are not callee saved.
|
|
+//
|
|
+
|
|
+void OptoRuntime::generate_exception_blob() {
|
|
+ assert(!OptoRuntime::is_callee_saved_register(R13_num), "");
|
|
+ assert(!OptoRuntime::is_callee_saved_register(R10_num), "");
|
|
+ assert(!OptoRuntime::is_callee_saved_register(R12_num), "");
|
|
+
|
|
+ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
|
|
+
|
|
+ // Allocate space for the code
|
|
+ ResourceMark rm;
|
|
+ // Setup code generation tools
|
|
+ CodeBuffer buffer("exception_blob", 2048, 1024);
|
|
+ MacroAssembler* masm = new MacroAssembler(&buffer);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+ // TODO check various assumptions made here
|
|
+ //
|
|
+ // make sure we do so before running this
|
|
+
|
|
+ address start = __ pc();
|
|
+
|
|
+ // push fp and retaddr by hand
|
|
+ // Exception pc is 'return address' for stack walker
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ sd(ra, Address(sp, wordSize));
|
|
+ __ sd(fp, Address(sp));
|
|
+ // there are no callee save registers and we don't expect an
|
|
+ // arg reg save area
|
|
+#ifndef PRODUCT
|
|
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
+#endif
|
|
+ // Store exception in Thread object. We cannot pass any arguments to the
|
|
+ // handle_exception call, since we do not want to make any assumption
|
|
+ // about the size of the frame where the exception happened in.
|
|
+ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+
|
|
+ // This call does all the hard work. It checks if an exception handler
|
|
+ // exists in the method.
|
|
+ // If so, it returns the handler address.
|
|
+ // If not, it prepares for stack-unwinding, restoring the callee-save
|
|
+ // registers of the frame being removed.
|
|
+ //
|
|
+ // address OptoRuntime::handle_exception_C(JavaThread* thread)
|
|
+ //
|
|
+ // n.b. 1 gp arg, 0 fp args, integral return type
|
|
+
|
|
+ // the stack should always be aligned
|
|
+ address the_pc = __ pc();
|
|
+ __ set_last_Java_frame(sp, noreg, the_pc, t0);
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+
|
|
+ // Set an oopmap for the call site. This oopmap will only be used if we
|
|
+ // are unwinding the stack. Hence, all locations will be dead.
|
|
+ // Callee-saved registers will be the same as the frame above (i.e.,
|
|
+ // handle_exception_stub), since they were restored when we got the
|
|
+ // exception.
|
|
+
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ assert_cond(oop_maps != NULL);
|
|
+
|
|
+ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
|
|
+
|
|
+ __ reset_last_Java_frame(false);
|
|
+
|
|
+ // Restore callee-saved registers
|
|
+
|
|
+ // fp is an implicitly saved callee saved register (i.e. the calling
|
|
+ // convention will save restore it in prolog/epilog) Other than that
|
|
+ // there are no callee save registers now that adapter frames are gone.
|
|
+ // and we don't expect an arg reg save area
|
|
+ __ ld(fp, Address(sp));
|
|
+ __ ld(x13, Address(sp, wordSize));
|
|
+ __ addi(sp, sp, 2 * wordSize);
|
|
+
|
|
+ // x10: exception handler
|
|
+
|
|
+ // We have a handler in x10 (could be deopt blob).
|
|
+ __ mv(t0, x10);
|
|
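+ // Keep the handler in t0 because x10 is reloaded with the exception oop just below.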
+
|
|
+ // Get the exception oop
|
|
+ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+ // Get the exception pc in case we are deoptimized
|
|
+ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+#ifdef ASSERT
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset()));
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
|
|
+#endif
|
|
+ // Clear the exception oop so GC no longer processes it as a root.
|
|
+ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
|
|
+
|
|
+ // x10: exception oop
|
|
+ // t0: exception handler
|
|
+ // x14: exception pc
|
|
+ // Jump to handler
|
|
+
|
|
+ __ jr(t0);
|
|
+
|
|
+ // Make sure all code is generated
|
|
+ masm->flush();
|
|
+
|
|
+ // Set exception blob
|
|
+ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
|
|
+}
|
|
+#endif // COMPILER2
|
|
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..c5b3b094c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
|
|
@@ -0,0 +1,3743 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/shared/barrierSet.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "nativeInst_riscv.hpp"
|
|
+#include "oops/instanceOop.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/objArrayKlass.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "prims/methodHandles.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/handles.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubCodeGenerator.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "runtime/thread.inline.hpp"
|
|
+#include "utilities/align.hpp"
|
|
+#ifdef COMPILER2
|
|
+#include "opto/runtime.hpp"
|
|
+#endif
|
|
+
|
|
+
|
|
+// Declaration and definition of StubGenerator (no .hpp file).
|
|
+// For a more detailed description of the stub routine structure
|
|
+// see the comment in stubRoutines.hpp
|
|
+
|
|
+#undef __
|
|
+#define __ _masm->
|
|
+
|
|
+#ifdef PRODUCT
|
|
+#define BLOCK_COMMENT(str) /* nothing */
|
|
+#else
|
|
+#define BLOCK_COMMENT(str) __ block_comment(str)
|
|
+#endif
|
|
+
|
|
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
|
+
|
|
+// Stub Code definitions
|
|
+
|
|
+class StubGenerator: public StubCodeGenerator {
|
|
+ private:
|
|
+
|
|
+#ifdef PRODUCT
|
|
+#define inc_counter_np(counter) ((void)0)
|
|
+#else
|
|
+ void inc_counter_np_(int& counter) {
|
|
+ __ la(t1, ExternalAddress((address)&counter));
|
|
+ __ lwu(t0, Address(t1, 0));
|
|
+ __ addiw(t0, t0, 1);
|
|
+ __ sw(t0, Address(t1, 0));
|
|
+ }
|
|
+#define inc_counter_np(counter) \
|
|
+ BLOCK_COMMENT("inc_counter " #counter); \
|
|
+ inc_counter_np_(counter);
|
|
+#endif
|
|
+
|
|
+ // Call stubs are used to call Java from C
|
|
+ //
|
|
+ // Arguments:
|
|
+ // c_rarg0: call wrapper address address
|
|
+ // c_rarg1: result address
|
|
+ // c_rarg2: result type BasicType
|
|
+ // c_rarg3: method Method*
|
|
+ // c_rarg4: (interpreter) entry point address
|
|
+ // c_rarg5: parameters intptr_t*
|
|
+ // c_rarg6: parameter size (in words) int
|
|
+ // c_rarg7: thread Thread*
|
|
+ //
|
|
+ // There is no return from the stub itself as any Java result
|
|
+ // is written to result
|
|
+ //
|
|
+ // we save x1 (ra) as the return PC at the base of the frame and
|
|
+ // link x8 (fp) below it as the frame pointer installing sp (x2)
|
|
+ // into fp.
|
|
+ //
|
|
+ // we save x10-x17, which accounts for all the c arguments.
|
|
+ //
|
|
+ // TODO: strictly do we need to save them all? they are treated as
|
|
+ // volatile by C so could we omit saving the ones we are going to
|
|
+ // place in global registers (thread? method?) or those we only use
|
|
+ // during setup of the Java call?
|
|
+ //
|
|
+ // we don't need to save x5 which C uses as an indirect result location
|
|
+ // return register.
|
|
+ //
|
|
+ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as
|
|
+ // volatile
|
|
+ //
|
|
+ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary
|
|
+ // registers and C expects to be callee-save
|
|
+ //
|
|
+ // so the stub frame looks like this when we enter Java code
|
|
+ //
|
|
+ // [ return_from_Java ] <--- sp
|
|
+ // [ argument word n ]
|
|
+ // ...
|
|
+ // -34 [ argument word 1 ]
|
|
+ // -33 [ saved f27 ] <--- sp_after_call
|
|
+ // -32 [ saved f26 ]
|
|
+ // -31 [ saved f25 ]
|
|
+ // -30 [ saved f24 ]
|
|
+ // -29 [ saved f23 ]
|
|
+ // -28 [ saved f22 ]
|
|
+ // -27 [ saved f21 ]
|
|
+ // -26 [ saved f20 ]
|
|
+ // -25 [ saved f19 ]
|
|
+ // -24 [ saved f18 ]
|
|
+ // -23 [ saved f9 ]
|
|
+ // -22 [ saved f8 ]
|
|
+ // -21 [ saved x27 ]
|
|
+ // -20 [ saved x26 ]
|
|
+ // -19 [ saved x25 ]
|
|
+ // -18 [ saved x24 ]
|
|
+ // -17 [ saved x23 ]
|
|
+ // -16 [ saved x22 ]
|
|
+ // -15 [ saved x21 ]
|
|
+ // -14 [ saved x20 ]
|
|
+ // -13 [ saved x19 ]
|
|
+ // -12 [ saved x18 ]
|
|
+ // -11 [ saved x9 ]
|
|
+ // -10 [ call wrapper (x10) ]
|
|
+ // -9 [ result (x11) ]
|
|
+ // -8 [ result type (x12) ]
|
|
+ // -7 [ method (x13) ]
|
|
+ // -6 [ entry point (x14) ]
|
|
+ // -5 [ parameters (x15) ]
|
|
+ // -4 [ parameter size (x16) ]
|
|
+ // -3 [ thread (x17) ]
|
|
+ // -2 [ saved fp (x8) ]
|
|
+ // -1 [ saved ra (x1) ]
|
|
+ // 0 [ ] <--- fp == saved sp (x2)
|
|
+
|
|
+ // Call stub stack layout word offsets from fp
|
|
+ enum call_stub_layout {
|
|
+ sp_after_call_off = -33,
|
|
+
|
|
+ f27_off = -33,
|
|
+ f26_off = -32,
|
|
+ f25_off = -31,
|
|
+ f24_off = -30,
|
|
+ f23_off = -29,
|
|
+ f22_off = -28,
|
|
+ f21_off = -27,
|
|
+ f20_off = -26,
|
|
+ f19_off = -25,
|
|
+ f18_off = -24,
|
|
+ f9_off = -23,
|
|
+ f8_off = -22,
|
|
+
|
|
+ x27_off = -21,
|
|
+ x26_off = -20,
|
|
+ x25_off = -19,
|
|
+ x24_off = -18,
|
|
+ x23_off = -17,
|
|
+ x22_off = -16,
|
|
+ x21_off = -15,
|
|
+ x20_off = -14,
|
|
+ x19_off = -13,
|
|
+ x18_off = -12,
|
|
+ x9_off = -11,
|
|
+
|
|
+ call_wrapper_off = -10,
|
|
+ result_off = -9,
|
|
+ result_type_off = -8,
|
|
+ method_off = -7,
|
|
+ entry_point_off = -6,
|
|
+ parameters_off = -5,
|
|
+ parameter_size_off = -4,
|
|
+ thread_off = -3,
|
|
+ fp_f = -2,
|
|
+ retaddr_off = -1,
|
|
+ };
|
|
+
|
|
+ address generate_call_stub(address& return_address) {
|
|
+ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
|
|
+ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
|
|
+ "adjust this code");
|
|
+
|
|
+ StubCodeMark mark(this, "StubRoutines", "call_stub");
|
|
+ address start = __ pc();
|
|
+
|
|
+ const Address sp_after_call (fp, sp_after_call_off * wordSize);
|
|
+
|
|
+ const Address call_wrapper (fp, call_wrapper_off * wordSize);
|
|
+ const Address result (fp, result_off * wordSize);
|
|
+ const Address result_type (fp, result_type_off * wordSize);
|
|
+ const Address method (fp, method_off * wordSize);
|
|
+ const Address entry_point (fp, entry_point_off * wordSize);
|
|
+ const Address parameters (fp, parameters_off * wordSize);
|
|
+ const Address parameter_size(fp, parameter_size_off * wordSize);
|
|
+
|
|
+ const Address thread (fp, thread_off * wordSize);
|
|
+
|
|
+ const Address f27_save (fp, f27_off * wordSize);
|
|
+ const Address f26_save (fp, f26_off * wordSize);
|
|
+ const Address f25_save (fp, f25_off * wordSize);
|
|
+ const Address f24_save (fp, f24_off * wordSize);
|
|
+ const Address f23_save (fp, f23_off * wordSize);
|
|
+ const Address f22_save (fp, f22_off * wordSize);
|
|
+ const Address f21_save (fp, f21_off * wordSize);
|
|
+ const Address f20_save (fp, f20_off * wordSize);
|
|
+ const Address f19_save (fp, f19_off * wordSize);
|
|
+ const Address f18_save (fp, f18_off * wordSize);
|
|
+ const Address f9_save (fp, f9_off * wordSize);
|
|
+ const Address f8_save (fp, f8_off * wordSize);
|
|
+
|
|
+ const Address x27_save (fp, x27_off * wordSize);
|
|
+ const Address x26_save (fp, x26_off * wordSize);
|
|
+ const Address x25_save (fp, x25_off * wordSize);
|
|
+ const Address x24_save (fp, x24_off * wordSize);
|
|
+ const Address x23_save (fp, x23_off * wordSize);
|
|
+ const Address x22_save (fp, x22_off * wordSize);
|
|
+ const Address x21_save (fp, x21_off * wordSize);
|
|
+ const Address x20_save (fp, x20_off * wordSize);
|
|
+ const Address x19_save (fp, x19_off * wordSize);
|
|
+ const Address x18_save (fp, x18_off * wordSize);
|
|
+
|
|
+ const Address x9_save (fp, x9_off * wordSize);
|
|
+
|
|
+ // stub code
|
|
+
|
|
+ address riscv_entry = __ pc();
|
|
+
|
|
+ // set up frame and move sp to end of save area
|
|
+ __ enter();
|
|
+ __ addi(sp, fp, sp_after_call_off * wordSize);
|
|
+
|
|
+ // save register parameters and Java temporary/global registers
|
|
+ // n.b. we save thread even though it gets installed in
|
|
+ // xthread because we want to sanity check tp later
|
|
+ __ sd(c_rarg7, thread);
|
|
+ __ sw(c_rarg6, parameter_size);
|
|
+ __ sd(c_rarg5, parameters);
|
|
+ __ sd(c_rarg4, entry_point);
|
|
+ __ sd(c_rarg3, method);
|
|
+ __ sd(c_rarg2, result_type);
|
|
+ __ sd(c_rarg1, result);
|
|
+ __ sd(c_rarg0, call_wrapper);
|
|
+
|
|
+ __ sd(x9, x9_save);
|
|
+
|
|
+ __ sd(x18, x18_save);
|
|
+ __ sd(x19, x19_save);
|
|
+ __ sd(x20, x20_save);
|
|
+ __ sd(x21, x21_save);
|
|
+ __ sd(x22, x22_save);
|
|
+ __ sd(x23, x23_save);
|
|
+ __ sd(x24, x24_save);
|
|
+ __ sd(x25, x25_save);
|
|
+ __ sd(x26, x26_save);
|
|
+ __ sd(x27, x27_save);
|
|
+
|
|
+ __ fsd(f8, f8_save);
|
|
+ __ fsd(f9, f9_save);
|
|
+ __ fsd(f18, f18_save);
|
|
+ __ fsd(f19, f19_save);
|
|
+ __ fsd(f20, f20_save);
|
|
+ __ fsd(f21, f21_save);
|
|
+ __ fsd(f22, f22_save);
|
|
+ __ fsd(f23, f23_save);
|
|
+ __ fsd(f24, f24_save);
|
|
+ __ fsd(f25, f25_save);
|
|
+ __ fsd(f26, f26_save);
|
|
+ __ fsd(f27, f27_save);
|
|
+
|
|
+ // install Java thread in global register now we have saved
|
|
+ // whatever value it held
|
|
+ __ mv(xthread, c_rarg7);
|
|
+
|
|
+ // And method
|
|
+ __ mv(xmethod, c_rarg3);
|
|
+
|
|
+ // set up the heapbase register
|
|
+ __ reinit_heapbase();
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // make sure we have no pending exceptions
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
|
|
+ __ beqz(t0, L);
|
|
+ __ stop("StubRoutines::call_stub: entered with pending exception");
|
|
+ __ BIND(L);
|
|
+ }
|
|
+#endif
|
|
+ // pass parameters if any
|
|
+ __ mv(esp, sp);
|
|
+ __ slli(t0, c_rarg6, LogBytesPerWord);
|
|
+ __ sub(t0, sp, t0); // Move SP out of the way
|
|
+ __ andi(sp, t0, -2 * wordSize);
|
|
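+ // -2 * wordSize is -16, so the andi rounds the reserved parameter area down to a
+ // 16-byte boundary, keeping sp aligned as the RISC-V ABI requires.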
+
|
|
+ BLOCK_COMMENT("pass parameters if any");
|
|
+ Label parameters_done;
|
|
+ // parameter count is still in c_rarg6
|
|
+ // and parameter pointer identifying param 1 is in c_rarg5
|
|
+ __ beqz(c_rarg6, parameters_done);
|
|
+
|
|
+ address loop = __ pc();
|
|
+ __ ld(t0, c_rarg5, 0);
|
|
+ __ addi(c_rarg5, c_rarg5, wordSize);
|
|
+ __ addi(c_rarg6, c_rarg6, -1);
|
|
+ __ push_reg(t0);
|
|
+ __ bgtz(c_rarg6, loop);
|
|
+
|
|
+ __ BIND(parameters_done);
|
|
+
|
|
+ // call Java entry -- passing Method*, and current sp
|
|
+ // xmethod: Method*
|
|
+ // x30: sender sp
|
|
+ BLOCK_COMMENT("call Java function");
|
|
+ __ mv(x30, sp);
|
|
+ __ jalr(c_rarg4);
|
|
+
|
|
+ // save current address for use by exception handling code
|
|
+
|
|
+ return_address = __ pc();
|
|
+
|
|
+ // store result depending on type (everything that is not
|
|
+ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
|
|
+ // n.b. this assumes Java returns an integral result in x10
|
|
+ // and a floating result in j_farg0
|
|
+ __ ld(j_rarg2, result);
|
|
+ Label is_long, is_float, is_double, exit;
|
|
+ __ ld(j_rarg1, result_type);
|
|
+ __ mv(t0, (u1)T_OBJECT);
|
|
+ __ beq(j_rarg1, t0, is_long);
|
|
+ __ mv(t0, (u1)T_LONG);
|
|
+ __ beq(j_rarg1, t0, is_long);
|
|
+ __ mv(t0, (u1)T_FLOAT);
|
|
+ __ beq(j_rarg1, t0, is_float);
|
|
+ __ mv(t0, (u1)T_DOUBLE);
|
|
+ __ beq(j_rarg1, t0, is_double);
|
|
+
|
|
+ // handle T_INT case
|
|
+ __ sw(x10, Address(j_rarg2));
|
|
+
|
|
+ __ BIND(exit);
|
|
+
|
|
+ // pop parameters
|
|
+ __ addi(esp, fp, sp_after_call_off * wordSize);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // verify that threads correspond
|
|
+ {
|
|
+ Label L, S;
|
|
+ __ ld(t0, thread);
|
|
+ __ bne(xthread, t0, S);
|
|
+ __ get_thread(t0);
|
|
+ __ beq(xthread, t0, L);
|
|
+ __ BIND(S);
|
|
+ __ stop("StubRoutines::call_stub: threads must correspond");
|
|
+ __ BIND(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // restore callee-save registers
|
|
+ __ fld(f27, f27_save);
|
|
+ __ fld(f26, f26_save);
|
|
+ __ fld(f25, f25_save);
|
|
+ __ fld(f24, f24_save);
|
|
+ __ fld(f23, f23_save);
|
|
+ __ fld(f22, f22_save);
|
|
+ __ fld(f21, f21_save);
|
|
+ __ fld(f20, f20_save);
|
|
+ __ fld(f19, f19_save);
|
|
+ __ fld(f18, f18_save);
|
|
+ __ fld(f9, f9_save);
|
|
+ __ fld(f8, f8_save);
|
|
+
|
|
+ __ ld(x27, x27_save);
|
|
+ __ ld(x26, x26_save);
|
|
+ __ ld(x25, x25_save);
|
|
+ __ ld(x24, x24_save);
|
|
+ __ ld(x23, x23_save);
|
|
+ __ ld(x22, x22_save);
|
|
+ __ ld(x21, x21_save);
|
|
+ __ ld(x20, x20_save);
|
|
+ __ ld(x19, x19_save);
|
|
+ __ ld(x18, x18_save);
|
|
+
|
|
+ __ ld(x9, x9_save);
|
|
+
|
|
+ __ ld(c_rarg0, call_wrapper);
|
|
+ __ ld(c_rarg1, result);
|
|
+ __ ld(c_rarg2, result_type);
|
|
+ __ ld(c_rarg3, method);
|
|
+ __ ld(c_rarg4, entry_point);
|
|
+ __ ld(c_rarg5, parameters);
|
|
+ __ ld(c_rarg6, parameter_size);
|
|
+ __ ld(c_rarg7, thread);
|
|
+
|
|
+ // leave frame and return to caller
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ // handle return types different from T_INT
|
|
+
|
|
+ __ BIND(is_long);
|
|
+ __ sd(x10, Address(j_rarg2, 0));
|
|
+ __ j(exit);
|
|
+
|
|
+ __ BIND(is_float);
|
|
+ __ fsw(j_farg0, Address(j_rarg2, 0), t0);
|
|
+ __ j(exit);
|
|
+
|
|
+ __ BIND(is_double);
|
|
+ __ fsd(j_farg0, Address(j_rarg2, 0), t0);
|
|
+ __ j(exit);
|
|
+
|
|
+ return start;
|
|
+ }
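// Editor's sketch (not part of the patch): the result-storing policy of the
// call stub above, expressed as plain C++. Anything that is not an object,
// long, float or double is stored as a 32-bit int; the enum and parameter
// names below are invented for this illustration.
#include <cstdint>
#include <cstring>

enum SketchResultType { S_INT, S_OBJECT, S_LONG, S_FLOAT, S_DOUBLE };

static void store_call_result_sketch(void* result, SketchResultType type,
                                     int64_t gp_result, double fp_result) {
  switch (type) {
    case S_OBJECT:                      // objects are stored like longs
    case S_LONG:
      std::memcpy(result, &gp_result, 8);
      break;
    case S_FLOAT: {
      float f = (float)fp_result;       // stands in for j_farg0
      std::memcpy(result, &f, 4);
      break;
    }
    case S_DOUBLE:
      std::memcpy(result, &fp_result, 8);
      break;
    default: {                          // everything else is treated as T_INT
      int32_t i = (int32_t)gp_result;   // stands in for the low half of x10
      std::memcpy(result, &i, 4);
      break;
    }
  }
}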
|
|
+
|
|
+ // Return point for a Java call if there's an exception thrown in
|
|
+ // Java code. The exception is caught and transformed into a
|
|
+ // pending exception stored in JavaThread that can be tested from
|
|
+ // within the VM.
|
|
+ //
|
|
+ // Note: Usually the parameters are removed by the callee. In case
|
|
+ // of an exception crossing an activation frame boundary, that is
|
|
+ // not the case if the callee is compiled code => need to setup the
|
|
+ // sp.
|
|
+ //
|
|
+ // x10: exception oop
|
|
+
|
|
+ address generate_catch_exception() {
|
|
+ StubCodeMark mark(this, "StubRoutines", "catch_exception");
|
|
+ address start = __ pc();
|
|
+
|
|
+ // same as in generate_call_stub():
|
|
+ const Address thread(fp, thread_off * wordSize);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // verify that threads correspond
|
|
+ {
|
|
+ Label L, S;
|
|
+ __ ld(t0, thread);
|
|
+ __ bne(xthread, t0, S);
|
|
+ __ get_thread(t0);
|
|
+ __ beq(xthread, t0, L);
|
|
+ __ bind(S);
|
|
+ __ stop("StubRoutines::catch_exception: threads must correspond");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // set pending exception
|
|
+ __ verify_oop(x10);
|
|
+
|
|
+ __ sd(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ mv(t0, (address)__FILE__);
|
|
+ __ sd(t0, Address(xthread, Thread::exception_file_offset()));
|
|
+ __ mv(t0, (int)__LINE__);
|
|
+ __ sw(t0, Address(xthread, Thread::exception_line_offset()));
|
|
+
|
|
+ // complete return to VM
|
|
+ assert(StubRoutines::_call_stub_return_address != NULL,
|
|
+ "_call_stub_return_address must have been generated before");
|
|
+ __ j(StubRoutines::_call_stub_return_address);
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ // Continuation point for runtime calls returning with a pending
|
|
+ // exception. The pending exception check happened in the runtime
|
|
+ // or native call stub. The pending exception in Thread is
|
|
+ // converted into a Java-level exception.
|
|
+ //
|
|
+ // Contract with Java-level exception handlers:
|
|
+ // x10: exception
|
|
+ // x13: throwing pc
|
|
+ //
|
|
+ // NOTE: At entry of this stub, exception-pc must be in RA !!
|
|
+
|
|
+ // NOTE: this is always used as a jump target within generated code
|
|
+ // so it just needs to be generated code with no x86 prolog
|
|
+
|
|
+ address generate_forward_exception() {
|
|
+ StubCodeMark mark(this, "StubRoutines", "forward exception");
|
|
+ address start = __ pc();
|
|
+
|
|
+ // Upon entry, RA points to the return address returning into
|
|
+ // Java (interpreted or compiled) code; i.e., the return address
|
|
+ // becomes the throwing pc.
|
|
+ //
|
|
+ // Arguments pushed before the runtime call are still on the stack
|
|
+ // but the exception handler will reset the stack pointer ->
|
|
+ // ignore them. A potential result in registers can be ignored as
|
|
+ // well.
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // make sure this code is only executed if there is a pending exception
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ bnez(t0, L);
|
|
+ __ stop("StubRoutines::forward exception: no pending exception (1)");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // compute exception handler into x9
|
|
+
|
|
+ // call the VM to find the handler address associated with the
|
|
+ // caller address. pass thread in x10 and caller pc (ret address)
|
|
+ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on
|
|
+ // the stack.
|
|
+ __ mv(c_rarg1, ra);
|
|
+ // ra will be trashed by the VM call so we move it to x9
|
|
+ // (callee-saved) because we also need to pass it to the handler
|
|
+ // returned by this call.
|
|
+ __ mv(x9, ra);
|
|
+ BLOCK_COMMENT("call exception_handler_for_return_address");
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::exception_handler_for_return_address),
|
|
+ xthread, c_rarg1);
|
|
+ // we should not really care that ra is no longer the callee
|
|
+ // address. we saved the value the handler needs in x9 so we can
|
|
+ // just copy it to x13. however, the C2 handler will push its own
|
|
+ // frame and then calls into the VM and the VM code asserts that
|
|
+ // the PC for the frame above the handler belongs to a compiled
|
|
+ // Java method. So, we restore ra here to satisfy that assert.
|
|
+ __ mv(ra, x9);
|
|
+ // setup x10 & x13 & clear pending exception
|
|
+ __ mv(x13, x9);
|
|
+ __ mv(x9, x10);
|
|
+ __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
|
|
+
|
|
+#ifdef ASSERT
|
|
+ // make sure exception is set
|
|
+ {
|
|
+ Label L;
|
|
+ __ bnez(x10, L);
|
|
+ __ stop("StubRoutines::forward exception: no pending exception (2)");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // continue at exception handler
|
|
+ // x10: exception
|
|
+ // x13: throwing pc
|
|
+ // x9: exception handler
|
|
+ __ verify_oop(x10);
|
|
+ __ jr(x9);
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ // Non-destructive plausibility checks for oops
|
|
+ //
|
|
+ // Arguments:
|
|
+ // x10: oop to verify
|
|
+ // t0: error message
|
|
+ //
|
|
+ // Stack after saving c_rarg3:
|
|
+ // [tos + 0]: saved c_rarg3
|
|
+ // [tos + 1]: saved c_rarg2
|
|
+ // [tos + 2]: saved ra
|
|
+ // [tos + 3]: saved t1
|
|
+ // [tos + 4]: saved x10
|
|
+ // [tos + 5]: saved t0
|
|
+ address generate_verify_oop() {
|
|
+
|
|
+ StubCodeMark mark(this, "StubRoutines", "verify_oop");
|
|
+ address start = __ pc();
|
|
+
|
|
+ Label exit, error;
|
|
+
|
|
+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3
|
|
+
|
|
+ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
|
|
+ __ ld(c_rarg3, Address(c_rarg2));
|
|
+ __ add(c_rarg3, c_rarg3, 1);
|
|
+ __ sd(c_rarg3, Address(c_rarg2));
|
|
+
|
|
+ // object is in x10
|
|
+ // make sure object is 'reasonable'
|
|
+ __ beqz(x10, exit); // if obj is NULL it is OK
|
|
+
|
|
+ // Check if the oop is in the right area of memory
|
|
+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask());
|
|
+ __ andr(c_rarg2, x10, c_rarg3);
|
|
+ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits());
|
|
+
|
|
+ // Compare c_rarg2 and c_rarg3
|
|
+ __ bne(c_rarg2, c_rarg3, error);
|
|
+
|
|
+ // make sure klass is 'reasonable', which is not zero.
|
|
+ __ load_klass(x10, x10); // get klass
|
|
+ __ beqz(x10, error); // if klass is NULL it is broken
|
|
+
|
|
+ // return if everything seems ok
|
|
+ __ bind(exit);
|
|
+
|
|
+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
|
|
+ __ ret();
|
|
+
|
|
+ // handle errors
|
|
+ __ bind(error);
|
|
+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
|
|
+
|
|
+ __ push_reg(RegSet::range(x0, x31), sp);
|
|
+ // prepare parameters for debug64, c_rarg0: address of error message,
|
|
+ // c_rarg1: return address, c_rarg2: address of regs on stack
|
|
+ __ mv(c_rarg0, t0); // pass address of error message
|
|
+ __ mv(c_rarg1, ra); // pass return address
|
|
+ __ mv(c_rarg2, sp); // pass address of regs on stack
|
|
+#ifndef PRODUCT
|
|
+ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
|
|
+#endif
|
|
+ BLOCK_COMMENT("call MacroAssembler::debug");
|
|
+ int32_t offset = 0;
|
|
+ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+
|
|
+ return start;
|
|
+ }
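// Editor's sketch (not part of the patch): the plausibility test applied by
// the verify_oop stub above, with the mask/bits/klass lookups reduced to plain
// parameters. Parameter names are invented for this illustration.
#include <cstdint>

static bool oop_is_plausible_sketch(uintptr_t oop, uintptr_t verify_mask,
                                    uintptr_t verify_bits, uintptr_t klass) {
  if (oop == 0) {
    return true;                              // a NULL oop is always acceptable
  }
  if ((oop & verify_mask) != verify_bits) {
    return false;                             // not in the expected memory area
  }
  return klass != 0;                          // a broken oop has a NULL klass
}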
|
|
+
|
|
+ // The inner part of zero_words().
|
|
+ //
|
|
+ // Inputs:
|
|
+ // x28: the HeapWord-aligned base address of an array to zero.
|
|
+ // x29: the count in HeapWords, x29 > 0.
|
|
+ //
|
|
+ // Returns x28 and x29, adjusted for the caller to clear.
|
|
+ // x28: the base address of the tail of words left to clear.
|
|
+ // x29: the number of words in the tail.
|
|
+ // x29 < MacroAssembler::zero_words_block_size.
|
|
+
|
|
+ address generate_zero_blocks() {
|
|
+ Label done;
|
|
+
|
|
+ const Register base = x28, cnt = x29;
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "zero_blocks");
|
|
+ address start = __ pc();
|
|
+
|
|
+ {
|
|
+ // Clear the remaining blocks.
|
|
+ Label loop;
|
|
+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
+ __ bltz(cnt, done);
|
|
+ __ bind(loop);
|
|
+ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) {
|
|
+ __ sd(zr, Address(base, 0));
|
|
+ __ add(base, base, 8);
|
|
+ }
|
|
+ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
+ __ bgez(cnt, loop);
|
|
+ __ bind(done);
|
|
+ __ add(cnt, cnt, MacroAssembler::zero_words_block_size);
|
|
+ }
|
|
+
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
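// Editor's sketch (not part of the patch): the contract of zero_blocks in
// plain C++. 'block_size' stands in for MacroAssembler::zero_words_block_size;
// the real value is target specific.
#include <cstddef>
#include <cstdint>

static void zero_blocks_sketch(int64_t*& base, ptrdiff_t& count) {
  const ptrdiff_t block_size = 8;              // assumed block size, in words
  while (count >= block_size) {                // clear whole blocks only
    for (ptrdiff_t i = 0; i < block_size; i++) {
      *base++ = 0;
    }
    count -= block_size;
  }
  // Post-condition, as in the header comment above: 0 <= count < block_size,
  // and base points at the tail that the caller still has to clear.
}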
|
|
+
|
|
+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset);
|
|
+
|
|
+ void copy_by_step(RegSet tmp_regs, Register src, Register dst,
|
|
+ unsigned unroll_factor, int unit) {
|
|
+ unsigned char regs[32];
|
|
+ int offset = unit < 0 ? unit : 0;
|
|
+
|
|
+ // Scan bitset to get tmp regs
|
|
+ unsigned int regsSize = 0;
|
|
+ unsigned bitset = tmp_regs.bits();
|
|
+
|
|
+    assert(((bitset & (1 << (src->encoding()))) == 0), "src should not be in tmp regs");
|
|
+    assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not be in tmp regs");
|
|
+
|
|
+ for (int reg = 31; reg >= 0; reg--) {
|
|
+ if ((1U << 31) & bitset) {
|
|
+ regs[regsSize++] = reg;
|
|
+ }
|
|
+ bitset <<= 1;
|
|
+ }
|
|
+
|
|
+ copy_insn ld_arr = NULL, st_arr = NULL;
|
|
+ switch (abs(unit)) {
|
|
+ case 1 :
|
|
+ ld_arr = (copy_insn)&MacroAssembler::lbu;
|
|
+ st_arr = (copy_insn)&MacroAssembler::sb;
|
|
+ break;
|
|
+ case BytesPerShort :
|
|
+ ld_arr = (copy_insn)&MacroAssembler::lhu;
|
|
+ st_arr = (copy_insn)&MacroAssembler::sh;
|
|
+ break;
|
|
+ case BytesPerInt :
|
|
+ ld_arr = (copy_insn)&MacroAssembler::lwu;
|
|
+ st_arr = (copy_insn)&MacroAssembler::sw;
|
|
+ break;
|
|
+ case BytesPerLong :
|
|
+ ld_arr = (copy_insn)&MacroAssembler::ld;
|
|
+ st_arr = (copy_insn)&MacroAssembler::sd;
|
|
+ break;
|
|
+ default :
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ for (unsigned i = 0; i < unroll_factor; i++) {
|
|
+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset);
|
|
+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset);
|
|
+ }
|
|
+
|
|
+ __ addi(src, src, unroll_factor * unit);
|
|
+ __ addi(dst, dst, unroll_factor * unit);
|
|
+ }
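// Editor's sketch (not part of the patch): the pointer-to-member dispatch used
// by copy_by_step, shown with a toy Emitter class instead of MacroAssembler.
// Only the byte and doubleword cases are shown; all names are invented.
#include <cstdio>
#include <cstdlib>

struct Emitter {
  void lbu(int rd, int rs, int off) { std::printf("lbu x%d, %d(x%d)\n", rd, off, rs); }
  void sb (int rd, int rs, int off) { std::printf("sb  x%d, %d(x%d)\n", rd, off, rs); }
  void ld (int rd, int rs, int off) { std::printf("ld  x%d, %d(x%d)\n", rd, off, rs); }
  void sd (int rd, int rs, int off) { std::printf("sd  x%d, %d(x%d)\n", rd, off, rs); }
};

typedef void (Emitter::*copy_insn)(int, int, int);

static void emit_unrolled_copy(Emitter* masm, int tmp, int src, int dst,
                               unsigned unroll_factor, int unit) {
  copy_insn ld_insn = (std::abs(unit) == 1) ? &Emitter::lbu : &Emitter::ld;
  copy_insn st_insn = (std::abs(unit) == 1) ? &Emitter::sb  : &Emitter::sd;
  int offset = unit < 0 ? unit : 0;            // backward copies start at -|unit|
  for (unsigned i = 0; i < unroll_factor; i++) {
    (masm->*ld_insn)(tmp, src, (int)i * unit + offset);
    (masm->*st_insn)(tmp, dst, (int)i * unit + offset);
  }
}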
|
|
+
|
|
+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp,
|
|
+ int ele_size, unsigned align_unit) {
|
|
+ bool is_backwards = ele_size < 0;
|
|
+ size_t granularity = uabs(ele_size);
|
|
+ for (unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) {
|
|
+ int offset = is_backwards ? (int)(-unit) : unit;
|
|
+ Label exit;
|
|
+ __ andi(tmp, count_in_bytes, unit);
|
|
+ __ beqz(tmp, exit);
|
|
+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset);
|
|
+ __ bind(exit);
|
|
+ }
|
|
+ }
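// Editor's sketch (not part of the patch): the tail-copy idea behind copy_tail,
// forward direction, element size 1 and align_unit 8. Each power of two is
// copied at most once, because the matching bit of the byte count is 0 or 1.
#include <cstddef>
#include <cstring>

static void copy_tail_sketch(const unsigned char*& src, unsigned char*& dst,
                             size_t count_in_bytes) {
  for (size_t unit = 4; unit >= 1; unit >>= 1) {   // 4, 2, 1 for align_unit == 8
    if (count_in_bytes & unit) {                   // test one bit of the count
      std::memcpy(dst, src, unit);
      src += unit;
      dst += unit;
    }
  }
}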
|
|
+
|
|
+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp,
|
|
+ int step, Label *Lcopy_small, Register loopsize = noreg) {
|
|
+ size_t granularity = uabs(step);
|
|
+ RegSet tmp_regs = RegSet::range(x13, x16);
|
|
+ assert_different_registers(src, dst, count_in_bytes, tmp);
|
|
+
|
|
+ Label loop, copy2, copy1, finish;
|
|
+ if (loopsize == noreg) {
|
|
+ loopsize = t1;
|
|
+ __ mv(loopsize, 8 * granularity);
|
|
+ }
|
|
+
|
|
+ // Cyclic copy with 8*step.
|
|
+ __ bind(loop);
|
|
+ {
|
|
+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step);
|
|
+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity);
|
|
+ __ bge(count_in_bytes, loopsize, loop);
|
|
+ }
|
|
+
|
|
+ if (Lcopy_small != NULL) {
|
|
+ __ bind(*Lcopy_small);
|
|
+ }
|
|
+
|
|
+ // copy memory smaller than step * 8 bytes
|
|
+ __ andi(tmp, count_in_bytes, granularity << 2);
|
|
+ __ beqz(tmp, copy2);
|
|
+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step);
|
|
+
|
|
+ __ bind(copy2);
|
|
+ __ andi(tmp, count_in_bytes, granularity << 1);
|
|
+ __ beqz(tmp, copy1);
|
|
+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step);
|
|
+
|
|
+ __ bind(copy1);
|
|
+ __ andi(tmp, count_in_bytes, granularity);
|
|
+ __ beqz(tmp, finish);
|
|
+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step);
|
|
+
|
|
+ __ bind(finish);
|
|
+ }
|
|
+
|
|
+ // Cyclic copy with one step.
|
|
+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) {
|
|
+ size_t granularity = uabs(step);
|
|
+ Label loop1;
|
|
+ if (loopsize == noreg) {
|
|
+ loopsize = t0;
|
|
+ __ mv(loopsize, granularity);
|
|
+ }
|
|
+
|
|
+ __ bind(loop1);
|
|
+ {
|
|
+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step);
|
|
+ __ sub(count_in_bytes, count_in_bytes, granularity);
|
|
+ __ bge(count_in_bytes, loopsize, loop1);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void align_unit(Register src, Register dst, Register count_in_bytes,
|
|
+ unsigned unit, bool is_backwards) {
|
|
+ Label skip;
|
|
+ __ andi(t0, dst, unit);
|
|
+ __ beqz(t0, skip);
|
|
+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? -unit : unit);
|
|
+ __ sub(count_in_bytes, count_in_bytes, unit);
|
|
+ __ bind(skip);
|
|
+ }
|
|
+
|
|
+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements,
|
|
+ Register tmp, int ele_step) {
|
|
+
|
|
+ bool is_backwards = ele_step < 0;
|
|
+ unsigned int granularity = uabs(ele_step);
|
|
+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned;
|
|
+ const Register count_in_bytes = x31, src = x28, dst = x29;
|
|
+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1);
|
|
+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity));
|
|
+ __ add(src, s, is_backwards ? count_in_bytes : zr);
|
|
+ __ add(dst, d, is_backwards ? count_in_bytes : zr);
|
|
+
|
|
+ // if count_in_elements < 8, copy_small
|
|
+ __ mv(t0, 8);
|
|
+ if (is_align && granularity < BytesPerLong) {
|
|
+ __ blt(count_in_bytes, t0, Lcopy_small);
|
|
+ } else {
|
|
+ __ blt(count_in_elements, t0, Lcopy_small);
|
|
+ }
|
|
+
|
|
+ if (granularity < BytesPerLong) {
|
|
+ Label Lcopy_aligned[3];
|
|
+ Label Lalign8;
|
|
+ if (!is_align) {
|
|
+ Label Lalign_and_copy;
|
|
+ __ mv(t0, EagerArrayCopyThreshold);
|
|
+ __ blt(count_in_bytes, t0, Lalign_and_copy);
|
|
+ // Align dst to 8.
|
|
+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) {
|
|
+ align_unit(src, dst, count_in_bytes, unit, is_backwards);
|
|
+ }
|
|
+
|
|
+ Register shr = x30, shl = x7, tmp1 = x13;
|
|
+
|
|
+ __ andi(shr, src, 0x7);
|
|
+ __ beqz(shr, Lalign8);
|
|
+ {
|
|
+          // calculate the shift amounts for the doubleword stores
|
|
+ __ slli(shr, shr, 3);
|
|
+ __ sub(shl, shr, 64);
|
|
+ __ sub(shl, zr, shl);
|
|
+
|
|
+          // alsrc: src rounded down to the previous 8-byte-aligned address
|
|
+ Register alsrc = t1;
|
|
+ __ andi(alsrc, src, -8);
|
|
+
|
|
+ // move src to tail
|
|
+ __ andi(t0, count_in_bytes, -8);
|
|
+ if (is_backwards) {
|
|
+ __ sub(src, src, t0);
|
|
+ } else {
|
|
+ __ add(src, src, t0);
|
|
+ }
|
|
+
|
|
+ // prepare for copy_dstaligned_loop
|
|
+ __ ld(tmp1, alsrc, 0);
|
|
+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards);
|
|
+ __ mv(x17, 8);
|
|
+ __ blt(count_in_bytes, x17, Lcopy_small);
|
|
+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards);
|
|
+ __ j(Lcopy_small);
|
|
+ }
|
|
+ __ j(Ldone);
|
|
+ __ bind(Lalign_and_copy);
|
|
+
|
|
+      // Check whether src and dst can be 8/4/2-byte aligned at the same time.
+      // If they can, align the memory and copy by 8/4/2 bytes.
|
|
+ __ xorr(t1, src, dst);
|
|
+
|
|
+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) {
|
|
+ Label skip;
|
|
+ unsigned int unit = alignment >> 1;
|
|
+        // Check whether src and dst can both be aligned to this unit when
+        // copying from src to dst. If they cannot, jump to the matching
+        // Lcopy_aligned label.
|
|
+ __ andi(t0, t1, alignment - 1);
|
|
+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]);
|
|
+ // Align src and dst to unit.
|
|
+ align_unit(src, dst, count_in_bytes, unit, is_backwards);
|
|
+ }
|
|
+ }
|
|
+ __ bind(Lalign8);
|
|
+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) {
|
|
+ // Copy memory by steps, which has been aligned to step_size.
|
|
+ Label loop8, Ltail;
|
|
+ int step = is_backwards ? (-step_size) : step_size;
|
|
+        if (!(step_size == 8 && is_align)) { // count was already checked against 8 (still in t0) above
|
|
+ // Check whether the memory size is smaller than step_size.
|
|
+ __ mv(t0, step_size);
|
|
+ __ blt(count_in_bytes, t0, Ltail);
|
|
+ }
|
|
+ const Register eight_step = t1;
|
|
+ __ mv(eight_step, step_size * 8);
|
|
+ __ bge(count_in_bytes, eight_step, loop8);
|
|
+ // If memory is less than 8*step_size bytes, loop by step.
|
|
+ copy_loop1(src, dst, count_in_bytes, step, t0);
|
|
+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size);
|
|
+ __ j(Ldone);
|
|
+
|
|
+ __ bind(loop8);
|
|
+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8.
|
|
+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step);
|
|
+ __ bind(Ltail);
|
|
+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size);
|
|
+ __ j(Ldone);
|
|
+
|
|
+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]);
|
|
+ }
|
|
+ }
|
|
+    // Reached when ele_step is already 8 bytes, or when src and dst cannot
+    // both be aligned to a unit larger than ele_step.
+    // Cyclic copy with 8*ele_step.
|
|
+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg);
|
|
+
|
|
+ __ bind(Ldone);
|
|
+ }
|
|
+
|
|
+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst,
|
|
+ Register shr, Register shl,
|
|
+ Register count_in_bytes, bool is_backwards) {
|
|
+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17;
|
|
+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2,
|
|
+ srl_reg1 = is_backwards ? tmp2 : tmp1,
|
|
+ sll_reg2 = is_backwards ? tmp2 : tmp3,
|
|
+ srl_reg2 = is_backwards ? tmp3 : tmp2,
|
|
+ sll_reg3 = is_backwards ? tmp3 : tmp4,
|
|
+ srl_reg3 = is_backwards ? tmp4 : tmp3,
|
|
+ sll_reg4 = is_backwards ? tmp4 : tmp1,
|
|
+ srl_reg4 = is_backwards ? tmp1 : tmp4;
|
|
+ assert_different_registers(t0, thirty_two, alsrc, shr, shl);
|
|
+ int unit = is_backwards ? -wordSize : wordSize;
|
|
+ int offset = is_backwards ? -wordSize : 0;
|
|
+ Label loop, done;
|
|
+
|
|
+ __ mv(thirty_two, 32);
|
|
+ __ blt(count_in_bytes, thirty_two, done);
|
|
+
|
|
+ __ bind(loop);
|
|
+ __ ld(tmp2, alsrc, unit);
|
|
+ __ sll(t0, sll_reg1, shl);
|
|
+ __ srl(tmp1, srl_reg1, shr);
|
|
+ __ orr(tmp1, tmp1, t0);
|
|
+ __ sd(tmp1, dst, offset);
|
|
+
|
|
+ __ ld(tmp3, alsrc, unit * 2);
|
|
+ __ sll(t0, sll_reg2, shl);
|
|
+ __ srl(tmp2, srl_reg2, shr);
|
|
+ __ orr(tmp2, tmp2, t0);
|
|
+ __ sd(tmp2, dst, unit + offset);
|
|
+
|
|
+ __ ld(tmp4, alsrc, unit * 3);
|
|
+ __ sll(t0, sll_reg3, shl);
|
|
+ __ srl(tmp3, srl_reg3, shr);
|
|
+ __ orr(tmp3, tmp3, t0);
|
|
+ __ sd(tmp3, dst, unit * 2 + offset);
|
|
+
|
|
+ __ ld(tmp1, alsrc, unit * 4);
|
|
+ __ sll(t0, sll_reg4, shl);
|
|
+ __ srl(tmp4, srl_reg4, shr);
|
|
+ __ orr(tmp4, tmp4, t0);
|
|
+ __ sd(tmp4, dst, unit * 3 + offset);
|
|
+
|
|
+ __ add(alsrc, alsrc, unit * 4);
|
|
+ __ add(dst, dst, unit * 4);
|
|
+ __ sub(count_in_bytes, count_in_bytes, 32);
|
|
+ __ bge(count_in_bytes, thirty_two, loop);
|
|
+
|
|
+ __ bind(done);
|
|
+ }
|
|
+
|
|
+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst,
|
|
+ Register shr, Register shl,
|
|
+ Register count_in_bytes, Register eight,
|
|
+ bool is_backwards) {
|
|
+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16;
|
|
+ const Register sll_reg = is_backwards ? tmp1 : tmp2,
|
|
+ srl_reg = is_backwards ? tmp2 : tmp1;
|
|
+ assert_different_registers(t0, eight, alsrc, shr, shl);
|
|
+ Label loop;
|
|
+ int unit = is_backwards ? -wordSize : wordSize;
|
|
+
|
|
+ __ bind(loop);
|
|
+ __ ld(tmp2, alsrc, unit);
|
|
+ __ sll(t0, sll_reg, shl);
|
|
+ __ srl(tmp1, srl_reg, shr);
|
|
+ __ orr(t0, tmp1, t0);
|
|
+ __ sd(t0, dst, is_backwards ? unit : 0);
|
|
+ __ mv(tmp1, tmp2);
|
|
+ __ add(alsrc, alsrc, unit);
|
|
+ __ add(dst, dst, unit);
|
|
+ __ sub(count_in_bytes, count_in_bytes, 8);
|
|
+ __ bge(count_in_bytes, eight, loop);
|
|
+ }
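// Editor's sketch (not part of the patch): the srl/sll merge performed by the
// two dst-aligned loops above, forward direction, little-endian. Like the stub,
// it reads only whole aligned doublewords from alsrc, so it may touch a few
// bytes past the end of the source range but never past its aligned doubleword.
#include <cstddef>
#include <cstdint>
#include <cstring>

static void shift_merge_copy_sketch(const unsigned char* src, uint64_t* dst,
                                    size_t ndoublewords) {
  uintptr_t misalign = (uintptr_t)src & 7;         // assumed 1..7 here
  const unsigned char* alsrc = src - misalign;     // round src down to 8 bytes
  unsigned shr = (unsigned)(misalign * 8);         // bits dropped from the low word
  unsigned shl = 64 - shr;                         // bits taken from the next word
  uint64_t cur, next;
  std::memcpy(&cur, alsrc, 8);                     // first aligned doubleword
  for (size_t i = 0; i < ndoublewords; i++) {
    std::memcpy(&next, alsrc + 8 * (i + 1), 8);    // next aligned doubleword
    dst[i] = (cur >> shr) | (next << shl);         // rebuild the unaligned value
    cur = next;
  }
}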
|
|
+
|
|
+ // Scan over array at a for count oops, verifying each one.
|
|
+ // Preserves a and count, clobbers t0 and t1.
|
|
+ void verify_oop_array(int size, Register a, Register count, Register temp) {
|
|
+ Label loop, end;
|
|
+ __ mv(t1, zr);
|
|
+ __ slli(t0, count, exact_log2(size));
|
|
+ __ bind(loop);
|
|
+ __ bgeu(t1, t0, end);
|
|
+
|
|
+ __ add(temp, a, t1);
|
|
+ if (size == wordSize) {
|
|
+ __ ld(temp, Address(temp, 0));
|
|
+ __ verify_oop(temp);
|
|
+ } else {
|
|
+ __ lwu(temp, Address(temp, 0));
|
|
+ __ decode_heap_oop(temp); // calls verify_oop
|
|
+ }
|
|
+ __ add(t1, t1, size);
|
|
+ __ j(loop);
|
|
+ __ bind(end);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // is_oop - true => oop array, so generate store check code
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
+ // the hardware handle it. The two dwords within qwords that span
|
|
+ // cache line boundaries will still be loaded and stored atomically.
|
|
+ //
|
|
+ // Side Effects:
|
|
+ // disjoint_int_copy_entry is set to the no-overlap entry point
|
|
+ // used by generate_conjoint_int_oop_copy().
|
|
+ //
|
|
+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
|
|
+ const char* name, bool dest_uninitialized = false) {
|
|
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
+ RegSet saved_reg = RegSet::of(s, d, count);
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+ address start = __ pc();
|
|
+ __ enter();
|
|
+
|
|
+ if (entry != NULL) {
|
|
+ *entry = __ pc();
|
|
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ }
|
|
+
|
|
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
|
|
+ if (dest_uninitialized) {
|
|
+ decorators |= IS_DEST_UNINITIALIZED;
|
|
+ }
|
|
+ if (aligned) {
|
|
+ decorators |= ARRAYCOPY_ALIGNED;
|
|
+ }
|
|
+
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg);
|
|
+
|
|
+ if (is_oop) {
|
|
+ // save regs before copy_memory
|
|
+ __ push_reg(RegSet::of(d, count), sp);
|
|
+ }
|
|
+ copy_memory(aligned, s, d, count, t0, checked_cast<int>(size));
|
|
+
|
|
+ if (is_oop) {
|
|
+ __ pop_reg(RegSet::of(d, count), sp);
|
|
+ if (VerifyOops) {
|
|
+ verify_oop_array(checked_cast<int>(size), d, count, t2);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg);
|
|
+
|
|
+ __ leave();
|
|
+ __ mv(x10, zr); // return 0
|
|
+ __ ret();
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // is_oop - true => oop array, so generate store check code
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
+ // the hardware handle it. The two dwords within qwords that span
|
|
+ // cache line boundaries will still be loaded and stored atomically.
|
|
+ //
|
|
+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target,
|
|
+ address* entry, const char* name,
|
|
+ bool dest_uninitialized = false) {
|
|
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
+ RegSet saved_regs = RegSet::of(s, d, count);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+ address start = __ pc();
|
|
+ __ enter();
|
|
+
|
|
+ if (entry != NULL) {
|
|
+ *entry = __ pc();
|
|
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ }
|
|
+
|
|
+ // use fwd copy when (d-s) above_equal (count*size)
|
|
+ __ sub(t0, d, s);
|
|
+ __ slli(t1, count, exact_log2(size));
|
|
+ __ bgeu(t0, t1, nooverlap_target);
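// Editor's sketch (not part of the patch): the unsigned-distance test encoded
// by the sub/slli/bgeu sequence above. A forward (disjoint-style) copy is safe
// whenever dst - src, computed as an unsigned value, is at least the byte
// count; when dst < src the subtraction wraps to a huge value, so the test
// also selects the forward copy in that case.
#include <cstddef>
#include <cstdint>

static bool forward_copy_is_safe(const void* src, const void* dst,
                                 size_t count, size_t elem_size) {
  uint64_t distance = (uint64_t)(uintptr_t)dst - (uint64_t)(uintptr_t)src;
  return distance >= (uint64_t)count * elem_size;
}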
|
|
+
|
|
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY;
|
|
+ if (dest_uninitialized) {
|
|
+ decorators |= IS_DEST_UNINITIALIZED;
|
|
+ }
|
|
+ if (aligned) {
|
|
+ decorators |= ARRAYCOPY_ALIGNED;
|
|
+ }
|
|
+
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs);
|
|
+
|
|
+ if (is_oop) {
|
|
+ // save regs before copy_memory
|
|
+ __ push_reg(RegSet::of(d, count), sp);
|
|
+ }
|
|
+
|
|
+ copy_memory(aligned, s, d, count, t0, -size);
|
|
+ if (is_oop) {
|
|
+ __ pop_reg(RegSet::of(d, count), sp);
|
|
+ if (VerifyOops) {
|
|
+ verify_oop_array(size, d, count, t2);
|
|
+ }
|
|
+ }
|
|
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs);
|
|
+ __ leave();
|
|
+ __ mv(x10, zr); // return 0
|
|
+ __ ret();
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
|
|
+ // we let the hardware handle it. The one to eight bytes within words,
|
|
+ // dwords or qwords that span cache line boundaries will still be loaded
|
|
+ // and stored atomically.
|
|
+ //
|
|
+ // Side Effects:
|
|
+  //   disjoint_byte_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_byte_copy().
|
|
+ //
|
|
+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
|
|
+ // we let the hardware handle it. The one to eight bytes within words,
|
|
+ // dwords or qwords that span cache line boundaries will still be loaded
|
|
+ // and stored atomically.
|
|
+ //
|
|
+ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
|
|
+ address* entry, const char* name) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
|
|
+ // let the hardware handle it. The two or four words within dwords
|
|
+ // or qwords that span cache line boundaries will still be loaded
|
|
+ // and stored atomically.
|
|
+ //
|
|
+ // Side Effects:
|
|
+ // disjoint_short_copy_entry is set to the no-overlap entry point
|
|
+ // used by generate_conjoint_short_copy().
|
|
+ //
|
|
+ address generate_disjoint_short_copy(bool aligned,
|
|
+ address* entry, const char* name) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
|
|
+ // let the hardware handle it. The two or four words within dwords
|
|
+ // or qwords that span cache line boundaries will still be loaded
|
|
+ // and stored atomically.
|
|
+ //
|
|
+ address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
|
|
+ address* entry, const char* name) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
+ // the hardware handle it. The two dwords within qwords that span
|
|
+ // cache line boundaries will still be loaded and stored atomically.
|
|
+ //
|
|
+ // Side Effects:
|
|
+ // disjoint_int_copy_entry is set to the no-overlap entry point
|
|
+ // used by generate_conjoint_int_oop_copy().
|
|
+ //
|
|
+ address generate_disjoint_int_copy(bool aligned, address* entry,
|
|
+ const char* name, bool dest_uninitialized = false) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
|
|
+ // the hardware handle it. The two dwords within qwords that span
|
|
+ // cache line boundaries will still be loaded and stored atomically.
|
|
+ //
|
|
+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
|
|
+ address* entry, const char* name,
|
|
+ bool dest_uninitialized = false) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
|
|
+ }
|
|
+
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as size_t, can be zero
|
|
+ //
|
|
+ // Side Effects:
|
|
+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
|
|
+ // no-overlap entry point used by generate_conjoint_long_oop_copy().
|
|
+ //
|
|
+ address generate_disjoint_long_copy(bool aligned, address* entry,
|
|
+ const char* name, bool dest_uninitialized = false) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as size_t, can be zero
|
|
+ //
|
|
+ address generate_conjoint_long_copy(bool aligned,
|
|
+ address nooverlap_target, address* entry,
|
|
+ const char* name, bool dest_uninitialized = false) {
|
|
+ const bool not_oop = false;
|
|
+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as size_t, can be zero
|
|
+ //
|
|
+ // Side Effects:
|
|
+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
|
|
+ // no-overlap entry point used by generate_conjoint_long_oop_copy().
|
|
+ //
|
|
+ address generate_disjoint_oop_copy(bool aligned, address* entry,
|
|
+ const char* name, bool dest_uninitialized) {
|
|
+ const bool is_oop = true;
|
|
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
|
|
+ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
|
|
+ }
|
|
+
|
|
+ // Arguments:
|
|
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
|
|
+ // ignored
|
|
+ // name - stub name string
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as size_t, can be zero
|
|
+ //
|
|
+ address generate_conjoint_oop_copy(bool aligned,
|
|
+ address nooverlap_target, address* entry,
|
|
+ const char* name, bool dest_uninitialized) {
|
|
+ const bool is_oop = true;
|
|
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
|
|
+ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
|
|
+ name, dest_uninitialized);
|
|
+ }
|
|
+
|
|
+ // Helper for generating a dynamic type check.
|
|
+ // Smashes t0, t1.
|
|
+ void generate_type_check(Register sub_klass,
|
|
+ Register super_check_offset,
|
|
+ Register super_klass,
|
|
+ Label& L_success) {
|
|
+ assert_different_registers(sub_klass, super_check_offset, super_klass);
|
|
+
|
|
+ BLOCK_COMMENT("type_check:");
|
|
+
|
|
+ Label L_miss;
|
|
+
|
|
+ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset);
|
|
+ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
|
|
+
|
|
+ // Fall through on failure!
|
|
+ __ BIND(L_miss);
|
|
+ }
|
|
+
|
|
+ //
|
|
+ // Generate checkcasting array copy stub
|
|
+ //
|
|
+ // Input:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - element count, treated as ssize_t, can be zero
|
|
+ // c_rarg3 - size_t ckoff (super_check_offset)
|
|
+ // c_rarg4 - oop ckval (super_klass)
|
|
+ //
|
|
+ // Output:
|
|
+ // x10 == 0 - success
|
|
+ // x10 == -1^K - failure, where K is partial transfer count
|
|
+ //
|
|
+ address generate_checkcast_copy(const char* name, address* entry,
|
|
+ bool dest_uninitialized = false) {
|
|
+ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
|
|
+
|
|
+ // Input registers (after setup_arg_regs)
|
|
+ const Register from = c_rarg0; // source array address
|
|
+ const Register to = c_rarg1; // destination array address
|
|
+ const Register count = c_rarg2; // elementscount
|
|
+ const Register ckoff = c_rarg3; // super_check_offset
|
|
+ const Register ckval = c_rarg4; // super_klass
|
|
+
|
|
+ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
|
|
+ RegSet wb_post_saved_regs = RegSet::of(count);
|
|
+
|
|
+ // Registers used as temps (x7, x9, x18 are save-on-entry)
|
|
+ const Register count_save = x19; // orig elementscount
|
|
+ const Register start_to = x18; // destination array start address
|
|
+ const Register copied_oop = x7; // actual oop copied
|
|
+ const Register r9_klass = x9; // oop._klass
|
|
+
|
|
+ //---------------------------------------------------------------
|
|
+ // Assembler stub will be used for this call to arraycopy
|
|
+ // if the two arrays are subtypes of Object[] but the
|
|
+ // destination array type is not equal to or a supertype
|
|
+ // of the source type. Each element must be separately
|
|
+ // checked.
|
|
+
|
|
+ assert_different_registers(from, to, count, ckoff, ckval, start_to,
|
|
+ copied_oop, r9_klass, count_save);
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+ address start = __ pc();
|
|
+
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+
|
|
+ // Caller of this entry point must set up the argument registers
|
|
+ if (entry != NULL) {
|
|
+ *entry = __ pc();
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ }
|
|
+
|
|
+ // Empty array: Nothing to do
|
|
+ __ beqz(count, L_done);
|
|
+
|
|
+ __ push_reg(RegSet::of(x7, x9, x18, x19), sp);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ BLOCK_COMMENT("assert consistent ckoff/ckval");
|
|
+ // The ckoff and ckval must be mutually consistent,
|
|
+ // even though caller generates both.
|
|
+ { Label L;
|
|
+ int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
|
+ __ lwu(start_to, Address(ckval, sco_offset));
|
|
+ __ beq(ckoff, start_to, L);
|
|
+ __ stop("super_check_offset inconsistent");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif //ASSERT
|
|
+
|
|
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
|
|
+ bool is_oop = true;
|
|
+ if (dest_uninitialized) {
|
|
+ decorators |= IS_DEST_UNINITIALIZED;
|
|
+ }
|
|
+
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
|
|
+
|
|
+ // save the original count
|
|
+ __ mv(count_save, count);
|
|
+
|
|
+ // Copy from low to high addresses
|
|
+ __ mv(start_to, to); // Save destination array start address
|
|
+ __ j(L_load_element);
|
|
+
|
|
+ // ======== begin loop ========
|
|
+ // (Loop is rotated; its entry is L_load_element.)
|
|
+ // Loop control:
|
|
+ // for count to 0 do
|
|
+ // copied_oop = load_heap_oop(from++)
|
|
+ // ... generate_type_check ...
|
|
+ // store_heap_oop(to++, copied_oop)
|
|
+ // end
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+
|
|
+ __ BIND(L_store_element);
|
|
+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
|
|
+ __ add(to, to, UseCompressedOops ? 4 : 8);
|
|
+ __ sub(count, count, 1);
|
|
+ __ beqz(count, L_do_card_marks);
|
|
+
|
|
+ // ======== loop entry is here ========
|
|
+ __ BIND(L_load_element);
|
|
+ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
|
|
+ __ add(from, from, UseCompressedOops ? 4 : 8);
|
|
+ __ beqz(copied_oop, L_store_element);
|
|
+
|
|
+ __ load_klass(r9_klass, copied_oop);// query the object klass
|
|
+ generate_type_check(r9_klass, ckoff, ckval, L_store_element);
|
|
+ // ======== end loop ========
|
|
+
|
|
+ // It was a real error; we must depend on the caller to finish the job.
|
|
+    // Register count = remaining oops, count_save = total oops.
|
|
+ // Emit GC store barriers for the oops we have copied and report
|
|
+ // their number to the caller.
|
|
+
|
|
+ __ sub(count, count_save, count); // K = partially copied oop count
|
|
+ __ xori(count, count, -1); // report (-1^K) to caller
|
|
+ __ beqz(count, L_done_pop);
|
|
+
|
|
+ __ BIND(L_do_card_marks);
|
|
+ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs);
|
|
+
|
|
+ __ bind(L_done_pop);
|
|
+ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp);
|
|
+ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
|
|
+
|
|
+ __ bind(L_done);
|
|
+ __ mv(x10, count);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
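// Editor's sketch (not part of the patch): the return-value convention of the
// checkcast copy stub above. Full success returns 0; a failure after copying K
// elements returns -1 ^ K (the bitwise NOT of K), which is always negative.
#include <cassert>

static int encode_partial_copy(int copied) { return -1 ^ copied; }  // == ~copied
static int decode_partial_copy(int result) { return -1 ^ result; }  // its own inverse

static void checkcast_result_demo() {
  assert(encode_partial_copy(0) == -1);                    // failed before any copy
  assert(encode_partial_copy(7) < 0);                      // failures are negative
  assert(decode_partial_copy(encode_partial_copy(7)) == 7);
}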
|
|
+
|
|
+ // Perform range checks on the proposed arraycopy.
|
|
+ // Kills temp, but nothing else.
|
|
+ // Also, clean the sign bits of src_pos and dst_pos.
|
|
+ void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
|
|
+ Register src_pos, // source position (c_rarg1)
|
|
+                                     Register dst,     // destination array oop (c_rarg2)
|
|
+ Register dst_pos, // destination position (c_rarg3)
|
|
+ Register length,
|
|
+ Register temp,
|
|
+ Label& L_failed) {
|
|
+ BLOCK_COMMENT("arraycopy_range_checks:");
|
|
+
|
|
+ assert_different_registers(t0, temp);
|
|
+
|
|
+ // if [src_pos + length > arrayOop(src)->length()] then FAIL
|
|
+ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
|
|
+ __ addw(temp, length, src_pos);
|
|
+ __ bgtu(temp, t0, L_failed);
|
|
+
|
|
+ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL
|
|
+ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
|
|
+ __ addw(temp, length, dst_pos);
|
|
+ __ bgtu(temp, t0, L_failed);
|
|
+
|
|
+ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
|
|
+ __ zero_extend(src_pos, src_pos, 32);
|
|
+ __ zero_extend(dst_pos, dst_pos, 32);
|
|
+
|
|
+ BLOCK_COMMENT("arraycopy_range_checks done");
|
|
+ }
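// Editor's sketch (not part of the patch): the per-array bounds test done by
// arraycopy_range_checks. The callers have already rejected negative positions
// and lengths, so the 32-bit addition below cannot wrap.
#include <cstdint>

static bool arraycopy_range_ok(uint32_t array_length, uint32_t pos, uint32_t length) {
  uint32_t end = pos + length;     // mirrors addw in the stub
  return end <= array_length;      // the bgtu in the stub fails when end > array_length
}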
|
|
+
|
|
+ //
|
|
+ // Generate 'unsafe' array copy stub
|
|
+ // Though just as safe as the other stubs, it takes an unscaled
|
|
+ // size_t argument instead of an element count.
|
|
+ //
|
|
+ // Input:
|
|
+ // c_rarg0 - source array address
|
|
+ // c_rarg1 - destination array address
|
|
+ // c_rarg2 - byte count, treated as ssize_t, can be zero
|
|
+ //
|
|
+ // Examines the alignment of the operands and dispatches
|
|
+ // to a long, int, short, or byte copy loop.
|
|
+ //
|
|
+ address generate_unsafe_copy(const char* name,
|
|
+ address byte_copy_entry,
|
|
+ address short_copy_entry,
|
|
+ address int_copy_entry,
|
|
+ address long_copy_entry) {
|
|
+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
|
|
+ int_copy_entry != NULL && long_copy_entry != NULL);
|
|
+ Label L_long_aligned, L_int_aligned, L_short_aligned;
|
|
+ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+ address start = __ pc();
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+
|
|
+ // bump this on entry, not on exit:
|
|
+ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
|
|
+
|
|
+ __ orr(t0, s, d);
|
|
+ __ orr(t0, t0, count);
|
|
+
|
|
+ __ andi(t0, t0, BytesPerLong - 1);
|
|
+ __ beqz(t0, L_long_aligned);
|
|
+ __ andi(t0, t0, BytesPerInt - 1);
|
|
+ __ beqz(t0, L_int_aligned);
|
|
+ __ andi(t0, t0, 1);
|
|
+ __ beqz(t0, L_short_aligned);
|
|
+ __ j(RuntimeAddress(byte_copy_entry));
|
|
+
|
|
+ __ BIND(L_short_aligned);
|
|
+ __ srli(count, count, LogBytesPerShort); // size => short_count
|
|
+ __ j(RuntimeAddress(short_copy_entry));
|
|
+ __ BIND(L_int_aligned);
|
|
+ __ srli(count, count, LogBytesPerInt); // size => int_count
|
|
+ __ j(RuntimeAddress(int_copy_entry));
|
|
+ __ BIND(L_long_aligned);
|
|
+ __ srli(count, count, LogBytesPerLong); // size => long_count
|
|
+ __ j(RuntimeAddress(long_copy_entry));
|
|
+
|
|
+ return start;
|
|
+ }
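// Editor's sketch (not part of the patch): how generate_unsafe_copy picks an
// element width. It ORs source, destination and byte count together and tests
// the low bits of the result, i.e. the widest unit that all three share.
#include <cstddef>
#include <cstdint>

static int unsafe_copy_element_size(const void* src, const void* dst, size_t count) {
  uintptr_t bits = (uintptr_t)src | (uintptr_t)dst | (uintptr_t)count;
  if ((bits & 7) == 0) return 8;   // long copy
  if ((bits & 3) == 0) return 4;   // int copy
  if ((bits & 1) == 0) return 2;   // short copy
  return 1;                        // byte copy
}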
|
|
+
|
|
+ //
|
|
+ // Generate generic array copy stubs
|
|
+ //
|
|
+ // Input:
|
|
+ // c_rarg0 - src oop
|
|
+ // c_rarg1 - src_pos (32-bits)
|
|
+ // c_rarg2 - dst oop
|
|
+ // c_rarg3 - dst_pos (32-bits)
|
|
+ // c_rarg4 - element count (32-bits)
|
|
+ //
|
|
+ // Output:
|
|
+ // x10 == 0 - success
|
|
+ // x10 == -1^K - failure, where K is partial transfer count
|
|
+ //
|
|
+ address generate_generic_copy(const char* name,
|
|
+ address byte_copy_entry, address short_copy_entry,
|
|
+ address int_copy_entry, address oop_copy_entry,
|
|
+ address long_copy_entry, address checkcast_copy_entry) {
|
|
+ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
|
|
+ int_copy_entry != NULL && oop_copy_entry != NULL &&
|
|
+ long_copy_entry != NULL && checkcast_copy_entry != NULL);
|
|
+ Label L_failed, L_failed_0, L_objArray;
|
|
+ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
|
|
+
|
|
+ // Input registers
|
|
+ const Register src = c_rarg0; // source array oop
|
|
+ const Register src_pos = c_rarg1; // source position
|
|
+ const Register dst = c_rarg2; // destination array oop
|
|
+ const Register dst_pos = c_rarg3; // destination position
|
|
+ const Register length = c_rarg4;
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+
|
|
+ // Registers used as temps
|
|
+ const Register dst_klass = c_rarg5;
|
|
+
|
|
+ address start = __ pc();
|
|
+
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+
|
|
+ // bump this on entry, not on exit:
|
|
+ inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
|
|
+
|
|
+ //-----------------------------------------------------------------------
|
|
+ // Assembler stub will be used for this call to arraycopy
|
|
+ // if the following conditions are met:
|
|
+ //
|
|
+ // (1) src and dst must not be null.
|
|
+ // (2) src_pos must not be negative.
|
|
+ // (3) dst_pos must not be negative.
|
|
+ // (4) length must not be negative.
|
|
+ // (5) src klass and dst klass should be the same and not NULL.
|
|
+ // (6) src and dst should be arrays.
|
|
+ // (7) src_pos + length must not exceed length of src.
|
|
+ // (8) dst_pos + length must not exceed length of dst.
|
|
+ //
|
|
+
|
|
+ // if [src == NULL] then return -1
|
|
+ __ beqz(src, L_failed);
|
|
+
|
|
+ // if [src_pos < 0] then return -1
|
|
+ // i.e. sign bit set
|
|
+ __ andi(t0, src_pos, 1UL << 31);
|
|
+ __ bnez(t0, L_failed);
|
|
+
|
|
+ // if [dst == NULL] then return -1
|
|
+ __ beqz(dst, L_failed);
|
|
+
|
|
+ // if [dst_pos < 0] then return -1
|
|
+ // i.e. sign bit set
|
|
+ __ andi(t0, dst_pos, 1UL << 31);
|
|
+ __ bnez(t0, L_failed);
|
|
+
|
|
+ // registers used as temp
|
|
+ const Register scratch_length = x28; // elements count to copy
|
|
+ const Register scratch_src_klass = x29; // array klass
|
|
+ const Register lh = x30; // layout helper
|
|
+
|
|
+ // if [length < 0] then return -1
|
|
+ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value)
|
|
+ // i.e. sign bit set
|
|
+ __ andi(t0, scratch_length, 1UL << 31);
|
|
+ __ bnez(t0, L_failed);
|
|
+
|
|
+ __ load_klass(scratch_src_klass, src);
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ BLOCK_COMMENT("assert klasses not null {");
|
|
+ Label L1, L2;
|
|
+ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL
|
|
+ __ bind(L1);
|
|
+ __ stop("broken null klass");
|
|
+ __ bind(L2);
|
|
+ __ load_klass(t0, dst);
|
|
+ __ beqz(t0, L1); // this would be broken also
|
|
+ BLOCK_COMMENT("} assert klasses not null done");
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // Load layout helper (32-bits)
|
|
+ //
|
|
+ // |array_tag| | header_size | element_type | |log2_element_size|
|
|
+ // 32 30 24 16 8 2 0
|
|
+ //
|
|
+ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
|
|
+ //
|
|
+
|
|
+ const int lh_offset = in_bytes(Klass::layout_helper_offset());
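// Editor's sketch (not part of the patch): decoding the fields drawn in the
// layout-helper diagram above. The shift and width values follow the diagram
// and stand in for the Klass::_lh_* constants used by the stub; treat them as
// illustrative assumptions.
#include <cstdint>

static void decode_layout_helper_sketch(int32_t lh,
                                        unsigned& array_tag, unsigned& header_size,
                                        unsigned& element_type, unsigned& log2_elem_size) {
  array_tag      = ((uint32_t)lh >> 30) & 0x3;   // 0x3 typeArray, 0x2 objArray
  // Same trick as the slli/srli pair further down: shift left to discard the
  // higher fields, then shift right logically so only header_size remains.
  header_size    = ((uint32_t)lh << (32 - 24)) >> (32 - 8);
  element_type   = ((uint32_t)lh >> 8) & 0xff;   // BasicType of the elements
  log2_elem_size = (uint32_t)lh & 0x3f;          // log2 of the element size
}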
|
|
+
|
|
+ // Handle objArrays completely differently...
|
|
+ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
|
|
+ __ lw(lh, Address(scratch_src_klass, lh_offset));
|
|
+ __ mvw(t0, objArray_lh);
|
|
+ __ beq(lh, t0, L_objArray);
|
|
+
|
|
+ // if [src->klass() != dst->klass()] then return -1
|
|
+ __ load_klass(t1, dst);
|
|
+ __ bne(t1, scratch_src_klass, L_failed);
|
|
+
|
|
+    // if [src is not an array] then return -1
+    // i.e. the layout helper is non-negative (lh >= 0)
|
|
+ __ andi(t0, lh, 1UL << 31);
|
|
+ __ beqz(t0, L_failed);
|
|
+
|
|
+ // At this point, it is known to be a typeArray (array_tag 0x3).
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ BLOCK_COMMENT("assert primitive array {");
|
|
+ Label L;
|
|
+ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
|
|
+ __ bge(lh, t1, L);
|
|
+ __ stop("must be a primitive array");
|
|
+ __ bind(L);
|
|
+ BLOCK_COMMENT("} assert primitive array done");
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
+ t1, L_failed);
|
|
+
|
|
+ // TypeArrayKlass
|
|
+ //
|
|
+ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize)
|
|
+ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize)
|
|
+ //
|
|
+
|
|
+ const Register t0_offset = t0; // array offset
|
|
+ const Register x22_elsize = lh; // element size
|
|
+
|
|
+ // Get array_header_in_bytes()
|
|
+ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
|
|
+ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
|
|
+ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32;
|
|
+ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset
|
|
+
|
|
+ __ add(src, src, t0_offset); // src array offset
|
|
+ __ add(dst, dst, t0_offset); // dst array offset
|
|
+ BLOCK_COMMENT("choose copy loop based on element size");
|
|
+
|
|
+ // next registers should be set before the jump to corresponding stub
|
|
+ const Register from = c_rarg0; // source array address
|
|
+ const Register to = c_rarg1; // destination array address
|
|
+ const Register count = c_rarg2; // elements count
|
|
+
|
|
+ // 'from', 'to', 'count' registers should be set in such order
|
|
+ // since they are the same as 'src', 'src_pos', 'dst'.
|
|
+
|
|
+ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
|
|
+
|
|
+ // The possible values of elsize are 0-3, i.e. exact_log2(element
|
|
+ // size in bytes). We do a simple bitwise binary search.
|
|
+ __ BIND(L_copy_bytes);
|
|
+ __ andi(t0, x22_elsize, 2);
|
|
+ __ bnez(t0, L_copy_ints);
|
|
+ __ andi(t0, x22_elsize, 1);
|
|
+ __ bnez(t0, L_copy_shorts);
|
|
+ __ add(from, src, src_pos); // src_addr
|
|
+ __ add(to, dst, dst_pos); // dst_addr
|
|
+ __ addw(count, scratch_length, zr); // length
|
|
+ __ j(RuntimeAddress(byte_copy_entry));
|
|
+
|
|
+ __ BIND(L_copy_shorts);
|
|
+ __ shadd(from, src_pos, src, t0, 1); // src_addr
|
|
+ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr
|
|
+ __ addw(count, scratch_length, zr); // length
|
|
+ __ j(RuntimeAddress(short_copy_entry));
|
|
+
|
|
+ __ BIND(L_copy_ints);
|
|
+ __ andi(t0, x22_elsize, 1);
|
|
+ __ bnez(t0, L_copy_longs);
|
|
+ __ shadd(from, src_pos, src, t0, 2); // src_addr
|
|
+ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr
|
|
+ __ addw(count, scratch_length, zr); // length
|
|
+ __ j(RuntimeAddress(int_copy_entry));
|
|
+
|
|
+ __ BIND(L_copy_longs);
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ BLOCK_COMMENT("assert long copy {");
|
|
+ Label L;
|
|
+ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize
|
|
+ __ addw(lh, lh, zr);
|
|
+ __ mvw(t0, LogBytesPerLong);
|
|
+ __ beq(x22_elsize, t0, L);
|
|
+ __ stop("must be long copy, but elsize is wrong");
|
|
+ __ bind(L);
|
|
+ BLOCK_COMMENT("} assert long copy done");
|
|
+ }
|
|
+#endif
|
|
+ __ shadd(from, src_pos, src, t0, 3); // src_addr
|
|
+ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr
|
|
+ __ addw(count, scratch_length, zr); // length
|
|
+ __ j(RuntimeAddress(long_copy_entry));
|
|
+
|
|
+ // ObjArrayKlass
|
|
+ __ BIND(L_objArray);
|
|
+ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
|
|
+
|
|
+ Label L_plain_copy, L_checkcast_copy;
|
|
+ // test array classes for subtyping
|
|
+ __ load_klass(t2, dst);
|
|
+ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality
|
|
+
|
|
+ // Identically typed arrays can be copied without element-wise checks.
|
|
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
+ t1, L_failed);
|
|
+
|
|
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
|
|
+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
|
|
+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
+ __ addw(count, scratch_length, zr); // length
|
|
+ __ BIND(L_plain_copy);
|
|
+ __ j(RuntimeAddress(oop_copy_entry));
|
|
+
|
|
+ __ BIND(L_checkcast_copy);
|
|
+ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass)
|
|
+ {
|
|
+ // Before looking at dst.length, make sure dst is also an objArray.
|
|
+ __ lwu(t0, Address(t2, lh_offset));
|
|
+ __ mvw(t1, objArray_lh);
|
|
+ __ bne(t0, t1, L_failed);
|
|
+
|
|
+ // It is safe to examine both src.length and dst.length.
|
|
+ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
|
|
+ t2, L_failed);
|
|
+
|
|
+ __ load_klass(dst_klass, dst); // reload
|
|
+
|
|
+ // Marshal the base address arguments now, freeing registers.
|
|
+ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
|
|
+ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
+ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
|
|
+ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
|
|
+ __ addw(count, length, zr); // length (reloaded)
|
|
+ const Register sco_temp = c_rarg3; // this register is free now
|
|
+ assert_different_registers(from, to, count, sco_temp,
|
|
+ dst_klass, scratch_src_klass);
|
|
+
|
|
+ // Generate the type check.
|
|
+ const int sco_offset = in_bytes(Klass::super_check_offset_offset());
|
|
+ __ lwu(sco_temp, Address(dst_klass, sco_offset));
|
|
+
|
|
+ // Smashes t0, t1
|
|
+ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
|
|
+
|
|
+ // Fetch destination element klass from the ObjArrayKlass header.
|
|
+ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
|
|
+ __ ld(dst_klass, Address(dst_klass, ek_offset));
|
|
+ __ lwu(sco_temp, Address(dst_klass, sco_offset));
|
|
+
|
|
+ // the checkcast_copy loop needs two extra arguments:
|
|
+ assert(c_rarg3 == sco_temp, "#3 already in place");
|
|
+ // Set up arguments for checkcast_copy_entry.
|
|
+ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass
|
|
+ __ j(RuntimeAddress(checkcast_copy_entry));
|
|
+ }
|
|
+
|
|
+ __ BIND(L_failed);
|
|
+ __ mv(x10, -1);
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ //
|
|
+ // Generate stub for array fill. If "aligned" is true, the
|
|
+ // "to" address is assumed to be heapword aligned.
|
|
+ //
|
|
+ // Arguments for generated stub:
|
|
+ // to: c_rarg0
|
|
+ // value: c_rarg1
|
|
+ // count: c_rarg2 treated as signed
|
|
+ //
|
|
+ address generate_fill(BasicType t, bool aligned, const char* name) {
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+ address start = __ pc();
|
|
+
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+
|
|
+ const Register to = c_rarg0; // destination array address
|
|
+ const Register value = c_rarg1; // value
|
|
+ const Register count = c_rarg2; // elements count
|
|
+
|
|
+ const Register bz_base = x28; // base for block_zero routine
|
|
+ const Register cnt_words = x29; // temp register
|
|
+ const Register tmp_reg = t1;
|
|
+
|
|
+ __ enter();
|
|
+
|
|
+ Label L_fill_elements, L_exit1;
|
|
+
|
|
+ int shift = -1;
|
|
+ switch (t) {
|
|
+ case T_BYTE:
|
|
+ shift = 0;
|
|
+
|
|
+ // Zero extend value
|
|
+ // 8 bit -> 16 bit
|
|
+ __ andi(value, value, 0xff);
|
|
+ __ mv(tmp_reg, value);
|
|
+ __ slli(tmp_reg, tmp_reg, 8);
|
|
+ __ orr(value, value, tmp_reg);
|
|
+
|
|
+ // 16 bit -> 32 bit
|
|
+ __ mv(tmp_reg, value);
|
|
+ __ slli(tmp_reg, tmp_reg, 16);
|
|
+ __ orr(value, value, tmp_reg);
|
|
+
|
|
+ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
|
+ __ bltu(count, tmp_reg, L_fill_elements);
|
|
+ break;
|
|
+ case T_SHORT:
|
|
+ shift = 1;
|
|
+ // Zero extend value
|
|
+ // 16 bit -> 32 bit
|
|
+ __ andi(value, value, 0xffff);
|
|
+ __ mv(tmp_reg, value);
|
|
+ __ slli(tmp_reg, tmp_reg, 16);
|
|
+ __ orr(value, value, tmp_reg);
|
|
+
|
|
+ // Short arrays (< 8 bytes) fill by element
|
|
+ __ mv(tmp_reg, 8 >> shift);
|
|
+ __ bltu(count, tmp_reg, L_fill_elements);
|
|
+ break;
|
|
+ case T_INT:
|
|
+ shift = 2;
|
|
+
|
|
+ // Short arrays (< 8 bytes) fill by element
|
|
+ __ mv(tmp_reg, 8 >> shift);
|
|
+ __ bltu(count, tmp_reg, L_fill_elements);
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // Align the destination address to an 8-byte boundary.
|
|
+ Label L_skip_align1, L_skip_align2, L_skip_align4;
|
|
+ if (!aligned) {
|
|
+ switch (t) {
|
|
+ case T_BYTE:
|
|
+ // One byte misalignment happens only for byte arrays.
|
|
+ __ andi(t0, to, 1);
|
|
+ __ beqz(t0, L_skip_align1);
|
|
+ __ sb(value, Address(to, 0));
|
|
+ __ addi(to, to, 1);
|
|
+ __ addiw(count, count, -1);
|
|
+ __ bind(L_skip_align1);
|
|
+ // Fallthrough
|
|
+ case T_SHORT:
|
|
+ // Two bytes misalignment happens only for byte and short (char) arrays.
|
|
+ __ andi(t0, to, 2);
|
|
+ __ beqz(t0, L_skip_align2);
|
|
+ __ sh(value, Address(to, 0));
|
|
+ __ addi(to, to, 2);
|
|
+ __ addiw(count, count, -(2 >> shift));
|
|
+ __ bind(L_skip_align2);
|
|
+ // Fallthrough
|
|
+ case T_INT:
|
|
+ // Align to 8 bytes, we know we are 4 byte aligned to start.
|
|
+ __ andi(t0, to, 4);
|
|
+ __ beqz(t0, L_skip_align4);
|
|
+ __ sw(value, Address(to, 0));
|
|
+ __ addi(to, to, 4);
|
|
+ __ addiw(count, count, -(4 >> shift));
|
|
+ __ bind(L_skip_align4);
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ //
|
|
+ // Fill large chunks
|
|
+ //
|
|
+ __ srliw(cnt_words, count, 3 - shift); // number of words
|
|
+
|
|
+ // 32 bit -> 64 bit
|
|
+ __ andi(value, value, 0xffffffff);
|
|
+ __ mv(tmp_reg, value);
|
|
+ __ slli(tmp_reg, tmp_reg, 32);
|
|
+ __ orr(value, value, tmp_reg);
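+ // At this point "value" holds the fill pattern replicated across the full
+ // 64-bit register, e.g. a T_BYTE value of 0xAB has been widened to
+ // 0xABABABABABABABAB.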
+
|
|
+ __ slli(tmp_reg, cnt_words, 3 - shift);
|
|
+ __ subw(count, count, tmp_reg);
|
|
+ {
|
|
+ __ fill_words(to, cnt_words, value);
|
|
+ }
|
|
+
|
|
+ // Remaining count is less than 8 bytes. Fill it by a single store.
|
|
+ // Note that the total length is no less than 8 bytes.
|
|
+ if (t == T_BYTE || t == T_SHORT) {
|
|
+ __ beqz(count, L_exit1);
|
|
+ __ shadd(to, count, to, tmp_reg, shift); // points to the end
|
|
+ __ sd(value, Address(to, -8)); // overwrite some elements
|
|
+ __ bind(L_exit1);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ }
|
|
+
|
|
+ // Handle copies less than 8 bytes.
|
|
+ Label L_fill_2, L_fill_4, L_exit2;
|
|
+ __ bind(L_fill_elements);
|
|
+ switch (t) {
|
|
+ case T_BYTE:
|
|
+ __ andi(t0, count, 1);
|
|
+ __ beqz(t0, L_fill_2);
|
|
+ __ sb(value, Address(to, 0));
|
|
+ __ addi(to, to, 1);
|
|
+ __ bind(L_fill_2);
|
|
+ __ andi(t0, count, 2);
|
|
+ __ beqz(t0, L_fill_4);
|
|
+ __ sh(value, Address(to, 0));
|
|
+ __ addi(to, to, 2);
|
|
+ __ bind(L_fill_4);
|
|
+ __ andi(t0, count, 4);
|
|
+ __ beqz(t0, L_exit2);
|
|
+ __ sw(value, Address(to, 0));
|
|
+ break;
|
|
+ case T_SHORT:
|
|
+ __ andi(t0, count, 1);
|
|
+ __ beqz(t0, L_fill_4);
|
|
+ __ sh(value, Address(to, 0));
|
|
+ __ addi(to, to, 2);
|
|
+ __ bind(L_fill_4);
|
|
+ __ andi(t0, count, 2);
|
|
+ __ beqz(t0, L_exit2);
|
|
+ __ sw(value, Address(to, 0));
|
|
+ break;
|
|
+ case T_INT:
|
|
+ __ beqz(count, L_exit2);
|
|
+ __ sw(value, Address(to, 0));
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ __ bind(L_exit2);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ void generate_arraycopy_stubs() {
|
|
+ address entry = NULL;
|
|
+ address entry_jbyte_arraycopy = NULL;
|
|
+ address entry_jshort_arraycopy = NULL;
|
|
+ address entry_jint_arraycopy = NULL;
|
|
+ address entry_oop_arraycopy = NULL;
|
|
+ address entry_jlong_arraycopy = NULL;
|
|
+ address entry_checkcast_arraycopy = NULL;
|
|
+
|
|
+ StubRoutines::riscv::_zero_blocks = generate_zero_blocks();
|
|
+
|
|
+ //*** jbyte
|
|
+ // Always need aligned and unaligned versions
|
|
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
|
|
+ "jbyte_disjoint_arraycopy");
|
|
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
|
|
+ &entry_jbyte_arraycopy,
|
|
+ "jbyte_arraycopy");
|
|
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
|
|
+ "arrayof_jbyte_disjoint_arraycopy");
|
|
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
|
|
+ "arrayof_jbyte_arraycopy");
|
|
+
|
|
+ //*** jshort
|
|
+ // Always need aligned and unaligned versions
|
|
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
|
|
+ "jshort_disjoint_arraycopy");
|
|
+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
|
|
+ &entry_jshort_arraycopy,
|
|
+ "jshort_arraycopy");
|
|
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
|
|
+ "arrayof_jshort_disjoint_arraycopy");
|
|
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
|
|
+ "arrayof_jshort_arraycopy");
|
|
+
|
|
+ //*** jint
|
|
+ // Aligned versions
|
|
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
|
|
+ "arrayof_jint_disjoint_arraycopy");
|
|
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
|
|
+ "arrayof_jint_arraycopy");
|
|
+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
|
|
+ // entry_jint_arraycopy always points to the unaligned version
|
|
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
|
|
+ "jint_disjoint_arraycopy");
|
|
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
|
|
+ &entry_jint_arraycopy,
|
|
+ "jint_arraycopy");
|
|
+
|
|
+ //*** jlong
|
|
+ // It is always aligned
|
|
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
|
|
+ "arrayof_jlong_disjoint_arraycopy");
|
|
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
|
|
+ "arrayof_jlong_arraycopy");
|
|
+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
|
|
+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
|
|
+
|
|
+ //*** oops
|
|
+ {
|
|
+ // With compressed oops we need unaligned versions; notice that
|
|
+ // we overwrite entry_oop_arraycopy.
|
|
+ bool aligned = !UseCompressedOops;
|
|
+
|
|
+ StubRoutines::_arrayof_oop_disjoint_arraycopy
|
|
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
|
|
+ /*dest_uninitialized*/false);
|
|
+ StubRoutines::_arrayof_oop_arraycopy
|
|
+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
|
|
+ /*dest_uninitialized*/false);
|
|
+ // Aligned versions without pre-barriers
|
|
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
|
|
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
|
|
+ /*dest_uninitialized*/true);
|
|
+ StubRoutines::_arrayof_oop_arraycopy_uninit
|
|
+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
|
|
+ /*dest_uninitialized*/true);
|
|
+ }
|
|
+
|
|
+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
|
|
+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
|
|
+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
|
|
+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
|
|
+
|
|
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
|
|
+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
|
|
+ /*dest_uninitialized*/true);
|
|
+
|
|
+
|
|
+ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
|
|
+ entry_jbyte_arraycopy,
|
|
+ entry_jshort_arraycopy,
|
|
+ entry_jint_arraycopy,
|
|
+ entry_jlong_arraycopy);
|
|
+
|
|
+ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
|
|
+ entry_jbyte_arraycopy,
|
|
+ entry_jshort_arraycopy,
|
|
+ entry_jint_arraycopy,
|
|
+ entry_oop_arraycopy,
|
|
+ entry_jlong_arraycopy,
|
|
+ entry_checkcast_arraycopy);
|
|
+
|
|
+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
|
+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
|
+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
|
+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
|
+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
|
+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
|
+ }
|
|
+
|
|
+ // Safefetch stubs.
|
|
+ void generate_safefetch(const char* name, int size, address* entry,
|
|
+ address* fault_pc, address* continuation_pc) {
|
|
+ // safefetch signatures:
|
|
+ // int SafeFetch32(int* adr, int errValue)
|
|
+ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue)
|
|
+ //
|
|
+ // arguments:
|
|
+ // c_rarg0 = adr
|
|
+ // c_rarg1 = errValue
|
|
+ //
|
|
+ // result:
|
|
+ // x10 = *adr or errValue
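+ // If the load below faults, the signal handler resumes execution at
+ // *continuation_pc with c_rarg1 still holding errValue, so the final
+ // move into x10 returns the error value instead of *adr.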
+ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL);
|
|
+ StubCodeMark mark(this, "StubRoutines", name);
|
|
+
|
|
+ // Entry point, pc or function descriptor.
|
|
+ *entry = __ pc();
|
|
+
|
|
+ // Load *adr into c_rarg1, may fault.
|
|
+ *fault_pc = __ pc();
|
|
+ switch (size) {
|
|
+ case 4:
|
|
+ // int32_t
|
|
+ __ lw(c_rarg1, Address(c_rarg0, 0));
|
|
+ break;
|
|
+ case 8:
|
|
+ // int64_t
|
|
+ __ ld(c_rarg1, Address(c_rarg0, 0));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // return errValue or *adr
|
|
+ *continuation_pc = __ pc();
|
|
+ __ mv(x10, c_rarg1);
|
|
+ __ ret();
|
|
+ }
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ // code for comparing 16 bytes of strings with same encoding
|
|
+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) {
|
|
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
|
|
+ __ ld(tmp5, Address(str1));
|
|
+ __ addi(str1, str1, wordSize);
|
|
+ __ xorr(tmp4, tmp1, tmp2);
|
|
+ __ ld(cnt1, Address(str2));
|
|
+ __ addi(str2, str2, wordSize);
|
|
+ __ bnez(tmp4, DIFF1);
|
|
+ __ ld(tmp1, Address(str1));
|
|
+ __ addi(str1, str1, wordSize);
|
|
+ __ xorr(tmp4, tmp5, cnt1);
|
|
+ __ ld(tmp2, Address(str2));
|
|
+ __ addi(str2, str2, wordSize);
|
|
+ __ bnez(tmp4, DIFF2);
|
|
+ }
|
|
+
|
|
+ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding
|
|
+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) {
|
|
+ const Register tmp = x30;
|
|
+ __ ld(tmpL, Address(strL));
|
|
+ __ addi(strL, strL, wordSize);
|
|
+ __ ld(tmpU, Address(strU));
|
|
+ __ addi(strU, strU, wordSize);
|
|
+ __ inflate_lo32(tmp, tmpL);
|
|
+ __ mv(t0, tmp);
|
|
+ __ xorr(tmp, tmpU, t0);
|
|
+ __ bnez(tmp, DIFF);
|
|
+
|
|
+ __ ld(tmpU, Address(strU));
|
|
+ __ addi(strU, strU, wordSize);
|
|
+ __ inflate_hi32(tmp, tmpL);
|
|
+ __ mv(t0, tmp);
|
|
+ __ xorr(tmp, tmpU, t0);
|
|
+ __ bnez(tmp, DIFF);
|
|
+ }
|
|
+
|
|
+ // x10 = result
|
|
+ // x11 = str1
|
|
+ // x12 = cnt1
|
|
+ // x13 = str2
|
|
+ // x14 = cnt2
|
|
+ // x28 = tmp1
|
|
+ // x29 = tmp2
|
|
+ // x30 = tmp3
|
|
+ address generate_compare_long_string_different_encoding(bool isLU) {
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", isLU ? "compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL");
|
|
+ address entry = __ pc();
|
|
+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE;
|
|
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
|
|
+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
|
|
+ RegSet spilled_regs = RegSet::of(tmp4, tmp5);
|
|
+
|
|
+ // cnt2 == number of characters left to compare
|
|
+ // Check already loaded first 4 symbols
|
|
+ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
|
|
+ __ mv(isLU ? tmp1 : tmp2, tmp3);
|
|
+ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize);
|
|
+ __ addi(str2, str2, isLU ? wordSize : wordSize / 2);
|
|
+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols.
|
|
+ __ push_reg(spilled_regs, sp);
|
|
+
|
|
+ __ xorr(tmp3, tmp1, tmp2);
|
|
+ __ mv(tmp5, tmp2);
|
|
+ __ bnez(tmp3, CALCULATE_DIFFERENCE);
|
|
+
|
|
+ Register strU = isLU ? str2 : str1,
|
|
+ strL = isLU ? str1 : str2,
|
|
+ tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison
|
|
+ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison
|
|
+
|
|
+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL
|
|
+ __ beqz(cnt2, DONE); // no characters left
|
|
+ __ lwu(tmpL, Address(strL));
|
|
+ __ addi(strL, strL, wordSize / 2);
|
|
+ __ ld(tmpU, Address(strU));
|
|
+ __ addi(strU, strU, wordSize);
|
|
+ __ inflate_lo32(tmp3, tmpL);
|
|
+ __ mv(tmpL, tmp3);
|
|
+ __ xorr(tmp3, tmpU, tmpL);
|
|
+ __ bnez(tmp3, CALCULATE_DIFFERENCE);
|
|
+ __ addi(cnt2, cnt2, -wordSize / 2);
|
|
+
|
|
+ __ beqz(cnt2, DONE); // no character left
|
|
+ __ sub(cnt2, cnt2, wordSize * 2);
|
|
+ __ bltz(cnt2, TAIL);
|
|
+ __ bind(SMALL_LOOP); // smaller loop
|
|
+ __ sub(cnt2, cnt2, wordSize * 2);
|
|
+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF);
|
|
+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF);
|
|
+ __ bgez(cnt2, SMALL_LOOP);
|
|
+ __ addi(t0, cnt2, wordSize * 2);
|
|
+ __ beqz(t0, DONE);
|
|
+ __ bind(TAIL); // 1..15 characters left
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ // Aligned access. Load bytes from byte-aligned address,
|
|
+ // which may contain invalid bytes in the last load.
|
|
+ // Invalid bytes should be removed before comparison.
|
|
+ Label LOAD_LAST, WORD_CMP;
|
|
+ __ addi(t0, cnt2, wordSize);
|
|
+ __ bgtz(t0, LOAD_LAST);
|
|
+ // the remaining character count is greater than or equal to 8, so we can do one compare_string_8_x_LU
|
|
+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF);
|
|
+ __ addi(cnt2, cnt2, wordSize);
|
|
+ __ beqz(cnt2, DONE); // no character left
|
|
+ __ bind(LOAD_LAST); // 1..7 characters left
|
|
+ __ lwu(tmpL, Address(strL));
|
|
+ __ addi(strL, strL, wordSize / 2);
|
|
+ __ ld(tmpU, Address(strU));
|
|
+ __ addi(strU, strU, wordSize);
|
|
+ __ inflate_lo32(tmp3, tmpL);
|
|
+ __ mv(tmpL, tmp3);
|
|
+ __ addi(t0, cnt2, wordSize / 2);
|
|
+ __ blez(t0, WORD_CMP);
|
|
+ __ slli(t0, t0, 1); // now in bytes
|
|
+ __ slli(t0, t0, LogBitsPerByte);
|
|
+ __ sll(tmpL, tmpL, t0);
|
|
+ __ sll(tmpU, tmpU, t0);
|
|
+ // the remaining character count is greater than or equal to 4, so we can do one full 4-byte comparison
|
|
+ __ bind(WORD_CMP);
|
|
+ __ xorr(tmp3, tmpU, tmpL);
|
|
+ __ bnez(tmp3, CALCULATE_DIFFERENCE);
|
|
+ __ addi(cnt2, cnt2, wordSize / 2);
|
|
+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left
|
|
+ __ j(DONE); // no character left
|
|
+ } else {
|
|
+ // Unaligned accesses. Load from non-byte aligned address.
|
|
+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string
|
|
+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string
|
|
+ // last 16 characters
|
|
+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF);
|
|
+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF);
|
|
+ __ j(DONE);
|
|
+ }
|
|
+ __ bind(DIFF);
|
|
+ __ mv(tmpL, t0);
|
|
+ // Find the first different characters in the longwords and
|
|
+ // compute their difference.
|
|
+ __ bind(CALCULATE_DIFFERENCE);
|
|
+ __ ctzc_bit(tmp4, tmp3);
|
|
+ __ srl(tmp1, tmp1, tmp4);
|
|
+ __ srl(tmp5, tmp5, tmp4);
|
|
+ __ andi(tmp1, tmp1, 0xFFFF);
|
|
+ __ andi(tmp5, tmp5, 0xFFFF);
|
|
+ __ sub(result, tmp1, tmp5);
|
|
+ __ bind(DONE);
|
|
+ __ pop_reg(spilled_regs, sp);
|
|
+ __ ret();
|
|
+ return entry;
|
|
+ }
|
|
+
|
|
+ // x10 = result
|
|
+ // x11 = str1
|
|
+ // x12 = cnt1
|
|
+ // x13 = str2
|
|
+ // x14 = cnt2
|
|
+ // x28 = tmp1
|
|
+ // x29 = tmp2
|
|
+ // x30 = tmp3
|
|
+ // x31 = tmp4
|
|
+ address generate_compare_long_string_same_encoding(bool isLL) {
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", isLL ?
|
|
+ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU");
|
|
+ address entry = __ pc();
|
|
+ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL,
|
|
+ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF;
|
|
+ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
|
|
+ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
|
|
+ RegSet spilled_regs = RegSet::of(tmp4, tmp5);
|
|
+
|
|
+ // cnt1/cnt2 contain the number of characters to compare. cnt1 can be re-used
|
|
+ // update cnt2 counter with already loaded 8 bytes
|
|
+ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2));
|
|
+ // update pointers, because of previous read
|
|
+ __ add(str1, str1, wordSize);
|
|
+ __ add(str2, str2, wordSize);
|
|
+ // less than 16 bytes left?
|
|
+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize);
|
|
+ __ push_reg(spilled_regs, sp);
|
|
+ __ bltz(cnt2, TAIL);
|
|
+ __ bind(SMALL_LOOP);
|
|
+ compare_string_16_bytes_same(DIFF, DIFF2);
|
|
+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize);
|
|
+ __ bgez(cnt2, SMALL_LOOP);
|
|
+ __ bind(TAIL);
|
|
+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize);
|
|
+ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF);
|
|
+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2);
|
|
+ __ blez(cnt2, CHECK_LAST);
|
|
+ __ xorr(tmp4, tmp1, tmp2);
|
|
+ __ bnez(tmp4, DIFF);
|
|
+ __ ld(tmp1, Address(str1));
|
|
+ __ addi(str1, str1, wordSize);
|
|
+ __ ld(tmp2, Address(str2));
|
|
+ __ addi(str2, str2, wordSize);
|
|
+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2);
|
|
+ __ bind(CHECK_LAST);
|
|
+ if (!isLL) {
|
|
+ __ add(cnt2, cnt2, cnt2); // now in bytes
|
|
+ }
|
|
+ __ xorr(tmp4, tmp1, tmp2);
|
|
+ __ bnez(tmp4, DIFF);
|
|
+ if (AvoidUnalignedAccesses) {
|
|
+ // Aligned access. Load bytes from byte-aligned address,
|
|
+ // which may contain invalid bytes in the last load.
|
|
+ // Invalid bytes should be removed before comparison.
|
|
+ __ ld(tmp5, Address(str1));
|
|
+ __ ld(cnt1, Address(str2));
|
|
+ __ neg(cnt2, cnt2);
|
|
+ __ slli(cnt2, cnt2, LogBitsPerByte);
|
|
+ __ sll(tmp5, tmp5, cnt2);
|
|
+ __ sll(cnt1, cnt1, cnt2);
|
|
+ } else {
|
|
+ // Unaligned access. Load from non-byte aligned address.
|
|
+ __ add(str1, str1, cnt2);
|
|
+ __ ld(tmp5, Address(str1));
|
|
+ __ add(str2, str2, cnt2);
|
|
+ __ ld(cnt1, Address(str2));
|
|
+ }
|
|
+
|
|
+ __ xorr(tmp4, tmp5, cnt1);
|
|
+ __ beqz(tmp4, LENGTH_DIFF);
|
|
+ // Find the first different characters in the longwords and
|
|
+ // compute their difference.
|
|
+ __ bind(DIFF2);
|
|
+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
|
|
+ __ srl(tmp5, tmp5, tmp3);
|
|
+ __ srl(cnt1, cnt1, tmp3);
|
|
+ if (isLL) {
|
|
+ __ andi(tmp5, tmp5, 0xFF);
|
|
+ __ andi(cnt1, cnt1, 0xFF);
|
|
+ } else {
|
|
+ __ andi(tmp5, tmp5, 0xFFFF);
|
|
+ __ andi(cnt1, cnt1, 0xFFFF);
|
|
+ }
|
|
+ __ sub(result, tmp5, cnt1);
|
|
+ __ j(LENGTH_DIFF);
|
|
+ __ bind(DIFF);
|
|
+ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
|
|
+ __ srl(tmp1, tmp1, tmp3);
|
|
+ __ srl(tmp2, tmp2, tmp3);
|
|
+ if (isLL) {
|
|
+ __ andi(tmp1, tmp1, 0xFF);
|
|
+ __ andi(tmp2, tmp2, 0xFF);
|
|
+ } else {
|
|
+ __ andi(tmp1, tmp1, 0xFFFF);
|
|
+ __ andi(tmp2, tmp2, 0xFFFF);
|
|
+ }
|
|
+ __ sub(result, tmp1, tmp2);
|
|
+ __ j(LENGTH_DIFF);
|
|
+ __ bind(LAST_CHECK_AND_LENGTH_DIFF);
|
|
+ __ xorr(tmp4, tmp1, tmp2);
|
|
+ __ bnez(tmp4, DIFF);
|
|
+ __ bind(LENGTH_DIFF);
|
|
+ __ pop_reg(spilled_regs, sp);
|
|
+ __ ret();
|
|
+ return entry;
|
|
+ }
|
|
+
|
|
+ void generate_compare_long_strings() {
|
|
+ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true);
|
|
+ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false);
|
|
+ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true);
|
|
+ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false);
|
|
+ }
|
|
+
|
|
+ // x10 result
|
|
+ // x11 src
|
|
+ // x12 src count
|
|
+ // x13 pattern
|
|
+ // x14 pattern count
|
|
+ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL)
|
|
+ {
|
|
+ const char* stubName = needle_isL
|
|
+ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul")
|
|
+ : "indexof_linear_uu";
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", stubName);
|
|
+ address entry = __ pc();
|
|
+
|
|
+ int needle_chr_size = needle_isL ? 1 : 2;
|
|
+ int haystack_chr_size = haystack_isL ? 1 : 2;
|
|
+ int needle_chr_shift = needle_isL ? 0 : 1;
|
|
+ int haystack_chr_shift = haystack_isL ? 0 : 1;
|
|
+ bool isL = needle_isL && haystack_isL;
|
|
+ // parameters
|
|
+ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14;
|
|
+ // temporary registers
|
|
+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25;
|
|
+ // redefinitions
|
|
+ Register ch1 = x28, ch2 = x29;
|
|
+ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29);
|
|
+
|
|
+ __ push_reg(spilled_regs, sp);
|
|
+
|
|
+ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
|
|
+ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
|
|
+ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
|
|
+ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
|
|
+ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
|
|
+ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
|
|
+
|
|
+ __ ld(ch1, Address(needle));
|
|
+ __ ld(ch2, Address(haystack));
|
|
+ // src.length - pattern.length
|
|
+ __ sub(haystack_len, haystack_len, needle_len);
|
|
+
|
|
+ // first is needle[0]
|
|
+ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first);
|
|
+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001);
|
|
+ __ mul(first, first, mask1);
|
|
+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
|
|
+ if (needle_isL != haystack_isL) {
|
|
+ __ mv(tmp, ch1);
|
|
+ }
|
|
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1);
|
|
+ __ blez(haystack_len, L_SMALL);
|
|
+
|
|
+ if (needle_isL != haystack_isL) {
|
|
+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero);
|
|
+ }
|
|
+ // xorr, sub, orr, notr, andr
|
|
+ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i]
|
|
+ // eg:
|
|
+ // first: aa aa aa aa aa aa aa aa
|
|
+ // ch2: aa aa li nx jd ka aa aa
|
|
+ // match_mask: 80 80 00 00 00 00 80 80
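+ // For reference, the mask computed by compute_match_mask below is the
+ // classic SWAR zero-byte test applied to (ch2 ^ first); a C sketch for the
+ // Latin1 case (illustrative only, not generated code):
+ //   uint64_t v = ch2 ^ first;        // zero byte where ch2[i] == first[i]
+ //   uint64_t m = (v - 0x0101010101010101ULL) & ~(v | 0x7f7f7f7f7f7f7f7fULL);
+ // The lowest 0x80 byte in m marks a true match; higher bytes can be false
+ // positives from borrow propagation, which the verification loops below
+ // tolerate by re-checking candidates character by character.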
+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
|
|
+
|
|
+ // search first char of needle, if success, goto L_HAS_ZERO;
|
|
+ __ bnez(match_mask, L_HAS_ZERO);
|
|
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
|
|
+ __ add(result, result, wordSize / haystack_chr_size);
|
|
+ __ add(haystack, haystack, wordSize);
|
|
+ __ bltz(haystack_len, L_POST_LOOP);
|
|
+
|
|
+ __ bind(L_LOOP);
|
|
+ __ ld(ch2, Address(haystack));
|
|
+ __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
|
|
+ __ bnez(match_mask, L_HAS_ZERO);
|
|
+
|
|
+ __ bind(L_LOOP_PROCEED);
|
|
+ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
|
|
+ __ add(haystack, haystack, wordSize);
|
|
+ __ add(result, result, wordSize / haystack_chr_size);
|
|
+ __ bgez(haystack_len, L_LOOP);
|
|
+
|
|
+ __ bind(L_POST_LOOP);
|
|
+ __ mv(ch2, -wordSize / haystack_chr_size);
|
|
+ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check
|
|
+ __ ld(ch2, Address(haystack));
|
|
+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
|
|
+ __ neg(haystack_len, haystack_len);
|
|
+ __ xorr(ch2, first, ch2);
|
|
+ __ sub(match_mask, ch2, mask1);
|
|
+ __ orr(ch2, ch2, mask2);
|
|
+ __ mv(trailing_zero, -1); // all bits set
|
|
+ __ j(L_SMALL_PROCEED);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_SMALL);
|
|
+ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
|
|
+ __ neg(haystack_len, haystack_len);
|
|
+ if (needle_isL != haystack_isL) {
|
|
+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero);
|
|
+ }
|
|
+ __ xorr(ch2, first, ch2);
|
|
+ __ sub(match_mask, ch2, mask1);
|
|
+ __ orr(ch2, ch2, mask2);
|
|
+ __ mv(trailing_zero, -1); // all bits set
|
|
+
|
|
+ __ bind(L_SMALL_PROCEED);
|
|
+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits.
|
|
+ __ notr(ch2, ch2);
|
|
+ __ andr(match_mask, match_mask, ch2);
|
|
+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check
|
|
+ __ beqz(match_mask, NOMATCH);
|
|
+
|
|
+ __ bind(L_SMALL_HAS_ZERO_LOOP);
|
|
+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros
|
|
+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15);
|
|
+ __ mv(ch2, wordSize / haystack_chr_size);
|
|
+ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2);
|
|
+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL);
|
|
+ __ mv(trailing_zero, wordSize / haystack_chr_size);
|
|
+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
+
|
|
+ __ bind(L_SMALL_CMP_LOOP);
|
|
+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift);
|
|
+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift);
|
|
+ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first));
|
|
+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
|
|
+ __ add(trailing_zero, trailing_zero, 1);
|
|
+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP);
|
|
+ __ beq(first, ch2, L_SMALL_CMP_LOOP);
|
|
+
|
|
+ __ bind(L_SMALL_CMP_LOOP_NOMATCH);
|
|
+ __ beqz(match_mask, NOMATCH);
|
|
+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2);
|
|
+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15);
|
|
+ __ add(result, result, 1);
|
|
+ __ add(haystack, haystack, haystack_chr_size);
|
|
+ __ j(L_SMALL_HAS_ZERO_LOOP);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP);
|
|
+ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
+ __ j(DONE);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2);
|
|
+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL);
|
|
+ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
|
|
+ __ j(DONE);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_HAS_ZERO);
|
|
+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2);
|
|
+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15);
|
|
+ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2);
|
|
+ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits)
|
|
+ __ sub(result, result, 1); // array index from 0, so result -= 1
|
|
+
|
|
+ __ bind(L_HAS_ZERO_LOOP);
|
|
+ __ mv(needle_len, wordSize / haystack_chr_size);
|
|
+ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2);
|
|
+ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2);
|
|
+ // load next 8 bytes from haystack, and increase result index
|
|
+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL);
|
|
+ __ add(result, result, 1);
|
|
+ __ mv(trailing_zero, wordSize / haystack_chr_size);
|
|
+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
|
|
+
|
|
+ // compare one char
|
|
+ __ bind(L_CMP_LOOP);
|
|
+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift);
|
|
+ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len));
|
|
+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift);
|
|
+ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
|
|
+ __ add(trailing_zero, trailing_zero, 1); // next char index
|
|
+ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2);
|
|
+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP);
|
|
+ __ beq(needle_len, ch2, L_CMP_LOOP);
|
|
+
|
|
+ __ bind(L_CMP_LOOP_NOMATCH);
|
|
+ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH);
|
|
+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index
|
|
+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15);
|
|
+ __ add(haystack, haystack, haystack_chr_size);
|
|
+ __ j(L_HAS_ZERO_LOOP);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_CMP_LOOP_LAST_CMP);
|
|
+ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH);
|
|
+ __ j(DONE);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_CMP_LOOP_LAST_CMP2);
|
|
+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL);
|
|
+ __ add(result, result, 1);
|
|
+ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
|
|
+ __ j(DONE);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(L_HAS_ZERO_LOOP_NOMATCH);
|
|
+ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
|
|
+ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
|
|
+ // so result was increased by at most wordSize/str2_chr_size - 1 and the
|
|
+ // respective high bit wasn't changed. L_LOOP_PROCEED will increase
|
|
+ // result by analyzed characters value, so, we can just reset lower bits
|
|
+ // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
|
|
+ // 2) restore needle_len and haystack_len values from "compressed" haystack_len
|
|
+ // 3) advance haystack value to represent next haystack octet. result & 7/3 is
|
|
+ // index of the last analyzed substring inside the current octet, so haystack is
+ // at the respective start address. We need to advance it to the next octet.
|
|
+ __ andi(match_mask, result, wordSize / haystack_chr_size - 1);
|
|
+ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2);
|
|
+ __ andi(result, result, haystack_isL ? -8 : -4);
|
|
+ __ slli(tmp, match_mask, haystack_chr_shift);
|
|
+ __ sub(haystack, haystack, tmp);
|
|
+ __ addw(haystack_len, haystack_len, zr);
|
|
+ __ j(L_LOOP_PROCEED);
|
|
+
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ bind(NOMATCH);
|
|
+ __ mv(result, -1);
|
|
+
|
|
+ __ bind(DONE);
|
|
+ __ pop_reg(spilled_regs, sp);
|
|
+ __ ret();
|
|
+ return entry;
|
|
+ }
|
|
+
|
|
+ void generate_string_indexof_stubs()
|
|
+ {
|
|
+ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);
|
|
+ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false);
|
|
+ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
|
|
+ }
|
|
+
|
|
+ address generate_mulAdd()
|
|
+ {
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "mulAdd");
|
|
+
|
|
+ address start = __ pc();
|
|
+
|
|
+ const Register out = x10;
|
|
+ const Register in = x11;
|
|
+ const Register offset = x12;
|
|
+ const Register len = x13;
|
|
+ const Register k = x14;
|
|
+ const Register tmp1 = x28;
|
|
+ const Register tmp2 = x29;
|
|
+
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ __ enter();
|
|
+ __ mul_add(out, in, offset, len, k, tmp1, tmp2);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Arguments:
|
|
+ *
|
|
+ * Input:
|
|
+ * c_rarg0 - x address
|
|
+ * c_rarg1 - x length
|
|
+ * c_rarg2 - y address
|
|
+ * c_rarg3 - y length
|
|
+ * c_rarg4 - z address
|
|
+ * c_rarg5 - z length
|
|
+ */
|
|
+ address generate_multiplyToLen()
|
|
+ {
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
|
|
+ address start = __ pc();
|
|
+
|
|
+ const Register x = x10;
|
|
+ const Register xlen = x11;
|
|
+ const Register y = x12;
|
|
+ const Register ylen = x13;
|
|
+ const Register z = x14;
|
|
+ const Register zlen = x15;
|
|
+
|
|
+ const Register tmp1 = x16;
|
|
+ const Register tmp2 = x17;
|
|
+ const Register tmp3 = x7;
|
|
+ const Register tmp4 = x28;
|
|
+ const Register tmp5 = x29;
|
|
+ const Register tmp6 = x30;
|
|
+ const Register tmp7 = x31;
|
|
+
|
|
+ RegSet spilled_regs = RegSet::of(tmp1, tmp2);
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ push_reg(spilled_regs, sp);
|
|
+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
|
+ __ pop_reg(spilled_regs, sp);
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+ address generate_squareToLen()
|
|
+ {
|
|
+ // squareToLen algorithm for sizes 1..127 described in java code works
|
|
+ // faster than multiply_to_len on some CPUs and slower on others, but
|
|
+ // multiply_to_len shows a bit better overall results
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "squareToLen");
|
|
+ address start = __ pc();
|
|
+
|
|
+ const Register x = x10;
|
|
+ const Register xlen = x11;
|
|
+ const Register z = x12;
|
|
+ const Register zlen = x13;
|
|
+ const Register y = x14; // == x
|
|
+ const Register ylen = x15; // == xlen
|
|
+
|
|
+ const Register tmp1 = x16;
|
|
+ const Register tmp2 = x17;
|
|
+ const Register tmp3 = x7;
|
|
+ const Register tmp4 = x28;
|
|
+ const Register tmp5 = x29;
|
|
+ const Register tmp6 = x30;
|
|
+ const Register tmp7 = x31;
|
|
+
|
|
+ RegSet spilled_regs = RegSet::of(y, tmp2);
|
|
+ BLOCK_COMMENT("Entry:");
|
|
+ __ enter();
|
|
+ __ push_reg(spilled_regs, sp);
|
|
+ __ mv(y, x);
|
|
+ __ mv(ylen, xlen);
|
|
+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
|
|
+ __ pop_reg(spilled_regs, sp);
|
|
+ __ leave();
|
|
+ __ ret();
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+#endif // COMPILER2
|
|
+
|
|
+ // Continuation point for throwing of implicit exceptions that are
|
|
+ // not handled in the current activation. Fabricates an exception
|
|
+ // oop and initiates normal exception dispatching in this
|
|
+ // frame. Since we need to preserve callee-saved values (currently
|
|
+ // only for C2, but done for C1 as well) we need a callee-saved oop
|
|
+ // map and therefore have to make these stubs into RuntimeStubs
|
|
+ // rather than BufferBlobs. If the compiler needs all registers to
|
|
+ // be preserved between the fault point and the exception handler
|
|
+ // then it must assume responsibility for that in
|
|
+ // AbstractCompiler::continuation_for_implicit_null_exception or
|
|
+ // continuation_for_implicit_division_by_zero_exception. All other
|
|
+ // implicit exceptions (e.g., NullPointerException or
|
|
+ // AbstractMethodError on entry) are either at call sites or
|
|
+ // otherwise assume that stack unwinding will be initiated, so
|
|
+ // caller saved registers were assumed volatile in the compiler.
|
|
+
|
|
+#undef __
|
|
+#define __ masm->
|
|
+
|
|
+ address generate_throw_exception(const char* name,
|
|
+ address runtime_entry,
|
|
+ Register arg1 = noreg,
|
|
+ Register arg2 = noreg) {
|
|
+ // Information about frame layout at time of blocking runtime call.
|
|
+ // Note that we only have to preserve callee-saved registers since
|
|
+ // the compilers are responsible for supplying a continuation point
|
|
+ // if they expect all registers to be preserved.
|
|
+ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0
|
|
+ assert_cond(runtime_entry != NULL);
|
|
+ enum layout {
|
|
+ fp_off = 0,
|
|
+ fp_off2,
|
|
+ return_off,
|
|
+ return_off2,
|
|
+ framesize // inclusive of return address
|
|
+ };
|
|
+
|
|
+ const int insts_size = 512;
|
|
+ const int locs_size = 64;
|
|
+
|
|
+ CodeBuffer code(name, insts_size, locs_size);
|
|
+ OopMapSet* oop_maps = new OopMapSet();
|
|
+ MacroAssembler* masm = new MacroAssembler(&code);
|
|
+ assert_cond(oop_maps != NULL && masm != NULL);
|
|
+
|
|
+ address start = __ pc();
|
|
+
|
|
+ // This is an inlined and slightly modified version of call_VM
|
|
+ // which has the ability to fetch the return PC out of
|
|
+ // thread-local storage and also sets up last_Java_sp slightly
|
|
+ // differently than the real call_VM
|
|
+
|
|
+ __ enter(); // Save FP and RA before call
|
|
+
|
|
+ assert(is_even(framesize / 2), "sp not 16-byte aligned");
|
|
+
|
|
+ // ra and fp are already in place
|
|
+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog
|
|
+
|
|
+ int frame_complete = __ pc() - start;
|
|
+
|
|
+ // Set up last_Java_sp and last_Java_fp
|
|
+ address the_pc = __ pc();
|
|
+ __ set_last_Java_frame(sp, fp, the_pc, t0);
|
|
+
|
|
+ // Call runtime
|
|
+ if (arg1 != noreg) {
|
|
+ assert(arg2 != c_rarg1, "clobbered");
|
|
+ __ mv(c_rarg1, arg1);
|
|
+ }
|
|
+ if (arg2 != noreg) {
|
|
+ __ mv(c_rarg2, arg2);
|
|
+ }
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ BLOCK_COMMENT("call runtime_entry");
|
|
+ int32_t offset = 0;
|
|
+ __ movptr_with_offset(t0, runtime_entry, offset);
|
|
+ __ jalr(x1, t0, offset);
|
|
+
|
|
+ // Generate oop map
|
|
+ OopMap* map = new OopMap(framesize, 0);
|
|
+ assert_cond(map != NULL);
|
|
+
|
|
+ oop_maps->add_gc_map(the_pc - start, map);
|
|
+
|
|
+ __ reset_last_Java_frame(true);
|
|
+
|
|
+ __ leave();
|
|
+
|
|
+ // check for pending exceptions
|
|
+#ifdef ASSERT
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ bnez(t0, L);
|
|
+ __ should_not_reach_here();
|
|
+ __ bind(L);
|
|
+#endif // ASSERT
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
|
|
+
|
|
+
|
|
+ // codeBlob framesize is in words (not VMRegImpl::slot_size)
|
|
+ RuntimeStub* stub =
|
|
+ RuntimeStub::new_runtime_stub(name,
|
|
+ &code,
|
|
+ frame_complete,
|
|
+ (framesize >> (LogBytesPerWord - LogBytesPerInt)),
|
|
+ oop_maps, false);
|
|
+ assert(stub != NULL, "create runtime stub fail!");
|
|
+ return stub->entry_point();
|
|
+ }
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ class MontgomeryMultiplyGenerator : public MacroAssembler {
|
|
+
|
|
+ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
|
|
+ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj;
|
|
+
|
|
+ RegSet _toSave;
|
|
+ bool _squaring;
|
|
+
|
|
+ public:
|
|
+ MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
|
|
+ : MacroAssembler(as->code()), _squaring(squaring) {
|
|
+
|
|
+ // Register allocation
|
|
+
|
|
+ Register reg = c_rarg0;
|
|
+ Pa_base = reg; // Argument registers
|
|
+ if (squaring) {
|
|
+ Pb_base = Pa_base;
|
|
+ } else {
|
|
+ Pb_base = ++reg;
|
|
+ }
|
|
+ Pn_base = ++reg;
|
|
+ Rlen= ++reg;
|
|
+ inv = ++reg;
|
|
+ Pm_base = ++reg;
|
|
+
|
|
+ // Working registers:
|
|
+ Ra = ++reg; // The current digit of a, b, n, and m.
|
|
+ Rb = ++reg;
|
|
+ Rm = ++reg;
|
|
+ Rn = ++reg;
|
|
+
|
|
+ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m.
|
|
+ Pb = ++reg;
|
|
+ Pm = ++reg;
|
|
+ Pn = ++reg;
|
|
+
|
|
+ tmp0 = ++reg; // Three registers which form a
|
|
+ tmp1 = ++reg; // triple-precision accumulator.
|
|
+ tmp2 = ++reg;
|
|
+
|
|
+ Ri = x6; // Inner and outer loop indexes.
|
|
+ Rj = x7;
|
|
+
|
|
+ Rhi_ab = x28; // Product registers: low and high parts
|
|
+ Rlo_ab = x29; // of a*b and m*n.
|
|
+ Rhi_mn = x30;
|
|
+ Rlo_mn = x31;
|
|
+
|
|
+ // x18 and up are callee-saved.
|
|
+ _toSave = RegSet::range(x18, reg) + Pm_base;
|
|
+ }
|
|
+
|
|
+ private:
|
|
+ void save_regs() {
|
|
+ push_reg(_toSave, sp);
|
|
+ }
|
|
+
|
|
+ void restore_regs() {
|
|
+ pop_reg(_toSave, sp);
|
|
+ }
|
|
+
|
|
+ template <typename T>
|
|
+ void unroll_2(Register count, T block) {
|
|
+ Label loop, end, odd;
|
|
+ beqz(count, end);
|
|
+ andi(t0, count, 0x1);
|
|
+ bnez(t0, odd);
|
|
+ align(16);
|
|
+ bind(loop);
|
|
+ (this->*block)();
|
|
+ bind(odd);
|
|
+ (this->*block)();
|
|
+ addi(count, count, -2);
|
|
+ bgtz(count, loop);
|
|
+ bind(end);
|
|
+ }
|
|
+
|
|
+ template <typename T>
|
|
+ void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
|
|
+ Label loop, end, odd;
|
|
+ beqz(count, end);
|
|
+ andi(tmp, count, 0x1);
|
|
+ bnez(tmp, odd);
|
|
+ align(16);
|
|
+ bind(loop);
|
|
+ (this->*block)(d, s, tmp);
|
|
+ bind(odd);
|
|
+ (this->*block)(d, s, tmp);
|
|
+ addi(count, count, -2);
|
|
+ bgtz(count, loop);
|
|
+ bind(end);
|
|
+ }
|
|
+
|
|
+ void pre1(RegisterOrConstant i) {
|
|
+ block_comment("pre1");
|
|
+ // Pa = Pa_base;
|
|
+ // Pb = Pb_base + i;
|
|
+ // Pm = Pm_base;
|
|
+ // Pn = Pn_base + i;
|
|
+ // Ra = *Pa;
|
|
+ // Rb = *Pb;
|
|
+ // Rm = *Pm;
|
|
+ // Rn = *Pn;
|
|
+ if (i.is_register()) {
|
|
+ slli(t0, i.as_register(), LogBytesPerWord);
|
|
+ } else {
|
|
+ mv(t0, i.as_constant());
|
|
+ slli(t0, t0, LogBytesPerWord);
|
|
+ }
|
|
+
|
|
+ mv(Pa, Pa_base);
|
|
+ add(Pb, Pb_base, t0);
|
|
+ mv(Pm, Pm_base);
|
|
+ add(Pn, Pn_base, t0);
|
|
+
|
|
+ ld(Ra, Address(Pa));
|
|
+ ld(Rb, Address(Pb));
|
|
+ ld(Rm, Address(Pm));
|
|
+ ld(Rn, Address(Pn));
|
|
+
|
|
+ // Zero the m*n result.
|
|
+ mv(Rhi_mn, zr);
|
|
+ mv(Rlo_mn, zr);
|
|
+ }
|
|
+
|
|
+ // The core multiply-accumulate step of a Montgomery
|
|
+ // multiplication. The idea is to schedule operations as a
|
|
+ // pipeline so that instructions with long latencies (loads and
|
|
+ // multiplies) have time to complete before their results are
|
|
+ // used. This most benefits in-order implementations of the
|
|
+ // architecture but out-of-order ones also benefit.
|
|
+ void step() {
|
|
+ block_comment("step");
|
|
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
+ // Ra = *++Pa;
|
|
+ // Rb = *--Pb;
|
|
+ mulhu(Rhi_ab, Ra, Rb);
|
|
+ mul(Rlo_ab, Ra, Rb);
|
|
+ addi(Pa, Pa, wordSize);
|
|
+ ld(Ra, Address(Pa));
|
|
+ addi(Pb, Pb, -wordSize);
|
|
+ ld(Rb, Address(Pb));
|
|
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the
|
|
+ // previous iteration.
|
|
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
+ // Rm = *++Pm;
|
|
+ // Rn = *--Pn;
|
|
+ mulhu(Rhi_mn, Rm, Rn);
|
|
+ mul(Rlo_mn, Rm, Rn);
|
|
+ addi(Pm, Pm, wordSize);
|
|
+ ld(Rm, Address(Pm));
|
|
+ addi(Pn, Pn, -wordSize);
|
|
+ ld(Rn, Address(Pn));
|
|
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
+ }
|
|
+
|
|
+ void post1() {
|
|
+ block_comment("post1");
|
|
+
|
|
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
+ // Ra = *++Pa;
|
|
+ // Rb = *--Pb;
|
|
+ mulhu(Rhi_ab, Ra, Rb);
|
|
+ mul(Rlo_ab, Ra, Rb);
|
|
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
+
|
|
+ // *Pm = Rm = tmp0 * inv;
|
|
+ mul(Rm, tmp0, inv);
|
|
+ sd(Rm, Address(Pm));
|
|
+
|
|
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
+ mulhu(Rhi_mn, Rm, Rn);
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
|
|
+ {
|
|
+ mul(Rlo_mn, Rm, Rn);
|
|
+ add(Rlo_mn, tmp0, Rlo_mn);
|
|
+ Label ok;
|
|
+ beqz(Rlo_mn, ok);
|
|
+ stop("broken Montgomery multiply");
|
|
+ bind(ok);
|
|
+ }
|
|
+#endif
|
|
+ // We have very carefully set things up so that
|
|
+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
|
|
+ // the lower half of Rm * Rn because we know the result already:
|
|
+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
|
|
+ // tmp0 != 0. So, rather than do a mul and a cad we just set
|
|
+ // the carry flag iff tmp0 is nonzero.
|
|
+ //
|
|
+ // mul(Rlo_mn, Rm, Rn);
|
|
+ // cad(zr, tmp0, Rlo_mn);
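+ // Note: (unsigned) tmp0 - 1 < tmp0 holds exactly when tmp0 != 0, because
+ // for tmp0 == 0 the subtraction wraps around to the maximum value, so the
+ // sltu below computes the desired carry directly.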
+ addi(t0, tmp0, -1);
|
|
+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
|
|
+ cadc(tmp0, tmp1, Rhi_mn, t0);
|
|
+ adc(tmp1, tmp2, zr, t0);
|
|
+ mv(tmp2, zr);
|
|
+ }
|
|
+
|
|
+ void pre2(Register i, Register len) {
|
|
+ block_comment("pre2");
|
|
+ // Pa = Pa_base + i-len;
|
|
+ // Pb = Pb_base + len;
|
|
+ // Pm = Pm_base + i-len;
|
|
+ // Pn = Pn_base + len;
|
|
+
|
|
+ sub(Rj, i, len);
|
|
+ // Rj == i-len
|
|
+
|
|
+ // Ra as temp register
|
|
+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord);
|
|
+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord);
|
|
+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord);
|
|
+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord);
|
|
+
|
|
+ // Ra = *++Pa;
|
|
+ // Rb = *--Pb;
|
|
+ // Rm = *++Pm;
|
|
+ // Rn = *--Pn;
|
|
+ add(Pa, Pa, wordSize);
|
|
+ ld(Ra, Address(Pa));
|
|
+ add(Pb, Pb, -wordSize);
|
|
+ ld(Rb, Address(Pb));
|
|
+ add(Pm, Pm, wordSize);
|
|
+ ld(Rm, Address(Pm));
|
|
+ add(Pn, Pn, -wordSize);
|
|
+ ld(Rn, Address(Pn));
|
|
+
|
|
+ mv(Rhi_mn, zr);
|
|
+ mv(Rlo_mn, zr);
|
|
+ }
|
|
+
|
|
+ void post2(Register i, Register len) {
|
|
+ block_comment("post2");
|
|
+ sub(Rj, i, len);
|
|
+
|
|
+ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part
|
|
+
|
|
+ // As soon as we know the least significant digit of our result,
|
|
+ // store it.
|
|
+ // Pm_base[i-len] = tmp0;
|
|
+ // Rj as temp register
|
|
+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord);
|
|
+ sd(tmp0, Address(Rj));
|
|
+
|
|
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
+ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part
|
|
+ adc(tmp1, tmp2, zr, t0);
|
|
+ mv(tmp2, zr);
|
|
+ }
|
|
+
|
|
+ // A carry in tmp0 after Montgomery multiplication means that we
|
|
+ // should subtract multiples of n from our result in m. We'll
|
|
+ // keep doing that until there is no carry.
|
|
+ void normalize(Register len) {
|
|
+ block_comment("normalize");
|
|
+ // while (tmp0)
|
|
+ // tmp0 = sub(Pm_base, Pn_base, tmp0, len);
|
|
+ Label loop, post, again;
|
|
+ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now
|
|
+ beqz(tmp0, post); {
|
|
+ bind(again); {
|
|
+ mv(i, zr);
|
|
+ mv(cnt, len);
|
|
+ slli(Rn, i, LogBytesPerWord);
|
|
+ add(Rm, Pm_base, Rn);
|
|
+ ld(Rm, Address(Rm));
|
|
+ add(Rn, Pn_base, Rn);
|
|
+ ld(Rn, Address(Rn));
|
|
+ mv(t0, 1); // set carry flag, i.e. no borrow
|
|
+ align(16);
|
|
+ bind(loop); {
|
|
+ notr(Rn, Rn);
|
|
+ add(Rm, Rm, t0);
|
|
+ add(Rm, Rm, Rn);
|
|
+ sltu(t0, Rm, Rn);
|
|
+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register
|
|
+ sd(Rm, Address(Rn));
|
|
+ add(i, i, 1);
|
|
+ slli(Rn, i, LogBytesPerWord);
|
|
+ add(Rm, Pm_base, Rn);
|
|
+ ld(Rm, Address(Rm));
|
|
+ add(Rn, Pn_base, Rn);
|
|
+ ld(Rn, Address(Rn));
|
|
+ sub(cnt, cnt, 1);
|
|
+ } bnez(cnt, loop);
|
|
+ addi(tmp0, tmp0, -1);
|
|
+ add(tmp0, tmp0, t0);
|
|
+ } bnez(tmp0, again);
|
|
+ } bind(post);
|
|
+ }
|
|
+
|
|
+ // Move memory at s to d, reversing words.
|
|
+ // Increments d to end of copied memory
|
|
+ // Destroys tmp1, tmp2
|
|
+ // Preserves len
|
|
+ // Leaves s pointing to the address which was in d at start
|
|
+ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
|
|
+ assert(tmp1 < x28 && tmp2 < x28, "register corruption");
|
|
+
|
|
+ shadd(s, len, s, tmp1, LogBytesPerWord);
|
|
+ mv(tmp1, len);
|
|
+ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
|
|
+ slli(tmp1, len, LogBytesPerWord);
|
|
+ sub(s, d, tmp1);
|
|
+ }
|
|
+ // [63...0] -> [31...0][63...32]
|
|
+ void reverse1(Register d, Register s, Register tmp) {
|
|
+ addi(s, s, -wordSize);
|
|
+ ld(tmp, Address(s));
|
|
+ ror_imm(tmp, tmp, 32, t0);
|
|
+ sd(tmp, Address(d));
|
|
+ addi(d, d, wordSize);
|
|
+ }
|
|
+
|
|
+ void step_squaring() {
|
|
+ // An extra ACC
|
|
+ step();
|
|
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
+ }
|
|
+
|
|
+ void last_squaring(Register i) {
|
|
+ Label dont;
|
|
+ // if ((i & 1) == 0) {
|
|
+ andi(t0, i, 0x1);
|
|
+ bnez(t0, dont); {
|
|
+ // MACC(Ra, Rb, tmp0, tmp1, tmp2);
|
|
+ // Ra = *++Pa;
|
|
+ // Rb = *--Pb;
|
|
+ mulhu(Rhi_ab, Ra, Rb);
|
|
+ mul(Rlo_ab, Ra, Rb);
|
|
+ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
|
|
+ } bind(dont);
|
|
+ }
|
|
+
|
|
+ void extra_step_squaring() {
|
|
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
+
|
|
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
+ // Rm = *++Pm;
|
|
+ // Rn = *--Pn;
|
|
+ mulhu(Rhi_mn, Rm, Rn);
|
|
+ mul(Rlo_mn, Rm, Rn);
|
|
+ addi(Pm, Pm, wordSize);
|
|
+ ld(Rm, Address(Pm));
|
|
+ addi(Pn, Pn, -wordSize);
|
|
+ ld(Rn, Address(Pn));
|
|
+ }
|
|
+
|
|
+
|
|
+ void post1_squaring() {
|
|
+ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n
|
|
+
|
|
+ // *Pm = Rm = tmp0 * inv;
|
|
+ mul(Rm, tmp0, inv);
|
|
+ sd(Rm, Address(Pm));
|
|
+
|
|
+ // MACC(Rm, Rn, tmp0, tmp1, tmp2);
|
|
+ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
|
|
+ mulhu(Rhi_mn, Rm, Rn);
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
|
|
+ {
|
|
+ mul(Rlo_mn, Rm, Rn);
|
|
+ add(Rlo_mn, tmp0, Rlo_mn);
|
|
+ Label ok;
|
|
+ beqz(Rlo_mn, ok); {
|
|
+ stop("broken Montgomery multiply");
|
|
+ } bind(ok);
|
|
+ }
|
|
+#endif
|
|
+ // We have very carefully set things up so that
|
|
+ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
|
|
+ // the lower half of Rm * Rn because we know the result already:
|
|
+ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff
|
|
+ // tmp0 != 0. So, rather than do a mul and a cad we just set
|
|
+ // the carry flag iff tmp0 is nonzero.
|
|
+ //
|
|
+ // mul(Rlo_mn, Rm, Rn);
|
|
+ // cad(zr, tmp0, Rlo_mn);
|
|
+ addi(t0, tmp0, -1);
|
|
+ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
|
|
+ cadc(tmp0, tmp1, Rhi_mn, t0);
|
|
+ adc(tmp1, tmp2, zr, t0);
|
|
+ mv(tmp2, zr);
|
|
+ }
|
|
+
|
|
+ // use t0 as carry
|
|
+ void acc(Register Rhi, Register Rlo,
|
|
+ Register tmp0, Register tmp1, Register tmp2) {
|
|
+ cad(tmp0, tmp0, Rlo, t0);
|
|
+ cadc(tmp1, tmp1, Rhi, t0);
|
|
+ adc(tmp2, tmp2, zr, t0);
|
|
+ }
|
|
+
|
|
+ public:
|
|
+ /**
|
|
+ * Fast Montgomery multiplication. The derivation of the
|
|
+ * algorithm is in A Cryptographic Library for the Motorola
|
|
+ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
|
|
+ *
|
|
+ * Arguments:
|
|
+ *
|
|
+ * Inputs for multiplication:
|
|
+ * c_rarg0 - int array elements a
|
|
+ * c_rarg1 - int array elements b
|
|
+ * c_rarg2 - int array elements n (the modulus)
|
|
+ * c_rarg3 - int length
|
|
+ * c_rarg4 - int inv
|
|
+ * c_rarg5 - int array elements m (the result)
|
|
+ *
|
|
+ * Inputs for squaring:
|
|
+ * c_rarg0 - int array elements a
|
|
+ * c_rarg1 - int array elements n (the modulus)
|
|
+ * c_rarg2 - int length
|
|
+ * c_rarg3 - int inv
|
|
+ * c_rarg4 - int array elements m (the result)
|
|
+ *
|
|
+ */
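+ // A rough C-style sketch of the word-level algorithm the generated code
+ // follows (illustrative only; MACC(x, y) accumulates the 128-bit product
+ // x*y into the tmp0/tmp1/tmp2 triple, matching acc()/step() below):
+ //   for (i = 0; i < len; i++) {
+ //     for (j = 0; j < i; j++) { MACC(a[j], b[i-j]); MACC(m[j], n[i-j]); }
+ //     MACC(a[i], b[0]);
+ //     m[i] = tmp0 * inv;               // chosen so the low word cancels
+ //     MACC(m[i], n[0]);                // now tmp0 == 0
+ //     tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+ //   }
+ //   for (i = len; i < 2 * len; i++) {
+ //     for (j = i - len + 1; j < len; j++) { MACC(a[j], b[i-j]); MACC(m[j], n[i-j]); }
+ //     m[i - len] = tmp0;
+ //     tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+ //   }
+ //   while (tmp0) tmp0 = sub(m, n, tmp0, len);   // normalize()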
+ address generate_multiply() {
|
|
+ Label argh, nothing;
|
|
+ bind(argh);
|
|
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
|
|
+
|
|
+ align(CodeEntryAlignment);
|
|
+ address entry = pc();
|
|
+
|
|
+ beqz(Rlen, nothing);
|
|
+
|
|
+ enter();
|
|
+
|
|
+ // Make room.
|
|
+ mv(Ra, 512);
|
|
+ bgt(Rlen, Ra, argh);
|
|
+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
|
|
+ sub(Ra, sp, Ra);
|
|
+ andi(sp, Ra, -2 * wordSize);
|
|
+
|
|
+ srliw(Rlen, Rlen, 1); // length in longwords = len/2
|
|
+
|
|
+ {
|
|
+ // Copy input args, reversing as we go. We use Ra as a
|
|
+ // temporary variable.
|
|
+ reverse(Ra, Pa_base, Rlen, Ri, Rj);
|
|
+ if (!_squaring)
|
|
+ reverse(Ra, Pb_base, Rlen, Ri, Rj);
|
|
+ reverse(Ra, Pn_base, Rlen, Ri, Rj);
|
|
+ }
|
|
+
|
|
+ // Push all call-saved registers and also Pm_base which we'll need
|
|
+ // at the end.
|
|
+ save_regs();
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
|
|
+ {
|
|
+ ld(Rn, Address(Pn_base));
|
|
+ mul(Rlo_mn, Rn, inv);
|
|
+ mv(t0, -1);
|
|
+ Label ok;
|
|
+ beq(Rlo_mn, t0, ok);
|
|
+ stop("broken inverse in Montgomery multiply");
|
|
+ bind(ok);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ mv(Pm_base, Ra);
|
|
+
|
|
+ mv(tmp0, zr);
|
|
+ mv(tmp1, zr);
|
|
+ mv(tmp2, zr);
|
|
+
|
|
+ block_comment("for (int i = 0; i < len; i++) {");
|
|
+ mv(Ri, zr); {
|
|
+ Label loop, end;
|
|
+ bge(Ri, Rlen, end);
|
|
+
|
|
+ bind(loop);
|
|
+ pre1(Ri);
|
|
+
|
|
+ block_comment(" for (j = i; j; j--) {"); {
|
|
+ mv(Rj, Ri);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ post1();
|
|
+ addw(Ri, Ri, 1);
|
|
+ blt(Ri, Rlen, loop);
|
|
+ bind(end);
|
|
+ block_comment("} // i");
|
|
+ }
|
|
+
|
|
+ block_comment("for (int i = len; i < 2*len; i++) {");
|
|
+ mv(Ri, Rlen); {
|
|
+ Label loop, end;
|
|
+ slli(Rj, Rlen, 1); // Rj as temp register
|
|
+ bge(Ri, Rj, end);
|
|
+
|
|
+ bind(loop);
|
|
+ pre2(Ri, Rlen);
|
|
+
|
|
+ block_comment(" for (j = len*2-i-1; j; j--) {"); {
|
|
+ slliw(Rj, Rlen, 1);
|
|
+ subw(Rj, Rj, Ri);
|
|
+ subw(Rj, Rj, 1);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ post2(Ri, Rlen);
|
|
+ addw(Ri, Ri, 1);
|
|
+ slli(Rj, Rlen, 1);
|
|
+ blt(Ri, Rj, loop);
|
|
+ bind(end);
|
|
+ }
|
|
+ block_comment("} // i");
|
|
+
|
|
+
|
|
+ normalize(Rlen);
|
|
+
|
|
+ mv(Ra, Pm_base); // Save Pm_base in Ra
|
|
+ restore_regs(); // Restore caller's Pm_base
|
|
+
|
|
+ // Copy our result into caller's Pm_base
|
|
+ reverse(Pm_base, Ra, Rlen, Ri, Rj);
|
|
+
|
|
+ leave();
|
|
+ bind(nothing);
|
|
+ ret();
|
|
+
|
|
+ return entry;
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ *
|
|
+ * Arguments:
|
|
+ *
|
|
+ * Inputs:
|
|
+ * c_rarg0 - int array elements a
|
|
+ * c_rarg1 - int array elements n (the modulus)
|
|
+ * c_rarg2 - int length
|
|
+ * c_rarg3 - int inv
|
|
+ * c_rarg4 - int array elements m (the result)
|
|
+ *
|
|
+ */
|
|
+ address generate_square() {
|
|
+ Label argh;
|
|
+ bind(argh);
|
|
+ stop("MontgomeryMultiply total_allocation must be <= 8192");
|
|
+
|
|
+ align(CodeEntryAlignment);
|
|
+ address entry = pc();
|
|
+
|
|
+ enter();
|
|
+
|
|
+ // Make room.
|
|
+ mv(Ra, 512);
|
|
+ bgt(Rlen, Ra, argh);
|
|
+ slli(Ra, Rlen, exact_log2(4 * sizeof(jint)));
|
|
+ sub(Ra, sp, Ra);
|
|
+ andi(sp, Ra, -2 * wordSize);
|
|
+
|
|
+ srliw(Rlen, Rlen, 1); // length in longwords = len/2
|
|
+
|
|
+ {
|
|
+ // Copy input args, reversing as we go. We use Ra as a
|
|
+ // temporary variable.
|
|
+ reverse(Ra, Pa_base, Rlen, Ri, Rj);
|
|
+ reverse(Ra, Pn_base, Rlen, Ri, Rj);
|
|
+ }
|
|
+
|
|
+ // Push all call-saved registers and also Pm_base which we'll need
|
|
+ // at the end.
|
|
+ save_regs();
|
|
+
|
|
+ mv(Pm_base, Ra);
|
|
+
|
|
+ mv(tmp0, zr);
|
|
+ mv(tmp1, zr);
|
|
+ mv(tmp2, zr);
|
|
+
|
|
+ block_comment("for (int i = 0; i < len; i++) {");
|
|
+ mv(Ri, zr); {
|
|
+ Label loop, end;
|
|
+ bind(loop);
|
|
+ bge(Ri, Rlen, end);
|
|
+
|
|
+ pre1(Ri);
|
|
+
|
|
+ block_comment("for (j = (i+1)/2; j; j--) {"); {
|
|
+ addi(Rj, Ri, 1);
|
|
+ srliw(Rj, Rj, 1);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ last_squaring(Ri);
|
|
+
|
|
+ block_comment(" for (j = i/2; j; j--) {"); {
|
|
+ srliw(Rj, Ri, 1);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ post1_squaring();
|
|
+ addi(Ri, Ri, 1);
|
|
+ blt(Ri, Rlen, loop);
|
|
+
|
|
+ bind(end);
|
|
+ block_comment("} // i");
|
|
+ }
|
|
+
|
|
+ block_comment("for (int i = len; i < 2*len; i++) {");
|
|
+ mv(Ri, Rlen); {
|
|
+ Label loop, end;
|
|
+ bind(loop);
|
|
+ slli(Rj, Rlen, 1);
|
|
+ bge(Ri, Rj, end);
|
|
+
|
|
+ pre2(Ri, Rlen);
|
|
+
|
|
+ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); {
|
|
+ slli(Rj, Rlen, 1);
|
|
+ sub(Rj, Rj, Ri);
|
|
+ sub(Rj, Rj, 1);
|
|
+ srliw(Rj, Rj, 1);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ last_squaring(Ri);
|
|
+
|
|
+ block_comment(" for (j = (2*len-i)/2; j; j--) {"); {
|
|
+ slli(Rj, Rlen, 1);
|
|
+ sub(Rj, Rj, Ri);
|
|
+ srliw(Rj, Rj, 1);
|
|
+ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
|
|
+ } block_comment(" } // j");
|
|
+
|
|
+ post2(Ri, Rlen);
|
|
+ addi(Ri, Ri, 1);
|
|
+ slli(t0, Rlen, 1);
|
|
+ blt(Ri, t0, loop);
|
|
+
|
|
+ bind(end);
|
|
+ block_comment("} // i");
|
|
+ }
|
|
+
|
|
+ normalize(Rlen);
|
|
+
|
|
+ mv(Ra, Pm_base); // Save Pm_base in Ra
|
|
+ restore_regs(); // Restore caller's Pm_base
|
|
+
|
|
+ // Copy our result into caller's Pm_base
|
|
+ reverse(Pm_base, Ra, Rlen, Ri, Rj);
|
|
+
|
|
+ leave();
|
|
+ ret();
|
|
+
|
|
+ return entry;
|
|
+ }
|
|
+ };
|
|
+#endif // COMPILER2
|
|
+
|
|
+ // Initialization
|
|
+ void generate_initial() {
|
|
+ // Generate initial stubs and initialize the entry points
+
+ // entry points that exist in all platforms. Note: This is code
|
|
+ // that could be shared among different platforms - however the
|
|
+ // benefit seems to be smaller than the disadvantage of having a
|
|
+ // much more complicated generator structure. See also comment in
|
|
+ // stubRoutines.hpp.
|
|
+
|
|
+ StubRoutines::_forward_exception_entry = generate_forward_exception();
|
|
+
|
|
+ StubRoutines::_call_stub_entry =
|
|
+ generate_call_stub(StubRoutines::_call_stub_return_address);
|
|
+
|
|
+ // is referenced by megamorphic calls
|
|
+ StubRoutines::_catch_exception_entry = generate_catch_exception();
|
|
+
|
|
+ // Build this early so it's available for the interpreter.
|
|
+ StubRoutines::_throw_StackOverflowError_entry =
|
|
+ generate_throw_exception("StackOverflowError throw_exception",
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::throw_StackOverflowError));
|
|
+ StubRoutines::_throw_delayed_StackOverflowError_entry =
|
|
+ generate_throw_exception("delayed StackOverflowError throw_exception",
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::throw_delayed_StackOverflowError));
|
|
+ }
|
|
+
|
|
+ void generate_all() {
|
|
+ // support for verify_oop (must happen after universe_init)
|
|
+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
|
|
+ StubRoutines::_throw_AbstractMethodError_entry =
|
|
+ generate_throw_exception("AbstractMethodError throw_exception",
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::
|
|
+ throw_AbstractMethodError));
|
|
+
|
|
+ StubRoutines::_throw_IncompatibleClassChangeError_entry =
|
|
+ generate_throw_exception("IncompatibleClassChangeError throw_exception",
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::
|
|
+ throw_IncompatibleClassChangeError));
|
|
+
|
|
+ StubRoutines::_throw_NullPointerException_at_call_entry =
|
|
+ generate_throw_exception("NullPointerException at call throw_exception",
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::
|
|
+ throw_NullPointerException_at_call));
|
|
+ // arraycopy stubs used by compilers
|
|
+ generate_arraycopy_stubs();
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ if (UseMulAddIntrinsic) {
|
|
+ StubRoutines::_mulAdd = generate_mulAdd();
|
|
+ }
|
|
+
|
|
+ if (UseMultiplyToLenIntrinsic) {
|
|
+ StubRoutines::_multiplyToLen = generate_multiplyToLen();
|
|
+ }
|
|
+
|
|
+ if (UseSquareToLenIntrinsic) {
|
|
+ StubRoutines::_squareToLen = generate_squareToLen();
|
|
+ }
|
|
+
|
|
+ generate_compare_long_strings();
|
|
+
|
|
+ generate_string_indexof_stubs();
|
|
+
|
|
+ if (UseMontgomeryMultiplyIntrinsic) {
|
|
+ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
|
|
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
|
|
+ StubRoutines::_montgomeryMultiply = g.generate_multiply();
|
|
+ }
|
|
+
|
|
+ if (UseMontgomerySquareIntrinsic) {
|
|
+ StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
|
|
+ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
|
|
+ StubRoutines::_montgomerySquare = g.generate_square();
|
|
+ }
|
|
+#endif // COMPILER2
|
|
+ // Safefetch stubs.
|
|
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
|
|
+ &StubRoutines::_safefetch32_fault_pc,
|
|
+ &StubRoutines::_safefetch32_continuation_pc);
|
|
+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
|
|
+ &StubRoutines::_safefetchN_fault_pc,
|
|
+ &StubRoutines::_safefetchN_continuation_pc);
|
|
+
|
|
+ StubRoutines::riscv::set_completed();
|
|
+ }
|
|
+
|
|
+ public:
|
|
+ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
|
|
+ if (all) {
|
|
+ generate_all();
|
|
+ } else {
|
|
+ generate_initial();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ~StubGenerator() {}
|
|
+}; // end class declaration
|
|
+
|
|
+void StubGenerator_generate(CodeBuffer* code, bool all) {
|
|
+ StubGenerator g(code, all);
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..633108b95
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
|
|
@@ -0,0 +1,60 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "runtime/deoptimization.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "runtime/thread.inline.hpp"
|
|
+#include "utilities/globalDefinitions.hpp"
|
|
+
|
|
+// Implementation of the platform-specific part of StubRoutines - for
|
|
+// a description of how to extend it, see the stubRoutines.hpp file.
|
|
+
|
|
+address StubRoutines::riscv::_get_previous_fp_entry = NULL;
|
|
+address StubRoutines::riscv::_get_previous_sp_entry = NULL;
|
|
+
|
|
+address StubRoutines::riscv::_f2i_fixup = NULL;
|
|
+address StubRoutines::riscv::_f2l_fixup = NULL;
|
|
+address StubRoutines::riscv::_d2i_fixup = NULL;
|
|
+address StubRoutines::riscv::_d2l_fixup = NULL;
|
|
+address StubRoutines::riscv::_float_sign_mask = NULL;
|
|
+address StubRoutines::riscv::_float_sign_flip = NULL;
|
|
+address StubRoutines::riscv::_double_sign_mask = NULL;
|
|
+address StubRoutines::riscv::_double_sign_flip = NULL;
|
|
+address StubRoutines::riscv::_zero_blocks = NULL;
|
|
+address StubRoutines::riscv::_has_negatives = NULL;
|
|
+address StubRoutines::riscv::_has_negatives_long = NULL;
|
|
+address StubRoutines::riscv::_compare_long_string_LL = NULL;
|
|
+address StubRoutines::riscv::_compare_long_string_UU = NULL;
|
|
+address StubRoutines::riscv::_compare_long_string_LU = NULL;
|
|
+address StubRoutines::riscv::_compare_long_string_UL = NULL;
|
|
+address StubRoutines::riscv::_string_indexof_linear_ll = NULL;
|
|
+address StubRoutines::riscv::_string_indexof_linear_uu = NULL;
|
|
+address StubRoutines::riscv::_string_indexof_linear_ul = NULL;
|
|
+address StubRoutines::riscv::_large_byte_array_inflate = NULL;
|
|
+
|
|
+bool StubRoutines::riscv::_completed = false;
|
|
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..8aa81980e
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
|
|
@@ -0,0 +1,179 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP
|
|
+#define CPU_RISCV_STUBROUTINES_RISCV_HPP
|
|
+
|
|
+// This file holds the platform specific parts of the StubRoutines
|
|
+// definition. See stubRoutines.hpp for a description on how to
|
|
+// extend it.
|
|
+
|
|
+static bool returns_to_call_stub(address return_pc) {
|
|
+ return return_pc == _call_stub_return_address;
|
|
+}
|
|
+
|
|
+enum platform_dependent_constants {
|
|
+ code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
|
|
+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small)
|
|
+};
|
|
+
|
|
+class riscv {
|
|
+ friend class StubGenerator;
|
|
+
|
|
+ private:
|
|
+ static address _get_previous_fp_entry;
|
|
+ static address _get_previous_sp_entry;
|
|
+
|
|
+ static address _f2i_fixup;
|
|
+ static address _f2l_fixup;
|
|
+ static address _d2i_fixup;
|
|
+ static address _d2l_fixup;
|
|
+
|
|
+ static address _float_sign_mask;
|
|
+ static address _float_sign_flip;
|
|
+ static address _double_sign_mask;
|
|
+ static address _double_sign_flip;
|
|
+
|
|
+ static address _zero_blocks;
|
|
+
|
|
+ static address _has_negatives;
|
|
+ static address _has_negatives_long;
|
|
+ static address _compare_long_string_LL;
|
|
+ static address _compare_long_string_LU;
|
|
+ static address _compare_long_string_UL;
|
|
+ static address _compare_long_string_UU;
|
|
+ static address _string_indexof_linear_ll;
|
|
+ static address _string_indexof_linear_uu;
|
|
+ static address _string_indexof_linear_ul;
|
|
+ static address _large_byte_array_inflate;
|
|
+ static bool _completed;
|
|
+
|
|
+ public:
|
|
+
|
|
+ static address get_previous_fp_entry()
|
|
+ {
|
|
+ return _get_previous_fp_entry;
|
|
+ }
|
|
+
|
|
+ static address get_previous_sp_entry()
|
|
+ {
|
|
+ return _get_previous_sp_entry;
|
|
+ }
|
|
+
|
|
+ static address f2i_fixup()
|
|
+ {
|
|
+ return _f2i_fixup;
|
|
+ }
|
|
+
|
|
+ static address f2l_fixup()
|
|
+ {
|
|
+ return _f2l_fixup;
|
|
+ }
|
|
+
|
|
+ static address d2i_fixup()
|
|
+ {
|
|
+ return _d2i_fixup;
|
|
+ }
|
|
+
|
|
+ static address d2l_fixup()
|
|
+ {
|
|
+ return _d2l_fixup;
|
|
+ }
|
|
+
|
|
+ static address float_sign_mask()
|
|
+ {
|
|
+ return _float_sign_mask;
|
|
+ }
|
|
+
|
|
+ static address float_sign_flip()
|
|
+ {
|
|
+ return _float_sign_flip;
|
|
+ }
|
|
+
|
|
+ static address double_sign_mask()
|
|
+ {
|
|
+ return _double_sign_mask;
|
|
+ }
|
|
+
|
|
+ static address double_sign_flip()
|
|
+ {
|
|
+ return _double_sign_flip;
|
|
+ }
|
|
+
|
|
+ static address zero_blocks() {
|
|
+ return _zero_blocks;
|
|
+ }
|
|
+
|
|
+ static address has_negatives() {
|
|
+ return _has_negatives;
|
|
+ }
|
|
+
|
|
+ static address has_negatives_long() {
|
|
+ return _has_negatives_long;
|
|
+ }
|
|
+
|
|
+ static address compare_long_string_LL() {
|
|
+ return _compare_long_string_LL;
|
|
+ }
|
|
+
|
|
+ static address compare_long_string_LU() {
|
|
+ return _compare_long_string_LU;
|
|
+ }
|
|
+
|
|
+ static address compare_long_string_UL() {
|
|
+ return _compare_long_string_UL;
|
|
+ }
|
|
+
|
|
+ static address compare_long_string_UU() {
|
|
+ return _compare_long_string_UU;
|
|
+ }
|
|
+
|
|
+ static address string_indexof_linear_ul() {
|
|
+ return _string_indexof_linear_ul;
|
|
+ }
|
|
+
|
|
+ static address string_indexof_linear_ll() {
|
|
+ return _string_indexof_linear_ll;
|
|
+ }
|
|
+
|
|
+ static address string_indexof_linear_uu() {
|
|
+ return _string_indexof_linear_uu;
|
|
+ }
|
|
+
|
|
+ static address large_byte_array_inflate() {
|
|
+ return _large_byte_array_inflate;
|
|
+ }
|
|
+
|
|
+ static bool complete() {
|
|
+ return _completed;
|
|
+ }
|
|
+
|
|
+ static void set_completed() {
|
|
+ _completed = true;
|
|
+ }
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..f5e212204
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
|
|
@@ -0,0 +1,1841 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "classfile/javaClasses.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "interpreter/bytecodeHistogram.hpp"
|
|
+#include "interpreter/bytecodeTracer.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interpreterRuntime.hpp"
|
|
+#include "interpreter/templateInterpreterGenerator.hpp"
|
|
+#include "interpreter/templateTable.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "oops/arrayOop.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/methodData.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "prims/jvmtiExport.hpp"
|
|
+#include "prims/jvmtiThreadState.hpp"
|
|
+#include "runtime/arguments.hpp"
|
|
+#include "runtime/deoptimization.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "runtime/synchronizer.hpp"
|
|
+#include "runtime/timer.hpp"
|
|
+#include "runtime/vframeArray.hpp"
|
|
+#include "utilities/debug.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+#include <sys/types.h>
|
|
+
|
|
+#ifndef PRODUCT
|
|
+#include "oops/method.hpp"
|
|
+#endif // !PRODUCT
|
|
+
|
|
+// Size of interpreter code. Increase if too small. Interpreter will
|
|
+// fail with a guarantee ("not enough space for interpreter generation")
+// if too small.
|
|
+// Run with +PrintInterpreter to get the VM to print out the size.
|
|
+// Max size with JVMTI
|
|
+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024;
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
|
|
+ address entry = __ pc();
|
|
+
|
|
+ __ andi(esp, esp, -16);
|
|
+ __ mv(c_rarg3, esp);
|
|
+ // xmethod
|
|
+ // xlocals
|
|
+ // c_rarg3: first stack arg - wordSize
|
|
+ // adjust sp
|
|
+
|
|
+ __ addi(sp, c_rarg3, -18 * wordSize);
|
|
+ __ addi(sp, sp, -2 * wordSize);
|
|
+ __ sd(ra, Address(sp, 0));
|
|
+
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::slow_signature_handler),
|
|
+ xmethod, xlocals, c_rarg3);
|
|
+
|
|
+ // x10: result handler
|
|
+
|
|
+ // Stack layout:
|
|
+ // sp: return address <- sp
|
|
+ // 1 garbage
|
|
+ // 8 integer args (if static first is unused)
|
|
+ // 1 float/double identifiers
|
|
+ // 8 double args
|
|
+ // stack args <- esp
|
|
+ // garbage
|
|
+ // expression stack bottom
|
|
+ // bcp (NULL)
|
|
+ // ...
|
|
+
|
|
+ // Restore RA
|
|
+ __ ld(ra, Address(sp, 0));
|
|
+ __ addi(sp, sp , 2 * wordSize);
|
|
+
|
|
+ // Do FP first so we can use c_rarg3 as temp
|
|
+ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers
|
|
+
|
|
+ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
|
|
+ const FloatRegister r = g_FPArgReg[i];
|
|
+ Label d, done;
|
|
+
|
|
+ __ andi(t0, c_rarg3, 1UL << i);
|
|
+ __ bnez(t0, d);
|
|
+ __ flw(r, Address(sp, (10 + i) * wordSize));
|
|
+ __ j(done);
|
|
+ __ bind(d);
|
|
+ __ fld(r, Address(sp, (10 + i) * wordSize));
|
|
+ __ bind(done);
|
|
+ }
|
|
+
|
|
+ // c_rarg0 contains the result from the call of
|
|
+ // InterpreterRuntime::slow_signature_handler so we don't touch it
|
|
+ // here. It will be loaded with the JNIEnv* later.
|
|
+ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) {
|
|
+ const Register rm = g_INTArgReg[i];
|
|
+ __ ld(rm, Address(sp, i * wordSize));
|
|
+ }
|
|
+
|
|
+ __ addi(sp, sp, 18 * wordSize);
|
|
+ __ ret();
|
|
+
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+// Various method entries
|
|
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
|
|
+ // xmethod: Method*
|
|
+ // x30: sender sp
|
|
+ // esp: args
|
|
+
|
|
+ if (!InlineIntrinsics) {
|
|
+ return NULL; // Generate a vanilla entry
|
|
+ }
|
|
+
|
|
+ // These don't need a safepoint check because they aren't virtually
|
|
+ // callable. We won't enter these intrinsics from compiled code.
|
|
+ // If in the future we added an intrinsic which was virtually callable
|
|
+ // we'd have to worry about how to safepoint so that this code is used.
|
|
+
|
|
+ // mathematical functions inlined by compiler
|
|
+ // (interpreter must provide identical implementation
|
|
+ // in order to avoid monotonicity bugs when switching
|
|
+ // from interpreter to compiler in the middle of some
|
|
+ // computation)
|
|
+ //
|
|
+ // stack:
|
|
+ // [ arg ] <-- esp
|
|
+ // [ arg ]
|
|
+ // retaddr in ra
|
|
+
|
|
+ address fn = NULL;
|
|
+ address entry_point = NULL;
|
|
+ Register continuation = ra;
|
|
+ switch (kind) {
|
|
+ case Interpreter::java_lang_math_abs:
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ fabs_d(f10, f10);
|
|
+ __ mv(sp, x30); // Restore caller's SP
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_sqrt:
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ fsqrt_d(f10, f10);
|
|
+ __ mv(sp, x30);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_sin :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dsin() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_cos :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dcos() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_tan :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dtan() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_log :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dlog() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_log10 :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dlog10() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_exp :
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9; // The first callee-saved register
|
|
+ if (StubRoutines::dexp() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_pow :
|
|
+ entry_point = __ pc();
|
|
+ __ mv(x9, ra);
|
|
+ continuation = x9;
|
|
+ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize));
|
|
+ __ fld(f11, Address(esp));
|
|
+ __ mv(sp, x30);
|
|
+ if (StubRoutines::dpow() == NULL) {
|
|
+ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
|
|
+ } else {
|
|
+ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow());
|
|
+ }
|
|
+ __ mv(t0, fn);
|
|
+ __ jalr(t0);
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_fmaD :
|
|
+ if (UseFMA) {
|
|
+ entry_point = __ pc();
|
|
+ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize));
|
|
+ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize));
|
|
+ __ fld(f12, Address(esp));
|
|
+ __ fmadd_d(f10, f10, f11, f12);
|
|
+ __ mv(sp, x30); // Restore caller's SP
|
|
+ }
|
|
+ break;
|
|
+ case Interpreter::java_lang_math_fmaF :
|
|
+ if (UseFMA) {
|
|
+ entry_point = __ pc();
|
|
+ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize));
|
|
+ __ flw(f11, Address(esp, Interpreter::stackElementSize));
|
|
+ __ flw(f12, Address(esp));
|
|
+ __ fmadd_s(f10, f10, f11, f12);
|
|
+ __ mv(sp, x30); // Restore caller's SP
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ;
|
|
+ }
|
|
+ if (entry_point != NULL) {
|
|
+ __ jr(continuation);
|
|
+ }
|
|
+
|
|
+ return entry_point;
|
|
+}
|
|
+
|
|
+// Abstract method entry
|
|
+// Attempt to execute an abstract method; throws an exception
|
|
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
|
|
+ // xmethod: Method*
|
|
+ // x30: sender SP
|
|
+
|
|
+ address entry_point = __ pc();
|
|
+
|
|
+ // abstract method entry
|
|
+
|
|
+ // pop return address, reset last_sp to NULL
|
|
+ __ empty_expression_stack();
|
|
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
|
|
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
|
|
+
|
|
+ // throw exception
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_AbstractMethodErrorWithMethod),
|
|
+ xmethod);
|
|
+ // the call_VM checks for exception, so we should never return here.
|
|
+ __ should_not_reach_here();
|
|
+
|
|
+ return entry_point;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
|
|
+ address entry = __ pc();
|
|
+
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
|
|
+ __ mv(t1, sp);
|
|
+ // maximal sp for current fp (stack grows negative)
|
|
+ // check if frame is complete
|
|
+ __ bge(t0, t1, L);
|
|
+ __ stop ("interpreter frame not set up");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+ // Restore bcp under the assumption that the current frame is still
|
|
+ // interpreted
|
|
+ __ restore_bcp();
|
|
+
|
|
+ // expression stack must be empty before entering the VM if an
|
|
+ // exception happened
|
|
+ __ empty_expression_stack();
|
|
+ // throw exception
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {
|
|
+ address entry = __ pc();
|
|
+ // expression stack must be empty before entering the VM if an
|
|
+ // exception happened
|
|
+ __ empty_expression_stack();
|
|
+ // setup parameters
|
|
+
|
|
+ // convention: expect aberrant index in register x11
|
|
+ __ zero_extend(c_rarg2, x11, 32);
|
|
+ // convention: expect array in register x13
|
|
+ __ mv(c_rarg1, x13);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::
|
|
+ throw_ArrayIndexOutOfBoundsException),
|
|
+ c_rarg1, c_rarg2);
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
|
|
+ address entry = __ pc();
|
|
+
|
|
+ // object is at TOS
|
|
+ __ pop_reg(c_rarg1);
|
|
+
|
|
+ // expression stack must be empty before entering the VM if an
|
|
+ // exception happened
|
|
+ __ empty_expression_stack();
|
|
+
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::
|
|
+ throw_ClassCastException),
|
|
+ c_rarg1);
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_exception_handler_common(
|
|
+ const char* name, const char* message, bool pass_oop) {
|
|
+ assert(!pass_oop || message == NULL, "either oop or message but not both");
|
|
+ address entry = __ pc();
|
|
+ if (pass_oop) {
|
|
+ // object is at TOS
|
|
+ __ pop_reg(c_rarg2);
|
|
+ }
|
|
+ // expression stack must be empty before entering the VM if an
|
|
+ // exception happened
|
|
+ __ empty_expression_stack();
|
|
+ // setup parameters
|
|
+ __ la(c_rarg1, Address((address)name));
|
|
+ if (pass_oop) {
|
|
+ __ call_VM(x10, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::
|
|
+ create_klass_exception),
|
|
+ c_rarg1, c_rarg2);
|
|
+ } else {
|
|
+ // Kind of lame: ExternalAddress can't take NULL because
|
|
+ // external_word_Relocation will assert.
|
|
+ if (message != NULL) {
|
|
+ __ la(c_rarg2, Address((address)message));
|
|
+ } else {
|
|
+ __ mv(c_rarg2, NULL_WORD);
|
|
+ }
|
|
+ __ call_VM(x10,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
|
|
+ c_rarg1, c_rarg2);
|
|
+ }
|
|
+ // throw exception
|
|
+ __ j(address(Interpreter::throw_exception_entry()));
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
|
|
+ address entry = __ pc();
|
|
+
|
|
+ // Restore stack bottom in case i2c adjusted stack
|
|
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ // and NULL it as marker that esp is now tos until next java call
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ __ restore_bcp();
|
|
+ __ restore_locals();
|
|
+ __ restore_constant_pool_cache();
|
|
+ __ get_method(xmethod);
|
|
+
|
|
+ if (state == atos) {
|
|
+ Register obj = x10;
|
|
+ Register mdp = x11;
|
|
+ Register tmp = x12;
|
|
+ __ ld(mdp, Address(xmethod, Method::method_data_offset()));
|
|
+ __ profile_return_type(mdp, obj, tmp);
|
|
+ }
|
|
+
|
|
+ // Pop N words from the stack
|
|
+ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size);
|
|
+ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
|
|
+ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask);
|
|
+
|
|
+ __ shadd(esp, x11, esp, t0, 3);
|
|
+
|
|
+ // Restore machine SP
|
|
+ __ ld(t0, Address(xmethod, Method::const_offset()));
|
|
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
|
|
+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);
|
|
+ __ ld(t1,
|
|
+ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
|
|
+ __ slli(t0, t0, 3);
|
|
+ __ sub(t0, t1, t0);
|
|
+ __ andi(sp, t0, -16);
|
|
+
|
|
+ __ check_and_handle_popframe(xthread);
|
|
+ __ check_and_handle_earlyret(xthread);
|
|
+
|
|
+ __ get_dispatch();
|
|
+ __ dispatch_next(state, step);
|
|
+
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
|
|
+ int step,
|
|
+ address continuation) {
|
|
+ address entry = __ pc();
|
|
+ __ restore_bcp();
|
|
+ __ restore_locals();
|
|
+ __ restore_constant_pool_cache();
|
|
+ __ get_method(xmethod);
|
|
+ __ get_dispatch();
|
|
+
|
|
+ // Calculate stack limit
|
|
+ __ ld(t0, Address(xmethod, Method::const_offset()));
|
|
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
|
|
+ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);
|
|
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
|
|
+ __ slli(t0, t0, 3);
|
|
+ __ sub(t0, t1, t0);
|
|
+ __ andi(sp, t0, -16);
|
|
+
|
|
+ // Restore expression stack pointer
|
|
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ // NULL last_sp until next java call
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+
|
|
+ // handle exceptions
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, L);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
|
|
+ __ should_not_reach_here();
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ if (continuation == NULL) {
|
|
+ __ dispatch_next(state, step);
|
|
+ } else {
|
|
+ __ jump_to_entry(continuation);
|
|
+ }
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
|
|
+ address entry = __ pc();
|
|
+ if (type == T_OBJECT) {
|
|
+ // retrieve result from frame
|
|
+ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
|
|
+ // and verify it
|
|
+ __ verify_oop(x10);
|
|
+ } else {
|
|
+ __ cast_primitive_type(type, x10);
|
|
+ }
|
|
+
|
|
+ __ ret(); // return from result handler
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state,
|
|
+ address runtime_entry) {
|
|
+ assert_cond(runtime_entry != NULL);
|
|
+ address entry = __ pc();
|
|
+ __ push(state);
|
|
+ __ call_VM(noreg, runtime_entry);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+// Helpers for commoning out cases in the various type of method entries.
|
|
+//
|
|
+
|
|
+
|
|
+// increment invocation count & check for overflow
|
|
+//
|
|
+// Note: checking for negative value instead of overflow
|
|
+// so we have a 'sticky' overflow test
|
|
+//
|
|
+// xmethod: method
|
|
+//
|
|
+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow,
|
|
+ Label* profile_method,
|
|
+ Label* profile_method_continue) {
|
|
+ Label done;
|
|
+ // Note: In tiered we increment counters either in the Method* or in the MDO, depending on whether we're profiling or not.
|
|
+ if (TieredCompilation) {
|
|
+ int increment = InvocationCounter::count_increment;
|
|
+ Label no_mdo;
|
|
+ if (ProfileInterpreter) {
|
|
+ // Are we profiling?
|
|
+ __ ld(x10, Address(xmethod, Method::method_data_offset()));
|
|
+ __ beqz(x10, no_mdo);
|
|
+ // Increment counter in the MDO
|
|
+ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) +
|
|
+ in_bytes(InvocationCounter::counter_offset()));
|
|
+ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset()));
|
|
+ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow);
|
|
+ __ j(done);
|
|
+ }
|
|
+ __ bind(no_mdo);
|
|
+ // Increment counter in MethodCounters
|
|
+ const Address invocation_counter(t1,
|
|
+ MethodCounters::invocation_counter_offset() +
|
|
+ InvocationCounter::counter_offset());
|
|
+ __ get_method_counters(xmethod, t1, done);
|
|
+ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset()));
|
|
+ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow);
|
|
+ __ bind(done);
|
|
+ } else { // not TieredCompilation
|
|
+ const Address backedge_counter(t1,
|
|
+ MethodCounters::backedge_counter_offset() +
|
|
+ InvocationCounter::counter_offset());
|
|
+ const Address invocation_counter(t1,
|
|
+ MethodCounters::invocation_counter_offset() +
|
|
+ InvocationCounter::counter_offset());
|
|
+
|
|
+ __ get_method_counters(xmethod, t1, done);
|
|
+
|
|
+ if (ProfileInterpreter) { // %%% Merge this into MethodData*
|
|
+ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset()));
|
|
+ __ addw(x11, x11, 1);
|
|
+ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset()));
|
|
+ }
|
|
+ // Update standard invocation counters
|
|
+ __ lwu(x11, invocation_counter);
|
|
+ __ lwu(x10, backedge_counter);
|
|
+
|
|
+ __ addw(x11, x11, InvocationCounter::count_increment);
|
|
+ __ andi(x10, x10, InvocationCounter::count_mask_value);
|
|
+
|
|
+ __ sw(x11, invocation_counter);
|
|
+ __ addw(x10, x10, x11); // add both counters
|
|
+
|
|
+ // profile_method is non-null only for interpreted methods, so
+ // profile_method != NULL is equivalent to !native_call
|
|
+
|
|
+ if (ProfileInterpreter && profile_method != NULL) {
|
|
+ // Test to see if we should create a method data oop
|
|
+ __ ld(t1, Address(xmethod, Method::method_counters_offset()));
|
|
+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
|
|
+ __ blt(x10, t1, *profile_method_continue);
|
|
+
|
|
+ // if no method data exists, go to profile_method
|
|
+ __ test_method_data_pointer(t1, *profile_method);
|
|
+ }
|
|
+
|
|
+ {
|
|
+ __ ld(t1, Address(xmethod, Method::method_counters_offset()));
|
|
+ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset())));
|
|
+ __ bltu(x10, t1, done);
|
|
+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here
|
|
+ }
|
|
+ __ bind(done);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
|
|
+ __ mv(c_rarg1, zr);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1);
|
|
+ __ j(do_continue);
|
|
+}
|
|
+
|
|
+// See if we've got enough room on the stack for locals plus overhead
|
|
+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError
|
|
+// without going through the signal handler, i.e., reserved and yellow zones
|
|
+// will not be made usable. The shadow zone must suffice to handle the
|
|
+// overflow.
|
|
+// The expression stack grows down incrementally, so the normal guard
|
|
+// page mechanism will work for that.
|
|
+//
|
|
+// NOTE: The additional locals are also always pushed (this wasn't
+// obvious in generate_method_entry), so the guard should work for them
+// too.
|
|
+//
|
|
+// Args:
|
|
+// x13: number of additional locals this frame needs (what we must check)
|
|
+// xmethod: Method*
|
|
+//
|
|
+// Kills:
|
|
+// x10
|
|
+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
|
|
+
|
|
+ // monitor entry size: see picture of stack set
|
|
+ // (generate_method_entry) and frame_riscv.hpp
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
|
|
+
|
|
+ // total overhead size: entry_size + (saved fp through expr stack
|
|
+ // bottom). be sure to change this if you add/subtract anything
|
|
+ // to/from the overhead area
|
|
+ const int overhead_size =
|
|
+ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size;
|
|
+
|
|
+ const int page_size = os::vm_page_size();
|
|
+
|
|
+ Label after_frame_check;
|
|
+
|
|
+ // see if the frame is greater than one page in size. If so,
|
|
+ // then we need to verify there is enough stack space remaining
|
|
+ // for the additional locals.
|
|
+ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize);
|
|
+ __ bleu(x13, t0, after_frame_check);
|
|
+
|
|
+ // compute sp as if this were going to be the last frame on
|
|
+ // the stack before the red zone
|
|
+
|
|
+ // locals + overhead, in bytes
|
|
+ __ mv(x10, overhead_size);
|
|
+ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter.
|
|
+
|
|
+ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset());
|
|
+ __ ld(t0, stack_limit);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ Label limit_okay;
|
|
+ // Verify that thread stack limit is non-zero.
|
|
+ __ bnez(t0, limit_okay);
|
|
+ __ stop("stack overflow limit is zero");
|
|
+ __ bind(limit_okay);
|
|
+#endif
|
|
+
|
|
+ // Add stack limit to locals.
|
|
+ __ add(x10, x10, t0);
|
|
+
|
|
+ // Check against the current stack bottom.
|
|
+ __ bgtu(sp, x10, after_frame_check);
|
|
+
|
|
+ // Remove the incoming args, peeling the machine SP back to where it
|
|
+ // was in the caller. This is not strictly necessary, but unless we
|
|
+ // do so the stack frame may have a garbage FP; this ensures a
|
|
+ // correct call stack that we can always unwind. The ANDI should be
|
|
+ // unnecessary because the sender SP in x30 is always aligned, but
|
|
+ // it doesn't hurt.
|
|
+ __ andi(sp, x30, -16);
|
|
+
|
|
+ // Note: the restored frame is not necessarily interpreted.
|
|
+ // Use the shared runtime version of the StackOverflowError.
|
|
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
|
|
+ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
|
|
+
|
|
+ // all done with frame size check
|
|
+ __ bind(after_frame_check);
|
|
+}
|
|
+
|
|
+// Allocate monitor and lock method (asm interpreter)
|
|
+//
|
|
+// Args:
|
|
+// xmethod: Method*
|
|
+// xlocals: locals
|
|
+//
|
|
+// Kills:
|
|
+// x10
|
|
+// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
|
|
+// t0, t1 (temporary regs)
|
|
+void TemplateInterpreterGenerator::lock_method() {
|
|
+ // synchronize method
|
|
+ const Address access_flags(xmethod, Method::access_flags_offset());
|
|
+ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
|
|
+
|
|
+#ifdef ASSERT
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false);
|
|
+#endif // ASSERT
|
|
+
|
|
+ // get synchronization object
|
|
+ {
|
|
+ Label done;
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ andi(t0, x10, JVM_ACC_STATIC);
|
|
+ // get receiver (assume this is the frequent case)
|
|
+ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0)));
|
|
+ __ beqz(t0, done);
|
|
+ __ load_mirror(x10, xmethod);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ __ bnez(x10, L);
|
|
+ __ stop("synchronization object is NULL");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+ __ bind(done);
|
|
+ }
|
|
+
|
|
+ // add space for monitor & lock
|
|
+ __ add(sp, sp, - entry_size); // add space for a monitor entry
|
|
+ __ add(esp, esp, - entry_size);
|
|
+ __ mv(t0, esp);
|
|
+ __ sd(t0, monitor_block_top); // set new monitor block top
|
|
+ // store object
|
|
+ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ __ mv(c_rarg1, esp); // object address
|
|
+ __ lock_object(c_rarg1);
|
|
+}
|
|
+
|
|
+// Generate a fixed interpreter frame. This is identical setup for
|
|
+// interpreted methods and for native methods hence the shared code.
|
|
+//
|
|
+// Args:
|
|
+// ra: return address
|
|
+// xmethod: Method*
|
|
+// xlocals: pointer to locals
|
|
+// xcpool: cp cache
|
|
+// stack_pointer: previous sp
|
|
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
|
|
+ // initialize fixed part of activation frame
|
|
+ if (native_call) {
|
|
+ __ add(esp, sp, - 14 * wordSize);
|
|
+ __ mv(xbcp, zr);
|
|
+ __ add(sp, sp, - 14 * wordSize);
|
|
+ // add 2 zero-initialized slots for native calls
|
|
+ __ sd(zr, Address(sp, 13 * wordSize));
|
|
+ __ sd(zr, Address(sp, 12 * wordSize));
|
|
+ } else {
|
|
+ __ add(esp, sp, - 12 * wordSize);
|
|
+ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod
|
|
+ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase
|
|
+ __ add(sp, sp, - 12 * wordSize);
|
|
+ }
|
|
+ __ sd(xbcp, Address(sp, wordSize));
|
|
+ __ sd(esp, Address(sp, 0));
|
|
+
|
|
+ if (ProfileInterpreter) {
|
|
+ Label method_data_continue;
|
|
+ __ ld(t0, Address(xmethod, Method::method_data_offset()));
|
|
+ __ beqz(t0, method_data_continue);
|
|
+ __ la(t0, Address(t0, in_bytes(MethodData::data_offset())));
|
|
+ __ bind(method_data_continue);
|
|
+ }
|
|
+
|
|
+ __ sd(xmethod, Address(sp, 7 * wordSize));
|
|
+ __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize));
|
|
+
|
|
+ // Get mirror and store it in the frame as GC root for this Method*
|
|
+#if INCLUDE_SHENANDOAHGC
|
|
+ if (UseShenandoahGC) {
|
|
+ __ load_mirror(x28, xmethod);
|
|
+ __ sd(x28, Address(sp, 4 * wordSize));
|
|
+ } else
|
|
+#endif
|
|
+ {
|
|
+ __ load_mirror(t0, xmethod);
|
|
+ __ sd(t0, Address(sp, 4 * wordSize));
|
|
+ }
|
|
+ __ sd(zr, Address(sp, 5 * wordSize));
|
|
+
|
|
+ __ load_constant_pool_cache(xcpool, xmethod);
|
|
+ __ sd(xcpool, Address(sp, 3 * wordSize));
|
|
+ __ sd(xlocals, Address(sp, 2 * wordSize));
|
|
+
|
|
+ __ sd(ra, Address(sp, 11 * wordSize));
|
|
+ __ sd(fp, Address(sp, 10 * wordSize));
|
|
+ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp
|
|
+
|
|
+ // set sender sp
|
|
+ // leave last_sp as null
|
|
+ __ sd(x30, Address(sp, 9 * wordSize));
|
|
+ __ sd(zr, Address(sp, 8 * wordSize));
|
|
+
|
|
+ // Move SP out of the way
|
|
+ if (!native_call) {
|
|
+ __ load_max_stack(t0, xmethod);
|
|
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2);
|
|
+ __ slli(t0, t0, 3);
|
|
+ __ sub(t0, sp, t0);
|
|
+ __ andi(sp, t0, -16);
|
|
+ }
|
|
+}
|
|
+
|
|
+// End of helpers
|
|
+
|
|
+// Various method entries
|
|
+//------------------------------------------------------------------------------------------------------------------------
|
|
+//
|
|
+//
|
|
+
|
|
+// Method entry for java.lang.ref.Reference.get.
|
|
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
|
|
+ // Code: _aload_0, _getfield, _areturn
|
|
+ // parameter size = 1
|
|
+ //
|
|
+ // The code that gets generated by this routine is split into 2 parts:
|
|
+ // 1. The "intrinsified" code for G1 (or any SATB based GC),
|
|
+ // 2. The slow path - which is an expansion of the regular method entry.
|
|
+ //
|
|
+ // Notes:-
|
|
+ // * In the G1 code we do not check whether we need to block for
|
|
+ // a safepoint. If G1 is enabled then we must execute the specialized
|
|
+ // code for Reference.get (except when the Reference object is null)
|
|
+ // so that we can log the value in the referent field with an SATB
|
|
+ // update buffer.
|
|
+ // If the code for the getfield template is modified so that the
|
|
+ // G1 pre-barrier code is executed when the current method is
|
|
+ // Reference.get() then going through the normal method entry
|
|
+ // will be fine.
|
|
+ // * The G1 code can, however, check the receiver object (the instance
|
|
+ // of java.lang.Reference) and jump to the slow path if null. If the
|
|
+ // Reference object is null then we obviously cannot fetch the referent
|
|
+ // and so we don't need to call the G1 pre-barrier. Thus we can use the
|
|
+ // regular method entry code to generate the NPE.
|
|
+ //
|
|
+ // This code is based on generate_accessor_entry.
|
|
+ //
|
|
+ // xmethod: Method*
|
|
+ // x30: senderSP must be preserved for the slow path; set SP to it on the fast path
|
|
+
|
|
+ // RA is live. It must be saved around calls.
|
|
+
|
|
+ address entry = __ pc();
|
|
+
|
|
+ const int referent_offset = java_lang_ref_Reference::referent_offset;
|
|
+ guarantee(referent_offset > 0, "referent offset not initialized");
|
|
+
|
|
+ Label slow_path;
|
|
+ const Register local_0 = c_rarg0;
|
|
+ // Check if local 0 != NULL
|
|
+ // If the receiver is null then it is OK to jump to the slow path.
|
|
+ __ ld(local_0, Address(esp, 0));
|
|
+ __ beqz(local_0, slow_path);
|
|
+
|
|
+ __ mv(x9, x30); // Move senderSP to a callee-saved register
|
|
+
|
|
+ // Load the value of the referent field.
|
|
+ const Address field_address(local_0, referent_offset);
|
|
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
|
|
+ bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0);
|
|
+
|
|
+ // areturn
|
|
+ __ andi(sp, x9, -16); // done with stack
|
|
+ __ ret();
|
|
+
|
|
+ // generate a vanilla interpreter entry as the slow path
|
|
+ __ bind(slow_path);
|
|
+ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Method entry for static native methods:
|
|
+ * int java.util.zip.CRC32.update(int crc, int b)
|
|
+ */
|
|
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
|
|
+ // TODO: Unimplemented generate_CRC32_update_entry
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Method entry for static native methods:
|
|
+ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
|
|
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
|
|
+ */
|
|
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
|
+ // TODO: Unimplemented generate_CRC32_updateBytes_entry
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Method entry for intrinsic-candidate (non-native) methods:
|
|
+ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
|
|
+ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
|
|
+ * Unlike CRC32, CRC32C does not have any methods marked as native
|
|
+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
|
|
+ */
|
|
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
|
|
+ // TODO: Unimplemented generate_CRC32C_updateBytes_entry
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
|
|
+ // Bang each page in the shadow zone. We can't assume it's been done for
|
|
+ // an interpreter frame with greater than a page of locals, so each page
|
|
+ // needs to be checked. Only true for non-native.
|
|
+ if (UseStackBanging) {
|
|
+ const int n_shadow_pages = checked_cast<int>(JavaThread::stack_shadow_zone_size()) / os::vm_page_size();
|
|
+ const int start_page = native_call ? n_shadow_pages : 1;
|
|
+ const int page_size = os::vm_page_size();
|
|
+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) {
|
|
+ __ sub(t1, sp, pages * page_size);
|
|
+ __ sd(zr, Address(t1));
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// Interpreter stub for calling a native method. (asm interpreter)
|
|
+// This sets up a somewhat different looking stack for calling the
|
|
+// native method than the typical interpreter frame setup.
|
|
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
|
+ // determine code generation flags
|
|
+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
|
|
+
|
|
+ // x11: Method*
|
|
+ // x30: sender sp
|
|
+
|
|
+ address entry_point = __ pc();
|
|
+
|
|
+ const Address constMethod (xmethod, Method::const_offset());
|
|
+ const Address access_flags (xmethod, Method::access_flags_offset());
|
|
+ const Address size_of_parameters(x12, ConstMethod::
|
|
+ size_of_parameters_offset());
|
|
+
|
|
+ // get parameter size (always needed)
|
|
+ __ ld(x12, constMethod);
|
|
+ __ load_unsigned_short(x12, size_of_parameters);
|
|
+
|
|
+ // Native calls don't need the stack size check since they have no
|
|
+ // expression stack and the arguments are already on the stack and
|
|
+ // we only add a handful of words to the stack.
|
|
+
|
|
+ // xmethod: Method*
|
|
+ // x12: size of parameters
|
|
+ // x30: sender sp
|
|
+
|
|
+ // for natives the size of locals is zero
|
|
+
|
|
+ // compute beginning of parameters (xlocals)
|
|
+ __ shadd(xlocals, x12, esp, xlocals, 3);
|
|
+ __ addi(xlocals, xlocals, -wordSize);
|
|
+
|
|
+ // Pull SP back to minimum size: this avoids holes in the stack
|
|
+ __ andi(sp, esp, -16);
|
|
+
|
|
+ // initialize fixed part of activation frame
|
|
+ generate_fixed_frame(true);
|
|
+
|
|
+ // make sure method is native & not abstract
|
|
+#ifdef ASSERT
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false);
|
|
+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
|
|
+#endif
|
|
+
|
|
+ // Since at this point in the method invocation the exception
|
|
+ // handler would try to exit the monitor of synchronized methods
|
|
+ // which hasn't been entered yet, we set the thread local variable
|
|
+ // _do_not_unlock_if_synchronized to true. The remove_activation
|
|
+ // will check this flag.
|
|
+
|
|
+ const Address do_not_unlock_if_synchronized(xthread,
|
|
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
|
|
+ __ mv(t1, true);
|
|
+ __ sb(t1, do_not_unlock_if_synchronized);
|
|
+
|
|
+ // increment invocation count & check for overflow
|
|
+ Label invocation_counter_overflow;
|
|
+ if (inc_counter) {
|
|
+ generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
|
|
+ }
|
|
+
|
|
+ Label continue_after_compile;
|
|
+ __ bind(continue_after_compile);
|
|
+
|
|
+ bang_stack_shadow_pages(true);
|
|
+
|
|
+ // reset the _do_not_unlock_if_synchronized flag
|
|
+ __ sb(zr, do_not_unlock_if_synchronized);
|
|
+
|
|
+ // check for synchronized methods
|
|
+ // Must happen AFTER invocation_counter check and stack overflow check,
|
|
+ // so the method is not locked if the counter overflows.
|
|
+ if (synchronized) {
|
|
+ lock_method();
|
|
+ } else {
|
|
+ // no synchronization necessary
|
|
+#ifdef ASSERT
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
|
|
+#endif
|
|
+ }
|
|
+
|
|
+ // start execution
|
|
+#ifdef ASSERT
|
|
+ __ verify_frame_setup();
|
|
+#endif
|
|
+
|
|
+ // jvmti support
|
|
+ __ notify_method_entry();
|
|
+
|
|
+ // work registers
|
|
+ const Register t = x18;
|
|
+ const Register result_handler = x19;
|
|
+
|
|
+ // allocate space for parameters
|
|
+ __ ld(t, Address(xmethod, Method::const_offset()));
|
|
+ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
|
|
+
|
|
+ __ slli(t, t, Interpreter::logStackElementSize);
|
|
+ __ sub(x30, esp, t);
|
|
+ __ andi(sp, x30, -16);
|
|
+ __ mv(esp, x30);
|
|
+
|
|
+ // get signature handler
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t, Address(xmethod, Method::signature_handler_offset()));
|
|
+ __ bnez(t, L);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::prepare_native_call),
|
|
+ xmethod);
|
|
+ __ ld(t, Address(xmethod, Method::signature_handler_offset()));
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // call signature handler
|
|
+ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals,
|
|
+ "adjust this code");
|
|
+ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp,
|
|
+ "adjust this code");
|
|
+ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0,
|
|
+ "adjust this code");
|
|
+
|
|
+ // The generated handlers do not touch xmethod (the method).
|
|
+ // However, large signatures cannot be cached and are generated
|
|
+ // each time here. The slow-path generator can do a GC on return,
|
|
+ // so we must reload it after the call.
|
|
+ __ jalr(t);
|
|
+ __ get_method(xmethod); // slow path can do a GC, reload xmethod
|
|
+
|
|
+
|
|
+ // result handler is in x10
|
|
+ // set result handler
|
|
+ __ mv(result_handler, x10);
|
|
+ // pass mirror handle if static call
|
|
+ {
|
|
+ Label L;
|
|
+ __ lwu(t, Address(xmethod, Method::access_flags_offset()));
|
|
+ __ andi(t0, t, JVM_ACC_STATIC);
|
|
+ __ beqz(t0, L);
|
|
+ // get mirror
|
|
+ __ load_mirror(t, xmethod);
|
|
+ // copy mirror into activation frame
|
|
+ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
|
|
+ // pass handle to mirror
|
|
+ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize);
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // get native function entry point in x28
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(x28, Address(xmethod, Method::native_function_offset()));
|
|
+ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
|
|
+ __ mv(t1, unsatisfied);
|
|
+ __ ld(t1, t1);
|
|
+ __ bne(x28, t1, L);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::prepare_native_call),
|
|
+ xmethod);
|
|
+ __ get_method(xmethod);
|
|
+ __ ld(x28, Address(xmethod, Method::native_function_offset()));
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // pass JNIEnv
|
|
+ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset()));
|
|
+
|
|
+ // It is enough that the pc() points into the right code
|
|
+ // segment. It does not have to be the correct return pc.
|
|
+ Label native_return;
|
|
+ __ set_last_Java_frame(esp, fp, native_return, x30);
|
|
+
|
|
+ // change thread state
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ Label L;
|
|
+ __ lwu(t, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ __ addi(t0, zr, (u1)_thread_in_Java);
|
|
+ __ beq(t, t0, L);
|
|
+ __ stop("Wrong thread state in native stub");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // Change state to native
|
|
+ __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
|
|
+ __ mv(t0, _thread_in_native);
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+ __ sw(t0, Address(t1));
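+  // Note (illustrative): the LoadStore|StoreStore fence publishes the
+  // frame anchor and argument stores before the state word flips to
+  // _thread_in_native, so a VM thread that observes the new state also
+  // observes a walkable last_Java_frame.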
|
|
+
|
|
+ // Call the native method.
|
|
+ __ jalr(x28);
|
|
+ __ bind(native_return);
|
|
+ __ get_method(xmethod);
|
|
+ // result potentially in x10 or f10
|
|
+
|
|
+ // make room for the pushes we're about to do
|
|
+ __ sub(t0, esp, 4 * wordSize);
|
|
+ __ andi(sp, t0, -16);
|
|
+
|
|
+ // NOTE: The order of these pushes is known to frame::interpreter_frame_result
|
|
+ // in order to extract the result of a method call. If the order of these
|
|
+ // pushes change or anything else is added to the stack then the code in
|
|
+ // interpreter_frame_result must also change.
|
|
+ __ push(dtos);
|
|
+ __ push(ltos);
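+  // Note (illustrative): pushing dtos then ltos saves both f10 and x10,
+  // whichever actually holds the native result; the matching pops just
+  // before the jump to result_handler restore them in reverse order.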
|
|
+
|
|
+ // change thread state
|
|
+ // Force all preceding writes to be observed prior to thread state change
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+
|
|
+ __ mv(t0, _thread_in_native_trans);
|
|
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
|
|
+
|
|
+ if (os::is_MP()) {
|
|
+ if (UseMembar) {
|
|
+ // Force this write out before the read below
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ } else {
|
|
+ // Write serialization page so VM thread can do a pseudo remote membar.
|
|
+ // We use the current thread pointer to calculate a thread specific
|
|
+ // offset to write to within the page. This minimizes bus traffic
|
|
+ // due to cache line collision.
|
|
+ __ serialize_memory(xthread, t0, t1);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // check for safepoint operation in progress and/or pending suspend requests
|
|
+ {
|
|
+ Label L, Continue;
|
|
+ __ safepoint_poll_acquire(L);
|
|
+ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset()));
|
|
+ __ beqz(t1, Continue);
|
|
+ __ bind(L);
|
|
+
|
|
+ // Don't use call_VM as it will see a possible pending exception
|
|
+ // and forward it and never return here preventing us from
|
|
+ // clearing _last_native_pc down below. So we do a runtime call by
|
|
+ // hand.
|
|
+ //
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
|
|
+ __ jalr(t1);
|
|
+ __ get_method(xmethod);
|
|
+ __ reinit_heapbase();
|
|
+ __ bind(Continue);
|
|
+ }
|
|
+
|
|
+ // change thread state
|
|
+ // Force all preceding writes to be observed prior to thread state change
|
|
+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
|
|
+
|
|
+ __ mv(t0, _thread_in_Java);
|
|
+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
|
|
+
|
|
+ // reset_last_Java_frame
|
|
+ __ reset_last_Java_frame(true);
|
|
+
|
|
+ if (CheckJNICalls) {
|
|
+ // clear_pending_jni_exception_check
|
|
+ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
|
|
+ }
|
|
+
|
|
+ // reset handle block
|
|
+ __ ld(t, Address(xthread, JavaThread::active_handles_offset()));
|
|
+ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes()));
|
|
+
|
|
+ // If result is an oop unbox and store it in frame where gc will see it
|
|
+ // and result handler will pick it up
|
|
+
|
|
+ {
|
|
+ Label no_oop, not_weak, store_result;
|
|
+ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
|
|
+ __ bne(t, result_handler, no_oop);
|
|
+ // Unbox oop result, e.g. JNIHandles::resolve result.
|
|
+ __ pop(ltos);
|
|
+ __ resolve_jobject(x10, xthread, t);
|
|
+ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
|
|
+ // keep stack depth as expected by pushing oop which will eventually be discarded
|
|
+ __ push(ltos);
|
|
+ __ bind(no_oop);
|
|
+ }
|
|
+
|
|
+ {
|
|
+ Label no_reguard;
|
|
+ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset())));
|
|
+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled);
|
|
+ __ bne(t0, t1, no_reguard);
|
|
+
|
|
+ __ push_call_clobbered_registers();
|
|
+ __ mv(c_rarg0, xthread);
|
|
+ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
|
|
+ __ jalr(t1);
|
|
+ __ pop_call_clobbered_registers();
|
|
+ __ bind(no_reguard);
|
|
+ }
|
|
+
|
|
+ // The method register is junk from after the thread_in_native transition
|
|
+ // until here. Also can't call_VM until the bcp has been
|
|
+ // restored. Need bcp for throwing exception below so get it now.
|
|
+ __ get_method(xmethod);
|
|
+
|
|
+ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=>
|
|
+ // xbcp == code_base()
|
|
+ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod*
|
|
+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase
|
|
+ // handle exceptions (exception handling will handle unlocking!)
|
|
+ {
|
|
+ Label L;
|
|
+ __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
|
|
+ __ beqz(t0, L);
|
|
+ // Note: At some point we may want to unify this with the code
|
|
+ // used in call_VM_base(); i.e., we should use the
|
|
+ // StubRoutines::forward_exception code. For now this doesn't work
|
|
+ // here because the sp is not correctly set at this point.
|
|
+ __ MacroAssembler::call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_pending_exception));
|
|
+ __ should_not_reach_here();
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // do unlocking if necessary
|
|
+ {
|
|
+ Label L;
|
|
+ __ lwu(t, Address(xmethod, Method::access_flags_offset()));
|
|
+ __ andi(t0, t, JVM_ACC_SYNCHRONIZED);
|
|
+ __ beqz(t0, L);
|
|
+ // the code below should be shared with interpreter macro
|
|
+ // assembler implementation
|
|
+ {
|
|
+ Label unlock;
|
|
+ // BasicObjectLock will be first in list, since this is a
|
|
+ // synchronized method. However, need to check that the object
|
|
+ // has not been unlocked by an explicit monitorexit bytecode.
|
|
+
|
|
+      // monitor expected in c_rarg1 for the slow unlock path
|
|
+ __ la(c_rarg1, Address(fp, // address of first monitor
|
|
+ (intptr_t)(frame::interpreter_frame_initial_sp_offset *
|
|
+ wordSize - sizeof(BasicObjectLock))));
|
|
+
|
|
+ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ __ bnez(t, unlock);
|
|
+
|
|
+ // Entry already unlocked, need to throw exception
|
|
+ __ MacroAssembler::call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
|
|
+ __ should_not_reach_here();
|
|
+
|
|
+ __ bind(unlock);
|
|
+ __ unlock_object(c_rarg1);
|
|
+ }
|
|
+ __ bind(L);
|
|
+ }
|
|
+
|
|
+ // jvmti support
|
|
+ // Note: This must happen _after_ handling/throwing any exceptions since
|
|
+ // the exception handler code notifies the runtime of method exits
|
|
+ // too. If this happens before, method entry/exit notifications are
|
|
+ // not properly paired (was bug - gri 11/22/99).
|
|
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
|
|
+
|
|
+ __ pop(ltos);
|
|
+ __ pop(dtos);
|
|
+
|
|
+ __ jalr(result_handler);
|
|
+
|
|
+ // remove activation
|
|
+ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
|
|
+ // remove frame anchor
|
|
+ __ leave();
|
|
+
|
|
+ // restore sender sp
|
|
+ __ mv(sp, esp);
|
|
+
|
|
+ __ ret();
|
|
+
|
|
+ if (inc_counter) {
|
|
+ // Handle overflow of counter and compile method
|
|
+ __ bind(invocation_counter_overflow);
|
|
+ generate_counter_overflow(continue_after_compile);
|
|
+ }
|
|
+
|
|
+ return entry_point;
|
|
+}
|
|
+
|
|
+//
|
|
+// Generic interpreted method entry to (asm) interpreter
|
|
+//
|
|
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
|
|
+
|
|
+ // determine code generation flags
|
|
+ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
|
|
+
|
|
+ // t0: sender sp
|
|
+ address entry_point = __ pc();
|
|
+
|
|
+ const Address constMethod(xmethod, Method::const_offset());
|
|
+ const Address access_flags(xmethod, Method::access_flags_offset());
|
|
+ const Address size_of_parameters(x13,
|
|
+ ConstMethod::size_of_parameters_offset());
|
|
+ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset());
|
|
+
|
|
+ // get parameter size (always needed)
|
|
+ // need to load the const method first
|
|
+ __ ld(x13, constMethod);
|
|
+ __ load_unsigned_short(x12, size_of_parameters);
|
|
+
|
|
+ // x12: size of parameters
|
|
+
|
|
+ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words
|
|
+ __ sub(x13, x13, x12); // x13 = no. of additional locals
|
|
+
|
|
+ // see if we've got enough room on the stack for locals plus overhead.
|
|
+ generate_stack_overflow_check();
|
|
+
|
|
+ // compute beginning of parameters (xlocals)
|
|
+ __ shadd(xlocals, x12, esp, t1, 3);
|
|
+ __ add(xlocals, xlocals, -wordSize);
|
|
+
|
|
+ // Make room for additional locals
|
|
+ __ slli(t1, x13, 3);
|
|
+ __ sub(t0, esp, t1);
|
|
+
|
|
+ // Padding between locals and fixed part of activation frame to ensure
|
|
+ // SP is always 16-byte aligned.
|
|
+ __ andi(sp, t0, -16);
|
|
+
|
|
+ // x13 - # of additional locals
|
|
+ // allocate space for locals
|
|
+ // explicitly initialize locals
|
|
+ {
|
|
+ Label exit, loop;
|
|
+ __ blez(x13, exit); // do nothing if x13 <= 0
|
|
+ __ bind(loop);
|
|
+ __ sd(zr, Address(t0));
|
|
+ __ add(t0, t0, wordSize);
|
|
+ __ add(x13, x13, -1); // until everything initialized
|
|
+ __ bnez(x13, loop);
|
|
+ __ bind(exit);
|
|
+ }
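+  // Note (illustrative): the loop above zeroes only the additional locals
+  // (those beyond the incoming parameters) so the GC never sees stale,
+  // oop-looking bits in the locals area of the new frame.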
|
|
+
|
|
+  // Load the base of the dispatch table
|
|
+ __ get_dispatch();
|
|
+
|
|
+ // initialize fixed part of activation frame
|
|
+ generate_fixed_frame(false);
|
|
+
|
|
+ // make sure method is not native & not abstract
|
|
+#ifdef ASSERT
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native");
|
|
+ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
|
|
+#endif
|
|
+
|
|
+ // Since at this point in the method invocation the exception
|
|
+ // handler would try to exit the monitor of synchronized methods
|
|
+ // which hasn't been entered yet, we set the thread local variable
|
|
+ // _do_not_unlock_if_synchronized to true. The remove_activation
|
|
+ // will check this flag.
|
|
+
|
|
+ const Address do_not_unlock_if_synchronized(xthread,
|
|
+ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
|
|
+ __ mv(t1, true);
|
|
+ __ sb(t1, do_not_unlock_if_synchronized);
|
|
+
|
|
+ Label no_mdp;
|
|
+ const Register mdp = x13;
|
|
+ __ ld(mdp, Address(xmethod, Method::method_data_offset()));
|
|
+ __ beqz(mdp, no_mdp);
|
|
+ __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
|
|
+ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers
|
|
+ __ bind(no_mdp);
|
|
+
|
|
+ // increment invocation count & check for overflow
|
|
+ Label invocation_counter_overflow;
|
|
+ Label profile_method;
|
|
+ Label profile_method_continue;
|
|
+ if (inc_counter) {
|
|
+ generate_counter_incr(&invocation_counter_overflow,
|
|
+ &profile_method,
|
|
+ &profile_method_continue);
|
|
+ if (ProfileInterpreter) {
|
|
+ __ bind(profile_method_continue);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Label continue_after_compile;
|
|
+ __ bind(continue_after_compile);
|
|
+
|
|
+ bang_stack_shadow_pages(false);
|
|
+
|
|
+ // reset the _do_not_unlock_if_synchronized flag
|
|
+ __ sb(zr, do_not_unlock_if_synchronized);
|
|
+
|
|
+ // check for synchronized methods
|
|
+ // Must happen AFTER invocation_counter check and stack overflow check,
|
|
+  // so the method is not locked if the counter overflows.
|
|
+ if (synchronized) {
|
|
+ // Allocate monitor and lock method
|
|
+ lock_method();
|
|
+ } else {
|
|
+ // no synchronization necessary
|
|
+#ifdef ASSERT
|
|
+ __ lwu(x10, access_flags);
|
|
+ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
|
|
+#endif
|
|
+ }
|
|
+
|
|
+ // start execution
|
|
+#ifdef ASSERT
|
|
+ __ verify_frame_setup();
|
|
+#endif
|
|
+
|
|
+ // jvmti support
|
|
+ __ notify_method_entry();
|
|
+
|
|
+ __ dispatch_next(vtos);
|
|
+
|
|
+ // invocation counter overflow
|
|
+ if (inc_counter) {
|
|
+ if (ProfileInterpreter) {
|
|
+ // We have decided to profile this method in the interpreter
|
|
+ __ bind(profile_method);
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
|
|
+ __ set_method_data_pointer_for_bcp();
|
|
+      // reload the method into x11 (probably not strictly needed here)
|
|
+ __ get_method(x11);
|
|
+ __ jal(profile_method_continue);
|
|
+ }
|
|
+ // Handle overflow of counter and compile method
|
|
+ __ bind(invocation_counter_overflow);
|
|
+ generate_counter_overflow(continue_after_compile);
|
|
+ }
|
|
+
|
|
+ return entry_point;
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Exceptions
|
|
+
|
|
+void TemplateInterpreterGenerator::generate_throw_exception() {
|
|
+ // Entry point in previous activation (i.e., if the caller was
|
|
+ // interpreted)
|
|
+ Interpreter::_rethrow_exception_entry = __ pc();
|
|
+ // Restore sp to interpreter_frame_last_sp even though we are going
|
|
+ // to empty the expression stack for the exception processing.
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ // x10: exception
|
|
+ // x13: return address/pc that threw exception
|
|
+ __ restore_bcp(); // xbcp points to call/send
|
|
+ __ restore_locals();
|
|
+ __ restore_constant_pool_cache();
|
|
+ __ reinit_heapbase(); // restore xheapbase as heapbase.
|
|
+ __ get_dispatch();
|
|
+
|
|
+ // Entry point for exceptions thrown within interpreter code
|
|
+ Interpreter::_throw_exception_entry = __ pc();
|
|
+ // If we came here via a NullPointerException on the receiver of a
|
|
+ // method, xthread may be corrupt.
|
|
+ __ get_method(xmethod);
|
|
+ // expression stack is undefined here
|
|
+ // x10: exception
|
|
+ // xbcp: exception bcp
|
|
+ __ verify_oop(x10);
|
|
+ __ mv(c_rarg1, x10);
|
|
+
|
|
+ // expression stack must be empty before entering the VM in case of
|
|
+ // an exception
|
|
+ __ empty_expression_stack();
|
|
+ // find exception handler address and preserve exception oop
|
|
+ __ call_VM(x13,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::exception_handler_for_exception),
|
|
+ c_rarg1);
|
|
+
|
|
+ // Calculate stack limit
|
|
+ __ ld(t0, Address(xmethod, Method::const_offset()));
|
|
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
|
|
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
|
|
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
|
|
+ __ slli(t0, t0, 3);
|
|
+ __ sub(t0, t1, t0);
|
|
+ __ andi(sp, t0, -16);
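+  // Illustrative arithmetic: the machine sp ends up at
+  //   initial_sp - (max_stack + monitor_size + 4 slop slots) * wordSize,
+  // rounded down to a 16-byte boundary; the same sizing is repeated below
+  // when the machine SP is restored after PopFrame.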
|
|
+
|
|
+ // x10: exception handler entry point
|
|
+ // x13: preserved exception oop
|
|
+ // xbcp: bcp for exception handler
|
|
+ __ push_ptr(x13); // push exception which is now the only value on the stack
|
|
+ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!)
|
|
+
|
|
+ // If the exception is not handled in the current frame the frame is
|
|
+ // removed and the exception is rethrown (i.e. exception
|
|
+ // continuation is _rethrow_exception).
|
|
+ //
|
|
+  // Note: At this point the bci still refers to the instruction
|
|
+ // which caused the exception and the expression stack is
|
|
+ // empty. Thus, for any VM calls at this point, GC will find a legal
|
|
+ // oop map (with empty expression stack).
|
|
+
|
|
+ //
|
|
+ // JVMTI PopFrame support
|
|
+ //
|
|
+
|
|
+ Interpreter::_remove_activation_preserving_args_entry = __ pc();
|
|
+ __ empty_expression_stack();
|
|
+ // Set the popframe_processing bit in pending_popframe_condition
|
|
+ // indicating that we are currently handling popframe, so that
|
|
+ // call_VMs that may happen later do not trigger new popframe
|
|
+ // handling cycles.
|
|
+ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset()));
|
|
+ __ ori(x13, x13, JavaThread::popframe_processing_bit);
|
|
+ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset()));
|
|
+
|
|
+ {
|
|
+ // Check to see whether we are returning to a deoptimized frame.
|
|
+ // (The PopFrame call ensures that the caller of the popped frame is
|
|
+ // either interpreted or compiled and deoptimizes it if compiled.)
|
|
+ // In this case, we can't call dispatch_next() after the frame is
|
|
+ // popped, but instead must save the incoming arguments and restore
|
|
+ // them after deoptimization has occurred.
|
|
+ //
|
|
+ // Note that we don't compare the return PC against the
|
|
+ // deoptimization blob's unpack entry because of the presence of
|
|
+ // adapter frames in C2.
|
|
+ Label caller_not_deoptimized;
|
|
+ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize));
|
|
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1);
|
|
+ __ bnez(x10, caller_not_deoptimized);
|
|
+
|
|
+ // Compute size of arguments for saving when returning to
|
|
+ // deoptimized caller
|
|
+ __ get_method(x10);
|
|
+ __ ld(x10, Address(x10, Method::const_offset()));
|
|
+ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod::
|
|
+ size_of_parameters_offset())));
|
|
+ __ slli(x10, x10, Interpreter::logStackElementSize);
|
|
+ __ restore_locals();
|
|
+ __ sub(xlocals, xlocals, x10);
|
|
+ __ add(xlocals, xlocals, wordSize);
|
|
+ // Save these arguments
|
|
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
|
|
+ Deoptimization::
|
|
+ popframe_preserve_args),
|
|
+ xthread, x10, xlocals);
|
|
+
|
|
+ __ remove_activation(vtos,
|
|
+ /* throw_monitor_exception */ false,
|
|
+ /* install_monitor_exception */ false,
|
|
+ /* notify_jvmdi */ false);
|
|
+
|
|
+ // Inform deoptimization that it is responsible for restoring
|
|
+ // these arguments
|
|
+ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit);
|
|
+ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset()));
|
|
+
|
|
+ // Continue in deoptimization handler
|
|
+ __ ret();
|
|
+
|
|
+ __ bind(caller_not_deoptimized);
|
|
+ }
|
|
+
|
|
+ __ remove_activation(vtos,
|
|
+ /* throw_monitor_exception */ false,
|
|
+ /* install_monitor_exception */ false,
|
|
+ /* notify_jvmdi */ false);
|
|
+
|
|
+ // Restore the last_sp and null it out
|
|
+ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
|
|
+
|
|
+ __ restore_bcp();
|
|
+ __ restore_locals();
|
|
+ __ restore_constant_pool_cache();
|
|
+ __ get_method(xmethod);
|
|
+ __ get_dispatch();
|
|
+
|
|
+ // The method data pointer was incremented already during
|
|
+ // call profiling. We have to restore the mdp for the current bcp.
|
|
+ if (ProfileInterpreter) {
|
|
+ __ set_method_data_pointer_for_bcp();
|
|
+ }
|
|
+
|
|
+ // Clear the popframe condition flag
|
|
+ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset()));
|
|
+ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive");
|
|
+
|
|
+#if INCLUDE_JVMTI
|
|
+ {
|
|
+ Label L_done;
|
|
+
|
|
+ __ lbu(t0, Address(xbcp, 0));
|
|
+ __ mv(t1, Bytecodes::_invokestatic);
|
|
+ __ bne(t1, t0, L_done);
|
|
+
|
|
+ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
|
|
+    // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
|
|
+
|
|
+ __ ld(c_rarg0, Address(xlocals, 0));
|
|
+ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp);
|
|
+
|
|
+ __ beqz(x10, L_done);
|
|
+
|
|
+ __ sd(x10, Address(esp, 0));
|
|
+ __ bind(L_done);
|
|
+ }
|
|
+#endif // INCLUDE_JVMTI
|
|
+
|
|
+ // Restore machine SP
|
|
+ __ ld(t0, Address(xmethod, Method::const_offset()));
|
|
+ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
|
|
+ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
|
|
+ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
|
|
+ __ slliw(t0, t0, 3);
|
|
+ __ sub(t0, t1, t0);
|
|
+ __ andi(sp, t0, -16);
|
|
+
|
|
+ __ dispatch_next(vtos);
|
|
+ // end of PopFrame support
|
|
+
|
|
+ Interpreter::_remove_activation_entry = __ pc();
|
|
+
|
|
+ // preserve exception over this code sequence
|
|
+ __ pop_ptr(x10);
|
|
+ __ sd(x10, Address(xthread, JavaThread::vm_result_offset()));
|
|
+ // remove the activation (without doing throws on illegalMonitorExceptions)
|
|
+ __ remove_activation(vtos, false, true, false);
|
|
+ // restore exception
|
|
+ __ get_vm_result(x10, xthread);
|
|
+
|
|
+ // In between activations - previous activation type unknown yet
|
|
+ // compute continuation point - the continuation point expects the
|
|
+ // following registers set up:
|
|
+ //
|
|
+ // x10: exception
|
|
+ // ra: return address/pc that threw exception
|
|
+ // sp: expression stack of caller
|
|
+ // fp: fp of caller
|
|
+ // FIXME: There's no point saving RA here because VM calls don't trash it
|
|
+ __ sub(sp, sp, 2 * wordSize);
|
|
+ __ sd(x10, Address(sp, 0)); // save exception
|
|
+ __ sd(ra, Address(sp, wordSize)); // save return address
|
|
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
|
|
+ SharedRuntime::exception_handler_for_return_address),
|
|
+ xthread, ra);
|
|
+ __ mv(x11, x10); // save exception handler
|
|
+ __ ld(x10, Address(sp, 0)); // restore exception
|
|
+ __ ld(ra, Address(sp, wordSize)); // restore return address
|
|
+ __ add(sp, sp, 2 * wordSize);
|
|
+ // We might be returning to a deopt handler that expects x13 to
|
|
+ // contain the exception pc
|
|
+ __ mv(x13, ra);
|
|
+ // Note that an "issuing PC" is actually the next PC after the call
|
|
+ __ jr(x11); // jump to exception
|
|
+ // handler of caller
|
|
+}
|
|
+
|
|
+//
|
|
+// JVMTI ForceEarlyReturn support
|
|
+//
|
|
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
|
|
+ address entry = __ pc();
|
|
+
|
|
+ __ restore_bcp();
|
|
+ __ restore_locals();
|
|
+ __ empty_expression_stack();
|
|
+ __ load_earlyret_value(state);
|
|
+
|
|
+ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
|
|
+ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset());
|
|
+
|
|
+ // Clear the earlyret state
|
|
+ assert(JvmtiThreadState::earlyret_inactive == 0, "should be");
|
|
+ __ sd(zr, cond_addr);
|
|
+
|
|
+ __ remove_activation(state,
|
|
+ false, /* throw_monitor_exception */
|
|
+ false, /* install_monitor_exception */
|
|
+ true); /* notify_jvmdi */
|
|
+ __ ret();
|
|
+
|
|
+ return entry;
|
|
+}
|
|
+// end of ForceEarlyReturn support
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Helper for vtos entry point generation
|
|
+
|
|
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
|
|
+ address& bep,
|
|
+ address& cep,
|
|
+ address& sep,
|
|
+ address& aep,
|
|
+ address& iep,
|
|
+ address& lep,
|
|
+ address& fep,
|
|
+ address& dep,
|
|
+ address& vep) {
|
|
+ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template");
|
|
+ Label L;
|
|
+ aep = __ pc(); __ push_ptr(); __ j(L);
|
|
+ fep = __ pc(); __ push_f(); __ j(L);
|
|
+ dep = __ pc(); __ push_d(); __ j(L);
|
|
+ lep = __ pc(); __ push_l(); __ j(L);
|
|
+ bep = cep = sep =
|
|
+ iep = __ pc(); __ push_i();
|
|
+ vep = __ pc();
|
|
+ __ bind(L);
|
|
+ generate_and_dispatch(t);
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+
|
|
+// Non-product code
|
|
+#ifndef PRODUCT
|
|
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
|
|
+ address entry = __ pc();
|
|
+
|
|
+ __ push_reg(ra);
|
|
+ __ push(state);
|
|
+ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);
|
|
+ __ mv(c_rarg2, x10); // Pass itos
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3);
|
|
+ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);
|
|
+ __ pop(state);
|
|
+ __ pop_reg(ra);
|
|
+ __ ret(); // return from result handler
|
|
+
|
|
+ return entry;
|
|
+}
|
|
+
|
|
+void TemplateInterpreterGenerator::count_bytecode() {
|
|
+ __ push_reg(t0);
|
|
+ __ push_reg(x10);
|
|
+ __ mv(x10, (address) &BytecodeCounter::_counter_value);
|
|
+ __ mv(t0, 1);
|
|
+ __ amoadd_d(zr, x10, t0, Assembler::aqrl);
|
|
+ __ pop_reg(x10);
|
|
+ __ pop_reg(t0);
|
|
+}
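+// Note (illustrative): the increment above uses amoadd.d with aq|rl
+// ordering so concurrent interpreter threads update
+// BytecodeCounter::_counter_value atomically; the previous value is not
+// needed and is discarded into zr.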
|
|
+
|
|
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; }
|
|
+
|
|
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; }
|
|
+
|
|
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
|
|
+ // Call a little run-time stub to avoid blow-up for each bytecode.
|
|
+  // The run-time stub saves the right registers, depending on
|
|
+ // the tosca in-state for the given template.
|
|
+
|
|
+ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated");
|
|
+ __ jal(Interpreter::trace_code(t->tos_in()));
|
|
+ __ reinit_heapbase();
|
|
+}
|
|
+
|
|
+void TemplateInterpreterGenerator::stop_interpreter_at() {
|
|
+ Label L;
|
|
+ __ push_reg(t0);
|
|
+ __ mv(t0, (address) &BytecodeCounter::_counter_value);
|
|
+ __ ld(t0, Address(t0));
|
|
+ __ mv(t1, StopInterpreterAt);
|
|
+ __ bne(t0, t1, L);
|
|
+ __ ebreak();
|
|
+ __ bind(L);
|
|
+ __ pop_reg(t0);
|
|
+}
|
|
+
|
|
+#endif // !PRODUCT
|
|
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..8e6e7dee5
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
|
|
@@ -0,0 +1,4028 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "gc/shared/barrierSetAssembler.hpp"
|
|
+#include "interpreter/interp_masm.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "interpreter/interpreterRuntime.hpp"
|
|
+#include "interpreter/templateTable.hpp"
|
|
+#include "memory/universe.hpp"
|
|
+#include "oops/method.hpp"
|
|
+#include "oops/methodData.hpp"
|
|
+#include "oops/objArrayKlass.hpp"
|
|
+#include "oops/oop.inline.hpp"
|
|
+#include "prims/methodHandles.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "runtime/synchronizer.hpp"
|
|
+
|
|
+#define __ _masm->
|
|
+
|
|
+// Platform-dependent initialization
|
|
+
|
|
+void TemplateTable::pd_initialize() {
|
|
+ // No riscv specific initialization
|
|
+}
|
|
+
|
|
+// Address computation: local variables
|
|
+
|
|
+static inline Address iaddress(int n) {
|
|
+ return Address(xlocals, Interpreter::local_offset_in_bytes(n));
|
|
+}
|
|
+
|
|
+static inline Address laddress(int n) {
|
|
+ return iaddress(n + 1);
|
|
+}
|
|
+
|
|
+static inline Address faddress(int n) {
|
|
+ return iaddress(n);
|
|
+}
|
|
+
|
|
+static inline Address daddress(int n) {
|
|
+ return laddress(n);
|
|
+}
|
|
+
|
|
+static inline Address aaddress(int n) {
|
|
+ return iaddress(n);
|
|
+}
|
|
+
|
|
+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
|
|
+ _masm->shadd(temp, r, xlocals, temp, 3);
|
|
+ return Address(temp, 0);
|
|
+}
|
|
+
|
|
+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
|
|
+ _masm->shadd(temp, r, xlocals, temp, 3);
|
|
+  return Address(temp, Interpreter::local_offset_in_bytes(1));
|
|
+}
|
|
+
|
|
+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
|
|
+ return iaddress(r, temp, _masm);
|
|
+}
|
|
+
|
|
+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
|
|
+ return laddress(r, temp, _masm);
|
|
+}
|
|
+
|
|
+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {
|
|
+ return iaddress(r, temp, _masm);
|
|
+}
|
|
+
|
|
+// At top of Java expression stack which may be different than esp(). It
|
|
+// isn't for category 1 objects.
|
|
+static inline Address at_tos () {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(0));
|
|
+}
|
|
+
|
|
+static inline Address at_tos_p1() {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(1));
|
|
+}
|
|
+
|
|
+static inline Address at_tos_p2() {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(2));
|
|
+}
|
|
+
|
|
+static inline Address at_tos_p3() {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(3));
|
|
+}
|
|
+
|
|
+static inline Address at_tos_p4() {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(4));
|
|
+}
|
|
+
|
|
+static inline Address at_tos_p5() {
|
|
+ return Address(esp, Interpreter::expr_offset_in_bytes(5));
|
|
+}
|
|
+
|
|
+// Miscellaneous helper routines
|
|
+// Store an oop (or NULL) at the Address dst.
|
|
+// If val == noreg this means store a NULL
|
|
+static void do_oop_store(InterpreterMacroAssembler* _masm,
|
|
+ Address dst,
|
|
+ Register val,
|
|
+ DecoratorSet decorators) {
|
|
+ assert(val == noreg || val == x10, "parameter is just for looks");
|
|
+ __ store_heap_oop(dst, val, x29, x11, x13, decorators);
|
|
+}
|
|
+
|
|
+static void do_oop_load(InterpreterMacroAssembler* _masm,
|
|
+ Address src,
|
|
+ Register dst,
|
|
+ DecoratorSet decorators) {
|
|
+ __ load_heap_oop(dst, src, x7, x11, decorators);
|
|
+}
|
|
+
|
|
+Address TemplateTable::at_bcp(int offset) {
|
|
+ assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
|
|
+ return Address(xbcp, offset);
|
|
+}
|
|
+
|
|
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
|
|
+ Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
|
|
+ int byte_no)
|
|
+{
|
|
+ if (!RewriteBytecodes) { return; }
|
|
+ Label L_patch_done;
|
|
+
|
|
+ switch (bc) {
|
|
+ case Bytecodes::_fast_aputfield: // fall through
|
|
+ case Bytecodes::_fast_bputfield: // fall through
|
|
+ case Bytecodes::_fast_zputfield: // fall through
|
|
+ case Bytecodes::_fast_cputfield: // fall through
|
|
+ case Bytecodes::_fast_dputfield: // fall through
|
|
+ case Bytecodes::_fast_fputfield: // fall through
|
|
+ case Bytecodes::_fast_iputfield: // fall through
|
|
+ case Bytecodes::_fast_lputfield: // fall through
|
|
+ case Bytecodes::_fast_sputfield: {
|
|
+ // We skip bytecode quickening for putfield instructions when
|
|
+ // the put_code written to the constant pool cache is zero.
|
|
+ // This is required so that every execution of this instruction
|
|
+ // calls out to InterpreterRuntime::resolve_get_put to do
|
|
+ // additional, required work.
|
|
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
|
|
+ assert(load_bc_into_bc_reg, "we use bc_reg as temp");
|
|
+ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
|
|
+ __ mv(bc_reg, bc);
|
|
+ __ beqz(temp_reg, L_patch_done);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ assert(byte_no == -1, "sanity");
|
|
+ // the pair bytecodes have already done the load.
|
|
+ if (load_bc_into_bc_reg) {
|
|
+ __ mv(bc_reg, bc);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (JvmtiExport::can_post_breakpoint()) {
|
|
+ Label L_fast_patch;
|
|
+ // if a breakpoint is present we can't rewrite the stream directly
|
|
+ __ load_unsigned_byte(temp_reg, at_bcp(0));
|
|
+ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register.
|
|
+ __ bnez(temp_reg, L_fast_patch);
|
|
+ // Let breakpoint table handling rewrite to quicker bytecode
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg);
|
|
+ __ j(L_patch_done);
|
|
+ __ bind(L_fast_patch);
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ Label L_okay;
|
|
+ __ load_unsigned_byte(temp_reg, at_bcp(0));
|
|
+ __ beq(temp_reg, bc_reg, L_okay);
|
|
+ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc));
|
|
+ __ beqz(temp_reg, L_okay);
|
|
+ __ stop("patching the wrong bytecode");
|
|
+ __ bind(L_okay);
|
|
+#endif
|
|
+
|
|
+ // patch bytecode
|
|
+ __ sb(bc_reg, at_bcp(0));
|
|
+ __ bind(L_patch_done);
|
|
+}
|
|
+
|
|
+// Individual instructions
|
|
+
|
|
+void TemplateTable::nop() {
|
|
+ transition(vtos, vtos);
|
|
+ // nothing to do
|
|
+}
|
|
+
|
|
+void TemplateTable::shouldnotreachhere() {
|
|
+ transition(vtos, vtos);
|
|
+ __ stop("should not reach here bytecode");
|
|
+}
|
|
+
|
|
+void TemplateTable::aconst_null()
|
|
+{
|
|
+ transition(vtos, atos);
|
|
+ __ mv(x10, zr);
|
|
+}
|
|
+
|
|
+void TemplateTable::iconst(int value)
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ __ mv(x10, value);
|
|
+}
|
|
+
|
|
+void TemplateTable::lconst(int value)
|
|
+{
|
|
+ transition(vtos, ltos);
|
|
+ __ mv(x10, value);
|
|
+}
|
|
+
|
|
+void TemplateTable::fconst(int value)
|
|
+{
|
|
+ transition(vtos, ftos);
|
|
+ static float fBuf[2] = {1.0, 2.0};
|
|
+ __ mv(t0, (intptr_t)fBuf);
|
|
+ switch (value) {
|
|
+ case 0:
|
|
+ __ fmv_w_x(f10, zr);
|
|
+ break;
|
|
+ case 1:
|
|
+ __ flw(f10, t0, 0);
|
|
+ break;
|
|
+ case 2:
|
|
+ __ flw(f10, t0, sizeof(float));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::dconst(int value)
|
|
+{
|
|
+ transition(vtos, dtos);
|
|
+ static double dBuf[2] = {1.0, 2.0};
|
|
+ __ mv(t0, (intptr_t)dBuf);
|
|
+ switch (value) {
|
|
+ case 0:
|
|
+ __ fmv_d_x(f10, zr);
|
|
+ break;
|
|
+ case 1:
|
|
+ __ fld(f10, t0, 0);
|
|
+ break;
|
|
+ case 2:
|
|
+ __ fld(f10, t0, sizeof(double));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::bipush()
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ __ load_signed_byte(x10, at_bcp(1));
|
|
+}
|
|
+
|
|
+void TemplateTable::sipush()
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ __ load_unsigned_short(x10, at_bcp(1));
|
|
+ __ revb_w_w(x10, x10);
|
|
+ __ sraiw(x10, x10, 16);
|
|
+}
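+// Note (illustrative): the 16-bit operand sits big-endian in the bytecode
+// stream; the unsigned load picks it up little-endian, revb_w_w moves the
+// operand bytes into bits 16..31, and sraiw by 16 sign-extends the
+// original signed short into x10.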
|
|
+
|
|
+void TemplateTable::ldc(bool wide)
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ Label call_ldc, notFloat, notClass, notInt, Done;
|
|
+
|
|
+ if (wide) {
|
|
+ __ get_unsigned_2_byte_index_at_bcp(x11, 1);
|
|
+ } else {
|
|
+ __ load_unsigned_byte(x11, at_bcp(1));
|
|
+ }
|
|
+ __ get_cpool_and_tags(x12, x10);
|
|
+
|
|
+ const int base_offset = ConstantPool::header_size() * wordSize;
|
|
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
|
|
+
|
|
+ // get type
|
|
+ __ addi(x13, x11, tags_offset);
|
|
+ __ add(x13, x10, x13);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ lbu(x13, Address(x13, 0));
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
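+  // Note (illustrative): the fences around the tag-byte load give it
+  // acquire semantics, so the tag is observed before any of the resolved
+  // constant pool slots read below.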
|
|
+
|
|
+ // unresolved class - get the resolved class
|
|
+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass);
|
|
+ __ beq(x13, t1, call_ldc);
|
|
+
|
|
+ // unresolved class in error state - call into runtime to throw the error
|
|
+ // from the first resolution attempt
|
|
+ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError);
|
|
+ __ beq(x13, t1, call_ldc);
|
|
+
|
|
+ // resolved class - need to call vm to get java mirror of the class
|
|
+ __ mv(t1, (u1)JVM_CONSTANT_Class);
|
|
+ __ bne(x13, t1, notClass);
|
|
+
|
|
+ __ bind(call_ldc);
|
|
+ __ mv(c_rarg1, wide);
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
|
|
+ __ push_ptr(x10);
|
|
+ __ verify_oop(x10);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notClass);
|
|
+ __ mv(t1, (u1)JVM_CONSTANT_Float);
|
|
+ __ bne(x13, t1, notFloat);
|
|
+
|
|
+ // ftos
|
|
+ __ shadd(x11, x11, x12, x11, 3);
|
|
+ __ flw(f10, Address(x11, base_offset));
|
|
+ __ push_f(f10);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notFloat);
|
|
+
|
|
+ __ mv(t1, (u1)JVM_CONSTANT_Integer);
|
|
+ __ bne(x13, t1, notInt);
|
|
+
|
|
+ // itos
|
|
+ __ shadd(x11, x11, x12, x11, 3);
|
|
+ __ lw(x10, Address(x11, base_offset));
|
|
+ __ push_i(x10);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notInt);
|
|
+ condy_helper(Done);
|
|
+
|
|
+ __ bind(Done);
|
|
+}
|
|
+
|
|
+// Fast path for caching oop constants.
|
|
+void TemplateTable::fast_aldc(bool wide)
|
|
+{
|
|
+ transition(vtos, atos);
|
|
+
|
|
+ const Register result = x10;
|
|
+ const Register tmp = x11;
|
|
+ const Register rarg = x12;
|
|
+
|
|
+ const int index_size = wide ? sizeof(u2) : sizeof(u1);
|
|
+
|
|
+ Label resolved;
|
|
+
|
|
+ // We are resolved if the resolved reference cache entry contains a
|
|
+ // non-null object (String, MethodType, etc.)
|
|
+ assert_different_registers(result, tmp);
|
|
+ __ get_cache_index_at_bcp(tmp, 1, index_size);
|
|
+ __ load_resolved_reference_at_index(result, tmp);
|
|
+ __ bnez(result, resolved);
|
|
+
|
|
+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
|
|
+
|
|
+ // first time invocation - must resolve first
|
|
+ __ mv(rarg, (int)bytecode());
|
|
+ __ call_VM(result, entry, rarg);
|
|
+
|
|
+ __ bind(resolved);
|
|
+
|
|
+ { // Check for the null sentinel.
|
|
+ // If we just called the VM, it already did the mapping for us,
|
|
+ // but it's harmless to retry.
|
|
+ Label notNull;
|
|
+
|
|
+ // Stash null_sentinel address to get its value later
|
|
+ int32_t offset = 0;
|
|
+ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset);
|
|
+ __ ld(tmp, Address(rarg, offset));
|
|
+ __ bne(result, tmp, notNull);
|
|
+ __ mv(result, zr); // NULL object reference
|
|
+ __ bind(notNull);
|
|
+ }
|
|
+
|
|
+ if (VerifyOops) {
|
|
+ // Safe to call with 0 result
|
|
+ __ verify_oop(result);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::ldc2_w()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ Label notDouble, notLong, Done;
|
|
+ __ get_unsigned_2_byte_index_at_bcp(x10, 1);
|
|
+
|
|
+ __ get_cpool_and_tags(x11, x12);
|
|
+ const int base_offset = ConstantPool::header_size() * wordSize;
|
|
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
|
|
+
|
|
+ // get type
|
|
+ __ add(x12, x12, x10);
|
|
+ __ load_unsigned_byte(x12, Address(x12, tags_offset));
|
|
+ __ mv(t1, JVM_CONSTANT_Double);
|
|
+ __ bne(x12, t1, notDouble);
|
|
+
|
|
+ // dtos
|
|
+ __ shadd(x12, x10, x11, x12, 3);
|
|
+ __ fld(f10, Address(x12, base_offset));
|
|
+ __ push_d(f10);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notDouble);
|
|
+ __ mv(t1, (int)JVM_CONSTANT_Long);
|
|
+ __ bne(x12, t1, notLong);
|
|
+
|
|
+ // ltos
|
|
+ __ shadd(x10, x10, x11, x10, 3);
|
|
+ __ ld(x10, Address(x10, base_offset));
|
|
+ __ push_l(x10);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notLong);
|
|
+ condy_helper(Done);
|
|
+ __ bind(Done);
|
|
+
|
|
+}
|
|
+
|
|
+void TemplateTable::condy_helper(Label& Done)
|
|
+{
|
|
+ const Register obj = x10;
|
|
+ const Register rarg = x11;
|
|
+ const Register flags = x12;
|
|
+ const Register off = x13;
|
|
+
|
|
+ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
|
|
+
|
|
+ __ mv(rarg, (int) bytecode());
|
|
+ __ call_VM(obj, entry, rarg);
|
|
+
|
|
+ __ get_vm_result_2(flags, xthread);
|
|
+
|
|
+ // VMr = obj = base address to find primitive value to push
|
|
+ // VMr2 = flags = (tos, off) using format of CPCE::_flags
|
|
+ __ mv(off, flags);
|
|
+ __ mv(t0, ConstantPoolCacheEntry::field_index_mask);
|
|
+ __ andrw(off, off, t0);
|
|
+
|
|
+ __ add(off, obj, off);
|
|
+ const Address field(off, 0); // base + R---->base + offset
|
|
+
|
|
+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
|
|
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3
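+  // Illustrative: assuming the usual layout (tos_state stored in the top
+  // tos_state_bits of the flags word), the two shifts compute
+  //   flags = (flags << (XLEN - (shift + bits))) >> (XLEN - bits),
+  // leaving just the TosState value (itos, ftos, ...) for the comparisons
+  // below.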
|
|
+
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_ldc: // fall through
|
|
+ case Bytecodes::_ldc_w: {
|
|
+ // tos in (itos, ftos, stos, btos, ctos, ztos)
|
|
+ Label notInt, notFloat, notShort, notByte, notChar, notBool;
|
|
+ __ mv(t1, itos);
|
|
+ __ bne(flags, t1, notInt);
|
|
+ // itos
|
|
+ __ lw(x10, field);
|
|
+ __ push(itos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notInt);
|
|
+ __ mv(t1, ftos);
|
|
+ __ bne(flags, t1, notFloat);
|
|
+ // ftos
|
|
+ __ load_float(field);
|
|
+ __ push(ftos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notFloat);
|
|
+ __ mv(t1, stos);
|
|
+ __ bne(flags, t1, notShort);
|
|
+ // stos
|
|
+ __ load_signed_short(x10, field);
|
|
+ __ push(stos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notShort);
|
|
+ __ mv(t1, btos);
|
|
+ __ bne(flags, t1, notByte);
|
|
+ // btos
|
|
+ __ load_signed_byte(x10, field);
|
|
+ __ push(btos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notByte);
|
|
+ __ mv(t1, ctos);
|
|
+ __ bne(flags, t1, notChar);
|
|
+ // ctos
|
|
+ __ load_unsigned_short(x10, field);
|
|
+ __ push(ctos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notChar);
|
|
+ __ mv(t1, ztos);
|
|
+ __ bne(flags, t1, notBool);
|
|
+ // ztos
|
|
+ __ load_signed_byte(x10, field);
|
|
+ __ push(ztos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notBool);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ case Bytecodes::_ldc2_w: {
|
|
+ Label notLong, notDouble;
|
|
+ __ mv(t1, ltos);
|
|
+ __ bne(flags, t1, notLong);
|
|
+ // ltos
|
|
+ __ ld(x10, field);
|
|
+ __ push(ltos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notLong);
|
|
+ __ mv(t1, dtos);
|
|
+ __ bne(flags, t1, notDouble);
|
|
+ // dtos
|
|
+ __ load_double(field);
|
|
+ __ push(dtos);
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notDouble);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ __ stop("bad ldc/condy");
|
|
+}
|
|
+
|
|
+void TemplateTable::locals_index(Register reg, int offset)
|
|
+{
|
|
+ __ lbu(reg, at_bcp(offset));
|
|
+ __ neg(reg, reg);
|
|
+}
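+// Note (illustrative): the index is negated because local slots sit at
+// decreasing addresses below xlocals; iaddress(r, temp, _masm) then forms
+// xlocals + (-index << 3), i.e. local #n lives n * wordSize bytes below
+// local #0.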
|
|
+
|
|
+void TemplateTable::iload() {
|
|
+ iload_internal();
|
|
+}
|
|
+
|
|
+void TemplateTable::nofast_iload() {
|
|
+ iload_internal(may_not_rewrite);
|
|
+}
|
|
+
|
|
+void TemplateTable::iload_internal(RewriteControl rc) {
|
|
+ transition(vtos, itos);
|
|
+ if (RewriteFrequentPairs && rc == may_rewrite) {
|
|
+ Label rewrite, done;
|
|
+ const Register bc = x14;
|
|
+
|
|
+ // get next bytecode
|
|
+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
|
|
+
|
|
+ // if _iload, wait to rewrite to iload2. We only want to rewrite the
|
|
+ // last two iloads in a pair. Comparing against fast_iload means that
|
|
+    // the next bytecode is neither an iload nor a caload, and therefore
|
|
+ // an iload pair.
|
|
+ __ mv(t1, Bytecodes::_iload);
|
|
+ __ beq(x11, t1, done);
|
|
+
|
|
+ // if _fast_iload rewrite to _fast_iload2
|
|
+ __ mv(t1, Bytecodes::_fast_iload);
|
|
+ __ mv(bc, Bytecodes::_fast_iload2);
|
|
+ __ beq(x11, t1, rewrite);
|
|
+
|
|
+ // if _caload rewrite to _fast_icaload
|
|
+ __ mv(t1, Bytecodes::_caload);
|
|
+ __ mv(bc, Bytecodes::_fast_icaload);
|
|
+ __ beq(x11, t1, rewrite);
|
|
+
|
|
+ // else rewrite to _fast_iload
|
|
+ __ mv(bc, Bytecodes::_fast_iload);
|
|
+
|
|
+ // rewrite
|
|
+ // bc: new bytecode
|
|
+ __ bind(rewrite);
|
|
+ patch_bytecode(Bytecodes::_iload, bc, x11, false);
|
|
+ __ bind(done);
|
|
+
|
|
+ }
|
|
+
|
|
+ // do iload, get the local value into tos
|
|
+ locals_index(x11);
|
|
+ __ lw(x10, iaddress(x11, x10, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_iload2()
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ locals_index(x11);
|
|
+ __ lw(x10, iaddress(x11, x10, _masm));
|
|
+ __ push(itos);
|
|
+ locals_index(x11, 3);
|
|
+ __ lw(x10, iaddress(x11, x10, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_iload()
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ locals_index(x11);
|
|
+ __ lw(x10, iaddress(x11, x10, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::lload()
|
|
+{
|
|
+ transition(vtos, ltos);
|
|
+ __ lbu(x11, at_bcp(1));
|
|
+ __ slli(x11, x11, LogBytesPerWord);
|
|
+ __ sub(x11, xlocals, x11);
|
|
+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));
|
|
+}
|
|
+
|
|
+void TemplateTable::fload()
|
|
+{
|
|
+ transition(vtos, ftos);
|
|
+ locals_index(x11);
|
|
+ __ flw(f10, faddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::dload()
|
|
+{
|
|
+ transition(vtos, dtos);
|
|
+ __ lbu(x11, at_bcp(1));
|
|
+ __ slli(x11, x11, LogBytesPerWord);
|
|
+ __ sub(x11, xlocals, x11);
|
|
+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));
|
|
+}
|
|
+
|
|
+void TemplateTable::aload()
|
|
+{
|
|
+ transition(vtos, atos);
|
|
+ locals_index(x11);
|
|
+ __ ld(x10, iaddress(x11, x10, _masm));
|
|
+
|
|
+}
|
|
+
|
|
+void TemplateTable::locals_index_wide(Register reg) {
|
|
+ __ lhu(reg, at_bcp(2));
|
|
+ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend
|
|
+ __ neg(reg, reg);
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_iload() {
|
|
+ transition(vtos, itos);
|
|
+ locals_index_wide(x11);
|
|
+ __ lw(x10, iaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_lload()
|
|
+{
|
|
+ transition(vtos, ltos);
|
|
+ __ lhu(x11, at_bcp(2));
|
|
+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
|
|
+ __ slli(x11, x11, LogBytesPerWord);
|
|
+ __ sub(x11, xlocals, x11);
|
|
+ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_fload()
|
|
+{
|
|
+ transition(vtos, ftos);
|
|
+ locals_index_wide(x11);
|
|
+ __ flw(f10, faddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_dload()
|
|
+{
|
|
+ transition(vtos, dtos);
|
|
+ __ lhu(x11, at_bcp(2));
|
|
+ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
|
|
+ __ slli(x11, x11, LogBytesPerWord);
|
|
+ __ sub(x11, xlocals, x11);
|
|
+ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_aload()
|
|
+{
|
|
+ transition(vtos, atos);
|
|
+ locals_index_wide(x11);
|
|
+ __ ld(x10, aaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::index_check(Register array, Register index)
|
|
+{
|
|
+ // destroys x11, t0
|
|
+ // check array
|
|
+ __ null_check(array, arrayOopDesc::length_offset_in_bytes());
|
|
+ // sign extend index for use by indexed load
|
|
+ // check index
|
|
+ const Register length = t0;
|
|
+ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
|
|
+ if (index != x11) {
|
|
+ assert(x11 != array, "different registers");
|
|
+ __ mv(x11, index);
|
|
+ }
|
|
+ Label ok;
|
|
+ __ addw(index, index, zr);
|
|
+ __ bltu(index, length, ok);
|
|
+ __ mv(x13, array);
|
|
+ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
|
|
+ __ jr(t0);
|
|
+ __ bind(ok);
|
|
+}
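+// Note (illustrative): the bounds check above is a single unsigned
+// compare; addw sign-extends the 32-bit index, a negative index then
+// looks like a huge unsigned value, and bltu(index, length, ok) rejects
+// both index < 0 and index >= length with one branch.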
|
|
+
|
|
+void TemplateTable::iaload()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2);
|
|
+ __ shadd(t0, x11, x10, t0, 2);
|
|
+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+ __ addw(x10, x10, zr); // signed extended
|
|
+}
|
|
+
|
|
+void TemplateTable::laload()
|
|
+{
|
|
+ transition(itos, ltos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3);
|
|
+ __ shadd(t0, x11, x10, t0, 3);
|
|
+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::faload()
|
|
+{
|
|
+ transition(itos, ftos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2);
|
|
+ __ shadd(t0, x11, x10, t0, 2);
|
|
+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::daload()
|
|
+{
|
|
+ transition(itos, dtos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3);
|
|
+ __ shadd(t0, x11, x10, t0, 3);
|
|
+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::aaload()
|
|
+{
|
|
+ transition(itos, atos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
|
|
+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop);
|
|
+ do_oop_load(_masm,
|
|
+ Address(t0),
|
|
+ x10,
|
|
+ IS_ARRAY);
|
|
+}
|
|
+
|
|
+void TemplateTable::baload()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0);
|
|
+ __ shadd(t0, x11, x10, t0, 0);
|
|
+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::caload()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1);
|
|
+ __ shadd(t0, x11, x10, t0, 1);
|
|
+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+// iload followed by caload frequent pair
|
|
+void TemplateTable::fast_icaload()
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ // load index out of locals
|
|
+ locals_index(x12);
|
|
+ __ lw(x11, iaddress(x12, x11, _masm));
|
|
+ __ pop_ptr(x10);
|
|
+
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11, kills t0
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11
|
|
+ __ shadd(t0, x11, x10, t0, 1);
|
|
+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::saload()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ __ mv(x11, x10);
|
|
+ __ pop_ptr(x10);
|
|
+ // x10: array
|
|
+ // x11: index
|
|
+ index_check(x10, x11); // leaves index in x11, kills t0
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1);
|
|
+ __ shadd(t0, x11, x10, t0, 1);
|
|
+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::iload(int n)
|
|
+{
|
|
+ transition(vtos, itos);
|
|
+ __ lw(x10, iaddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::lload(int n)
|
|
+{
|
|
+ transition(vtos, ltos);
|
|
+ __ ld(x10, laddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::fload(int n)
|
|
+{
|
|
+ transition(vtos, ftos);
|
|
+ __ flw(f10, faddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::dload(int n)
|
|
+{
|
|
+ transition(vtos, dtos);
|
|
+ __ fld(f10, daddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::aload(int n)
|
|
+{
|
|
+ transition(vtos, atos);
|
|
+ __ ld(x10, iaddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::aload_0() {
|
|
+ aload_0_internal();
|
|
+}
|
|
+
|
|
+void TemplateTable::nofast_aload_0() {
|
|
+ aload_0_internal(may_not_rewrite);
|
|
+}
|
|
+
|
|
+void TemplateTable::aload_0_internal(RewriteControl rc) {
|
|
+ // According to bytecode histograms, the pairs:
|
|
+ //
|
|
+ // _aload_0, _fast_igetfield
|
|
+ // _aload_0, _fast_agetfield
|
|
+ // _aload_0, _fast_fgetfield
|
|
+ //
|
|
+ // occur frequently. If RewriteFrequentPairs is set, the (slow)
|
|
+ // _aload_0 bytecode checks if the next bytecode is either
|
|
+ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
|
|
+ // rewrites the current bytecode into a pair bytecode; otherwise it
|
|
+ // rewrites the current bytecode into _fast_aload_0 that doesn't do
|
|
+ // the pair check anymore.
|
|
+ //
|
|
+ // Note: If the next bytecode is _getfield, the rewrite must be
|
|
+ // delayed, otherwise we may miss an opportunity for a pair.
|
|
+ //
|
|
+ // Also rewrite frequent pairs
|
|
+ // aload_0, aload_1
|
|
+ // aload_0, iload_1
|
|
+ // These bytecodes with a small amount of code are most profitable
|
|
+ // to rewrite
|
|
+ if (RewriteFrequentPairs && rc == may_rewrite) {
|
|
+ Label rewrite, done;
|
|
+ const Register bc = x14;
|
|
+
|
|
+ // get next bytecode
|
|
+ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
|
|
+
|
|
+ // if _getfield then wait with rewrite
|
|
+    __ mv(t1, Bytecodes::_getfield);
|
|
+ __ beq(x11, t1, done);
|
|
+
|
|
+ // if _igetfield then rewrite to _fast_iaccess_0
|
|
+ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
|
|
+ __ mv(t1, Bytecodes::_fast_igetfield);
|
|
+ __ mv(bc, Bytecodes::_fast_iaccess_0);
|
|
+ __ beq(x11, t1, rewrite);
|
|
+
|
|
+ // if _agetfield then rewrite to _fast_aaccess_0
|
|
+ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
|
|
+ __ mv(t1, Bytecodes::_fast_agetfield);
|
|
+ __ mv(bc, Bytecodes::_fast_aaccess_0);
|
|
+ __ beq(x11, t1, rewrite);
|
|
+
|
|
+ // if _fgetfield then rewrite to _fast_faccess_0
|
|
+ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
|
|
+ __ mv(t1, Bytecodes::_fast_fgetfield);
|
|
+ __ mv(bc, Bytecodes::_fast_faccess_0);
|
|
+ __ beq(x11, t1, rewrite);
|
|
+
|
|
+ // else rewrite to _fast_aload0
|
|
+ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
|
|
+    __ mv(bc, Bytecodes::_fast_aload_0);
|
|
+
|
|
+ // rewrite
|
|
+ // bc: new bytecode
|
|
+ __ bind(rewrite);
|
|
+ patch_bytecode(Bytecodes::_aload_0, bc, x11, false);
|
|
+
|
|
+ __ bind(done);
|
|
+ }
|
|
+
|
|
+ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
|
|
+ aload(0);
|
|
+}
|
|
+
|
|
+void TemplateTable::istore()
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ locals_index(x11);
|
|
+ __ sw(x10, iaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::lstore()
|
|
+{
|
|
+ transition(ltos, vtos);
|
|
+ locals_index(x11);
|
|
+ __ sd(x10, laddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::fstore() {
|
|
+ transition(ftos, vtos);
|
|
+ locals_index(x11);
|
|
+ __ fsw(f10, iaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::dstore() {
|
|
+ transition(dtos, vtos);
|
|
+ locals_index(x11);
|
|
+ __ fsd(f10, daddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::astore()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_ptr(x10);
|
|
+ locals_index(x11);
|
|
+ __ sd(x10, aaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_istore() {
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_i();
|
|
+ locals_index_wide(x11);
|
|
+ __ sw(x10, iaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_lstore() {
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_l();
|
|
+ locals_index_wide(x11);
|
|
+ __ sd(x10, laddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_fstore() {
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_f();
|
|
+ locals_index_wide(x11);
|
|
+ __ fsw(f10, faddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_dstore() {
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_d();
|
|
+ locals_index_wide(x11);
|
|
+ __ fsd(f10, daddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_astore() {
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_ptr(x10);
|
|
+ locals_index_wide(x11);
|
|
+ __ sd(x10, aaddress(x11, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::iastore() {
|
|
+ transition(itos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // x10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2);
|
|
+ __ shadd(t0, x11, x13, t0, 2);
|
|
+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg);
|
|
+}
|
|
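+
+// The add/shadd pair above folds the array header into the index before
+// scaling, i.e. it forms array + base_offset + index * element_size in one
+// scaled add. Written out for T_INT (illustrative):
+//
+//   jint* int_element_addr(char* array, int index) {
+//     return (jint*)(array
+//                    + arrayOopDesc::base_offset_in_bytes(T_INT)
+//                    + (size_t)index * sizeof(jint));
+//   }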
+
|
|
+void TemplateTable::lastore() {
|
|
+ transition(ltos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // x10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3);
|
|
+ __ shadd(t0, x11, x13, t0, 3);
|
|
+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::fastore() {
|
|
+ transition(ftos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // f10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2);
|
|
+ __ shadd(t0, x11, x13, t0, 2);
|
|
+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::dastore() {
|
|
+ transition(dtos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // f10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3);
|
|
+ __ shadd(t0, x11, x13, t0, 3);
|
|
+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::aastore() {
|
|
+ Label is_null, ok_is_subtype, done;
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., array, index, value
|
|
+ __ ld(x10, at_tos()); // value
|
|
+ __ ld(x12, at_tos_p1()); // index
|
|
+ __ ld(x13, at_tos_p2()); // array
|
|
+
|
|
+ index_check(x13, x12); // kills x11
|
|
+ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
|
|
+ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop);
|
|
+
|
|
+ Address element_address(x14, 0);
|
|
+
|
|
+ // do array store check - check for NULL value first
|
|
+ __ beqz(x10, is_null);
|
|
+
|
|
+ // Move subklass into x11
|
|
+ __ load_klass(x11, x10);
|
|
+ // Move superklass into x10
|
|
+ __ load_klass(x10, x13);
|
|
+ __ ld(x10, Address(x10,
|
|
+ ObjArrayKlass::element_klass_offset()));
|
|
+ // The element address (array + header + index * oopSize) was already folded into x14 above, freeing x12.
|
|
+
|
|
+ // Generate subtype check. Blows x12, x15
|
|
+ // Superklass in x10. Subklass in x11.
|
|
+ __ gen_subtype_check(x11, ok_is_subtype); //todo
|
|
+
|
|
+ // Come here on failure
|
|
+ // object is at TOS
|
|
+ __ j(Interpreter::_throw_ArrayStoreException_entry);
|
|
+
|
|
+ // Come here on success
|
|
+ __ bind(ok_is_subtype);
|
|
+
|
|
+ // Get the value we will store
|
|
+ __ ld(x10, at_tos());
|
|
+ // Now store using the appropriate barrier
|
|
+ do_oop_store(_masm, element_address, x10, IS_ARRAY);
|
|
+ __ j(done);
|
|
+
|
|
+ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx]
|
|
+ __ bind(is_null);
|
|
+ __ profile_null_seen(x12);
|
|
+
|
|
+ // Store a NULL
|
|
+ do_oop_store(_masm, element_address, noreg, IS_ARRAY);
|
|
+
|
|
+ // Pop stack arguments
|
|
+ __ bind(done);
|
|
+ __ add(esp, esp, 3 * Interpreter::stackElementSize);
|
|
+
|
|
+}
|
|
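+
+// Java-level semantics implemented above: a null element is stored without a
+// type check; otherwise the element's class must be a subtype of the array's
+// element class or ArrayStoreException is thrown. A sketch (illustrative only):
+//
+//   void aastore(objArrayOop array, int index, oop value) {
+//     if (value != NULL) {
+//       Klass* element_klass = ObjArrayKlass::cast(array->klass())->element_klass();
+//       if (!value->klass()->is_subtype_of(element_klass)) {
+//         // throw java.lang.ArrayStoreException
+//       }
+//     }
+//     array->obj_at_put(index, value);   // store through the GC barrier
+//   }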
+
|
|
+void TemplateTable::bastore()
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // x10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+
|
|
+ // Need to check whether array is boolean or byte
|
|
+ // since both types share the bastore bytecode.
|
|
+ __ load_klass(x12, x13);
|
|
+ __ lwu(x12, Address(x12, Klass::layout_helper_offset()));
|
|
+ Label L_skip;
|
|
+ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit());
|
|
+ __ beqz(t0, L_skip);
|
|
+ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
|
|
+ __ bind(L_skip);
|
|
+
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0);
|
|
+
|
|
+ __ add(x11, x13, x11);
|
|
+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg);
|
|
+}
|
|
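+
+// Because T_BOOLEAN and T_BYTE arrays share the bastore bytecode, the layout
+// helper is consulted and boolean stores are masked down to 0/1. Equivalent
+// logic, spelled out:
+//
+//   if ((array_klass->layout_helper() & Klass::layout_helper_boolean_diffbit()) != 0) {
+//     value &= 1;                                  // T_BOOLEAN: keep only the lowest bit
+//   }
+//   *(jbyte*)(array_base + index) = (jbyte)value;  // byte-sized store either way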
+
|
|
+void TemplateTable::castore()
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ __ pop_i(x11);
|
|
+ __ pop_ptr(x13);
|
|
+ // x10: value
|
|
+ // x11: index
|
|
+ // x13: array
|
|
+ index_check(x13, x11); // prefer index in x11
|
|
+ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1);
|
|
+ __ shadd(t0, x11, x13, t0, 1);
|
|
+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg);
|
|
+}
|
|
+
|
|
+void TemplateTable::sastore()
|
|
+{
|
|
+ castore();
|
|
+}
|
|
+
|
|
+void TemplateTable::istore(int n)
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ __ sd(x10, iaddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::lstore(int n)
|
|
+{
|
|
+ transition(ltos, vtos);
|
|
+ __ sd(x10, laddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::fstore(int n)
|
|
+{
|
|
+ transition(ftos, vtos);
|
|
+ __ fsw(f10, faddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::dstore(int n)
|
|
+{
|
|
+ transition(dtos, vtos);
|
|
+ __ fsd(f10, daddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::astore(int n)
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ pop_ptr(x10);
|
|
+ __ sd(x10, iaddress(n));
|
|
+}
|
|
+
|
|
+void TemplateTable::pop()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ addi(esp, esp, Interpreter::stackElementSize);
|
|
+}
|
|
+
|
|
+void TemplateTable::pop2()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ addi(esp, esp, 2 * Interpreter::stackElementSize);
|
|
+}
|
|
+
|
|
+void TemplateTable::dup()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ ld(x10, Address(esp, 0));
|
|
+ __ push_reg(x10);
|
|
+ // stack: ..., a, a
|
|
+}
|
|
+
|
|
+void TemplateTable::dup_x1()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b
|
|
+ __ ld(x10, at_tos()); // load b
|
|
+ __ ld(x12, at_tos_p1()); // load a
|
|
+ __ sd(x10, at_tos_p1()); // store b
|
|
+ __ sd(x12, at_tos()); // store a
|
|
+ __ push_reg(x10); // push b
|
|
+ // stack: ..., b, a, b
|
|
+}
|
|
+
|
|
+void TemplateTable::dup_x2()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b, c
|
|
+ __ ld(x10, at_tos()); // load c
|
|
+ __ ld(x12, at_tos_p2()); // load a
|
|
+ __ sd(x10, at_tos_p2()); // store c in a
|
|
+ __ push_reg(x10); // push c
|
|
+ // stack: ..., c, b, c, c
|
|
+ __ ld(x10, at_tos_p2()); // load b
|
|
+ __ sd(x12, at_tos_p2()); // store a in b
|
|
+ // stack: ..., c, a, c, c
|
|
+ __ sd(x10, at_tos_p1()); // store b in c
|
|
+ // stack: ..., c, a, b, c
|
|
+}
|
|
+
|
|
+void TemplateTable::dup2()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b
|
|
+ __ ld(x10, at_tos_p1()); // load a
|
|
+ __ push_reg(x10); // push a
|
|
+ __ ld(x10, at_tos_p1()); // load b
|
|
+ __ push_reg(x10); // push b
|
|
+ // stack: ..., a, b, a, b
|
|
+}
|
|
+
|
|
+void TemplateTable::dup2_x1()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b, c
|
|
+ __ ld(x12, at_tos()); // load c
|
|
+ __ ld(x10, at_tos_p1()); // load b
|
|
+ __ push_reg(x10); // push b
|
|
+ __ push_reg(x12); // push c
|
|
+ // stack: ..., a, b, c, b, c
|
|
+ __ sd(x12, at_tos_p3()); // store c in b
|
|
+ // stack: ..., a, c, c, b, c
|
|
+ __ ld(x12, at_tos_p4()); // load a
|
|
+ __ sd(x12, at_tos_p2()); // store a in 2nd c
|
|
+ // stack: ..., a, c, a, b, c
|
|
+ __ sd(x10, at_tos_p4()); // store b in a
|
|
+ // stack: ..., b, c, a, b, c
|
|
+}
|
|
+
|
|
+void TemplateTable::dup2_x2()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b, c, d
|
|
+ __ ld(x12, at_tos()); // load d
|
|
+ __ ld(x10, at_tos_p1()); // load c
|
|
+ __ push_reg(x10); // push c
|
|
+ __ push_reg(x12); // push d
|
|
+ // stack: ..., a, b, c, d, c, d
|
|
+ __ ld(x10, at_tos_p4()); // load b
|
|
+ __ sd(x10, at_tos_p2()); // store b in d
|
|
+ __ sd(x12, at_tos_p4()); // store d in b
|
|
+ // stack: ..., a, d, c, b, c, d
|
|
+ __ ld(x12, at_tos_p5()); // load a
|
|
+ __ ld(x10, at_tos_p3()); // load c
|
|
+ __ sd(x12, at_tos_p3()); // store a in c
|
|
+ __ sd(x10, at_tos_p5()); // store c in a
|
|
+ // stack: ..., c, d, a, b, c, d
|
|
+}
|
|
+
|
|
+void TemplateTable::swap()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ // stack: ..., a, b
|
|
+ __ ld(x12, at_tos_p1()); // load a
|
|
+ __ ld(x10, at_tos()); // load b
|
|
+ __ sd(x12, at_tos()); // store a in b
|
|
+ __ sd(x10, at_tos_p1()); // store b in a
|
|
+ // stack: ..., b, a
|
|
+}
|
|
+
|
|
+void TemplateTable::iop2(Operation op)
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ // x10 <== x11 op x10
|
|
+ __ pop_i(x11);
|
|
+ switch (op) {
|
|
+ case add : __ addw(x10, x11, x10); break;
|
|
+ case sub : __ subw(x10, x11, x10); break;
|
|
+ case mul : __ mulw(x10, x11, x10); break;
|
|
+ case _and : __ andrw(x10, x11, x10); break;
|
|
+ case _or : __ orrw(x10, x11, x10); break;
|
|
+ case _xor : __ xorrw(x10, x11, x10); break;
|
|
+ case shl : __ sllw(x10, x11, x10); break;
|
|
+ case shr : __ sraw(x10, x11, x10); break;
|
|
+ case ushr : __ srlw(x10, x11, x10); break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
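+
+// Operand order for these two-operand templates: on entry x10 caches the top
+// of stack (the right operand) and pop_i/pop_l fetches the left operand into
+// x11, so "x10 <== x11 op x10" matches the stack discipline:
+//
+//   int r = pop();    // pushed last  -> already in x10 (tos cached)
+//   int l = pop();    // pushed first -> x11
+//   push(l op r);     // result left in x10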
+
|
|
+void TemplateTable::lop2(Operation op)
|
|
+{
|
|
+ transition(ltos, ltos);
|
|
+ // x10 <== x11 op x10
|
|
+ __ pop_l(x11);
|
|
+ switch (op) {
|
|
+ case add : __ add(x10, x11, x10); break;
|
|
+ case sub : __ sub(x10, x11, x10); break;
|
|
+ case mul : __ mul(x10, x11, x10); break;
|
|
+ case _and : __ andr(x10, x11, x10); break;
|
|
+ case _or : __ orr(x10, x11, x10); break;
|
|
+ case _xor : __ xorr(x10, x11, x10); break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::idiv()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ // explicitly check for div0
|
|
+ Label no_div0;
|
|
+ __ bnez(x10, no_div0);
|
|
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
|
|
+ __ jr(t0);
|
|
+ __ bind(no_div0);
|
|
+ __ pop_i(x11);
|
|
+ // x10 <== x11 idiv x10
|
|
+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false);
|
|
+}
|
|
+
|
|
+void TemplateTable::irem()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ // explicitly check for div0
|
|
+ Label no_div0;
|
|
+ __ bnez(x10, no_div0);
|
|
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
|
|
+ __ jr(t0);
|
|
+ __ bind(no_div0);
|
|
+ __ pop_i(x11);
|
|
+ // x10 <== x11 irem x10
|
|
+ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true);
|
|
+}
|
|
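+
+// Beyond the explicit zero check above, the corrected_idivl/idivq helpers have
+// to provide Java's division semantics, in particular the one overflowing case
+// (illustrative sketch):
+//
+//   jint idiv(jint l, jint r) {
+//     // r == 0 has already been ruled out (ArithmeticException path above)
+//     if (l == min_jint && r == -1) return min_jint;  // overflow case is pinned, no trap
+//     return l / r;                                   // remainder takes the sign of the dividend
+//   }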
+
|
|
+void TemplateTable::lmul()
|
|
+{
|
|
+ transition(ltos, ltos);
|
|
+ __ pop_l(x11);
|
|
+ __ mul(x10, x10, x11);
|
|
+}
|
|
+
|
|
+void TemplateTable::ldiv()
|
|
+{
|
|
+ transition(ltos, ltos);
|
|
+ // explicitly check for div0
|
|
+ Label no_div0;
|
|
+ __ bnez(x10, no_div0);
|
|
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
|
|
+ __ jr(t0);
|
|
+ __ bind(no_div0);
|
|
+ __ pop_l(x11);
|
|
+ // x10 <== x11 ldiv x10
|
|
+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false);
|
|
+}
|
|
+
|
|
+void TemplateTable::lrem()
|
|
+{
|
|
+ transition(ltos, ltos);
|
|
+ // explicitly check for div0
|
|
+ Label no_div0;
|
|
+ __ bnez(x10, no_div0);
|
|
+ __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
|
|
+ __ jr(t0);
|
|
+ __ bind(no_div0);
|
|
+ __ pop_l(x11);
|
|
+ // x10 <== x11 lrem x10
|
|
+ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true);
|
|
+}
|
|
+
|
|
+void TemplateTable::lshl()
|
|
+{
|
|
+ transition(itos, ltos);
|
|
+ // shift count is in x10
|
|
+ __ pop_l(x11);
|
|
+ __ sll(x10, x11, x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::lshr()
|
|
+{
|
|
+ transition(itos, ltos);
|
|
+ // shift count is in x10
|
|
+ __ pop_l(x11);
|
|
+ __ sra(x10, x11, x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::lushr()
|
|
+{
|
|
+ transition(itos, ltos);
|
|
+ // shift count is in x10
|
|
+ __ pop_l(x11);
|
|
+ __ srl(x10, x11, x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::fop2(Operation op)
|
|
+{
|
|
+ transition(ftos, ftos);
|
|
+ switch (op) {
|
|
+ case add:
|
|
+ __ pop_f(f11);
|
|
+ __ fadd_s(f10, f11, f10);
|
|
+ break;
|
|
+ case sub:
|
|
+ __ pop_f(f11);
|
|
+ __ fsub_s(f10, f11, f10);
|
|
+ break;
|
|
+ case mul:
|
|
+ __ pop_f(f11);
|
|
+ __ fmul_s(f10, f11, f10);
|
|
+ break;
|
|
+ case div:
|
|
+ __ pop_f(f11);
|
|
+ __ fdiv_s(f10, f11, f10);
|
|
+ break;
|
|
+ case rem:
|
|
+ __ fmv_s(f11, f10);
|
|
+ __ pop_f(f10);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::dop2(Operation op)
|
|
+{
|
|
+ transition(dtos, dtos);
|
|
+ switch (op) {
|
|
+ case add:
|
|
+ __ pop_d(f11);
|
|
+ __ fadd_d(f10, f11, f10);
|
|
+ break;
|
|
+ case sub:
|
|
+ __ pop_d(f11);
|
|
+ __ fsub_d(f10, f11, f10);
|
|
+ break;
|
|
+ case mul:
|
|
+ __ pop_d(f11);
|
|
+ __ fmul_d(f10, f11, f10);
|
|
+ break;
|
|
+ case div:
|
|
+ __ pop_d(f11);
|
|
+ __ fdiv_d(f10, f11, f10);
|
|
+ break;
|
|
+ case rem:
|
|
+ __ fmv_d(f11, f10);
|
|
+ __ pop_d(f10);
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::ineg()
|
|
+{
|
|
+ transition(itos, itos);
|
|
+ __ negw(x10, x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::lneg()
|
|
+{
|
|
+ transition(ltos, ltos);
|
|
+ __ neg(x10, x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::fneg()
|
|
+{
|
|
+ transition(ftos, ftos);
|
|
+ __ fneg_s(f10, f10);
|
|
+}
|
|
+
|
|
+void TemplateTable::dneg()
|
|
+{
|
|
+ transition(dtos, dtos);
|
|
+ __ fneg_d(f10, f10);
|
|
+}
|
|
+
|
|
+void TemplateTable::iinc()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ load_signed_byte(x11, at_bcp(2)); // get constant
|
|
+ locals_index(x12);
|
|
+ __ ld(x10, iaddress(x12, x10, _masm));
|
|
+ __ addw(x10, x10, x11);
|
|
+ __ sd(x10, iaddress(x12, t0, _masm));
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_iinc()
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ __ lwu(x11, at_bcp(2)); // get constant and index
|
|
+ __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend
|
|
+ __ zero_extend(x12, x11, 16);
|
|
+ __ neg(x12, x12);
|
|
+ __ slli(x11, x11, 32);
|
|
+ __ srai(x11, x11, 48);
|
|
+ __ ld(x10, iaddress(x12, t0, _masm));
|
|
+ __ addw(x10, x10, x11);
|
|
+ __ sd(x10, iaddress(x12, t0, _masm));
|
|
+}
|
|
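+
+// Operand decoding performed by the shifts above, per the JVM spec for
+// wide iinc (with xbcp at the wide opcode: a big-endian u2 local index
+// followed by a big-endian s2 increment):
+//
+//   u2 index = ((u2)bcp[2] << 8) | bcp[3];
+//   s2 incr  = (s2)(((u2)bcp[4] << 8) | bcp[5]);
+//   locals[index] += incr;   // 32-bit add, stored back sign-extended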
+
|
|
+void TemplateTable::convert()
|
|
+{
|
|
+ // Checking
|
|
+#ifdef ASSERT
|
|
+ {
|
|
+ TosState tos_in = ilgl;
|
|
+ TosState tos_out = ilgl;
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_i2l: // fall through
|
|
+ case Bytecodes::_i2f: // fall through
|
|
+ case Bytecodes::_i2d: // fall through
|
|
+ case Bytecodes::_i2b: // fall through
|
|
+ case Bytecodes::_i2c: // fall through
|
|
+ case Bytecodes::_i2s: tos_in = itos; break;
|
|
+ case Bytecodes::_l2i: // fall through
|
|
+ case Bytecodes::_l2f: // fall through
|
|
+ case Bytecodes::_l2d: tos_in = ltos; break;
|
|
+ case Bytecodes::_f2i: // fall through
|
|
+ case Bytecodes::_f2l: // fall through
|
|
+ case Bytecodes::_f2d: tos_in = ftos; break;
|
|
+ case Bytecodes::_d2i: // fall through
|
|
+ case Bytecodes::_d2l: // fall through
|
|
+ case Bytecodes::_d2f: tos_in = dtos; break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_l2i: // fall through
|
|
+ case Bytecodes::_f2i: // fall through
|
|
+ case Bytecodes::_d2i: // fall through
|
|
+ case Bytecodes::_i2b: // fall through
|
|
+ case Bytecodes::_i2c: // fall through
|
|
+ case Bytecodes::_i2s: tos_out = itos; break;
|
|
+ case Bytecodes::_i2l: // fall through
|
|
+ case Bytecodes::_f2l: // fall through
|
|
+ case Bytecodes::_d2l: tos_out = ltos; break;
|
|
+ case Bytecodes::_i2f: // fall through
|
|
+ case Bytecodes::_l2f: // fall through
|
|
+ case Bytecodes::_d2f: tos_out = ftos; break;
|
|
+ case Bytecodes::_i2d: // fall through
|
|
+ case Bytecodes::_l2d: // fall through
|
|
+ case Bytecodes::_f2d: tos_out = dtos; break;
|
|
+ default : ShouldNotReachHere();
|
|
+ }
|
|
+ transition(tos_in, tos_out);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+ // Conversion
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_i2l:
|
|
+ __ sign_extend(x10, x10, 32);
|
|
+ break;
|
|
+ case Bytecodes::_i2f:
|
|
+ __ fcvt_s_w(f10, x10);
|
|
+ break;
|
|
+ case Bytecodes::_i2d:
|
|
+ __ fcvt_d_w(f10, x10);
|
|
+ break;
|
|
+ case Bytecodes::_i2b:
|
|
+ __ sign_extend(x10, x10, 8);
|
|
+ break;
|
|
+ case Bytecodes::_i2c:
|
|
+ __ zero_extend(x10, x10, 16);
|
|
+ break;
|
|
+ case Bytecodes::_i2s:
|
|
+ __ sign_extend(x10, x10, 16);
|
|
+ break;
|
|
+ case Bytecodes::_l2i:
|
|
+ __ addw(x10, x10, zr);
|
|
+ break;
|
|
+ case Bytecodes::_l2f:
|
|
+ __ fcvt_s_l(f10, x10);
|
|
+ break;
|
|
+ case Bytecodes::_l2d:
|
|
+ __ fcvt_d_l(f10, x10);
|
|
+ break;
|
|
+ case Bytecodes::_f2i:
|
|
+ __ fcvt_w_s_safe(x10, f10);
|
|
+ break;
|
|
+ case Bytecodes::_f2l:
|
|
+ __ fcvt_l_s_safe(x10, f10);
|
|
+ break;
|
|
+ case Bytecodes::_f2d:
|
|
+ __ fcvt_d_s(f10, f10);
|
|
+ break;
|
|
+ case Bytecodes::_d2i:
|
|
+ __ fcvt_w_d_safe(x10, f10);
|
|
+ break;
|
|
+ case Bytecodes::_d2l:
|
|
+ __ fcvt_l_d_safe(x10, f10);
|
|
+ break;
|
|
+ case Bytecodes::_d2f:
|
|
+ __ fcvt_s_d(f10, f10);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+}
|
|
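+
+// Java pins down the f2i/f2l/d2i/d2l results: NaN converts to 0 and
+// out-of-range values saturate to the target type's min/max, whereas a plain
+// RISC-V fcvt yields the maximum value for NaN - presumably the reason for the
+// *_safe variants used above. The required result, spelled out for f2i:
+//
+//   jint f2i(jfloat x) {
+//     if (x != x)                return 0;         // NaN
+//     if (x >= (jfloat)max_jint) return max_jint;  // saturate high
+//     if (x <= (jfloat)min_jint) return min_jint;  // saturate low
+//     return (jint)x;
+//   }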
+
|
|
+void TemplateTable::lcmp()
|
|
+{
|
|
+ transition(ltos, itos);
|
|
+ __ pop_l(x11);
|
|
+ __ cmp_l2i(t0, x11, x10);
|
|
+ __ mv(x10, t0);
|
|
+}
|
|
+
|
|
+void TemplateTable::float_cmp(bool is_float, int unordered_result)
|
|
+{
|
|
+ // For instruction feq, flt and fle, the result is 0 if either operand is NaN
|
|
+ if (is_float) {
|
|
+ __ pop_f(f11);
|
|
+ // if unordered_result < 0:
|
|
+ // we want -1 for unordered or less than, 0 for equal and 1 for
|
|
+ // greater than.
|
|
+ // else:
|
|
+ // we want -1 for less than, 0 for equal and 1 for unordered or
|
|
+ // greater than.
|
|
+ // f11 primary, f10 secondary
|
|
+ __ float_compare(x10, f11, f10, unordered_result);
|
|
+ } else {
|
|
+ __ pop_d(f11);
|
|
+ // if unordered_result < 0:
|
|
+ // we want -1 for unordered or less than, 0 for equal and 1 for
|
|
+ // greater than.
|
|
+ // else:
|
|
+ // we want -1 for less than, 0 for equal and 1 for unordered or
|
|
+ // greater than.
|
|
+ // f11 primary, f10 secondary
|
|
+ __ double_compare(x10, f11, f10, unordered_result);
|
|
+ }
|
|
+}
|
|
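+
+// Both comparison templates produce the canonical -1/0/1 result, with the NaN
+// (unordered) case folded toward unordered_result, matching fcmpl/fcmpg and
+// dcmpl/dcmpg:
+//
+//   int compare(double a, double b, int unordered_result) {
+//     if (a != a || b != b) return unordered_result;   // -1 for *cmpl, +1 for *cmpg
+//     return (a < b) ? -1 : ((a > b) ? 1 : 0);
+//   }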
+
|
|
+void TemplateTable::branch(bool is_jsr, bool is_wide)
|
|
+{
|
|
+ // We might be moving to a safepoint. The thread which calls
|
|
+ // Interpreter::notice_safepoints() will effectively flush its cache
|
|
+ // when it makes a system call, but we need to do something to
|
|
+ // ensure that we see the changed dispatch table.
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+
|
|
+ __ profile_taken_branch(x10, x11);
|
|
+ const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
|
|
+ InvocationCounter::counter_offset();
|
|
+ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
|
|
+ InvocationCounter::counter_offset();
|
|
+
|
|
+ // load branch displacement
|
|
+ if (!is_wide) {
|
|
+ __ lhu(x12, at_bcp(1));
|
|
+ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend
|
|
+ } else {
|
|
+ __ lwu(x12, at_bcp(1));
|
|
+ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
|
|
+ }
|
|
+
|
|
+ // Handle all the JSR stuff here, then exit.
|
|
+ // It's much shorter and cleaner than intermingling with the non-JSR
|
|
+ // normal-branch stuff occurring below.
|
|
+
|
|
+ if (is_jsr) {
|
|
+ // compute return address as bci
|
|
+ __ ld(t1, Address(xmethod, Method::const_offset()));
|
|
+ __ add(t1, t1,
|
|
+ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3));
|
|
+ __ sub(x11, xbcp, t1);
|
|
+ __ push_i(x11);
|
|
+ // Adjust the bcp by the 16-bit displacement in x12
|
|
+ __ add(xbcp, xbcp, x12);
|
|
+ __ load_unsigned_byte(t0, Address(xbcp, 0));
|
|
+ // load the next target bytecode into t0, it is the argument of dispatch_only
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ // Normal (non-jsr) branch handling
|
|
+
|
|
+ // Adjust the bcp by the displacement in x12
|
|
+ __ add(xbcp, xbcp, x12);
|
|
+
|
|
+ assert(UseLoopCounter || !UseOnStackReplacement,
|
|
+ "on-stack-replacement requires loop counters");
|
|
+ Label backedge_counter_overflow;
|
|
+ Label profile_method;
|
|
+ Label dispatch;
|
|
+ if (UseLoopCounter) {
|
|
+ // increment backedge counter for backward branches
|
|
+ // x10: MDO
|
|
+ // x11: MDO bumped taken-count
|
|
+ // x12: target offset
|
|
+ __ bgtz(x12, dispatch); // count only if backward branch
|
|
+
|
|
+ // check if MethodCounters exists
|
|
+ Label has_counters;
|
|
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
|
|
+ __ bnez(t0, has_counters);
|
|
+ __ push_reg(x10);
|
|
+ __ push_reg(x11);
|
|
+ __ push_reg(x12);
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::build_method_counters), xmethod);
|
|
+ __ pop_reg(x12);
|
|
+ __ pop_reg(x11);
|
|
+ __ pop_reg(x10);
|
|
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
|
|
+ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory
|
|
+ __ bind(has_counters);
|
|
+
|
|
+ if (TieredCompilation) {
|
|
+ Label no_mdo;
|
|
+ int increment = InvocationCounter::count_increment;
|
|
+ if (ProfileInterpreter) {
|
|
+ // Are we profiling?
|
|
+ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
|
|
+ __ beqz(x11, no_mdo);
|
|
+ // Increment the MDO backedge counter
|
|
+ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) +
|
|
+ in_bytes(InvocationCounter::counter_offset()));
|
|
+ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset()));
|
|
+ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
|
|
+ x10, t0, false,
|
|
+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
|
|
+ __ j(dispatch);
|
|
+ }
|
|
+ __ bind(no_mdo);
|
|
+ // Increment backedge counter in MethodCounters*
|
|
+ __ ld(t0, Address(xmethod, Method::method_counters_offset()));
|
|
+ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset()));
|
|
+ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask,
|
|
+ x10, t1, false,
|
|
+ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
|
|
+ } else { // not TieredCompilation
|
|
+ // increment counter
|
|
+ __ ld(t1, Address(xmethod, Method::method_counters_offset()));
|
|
+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter
|
|
+ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter
|
|
+ __ sw(t0, Address(t1, be_offset)); // store counter
|
|
+
|
|
+ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter
|
|
+ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits
|
|
+ __ addw(x10, x10, t0); // add both counters
|
|
+
|
|
+ if (ProfileInterpreter) {
|
|
+ // Test to see if we should create a method data oop
|
|
+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
|
|
+ __ blt(x10, t0, dispatch);
|
|
+
|
|
+ // if no method data exists, go to profile method
|
|
+ __ test_method_data_pointer(x10, profile_method);
|
|
+
|
|
+ if (UseOnStackReplacement) {
|
|
+ // check for overflow against x11 which is the MDO taken count
|
|
+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
|
|
+ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower
|
|
+
|
|
+ // When ProfileInterpreter is on, the backedge_count comes
+ // from the MethodData*, whose value does not get reset on
+ // the call to frequency_counter_overflow(). To avoid
+ // excessive calls to the overflow routine while the method is
+ // being compiled, add a second test to make sure the overflow
+ // function is called only once every overflow_frequency.
|
|
+ const int overflow_frequency = 1024;
|
|
+ __ andi(x11, x11, overflow_frequency - 1);
|
|
+ __ beqz(x11, backedge_counter_overflow);
|
|
+
|
|
+ }
|
|
+ } else {
|
|
+ if (UseOnStackReplacement) {
|
|
+ // check for overflow against x10, which is the sum of the
|
|
+ // counters
|
|
+ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
|
|
+ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ __ bind(dispatch);
|
|
+ }
|
|
+
|
|
+ // Pre-load the next target bytecode into t0
|
|
+ __ load_unsigned_byte(t0, Address(xbcp, 0));
|
|
+
|
|
+ // continue with the bytecode @ target
|
|
+ // t0: target bytecode
|
|
+ // xbcp: target bcp
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+
|
|
+ if (UseLoopCounter) {
|
|
+ if (ProfileInterpreter && !TieredCompilation) {
|
|
+ // Out-of-line code to allocate method data oop.
|
|
+ __ bind(profile_method);
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
|
|
+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode
|
|
+ __ set_method_data_pointer_for_bcp();
|
|
+ __ j(dispatch);
|
|
+ }
|
|
+
|
|
+ if (UseOnStackReplacement) {
|
|
+ // invocation counter overflow
|
|
+ __ bind(backedge_counter_overflow);
|
|
+ __ neg(x12, x12);
|
|
+ __ add(x12, x12, xbcp); // branch xbcp
|
|
+ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::frequency_counter_overflow),
|
|
+ x12);
|
|
+ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode
|
|
+
|
|
+ // x10: osr nmethod (osr ok) or NULL (osr not possible)
+ // x11: target bytecode
+ // x12: temporary
|
|
+ __ beqz(x10, dispatch); // test result -- no osr if null
|
|
+ // nmethod may have been invalidated (VM may block upon call_VM return)
|
|
+ __ lbu(x12, Address(x10, nmethod::state_offset()));
|
|
+ if (nmethod::in_use != 0) {
|
|
+ __ sub(x12, x12, nmethod::in_use);
|
|
+ }
|
|
+ __ bnez(x12, dispatch);
|
|
+
|
|
+ // We have the address of an on stack replacement routine in x10
|
|
+ // We need to prepare to execute the OSR method. First we must
|
|
+ // migrate the locals and monitors off of the stack.
|
|
+
|
|
+ __ mv(x9, x10); // save the nmethod
|
|
+
|
|
+ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
|
|
+
|
|
+ // x10 is OSR buffer, move it to expected parameter location
|
|
+ __ mv(j_rarg0, x10);
|
|
+
|
|
+ // remove activation
|
|
+ // get sender esp
|
|
+ __ ld(esp,
|
|
+ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
|
|
+ // remove frame anchor
|
|
+ __ leave();
|
|
+ // Ensure compiled code always sees stack at proper alignment
|
|
+ __ andi(sp, esp, -16);
|
|
+
|
|
+ // and begin the OSR nmethod
|
|
+ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset()));
|
|
+ __ jr(t0);
|
|
+ }
|
|
+ }
|
|
+
|
|
+}
|
|
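+
+// Shape of the profiling logic above: every taken backward branch bumps a
+// backedge counter (in the MDO when profiling, otherwise in MethodCounters)
+// and, once the masked count rolls over, asks the runtime for an OSR nmethod.
+// Roughly:
+//
+//   counter += InvocationCounter::count_increment;
+//   if ((counter & backedge_mask) == 0) {
+//     nmethod* osr = InterpreterRuntime::frequency_counter_overflow(thread, branch_bcp);
+//     if (osr != NULL && osr->is_in_use()) {
+//       // migrate locals/monitors (SharedRuntime::OSR_migration_begin), tear down
+//       // the interpreter frame and jump to the OSR entry point
+//     }
+//   }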
+
|
|
+void TemplateTable::if_0cmp(Condition cc)
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ // assume branch is more often taken than not (loops use backward branches)
|
|
+ Label not_taken;
|
|
+
|
|
+ __ addw(x10, x10, zr);
|
|
+ switch (cc) {
|
|
+ case equal:
|
|
+ __ bnez(x10, not_taken);
|
|
+ break;
|
|
+ case not_equal:
|
|
+ __ beqz(x10, not_taken);
|
|
+ break;
|
|
+ case less:
|
|
+ __ bgez(x10, not_taken);
|
|
+ break;
|
|
+ case less_equal:
|
|
+ __ bgtz(x10, not_taken);
|
|
+ break;
|
|
+ case greater:
|
|
+ __ blez(x10, not_taken);
|
|
+ break;
|
|
+ case greater_equal:
|
|
+ __ bltz(x10, not_taken);
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ branch(false, false);
|
|
+ __ bind(not_taken);
|
|
+ __ profile_not_taken_branch(x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::if_icmp(Condition cc)
|
|
+{
|
|
+ transition(itos, vtos);
|
|
+ // assume branch is more often taken than not (loops use backward branches)
|
|
+ Label not_taken;
|
|
+ __ pop_i(x11);
|
|
+ __ addw(x10, x10, zr);
|
|
+ switch (cc) {
|
|
+ case equal:
|
|
+ __ bne(x11, x10, not_taken);
|
|
+ break;
|
|
+ case not_equal:
|
|
+ __ beq(x11, x10, not_taken);
|
|
+ break;
|
|
+ case less:
|
|
+ __ bge(x11, x10, not_taken);
|
|
+ break;
|
|
+ case less_equal:
|
|
+ __ bgt(x11, x10, not_taken);
|
|
+ break;
|
|
+ case greater:
|
|
+ __ ble(x11, x10, not_taken);
|
|
+ break;
|
|
+ case greater_equal:
|
|
+ __ blt(x11, x10, not_taken);
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ branch(false, false);
|
|
+ __ bind(not_taken);
|
|
+ __ profile_not_taken_branch(x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::if_nullcmp(Condition cc)
|
|
+{
|
|
+ transition(atos, vtos);
|
|
+ // assume branch is more often taken than not (loops use backward branches)
|
|
+ Label not_taken;
|
|
+ if (cc == equal) {
|
|
+ __ bnez(x10, not_taken);
|
|
+ } else {
|
|
+ __ beqz(x10, not_taken);
|
|
+ }
|
|
+ branch(false, false);
|
|
+ __ bind(not_taken);
|
|
+ __ profile_not_taken_branch(x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::if_acmp(Condition cc)
|
|
+{
|
|
+ transition(atos, vtos);
|
|
+ // assume branch is more often taken than not (loops use backward branches)
|
|
+ Label not_taken;
|
|
+ __ pop_ptr(x11);
|
|
+
|
|
+ if (cc == equal) {
|
|
+ __ oop_nequal(x11, x10, not_taken);
|
|
+ } else if (cc == not_equal) {
|
|
+ __ oop_equal(x11, x10, not_taken);
|
|
+ }
|
|
+ branch(false, false);
|
|
+ __ bind(not_taken);
|
|
+ __ profile_not_taken_branch(x10);
|
|
+}
|
|
+
|
|
+void TemplateTable::ret() {
|
|
+ transition(vtos, vtos);
|
|
+ // We might be moving to a safepoint. The thread which calls
|
|
+ // Interpreter::notice_safepoints() will effectively flush its cache
|
|
+ // when it makes a system call, but we need to do something to
|
|
+ // ensure that we see the changed dispatch table.
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+
|
|
+ locals_index(x11);
|
|
+ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp
|
|
+ __ profile_ret(x11, x12);
|
|
+ __ ld(xbcp, Address(xmethod, Method::const_offset()));
|
|
+ __ add(xbcp, xbcp, x11);
|
|
+ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));
|
|
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
|
|
+}
|
|
+
|
|
+void TemplateTable::wide_ret() {
|
|
+ transition(vtos, vtos);
|
|
+ locals_index_wide(x11);
|
|
+ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp
|
|
+ __ profile_ret(x11, x12);
|
|
+ __ ld(xbcp, Address(xmethod, Method::const_offset()));
|
|
+ __ add(xbcp, xbcp, x11);
|
|
+ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));
|
|
+ __ dispatch_next(vtos, 0, /*generate_poll*/true);
|
|
+}
|
|
+
|
|
+void TemplateTable::tableswitch() {
|
|
+ Label default_case, continue_execution;
|
|
+ transition(itos, vtos);
|
|
+ // align xbcp
|
|
+ __ la(x11, at_bcp(BytesPerInt));
|
|
+ __ andi(x11, x11, -BytesPerInt);
|
|
+ // load lo & hi
|
|
+ __ lwu(x12, Address(x11, BytesPerInt));
|
|
+ __ lwu(x13, Address(x11, 2 * BytesPerInt));
|
|
+ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
|
|
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
|
+ // check against lo & hi
|
|
+ __ blt(x10, x12, default_case);
|
|
+ __ bgt(x10, x13, default_case);
|
|
+ // lookup dispatch offset
|
|
+ __ subw(x10, x10, x12);
|
|
+ __ shadd(x13, x10, x11, t0, 2);
|
|
+ __ lwu(x13, Address(x13, 3 * BytesPerInt));
|
|
+ __ profile_switch_case(x10, x11, x12);
|
|
+ // continue execution
|
|
+ __ bind(continue_execution);
|
|
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
|
+ __ add(xbcp, xbcp, x13);
|
|
+ __ load_unsigned_byte(t0, Address(xbcp));
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+ // handle default
|
|
+ __ bind(default_case);
|
|
+ __ profile_switch_default(x10);
|
|
+ __ lwu(x13, Address(x11, 0));
|
|
+ __ j(continue_execution);
|
|
+}
|
|
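+
+// tableswitch operand layout handled above (after skipping the alignment
+// padding): a big-endian s4 default offset, then low, then high, then
+// (high - low + 1) big-endian s4 jump offsets, all relative to the address of
+// the tableswitch opcode. Dispatch rule:
+//
+//   if (key < low || key > high) {
+//     next_bcp = opcode_bcp + default_offset;
+//   } else {
+//     next_bcp = opcode_bcp + offsets[key - low];
+//   }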
+
|
|
+void TemplateTable::lookupswitch() {
|
|
+ transition(itos, itos);
|
|
+ __ stop("lookupswitch bytecode should have been rewritten");
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_linearswitch() {
|
|
+ transition(itos, vtos);
|
|
+ Label loop_entry, loop, found, continue_execution;
|
|
+ // bswap x10 so we can avoid bswapping the table entries
|
|
+ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
|
|
+ // align xbcp
|
|
+ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
|
|
+ // this instruction (change offsets
|
|
+ // below)
|
|
+ __ andi(x9, x9, -BytesPerInt);
|
|
+ // set counter
|
|
+ __ lwu(x11, Address(x9, BytesPerInt));
|
|
+ __ revb_w(x11, x11);
|
|
+ __ j(loop_entry);
|
|
+ // table search
|
|
+ __ bind(loop);
|
|
+ __ shadd(t0, x11, x9, t0, 3);
|
|
+ __ lw(t0, Address(t0, 2 * BytesPerInt));
|
|
+ __ beq(x10, t0, found);
|
|
+ __ bind(loop_entry);
|
|
+ __ addi(x11, x11, -1);
|
|
+ __ bgez(x11, loop);
|
|
+ // default case
|
|
+ __ profile_switch_default(x10);
|
|
+ __ lwu(x13, Address(x9, 0));
|
|
+ __ j(continue_execution);
|
|
+ // entry found -> get offset
|
|
+ __ bind(found);
|
|
+ __ shadd(t0, x11, x9, t0, 3);
|
|
+ __ lwu(x13, Address(t0, 3 * BytesPerInt));
|
|
+ __ profile_switch_case(x11, x10, x9);
|
|
+ // continue execution
|
|
+ __ bind(continue_execution);
|
|
+ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
|
|
+ __ add(xbcp, xbcp, x13);
|
|
+ __ lbu(t0, Address(xbcp, 0));
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_binaryswitch() {
|
|
+ transition(itos, vtos);
|
|
+ // Implementation using the following core algorithm:
|
|
+ //
|
|
+ // int binary_search(int key, LookupswitchPair* array, int n)
|
|
+ // binary_search start:
|
|
+ // #Binary search according to "Methodik des Programmierens" by
|
|
+ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
|
|
+ // int i = 0;
|
|
+ // int j = n;
|
|
+ // while (i + 1 < j) do
|
|
+ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
|
|
+ // # with Q: for all i: 0 <= i < n: key < a[i]
|
|
+ // # where a stands for the array and assuming that the (non-existent)
|
|
+ // # element a[n] is infinitely big.
|
|
+ // int h = (i + j) >> 1
|
|
+ // # i < h < j
|
|
+ // if (key < array[h].fast_match())
|
|
+ // then [j = h]
|
|
+ // else [i = h]
|
|
+ // end
|
|
+ // # R: a[i] <= key < a[i+1] or Q
|
|
+ // # (i.e., if key is within array, i is the correct index)
|
|
+ // return i
|
|
+ // binary_search end
|
|
+
|
|
+
|
|
+ // Register allocation
|
|
+ const Register key = x10; // already set (tosca)
|
|
+ const Register array = x11;
|
|
+ const Register i = x12;
|
|
+ const Register j = x13;
|
|
+ const Register h = x14;
|
|
+ const Register temp = x15;
|
|
+
|
|
+ // Find array start
|
|
+ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
|
|
+ // get rid of this
|
|
+ // instruction (change
|
|
+ // offsets below)
|
|
+ __ andi(array, array, -BytesPerInt);
|
|
+
|
|
+ // Initialize i & j
|
|
+ __ mv(i, zr); // i = 0
|
|
+ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array)
|
|
+
|
|
+ // Convert j into native byteordering
|
|
+ __ revb_w(j, j);
|
|
+
|
|
+ // And start
|
|
+ Label entry;
|
|
+ __ j(entry);
|
|
+
|
|
+ // binary search loop
|
|
+ {
|
|
+ Label loop;
|
|
+ __ bind(loop);
|
|
+ __ addw(h, i, j); // h = i + j
|
|
+ __ srliw(h, h, 1); // h = (i + j) >> 1
|
|
+ // if [key < array[h].fast_match()]
|
|
+ // then [j = h]
|
|
+ // else [i = h]
|
|
+ // Convert array[h].match to native byte-ordering before compare
|
|
+ __ shadd(temp, h, array, temp, 3);
|
|
+ __ ld(temp, Address(temp, 0));
|
|
+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
|
|
+
|
|
+ Label L_done, L_greater;
|
|
+ __ bge(key, temp, L_greater);
|
|
+ // if [key < array[h].fast_match()] then j = h
|
|
+ __ mv(j, h);
|
|
+ __ j(L_done);
|
|
+ __ bind(L_greater);
|
|
+ // if [key >= array[h].fast_match()] then i = h
|
|
+ __ mv(i, h);
|
|
+ __ bind(L_done);
|
|
+
|
|
+ // while [i + 1 < j]
|
|
+ __ bind(entry);
|
|
+ __ addiw(h, i, 1); // i + 1
|
|
+ __ blt(h, j, loop); // i + 1 < j
|
|
+ }
|
|
+
|
|
+ // end of binary search, result index is i (must check again!)
|
|
+ Label default_case;
|
|
+ // Convert array[i].match to native byte-ordering before compare
|
|
+ __ shadd(temp, i, array, temp, 3);
|
|
+ __ ld(temp, Address(temp, 0));
|
|
+ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
|
|
+ __ bne(key, temp, default_case);
|
|
+
|
|
+ // entry found -> j = offset
|
|
+ __ shadd(temp, i, array, temp, 3);
|
|
+ __ lwu(j, Address(temp, BytesPerInt));
|
|
+ __ profile_switch_case(i, key, array);
|
|
+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
|
|
+
|
|
+ __ add(temp, xbcp, j);
|
|
+ __ load_unsigned_byte(t0, Address(temp, 0));
|
|
+
|
|
+ __ add(xbcp, xbcp, j);
|
|
+ __ la(xbcp, Address(xbcp, 0));
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+
|
|
+ // default case -> j = default offset
|
|
+ __ bind(default_case);
|
|
+ __ profile_switch_default(i);
|
|
+ __ lwu(j, Address(array, -2 * BytesPerInt));
|
|
+ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
|
|
+
|
|
+ __ add(temp, xbcp, j);
|
|
+ __ load_unsigned_byte(t0, Address(temp, 0));
|
|
+
|
|
+ __ add(xbcp, xbcp, j);
|
|
+ __ la(xbcp, Address(xbcp, 0));
|
|
+ __ dispatch_only(vtos, /*generate_poll*/true);
|
|
+}
|
|
+
|
|
+void TemplateTable::_return(TosState state)
|
|
+{
|
|
+ transition(state, state);
|
|
+ assert(_desc->calls_vm(),
|
|
+ "inconsistent calls_vm information"); // call in remove_activation
|
|
+
|
|
+ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
|
|
+ assert(state == vtos, "only valid state");
|
|
+
|
|
+ __ ld(c_rarg1, aaddress(0));
|
|
+ __ load_klass(x13, c_rarg1);
|
|
+ __ lwu(x13, Address(x13, Klass::access_flags_offset()));
|
|
+ Label skip_register_finalizer;
|
|
+ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER);
|
|
+ __ beqz(t0, skip_register_finalizer);
|
|
+
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
|
|
+
|
|
+ __ bind(skip_register_finalizer);
|
|
+ }
|
|
+
|
|
+ // Issue a StoreStore barrier after all stores but before return
|
|
+ // from any constructor for any class with a final field. We don't
|
|
+ // know if this is a finalizer, so we always do so.
|
|
+ if (_desc->bytecode() == Bytecodes::_return) {
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+ }
|
|
+
|
|
+ // Narrow result if state is itos but result type is smaller.
|
|
+ // Need to narrow in the return bytecode rather than in generate_return_entry
|
|
+ // since compiled code callers expect the result to already be narrowed.
|
|
+ if (state == itos) {
|
|
+ __ narrow(x10);
|
|
+ }
|
|
+
|
|
+ __ remove_activation(state);
|
|
+ __ ret();
|
|
+}
|
|
+
|
|
+
|
|
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPUs
+// in order. Store buffers on most chips allow reads & writes to
+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
+// without some kind of memory barrier (i.e., it's not sufficient that
+// the interpreter does not reorder volatile references, the hardware
+// also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized with respect to each other. ALSO reads &
+// writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that
+// happen after the read float up to before the read. It's OK for
+// non-volatile memory refs that happen before the volatile read to
+// float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile
+// memory refs that happen BEFORE the write float down to after the
+// write. It's OK for non-volatile memory refs that happen after the
+// volatile write to float up before it.
+//
+// We only put in barriers around volatile refs (they are expensive),
+// not _between_ memory refs (that would require us to track the
+// flavor of the previous memory refs). Requirements (2) and (3)
+// require some barriers before volatile stores and after volatile
+// loads. These nearly cover requirement (1) but miss the
+// volatile-store-volatile-load case. This final case is placed after
+// volatile-stores although it could just as well go before
+// volatile-loads.
|
|
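+//
+// In the field-access templates below this works out, for a volatile field,
+// roughly as:
+//
+//   volatile load :  load the value, then  membar(LoadLoad | LoadStore)      // acquire
+//   volatile store:  membar(StoreStore | LoadStore), then store the value,   // release
+//                    followed later by a barrier ordering the store before
+//                    subsequent volatile loads
+//
+// the final barrier covering the volatile-store / volatile-load case noted above.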
+
|
|
+void TemplateTable::resolve_cache_and_index(int byte_no,
|
|
+ Register Rcache,
|
|
+ Register index,
|
|
+ size_t index_size) {
|
|
+ const Register temp = x9;
|
|
+ assert_different_registers(Rcache, index, temp);
|
|
+
|
|
+ Label resolved;
|
|
+
|
|
+ Bytecodes::Code code = bytecode();
|
|
+ switch (code) {
|
|
+ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
|
|
+ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
|
|
+ default: break;
|
|
+ }
|
|
+
|
|
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
|
|
+ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
|
|
+ __ mv(t0, (int) code);
|
|
+ __ beq(temp, t0, resolved);
|
|
+
|
|
+ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
|
|
+ __ mv(temp, (int) code);
|
|
+ __ call_VM(noreg, entry, temp);
|
|
+
|
|
+ // Update registers with resolved info
|
|
+ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
|
|
+ // n.b. unlike x86, Rcache is now rcpool plus the indexed offset,
+ // so all clients of this method must be modified accordingly
|
|
+ __ bind(resolved);
|
|
+}
|
|
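+
+// Resolution protocol above, in outline: the bytecode recorded in the constant
+// pool cache entry for this bcp is compared against the current bytecode (with
+// the nofast_ variants mapped back first); a mismatch means the entry has not
+// been resolved for this use yet, so the runtime is called and the cache/index
+// registers are reloaded. Roughly:
+//
+//   if (bytecode_recorded_in_cache(bcp, byte_no) != code) {   // pseudo-helper
+//     InterpreterRuntime::resolve_from_cache(thread, code);   // may safepoint / GC
+//     // re-read cache and index from the bcp
+//   }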
+
|
|
+// The Rcache and index registers must be set before call
|
|
+// n.b unlike x86 cache already includes the index offset
|
|
+void TemplateTable::load_field_cp_cache_entry(Register obj,
|
|
+ Register cache,
|
|
+ Register index,
|
|
+ Register off,
|
|
+ Register flags,
|
|
+ bool is_static = false) {
|
|
+ assert_different_registers(cache, index, flags, off);
|
|
+
|
|
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
|
|
+ // Field offset
|
|
+ __ ld(off, Address(cache, in_bytes(cp_base_offset +
|
|
+ ConstantPoolCacheEntry::f2_offset())));
|
|
+ // Flags
|
|
+ __ lwu(flags, Address(cache, in_bytes(cp_base_offset +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+
|
|
+ // klass overwrite register
|
|
+ if (is_static) {
|
|
+ __ ld(obj, Address(cache, in_bytes(cp_base_offset +
|
|
+ ConstantPoolCacheEntry::f1_offset())));
|
|
+ const int mirror_offset = in_bytes(Klass::java_mirror_offset());
|
|
+ __ ld(obj, Address(obj, mirror_offset));
|
|
+ __ resolve_oop_handle(obj);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
|
|
+ Register method,
|
|
+ Register itable_index,
|
|
+ Register flags,
|
|
+ bool is_invokevirtual,
|
|
+ bool is_invokevfinal, /*unused*/
|
|
+ bool is_invokedynamic) {
|
|
+ // setup registers
|
|
+ const Register cache = t1;
|
|
+ const Register index = x14;
|
|
+ assert_different_registers(method, flags);
|
|
+ assert_different_registers(method, cache, index);
|
|
+ assert_different_registers(itable_index, flags);
|
|
+ assert_different_registers(itable_index, cache, index);
|
|
+ // determine constant pool cache field offsets
|
|
+ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
|
|
+ const int method_offset = in_bytes(ConstantPoolCache::base_offset() +
|
|
+ (is_invokevirtual ?
|
|
+ ConstantPoolCacheEntry::f2_offset() :
|
|
+ ConstantPoolCacheEntry::f1_offset()));
|
|
+ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::flags_offset());
|
|
+ // access constant pool cache fields
|
|
+ const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::f2_offset());
|
|
+
|
|
+ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
|
|
+ resolve_cache_and_index(byte_no, cache, index, index_size);
|
|
+ __ ld(method, Address(cache, method_offset));
|
|
+
|
|
+ if (itable_index != noreg) {
|
|
+ __ ld(itable_index, Address(cache, index_offset));
|
|
+ }
|
|
+ __ lwu(flags, Address(cache, flags_offset));
|
|
+}
|
|
+
|
|
+// The registers cache and index expected to be set before call.
|
|
+// Correct values of the cache and index registers are preserved.
|
|
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,
|
|
+ bool is_static, bool has_tos) {
|
|
+ // do the JVMTI work here to avoid disturbing the register state below
+ // We use c_rarg registers here because we want to use the registers used in
+ // the call to the VM
|
|
+ if (JvmtiExport::can_post_field_access()) {
|
|
+ // Check to see if a field access watch has been set before we
|
|
+ // take the time to call into the VM.
|
|
+ Label L1;
|
|
+ assert_different_registers(cache, index, x10);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset);
|
|
+ __ lwu(x10, Address(t0, offset));
|
|
+
|
|
+ __ beqz(x10, L1);
|
|
+
|
|
+ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
|
|
+ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset())));
|
|
+
|
|
+ if (is_static) {
|
|
+ __ mv(c_rarg1, zr); // NULL object reference
|
|
+ } else {
|
|
+ __ ld(c_rarg1, at_tos()); // get object pointer without popping it
|
|
+ __ verify_oop(c_rarg1);
|
|
+ }
|
|
+ // c_rarg1: object pointer or NULL
|
|
+ // c_rarg2: cache entry pointer
|
|
+ // c_rarg3: jvalue object on the stack
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::post_field_access),
|
|
+ c_rarg1, c_rarg2, c_rarg3);
|
|
+ __ get_cache_and_index_at_bcp(cache, index, 1);
|
|
+ __ bind(L1);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::pop_and_check_object(Register r)
|
|
+{
|
|
+ __ pop_ptr(r);
|
|
+ __ null_check(r); // for field access must check obj.
|
|
+ __ verify_oop(r);
|
|
+}
|
|
+
|
|
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)
|
|
+{
|
|
+ const Register cache = x12;
|
|
+ const Register index = x13;
|
|
+ const Register obj = x14;
|
|
+ const Register off = x9;
|
|
+ const Register flags = x10;
|
|
+ const Register raw_flags = x16;
|
|
+ const Register bc = x14; // uses same reg as obj, so don't mix them
|
|
+
|
|
+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
|
|
+ jvmti_post_field_access(cache, index, is_static, false);
|
|
+ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
|
|
+
|
|
+ if (!is_static) {
|
|
+ // obj is on the stack
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+
|
|
+ if (!UseBarriersForVolatile) {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ __ add(off, obj, off);
|
|
+ const Address field(off);
|
|
+
|
|
+ Label Done, notByte, notBool, notInt, notShort, notChar,
|
|
+ notLong, notFloat, notObj, notDouble;
|
|
+
|
|
+ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
|
|
+ ConstantPoolCacheEntry::tos_state_bits));
|
|
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);
|
|
+
|
|
+ assert(btos == 0, "change code, btos != 0");
|
|
+ __ bnez(flags, notByte);
|
|
+
|
|
+ // Don't rewrite getstatic, only getfield
|
|
+ if (is_static) {
|
|
+ rc = may_not_rewrite;
|
|
+ }
|
|
+
|
|
+ // btos
|
|
+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ push(btos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notByte);
|
|
+ __ sub(t0, flags, (u1)ztos);
|
|
+ __ bnez(t0, notBool);
|
|
+
|
|
+ // ztos (same code as btos)
|
|
+ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ push(ztos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ // uses btos rewriting, no truncating to t/f bit is needed for getfield
|
|
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notBool);
|
|
+ __ sub(t0, flags, (u1)atos);
|
|
+ __ bnez(t0, notObj);
|
|
+ // atos
|
|
+ do_oop_load(_masm, field, x10, IN_HEAP);
|
|
+ __ push(atos);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notObj);
|
|
+ __ sub(t0, flags, (u1)itos);
|
|
+ __ bnez(t0, notInt);
|
|
+ // itos
|
|
+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ addw(x10, x10, zr); // sign-extend the int result to 64 bits
|
|
+ __ push(itos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notInt);
|
|
+ __ sub(t0, flags, (u1)ctos);
|
|
+ __ bnez(t0, notChar);
|
|
+ // ctos
|
|
+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ push(ctos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notChar);
|
|
+ __ sub(t0, flags, (u1)stos);
|
|
+ __ bnez(t0, notShort);
|
|
+ // stos
|
|
+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ push(stos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notShort);
|
|
+ __ sub(t0, flags, (u1)ltos);
|
|
+ __ bnez(t0, notLong);
|
|
+ // ltos
|
|
+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ push(ltos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notLong);
|
|
+ __ sub(t0, flags, (u1)ftos);
|
|
+ __ bnez(t0, notFloat);
|
|
+ // ftos
|
|
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
|
|
+ __ push(ftos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11);
|
|
+ }
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notFloat);
|
|
+#ifdef ASSERT
|
|
+ __ sub(t0, flags, (u1)dtos);
|
|
+ __ bnez(t0, notDouble);
|
|
+#endif
|
|
+ // dtos
|
|
+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
|
|
+ __ push(dtos);
|
|
+ // Rewrite bytecode to be faster
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11);
|
|
+ }
|
|
+#ifdef ASSERT
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notDouble);
|
|
+ __ stop("Bad state");
|
|
+#endif
|
|
+
|
|
+ __ bind(Done);
|
|
+
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ __ bind(notVolatile);
|
|
+}
|
|
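+
+// The slli/srli pair used above is the usual shift trick for extracting an
+// unsigned bit-field without materializing a mask constant; for the tos_state
+// field it is equivalent to:
+//
+//   unsigned tos_state = (raw_flags >> ConstantPoolCacheEntry::tos_state_shift)
+//                        & ((1u << ConstantPoolCacheEntry::tos_state_bits) - 1);
+//
+// while single-bit flags such as is_volatile are simply tested with andi.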
+
|
|
+void TemplateTable::getfield(int byte_no)
|
|
+{
|
|
+ getfield_or_static(byte_no, false);
|
|
+}
|
|
+
|
|
+void TemplateTable::nofast_getfield(int byte_no) {
|
|
+ getfield_or_static(byte_no, false, may_not_rewrite);
|
|
+}
|
|
+
|
|
+void TemplateTable::getstatic(int byte_no)
|
|
+{
|
|
+ getfield_or_static(byte_no, true);
|
|
+}
|
|
+
|
|
+// The registers cache and index expected to be set before call.
|
|
+// The function may destroy various registers, just not the cache and index registers.
|
|
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
|
|
+ transition(vtos, vtos);
|
|
+
|
|
+ ByteSize cp_base_offset = ConstantPoolCache::base_offset();
|
|
+
|
|
+ if (JvmtiExport::can_post_field_modification()) {
|
|
+ // Check to see if a field modification watch has been set before
|
|
+ // we take the time to call into the VM.
|
|
+ Label L1;
|
|
+ assert_different_registers(cache, index, x10);
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
|
|
+ __ lwu(x10, Address(t0, offset));
|
|
+ __ beqz(x10, L1);
|
|
+
|
|
+ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1);
|
|
+
|
|
+ if (is_static) {
|
|
+ // Life is simple. Null out the object pointer.
|
|
+ __ mv(c_rarg1, zr);
|
|
+ } else {
|
|
+ // Life is harder. The stack holds the value on top, followed by
|
|
+ // the object. We don't know the size of the value, though; it
|
|
+ // could be one or two words depending on its type. As a result,
|
|
+ // we must find the type to determine where the object is.
|
|
+ __ lwu(c_rarg3, Address(c_rarg2,
|
|
+ in_bytes(cp_base_offset +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift);
|
|
+ ConstantPoolCacheEntry::verify_tos_state_shift();
|
|
+ Label nope2, done, ok;
|
|
+ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue
|
|
+ __ sub(t0, c_rarg3, ltos);
|
|
+ __ beqz(t0, ok);
|
|
+ __ sub(t0, c_rarg3, dtos);
|
|
+ __ bnez(t0, nope2);
|
|
+ __ bind(ok);
|
|
+ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue);
|
|
+ __ bind(nope2);
|
|
+ }
|
|
+ // cache entry pointer
|
|
+ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
|
|
+ // object (tos)
|
|
+ __ mv(c_rarg3, esp);
|
|
+ // c_rarg1: object pointer set up above (NULL if static)
|
|
+ // c_rarg2: cache entry pointer
|
|
+ // c_rarg3: jvalue object on the stack
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::post_field_modification),
|
|
+ c_rarg1, c_rarg2, c_rarg3);
|
|
+ __ get_cache_and_index_at_bcp(cache, index, 1);
|
|
+ __ bind(L1);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
|
|
+ transition(vtos, vtos);
|
|
+
|
|
+ const Register cache = x12;
|
|
+ const Register index = x13;
|
|
+ const Register obj = x12;
|
|
+ const Register off = x9;
|
|
+ const Register flags = x10;
|
|
+ const Register bc = x14;
|
|
+
|
|
+ resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
|
|
+ jvmti_post_field_mod(cache, index, is_static);
|
|
+ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
|
|
+
|
|
+ Label Done;
|
|
+ __ mv(x15, flags);
|
|
+
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ Label notByte, notBool, notInt, notShort, notChar,
|
|
+ notLong, notFloat, notObj, notDouble;
|
|
+
|
|
+ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
|
|
+ ConstantPoolCacheEntry::tos_state_bits));
|
|
+ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);
|
|
+
|
|
+ assert(btos == 0, "change code, btos != 0");
|
|
+ __ bnez(flags, notByte);
|
|
+
|
|
+ // Don't rewrite putstatic, only putfield
|
|
+ if (is_static) {
|
|
+ rc = may_not_rewrite;
|
|
+ }
|
|
+
|
|
+ // btos
|
|
+ {
|
|
+ __ pop(btos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0); // the off register is reused as a temporary here.
|
|
+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notByte);
|
|
+ __ sub(t0, flags, (u1)ztos);
|
|
+ __ bnez(t0, notBool);
|
|
+
|
|
+ // ztos
|
|
+ {
|
|
+ __ pop(ztos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notBool);
|
|
+ __ sub(t0, flags, (u1)atos);
|
|
+ __ bnez(t0, notObj);
|
|
+
|
|
+ // atos
|
|
+ {
|
|
+ __ pop(atos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ // Store into the field
|
|
+ do_oop_store(_masm, field, x10, IN_HEAP);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notObj);
|
|
+ __ sub(t0, flags, (u1)itos);
|
|
+ __ bnez(t0, notInt);
|
|
+
|
|
+ // itos
|
|
+ {
|
|
+ __ pop(itos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notInt);
|
|
+ __ sub(t0, flags, (u1)ctos);
|
|
+ __ bnez(t0, notChar);
|
|
+
|
|
+ // ctos
|
|
+ {
|
|
+ __ pop(ctos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notChar);
|
|
+ __ sub(t0, flags, (u1)stos);
|
|
+ __ bnez(t0, notShort);
|
|
+
|
|
+ // stos
|
|
+ {
|
|
+ __ pop(stos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notShort);
|
|
+ __ sub(t0, flags, (u1)ltos);
|
|
+ __ bnez(t0, notLong);
|
|
+
|
|
+ // ltos
|
|
+ {
|
|
+ __ pop(ltos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notLong);
|
|
+ __ sub(t0, flags, (u1)ftos);
|
|
+ __ bnez(t0, notFloat);
|
|
+
|
|
+ // ftos
|
|
+ {
|
|
+ __ pop(ftos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ __ j(Done);
|
|
+ }
|
|
+
|
|
+ __ bind(notFloat);
|
|
+#ifdef ASSERT
|
|
+ __ sub(t0, flags, (u1)dtos);
|
|
+ __ bnez(t0, notDouble);
|
|
+#endif
|
|
+
|
|
+ // dtos
|
|
+ {
|
|
+ __ pop(dtos);
|
|
+ // field address
|
|
+ if (!is_static) {
|
|
+ pop_and_check_object(obj);
|
|
+ }
|
|
+ __ add(off, obj, off); // if static, obj from cache, else obj from stack.
|
|
+ const Address field(off, 0);
|
|
+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg);
|
|
+ if (rc == may_rewrite) {
|
|
+ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no);
|
|
+ }
|
|
+ }
|
|
+
|
|
+#ifdef ASSERT
|
|
+ __ j(Done);
|
|
+
|
|
+ __ bind(notDouble);
|
|
+ __ stop("Bad state");
|
|
+#endif
|
|
+
|
|
+ __ bind(Done);
|
|
+
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::putfield(int byte_no)
|
|
+{
|
|
+ putfield_or_static(byte_no, false);
|
|
+}
|
|
+
|
|
+void TemplateTable::nofast_putfield(int byte_no) {
|
|
+ putfield_or_static(byte_no, false, may_not_rewrite);
|
|
+}
|
|
+
|
|
+void TemplateTable::putstatic(int byte_no) {
|
|
+ putfield_or_static(byte_no, true);
|
|
+}
|
|
+
|
|
+void TemplateTable::jvmti_post_fast_field_mod()
|
|
+{
|
|
+ if (JvmtiExport::can_post_field_modification()) {
|
|
+ // Check to see if a field modification watch has been set before
|
|
+ // we take the time to call into the VM.
|
|
+ Label L2;
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
|
|
+ __ lwu(c_rarg3, Address(t0, offset));
|
|
+ __ beqz(c_rarg3, L2);
|
|
+ __ pop_ptr(x9); // copy the object pointer from tos
|
|
+ __ verify_oop(x9);
|
|
+ __ push_ptr(x9); // put the object pointer back on tos
|
|
+ // Save tos values before call_VM() clobbers them. Since we have
|
|
+ // to do it for every data type, we use the saved values as the
|
|
+ // jvalue object.
|
|
+ switch (bytecode()) { // load values into the jvalue object
|
|
+ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break;
|
|
+ case Bytecodes::_fast_bputfield: // fall through
|
|
+ case Bytecodes::_fast_zputfield: // fall through
|
|
+ case Bytecodes::_fast_sputfield: // fall through
|
|
+ case Bytecodes::_fast_cputfield: // fall through
|
|
+ case Bytecodes::_fast_iputfield: __ push_i(x10); break;
|
|
+ case Bytecodes::_fast_dputfield: __ push_d(); break;
|
|
+ case Bytecodes::_fast_fputfield: __ push_f(); break;
|
|
+ case Bytecodes::_fast_lputfield: __ push_l(x10); break;
|
|
+
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ __ mv(c_rarg3, esp); // points to jvalue on the stack
|
|
+ // access constant pool cache entry
|
|
+ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1);
|
|
+ __ verify_oop(x9);
|
|
+ // x9: object pointer copied above
|
|
+ // c_rarg2: cache entry pointer
|
|
+ // c_rarg3: jvalue object on the stack
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::post_field_modification),
|
|
+ x9, c_rarg2, c_rarg3);
|
|
+
|
|
+ switch (bytecode()) { // restore tos values
|
|
+ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break;
|
|
+ case Bytecodes::_fast_bputfield: // fall through
|
|
+ case Bytecodes::_fast_zputfield: // fall through
|
|
+ case Bytecodes::_fast_sputfield: // fall through
|
|
+ case Bytecodes::_fast_cputfield: // fall through
|
|
+ case Bytecodes::_fast_iputfield: __ pop_i(x10); break;
|
|
+ case Bytecodes::_fast_dputfield: __ pop_d(); break;
|
|
+ case Bytecodes::_fast_fputfield: __ pop_f(); break;
|
|
+ case Bytecodes::_fast_lputfield: __ pop_l(x10); break;
|
|
+ default: break;
|
|
+ }
|
|
+ __ bind(L2);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_storefield(TosState state)
|
|
+{
|
|
+ transition(state, vtos);
|
|
+
|
|
+ ByteSize base = ConstantPoolCache::base_offset();
|
|
+
|
|
+ jvmti_post_fast_field_mod();
|
|
+
|
|
+ // access constant pool cache
|
|
+ __ get_cache_and_index_at_bcp(x12, x11, 1);
|
|
+
|
|
+ // Must prevent reordering of the following cp cache loads with bytecode load
|
|
+ __ membar(MacroAssembler::LoadLoad);
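+ // The cp cache entry was resolved before the bytecode was rewritten to this fast
+ // form, so ordering the f2/flags loads after the bytecode load guarantees that a
+ // resolved entry is observed.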
|
|
+
|
|
+ // test for volatile with x13
|
|
+ __ lwu(x13, Address(x12, in_bytes(base +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+
|
|
+ // replace index with field offset from cache entry
|
|
+ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
|
|
+
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ // Get object from stack
|
|
+ pop_and_check_object(x12);
|
|
+
|
|
+ // field address
|
|
+ __ add(x11, x12, x11);
|
|
+ const Address field(x11, 0);
|
|
+
|
|
+ // access field
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_fast_aputfield:
|
|
+ do_oop_store(_masm, field, x10, IN_HEAP);
|
|
+ break;
|
|
+ case Bytecodes::_fast_lputfield:
|
|
+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_iputfield:
|
|
+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_zputfield:
|
|
+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_bputfield:
|
|
+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_sputfield:
|
|
+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_cputfield:
|
|
+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_fputfield:
|
|
+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_dputfield:
|
|
+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_accessfield(TosState state)
|
|
+{
|
|
+ transition(atos, state);
|
|
+ // Do the JVMTI work here to avoid disturbing the register state below
|
|
+ if (JvmtiExport::can_post_field_access()) {
|
|
+ // Check to see if a field access watch has been set before we
|
|
+ // take the time to call into the VM.
|
|
+ Label L1;
|
|
+ int32_t offset = 0;
|
|
+ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset);
|
|
+ __ lwu(x12, Address(t0, offset));
|
|
+ __ beqz(x12, L1);
|
|
+ // access constant pool cache entry
|
|
+ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1);
|
|
+ __ verify_oop(x10);
|
|
+ __ push_ptr(x10); // save object pointer before call_VM() clobbers it
|
|
+ __ mv(c_rarg1, x10);
|
|
+ // c_rarg1: object pointer copied above
|
|
+ // c_rarg2: cache entry pointer
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::post_field_access),
|
|
+ c_rarg1, c_rarg2);
|
|
+ __ pop_ptr(x10); // restore object pointer
|
|
+ __ bind(L1);
|
|
+ }
|
|
+
|
|
+ // access constant pool cache
|
|
+ __ get_cache_and_index_at_bcp(x12, x11, 1);
|
|
+
|
|
+ // Must prevent reordering of the following cp cache loads with bytecode load
|
|
+ __ membar(MacroAssembler::LoadLoad);
|
|
+
|
|
+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::f2_offset())));
|
|
+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+
|
|
+ // x10: object
|
|
+ __ verify_oop(x10);
|
|
+ __ null_check(x10);
|
|
+ __ add(x11, x10, x11);
|
|
+ const Address field(x11, 0);
|
|
+
|
|
+ if (!UseBarriersForVolatile) {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ // access field
|
|
+ switch (bytecode()) {
|
|
+ case Bytecodes::_fast_agetfield:
|
|
+ do_oop_load(_masm, field, x10, IN_HEAP);
|
|
+ __ verify_oop(x10);
|
|
+ break;
|
|
+ case Bytecodes::_fast_lgetfield:
|
|
+ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_igetfield:
|
|
+ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
|
|
+ __ addw(x10, x10, zr); // sign extend the 32-bit value to 64 bits
|
|
+ break;
|
|
+ case Bytecodes::_fast_bgetfield:
|
|
+ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_sgetfield:
|
|
+ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_cgetfield:
|
|
+ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_fgetfield:
|
|
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
|
|
+ break;
|
|
+ case Bytecodes::_fast_dgetfield:
|
|
+ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_xaccess(TosState state)
|
|
+{
|
|
+ transition(vtos, state);
|
|
+
|
|
+ // get receiver
|
|
+ __ ld(x10, aaddress(0));
|
|
+ // access constant pool cache
|
|
+ __ get_cache_and_index_at_bcp(x12, x13, 2);
|
|
+ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::f2_offset())));
|
|
+
|
|
+ if (!UseBarriersForVolatile) {
|
|
+ Label notVolatile;
|
|
+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ // make sure exception is reported in correct bcp range (getfield is
|
|
+ // next instruction)
|
|
+ __ addi(xbcp, xbcp, 1);
|
|
+ __ null_check(x10);
|
|
+ switch (state) {
|
|
+ case itos:
|
|
+ __ add(x10, x10, x11);
|
|
+ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg);
|
|
+ __ addw(x10, x10, zr); // sign extend the 32-bit value to 64 bits
|
|
+ break;
|
|
+ case atos:
|
|
+ __ add(x10, x10, x11);
|
|
+ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP);
|
|
+ __ verify_oop(x10);
|
|
+ break;
|
|
+ case ftos:
|
|
+ __ add(t0, x10, x11);
|
|
+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg);
|
|
+ break;
|
|
+ default:
|
|
+ ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ {
|
|
+ Label notVolatile;
|
|
+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
|
|
+ ConstantPoolCacheEntry::flags_offset())));
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
|
|
+ __ beqz(t0, notVolatile);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ __ bind(notVolatile);
|
|
+ }
|
|
+
|
|
+ __ sub(xbcp, xbcp, 1);
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Calls
|
|
+
|
|
+void TemplateTable::count_calls(Register method, Register temp)
|
|
+{
|
|
+ __ call_Unimplemented();
|
|
+}
|
|
+
|
|
+void TemplateTable::prepare_invoke(int byte_no,
|
|
+ Register method, // linked method (or i-klass)
|
|
+ Register index, // itable index, MethodType, etc.
|
|
+ Register recv, // if caller wants to see it
|
|
+ Register flags // if caller wants to test it
|
|
+ ) {
|
|
+ // determine flags
|
|
+ const Bytecodes::Code code = bytecode();
|
|
+ const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
|
|
+ const bool is_invokedynamic = code == Bytecodes::_invokedynamic;
|
|
+ const bool is_invokehandle = code == Bytecodes::_invokehandle;
|
|
+ const bool is_invokevirtual = code == Bytecodes::_invokevirtual;
|
|
+ const bool is_invokespecial = code == Bytecodes::_invokespecial;
|
|
+ const bool load_receiver = (recv != noreg);
|
|
+ const bool save_flags = (flags != noreg);
|
|
+ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
|
|
+ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
|
|
+ assert(flags == noreg || flags == x13, "");
|
|
+ assert(recv == noreg || recv == x12, "");
|
|
+
|
|
+ // setup registers & access constant pool cache
|
|
+ if (recv == noreg) {
|
|
+ recv = x12;
|
|
+ }
|
|
+ if (flags == noreg) {
|
|
+ flags = x13;
|
|
+ }
|
|
+ assert_different_registers(method, index, recv, flags);
|
|
+
|
|
+ // save 'interpreter return address'
|
|
+ __ save_bcp();
|
|
+
|
|
+ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
|
|
+
|
|
+ // maybe push appendix to arguments (just before return address)
|
|
+ if (is_invokedynamic || is_invokehandle) {
|
|
+ Label L_no_push;
|
|
+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift);
|
|
+ __ beqz(t0, L_no_push);
|
|
+ // Push the appendix as a trailing parameter.
|
|
+ // This must be done before we get the receiver,
|
|
+ // since the parameter_size includes it.
|
|
+ __ push_reg(x9);
|
|
+ __ mv(x9, index);
|
|
+ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
|
|
+ __ load_resolved_reference_at_index(index, x9);
|
|
+ __ pop_reg(x9);
|
|
+ __ push_reg(index); // push appendix (MethodType, CallSite, etc.)
|
|
+ __ bind(L_no_push);
|
|
+ }
|
|
+
|
|
+ // load receiver if needed (note: no return address pushed yet)
|
|
+ if (load_receiver) {
|
|
+ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = (1 << 8) - 1
|
|
+ __ shadd(t0, recv, esp, t0, 3);
|
|
+ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1)));
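+ // The receiver was pushed first, so it is the deepest parameter slot on the
+ // expression stack; slots are 8 bytes wide, hence the shift by 3 above.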
|
|
+ __ verify_oop(recv);
|
|
+ }
|
|
+
|
|
+ // compute return type
|
|
+ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
|
|
+ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // isolate the tos state field (bits 28..31) into t1[3:0]
|
|
+
|
|
+ // load return address
|
|
+ {
|
|
+ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
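+ // the table holds one 8-byte return address per result tos state; index it by t1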
|
|
+ __ mv(t0, table_addr);
|
|
+ __ shadd(t0, t1, t0, t1, 3);
|
|
+ __ ld(ra, Address(t0, 0));
|
|
+ }
|
|
+}
|
|
+
|
|
+void TemplateTable::invokevirtual_helper(Register index,
|
|
+ Register recv,
|
|
+ Register flags)
|
|
+{
|
|
+ // Uses temporary registers x10, x13
|
|
+ assert_different_registers(index, recv, x10, x13);
|
|
+ // Test for an invoke of a final method
|
|
+ Label notFinal;
|
|
+ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
|
|
+ __ beqz(t0, notFinal);
|
|
+
|
|
+ const Register method = index; // method must be xmethod
|
|
+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention");
|
|
+
|
|
+ // do the call - the index is actually the method to call
|
|
+ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
|
|
+
|
|
+ // It's final, need a null check here!
|
|
+ __ null_check(recv);
|
|
+
|
|
+ // profile this call
|
|
+ __ profile_final_call(x10);
|
|
+ __ profile_arguments_type(x10, method, x14, true);
|
|
+
|
|
+ __ jump_from_interpreted(method);
|
|
+
|
|
+ __ bind(notFinal);
|
|
+
|
|
+ // get receiver klass
|
|
+ __ null_check(recv, oopDesc::klass_offset_in_bytes());
|
|
+ __ load_klass(x10, recv);
|
|
+
|
|
+ // profile this call
|
|
+ __ profile_virtual_call(x10, xlocals, x13);
|
|
+
|
|
+ // get target methodOop & entry point
|
|
+ __ lookup_virtual_method(x10, index, method);
|
|
+ __ profile_arguments_type(x13, method, x14, true);
|
|
+ __ jump_from_interpreted(method);
|
|
+}
|
|
+
|
|
+void TemplateTable::invokevirtual(int byte_no)
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f2_byte, "use this argument");
|
|
+
|
|
+ prepare_invoke(byte_no, xmethod, noreg, x12, x13);
|
|
+
|
|
+ // xmethod: index (actually a Method*)
|
|
+ // x12: receiver
|
|
+ // x13: flags
|
|
+
|
|
+ invokevirtual_helper(xmethod, x12, x13);
|
|
+}
|
|
+
|
|
+void TemplateTable::invokespecial(int byte_no)
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f1_byte, "use this argument");
|
|
+
|
|
+ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method*
|
|
+ x12); // get receiver also for null check
|
|
+ __ verify_oop(x12);
|
|
+ __ null_check(x12);
|
|
+ // do the call
|
|
+ __ profile_call(x10);
|
|
+ __ profile_arguments_type(x10, xmethod, xbcp, false);
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+}
|
|
+
|
|
+void TemplateTable::invokestatic(int byte_no)
|
|
+{
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f1_byte, "use this arugment");
|
|
+
|
|
+ prepare_invoke(byte_no, xmethod); // get f1 Method*
|
|
+ // do the call
|
|
+ __ profile_call(x10);
|
|
+ __ profile_arguments_type(x10, xmethod, x14, false);
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+}
|
|
+
|
|
+void TemplateTable::fast_invokevfinal(int byte_no)
|
|
+{
|
|
+ __ call_Unimplemented();
|
|
+}
|
|
+
|
|
+void TemplateTable::invokeinterface(int byte_no) {
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f1_byte, "use this argument");
|
|
+
|
|
+ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method*
|
|
+ x12, x13); // recv, flags
|
|
+
|
|
+ // x10: interface klass (from f1)
|
|
+ // xmethod: method (from f2)
|
|
+ // x12: receiver
|
|
+ // x13: flags
|
|
+
|
|
+ // First check for Object case, then private interface method,
|
|
+ // then regular interface method.
|
|
+
|
|
+ // Special case of invokeinterface called for virtual method of
|
|
+ // java.lang.Object. See cpCache.cpp for details
|
|
+ Label notObjectMethod;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift);
|
|
+ __ beqz(t0, notObjectMethod);
|
|
+
|
|
+ invokevirtual_helper(xmethod, x12, x13);
|
|
+ __ bind(notObjectMethod);
|
|
+
|
|
+ Label no_such_interface;
|
|
+
|
|
+ // Check for private method invocation - indicated by vfinal
|
|
+ Label notVFinal;
|
|
+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
|
|
+ __ beqz(t0, notVFinal);
|
|
+
|
|
+ // Get receiver klass into x13 - also a null check
|
|
+ __ null_check(x12, oopDesc::klass_offset_in_bytes());
|
|
+ __ load_klass(x13, x12);
|
|
+
|
|
+ Label subtype;
|
|
+ __ check_klass_subtype(x13, x10, x14, subtype);
|
|
+ // If we get here the typecheck failed
|
|
+ __ j(no_such_interface);
|
|
+ __ bind(subtype);
|
|
+
|
|
+ __ profile_final_call(x10);
|
|
+ __ profile_arguments_type(x10, xmethod, x14, true);
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+
|
|
+ __ bind(notVFinal);
|
|
+
|
|
+ // Get receiver klass into x13 - also a null check
|
|
+ __ restore_locals();
|
|
+ __ null_check(x12, oopDesc::klass_offset_in_bytes());
|
|
+ __ load_klass(x13, x12);
|
|
+
|
|
+ Label no_such_method;
|
|
+
|
|
+ // Preserve method for the throw_AbstractMethodErrorVerbose.
|
|
+ __ mv(x28, xmethod);
|
|
+ // Receiver subtype check against REFC.
|
|
+ // Superklass in x10. Subklass in x13. Blows t1, x30
|
|
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
|
|
+ x13, x10, noreg,
|
|
+ // outputs: scan temp. reg, scan temp. reg
|
|
+ t1, x30,
|
|
+ no_such_interface,
|
|
+ /*return_method=*/false);
|
|
+
|
|
+ // profile this call
|
|
+ __ profile_virtual_call(x13, x30, x9);
|
|
+
|
|
+ // Get declaring interface class from method, and itable index
|
|
+ __ ld(x10, Address(xmethod, Method::const_offset()));
|
|
+ __ ld(x10, Address(x10, ConstMethod::constants_offset()));
|
|
+ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes()));
|
|
+ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset()));
|
|
+ __ subw(xmethod, xmethod, Method::itable_index_max);
|
|
+ __ negw(xmethod, xmethod);
|
|
+
|
|
+ // Preserve recvKlass for throw_AbstractMethodErrorVerbose
|
|
+ __ mv(xlocals, x13);
|
|
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
|
|
+ xlocals, x10, xmethod,
|
|
+ // outputs: method, scan temp. reg
|
|
+ xmethod, x30,
|
|
+ no_such_interface);
|
|
+
|
|
+ // xmethod: methodOop to call
|
|
+ // x12: receiver
|
|
+ // Check for abstract method error
|
|
+ // Note: This should be done more efficiently via a throw_abstract_method_error
|
|
+ // interpreter entry point and a conditional jump to it in case of a null
|
|
+ // method.
|
|
+ __ beqz(xmethod, no_such_method);
|
|
+
|
|
+ __ profile_arguments_type(x13, xmethod, x30, true);
|
|
+
|
|
+ // do the call
|
|
+ // x12: receiver
|
|
+ // xmethod: methodOop
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+ __ should_not_reach_here();
|
|
+
|
|
+ // exception handling code follows ...
|
|
+ // note: must restore interpreter registers to canonical
|
|
+ // state for exception handling to work correctly!
|
|
+
|
|
+ __ bind(no_such_method);
|
|
+ // throw exception
|
|
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
|
|
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
|
|
+ // Pass arguments for generating a verbose error message.
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28);
|
|
+ // the call_VM checks for exception, so we should never return here.
|
|
+ __ should_not_reach_here();
|
|
+
|
|
+ __ bind(no_such_interface);
|
|
+ // throw exception
|
|
+ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed)
|
|
+ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
|
|
+ // Pass arguments for generating a verbose error message.
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10);
|
|
+ // the call_VM checks for exception, so we should never return here.
|
|
+ __ should_not_reach_here();
|
|
+ return;
|
|
+}
|
|
+
|
|
+void TemplateTable::invokehandle(int byte_no) {
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f1_byte, "use this argument");
|
|
+
|
|
+ prepare_invoke(byte_no, xmethod, x10, x12);
|
|
+ __ verify_method_ptr(x12);
|
|
+ __ verify_oop(x12);
|
|
+ __ null_check(x12);
|
|
+
|
|
+ // FIXME: profile the LambdaForm also
|
|
+
|
|
+ // x30 is safe to use here as a temp reg because it is about to
|
|
+ // be clobbered by jump_from_interpreted().
|
|
+ __ profile_final_call(x30);
|
|
+ __ profile_arguments_type(x30, xmethod, x14, true);
|
|
+
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+}
|
|
+
|
|
+void TemplateTable::invokedynamic(int byte_no) {
|
|
+ transition(vtos, vtos);
|
|
+ assert(byte_no == f1_byte, "use this argument");
|
|
+
|
|
+ prepare_invoke(byte_no, xmethod, x10);
|
|
+
|
|
+ // x10: CallSite object (from cpool->resolved_references[])
|
|
+ // xmethod: MH.linkToCallSite method (from f2)
|
|
+
|
|
+ // Note: x10_callsite is already pushed by prepare_invoke
|
|
+
|
|
+ // %%% should make a type profile for any invokedynamic that takes a ref argument
|
|
+ // profile this call
|
|
+ __ profile_call(xbcp);
|
|
+ __ profile_arguments_type(x13, xmethod, x30, false);
|
|
+
|
|
+ __ verify_oop(x10);
|
|
+
|
|
+ __ jump_from_interpreted(xmethod);
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Allocation
|
|
+
|
|
+void TemplateTable::_new() {
|
|
+ transition(vtos, atos);
|
|
+
|
|
+ __ get_unsigned_2_byte_index_at_bcp(x13, 1);
|
|
+ Label slow_case;
|
|
+ Label done;
|
|
+ Label initialize_header;
|
|
+ Label initialize_object; // including clearing the fields
|
|
+
|
|
+ __ get_cpool_and_tags(x14, x10);
|
|
+ // Make sure the class we're about to instantiate has been resolved.
|
|
+ // This is done before loading InstanceKlass to be consistent with the order
|
|
+ // in which the Constant Pool is updated (see ConstantPool::klass_at_put)
|
|
+ const int tags_offset = Array<u1>::base_offset_in_bytes();
|
|
+ __ add(t0, x10, x13);
|
|
+ __ la(t0, Address(t0, tags_offset));
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ lbu(t0, t0);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
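+ // The tag byte is loaded with acquire semantics so that, once it reads
+ // JVM_CONSTANT_Class, the resolved InstanceKlass loaded below is seen fully published.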
|
|
+ __ sub(t1, t0, (u1)JVM_CONSTANT_Class);
|
|
+ __ bnez(t1, slow_case);
|
|
+
|
|
+ // get InstanceKlass
|
|
+ __ load_resolved_klass_at_offset(x14, x13, x14, t0);
|
|
+
|
|
+ // make sure klass is initialized & doesn't have finalizer
|
|
+ // make sure klass is fully initialized
|
|
+ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset()));
|
|
+ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized);
|
|
+ __ bnez(t1, slow_case);
|
|
+
|
|
+ // get instance_size in InstanceKlass (scaled to a count of bytes)
|
|
+ __ lwu(x13, Address(x14, Klass::layout_helper_offset()));
|
|
+ // test to see if it has a finalizer or is malformed in some way
|
|
+ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit);
|
|
+ __ bnez(t0, slow_case);
|
|
+
|
|
+ // Allocate the instance:
|
|
+ // If TLAB is enabled:
|
|
+ // Try to allocate in the TLAB.
|
|
+ // If fails, go to the slow path.
|
|
+ // Else If inline contiguous allocations are enabled:
|
|
+ // Try to allocate in eden.
|
|
+ // If fails due to heap end, go to slow path
|
|
+ //
|
|
+ // If TLAB is enabled OR inline contiguous is enabled:
|
|
+ // Initialize the allocation.
|
|
+ // Exit.
|
|
+ // Go to slow path.
|
|
+ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc();
|
|
+
|
|
+ if (UseTLAB) {
|
|
+ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case);
|
|
+
|
|
+ if (ZeroTLAB) {
|
|
+ // the fields have been already cleared
|
|
+ __ j(initialize_header);
|
|
+ } else {
|
|
+ // initialize both the header and fields
|
|
+ __ j(initialize_object);
|
|
+ }
|
|
+ } else {
|
|
+ // Allocation in the shared Eden, if allowed.
|
|
+ //
|
|
+ // x13: instance size in bytes
|
|
+ if (allow_shared_alloc) {
|
|
+ __ eden_allocate(x10, x13, 0, x28, slow_case);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // If UseTLAB or allow_shared_alloc is true, the object was created above and
|
|
+ // still needs to be initialized. Otherwise, skip to the slow path.
|
|
+ if (UseTLAB || allow_shared_alloc) {
|
|
+ // The object is initialized before the header. If the object size is
|
|
+ // zero, go directly to the header initialization.
|
|
+ __ bind(initialize_object);
|
|
+ __ sub(x13, x13, sizeof(oopDesc));
|
|
+ __ beqz(x13, initialize_header);
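+ // x13 = instance size minus the object header, a multiple of BytesPerLong;
+ // the loop below clears the fields one 64-bit word at a time.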
|
|
+
|
|
+ // Initialize object fields
|
|
+ {
|
|
+ __ add(x12, x10, sizeof(oopDesc));
|
|
+ Label loop;
|
|
+ __ bind(loop);
|
|
+ __ sd(zr, Address(x12));
|
|
+ __ add(x12, x12, BytesPerLong);
|
|
+ __ sub(x13, x13, BytesPerLong);
|
|
+ __ bnez(x13, loop);
|
|
+ }
|
|
+
|
|
+ // initialize object header only.
|
|
+ __ bind(initialize_header);
|
|
+ if (UseBiasedLocking) {
|
|
+ __ ld(t0, Address(x14, Klass::prototype_header_offset()));
|
|
+ } else {
|
|
+ __ mv(t0, (intptr_t)markOopDesc::prototype());
|
|
+ }
|
|
+ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes()));
|
|
+ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops
|
|
+ __ store_klass(x10, x14); // store klass last
|
|
+
|
|
+ {
|
|
+ SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
|
|
+ // Trigger dtrace event for fastpath
|
|
+ __ push(atos); // save the return value
|
|
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10);
|
|
+ __ pop(atos); // restore the return value
|
|
+ }
|
|
+ __ j(done);
|
|
+ }
|
|
+
|
|
+ // slow case
|
|
+ __ bind(slow_case);
|
|
+ __ get_constant_pool(c_rarg1);
|
|
+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
|
|
+ __ verify_oop(x10);
|
|
+
|
|
+ // continue
|
|
+ __ bind(done);
|
|
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+}
|
|
+
|
|
+void TemplateTable::newarray() {
|
|
+ transition(itos, atos);
|
|
+ __ load_unsigned_byte(c_rarg1, at_bcp(1));
|
|
+ __ mv(c_rarg2, x10);
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
|
|
+ c_rarg1, c_rarg2);
|
|
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+}
|
|
+
|
|
+void TemplateTable::anewarray() {
|
|
+ transition(itos, atos);
|
|
+ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
|
|
+ __ get_constant_pool(c_rarg1);
|
|
+ __ mv(c_rarg3, x10);
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
|
|
+ c_rarg1, c_rarg2, c_rarg3);
|
|
+ // Must prevent reordering of stores for object initialization with stores that publish the new object.
|
|
+ __ membar(MacroAssembler::StoreStore);
|
|
+}
|
|
+
|
|
+void TemplateTable::arraylength() {
|
|
+ transition(atos, itos);
|
|
+ __ null_check(x10, arrayOopDesc::length_offset_in_bytes());
|
|
+ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes()));
|
|
+}
|
|
+
|
|
+void TemplateTable::checkcast()
|
|
+{
|
|
+ transition(atos, atos);
|
|
+ Label done, is_null, ok_is_subtype, quicked, resolved;
|
|
+ __ beqz(x10, is_null);
|
|
+
|
|
+ // Get cpool & tags index
|
|
+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
|
|
+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
|
|
+ // See if bytecode has already been quicked
|
|
+ __ add(t0, x13, Array<u1>::base_offset_in_bytes());
|
|
+ __ add(x11, t0, x9);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ lbu(x11, x11);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
|
|
+ __ beqz(t0, quicked);
|
|
+
|
|
+ __ push(atos); // save receiver for result, and for GC
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
|
|
+ // vm_result_2 has metadata result
|
|
+ __ get_vm_result_2(x10, xthread);
|
|
+ __ pop_reg(x13); // restore receiver
|
|
+ __ j(resolved);
|
|
+
|
|
+ // Get superklass in x10 and subklass in x13
|
|
+ __ bind(quicked);
|
|
+ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check
|
|
+ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass
|
|
+
|
|
+ __ bind(resolved);
|
|
+ __ load_klass(x9, x13);
|
|
+
|
|
+ // Generate subtype check. Blows x12, x15. Object in x13.
|
|
+ // Superklass in x10. Subklass in x9.
|
|
+ __ gen_subtype_check(x9, ok_is_subtype);
|
|
+
|
|
+ // Come here on failure
|
|
+ __ push_reg(x13);
|
|
+ // object is at TOS
|
|
+ __ j(Interpreter::_throw_ClassCastException_entry);
|
|
+
|
|
+ // Come here on success
|
|
+ __ bind(ok_is_subtype);
|
|
+ __ mv(x10, x13); // Restore object in x13
|
|
+
|
|
+ // Collect counts on whether this test sees NULLs a lot or not.
|
|
+ if (ProfileInterpreter) {
|
|
+ __ j(done);
|
|
+ __ bind(is_null);
|
|
+ __ profile_null_seen(x12);
|
|
+ } else {
|
|
+ __ bind(is_null); // same as 'done'
|
|
+ }
|
|
+ __ bind(done);
|
|
+}
|
|
+
|
|
+void TemplateTable::instanceof() {
|
|
+ transition(atos, itos);
|
|
+ Label done, is_null, ok_is_subtype, quicked, resolved;
|
|
+ __ beqz(x10, is_null);
|
|
+
|
|
+ // Get cpool & tags index
|
|
+ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
|
|
+ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
|
|
+ // See if bytecode has already been quicked
|
|
+ __ add(t0, x13, Array<u1>::base_offset_in_bytes());
|
|
+ __ add(x11, t0, x9);
|
|
+ __ membar(MacroAssembler::AnyAny);
|
|
+ __ lbu(x11, x11);
|
|
+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
|
|
+ __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
|
|
+ __ beqz(t0, quicked);
|
|
+
|
|
+ __ push(atos); // save receiver for result, and for GC
|
|
+ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
|
|
+ // vm_result_2 has metadata result
|
|
+ __ get_vm_result_2(x10, xthread);
|
|
+ __ pop_reg(x13); // restore receiver
|
|
+ __ verify_oop(x13);
|
|
+ __ load_klass(x13, x13);
|
|
+ __ j(resolved);
|
|
+
|
|
+ // Get superklass in x10 and subklass in x13
|
|
+ __ bind(quicked);
|
|
+ __ load_klass(x13, x10);
|
|
+ __ load_resolved_klass_at_offset(x12, x9, x10, t0);
|
|
+
|
|
+ __ bind(resolved);
|
|
+
|
|
+ // Generate subtype check. Blows x12, x15
|
|
+ // Superklass in x10. Subklass in x13.
|
|
+ __ gen_subtype_check(x13, ok_is_subtype);
|
|
+
|
|
+ // Come here on failure
|
|
+ __ mv(x10, zr);
|
|
+ __ j(done);
|
|
+ // Come here on success
|
|
+ __ bind(ok_is_subtype);
|
|
+ __ mv(x10, 1);
|
|
+
|
|
+ // Collect counts on whether this test sees NULLs a lot or not.
|
|
+ if (ProfileInterpreter) {
|
|
+ __ j(done);
|
|
+ __ bind(is_null);
|
|
+ __ profile_null_seen(x12);
|
|
+ } else {
|
|
+ __ bind(is_null); // same as 'done'
|
|
+ }
|
|
+ __ bind(done);
|
|
+ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass
|
|
+ // x10 = 1: obj != NULL and obj is an instanceof the specified klass
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Breakpoints
|
|
+void TemplateTable::_breakpoint() {
|
|
+ // Note: We get here even if we are single stepping..
|
|
+ // jbug insists on setting breakpoints at every bytecode
|
|
+ // even if we are in single step mode.
|
|
+
|
|
+ transition(vtos, vtos);
|
|
+
|
|
+ // get the unpatched byte code
|
|
+ __ get_method(c_rarg1);
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::get_original_bytecode_at),
|
|
+ c_rarg1, xbcp);
|
|
+ __ mv(x9, x10);
|
|
+
|
|
+ // post the breakpoint event
|
|
+ __ call_VM(noreg,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
|
|
+ xmethod, xbcp);
|
|
+
|
|
+ // complete the execution of original bytecode
|
|
+ __ mv(t0, x9);
|
|
+ __ dispatch_only_normal(vtos);
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Exceptions
|
|
+
|
|
+void TemplateTable::athrow() {
|
|
+ transition(atos, vtos);
|
|
+ __ null_check(x10);
|
|
+ __ j(Interpreter::throw_exception_entry());
|
|
+}
|
|
+
|
|
+//-----------------------------------------------------------------------------
|
|
+// Synchronization
|
|
+//
|
|
+// Note: monitorenter & exit are symmetric routines; which is reflected
|
|
+// in the assembly code structure as well
|
|
+//
|
|
+// Stack layout:
|
|
+//
|
|
+// [expressions ] <--- esp = expression stack top
|
|
+// ..
|
|
+// [expressions ]
|
|
+// [monitor entry] <--- monitor block top = expression stack bot
|
|
+// ..
|
|
+// [monitor entry]
|
|
+// [frame data ] <--- monitor block bot
|
|
+// ...
|
|
+// [saved fp ] <--- fp
|
|
+void TemplateTable::monitorenter()
|
|
+{
|
|
+ transition(atos, vtos);
|
|
+
|
|
+ // check for NULL object
|
|
+ __ null_check(x10);
|
|
+
|
|
+ const Address monitor_block_top(
|
|
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
|
|
+ const Address monitor_block_bot(
|
|
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
|
|
+
|
|
+ Label allocated;
|
|
+
|
|
+ // initialize entry pointer
|
|
+ __ mv(c_rarg1, zr); // points to free slot or NULL
|
|
+
|
|
+ // find a free slot in the monitor block (result in c_rarg1)
|
|
+ {
|
|
+ Label entry, loop, exit, notUsed;
|
|
+ __ ld(c_rarg3, monitor_block_top); // points to current entry,
|
|
+ // starting with top-most entry
|
|
+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom
|
|
+
|
|
+ __ j(entry);
|
|
+
|
|
+ __ bind(loop);
|
|
+ // check if current entry is used
|
|
+ // if not used then remember entry in c_rarg1
|
|
+ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ __ bnez(t0, notUsed);
|
|
+ __ mv(c_rarg1, c_rarg3);
|
|
+ __ bind(notUsed);
|
|
+ // check if current entry is for same object
|
|
+ // if same object then stop searching
|
|
+ __ beq(x10, t0, exit);
|
|
+ // otherwise advance to next entry
|
|
+ __ add(c_rarg3, c_rarg3, entry_size);
|
|
+ __ bind(entry);
|
|
+ // check if bottom reached
|
|
+ // if not at bottom then check this entry
|
|
+ __ bne(c_rarg3, c_rarg2, loop);
|
|
+ __ bind(exit);
|
|
+ }
|
|
+
|
|
+ __ bnez(c_rarg1, allocated); // check if a slot has been found and
|
|
+ // if found, continue with that one
|
|
+
|
|
+ // allocate one if there's no free slot
|
|
+ {
|
|
+ Label entry, loop;
|
|
+ // 1. compute new pointers // esp: old expression stack top
|
|
+ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom
|
|
+ __ sub(esp, esp, entry_size); // move expression stack top
|
|
+ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
|
|
+ __ mv(c_rarg3, esp); // set start value for copy loop
|
|
+ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom
|
|
+ __ sub(sp, sp, entry_size); // make room for the monitor
|
|
+
|
|
+ __ j(entry);
|
|
+ // 2. move expression stack contents
|
|
+ __ bind(loop);
|
|
+ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
|
|
+ // word from old location
|
|
+ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location
|
|
+ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word
|
|
+ __ bind(entry);
|
|
+ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached. if not at bottom
|
|
+ // then copy next word
|
|
+ }
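+ // c_rarg1 now points at the new monitor block bottom, i.e. the entry just created
+ // by sliding the expression stack down by one entry_size.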
|
|
+
|
|
+ // call run-time routine
|
|
+ // c_rarg1: points to monitor entry
|
|
+ __ bind(allocated);
|
|
+
|
|
+ // Increment bcp to point to the next bytecode, so exception
|
|
+ // handling for async. exceptions work correctly.
|
|
+ // The object has already been popped from the stack, so the
|
|
+ // expression stack looks correct.
|
|
+ __ addi(xbcp, xbcp, 1);
|
|
+
|
|
+ // store object
|
|
+ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ __ lock_object(c_rarg1);
|
|
+
|
|
+ // check to make sure this monitor doesn't cause stack overflow after locking
|
|
+ __ save_bcp(); // in case of exception
|
|
+ __ generate_stack_overflow_check(0);
|
|
+
|
|
+ // The bcp has already been incremented. Just need to dispatch to
|
|
+ // next instruction.
|
|
+ __ dispatch_next(vtos);
|
|
+}
|
|
+
|
|
+void TemplateTable::monitorexit()
|
|
+{
|
|
+ transition(atos, vtos);
|
|
+
|
|
+ // check for NULL object
|
|
+ __ null_check(x10);
|
|
+
|
|
+ const Address monitor_block_top(
|
|
+ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
|
|
+ const Address monitor_block_bot(
|
|
+ fp, frame::interpreter_frame_initial_sp_offset * wordSize);
|
|
+ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
|
|
+
|
|
+ Label found;
|
|
+
|
|
+ // find matching slot
|
|
+ {
|
|
+ Label entry, loop;
|
|
+ __ ld(c_rarg1, monitor_block_top); // points to current entry,
|
|
+ // starting with top-most entry
|
|
+ __ la(c_rarg2, monitor_block_bot); // points to word before bottom
|
|
+ // of monitor block
|
|
+ __ j(entry);
|
|
+
|
|
+ __ bind(loop);
|
|
+ // check if current entry is for same object
|
|
+ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
|
|
+ // if same object then stop searching
|
|
+ __ beq(x10, t0, found);
|
|
+ // otherwise advance to next entry
|
|
+ __ add(c_rarg1, c_rarg1, entry_size);
|
|
+ __ bind(entry);
|
|
+ // check if bottom reached
|
|
+ // if not at bottom then check this entry
|
|
+ __ bne(c_rarg1, c_rarg2, loop);
|
|
+ }
|
|
+
|
|
+ // error handling. Unlocking was not block-structured
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address,
|
|
+ InterpreterRuntime::throw_illegal_monitor_state_exception));
|
|
+ __ should_not_reach_here();
|
|
+
|
|
+ // call run-time routine
|
|
+ __ bind(found);
|
|
+ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps)
|
|
+ __ unlock_object(c_rarg1);
|
|
+ __ pop_ptr(x10); // discard object
|
|
+}
|
|
+
|
|
+// Wide instructions
|
|
+void TemplateTable::wide()
|
|
+{
|
|
+ __ load_unsigned_byte(x9, at_bcp(1));
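+ // x9 = the opcode following the wide prefix; it selects the entry in the
+ // wide-bytecode dispatch table (8-byte entries, hence the shift by 3 below).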
|
|
+ __ mv(t0, (address)Interpreter::_wentry_point);
|
|
+ __ shadd(t0, x9, t0, t1, 3);
|
|
+ __ ld(t0, Address(t0));
|
|
+ __ jr(t0);
|
|
+}
|
|
+
|
|
+// Multi arrays
|
|
+void TemplateTable::multianewarray() {
|
|
+ transition(vtos, atos);
|
|
+ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions
|
|
+ // last dim is on top of stack; we want address of first one:
|
|
+ // first_addr = last_addr + (ndims - 1) * wordSize
|
|
+ __ shadd(c_rarg1, x10, esp, c_rarg1, 3);
|
|
+ __ sub(c_rarg1, c_rarg1, wordSize);
|
|
+ call_VM(x10,
|
|
+ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
|
|
+ c_rarg1);
|
|
+ __ load_unsigned_byte(x11, at_bcp(3));
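+ // reload the dimension count and pop that many dimension words off the expression stack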
|
|
+ __ shadd(esp, x11, esp, t0, 3);
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..b437c8f4c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
|
|
@@ -0,0 +1,42 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP
|
|
+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP
|
|
+
|
|
+static void prepare_invoke(int byte_no,
|
|
+ Register method, // linked method (or i-klass)
|
|
+ Register index = noreg, // itable index, MethodType, etc.
|
|
+ Register recv = noreg, // if caller wants to see it
|
|
+ Register flags = noreg // if caller wants to test it
|
|
+ );
|
|
+static void invokevirtual_helper(Register index, Register recv,
|
|
+ Register flags);
|
|
+
|
|
+// Helpers
|
|
+static void index_check(Register array, Register index);
|
|
+
|
|
+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..03079aec0
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
|
|
@@ -0,0 +1,43 @@
|
|
+/*
|
|
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP
|
|
+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP
|
|
+
|
|
+// These are the CPU-specific fields, types and integer
|
|
+// constants required by the Serviceability Agent. This file is
|
|
+// referenced by vmStructs.cpp.
|
|
+
|
|
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
|
|
+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*)
|
|
+
|
|
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
|
|
+
|
|
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
|
|
+
|
|
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
|
|
+
|
|
+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..dd4f5c9ae
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp
|
|
@@ -0,0 +1,91 @@
|
|
+/*
|
|
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "memory/allocation.hpp"
|
|
+#include "memory/allocation.inline.hpp"
|
|
+#include "runtime/os.inline.hpp"
|
|
+#include "vm_version_ext_riscv.hpp"
|
|
+
|
|
+// VM_Version_Ext statics
|
|
+int VM_Version_Ext::_no_of_threads = 0;
|
|
+int VM_Version_Ext::_no_of_cores = 0;
|
|
+int VM_Version_Ext::_no_of_sockets = 0;
|
|
+bool VM_Version_Ext::_initialized = false;
|
|
+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0};
|
|
+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0};
|
|
+
|
|
+void VM_Version_Ext::initialize_cpu_information(void) {
|
|
+ // do nothing if cpu info has been initialized
|
|
+ if (_initialized) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ int core_id = -1;
|
|
+ int chip_id = -1;
|
|
+ int len = 0;
|
|
+ char* src_string = NULL;
|
|
+
|
|
+ _no_of_cores = os::processor_count();
|
|
+ _no_of_threads = _no_of_cores;
|
|
+ _no_of_sockets = _no_of_cores;
|
|
+ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64");
|
|
+ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string);
|
|
+ _initialized = true;
|
|
+}
|
|
+
|
|
+int VM_Version_Ext::number_of_threads(void) {
|
|
+ initialize_cpu_information();
|
|
+ return _no_of_threads;
|
|
+}
|
|
+
|
|
+int VM_Version_Ext::number_of_cores(void) {
|
|
+ initialize_cpu_information();
|
|
+ return _no_of_cores;
|
|
+}
|
|
+
|
|
+int VM_Version_Ext::number_of_sockets(void) {
|
|
+ initialize_cpu_information();
|
|
+ return _no_of_sockets;
|
|
+}
|
|
+
|
|
+const char* VM_Version_Ext::cpu_name(void) {
|
|
+ initialize_cpu_information();
|
|
+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing);
|
|
+ if (NULL == tmp) {
|
|
+ return NULL;
|
|
+ }
|
|
+ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE);
|
|
+ return tmp;
|
|
+}
|
|
+
|
|
+const char* VM_Version_Ext::cpu_description(void) {
|
|
+ initialize_cpu_information();
|
|
+ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing);
|
|
+ if (NULL == tmp) {
|
|
+ return NULL;
|
|
+ }
|
|
+ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
|
|
+ return tmp;
|
|
+}
|
|
diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..0982b6668
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
|
|
@@ -0,0 +1,55 @@
|
|
+/*
|
|
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
|
|
+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
|
|
+
|
|
+#include "runtime/vm_version.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+
|
|
+class VM_Version_Ext : public VM_Version {
|
|
+ private:
|
|
+ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256;
|
|
+ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096;
|
|
+
|
|
+ static int _no_of_threads;
|
|
+ static int _no_of_cores;
|
|
+ static int _no_of_sockets;
|
|
+ static bool _initialized;
|
|
+ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE];
|
|
+ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE];
|
|
+
|
|
+ public:
|
|
+ static int number_of_threads(void);
|
|
+ static int number_of_cores(void);
|
|
+ static int number_of_sockets(void);
|
|
+
|
|
+ static const char* cpu_name(void);
|
|
+ static const char* cpu_description(void);
|
|
+ static void initialize_cpu_information(void);
|
|
+
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..31d5bb5f4
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
|
|
@@ -0,0 +1,190 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "runtime/java.hpp"
|
|
+#include "runtime/vm_version.hpp"
|
|
+#include "utilities/macros.hpp"
|
|
+#include "utilities/formatBuffer.hpp"
|
|
+
|
|
+#include OS_HEADER_INLINE(os)
|
|
+
|
|
+const char* VM_Version::_uarch = "";
|
|
+uint32_t VM_Version::_initial_vector_length = 0;
|
|
+
|
|
+void VM_Version::initialize() {
|
|
+ get_os_cpu_info();
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseFMA)) {
|
|
+ FLAG_SET_DEFAULT(UseFMA, true);
|
|
+ }
|
|
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
|
|
+ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
|
|
+ }
|
|
+
|
|
+ if (UseAES || UseAESIntrinsics) {
|
|
+ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
|
|
+ warning("AES instructions are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAES, false);
|
|
+ }
|
|
+ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
|
|
+ warning("AES intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ warning("AES/CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseSHA) {
|
|
+ warning("SHA instructions are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseSHA, false);
|
|
+ }
|
|
+
|
|
+ if (UseSHA1Intrinsics) {
|
|
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
|
+ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseSHA256Intrinsics) {
|
|
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
|
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseSHA512Intrinsics) {
|
|
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
|
|
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseCRC32Intrinsics) {
|
|
+    warning("CRC32 intrinsics are not available on this CPU.");
|
|
+ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseCRC32CIntrinsics) {
|
|
+    warning("CRC32C intrinsics are not available on this CPU.");
|
|
+ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
|
|
+ }
|
|
+
|
|
+ if (UseRVV) {
|
|
+ if (!(_features & CPU_V)) {
|
|
+ warning("RVV is not supported on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseRVV, false);
|
|
+ } else {
|
|
+ // read vector length from vector CSR vlenb
|
|
+ _initial_vector_length = get_current_vector_length();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) {
|
|
+ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true);
|
|
+ }
|
|
+
|
|
+ if (UseZbb) {
|
|
+ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
|
|
+ FLAG_SET_DEFAULT(UsePopCountInstruction, true);
|
|
+ }
|
|
+ } else {
|
|
+ FLAG_SET_DEFAULT(UsePopCountInstruction, false);
|
|
+ }
|
|
+
|
|
+ char buf[512];
|
|
+ buf[0] = '\0';
|
|
+ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch);
|
|
+ strcat(buf, "rv64");
|
|
+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name);
|
|
+ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED)
|
|
+#undef ADD_FEATURE_IF_SUPPORTED
|
|
+
|
|
+ _features_string = os::strdup(buf);
|
|
+
|
|
+#ifdef COMPILER2
|
|
+ initialize_c2();
|
|
+#endif // COMPILER2
|
|
+}
|
|
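The features string is assembled with an x-macro. For reference, ADD_FEATURE_IF_SUPPORTED applied to CPU_FEATURE_FLAGS (declared in vm_version_riscv.hpp later in this patch) expands to the equivalent of:

  if (_features & CPU_I) strcat(buf, "i");
  if (_features & CPU_M) strcat(buf, "m");
  if (_features & CPU_A) strcat(buf, "a");
  if (_features & CPU_F) strcat(buf, "f");
  if (_features & CPU_D) strcat(buf, "d");
  if (_features & CPU_C) strcat(buf, "c");
  if (_features & CPU_V) strcat(buf, "v");
  // so a CPU with I, M, A, F, D and C reports a features string such as "rv64imafdc".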
+
|
|
+#ifdef COMPILER2
|
|
+void VM_Version::initialize_c2() {
|
|
+ // lack of cmove in riscv
|
|
+ if (UseCMoveUnconditionally) {
|
|
+ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false);
|
|
+ }
|
|
+ if (ConditionalMoveLimit > 0) {
|
|
+ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0);
|
|
+ }
|
|
+
|
|
+ if (!UseRVV) {
|
|
+ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false);
|
|
+ }
|
|
+
|
|
+ if (!UseRVV && MaxVectorSize) {
|
|
+ FLAG_SET_DEFAULT(MaxVectorSize, 0);
|
|
+ }
|
|
+
|
|
+ if (UseRVV) {
|
|
+ if (FLAG_IS_DEFAULT(MaxVectorSize)) {
|
|
+ MaxVectorSize = _initial_vector_length;
|
|
+ } else if (MaxVectorSize < 16) {
|
|
+ warning("RVV does not support vector length less than 16 bytes. Disabling RVV.");
|
|
+ UseRVV = false;
|
|
+ } else if (is_power_of_2(MaxVectorSize)) {
|
|
+ if (MaxVectorSize > _initial_vector_length) {
|
|
+        warning("Current system only supports max RVV vector length %u. Setting MaxVectorSize to %u",
+                _initial_vector_length, _initial_vector_length);
|
|
+ }
|
|
+ MaxVectorSize = _initial_vector_length;
|
|
+ } else {
|
|
+ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // disable prefetch
|
|
+ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
|
|
+ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
|
|
+ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
|
|
+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
|
|
+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
|
+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
|
|
+ }
|
|
+
|
|
+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
|
+ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
|
|
+ }
|
|
+}
|
|
+#endif // COMPILER2
|
|
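The MaxVectorSize handling in initialize_c2() boils down to a small policy. The helper below is only an illustrative restatement (its name is not part of the patch); vlenb stands for the value read from the vector CSR by get_current_vector_length():

  static uintx rvv_max_vector_size(uintx requested, uint32_t vlenb, bool is_default, bool* use_rvv) {
    if (is_default) {
      return vlenb;                               // follow the hardware vector length
    }
    if (requested < 16) {
      *use_rvv = false;                           // too short for the RVV stubs, RVV is disabled
      return requested;
    }
    if ((requested & (requested - 1)) != 0) {     // not a power of two
      vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)requested));
    }
    // A power-of-two request is always clamped to the hardware length,
    // with a warning when the request was larger than the CPU provides.
    return vlenb;
  }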
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..0178e6d75
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
|
|
@@ -0,0 +1,65 @@
|
|
+/*
|
|
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP
|
|
+#define CPU_RISCV_VM_VERSION_RISCV_HPP
|
|
+
|
|
+#include "runtime/abstract_vm_version.hpp"
|
|
+#include "runtime/globals_extension.hpp"
|
|
+#include "utilities/sizes.hpp"
|
|
+
|
|
+class VM_Version : public Abstract_VM_Version {
|
|
+public:
|
|
+ // Initialization
|
|
+ static void initialize();
|
|
+
|
|
+  enum Feature_Flag {
+#define CPU_FEATURE_FLAGS(decl) \
+    decl(I, "i", 8)  \
+    decl(M, "m", 12) \
+    decl(A, "a", 0)  \
+    decl(F, "f", 5)  \
+    decl(D, "d", 3)  \
+    decl(C, "c", 2)  \
+    decl(V, "v", 21)
+
+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit),
+    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
+#undef DECLARE_CPU_FEATURE_FLAG
+  };
|
|
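Expanded, the declaration above defines one mask per standard extension letter. The bit numbers are the letter's alphabetical index ('a' = 0 ... 'v' = 21), the same layout the Linux kernel reports in its riscv hwcap:

  // What DECLARE_CPU_FEATURE_FLAG produces for the enum above:
  //   CPU_I = (1 << 8),   CPU_M = (1 << 12),  CPU_A = (1 << 0),
  //   CPU_F = (1 << 5),   CPU_D = (1 << 3),   CPU_C = (1 << 2),
  //   CPU_V = (1 << 21)
  // e.g. an RV64GC CPU sets CPU_I | CPU_M | CPU_A | CPU_F | CPU_D | CPU_C in _features.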
+
|
|
+protected:
|
|
+ static const char* _uarch;
|
|
+ static uint32_t _initial_vector_length;
|
|
+ static void get_os_cpu_info();
|
|
+ static uint32_t get_current_vector_length();
|
|
+
|
|
+#ifdef COMPILER2
|
|
+private:
|
|
+ static void initialize_c2();
|
|
+#endif // COMPILER2
|
|
+};
|
|
+
|
|
+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..6572d9334
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
|
|
@@ -0,0 +1,60 @@
|
|
+/*
|
|
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/assembler.hpp"
|
|
+#include "code/vmreg.hpp"
|
|
+
|
|
+void VMRegImpl::set_regName() {
|
|
+ Register reg = ::as_Register(0);
|
|
+ int i = 0;
|
|
+ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) {
|
|
+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) {
|
|
+ regName[i++] = reg->name();
|
|
+ }
|
|
+ reg = reg->successor();
|
|
+ }
|
|
+
|
|
+ FloatRegister freg = ::as_FloatRegister(0);
|
|
+ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) {
|
|
+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) {
|
|
+ regName[i++] = freg->name();
|
|
+ }
|
|
+ freg = freg->successor();
|
|
+ }
|
|
+
|
|
+ VectorRegister vreg = ::as_VectorRegister(0);
|
|
+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) {
|
|
+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) {
|
|
+ regName[i++] = vreg->name();
|
|
+ }
|
|
+ vreg = vreg->successor();
|
|
+ }
|
|
+
|
|
+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) {
|
|
+ regName[i] = "NON-GPR-FPR-VPR";
|
|
+ }
|
|
+}
|
|
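set_regName() assigns one name per VMReg slot, not per register, so every machine register contributes max_slots_per_register consecutive entries. Conceptually the table is laid out as below (the exact slot counts come from register_riscv.hpp elsewhere in the patch; the shape, not the numbers, is the point):

  //   regName[0 .. gpr_slots-1]                  -> name of the first GPR, once per slot
  //   regName[gpr_slots .. 2*gpr_slots-1]        -> name of the second GPR
  //   ...
  //   regName[max_gpr .. max_fpr-1]              -> FPR names, one run of slots per register
  //   regName[max_fpr .. max_vpr-1]              -> vector register names
  //   regName[max_vpr .. number_of_registers-1]  -> "NON-GPR-FPR-VPR" filler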
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..ec76a1db1
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
|
|
@@ -0,0 +1,64 @@
|
|
+/*
|
|
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VMREG_RISCV_HPP
|
|
+#define CPU_RISCV_VMREG_RISCV_HPP
|
|
+
|
|
+inline bool is_Register() {
|
|
+ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;
|
|
+}
|
|
+
|
|
+inline bool is_FloatRegister() {
|
|
+ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
|
|
+}
|
|
+
|
|
+inline bool is_VectorRegister() {
|
|
+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr;
|
|
+}
|
|
+
|
|
+inline Register as_Register() {
|
|
+ assert( is_Register(), "must be");
|
|
+ return ::as_Register(value() / RegisterImpl::max_slots_per_register);
|
|
+}
|
|
+
|
|
+inline FloatRegister as_FloatRegister() {
|
|
+ assert( is_FloatRegister() && is_even(value()), "must be" );
|
|
+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
|
|
+ FloatRegisterImpl::max_slots_per_register);
|
|
+}
|
|
+
|
|
+inline VectorRegister as_VectorRegister() {
|
|
+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" );
|
|
+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) /
|
|
+ VectorRegisterImpl::max_slots_per_register);
|
|
+}
|
|
+
|
|
+inline bool is_concrete() {
|
|
+ assert(is_reg(), "must be");
|
|
+ return is_even(value());
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_VMREG_RISCV_HPP
|
|
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..9605e59f4
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
|
|
@@ -0,0 +1,47 @@
|
|
+/*
|
|
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
|
|
+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
|
|
+
|
|
+inline VMReg RegisterImpl::as_VMReg() {
+  if (this == noreg) {
+    return VMRegImpl::Bad();
+  }
+  return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register);
+}
|
|
+
|
|
+inline VMReg FloatRegisterImpl::as_VMReg() {
|
|
+ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) +
|
|
+ ConcreteRegisterImpl::max_gpr);
|
|
+}
|
|
+
|
|
+inline VMReg VectorRegisterImpl::as_VMReg() {
|
|
+ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) +
|
|
+ ConcreteRegisterImpl::max_fpr);
|
|
+}
|
|
+
|
|
+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
|
|
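Taken together with vmreg_riscv.hpp above, these conversions are inverses of each other. A quick sanity sketch, assuming x10 is the GPR defined by the port's register_riscv.hpp:

  VMReg slot = x10->as_VMReg();                 // encoding() * max_slots_per_register
  assert(slot->is_Register(), "GPR slots come before FPR and vector slots");
  assert(slot->as_Register() == x10, "dividing by max_slots_per_register recovers x10");
  assert(slot->is_concrete(), "the even (first) slot of a register is the concrete one");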
diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..b2aa87ab8
|
|
--- /dev/null
|
|
+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
|
|
@@ -0,0 +1,260 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/macroAssembler.inline.hpp"
|
|
+#include "assembler_riscv.inline.hpp"
|
|
+#include "code/vtableStubs.hpp"
|
|
+#include "interp_masm_riscv.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "oops/compiledICHolder.hpp"
|
|
+#include "oops/instanceKlass.hpp"
|
|
+#include "oops/klassVtable.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "vmreg_riscv.inline.hpp"
|
|
+#ifdef COMPILER2
|
|
+#include "opto/runtime.hpp"
|
|
+#endif
|
|
+
|
|
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
|
|
+// initialize its code
|
|
+
|
|
+#define __ masm->
|
|
+
|
|
+#ifndef PRODUCT
|
|
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
|
|
+#endif
|
|
+
|
|
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
|
|
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
|
|
+ const int stub_code_length = code_size_limit(true);
|
|
+ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
|
|
+ // Can be NULL if there is no free space in the code cache.
|
|
+ if (s == NULL) {
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ // Count unused bytes in instruction sequences of variable size.
|
|
+ // We add them to the computed buffer size in order to avoid
|
|
+ // overflow in subsequently generated stubs.
|
|
+ address start_pc = NULL;
|
|
+ int slop_bytes = 0;
|
|
+ int slop_delta = 0;
|
|
+
|
|
+ ResourceMark rm;
|
|
+ CodeBuffer cb(s->entry_point(), stub_code_length);
|
|
+ MacroAssembler* masm = new MacroAssembler(&cb);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+#if (!defined(PRODUCT) && defined(COMPILER2))
|
|
+ if (CountCompiledCalls) {
|
|
+ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
|
|
+ __ increment(Address(t2));
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // get receiver (need to skip return address on top of stack)
|
|
+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
|
|
+
|
|
+ // get receiver klass
|
|
+ address npe_addr = __ pc();
|
|
+ __ load_klass(t2, j_rarg0);
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ if (DebugVtables) {
|
|
+ Label L;
|
|
+ start_pc = __ pc();
|
|
+
|
|
+ // check offset vs vtable length
|
|
+ __ lwu(t0, Address(t2, Klass::vtable_length_offset()));
|
|
+ __ mvw(t1, vtable_index * vtableEntry::size());
|
|
+ __ bgt(t0, t1, L);
|
|
+ __ enter();
|
|
+ __ mv(x12, vtable_index);
|
|
+
|
|
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12);
|
|
+ const ptrdiff_t estimate = 256;
|
|
+ const ptrdiff_t codesize = __ pc() - start_pc;
|
|
+ slop_delta = estimate - codesize; // call_VM varies in length, depending on data
|
|
+ slop_bytes += slop_delta;
|
|
+ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
|
|
+
|
|
+ __ leave();
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // PRODUCT
|
|
+
|
|
+ start_pc = __ pc();
|
|
+ __ lookup_virtual_method(t2, vtable_index, xmethod);
|
|
+  // lookup_virtual_method generates, in the worst common case,
+  // 4 instructions: li (lui + addiw) + add + ld,
+  // and a single ld in the best case.
|
|
+ slop_delta = 16 - (int)(__ pc() - start_pc);
|
|
+ slop_bytes += slop_delta;
|
|
+ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
|
|
+
|
|
+#ifndef PRODUCT
|
|
+ if (DebugVtables) {
|
|
+ Label L;
|
|
+ __ beqz(xmethod, L);
|
|
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
|
|
+ __ bnez(t0, L);
|
|
+ __ stop("Vtable entry is NULL");
|
|
+ __ bind(L);
|
|
+ }
|
|
+#endif // PRODUCT
|
|
+
|
|
+ // x10: receiver klass
|
|
+ // xmethod: Method*
|
|
+ // x12: receiver
|
|
+ address ame_addr = __ pc();
|
|
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
|
|
+ __ jr(t0);
|
|
+
|
|
+ masm->flush();
|
|
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
|
|
+
|
|
+ return s;
|
|
+}
|
|
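At runtime the generated vtable stub performs the equivalent of the C++ below, with npe_addr and ame_addr marking the loads that may take the implicit null-pointer and abstract-method traps. This is an illustration only, not VM code:

  address vtable_stub_equivalent(oop receiver, int vtable_index) {
    Klass*  k = receiver->klass();                    // load_klass(t2, j_rarg0), npe_addr
    Method* m = k->method_at_vtable(vtable_index);    // lookup_virtual_method(t2, vtable_index, xmethod)
    return m->from_compiled_entry();                  // ld at Method::from_compiled_offset(), ame_addr, then jr
  }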
+
|
|
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {
|
|
+ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
|
|
+ const int stub_code_length = code_size_limit(false);
|
|
+ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
|
|
+ // Can be NULL if there is no free space in the code cache.
|
|
+ if (s == NULL) {
|
|
+ return NULL;
|
|
+ }
|
|
+ // Count unused bytes in instruction sequences of variable size.
|
|
+ // We add them to the computed buffer size in order to avoid
|
|
+ // overflow in subsequently generated stubs.
|
|
+ address start_pc = NULL;
|
|
+ int slop_bytes = 0;
|
|
+ int slop_delta = 0;
|
|
+
|
|
+ ResourceMark rm;
|
|
+ CodeBuffer cb(s->entry_point(), stub_code_length);
|
|
+ MacroAssembler* masm = new MacroAssembler(&cb);
|
|
+ assert_cond(masm != NULL);
|
|
+
|
|
+#if (!defined(PRODUCT) && defined(COMPILER2))
|
|
+ if (CountCompiledCalls) {
|
|
+ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
|
|
+ __ increment(Address(x18));
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ // get receiver (need to skip return address on top of stack)
|
|
+ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
|
|
+
|
|
+ // Entry arguments:
|
|
+ // t2: CompiledICHolder
|
|
+ // j_rarg0: Receiver
|
|
+
|
|
+ // This stub is called from compiled code which has no callee-saved registers,
|
|
+ // so all registers except arguments are free at this point.
|
|
+ const Register recv_klass_reg = x18;
|
|
+ const Register holder_klass_reg = x19; // declaring interface klass (DECC)
|
|
+ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC)
|
|
+ const Register temp_reg = x28;
|
|
+ const Register temp_reg2 = x29;
|
|
+ const Register icholder_reg = t1;
|
|
+
|
|
+ Label L_no_such_interface;
|
|
+
|
|
+ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
|
|
+ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
|
|
+
|
|
+ start_pc = __ pc();
|
|
+
|
|
+ // get receiver klass (also an implicit null-check)
|
|
+ address npe_addr = __ pc();
|
|
+ __ load_klass(recv_klass_reg, j_rarg0);
|
|
+
|
|
+ // Receiver subtype check against REFC.
|
|
+ __ lookup_interface_method(// inputs: rec. class, interface
|
|
+ recv_klass_reg, resolved_klass_reg, noreg,
|
|
+ // outputs: scan temp. reg1, scan temp. reg2
|
|
+ temp_reg2, temp_reg,
|
|
+ L_no_such_interface,
|
|
+ /*return_method=*/false);
|
|
+
|
|
+ const ptrdiff_t typecheckSize = __ pc() - start_pc;
|
|
+ start_pc = __ pc();
|
|
+
|
|
+ // Get selected method from declaring class and itable index
|
|
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index
|
|
+ recv_klass_reg, holder_klass_reg, itable_index,
|
|
+ // outputs: method, scan temp. reg
|
|
+ xmethod, temp_reg,
|
|
+ L_no_such_interface);
|
|
+
|
|
+ const ptrdiff_t lookupSize = __ pc() - start_pc;
|
|
+
|
|
+ // Reduce "estimate" such that "padding" does not drop below 8.
|
|
+ const ptrdiff_t estimate = 256;
|
|
+ const ptrdiff_t codesize = typecheckSize + lookupSize;
|
|
+ slop_delta = (int)(estimate - codesize);
|
|
+ slop_bytes += slop_delta;
|
|
+ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
|
|
+
|
|
+#ifdef ASSERT
|
|
+ if (DebugVtables) {
|
|
+ Label L2;
|
|
+ __ beqz(xmethod, L2);
|
|
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
|
|
+ __ bnez(t0, L2);
|
|
+ __ stop("compiler entrypoint is null");
|
|
+ __ bind(L2);
|
|
+ }
|
|
+#endif // ASSERT
|
|
+
|
|
+ // xmethod: Method*
|
|
+ // j_rarg0: receiver
|
|
+ address ame_addr = __ pc();
|
|
+ __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
|
|
+ __ jr(t0);
|
|
+
|
|
+ __ bind(L_no_such_interface);
|
|
+ // Handle IncompatibleClassChangeError in itable stubs.
|
|
+ // More detailed error message.
|
|
+ // We force resolving of the call site by jumping to the "handle
|
|
+ // wrong method" stub, and so let the interpreter runtime do all the
|
|
+ // dirty work.
|
|
+ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
|
|
+ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
|
|
+
|
|
+ masm->flush();
|
|
+ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
|
|
+
|
|
+ return s;
|
|
+}
|
|
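The slop bookkeeping above is easiest to see with concrete numbers:

  // Suppose the two lookup_interface_method sequences emit 180 bytes of code.
  // Then slop_delta = 256 - 180 = 76, those 76 bytes are accumulated into
  // slop_bytes, and bookkeeping() feeds the total back into VtableStubs'
  // shared size estimate so that later itable stubs cannot overflow their
  // CodeBuffer. A negative slop_delta trips the assert and means the 256-byte
  // estimate has to be raised.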
+
|
|
+int VtableStub::pd_code_alignment() {
|
|
+  // The riscv cache line is 64 bytes, but aligning every stub to it would waste space, so align only to the word size.
|
|
+ const unsigned int icache_line_size = wordSize;
|
|
+ return icache_line_size;
|
|
+}
|
|
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
|
|
index 897be2209..3b836fe6b 100644
|
|
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
|
|
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
|
|
@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
|
|
}
|
|
|
|
// result = condition ? opr1 : opr2
|
|
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390");
|
|
+
|
|
Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual;
|
|
switch (condition) {
|
|
case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break;
|
|
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
|
|
index e335f473d..53ad912cb 100644
|
|
--- a/src/hotspot/cpu/s390/s390.ad
|
|
+++ b/src/hotspot/cpu/s390/s390.ad
|
|
@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
// BUT: make sure match rule is not disabled by a false predicate!
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
// TODO
|
|
// Identify extra cases that we might want to provide match rules for
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
|
|
- bool ret_value = match_rule_supported(opcode);
|
|
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
|
|
+ return false;
|
|
+ }
|
|
// Add rules here.
|
|
|
|
- return ret_value; // Per default match rules are supported.
|
|
+ return true; // Per default match rules are supported.
|
|
}
|
|
|
|
int Matcher::regnum_to_fpu_offset(int regnum) {
|
|
@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) {
|
|
return Node::NotAMachineReg;
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
// z/Architecture does support misaligned store/load at minimal extra cost.
|
|
const bool Matcher::misaligned_vectors_ok() {
|
|
return true;
|
|
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad
|
|
index 7a2798a51..7d9b17b44 100644
|
|
--- a/src/hotspot/cpu/sparc/sparc.ad
|
|
+++ b/src/hotspot/cpu/sparc/sparc.ad
|
|
@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
return true; // Per default match rules are supported.
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
|
|
// TODO
|
|
// identify extra cases that we might want to provide match rules for
|
|
@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
return max_vector_size(bt); // Same as max.
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
// SPARC doesn't support misaligned vectors store/load.
|
|
const bool Matcher::misaligned_vectors_ok() {
|
|
return false;
|
|
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
|
|
index cee3140f4..d38c63600 100644
|
|
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
|
|
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
|
|
@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
|
|
}
|
|
}
|
|
|
|
-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
|
|
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
|
|
+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86");
|
|
+
|
|
Assembler::Condition acond, ncond;
|
|
switch (condition) {
|
|
case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break;
|
|
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
|
|
index 82fd8522b..8016d328a 100644
|
|
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
|
|
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
|
|
@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register
|
|
bind(DONE_LABEL);
|
|
} // string_indexof_char
|
|
|
|
+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
|
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
|
|
+ ShortBranchVerifier sbv(this);
|
|
+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
|
|
+
|
|
+ int stride = 16;
|
|
+
|
|
+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP,
|
|
+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP,
|
|
+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT,
|
|
+ FOUND_SEQ_CHAR, DONE_LABEL;
|
|
+
|
|
+ movptr(result, str1);
|
|
+ if (UseAVX >= 2) {
|
|
+ cmpl(cnt1, stride);
|
|
+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);
|
|
+ cmpl(cnt1, stride*2);
|
|
+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT);
|
|
+ movdl(vec1, ch);
|
|
+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit);
|
|
+ vpxor(vec2, vec2);
|
|
+ movl(tmp, cnt1);
|
|
+ andl(tmp, 0xFFFFFFE0); //vector count (in chars)
|
|
+ andl(cnt1,0x0000001F); //tail count (in chars)
|
|
+
|
|
+ bind(SCAN_TO_32_CHAR_LOOP);
|
|
+ vmovdqu(vec3, Address(result, 0));
|
|
+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit);
|
|
+ vptest(vec2, vec3);
|
|
+ jcc(Assembler::carryClear, FOUND_CHAR);
|
|
+ addptr(result, 32);
|
|
+ subl(tmp, stride*2);
|
|
+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP);
|
|
+ jmp(SCAN_TO_16_CHAR);
|
|
+
|
|
+ bind(SCAN_TO_16_CHAR_INIT);
|
|
+ movdl(vec1, ch);
|
|
+ pxor(vec2, vec2);
|
|
+ pshufb(vec1, vec2);
|
|
+ }
|
|
+
|
|
+ bind(SCAN_TO_16_CHAR);
|
|
+ cmpl(cnt1, stride);
|
|
+  jcc(Assembler::less, SCAN_TO_CHAR_INIT); // less than 16 entries left
|
|
+ if (UseAVX < 2) {
|
|
+ movdl(vec1, ch);
|
|
+ pxor(vec2, vec2);
|
|
+ pshufb(vec1, vec2);
|
|
+ }
|
|
+ movl(tmp, cnt1);
|
|
+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes)
|
|
+ andl(cnt1,0x0000000F); //tail count (in bytes)
|
|
+
|
|
+ bind(SCAN_TO_16_CHAR_LOOP);
|
|
+ movdqu(vec3, Address(result, 0));
|
|
+ pcmpeqb(vec3, vec1);
|
|
+ ptest(vec2, vec3);
|
|
+ jcc(Assembler::carryClear, FOUND_CHAR);
|
|
+ addptr(result, 16);
|
|
+ subl(tmp, stride);
|
|
+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items...
|
|
+
|
|
+ bind(SCAN_TO_CHAR_INIT);
|
|
+ testl(cnt1, cnt1);
|
|
+ jcc(Assembler::zero, RET_NOT_FOUND);
|
|
+ bind(SCAN_TO_CHAR_LOOP);
|
|
+ load_unsigned_byte(tmp, Address(result, 0));
|
|
+ cmpl(ch, tmp);
|
|
+ jccb(Assembler::equal, FOUND_SEQ_CHAR);
|
|
+ addptr(result, 1);
|
|
+ subl(cnt1, 1);
|
|
+ jccb(Assembler::zero, RET_NOT_FOUND);
|
|
+ jmp(SCAN_TO_CHAR_LOOP);
|
|
+
|
|
+ bind(RET_NOT_FOUND);
|
|
+ movl(result, -1);
|
|
+ jmpb(DONE_LABEL);
|
|
+
|
|
+ bind(FOUND_CHAR);
|
|
+ if (UseAVX >= 2) {
|
|
+ vpmovmskb(tmp, vec3);
|
|
+ } else {
|
|
+ pmovmskb(tmp, vec3);
|
|
+ }
|
|
+ bsfl(ch, tmp);
|
|
+ addptr(result, ch);
|
|
+
|
|
+ bind(FOUND_SEQ_CHAR);
|
|
+ subptr(result, str1);
|
|
+
|
|
+ bind(DONE_LABEL);
|
|
+} // stringL_indexof_char
|
|
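For reference, the vectorized routine above computes the same result as this scalar Latin-1 search: a byte-wise compare returning the index of the first match, or -1.

  // Scalar equivalent of stringL_indexof_char (illustration only).
  static int stringL_indexof_char_scalar(const jbyte* str, int cnt, int ch) {
    for (int i = 0; i < cnt; i++) {
      if ((str[i] & 0xff) == ch) {
        return i;               // FOUND_CHAR / FOUND_SEQ_CHAR paths: offset from str1
      }
    }
    return -1;                  // RET_NOT_FOUND path
  }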
+
|
|
// helper function for string_compare
|
|
void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
|
|
Address::ScaleFactor scale, Address::ScaleFactor scale1,
|
|
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
|
|
index 1bed0cce9..47a062c11 100644
|
|
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
|
|
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
|
|
@@ -1659,6 +1659,8 @@ public:
|
|
#ifdef COMPILER2
|
|
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
|
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
|
+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
|
|
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
|
|
|
|
// IndexOf strings.
|
|
// Small strings are loaded through stack if they cross page boundary.
|
|
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
|
|
index baa7cc774..238d8729b 100644
|
|
--- a/src/hotspot/cpu/x86/x86.ad
|
|
+++ b/src/hotspot/cpu/x86/x86.ad
|
|
@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|
return ret_value; // Per default match rules are supported.
|
|
}
|
|
|
|
-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
|
|
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
// identify extra cases that we might want to provide match rules for
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
bool ret_value = match_rule_supported(opcode);
|
|
+ if (!vector_size_supported(bt, vlen)) {
|
|
+ ret_value = false;
|
|
+ }
|
|
if (ret_value) {
|
|
switch (opcode) {
|
|
case Op_AbsVB:
|
|
@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
|
return MIN2(size,max_size);
|
|
}
|
|
|
|
+const bool Matcher::supports_scalable_vector() {
|
|
+ return false;
|
|
+}
|
|
+
|
|
+const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+
|
|
// Vector ideal reg corresponding to specified size in bytes
|
|
const uint Matcher::vector_ideal_reg(int size) {
|
|
assert(MaxVectorSize >= size, "");
|
|
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
|
|
index bc9947327..bbe49bd62 100644
|
|
--- a/src/hotspot/cpu/x86/x86_32.ad
|
|
+++ b/src/hotspot/cpu/x86/x86_32.ad
|
|
@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
-instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
|
|
+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
|
|
eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
|
|
- predicate(UseSSE42Intrinsics);
|
|
+  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
|
|
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
|
|
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
ins_encode %{
|
|
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
|
$vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
|
|
@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
|
|
+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
|
|
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
|
|
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
|
|
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
+ ins_encode %{
|
|
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
|
+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
|
|
+ %}
|
|
+ ins_pipe( pipe_slow );
|
|
+%}
|
|
+
|
|
// fast array equals
|
|
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
|
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
|
|
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
|
|
index 7e6739ffe..53f887ea6 100644
|
|
--- a/src/hotspot/cpu/x86/x86_64.ad
|
|
+++ b/src/hotspot/cpu/x86/x86_64.ad
|
|
@@ -2975,7 +2975,7 @@ frame
|
|
RAX_H_num // Op_RegL
|
|
};
|
|
// Excluded flags and vector registers.
|
|
- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
|
|
+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
|
|
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
|
|
%}
|
|
%}
|
|
@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
-instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
|
|
- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
|
|
+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
|
|
+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr)
|
|
%{
|
|
- predicate(UseSSE42Intrinsics);
|
|
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
|
|
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
|
|
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
ins_encode %{
|
|
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
|
$vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
|
|
@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
|
|
+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
|
|
+%{
|
|
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
|
|
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
|
|
+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
|
|
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
|
|
+ ins_encode %{
|
|
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
|
|
+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
|
|
+ %}
|
|
+ ins_pipe( pipe_slow );
|
|
+%}
|
|
+
|
|
// fast string equals
|
|
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
|
|
legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr)
|
|
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
|
|
index 74945999e..6c79d20a4 100644
|
|
--- a/src/hotspot/os/linux/os_linux.cpp
|
|
+++ b/src/hotspot/os/linux/os_linux.cpp
|
|
@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {
|
|
{EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"},
|
|
{EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"},
|
|
{EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"},
|
|
- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"},
|
|
+#ifdef _LP64
|
|
+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"},
|
|
+#else
|
|
+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"},
|
|
+#endif
|
|
{EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"},
|
|
};
|
|
|
|
@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
|
|
strncpy(cpuinfo, "IA64", length);
|
|
#elif defined(PPC)
|
|
strncpy(cpuinfo, "PPC64", length);
|
|
+#elif defined(RISCV)
|
|
+ strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length);
|
|
#elif defined(S390)
|
|
strncpy(cpuinfo, "S390", length);
|
|
#elif defined(SPARC)
|
|
@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() {
|
|
IA64_ONLY(256 * M)
|
|
PPC_ONLY(4 * M)
|
|
S390_ONLY(1 * M)
|
|
- SPARC_ONLY(4 * M);
|
|
+ SPARC_ONLY(4 * M)
|
|
+ RISCV64_ONLY(2 * M);
|
|
#endif // ZERO
|
|
|
|
FILE *fp = fopen("/proc/meminfo", "r");
|
|
diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
|
|
new file mode 100644
|
|
index 000000000..961fff011
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
|
|
@@ -0,0 +1,113 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
|
|
+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
|
|
+
|
|
+#include "vm_version_riscv.hpp"
|
|
+
|
|
+// Implementation of class atomic
|
|
+// Note that memory_order_conservative requires a full barrier after atomic stores.
|
|
+// See https://patchwork.kernel.org/patch/3575821/
|
|
+
|
|
+#define FULL_MEM_BARRIER __sync_synchronize()
|
|
+#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
|
|
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
|
|
+
|
|
+template<size_t byte_size>
|
|
+struct Atomic::PlatformAdd
|
|
+ : public Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
|
|
+{
|
|
+ template<typename I, typename D>
|
|
+ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
|
|
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
|
|
+ FULL_MEM_BARRIER;
|
|
+ return res;
|
|
+ }
|
|
+};
|
|
+
|
|
+template<size_t byte_size>
|
|
+template<typename T>
|
|
+inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
|
|
+ T volatile* dest,
|
|
+ atomic_memory_order order) const {
|
|
+ STATIC_ASSERT(byte_size == sizeof(T));
|
|
+ T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
|
|
+ FULL_MEM_BARRIER;
|
|
+ return res;
|
|
+}
|
|
+
|
|
+// No direct support for cmpxchg of bytes; emulate using int.
|
|
+template<size_t byte_size>
|
|
+template<typename T>
|
|
+inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
|
|
+ T volatile* dest,
|
|
+ T compare_value,
|
|
+ atomic_memory_order order) const {
|
|
+ STATIC_ASSERT(byte_size == sizeof(T));
|
|
+ T value = compare_value;
|
|
+ if (order != memory_order_relaxed) {
|
|
+ FULL_MEM_BARRIER;
|
|
+ }
|
|
+
|
|
+ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
|
|
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
|
|
+
|
|
+ if (order != memory_order_relaxed) {
|
|
+ FULL_MEM_BARRIER;
|
|
+ }
|
|
+ return value;
|
|
+}
|
|
+
|
|
+template<>
|
|
+template<typename T>
|
|
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
|
|
+ T volatile* dest,
|
|
+ T compare_value,
|
|
+ atomic_memory_order order) const {
|
|
+ STATIC_ASSERT(4 == sizeof(T));
|
|
+ if (order != memory_order_relaxed) {
|
|
+ FULL_MEM_BARRIER;
|
|
+ }
|
|
+ T rv;
|
|
+ int tmp;
|
|
+ __asm volatile(
|
|
+ "1:\n\t"
|
|
+    " addiw %[tmp], %[cv], 0\n\t" // sign-extend compare_value to match the sign-extended lr.w result
|
|
+ " lr.w.aq %[rv], (%[dest])\n\t"
|
|
+ " bne %[rv], %[tmp], 2f\n\t"
|
|
+ " sc.w.rl %[tmp], %[ev], (%[dest])\n\t"
|
|
+ " bnez %[tmp], 1b\n\t"
|
|
+ "2:\n\t"
|
|
+ : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
|
|
+ : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
|
|
+ : "memory");
|
|
+ if (order != memory_order_relaxed) {
|
|
+ FULL_MEM_BARRIER;
|
|
+ }
|
|
+ return rv;
|
|
+}
|
|
+
|
|
+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
|
|
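A usage sketch of the 4-byte specialization above, assuming the JDK 11 style Atomic API (exchange value first) and the default conservative ordering:

  // Conservative cmpxchg: full fence, lr.w.aq / bne / sc.w.rl loop, full fence.
  volatile int32_t lock_word = 0;
  int32_t witness = Atomic::cmpxchg((int32_t)1, &lock_word, (int32_t)0);
  if (witness == 0) {
    // success: lr.w observed 0 and sc.w.rl installed 1
  } else {
    // failure: 'witness' holds whatever value lr.w actually observed
  }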
diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..44f04d1a9
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp
|
|
@@ -0,0 +1,44 @@
|
|
+/*
|
|
+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+
|
|
+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP
|
|
+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP
|
|
+
|
|
+#include <byteswap.h>
|
|
+
|
|
+// Efficient swapping of data bytes from Java byte
|
|
+// ordering to native byte ordering and vice versa.
|
|
+inline u2 Bytes::swap_u2(u2 x) {
|
|
+ return bswap_16(x);
|
|
+}
|
|
+
|
|
+inline u4 Bytes::swap_u4(u4 x) {
|
|
+ return bswap_32(x);
|
|
+}
|
|
+
|
|
+inline u8 Bytes::swap_u8(u8 x) {
|
|
+ return bswap_64(x);
|
|
+}
|
|
+
|
|
+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP
|
|
diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp
|
|
new file mode 100644
|
|
index 000000000..645b40a7c
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp
|
|
@@ -0,0 +1,116 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
|
|
+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
|
|
+
|
|
+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ (void)memmove(to, from, count * HeapWordSize);
|
|
+}
|
|
+
|
|
+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) {
|
|
+ switch (count) {
|
|
+ case 8: to[7] = from[7]; // fall through
|
|
+ case 7: to[6] = from[6]; // fall through
|
|
+ case 6: to[5] = from[5]; // fall through
|
|
+ case 5: to[4] = from[4]; // fall through
|
|
+ case 4: to[3] = from[3]; // fall through
|
|
+ case 3: to[2] = from[2]; // fall through
|
|
+ case 2: to[1] = from[1]; // fall through
|
|
+ case 1: to[0] = from[0]; // fall through
|
|
+ case 0: break;
|
|
+ default:
|
|
+    if (is_atomic) {
|
|
+ while (count-- > 0) { *to++ = *from++; }
|
|
+ } else {
|
|
+ memcpy(to, from, count * HeapWordSize);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ pd_disjoint_words_helper(from, to, count, false);
|
|
+}
|
|
+
|
|
+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ pd_disjoint_words_helper(from, to, count, true);
|
|
+}
|
|
+
|
|
+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ pd_conjoint_words(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ pd_disjoint_words(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
|
|
+ (void)memmove(to, from, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
|
|
+ pd_conjoint_bytes(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
|
|
+ _Copy_conjoint_jshorts_atomic(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
|
|
+ _Copy_conjoint_jints_atomic(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
|
|
+ _Copy_conjoint_jlongs_atomic(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
|
|
+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size.");
|
|
+ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
|
|
+}
|
|
+
|
|
+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ _Copy_arrayof_conjoint_bytes(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ _Copy_arrayof_conjoint_jshorts(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ _Copy_arrayof_conjoint_jints(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+ _Copy_arrayof_conjoint_jlongs(from, to, count);
|
|
+}
|
|
+
|
|
+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
|
|
+  assert(!UseCompressedOops, "not to be used with compressed oops");
|
|
+ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
|
|
+ _Copy_arrayof_conjoint_jlongs(from, to, count);
|
|
+}
|
|
+
|
|
+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
|
|
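The word-copy helpers above unroll counts up to eight and only fall back to a loop (atomic) or memcpy (non-atomic) beyond that. A small usage sketch through the public Copy interface, which dispatches to the pd_ functions above:

  // Copies four heap words element-atomically via the "case 4" unrolled stores.
  jlong src[4] = {1, 2, 3, 4};
  jlong dst[4] = {0, 0, 0, 0};
  Copy::disjoint_words_atomic((const HeapWord*)src, (HeapWord*)dst, 4);
  // A 1000-word non-atomic copy instead takes the memcpy branch of the default case.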
diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
new file mode 100644
index 000000000..041cdf4ff
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, DontYieldALot, false);
+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default
+define_pd_global(intx, VMThreadStackSize, 2048);
+
+define_pd_global(intx, CompilerThreadStackSize, 2048);
+
+define_pd_global(uintx, JVMInvokeMethodSlack, 8192);
+
+// Used on 64 bit platforms for UseCompressedOops base address
+define_pd_global(uintx, HeapBaseMinAddress, 2 * G);
+
+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
new file mode 100644
index 000000000..842aa51e0
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+
+// Included in orderAccess.hpp header file.
+
+#include "vm_version_riscv.hpp"
+
+// Implementation of class OrderAccess.
+
+inline void OrderAccess::loadload()   { acquire(); }
+inline void OrderAccess::storestore() { release(); }
+inline void OrderAccess::loadstore()  { acquire(); }
+inline void OrderAccess::storeload()  { fence(); }
+
+inline void OrderAccess::acquire() {
+  READ_MEM_BARRIER;
+}
+
+inline void OrderAccess::release() {
+  WRITE_MEM_BARRIER;
+}
+
+inline void OrderAccess::fence() {
+  FULL_MEM_BARRIER;
+}
+
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+};
+
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); }
+};
+
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
+};
+
+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..37947701b
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
|
|
@@ -0,0 +1,628 @@
|
|
+/*
|
|
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+// no precompiled headers
|
|
+#include "asm/macroAssembler.hpp"
|
|
+#include "classfile/classLoader.hpp"
|
|
+#include "classfile/systemDictionary.hpp"
|
|
+#include "classfile/vmSymbols.hpp"
|
|
+#include "code/codeCache.hpp"
|
|
+#include "code/icBuffer.hpp"
|
|
+#include "code/nativeInst.hpp"
|
|
+#include "code/vtableStubs.hpp"
|
|
+#include "interpreter/interpreter.hpp"
|
|
+#include "jvm.h"
|
|
+#include "memory/allocation.inline.hpp"
|
|
+#include "os_share_linux.hpp"
|
|
+#include "prims/jniFastGetField.hpp"
|
|
+#include "prims/jvm_misc.hpp"
|
|
+#include "runtime/arguments.hpp"
|
|
+#include "runtime/extendedPC.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/interfaceSupport.inline.hpp"
|
|
+#include "runtime/java.hpp"
|
|
+#include "runtime/javaCalls.hpp"
|
|
+#include "runtime/mutexLocker.hpp"
|
|
+#include "runtime/osThread.hpp"
|
|
+#include "runtime/sharedRuntime.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+#include "runtime/thread.inline.hpp"
|
|
+#include "runtime/timer.hpp"
|
|
+#include "utilities/debug.hpp"
|
|
+#include "utilities/events.hpp"
|
|
+#include "utilities/vmError.hpp"
|
|
+
|
|
+// put OS-includes here
|
|
+# include <dlfcn.h>
|
|
+# include <errno.h>
|
|
+# include <pthread.h>
|
|
+# include <signal.h>
|
|
+# include <stdio.h>
|
|
+# include <stdlib.h>
|
|
+# include <sys/mman.h>
|
|
+# include <sys/resource.h>
|
|
+# include <sys/socket.h>
|
|
+# include <sys/stat.h>
|
|
+# include <sys/time.h>
|
|
+# include <sys/types.h>
|
|
+# include <sys/utsname.h>
|
|
+# include <sys/wait.h>
|
|
+# include <poll.h>
|
|
+# include <pwd.h>
|
|
+# include <ucontext.h>
|
|
+# include <unistd.h>
|
|
+
|
|
+#define REG_LR 1
|
|
+#define REG_FP 8
|
|
+
|
|
+NOINLINE address os::current_stack_pointer() {
|
|
+ return (address)__builtin_frame_address(0);
|
|
+}
|
|
+
|
|
+char* os::non_memory_address_word() {
|
|
+ // Must never look like an address returned by reserve_memory,
|
|
+ return (char*) -1;
|
|
+}
|
|
+
|
|
+address os::Linux::ucontext_get_pc(const ucontext_t * uc) {
|
|
+ return (address)uc->uc_mcontext.__gregs[REG_PC];
|
|
+}
|
|
+
|
|
+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) {
|
|
+ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc;
|
|
+}
|
|
+
|
|
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
|
|
+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP];
|
|
+}
|
|
+
|
|
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
|
|
+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP];
|
|
+}
|
|
+
|
|
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread
|
|
+// is currently interrupted by SIGPROF.
|
|
+// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal
|
|
+// frames. Currently we don't do that on Linux, so it's the same as
|
|
+// os::fetch_frame_from_context().
|
|
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
|
|
+ const ucontext_t* uc,
|
|
+ intptr_t** ret_sp,
|
|
+ intptr_t** ret_fp) {
|
|
+
|
|
+ assert(thread != NULL, "just checking");
|
|
+ assert(ret_sp != NULL, "just checking");
|
|
+ assert(ret_fp != NULL, "just checking");
|
|
+
|
|
+ return os::fetch_frame_from_context(uc, ret_sp, ret_fp);
|
|
+}
|
|
+
|
|
+ExtendedPC os::fetch_frame_from_context(const void* ucVoid,
|
|
+ intptr_t** ret_sp, intptr_t** ret_fp) {
|
|
+
|
|
+ ExtendedPC epc;
|
|
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
|
|
+
|
|
+ if (uc != NULL) {
|
|
+ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc));
|
|
+ if (ret_sp != NULL) {
|
|
+ *ret_sp = os::Linux::ucontext_get_sp(uc);
|
|
+ }
|
|
+ if (ret_fp != NULL) {
|
|
+ *ret_fp = os::Linux::ucontext_get_fp(uc);
|
|
+ }
|
|
+ } else {
|
|
+ // construct empty ExtendedPC for return value checking
|
|
+ epc = ExtendedPC(NULL);
|
|
+ if (ret_sp != NULL) {
|
|
+ *ret_sp = (intptr_t *)NULL;
|
|
+ }
|
|
+ if (ret_fp != NULL) {
|
|
+ *ret_fp = (intptr_t *)NULL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return epc;
|
|
+}
|
|
+
|
|
+frame os::fetch_frame_from_context(const void* ucVoid) {
|
|
+ intptr_t* frame_sp = NULL;
|
|
+ intptr_t* frame_fp = NULL;
|
|
+ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp);
|
|
+ return frame(frame_sp, frame_fp, epc.pc());
|
|
+}
|
|
+
|
|
+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
|
|
+ address pc = (address) os::Linux::ucontext_get_pc(uc);
|
|
+ if (Interpreter::contains(pc)) {
|
|
+ // interpreter performs stack banging after the fixed frame header has
|
|
+ // been generated while the compilers perform it before. To maintain
|
|
+ // semantic consistency between interpreted and compiled frames, the
|
|
+ // method returns the Java sender of the current frame.
|
|
+ *fr = os::fetch_frame_from_context(uc);
|
|
+ if (!fr->is_first_java_frame()) {
|
|
+ assert(fr->safe_for_sender(thread), "Safety check");
|
|
+ *fr = fr->java_sender();
|
|
+ }
|
|
+ } else {
|
|
+ // more complex code with compiled code
|
|
+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
|
|
+ CodeBlob* cb = CodeCache::find_blob(pc);
|
|
+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
|
|
+ // Not sure where the pc points to, fallback to default
|
|
+ // stack overflow handling
|
|
+ return false;
|
|
+ } else {
|
|
+ // In compiled code, the stack banging is performed before RA
|
|
+ // has been saved in the frame. RA is live, and SP and FP
|
|
+ // belong to the caller.
|
|
+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc);
|
|
+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc);
|
|
+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] -
|
|
+ NativeInstruction::instruction_size);
|
|
+ *fr = frame(frame_sp, frame_fp, frame_pc);
|
|
+ if (!fr->is_java_frame()) {
|
|
+ assert(fr->safe_for_sender(thread), "Safety check");
|
|
+ assert(!fr->is_first_frame(), "Safety check");
|
|
+ *fr = fr->java_sender();
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ assert(fr->is_java_frame(), "Safety check");
|
|
+ return true;
|
|
+}
|
|
+
|
|
+// By default, gcc always saves frame pointer rfp on this stack. This
|
|
+// may get turned off by -fomit-frame-pointer.
|
|
+frame os::get_sender_for_C_frame(frame* fr) {
|
|
+ return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
|
|
+}
|
|
+
|
|
+NOINLINE frame os::current_frame() {
|
|
+ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0);
|
|
+ if(sender_sp != NULL) {
|
|
+ frame myframe((intptr_t*)os::current_stack_pointer(),
|
|
+ sender_sp[frame::link_offset],
|
|
+ CAST_FROM_FN_PTR(address, os::current_frame));
|
|
+ if (os::is_first_C_frame(&myframe)) {
|
|
+ // stack is not walkable
|
|
+ return frame();
|
|
+ } else {
|
|
+ return os::get_sender_for_C_frame(&myframe);
|
|
+ }
|
|
+ } else {
|
|
+ ShouldNotReachHere();
|
|
+ return frame();
|
|
+ }
|
|
+}
|
|
+
|
|
+// Utility functions
|
|
+extern "C" JNIEXPORT int
|
|
+JVM_handle_linux_signal(int sig,
|
|
+ siginfo_t* info,
|
|
+ void* ucVoid,
|
|
+ int abort_if_unrecognized) {
|
|
+ ucontext_t* uc = (ucontext_t*) ucVoid;
|
|
+
|
|
+ Thread* t = Thread::current_or_null_safe();
|
|
+
|
|
+ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
|
|
+ // (no destructors can be run)
|
|
+ os::ThreadCrashProtection::check_crash_protection(sig, t);
|
|
+
|
|
+ SignalHandlerMark shm(t);
|
|
+
|
|
+ // Note: it's not uncommon that JNI code uses signal/sigset to install
|
|
+ // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
|
|
+ // or have a SIGILL handler when detecting CPU type). When that happens,
|
|
+ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
|
|
+ // avoid unnecessary crash when libjsig is not preloaded, try handle signals
|
|
+ // that do not require siginfo/ucontext first.
|
|
+
|
|
+ if (sig == SIGPIPE || sig == SIGXFSZ) {
|
|
+ // allow chained handler to go first
|
|
+ if (os::Linux::chained_handler(sig, info, ucVoid)) {
|
|
+ return true;
|
|
+ } else {
|
|
+ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
|
|
+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
|
|
+ if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
|
|
+ return 1;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ JavaThread* thread = NULL;
|
|
+ VMThread* vmthread = NULL;
|
|
+ if (os::Linux::signal_handlers_are_installed) {
|
|
+ if (t != NULL ) {
|
|
+ if(t->is_Java_thread()) {
|
|
+ thread = (JavaThread*)t;
|
|
+ } else if(t->is_VM_thread()) {
|
|
+ vmthread = (VMThread *)t;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Handle SafeFetch faults
|
|
+ if (uc != NULL) {
|
|
+ address const pc = (address) os::Linux::ucontext_get_pc(uc);
|
|
+ if (StubRoutines::is_safefetch_fault(pc)) {
|
|
+ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
|
|
+ return 1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // decide if this trap can be handled by a stub
|
|
+ address stub = NULL;
|
|
+
|
|
+ address pc = NULL;
|
|
+
|
|
+ //%note os_trap_1
|
|
+ if (info != NULL && uc != NULL && thread != NULL) {
|
|
+ pc = (address) os::Linux::ucontext_get_pc(uc);
|
|
+
|
|
+ // Handle ALL stack overflow variations here
|
|
+ if (sig == SIGSEGV) {
|
|
+ address addr = (address) info->si_addr;
|
|
+
|
|
+ // check if fault address is within thread stack
|
|
+ if (thread->on_local_stack(addr)) {
|
|
+ // stack overflow
|
|
+ if (thread->in_stack_yellow_reserved_zone(addr)) {
|
|
+ if (thread->thread_state() == _thread_in_Java) {
|
|
+ if (thread->in_stack_reserved_zone(addr)) {
|
|
+ frame fr;
|
|
+ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
|
|
+ assert(fr.is_java_frame(), "Must be a Java frame");
|
|
+ frame activation =
|
|
+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
|
|
+ if (activation.sp() != NULL) {
|
|
+ thread->disable_stack_reserved_zone();
|
|
+ if (activation.is_interpreted_frame()) {
|
|
+ thread->set_reserved_stack_activation((address)(
|
|
+ activation.fp() + frame::interpreter_frame_initial_sp_offset));
|
|
+ } else {
|
|
+ thread->set_reserved_stack_activation((address)activation.unextended_sp());
|
|
+ }
|
|
+ return 1;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ // Throw a stack overflow exception. Guard pages will be reenabled
|
|
+ // while unwinding the stack.
|
|
+ thread->disable_stack_yellow_reserved_zone();
|
|
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
|
|
+ } else {
|
|
+ // Thread was in the vm or native code. Return and try to finish.
|
|
+ thread->disable_stack_yellow_reserved_zone();
|
|
+ return 1;
|
|
+ }
|
|
+ } else if (thread->in_stack_red_zone(addr)) {
|
|
+ // Fatal red zone violation. Disable the guard pages and fall through
|
|
+ // to handle_unexpected_exception way down below.
|
|
+ thread->disable_stack_red_zone();
|
|
+ tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
|
|
+
|
|
+ // This is a likely cause, but hard to verify. Let's just print
|
|
+ // it as a hint.
|
|
+ tty->print_raw_cr("Please check if any of your loaded .so files has "
|
|
+ "enabled executable stack (see man page execstack(8))");
|
|
+ } else {
|
|
+ // Accessing stack address below sp may cause SEGV if current
|
|
+ // thread has MAP_GROWSDOWN stack. This should only happen when
|
|
+ // current thread was created by user code with MAP_GROWSDOWN flag
|
|
+ // and then attached to VM. See notes in os_linux.cpp.
|
|
+ if (thread->osthread()->expanding_stack() == 0) {
|
|
+ thread->osthread()->set_expanding_stack();
|
|
+ if (os::Linux::manually_expand_stack(thread, addr)) {
|
|
+ thread->osthread()->clear_expanding_stack();
|
|
+ return 1;
|
|
+ }
|
|
+ thread->osthread()->clear_expanding_stack();
|
|
+ } else {
|
|
+ fatal("recursive segv. expanding stack.");
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (thread->thread_state() == _thread_in_Java) {
|
|
+ // Java thread running in Java code => find exception handler if any
|
|
+ // a fault inside compiled code, the interpreter, or a stub
|
|
+
|
|
+ // Handle signal from NativeJump::patch_verified_entry().
|
|
+ if ((sig == SIGILL || sig == SIGTRAP)
|
|
+ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
|
|
+ if (TraceTraps) {
|
|
+ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
|
|
+ }
|
|
+ stub = SharedRuntime::get_handle_wrong_method_stub();
|
|
+ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
|
|
+ stub = SharedRuntime::get_poll_stub(pc);
|
|
+ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
|
|
+ // BugId 4454115: A read from a MappedByteBuffer can fault
|
|
+ // here if the underlying file has been truncated.
|
|
+ // Do not crash the VM in such a case.
|
|
+ CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
|
|
+ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
|
|
+ if (nm != NULL && nm->has_unsafe_access()) {
|
|
+ address next_pc = pc + NativeCall::instruction_size;
|
|
+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
|
|
+ }
|
|
+ } else if (sig == SIGFPE &&
|
|
+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
|
|
+ stub =
|
|
+ SharedRuntime::
|
|
+ continuation_for_implicit_exception(thread,
|
|
+ pc,
|
|
+ SharedRuntime::
|
|
+ IMPLICIT_DIVIDE_BY_ZERO);
|
|
+ } else if (sig == SIGSEGV &&
|
|
+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
|
|
+ // Determination of interpreter/vtable stub/compiled code null exception
|
|
+ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
|
|
+ }
|
|
+ } else if (thread->thread_state() == _thread_in_vm &&
|
|
+ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
|
|
+ thread->doing_unsafe_access()) {
|
|
+ address next_pc = pc + NativeCall::instruction_size;
|
|
+ stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
|
|
+ }
|
|
+
|
|
+ // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
|
|
+ // and the heap gets shrunk before the field access.
|
|
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) {
|
|
+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc);
|
|
+ if (addr_slow != (address)-1) {
|
|
+ stub = addr_slow;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Check to see if we caught the safepoint code in the
|
|
+ // process of write protecting the memory serialization page.
|
|
+ // It write enables the page immediately after protecting it
|
|
+ // so we can just return to retry the write.
|
|
+ if ((sig == SIGSEGV) &&
|
|
+ os::is_memory_serialize_page(thread, (address) info->si_addr)) {
|
|
+ // Block current thread until the memory serialize page permission restored.
|
|
+ os::block_on_serialize_page_trap();
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (stub != NULL) {
|
|
+ // save all thread context in case we need to restore it
|
|
+ if (thread != NULL) {
|
|
+ thread->set_saved_exception_pc(pc);
|
|
+ }
|
|
+
|
|
+ os::Linux::ucontext_set_pc(uc, stub);
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ // signal-chaining
|
|
+ if (os::Linux::chained_handler(sig, info, ucVoid)) {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ if (!abort_if_unrecognized) {
|
|
+ // caller wants another chance, so give it to him
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (pc == NULL && uc != NULL) {
|
|
+ pc = os::Linux::ucontext_get_pc(uc);
|
|
+ }
|
|
+
|
|
+ // unmask current signal
|
|
+ sigset_t newset;
|
|
+ sigemptyset(&newset);
|
|
+ sigaddset(&newset, sig);
|
|
+ sigprocmask(SIG_UNBLOCK, &newset, NULL);
|
|
+
|
|
+ VMError::report_and_die(t, sig, pc, info, ucVoid);
|
|
+
|
|
+ ShouldNotReachHere();
|
|
+ return true; // Mute compiler
|
|
+}
|
|
+
|
|
+void os::Linux::init_thread_fpu_state(void) {
|
|
+}
|
|
+
|
|
+int os::Linux::get_fpu_control_word(void) {
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void os::Linux::set_fpu_control_word(int fpu_control) {
|
|
+}
|
|
+
|
|
+
|
|
+////////////////////////////////////////////////////////////////////////////////
|
|
+// thread stack
|
|
+
|
|
+// Minimum usable stack sizes required to get to user code. Space for
|
|
+// HotSpot guard pages is added later.
|
|
+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K;
|
|
+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K;
|
|
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K;
|
|
+
|
|
+// return default stack size for thr_type
|
|
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
|
|
+ // default stack size (compiler thread needs larger stack)
|
|
+ size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M);
|
|
+ return s;
|
|
+}
|
|
+
|
|
+/////////////////////////////////////////////////////////////////////////////
|
|
+// helper functions for fatal error handler
|
|
+
|
|
+static const char* reg_abi_names[] = {
|
|
+ "pc",
|
|
+ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)",
|
|
+ "x5(t0)", "x6(t1)", "x7(t2)",
|
|
+ "x8(s0)", "x9(s1)",
|
|
+ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)",
|
|
+ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)",
|
|
+  "x28(t3)", "x29(t4)", "x30(t5)", "x31(t6)"
|
|
+};
|
|
+
|
|
+void os::print_context(outputStream *st, const void *context) {
|
|
+ if (context == NULL) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ const ucontext_t *uc = (const ucontext_t*)context;
|
|
+ st->print_cr("Registers:");
|
|
+ for (int r = 0; r < 32; r++) {
|
|
+ st->print("%-*.*s=", 8, 8, reg_abi_names[r]);
|
|
+ print_location(st, uc->uc_mcontext.__gregs[r]);
|
|
+ }
|
|
+ st->cr();
|
|
+
|
|
+ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc);
|
|
+ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp));
|
|
+ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t));
|
|
+ st->cr();
|
|
+
|
|
+ // Note: it may be unsafe to inspect memory near pc. For example, pc may
|
|
+ // point to garbage if entry point in an nmethod is corrupted. Leave
|
|
+ // this at the end, and hope for the best.
|
|
+ address pc = os::Linux::ucontext_get_pc(uc);
|
|
+ print_instructions(st, pc, sizeof(char));
|
|
+ st->cr();
|
|
+}
|
|
+
|
|
+void os::print_register_info(outputStream *st, const void *context) {
|
|
+ if (context == NULL) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ const ucontext_t *uc = (const ucontext_t*)context;
|
|
+
|
|
+ st->print_cr("Register to memory mapping:");
|
|
+ st->cr();
|
|
+
|
|
+ // this is horrendously verbose but the layout of the registers in the
|
|
+ // context does not match how we defined our abstract Register set, so
|
|
+ // we can't just iterate through the gregs area
|
|
+
|
|
+ // this is only for the "general purpose" registers
|
|
+
|
|
+ for (int r = 0; r < 32; r++)
|
|
+ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]);
|
|
+ st->cr();
|
|
+}
|
|
+
|
|
+void os::setup_fpu() {
|
|
+}
|
|
+
|
|
+#ifndef PRODUCT
|
|
+void os::verify_stack_alignment() {
|
|
+ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
|
|
+}
|
|
+#endif
|
|
+
|
|
+int os::extra_bang_size_in_bytes() {
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+extern "C" {
|
|
+ int SpinPause() {
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
|
|
+ if (from > to) {
|
|
+ const jshort *end = from + count;
|
|
+ while (from < end) {
|
|
+ *(to++) = *(from++);
|
|
+ }
|
|
+ } else if (from < to) {
|
|
+ const jshort *end = from;
|
|
+ from += count - 1;
|
|
+ to += count - 1;
|
|
+ while (from >= end) {
|
|
+ *(to--) = *(from--);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
|
|
+ if (from > to) {
|
|
+ const jint *end = from + count;
|
|
+ while (from < end) {
|
|
+ *(to++) = *(from++);
|
|
+ }
|
|
+ } else if (from < to) {
|
|
+ const jint *end = from;
|
|
+ from += count - 1;
|
|
+ to += count - 1;
|
|
+ while (from >= end) {
|
|
+ *(to--) = *(from--);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
|
|
+ if (from > to) {
|
|
+ const jlong *end = from + count;
|
|
+ while (from < end) {
|
|
+ os::atomic_copy64(from++, to++);
|
|
+ }
|
|
+ } else if (from < to) {
|
|
+ const jlong *end = from;
|
|
+ from += count - 1;
|
|
+ to += count - 1;
|
|
+ while (from >= end) {
|
|
+ os::atomic_copy64(from--, to--);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void _Copy_arrayof_conjoint_bytes(const HeapWord* from,
|
|
+ HeapWord* to,
|
|
+ size_t count) {
|
|
+ memmove(to, from, count);
|
|
+ }
|
|
+ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from,
|
|
+ HeapWord* to,
|
|
+ size_t count) {
|
|
+ memmove(to, from, count * 2);
|
|
+ }
|
|
+ void _Copy_arrayof_conjoint_jints(const HeapWord* from,
|
|
+ HeapWord* to,
|
|
+ size_t count) {
|
|
+ memmove(to, from, count * 4);
|
|
+ }
|
|
+ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from,
|
|
+ HeapWord* to,
|
|
+ size_t count) {
|
|
+ memmove(to, from, count * 8);
|
|
+ }
|
|
+};
|
|
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
new file mode 100644
index 000000000..eae1635b0
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+
+  static void setup_fpu();
+
+  // Used to register dynamic code cache area with the OS
+  // Note: Currently only used in 64 bit Windows implementations
+  static bool register_code_area(char *low, char *high) { return true; }
+
+  // Atomically copy 64 bits of data
+  static void atomic_copy64(const volatile void *src, volatile void *dst) {
+    *(jlong *) dst = *(const jlong *) src;
+  }
+
+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
new file mode 100644
index 000000000..82b9bb6fd
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+
+inline void Prefetch::read (void *loc, intx interval) {
+}
+
+inline void Prefetch::write(void *loc, intx interval) {
+}
+
+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..c78096931
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
|
|
@@ -0,0 +1,103 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "memory/metaspaceShared.hpp"
|
|
+#include "runtime/frame.inline.hpp"
|
|
+#include "runtime/thread.inline.hpp"
|
|
+
|
|
+frame JavaThread::pd_last_frame() {
|
|
+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
|
|
+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
|
|
+}
|
|
+
|
|
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
|
|
+// currently interrupted by SIGPROF
|
|
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
|
|
+ void* ucontext, bool isInJava) {
|
|
+
|
|
+ assert(Thread::current() == this, "caller must be current thread");
|
|
+ return pd_get_top_frame(fr_addr, ucontext, isInJava);
|
|
+}
|
|
+
|
|
+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
|
|
+ return pd_get_top_frame(fr_addr, ucontext, isInJava);
|
|
+}
|
|
+
|
|
+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
|
|
+ assert(this->is_Java_thread(), "must be JavaThread");
|
|
+ JavaThread* jt = (JavaThread *)this;
|
|
+
|
|
+ // If we have a last_Java_frame, then we should use it even if
|
|
+ // isInJava == true. It should be more reliable than ucontext info.
|
|
+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) {
|
|
+ *fr_addr = jt->pd_last_frame();
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ // At this point, we don't have a last_Java_frame, so
|
|
+ // we try to glean some information out of the ucontext
|
|
+ // if we were running Java code when SIGPROF came in.
|
|
+ if (isInJava) {
|
|
+ ucontext_t* uc = (ucontext_t*) ucontext;
|
|
+
|
|
+ intptr_t* ret_fp = NULL;
|
|
+ intptr_t* ret_sp = NULL;
|
|
+ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc,
|
|
+ &ret_sp, &ret_fp);
|
|
+ if (addr.pc() == NULL || ret_sp == NULL ) {
|
|
+ // ucontext wasn't useful
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) {
|
|
+ // In the middle of a trampoline call. Bail out for safety.
|
|
+ // This happens rarely so shouldn't affect profiling.
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ frame ret_frame(ret_sp, ret_fp, addr.pc());
|
|
+ if (!ret_frame.safe_for_sender(jt)) {
|
|
+#ifdef COMPILER2
|
|
+ frame ret_frame2(ret_sp, NULL, addr.pc());
|
|
+ if (!ret_frame2.safe_for_sender(jt)) {
|
|
+ // nothing else to try if the frame isn't good
|
|
+ return false;
|
|
+ }
|
|
+ ret_frame = ret_frame2;
|
|
+#else
|
|
+ // nothing else to try if the frame isn't good
|
|
+ return false;
|
|
+#endif /* COMPILER2 */
|
|
+ }
|
|
+ *fr_addr = ret_frame;
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ // nothing else to try
|
|
+ return false;
|
|
+}
|
|
+
|
|
+void JavaThread::cache_global_variables() { }
|
|
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
new file mode 100644
index 000000000..657b98984
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+
+ private:
+  void pd_initialize() {
+    _anchor.clear();
+  }
+
+  frame pd_last_frame();
+
+ public:
+  // Mutators are highly dangerous....
+  intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); }
+  void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); }
+
+  void set_base_of_stack_pointer(intptr_t* base_sp) {
+  }
+
+  static ByteSize last_Java_fp_offset() {
+    return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
+  }
+
+  intptr_t* base_of_stack_pointer() {
+    return NULL;
+  }
+  void record_base_of_stack_pointer() {
+  }
+
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
+                                           bool isInJava);
+
+  bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
+private:
+  bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);
+
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
+
+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
new file mode 100644
index 000000000..8ee443b5d
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+                                  \
+  /******************************/ \
+  /* Threads (NOTE: incomplete) */ \
+  /******************************/ \
+  nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \
+  nonstatic_field(OSThread, _pthread_id, pthread_t)
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+                          \
+  /**********************/ \
+  /* Posix Thread IDs */   \
+  /**********************/ \
+                          \
+  declare_integer_type(OSThread::thread_id_t) \
+  declare_unsigned_integer_type(pthread_t)
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
|
|
new file mode 100644
|
|
index 000000000..ef9358aa0
|
|
--- /dev/null
|
|
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
|
|
@@ -0,0 +1,116 @@
|
|
+/*
|
|
+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+#include "asm/register.hpp"
|
|
+#include "runtime/os.hpp"
|
|
+#include "runtime/os.inline.hpp"
|
|
+#include "runtime/vm_version.hpp"
|
|
+
|
|
+#include <asm/hwcap.h>
|
|
+#include <sys/auxv.h>
|
|
+
|
|
+#ifndef HWCAP_ISA_I
|
|
+#define HWCAP_ISA_I (1 << ('I' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_M
|
|
+#define HWCAP_ISA_M (1 << ('M' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_A
|
|
+#define HWCAP_ISA_A (1 << ('A' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_F
|
|
+#define HWCAP_ISA_F (1 << ('F' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_D
|
|
+#define HWCAP_ISA_D (1 << ('D' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_C
|
|
+#define HWCAP_ISA_C (1 << ('C' - 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef HWCAP_ISA_V
|
|
+#define HWCAP_ISA_V (1 << ('V' - 'A'))
|
|
+#endif
|
|
+
|
|
+#define read_csr(csr) \
|
|
+({ \
|
|
+ register unsigned long __v; \
|
|
+ __asm__ __volatile__ ("csrr %0, %1" \
|
|
+ : "=r" (__v) \
|
|
+ : "i" (csr) \
|
|
+ : "memory"); \
|
|
+ __v; \
|
|
+})
|
|
+
|
|
+uint32_t VM_Version::get_current_vector_length() {
|
|
+ assert(_features & CPU_V, "should not call this");
|
|
+ return (uint32_t)read_csr(CSR_VLENB);
|
|
+}
|
|
+
|
|
+void VM_Version::get_os_cpu_info() {
|
|
+
|
|
+ uint64_t auxv = getauxval(AT_HWCAP);
|
|
+
|
|
+ STATIC_ASSERT(CPU_I == HWCAP_ISA_I);
|
|
+ STATIC_ASSERT(CPU_M == HWCAP_ISA_M);
|
|
+ STATIC_ASSERT(CPU_A == HWCAP_ISA_A);
|
|
+ STATIC_ASSERT(CPU_F == HWCAP_ISA_F);
|
|
+ STATIC_ASSERT(CPU_D == HWCAP_ISA_D);
|
|
+ STATIC_ASSERT(CPU_C == HWCAP_ISA_C);
|
|
+ STATIC_ASSERT(CPU_V == HWCAP_ISA_V);
|
|
+
|
|
+ if (FILE *f = fopen("/proc/cpuinfo", "r")) {
|
|
+ char buf[512], *p;
|
|
+ while (fgets(buf, sizeof (buf), f) != NULL) {
|
|
+ if ((p = strchr(buf, ':')) != NULL) {
|
|
+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) {
|
|
+ char* uarch = os::strdup(p + 2);
|
|
+ uarch[strcspn(uarch, "\n")] = '\0';
|
|
+ _uarch = uarch;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ fclose(f);
|
|
+ }
|
|
+
|
|
+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs.
|
|
+ // Availability for those extensions could not be queried from HWCAP.
|
|
+ // TODO: Add proper detection for those extensions.
|
|
+ _features = auxv & (
|
|
+ HWCAP_ISA_I |
|
|
+ HWCAP_ISA_M |
|
|
+ HWCAP_ISA_A |
|
|
+ HWCAP_ISA_F |
|
|
+ HWCAP_ISA_D |
|
|
+ HWCAP_ISA_C |
|
|
+ HWCAP_ISA_V);
|
|
+}
|
|
diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp
|
|
index ba61aa4c0..4ca0b050b 100644
|
|
--- a/src/hotspot/share/adlc/archDesc.cpp
|
|
+++ b/src/hotspot/share/adlc/archDesc.cpp
|
|
@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
|
// Match Vector types.
|
|
if (strncmp(idealOp, "Vec",3)==0) {
|
|
switch(last_char) {
|
|
+ case 'A': return "TypeVect::VECTA";
|
|
case 'S': return "TypeVect::VECTS";
|
|
case 'D': return "TypeVect::VECTD";
|
|
case 'X': return "TypeVect::VECTX";
|
|
@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
|
}
|
|
}
|
|
|
|
+ if (strncmp(idealOp, "RegVMask", 8) == 0) {
|
|
+ return "Type::BOTTOM";
|
|
+ }
|
|
+
|
|
// !!!!!
|
|
switch(last_char) {
|
|
case 'I': return "TypeInt::INT";
|
|
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
|
|
index f810fde76..2cf9636d1 100644
|
|
--- a/src/hotspot/share/adlc/formssel.cpp
|
|
+++ b/src/hotspot/share/adlc/formssel.cpp
|
|
@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
|
|
strcmp(opType,"RegL")==0 ||
|
|
strcmp(opType,"RegF")==0 ||
|
|
strcmp(opType,"RegD")==0 ||
|
|
+ strcmp(opType,"RegVMask")==0 ||
|
|
+ strcmp(opType,"VecA")==0 ||
|
|
strcmp(opType,"VecS")==0 ||
|
|
strcmp(opType,"VecD")==0 ||
|
|
strcmp(opType,"VecX")==0 ||
|
|
diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp
|
|
index e30d39f73..af54dddf3 100644
|
|
--- a/src/hotspot/share/c1/c1_LIR.cpp
|
|
+++ b/src/hotspot/share/c1/c1_LIR.cpp
|
|
@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const {
|
|
void LIR_Op2::verify() const {
|
|
#ifdef ASSERT
|
|
switch (code()) {
|
|
- case lir_cmove:
|
|
case lir_xchg:
|
|
break;
|
|
|
|
@@ -252,30 +251,27 @@ void LIR_Op2::verify() const {
|
|
|
|
|
|
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block)
|
|
- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
- , _cond(cond)
|
|
- , _type(type)
|
|
+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
, _label(block->label())
|
|
+ , _type(type)
|
|
, _block(block)
|
|
, _ublock(NULL)
|
|
, _stub(NULL) {
|
|
}
|
|
|
|
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) :
|
|
- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
- , _cond(cond)
|
|
- , _type(type)
|
|
+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
, _label(stub->entry())
|
|
+ , _type(type)
|
|
, _block(NULL)
|
|
, _ublock(NULL)
|
|
, _stub(stub) {
|
|
}
|
|
|
|
LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock)
|
|
- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
- , _cond(cond)
|
|
- , _type(type)
|
|
+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
|
|
, _label(block->label())
|
|
+ , _type(type)
|
|
, _block(block)
|
|
, _ublock(ublock)
|
|
, _stub(NULL)
|
|
@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) {
|
|
}
|
|
|
|
void LIR_OpBranch::negate_cond() {
|
|
- switch (_cond) {
|
|
- case lir_cond_equal: _cond = lir_cond_notEqual; break;
|
|
- case lir_cond_notEqual: _cond = lir_cond_equal; break;
|
|
- case lir_cond_less: _cond = lir_cond_greaterEqual; break;
|
|
- case lir_cond_lessEqual: _cond = lir_cond_greater; break;
|
|
- case lir_cond_greaterEqual: _cond = lir_cond_less; break;
|
|
- case lir_cond_greater: _cond = lir_cond_lessEqual; break;
|
|
+ switch (cond()) {
|
|
+ case lir_cond_equal: set_cond(lir_cond_notEqual); break;
|
|
+ case lir_cond_notEqual: set_cond(lir_cond_equal); break;
|
|
+ case lir_cond_less: set_cond(lir_cond_greaterEqual); break;
|
|
+ case lir_cond_lessEqual: set_cond(lir_cond_greater); break;
|
|
+ case lir_cond_greaterEqual: set_cond(lir_cond_less); break;
|
|
+ case lir_cond_greater: set_cond(lir_cond_lessEqual); break;
|
|
default: ShouldNotReachHere();
|
|
}
|
|
}
|
|
@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|
assert(op->as_OpBranch() != NULL, "must be");
|
|
LIR_OpBranch* opBranch = (LIR_OpBranch*)op;
|
|
|
|
+ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() &&
|
|
+ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() &&
|
|
+ opBranch->_tmp5->is_illegal(), "not used");
|
|
+
|
|
+ if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1);
|
|
+ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2);
|
|
+
|
|
if (opBranch->_info != NULL) do_info(opBranch->_info);
|
|
assert(opBranch->_result->is_illegal(), "not used");
|
|
if (opBranch->_stub != NULL) opBranch->stub()->visit(this);
|
|
@@ -615,17 +618,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|
// to the result operand, otherwise the backend fails
|
|
case lir_cmove:
|
|
{
|
|
- assert(op->as_Op2() != NULL, "must be");
|
|
- LIR_Op2* op2 = (LIR_Op2*)op;
|
|
+ assert(op->as_Op4() != NULL, "must be");
|
|
+ LIR_Op4* op4 = (LIR_Op4*)op;
|
|
|
|
- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() &&
|
|
- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used");
|
|
- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used");
|
|
+ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() &&
|
|
+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be");
|
|
+ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used");
|
|
|
|
- do_input(op2->_opr1);
|
|
- do_input(op2->_opr2);
|
|
- do_temp(op2->_opr2);
|
|
- do_output(op2->_result);
|
|
+ do_input(op4->_opr1);
|
|
+ do_input(op4->_opr2);
|
|
+ if (op4->_opr3->is_valid()) do_input(op4->_opr3);
|
|
+ if (op4->_opr4->is_valid()) do_input(op4->_opr4);
|
|
+ do_temp(op4->_opr2);
|
|
+ do_output(op4->_result);
|
|
|
|
break;
|
|
}
|
|
@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) {
|
|
masm->emit_op3(this);
|
|
}
|
|
|
|
+void LIR_Op4::emit_code(LIR_Assembler* masm) {
|
|
+ masm->emit_op4(this);
|
|
+}
|
|
+
|
|
void LIR_OpLock::emit_code(LIR_Assembler* masm) {
|
|
masm->emit_lock(this);
|
|
if (stub()) {
|
|
@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block)
|
|
, _file(NULL)
|
|
, _line(0)
|
|
#endif
|
|
+#ifdef RISCV
|
|
+ , _cmp_opr1(LIR_OprFact::illegalOpr)
|
|
+ , _cmp_opr2(LIR_OprFact::illegalOpr)
|
|
+#endif
|
|
{ }
|
|
|
|
|
|
@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) {
|
|
}
|
|
#endif
|
|
|
|
+#ifdef RISCV
|
|
+void LIR_List::set_cmp_oprs(LIR_Op* op) {
|
|
+ switch (op->code()) {
|
|
+ case lir_cmp:
|
|
+ _cmp_opr1 = op->as_Op2()->in_opr1();
|
|
+ _cmp_opr2 = op->as_Op2()->in_opr2();
|
|
+ break;
|
|
+ case lir_branch: // fall through
|
|
+ case lir_cond_float_branch:
|
|
+ assert(op->as_OpBranch()->cond() == lir_cond_always ||
|
|
+ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr),
|
|
+ "conditional branches must have legal operands");
|
|
+ if (op->as_OpBranch()->cond() != lir_cond_always) {
|
|
+ op->as_Op2()->set_in_opr1(_cmp_opr1);
|
|
+ op->as_Op2()->set_in_opr2(_cmp_opr2);
|
|
+ }
|
|
+ break;
|
|
+ case lir_cmove:
|
|
+ op->as_Op4()->set_in_opr3(_cmp_opr1);
|
|
+ op->as_Op4()->set_in_opr4(_cmp_opr2);
|
|
+ break;
|
|
+#if INCLUDE_ZGC
|
|
+ case lir_zloadbarrier_test:
|
|
+ _cmp_opr1 = FrameMap::as_opr(t1);
|
|
+ _cmp_opr2 = LIR_OprFact::intConst(0);
|
|
+ break;
|
|
+#endif
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+#endif
|
|
|
|
void LIR_List::append(LIR_InsertionBuffer* buffer) {
|
|
assert(this == buffer->lir_list(), "wrong lir list");
|
|
@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const {
|
|
case lir_cmp_l2i: s = "cmp_l2i"; break;
|
|
case lir_ucmp_fd2i: s = "ucomp_fd2i"; break;
|
|
case lir_cmp_fd2i: s = "comp_fd2i"; break;
|
|
- case lir_cmove: s = "cmove"; break;
|
|
case lir_add: s = "add"; break;
|
|
case lir_sub: s = "sub"; break;
|
|
case lir_mul: s = "mul"; break;
|
|
@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const {
|
|
case lir_irem: s = "irem"; break;
|
|
case lir_fmad: s = "fmad"; break;
|
|
case lir_fmaf: s = "fmaf"; break;
|
|
+ // LIR_Op4
|
|
+ case lir_cmove: s = "cmove"; break;
|
|
// LIR_OpJavaCall
|
|
case lir_static_call: s = "static"; break;
|
|
case lir_optvirtual_call: s = "optvirtual"; break;
|
|
@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) {
|
|
// LIR_OpBranch
|
|
void LIR_OpBranch::print_instr(outputStream* out) const {
|
|
print_condition(out, cond()); out->print(" ");
|
|
+ in_opr1()->print(out); out->print(" ");
|
|
+ in_opr2()->print(out); out->print(" ");
|
|
if (block() != NULL) {
|
|
out->print("[B%d] ", block()->block_id());
|
|
} else if (stub() != NULL) {
|
|
@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const {
|
|
|
|
// LIR_Op2
|
|
void LIR_Op2::print_instr(outputStream* out) const {
|
|
- if (code() == lir_cmove || code() == lir_cmp) {
|
|
+ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) {
|
|
print_condition(out, condition()); out->print(" ");
|
|
}
|
|
in_opr1()->print(out); out->print(" ");
|
|
@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const {
|
|
result_opr()->print(out);
|
|
}
|
|
|
|
+// LIR_Op4
|
|
+void LIR_Op4::print_instr(outputStream* out) const {
|
|
+ print_condition(out, condition()); out->print(" ");
|
|
+ in_opr1()->print(out); out->print(" ");
|
|
+ in_opr2()->print(out); out->print(" ");
|
|
+ in_opr3()->print(out); out->print(" ");
|
|
+ in_opr4()->print(out); out->print(" ");
|
|
+ result_opr()->print(out);
|
|
+}
|
|
|
|
void LIR_OpLock::print_instr(outputStream* out) const {
|
|
hdr_opr()->print(out); out->print(" ");
|
|
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
|
|
index 3234ca018..88cd3b24e 100644
|
|
--- a/src/hotspot/share/c1/c1_LIR.hpp
|
|
+++ b/src/hotspot/share/c1/c1_LIR.hpp
|
|
@@ -864,9 +864,11 @@ class LIR_OpConvert;
|
|
class LIR_OpAllocObj;
|
|
class LIR_OpRoundFP;
|
|
class LIR_Op2;
|
|
-class LIR_OpDelay;
|
|
+class LIR_OpBranch;
|
|
+class LIR_OpDelay;
|
|
class LIR_Op3;
|
|
class LIR_OpAllocArray;
|
|
+class LIR_Op4;
|
|
class LIR_OpCall;
|
|
class LIR_OpJavaCall;
|
|
class LIR_OpRTCall;
|
|
@@ -916,8 +918,6 @@ enum LIR_Code {
|
|
, lir_null_check
|
|
, lir_return
|
|
, lir_leal
|
|
- , lir_branch
|
|
- , lir_cond_float_branch
|
|
, lir_move
|
|
, lir_convert
|
|
, lir_alloc_object
|
|
@@ -929,11 +929,12 @@ enum LIR_Code {
|
|
, lir_unwind
|
|
, end_op1
|
|
, begin_op2
|
|
+ , lir_branch
|
|
+ , lir_cond_float_branch
|
|
, lir_cmp
|
|
, lir_cmp_l2i
|
|
, lir_ucmp_fd2i
|
|
, lir_cmp_fd2i
|
|
- , lir_cmove
|
|
, lir_add
|
|
, lir_sub
|
|
, lir_mul
|
|
@@ -964,6 +965,9 @@ enum LIR_Code {
|
|
, lir_fmad
|
|
, lir_fmaf
|
|
, end_op3
|
|
+ , begin_op4
|
|
+ , lir_cmove
|
|
+ , end_op4
|
|
, begin_opJavaCall
|
|
, lir_static_call
|
|
, lir_optvirtual_call
|
|
@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj {
|
|
virtual LIR_Op1* as_Op1() { return NULL; }
|
|
virtual LIR_Op2* as_Op2() { return NULL; }
|
|
virtual LIR_Op3* as_Op3() { return NULL; }
|
|
+ virtual LIR_Op4* as_Op4() { return NULL; }
|
|
virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
|
|
virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
|
|
virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
|
|
@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall {
|
|
virtual void verify() const;
|
|
};
|
|
|
|
-
|
|
-class LIR_OpBranch: public LIR_Op {
|
|
- friend class LIR_OpVisitState;
|
|
-
|
|
- private:
|
|
- LIR_Condition _cond;
|
|
- BasicType _type;
|
|
- Label* _label;
|
|
- BlockBegin* _block; // if this is a branch to a block, this is the block
|
|
- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block
|
|
- CodeStub* _stub; // if this is a branch to a stub, this is the stub
|
|
-
|
|
- public:
|
|
- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
|
|
- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
|
|
- , _cond(cond)
|
|
- , _type(type)
|
|
- , _label(lbl)
|
|
- , _block(NULL)
|
|
- , _ublock(NULL)
|
|
- , _stub(NULL) { }
|
|
-
|
|
- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block);
|
|
- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub);
|
|
-
|
|
- // for unordered comparisons
|
|
- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock);
|
|
-
|
|
- LIR_Condition cond() const { return _cond; }
|
|
- BasicType type() const { return _type; }
|
|
- Label* label() const { return _label; }
|
|
- BlockBegin* block() const { return _block; }
|
|
- BlockBegin* ublock() const { return _ublock; }
|
|
- CodeStub* stub() const { return _stub; }
|
|
-
|
|
- void change_block(BlockBegin* b);
|
|
- void change_ublock(BlockBegin* b);
|
|
- void negate_cond();
|
|
-
|
|
- virtual void emit_code(LIR_Assembler* masm);
|
|
- virtual LIR_OpBranch* as_OpBranch() { return this; }
|
|
- virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
|
|
-};
|
|
-
|
|
-
|
|
class ConversionStub;
|
|
|
|
class LIR_OpConvert: public LIR_Op1 {
|
|
@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op {
|
|
void verify() const;
|
|
|
|
public:
|
|
- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL)
|
|
+ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL)
|
|
: LIR_Op(code, LIR_OprFact::illegalOpr, info)
|
|
, _opr1(opr1)
|
|
, _opr2(opr2)
|
|
- , _type(T_ILLEGAL)
|
|
- , _condition(condition)
|
|
+ , _type(type)
|
|
, _fpu_stack_size(0)
|
|
, _tmp1(LIR_OprFact::illegalOpr)
|
|
, _tmp2(LIR_OprFact::illegalOpr)
|
|
, _tmp3(LIR_OprFact::illegalOpr)
|
|
, _tmp4(LIR_OprFact::illegalOpr)
|
|
- , _tmp5(LIR_OprFact::illegalOpr) {
|
|
- assert(code == lir_cmp || code == lir_assert, "code check");
|
|
+ , _tmp5(LIR_OprFact::illegalOpr)
|
|
+ , _condition(condition) {
|
|
+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check");
|
|
}
|
|
|
|
LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type)
|
|
@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op {
|
|
, _opr1(opr1)
|
|
, _opr2(opr2)
|
|
, _type(type)
|
|
- , _condition(condition)
|
|
, _fpu_stack_size(0)
|
|
, _tmp1(LIR_OprFact::illegalOpr)
|
|
, _tmp2(LIR_OprFact::illegalOpr)
|
|
@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op {
|
|
, _opr1(opr1)
|
|
, _opr2(opr2)
|
|
, _type(type)
|
|
- , _condition(lir_cond_unknown)
|
|
, _fpu_stack_size(0)
|
|
, _tmp1(LIR_OprFact::illegalOpr)
|
|
, _tmp2(LIR_OprFact::illegalOpr)
|
|
, _tmp3(LIR_OprFact::illegalOpr)
|
|
, _tmp4(LIR_OprFact::illegalOpr)
|
|
- , _tmp5(LIR_OprFact::illegalOpr) {
|
|
- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
|
|
+ , _tmp5(LIR_OprFact::illegalOpr)
|
|
+ , _condition(lir_cond_unknown) {
|
|
+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check");
|
|
}
|
|
|
|
LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr,
|
|
@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op {
|
|
, _opr1(opr1)
|
|
, _opr2(opr2)
|
|
, _type(T_ILLEGAL)
|
|
- , _condition(lir_cond_unknown)
|
|
, _fpu_stack_size(0)
|
|
, _tmp1(tmp1)
|
|
, _tmp2(tmp2)
|
|
, _tmp3(tmp3)
|
|
, _tmp4(tmp4)
|
|
- , _tmp5(tmp5) {
|
|
- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
|
|
+ , _tmp5(tmp5)
|
|
+ , _condition(lir_cond_unknown) {
|
|
+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check");
|
|
}
|
|
|
|
LIR_Opr in_opr1() const { return _opr1; }
|
|
@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op {
|
|
LIR_Opr tmp4_opr() const { return _tmp4; }
|
|
LIR_Opr tmp5_opr() const { return _tmp5; }
|
|
LIR_Condition condition() const {
|
|
- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition;
|
|
+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for cmp, branch and assert"); return _condition;
|
|
}
|
|
void set_condition(LIR_Condition condition) {
|
|
- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition;
|
|
+ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for cmp and branch"); _condition = condition;
|
|
}
|
|
|
|
void set_fpu_stack_size(int size) { _fpu_stack_size = size; }
|
|
@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op {
|
|
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
|
|
};
|
|
|
|
+class LIR_OpBranch: public LIR_Op2 {
|
|
+ friend class LIR_OpVisitState;
|
|
+
|
|
+ private:
|
|
+ BasicType _type;
|
|
+ Label* _label;
|
|
+ BlockBegin* _block; // if this is a branch to a block, this is the block
|
|
+ BlockBegin* _ublock; // if this is a float-branch, this is the unordered block
|
|
+ CodeStub* _stub; // if this is a branch to a stub, this is the stub
|
|
+
|
|
+ public:
|
|
+ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
|
|
+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
|
|
+ , _type(type)
|
|
+ , _label(lbl)
|
|
+ , _block(NULL)
|
|
+ , _ublock(NULL)
|
|
+ , _stub(NULL) { }
|
|
+
|
|
+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block);
|
|
+ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub);
|
|
+
|
|
+ // for unordered comparisons
|
|
+ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock);
|
|
+
|
|
+ LIR_Condition cond() const {
|
|
+ return condition();
|
|
+ }
|
|
+
|
|
+ void set_cond(LIR_Condition cond) {
|
|
+ set_condition(cond);
|
|
+ }
|
|
+
|
|
+ Label* label() const { return _label; }
|
|
+ BlockBegin* block() const { return _block; }
|
|
+ BlockBegin* ublock() const { return _ublock; }
|
|
+ CodeStub* stub() const { return _stub; }
|
|
+
|
|
+ void change_block(BlockBegin* b);
|
|
+ void change_ublock(BlockBegin* b);
|
|
+ void negate_cond();
|
|
+
|
|
+ virtual void emit_code(LIR_Assembler* masm);
|
|
+ virtual LIR_OpBranch* as_OpBranch() { return this; }
|
|
+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
|
|
+};
|
|
+
|
|
class LIR_OpAllocArray : public LIR_Op {
|
|
friend class LIR_OpVisitState;
|
|
|
|
@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op {
|
|
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
|
|
};
|
|
|
|
+class LIR_Op4: public LIR_Op {
|
|
+ friend class LIR_OpVisitState;
|
|
+ protected:
|
|
+ LIR_Opr _opr1;
|
|
+ LIR_Opr _opr2;
|
|
+ LIR_Opr _opr3;
|
|
+ LIR_Opr _opr4;
|
|
+ BasicType _type;
|
|
+ LIR_Opr _tmp1;
|
|
+ LIR_Opr _tmp2;
|
|
+ LIR_Opr _tmp3;
|
|
+ LIR_Opr _tmp4;
|
|
+ LIR_Opr _tmp5;
|
|
+ LIR_Condition _condition;
|
|
+
|
|
+ public:
|
|
+ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4,
|
|
+ LIR_Opr result, BasicType type)
|
|
+ : LIR_Op(code, result, NULL)
|
|
+ , _opr1(opr1)
|
|
+ , _opr2(opr2)
|
|
+ , _opr3(opr3)
|
|
+ , _opr4(opr4)
|
|
+ , _type(type)
|
|
+ , _condition(condition)
|
|
+ , _tmp1(LIR_OprFact::illegalOpr)
|
|
+ , _tmp2(LIR_OprFact::illegalOpr)
|
|
+ , _tmp3(LIR_OprFact::illegalOpr)
|
|
+ , _tmp4(LIR_OprFact::illegalOpr)
|
|
+ , _tmp5(LIR_OprFact::illegalOpr) {
|
|
+ assert(code == lir_cmove, "code check");
|
|
+ assert(type != T_ILLEGAL, "cmove should have type");
|
|
+ }
|
|
+
|
|
+ LIR_Opr in_opr1() const { return _opr1; }
|
|
+ LIR_Opr in_opr2() const { return _opr2; }
|
|
+ LIR_Opr in_opr3() const { return _opr3; }
|
|
+ LIR_Opr in_opr4() const { return _opr4; }
|
|
+ BasicType type() const { return _type; }
|
|
+ LIR_Opr tmp1_opr() const { return _tmp1; }
|
|
+ LIR_Opr tmp2_opr() const { return _tmp2; }
|
|
+ LIR_Opr tmp3_opr() const { return _tmp3; }
|
|
+ LIR_Opr tmp4_opr() const { return _tmp4; }
|
|
+ LIR_Opr tmp5_opr() const { return _tmp5; }
|
|
+
|
|
+ LIR_Condition condition() const { return _condition; }
|
|
+ void set_condition(LIR_Condition condition) { _condition = condition; }
|
|
+
|
|
+ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; }
|
|
+ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; }
|
|
+ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; }
|
|
+ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; }
|
|
+ virtual void emit_code(LIR_Assembler* masm);
|
|
+ virtual LIR_Op4* as_Op4() { return this; }
|
|
+
|
|
+ virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
|
|
+};
|
|
|
|
//--------------------------------
|
|
class LabelObj: public CompilationResourceObj {
|
|
@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj {
|
|
const char * _file;
|
|
int _line;
|
|
#endif
|
|
+#ifdef RISCV
|
|
+ LIR_Opr _cmp_opr1;
|
|
+ LIR_Opr _cmp_opr2;
|
|
+#endif
|
|
|
|
public:
|
|
void append(LIR_Op* op) {
|
|
@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj {
|
|
}
|
|
#endif // PRODUCT
|
|
|
|
+#ifdef RISCV
|
|
+ set_cmp_oprs(op);
|
|
+ // on riscv, a lir_cmp op only sets the cmp operands and is not appended
|
|
+ if (op->code() == lir_cmp) return;
|
|
+#endif
|
|
+
|
|
_operations.append(op);
|
|
|
|
#ifdef ASSERT
|
|
@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj {
|
|
void set_file_and_line(const char * file, int line);
|
|
#endif
|
|
|
|
+#ifdef RISCV
|
|
+ void set_cmp_oprs(LIR_Op* op);
|
|
+#endif
|
|
+
|
|
//---------- accessors ---------------
|
|
LIR_OpList* instructions_list() { return &_operations; }
|
|
int length() const { return _operations.length(); }
|
|
@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj {
|
|
void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info);
|
|
void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info);
|
|
|
|
- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) {
|
|
- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type));
|
|
+ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type,
|
|
+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) {
|
|
+ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type));
|
|
}
|
|
|
|
void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value,
|
|
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp
|
|
index 160483d5f..42a0350f7 100644
|
|
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp
|
|
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp
|
|
@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
|
|
comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
|
|
break;
|
|
|
|
- case lir_cmove:
|
|
- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type());
|
|
- break;
|
|
-
|
|
case lir_shl:
|
|
case lir_shr:
|
|
case lir_ushr:
|
|
@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
|
|
}
|
|
}
|
|
|
|
+void LIR_Assembler::emit_op4(LIR_Op4* op) {
|
|
+ switch(op->code()) {
|
|
+ case lir_cmove:
|
|
+ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4());
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ Unimplemented();
|
|
+ break;
|
|
+ }
|
|
+}
|
|
|
|
void LIR_Assembler::build_frame() {
|
|
_masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
|
|
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp
|
|
index 44a5bcbe5..406a58d21 100644
|
|
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp
|
|
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp
|
|
@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj {
|
|
void emit_op1(LIR_Op1* op);
|
|
void emit_op2(LIR_Op2* op);
|
|
void emit_op3(LIR_Op3* op);
|
|
+ void emit_op4(LIR_Op4* op);
|
|
void emit_opBranch(LIR_OpBranch* op);
|
|
void emit_opLabel(LIR_OpLabel* op);
|
|
void emit_arraycopy(LIR_OpArrayCopy* op);
|
|
@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj {
|
|
void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info);
|
|
void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions
|
|
void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op);
|
|
- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type);
|
|
+ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type,
|
|
+ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr);
|
|
|
|
void call( LIR_OpJavaCall* op, relocInfo::relocType rtype);
|
|
void ic_call( LIR_OpJavaCall* op);
|
|
diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp
|
|
index c28055fd9..d00bfe91a 100644
|
|
--- a/src/hotspot/share/c1/c1_LinearScan.cpp
|
|
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp
|
|
@@ -1242,8 +1242,8 @@ void LinearScan::add_register_hints(LIR_Op* op) {
|
|
break;
|
|
}
|
|
case lir_cmove: {
|
|
- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2");
|
|
- LIR_Op2* cmove = (LIR_Op2*)op;
|
|
+ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4");
|
|
+ LIR_Op4* cmove = (LIR_Op4*)op;
|
|
|
|
LIR_Opr move_from = cmove->in_opr1();
|
|
LIR_Opr move_to = cmove->result_opr();
|
|
@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() {
|
|
}
|
|
}
|
|
|
|
+#ifndef RISCV
|
|
+ // Disable these optimizations on riscv temporarily, because they do not
|
|
+ // work when the comparison operands are bound to branches or cmoves.
|
|
{ TIME_LINEAR_SCAN(timer_optimize_lir);
|
|
|
|
EdgeMoveOptimizer::optimize(ir()->code());
|
|
@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() {
|
|
// check that cfg is still correct after optimizations
|
|
ir()->verify();
|
|
}
|
|
+#endif
|
|
|
|
NOT_PRODUCT(print_lir(1, "Before Code Generation", false));
|
|
NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final));
|
|
@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
|
|
// There might be a cmove inserted for profiling which depends on the same
|
|
// compare. If we change the condition of the respective compare, we have
|
|
// to take care of this cmove as well.
|
|
- LIR_Op2* prev_cmove = NULL;
|
|
+ LIR_Op4* prev_cmove = NULL;
|
|
|
|
for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) {
|
|
prev_op = instructions->at(j);
|
|
// check for the cmove
|
|
if (prev_op->code() == lir_cmove) {
|
|
- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2");
|
|
- prev_cmove = (LIR_Op2*)prev_op;
|
|
+ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4");
|
|
+ prev_cmove = (LIR_Op4*)prev_op;
|
|
assert(prev_branch->cond() == prev_cmove->condition(), "should be the same");
|
|
}
|
|
if (prev_op->code() == lir_cmp) {
|
|
diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp
|
|
index 19fe196bc..d9cb8e999 100644
|
|
--- a/src/hotspot/share/classfile/vmSymbols.cpp
|
|
+++ b/src/hotspot/share/classfile/vmSymbols.cpp
|
|
@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
|
|
case vmIntrinsics::_indexOfIU:
|
|
case vmIntrinsics::_indexOfIUL:
|
|
case vmIntrinsics::_indexOfU_char:
|
|
+ case vmIntrinsics::_indexOfL_char:
|
|
case vmIntrinsics::_compareToL:
|
|
case vmIntrinsics::_compareToU:
|
|
case vmIntrinsics::_compareToLU:
|
|
@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
|
|
case vmIntrinsics::_indexOfIU:
|
|
case vmIntrinsics::_indexOfIUL:
|
|
case vmIntrinsics::_indexOfU_char:
|
|
+ case vmIntrinsics::_indexOfL_char:
|
|
if (!SpecialStringIndexOf) return true;
|
|
break;
|
|
case vmIntrinsics::_equalsL:
|
|
diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
|
|
index cef3f530c..a31525003 100644
|
|
--- a/src/hotspot/share/classfile/vmSymbols.hpp
|
|
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
|
|
@@ -946,6 +946,7 @@
|
|
do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \
|
|
do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \
|
|
do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \
|
|
+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \
|
|
do_name( indexOf_name, "indexOf") \
|
|
do_name( indexOfChar_name, "indexOfChar") \
|
|
do_name( indexOfUL_name, "indexOfLatin1") \
|
|
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
|
|
index 4771a8b86..295f82ccc 100644
|
|
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
|
|
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
|
|
@@ -31,7 +31,7 @@
|
|
#include "utilities/defaultStream.hpp"
|
|
|
|
void ShenandoahArguments::initialize() {
|
|
-#if !(defined AARCH64 || defined AMD64 || defined IA32)
|
|
+#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64)
|
|
vm_exit_during_initialization("Shenandoah GC is not supported on this platform.");
|
|
#endif
|
|
|
|
diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
|
|
index e01a242a5..ff16de0e7 100644
|
|
--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
|
|
+++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
|
|
@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) {
|
|
inline bool JfrBigEndian::platform_supports_unaligned_reads(void) {
|
|
#if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390)
|
|
return true;
|
|
-#elif defined(SPARC) || defined(ARM) || defined(AARCH64)
|
|
+#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV)
|
|
return false;
|
|
#else
|
|
#warning "Unconfigured platform"
|
|
diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp
|
|
index 7768615b7..ef006f087 100644
|
|
--- a/src/hotspot/share/opto/c2compiler.cpp
|
|
+++ b/src/hotspot/share/opto/c2compiler.cpp
|
|
@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
|
case vmIntrinsics::_indexOfIU:
|
|
case vmIntrinsics::_indexOfIUL:
|
|
case vmIntrinsics::_indexOfU_char:
|
|
+ case vmIntrinsics::_indexOfL_char:
|
|
case vmIntrinsics::_toBytesStringU:
|
|
case vmIntrinsics::_getCharsStringU:
|
|
case vmIntrinsics::_getCharStringU:
|
|
diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp
|
|
index 500054218..fafbde78d 100644
|
|
--- a/src/hotspot/share/opto/chaitin.cpp
|
|
+++ b/src/hotspot/share/opto/chaitin.cpp
|
|
@@ -77,6 +77,7 @@ void LRG::dump() const {
|
|
if( _is_oop ) tty->print("Oop ");
|
|
if( _is_float ) tty->print("Float ");
|
|
if( _is_vector ) tty->print("Vector ");
|
|
+ if( _is_scalable ) tty->print("Scalable ");
|
|
if( _was_spilled1 ) tty->print("Spilled ");
|
|
if( _was_spilled2 ) tty->print("Spilled2 ");
|
|
if( _direct_conflict ) tty->print("Direct_conflict ");
|
|
@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() {
|
|
|
|
// Merge multidefs if multiple defs representing the same value are used in a single block.
|
|
merge_multidefs();
|
|
+ merge_debugdefs();
|
|
|
|
#ifdef ASSERT
|
|
// Veify the graph after RA.
|
|
@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() {
|
|
// Live ranges record the highest register in their mask.
|
|
// We want the low register for the AD file writer's convenience.
|
|
OptoReg::Name hi = lrg.reg(); // Get hi register
|
|
- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
|
|
+ int num_regs = lrg.num_regs();
|
|
+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
|
|
+ // For scalable vector registers, when they are allocated in physical
|
|
+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
|
|
+ // vector. If they are allocated on stack, we need to get the actual
|
|
+ // num_regs, which reflects the physical length of scalable registers.
|
|
+ num_regs = lrg.scalable_reg_slots();
|
|
+ }
|
|
+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
|
|
// We have to use pair [lo,lo+1] even for wide vectors because
|
|
// the rest of code generation works only with pairs. It is safe
|
|
// since for registers encoding only 'lo' is used.
|
|
@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
|
// Check for vector live range (only if vector register is used).
|
|
// On SPARC vector uses RegD which could be misaligned so it is not
|
|
// processes as vector in RA.
|
|
- if (RegMask::is_vector(ireg))
|
|
+ if (RegMask::is_vector(ireg)) {
|
|
lrg._is_vector = 1;
|
|
+ if (ireg == Op_VecA) {
|
|
+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
|
|
+ lrg._is_scalable = 1;
|
|
+ // For a scalable vector, when it is allocated in a physical register,
|
|
+ // num_regs is RegMask::SlotsPerVecA for reg mask,
|
|
+ // which may not be the actual physical register size.
|
|
+ // If it is allocated on the stack, we need to get the actual
|
|
+ // physical length of scalable vector register.
|
|
+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
|
|
+ }
|
|
+ }
|
|
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
|
|
"vector must be in vector registers");
|
|
|
|
@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
|
lrg.set_reg_pressure(1);
|
|
#endif
|
|
break;
|
|
+ case Op_VecA:
|
|
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
|
+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
|
|
+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
|
|
+ lrg.set_num_regs(RegMask::SlotsPerVecA);
|
|
+ lrg.set_reg_pressure(1);
|
|
+ break;
|
|
case Op_VecS:
|
|
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
|
|
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
|
|
@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
|
|
return false;
|
|
}
|
|
|
|
+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
|
|
+ int num_regs = lrg.num_regs();
|
|
+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
|
|
+
|
|
+ if (lrg.is_scalable()) {
|
|
+ // a physical register is found
|
|
+ if (chunk == 0 && OptoReg::is_reg(assigned)) {
|
|
+ return assigned;
|
|
+ }
|
|
+
|
|
+ // find available stack slots for scalable register
|
|
+ if (lrg._is_vector) {
|
|
+ num_regs = lrg.scalable_reg_slots();
|
|
+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits
|
|
+ if (num_regs == RegMask::SlotsPerVecA) {
|
|
+ return assigned;
|
|
+ }
|
|
+
|
|
+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it
|
|
+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits
|
|
+ // instead of SlotsPerVecA bits.
|
|
+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
|
|
+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
|
|
+ // Verify the found reg has scalable_reg_slots() bits set.
|
|
+ if (mask.is_valid_reg(assigned, num_regs)) {
|
|
+ return assigned;
|
|
+ } else {
|
|
+ // Remove more for each iteration
|
|
+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
|
|
+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
|
|
+ assigned = mask.find_first_set(lrg, num_regs);
|
|
+ }
|
|
+ }
|
|
+ return OptoReg::Bad; // will cause chunk change, and retry next chunk
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return assigned;
|
|
+}
|
|
+
|
|
+
|
|
// Choose a color using the biasing heuristic
|
|
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
|
|
@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
RegMask tempmask = lrg.mask();
|
|
tempmask.AND(lrgs(copy_lrg).mask());
|
|
tempmask.clear_to_sets(lrg.num_regs());
|
|
- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
|
|
+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
|
|
if (OptoReg::is_valid(reg))
|
|
return reg;
|
|
}
|
|
@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
|
// If no bias info exists, just go with the register selection ordering
|
|
if (lrg._is_vector || lrg.num_regs() == 2) {
|
|
// Find an aligned set
|
|
- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
|
|
+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
|
|
}
|
|
|
|
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
|
|
@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) {
|
|
int n_regs = lrg->num_regs();
|
|
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
|
|
if (n_regs == 1 || !lrg->_fat_proj) {
|
|
- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
|
|
+ } else {
|
|
+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
|
+ }
|
|
lrg->Clear(); // Clear the mask
|
|
lrg->Insert(reg); // Set regmask to match selected reg
|
|
// For vectors and pairs, also insert the low bit of the pair
|
|
- for (int i = 1; i < n_regs; i++)
|
|
+ // We always choose the high bit, then mask the low bits by register size
|
|
+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
|
|
+ n_regs = lrg->scalable_reg_slots();
|
|
+ }
|
|
+ for (int i = 1; i < n_regs; i++) {
|
|
lrg->Insert(OptoReg::add(reg,-i));
|
|
+ }
|
|
lrg->set_mask_size(n_regs);
|
|
} else { // Else fatproj
|
|
// mask must be equal to fatproj bits, by definition
|
|
diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp
|
|
index e5be5b966..b5d1b0604 100644
|
|
--- a/src/hotspot/share/opto/chaitin.hpp
|
|
+++ b/src/hotspot/share/opto/chaitin.hpp
|
|
@@ -115,9 +115,11 @@ public:
|
|
_msize_valid=1;
|
|
if (_is_vector) {
|
|
assert(!_fat_proj, "sanity");
|
|
- _mask.verify_sets(_num_regs);
|
|
+ if (!(_is_scalable && OptoReg::is_stack(_reg))) {
|
|
+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
|
|
+ }
|
|
} else if (_num_regs == 2 && !_fat_proj) {
|
|
- _mask.verify_pairs();
|
|
+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
|
|
}
|
|
#endif
|
|
}
|
|
@@ -143,10 +145,34 @@ public:
|
|
private:
|
|
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
|
|
// except _num_regs is kill count for fat_proj
|
|
+
|
|
+ // For scalable register, num_regs may not be the actual physical register size.
|
|
+ // We need to get the actual physical length of scalable register when scalable
|
|
+ // register is spilled. The size of one slot is 32 bits.
|
|
+ uint _scalable_reg_slots; // Actual scalable register length in slots.
|
|
+ // Meaningful only when _is_scalable is true.
|
|
public:
|
|
int num_regs() const { return _num_regs; }
|
|
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
|
|
|
|
+ uint scalable_reg_slots() { return _scalable_reg_slots; }
|
|
+ void set_scalable_reg_slots(uint slots) {
|
|
+ assert(_is_scalable, "scalable register");
|
|
+ assert(slots > 0, "slots of scalable register is not valid");
|
|
+ _scalable_reg_slots = slots;
|
|
+ }
|
|
+
|
|
+ bool is_scalable() {
|
|
+#ifdef ASSERT
|
|
+ if (_is_scalable) {
|
|
+ // Should only be a vector for now, but it could also be a RegVMask in future.
|
|
+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
|
|
+ }
|
|
+#endif
|
|
+ return _is_scalable;
|
|
+ }
|
|
+
|
|
+
|
|
private:
|
|
// Number of physical registers this live range uses when it colors
|
|
// Architecture and register-set dependent
|
|
@@ -172,6 +198,7 @@ public:
|
|
uint _is_oop:1, // Live-range holds an oop
|
|
_is_float:1, // True if in float registers
|
|
_is_vector:1, // True if in vector registers
|
|
+ _is_scalable:1, // True if register size is scalable
|
|
_was_spilled1:1, // True if prior spilling on def
|
|
_was_spilled2:1, // True if twice prior spilling on def
|
|
_is_bound:1, // live range starts life with no
|
|
@@ -756,6 +783,7 @@ private:
|
|
|
|
// Merge nodes that are a part of a multidef lrg and produce the same value within a block.
|
|
void merge_multidefs();
|
|
+ void merge_debugdefs();
|
|
|
|
private:
|
|
|
|
diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp
|
|
index c0dfe1b0c..2d9526a39 100644
|
|
--- a/src/hotspot/share/opto/intrinsicnode.hpp
|
|
+++ b/src/hotspot/share/opto/intrinsicnode.hpp
|
|
@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node {
|
|
// Base class for Ideal nodes used in String intrinsic code.
|
|
class StrIntrinsicNode: public Node {
|
|
public:
|
|
- // Possible encodings of the two parameters passed to the string intrinsic.
|
|
+ // Possible encodings of the parameters passed to the string intrinsic.
|
|
// 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that
|
|
// the first string is Latin1 encoded and the second string is UTF16 encoded.
|
|
- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc;
|
|
+ // 'L' means that the single string is Latin1 encoded and 'U' means it is UTF16 encoded.
|
|
+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc;
|
|
|
|
protected:
|
|
// Encoding of strings. Used to select the right version of the intrinsic.
|
|
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
|
|
index 6b6aa9e9b..8719c5b12 100644
|
|
--- a/src/hotspot/share/opto/library_call.cpp
|
|
+++ b/src/hotspot/share/opto/library_call.cpp
|
|
@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit {
|
|
bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae);
|
|
Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count,
|
|
RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae);
|
|
- bool inline_string_indexOfChar();
|
|
+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae);
|
|
bool inline_string_equals(StrIntrinsicNode::ArgEnc ae);
|
|
bool inline_string_toBytesU();
|
|
bool inline_string_getCharsU();
|
|
@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|
case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL);
|
|
case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU);
|
|
case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL);
|
|
- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar();
|
|
+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U);
|
|
+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L);
|
|
|
|
case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL);
|
|
case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU);
|
|
@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node*
|
|
}
|
|
|
|
//-----------------------------inline_string_indexOfChar-----------------------
|
|
-bool LibraryCallKit::inline_string_indexOfChar() {
|
|
+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) {
|
|
if (too_many_traps(Deoptimization::Reason_intrinsic)) {
|
|
return false;
|
|
}
|
|
@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() {
|
|
|
|
src = must_be_not_null(src, true);
|
|
|
|
- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1)));
|
|
+ Node* src_offset = ae == StrIntrinsicNode::L ? from_index : _gvn.transform(new LShiftINode(from_index, intcon(1)));
|
|
Node* src_start = array_element_address(src, src_offset, T_BYTE);
|
|
Node* src_count = _gvn.transform(new SubINode(max, from_index));
|
|
|
|
// Range checks
|
|
- generate_string_range_check(src, src_offset, src_count, true);
|
|
+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U);
|
|
if (stopped()) {
|
|
return true;
|
|
}
|
|
@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() {
|
|
RegionNode* region = new RegionNode(3);
|
|
Node* phi = new PhiNode(region, TypeInt::INT);
|
|
|
|
- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none);
|
|
+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae);
|
|
C->set_has_split_ifs(true); // Has chance for split-if optimization
|
|
_gvn.transform(result);
|
|
|
|
diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp
|
|
index 8d526b15d..92b4f7158 100644
|
|
--- a/src/hotspot/share/opto/machnode.cpp
|
|
+++ b/src/hotspot/share/opto/machnode.cpp
|
|
@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const {
|
|
return MachNode::emit_size(ra_);
|
|
}
|
|
|
|
-//------------------------------size-------------------------------------------
|
|
+//-------------------------emit_size-------------------------------------------
|
|
// Helper function that computes size by emitting code
|
|
uint MachNode::emit_size(PhaseRegAlloc *ra_) const {
|
|
// Emit into a trash buffer and count bytes emitted.
|
|
diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp
|
|
index a52325680..dad70565b 100644
|
|
--- a/src/hotspot/share/opto/machnode.hpp
|
|
+++ b/src/hotspot/share/opto/machnode.hpp
|
|
@@ -334,6 +334,10 @@ public:
|
|
// Top-level ideal Opcode matched
|
|
virtual int ideal_Opcode() const { return Op_Node; }
|
|
|
|
+ virtual bool is_Opcode_equal(Node* node) {
|
|
+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode());
|
|
+ }
|
|
+
|
|
// Adds the label for the case
|
|
virtual void add_case_label( int switch_val, Label* blockLabel);
|
|
|
|
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
|
|
index 9e9b3383f..97de5e314 100644
|
|
--- a/src/hotspot/share/opto/matcher.cpp
|
|
+++ b/src/hotspot/share/opto/matcher.cpp
|
|
@@ -84,6 +84,7 @@ Matcher::Matcher()
|
|
idealreg2spillmask [Op_RegF] = NULL;
|
|
idealreg2spillmask [Op_RegD] = NULL;
|
|
idealreg2spillmask [Op_RegP] = NULL;
|
|
+ idealreg2spillmask [Op_VecA] = NULL;
|
|
idealreg2spillmask [Op_VecS] = NULL;
|
|
idealreg2spillmask [Op_VecD] = NULL;
|
|
idealreg2spillmask [Op_VecX] = NULL;
|
|
@@ -110,6 +111,7 @@ Matcher::Matcher()
|
|
idealreg2mhdebugmask[Op_RegF] = NULL;
|
|
idealreg2mhdebugmask[Op_RegD] = NULL;
|
|
idealreg2mhdebugmask[Op_RegP] = NULL;
|
|
+ idealreg2mhdebugmask[Op_VecA] = NULL;
|
|
idealreg2mhdebugmask[Op_VecS] = NULL;
|
|
idealreg2mhdebugmask[Op_VecD] = NULL;
|
|
idealreg2mhdebugmask[Op_VecX] = NULL;
|
|
@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
|
|
void Matcher::init_first_stack_mask() {
|
|
|
|
// Allocate storage for spill masks as masks for the appropriate load type.
|
|
- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5));
|
|
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6));
|
|
|
|
idealreg2spillmask [Op_RegN] = &rms[0];
|
|
idealreg2spillmask [Op_RegI] = &rms[1];
|
|
@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() {
|
|
idealreg2mhdebugmask[Op_RegD] = &rms[16];
|
|
idealreg2mhdebugmask[Op_RegP] = &rms[17];
|
|
|
|
- idealreg2spillmask [Op_VecS] = &rms[18];
|
|
- idealreg2spillmask [Op_VecD] = &rms[19];
|
|
- idealreg2spillmask [Op_VecX] = &rms[20];
|
|
- idealreg2spillmask [Op_VecY] = &rms[21];
|
|
- idealreg2spillmask [Op_VecZ] = &rms[22];
|
|
+ idealreg2spillmask [Op_VecA] = &rms[18];
|
|
+ idealreg2spillmask [Op_VecS] = &rms[19];
|
|
+ idealreg2spillmask [Op_VecD] = &rms[20];
|
|
+ idealreg2spillmask [Op_VecX] = &rms[21];
|
|
+ idealreg2spillmask [Op_VecY] = &rms[22];
|
|
+ idealreg2spillmask [Op_VecZ] = &rms[23];
|
|
|
|
OptoReg::Name i;
|
|
|
|
@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() {
|
|
// Keep spill masks aligned.
|
|
aligned_stack_mask.clear_to_pairs();
|
|
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
|
|
+ RegMask scalable_stack_mask = aligned_stack_mask;
|
|
|
|
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
|
|
#ifdef _LP64
|
|
@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() {
|
|
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
|
|
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
|
|
}
|
|
+
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
+ int k = 1;
|
|
+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
|
|
+ // Exclude last input arg stack slots to avoid spilling vector register there,
|
|
+ // otherwise vector spills could stomp over stack slots in caller frame.
|
|
+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
|
|
+ scalable_stack_mask.Remove(in);
|
|
+ in = OptoReg::add(in, -1);
|
|
+ }
|
|
+
|
|
+ // For VecA
|
|
+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
|
|
+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
|
|
+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
|
|
+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
|
|
+ } else {
|
|
+ *idealreg2spillmask[Op_VecA] = RegMask::Empty;
|
|
+ }
|
|
+
|
|
if (UseFPUForSpilling) {
|
|
// This mask logic assumes that the spill operations are
|
|
// symmetric and that the registers involved are the same size.
|
|
@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) {
|
|
idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
|
|
|
|
// Vector regmasks.
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
|
|
+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA));
|
|
+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask();
|
|
+ }
|
|
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
|
TypeVect::VECTS = TypeVect::make(T_BYTE, 4);
|
|
MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS));
|
|
diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp
|
|
index 244e3d1f8..9a8307102 100644
|
|
--- a/src/hotspot/share/opto/matcher.hpp
|
|
+++ b/src/hotspot/share/opto/matcher.hpp
|
|
@@ -310,7 +310,7 @@ public:
|
|
|
|
// identify extra cases that we might want to provide match rules for
|
|
// e.g. Op_ vector nodes and other intrinsics while guarding with vlen
|
|
- static const bool match_rule_supported_vector(int opcode, int vlen);
|
|
+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt);
|
|
|
|
// Some microarchitectures have mask registers used on vectors
|
|
static const bool has_predicated_vectors(void);
|
|
@@ -333,6 +333,10 @@ public:
|
|
Matcher::min_vector_size(bt) <= size);
|
|
}
|
|
|
|
+ static const bool supports_scalable_vector();
|
|
+ // Actual max scalable vector register length.
|
|
+ static const int scalable_vector_reg_size(const BasicType bt);
|
|
+
|
|
// Vector ideal reg
|
|
static const uint vector_ideal_reg(int len);
|
|
static const uint vector_shift_count_ideal_reg(int len);
|
|
diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp
|
|
index 02bb6bb16..99d51ba05 100644
|
|
--- a/src/hotspot/share/opto/node.cpp
|
|
+++ b/src/hotspot/share/opto/node.cpp
|
|
@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) {
|
|
return NULL;
|
|
}
|
|
|
|
+//--------------------------is_similar-----------------------------------
|
|
+// True if a node has the same opcode and inputs as "this".
|
|
+bool Node::is_similar(Node* node) {
|
|
+ if (this == node) {
|
|
+ return true;
|
|
+ } else {
|
|
+ if (is_Opcode_equal(node) && (req() == node->req())) {
|
|
+ for (uint i = 0; i < node->req(); i++) {
|
|
+ if (in(i) != node->in(i)) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+bool Node::is_Opcode_equal(Node* node) {
|
|
+ return Opcode() == node->Opcode();
|
|
+}
|
|
|
|
//--------------------------unique_ctrl_out------------------------------
|
|
// Return the unique control out if only one. Null if none or more than one.
|
|
diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp
|
|
index 0c0b9bf69..e24456d85 100644
|
|
--- a/src/hotspot/share/opto/node.hpp
|
|
+++ b/src/hotspot/share/opto/node.hpp
|
|
@@ -1030,6 +1030,11 @@ public:
|
|
// be found; Otherwise return NULL;
|
|
Node* find_similar(int opc);
|
|
|
|
+ // True if a node has the same opcode and inputs as "this".
|
|
+ bool is_similar(Node* node);
|
|
+
|
|
+ virtual bool is_Opcode_equal(Node* node);
|
|
+
|
|
// Return the unique control out if only one. Null if none or more than one.
|
|
Node* unique_ctrl_out() const;
|
|
|
|
diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp
|
|
index e31e8d847..aa0483c73 100644
|
|
--- a/src/hotspot/share/opto/opcodes.cpp
|
|
+++ b/src/hotspot/share/opto/opcodes.cpp
|
|
@@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
|
|
"RegF",
|
|
"RegD",
|
|
"RegL",
|
|
- "RegFlags",
|
|
+ "VecA",
|
|
"VecS",
|
|
"VecD",
|
|
"VecX",
|
|
"VecY",
|
|
"VecZ",
|
|
+ "RegVMask",
|
|
+ "RegFlags",
|
|
"_last_machine_leaf",
|
|
#include "classes.hpp"
|
|
"_last_class_name",
|
|
diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp
|
|
index ae3d61ce0..0a77c3732 100644
|
|
--- a/src/hotspot/share/opto/opcodes.hpp
|
|
+++ b/src/hotspot/share/opto/opcodes.hpp
|
|
@@ -37,11 +37,13 @@ enum Opcodes {
|
|
macro(RegF) // Machine float register
|
|
macro(RegD) // Machine double register
|
|
macro(RegL) // Machine long register
|
|
+ macro(VecA) // Machine vectora register
|
|
macro(VecS) // Machine vectors register
|
|
macro(VecD) // Machine vectord register
|
|
macro(VecX) // Machine vectorx register
|
|
macro(VecY) // Machine vectory register
|
|
macro(VecZ) // Machine vectorz register
|
|
+ macro(RegVMask) // Vector mask/predicate register
|
|
macro(RegFlags) // Machine flags register
|
|
_last_machine_leaf, // Split between regular opcodes and machine
|
|
#include "classes.hpp"
|
|
diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp
|
|
index 397a53713..89c7fc7c8 100644
|
|
--- a/src/hotspot/share/opto/phase.cpp
|
|
+++ b/src/hotspot/share/opto/phase.cpp
|
|
@@ -113,6 +113,7 @@ void Phase::print_timers() {
|
|
tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds());
|
|
tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds());
|
|
tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds());
|
|
+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds());
|
|
tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds());
|
|
tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds());
|
|
tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds());
|
|
@@ -130,6 +131,7 @@ void Phase::print_timers() {
|
|
timers[_t_regAllocSplit].seconds() +
|
|
timers[_t_postAllocCopyRemoval].seconds() +
|
|
timers[_t_mergeMultidefs].seconds() +
|
|
+ timers[_t_mergeDebugdefs].seconds() +
|
|
timers[_t_fixupSpills].seconds() +
|
|
timers[_t_chaitinCompact].seconds() +
|
|
timers[_t_chaitinCoalesce1].seconds() +
|
|
diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp
|
|
index 4b0c53ffc..b3302ec86 100644
|
|
--- a/src/hotspot/share/opto/phase.hpp
|
|
+++ b/src/hotspot/share/opto/phase.hpp
|
|
@@ -91,6 +91,7 @@ public:
|
|
_t_regAllocSplit,
|
|
_t_postAllocCopyRemoval,
|
|
_t_mergeMultidefs,
|
|
+ _t_mergeDebugdefs,
|
|
_t_fixupSpills,
|
|
_t_chaitinCompact,
|
|
_t_chaitinCoalesce1,
|
|
diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp
|
|
index 46766b604..3f608bb40 100644
|
|
--- a/src/hotspot/share/opto/postaloc.cpp
|
|
+++ b/src/hotspot/share/opto/postaloc.cpp
|
|
@@ -27,6 +27,7 @@
|
|
#include "memory/resourceArea.hpp"
|
|
#include "opto/chaitin.hpp"
|
|
#include "opto/machnode.hpp"
|
|
+#include "opto/addnode.hpp"
|
|
|
|
// See if this register (or pairs, or vector) already contains the value.
|
|
static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs,
|
|
@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
|
Node *val = skip_copies(n->in(k));
|
|
if (val == x) return blk_adjust; // No progress?
|
|
|
|
- int n_regs = RegMask::num_registers(val->ideal_reg());
|
|
uint val_idx = _lrg_map.live_range_id(val);
|
|
OptoReg::Name val_reg = lrgs(val_idx).reg();
|
|
+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
|
|
|
|
// See if it happens to already be in the correct register!
|
|
// (either Phi's direct register, or the common case of the name
|
|
@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
|
}
|
|
|
|
Node *vv = value[reg];
|
|
+ // For scalable register, number of registers may be inconsistent between
|
|
+ // "val_reg" and "reg". For example, when "val" resides in register
|
|
+ // but "reg" is located in stack.
|
|
+ if (lrgs(val_idx).is_scalable()) {
|
|
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
|
+ if (OptoReg::is_stack(reg)) {
|
|
+ n_regs = lrgs(val_idx).scalable_reg_slots();
|
|
+ } else {
|
|
+ n_regs = RegMask::SlotsPerVecA;
|
|
+ }
|
|
+ }
|
|
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
|
|
- uint last = (n_regs-1); // Looking for the last part of a set
|
|
+ uint last;
|
|
+ if (lrgs(val_idx).is_scalable()) {
|
|
+ assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
|
+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned
|
|
+ last = RegMask::SlotsPerVecA - 1;
|
|
+ } else {
|
|
+ last = (n_regs-1); // Looking for the last part of a set
|
|
+ }
|
|
if ((reg&last) != last) continue; // Wrong part of a set
|
|
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
|
|
}
|
|
@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() {
|
|
}
|
|
}
|
|
|
|
+void PhaseChaitin::merge_debugdefs() {
|
|
+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]);
|
|
+
|
|
+ ResourceMark rm;
|
|
+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
|
|
+ Block* block = _cfg.get_block(i);
|
|
+ for (int j = 0; j < (int) block->number_of_nodes(); j++) {
|
|
+ Node* base = block->get_node(j);
|
|
+ if (base && base->is_Mach() && base->outcnt() == 1) {
|
|
+ Node* addp = base->unique_out();
|
|
+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) {
|
|
+ Node* derived = addp->in(AddPNode::Address);
|
|
+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) {
|
|
+ base->subsume_by(derived, Compile::current());
|
|
+ block->remove_node(j--);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) {
|
|
int blk_adjust = 0;
|
|
|
|
@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
uint k;
|
|
Node *phi = block->get_node(j);
|
|
uint pidx = _lrg_map.live_range_id(phi);
|
|
- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
|
|
+ OptoReg::Name preg = lrgs(pidx).reg();
|
|
|
|
// Remove copies remaining on edges. Check for junk phi.
|
|
Node *u = NULL;
|
|
@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
if( pidx ) {
|
|
value.map(preg,phi);
|
|
regnd.map(preg,phi);
|
|
- int n_regs = RegMask::num_registers(phi->ideal_reg());
|
|
+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
|
|
for (int l = 1; l < n_regs; l++) {
|
|
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
|
|
value.map(preg_lo,phi);
|
|
@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
regnd.map(ureg, def);
|
|
// Record other half of doubles
|
|
uint def_ideal_reg = def->ideal_reg();
|
|
- int n_regs = RegMask::num_registers(def_ideal_reg);
|
|
+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
|
|
for (int l = 1; l < n_regs; l++) {
|
|
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
|
|
if (!value[ureg_lo] &&
|
|
@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
|
}
|
|
|
|
uint n_ideal_reg = n->ideal_reg();
|
|
- int n_regs = RegMask::num_registers(n_ideal_reg);
|
|
+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
|
|
if (n_regs == 1) {
|
|
// If Node 'n' does not change the value mapped by the register,
|
|
// then 'n' is a useless copy. Do not update the register->node
|
|
diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp
|
|
index 2e04c42eb..34a701e84 100644
|
|
--- a/src/hotspot/share/opto/regmask.cpp
|
|
+++ b/src/hotspot/share/opto/regmask.cpp
|
|
@@ -24,6 +24,7 @@
|
|
|
|
#include "precompiled.hpp"
|
|
#include "opto/ad.hpp"
|
|
+#include "opto/chaitin.hpp"
|
|
#include "opto/compile.hpp"
|
|
#include "opto/matcher.hpp"
|
|
#include "opto/node.hpp"
|
|
@@ -116,30 +117,47 @@ const RegMask RegMask::Empty(
|
|
|
|
//=============================================================================
|
|
bool RegMask::is_vector(uint ireg) {
|
|
- return (ireg == Op_VecS || ireg == Op_VecD ||
|
|
+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
|
|
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
|
|
}
|
|
|
|
int RegMask::num_registers(uint ireg) {
|
|
switch(ireg) {
|
|
case Op_VecZ:
|
|
- return 16;
|
|
+ return SlotsPerVecZ;
|
|
case Op_VecY:
|
|
- return 8;
|
|
+ return SlotsPerVecY;
|
|
case Op_VecX:
|
|
- return 4;
|
|
+ return SlotsPerVecX;
|
|
case Op_VecD:
|
|
+ return SlotsPerVecD;
|
|
case Op_RegD:
|
|
case Op_RegL:
|
|
#ifdef _LP64
|
|
case Op_RegP:
|
|
#endif
|
|
return 2;
|
|
+ case Op_VecA:
|
|
+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
|
+ return SlotsPerVecA;
|
|
}
|
|
// Op_VecS and the rest ideal registers.
|
|
return 1;
|
|
}
|
|
|
|
+int RegMask::num_registers(uint ireg, LRG &lrg) {
|
|
+ int n_regs = num_registers(ireg);
|
|
+
|
|
+ // assigned is OptoReg which is selected by register allocator
|
|
+ OptoReg::Name assigned = lrg.reg();
|
|
+ assert(OptoReg::is_valid(assigned), "should be valid opto register");
|
|
+
|
|
+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
|
|
+ n_regs = lrg.scalable_reg_slots();
|
|
+ }
|
|
+ return n_regs;
|
|
+}
|
|
+
|
|
//------------------------------find_first_pair--------------------------------
|
|
// Find the lowest-numbered register pair in the mask. Return the
|
|
// HIGHEST register number in the pair, or BAD if no pairs.
|
|
@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const {
|
|
return true;
|
|
}
|
|
|
|
+// Check whether the given reg number with size is valid
|
|
+// for the current regmask, where reg is the highest number.
|
|
+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
|
|
+ for (int i = 0; i < size; i++) {
|
|
+ if (!Member(reg - i)) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
// only indicies of power 2 are accessed, so index 3 is only filled in for storage.
|
|
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
|
|
//------------------------------find_first_set---------------------------------
|
|
// Find the lowest-numbered register set in the mask. Return the
|
|
// HIGHEST register number in the set, or BAD if no sets.
|
|
// Works also for size 1.
|
|
-OptoReg::Name RegMask::find_first_set(const int size) const {
|
|
- verify_sets(size);
|
|
+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
|
|
+ if (lrg.is_scalable()) {
|
|
+ // For scalable vector register, regmask is SlotsPerVecA bits aligned.
|
|
+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
|
|
+ } else {
|
|
+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
|
+ }
|
|
for (int i = 0; i < RM_SIZE; i++) {
|
|
if (_A[i]) { // Found some bits
|
|
int bit = _A[i] & -_A[i]; // Extract low bit
|
|
diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp
|
|
index c64d08795..2688275be 100644
|
|
--- a/src/hotspot/share/opto/regmask.hpp
|
|
+++ b/src/hotspot/share/opto/regmask.hpp
|
|
@@ -28,6 +28,8 @@
|
|
#include "code/vmreg.hpp"
|
|
#include "opto/optoreg.hpp"
|
|
|
|
+class LRG;
|
|
+
|
|
// Some fun naming (textual) substitutions:
|
|
//
|
|
// RegMask::get_low_elem() ==> RegMask::find_first_elem()
|
|
@@ -95,6 +97,7 @@ public:
|
|
// requirement is internal to the allocator, and independent of any
|
|
// particular platform.
|
|
enum { SlotsPerLong = 2,
|
|
+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8),
|
|
SlotsPerVecS = 1,
|
|
SlotsPerVecD = 2,
|
|
SlotsPerVecX = 4,
|
|
@@ -204,10 +207,14 @@ public:
|
|
return false;
|
|
}
|
|
|
|
+ // Check whether the given reg number with size is valid
|
|
+ // for the current regmask, where reg is the highest number.
|
|
+ bool is_valid_reg(OptoReg::Name reg, const int size) const;
|
|
+
|
|
// Find the lowest-numbered register set in the mask. Return the
|
|
// HIGHEST register number in the set, or BAD if no sets.
|
|
// Assert that the mask contains only bit sets.
|
|
- OptoReg::Name find_first_set(const int size) const;
|
|
+ OptoReg::Name find_first_set(LRG &lrg, const int size) const;
|
|
|
|
// Clear out partial bits; leave only aligned adjacent bit sets of size.
|
|
void clear_to_sets(const int size);
|
|
@@ -226,6 +233,7 @@ public:
|
|
|
|
static bool is_vector(uint ireg);
|
|
static int num_registers(uint ireg);
|
|
+ static int num_registers(uint ireg, LRG &lrg);
|
|
|
|
// Fast overlap test. Non-zero if any registers in common.
|
|
int overlap( const RegMask &rm ) const {
|
|
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
|
|
index fed52e488..ee583236f 100644
|
|
--- a/src/hotspot/share/opto/superword.cpp
|
|
+++ b/src/hotspot/share/opto/superword.cpp
|
|
@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz
|
|
//------------------------------transform_loop---------------------------
|
|
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
|
assert(UseSuperWord, "should be");
|
|
- // Do vectors exist on this architecture?
|
|
- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
|
|
+ // SuperWord only works with power of two vector sizes.
|
|
+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
|
|
+ if (vector_width < 2 || !is_power_of_2(vector_width)) {
|
|
+ return;
|
|
+ }
|
|
|
|
assert(lpt->_head->is_CountedLoop(), "must be");
|
|
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
|
|
diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp
|
|
index 7d767c47c..c9948df5f 100644
|
|
--- a/src/hotspot/share/opto/type.cpp
|
|
+++ b/src/hotspot/share/opto/type.cpp
|
|
@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
|
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
|
|
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
|
|
#else // all other
|
|
+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA
|
|
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
|
|
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
|
|
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
|
|
@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) {
|
|
// get_zero_type() should not happen for T_CONFLICT
|
|
_zero_type[T_CONFLICT]= NULL;
|
|
|
|
+ if (Matcher::supports_scalable_vector()) {
|
|
+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
|
|
+ }
|
|
+
|
|
// Vector predefined types, it needs initialized _const_basic_type[].
|
|
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
|
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
|
|
@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) {
|
|
if (Matcher::vector_size_supported(T_FLOAT,16)) {
|
|
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
|
|
}
|
|
+ mreg2type[Op_VecA] = TypeVect::VECTA;
|
|
mreg2type[Op_VecS] = TypeVect::VECTS;
|
|
mreg2type[Op_VecD] = TypeVect::VECTD;
|
|
mreg2type[Op_VecX] = TypeVect::VECTX;
|
|
@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
|
|
|
|
Bad, // Tuple - handled in v-call
|
|
Bad, // Array - handled in v-call
|
|
+ Bad, // VectorA - handled in v-call
|
|
Bad, // VectorS - handled in v-call
|
|
Bad, // VectorD - handled in v-call
|
|
Bad, // VectorX - handled in v-call
|
|
@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const {
|
|
|
|
//==============================TypeVect=======================================
|
|
// Convenience common pre-built types.
|
|
+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
|
|
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
|
|
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
|
|
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
|
|
@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
|
|
const TypeVect* TypeVect::make(const Type *elem, uint length) {
|
|
BasicType elem_bt = elem->array_element_basic_type();
|
|
assert(is_java_primitive(elem_bt), "only primitive types in vector");
|
|
- assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
|
|
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
|
|
int size = length * type2aelembytes(elem_bt);
|
|
switch (Matcher::vector_ideal_reg(size)) {
|
|
+ case Op_VecA:
|
|
+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
|
|
case Op_VecS:
|
|
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
|
|
case Op_RegL:
|
|
@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
|
|
default: // All else is a mistake
|
|
typerr(t);
|
|
|
|
+ case VectorA:
|
|
case VectorS:
|
|
case VectorD:
|
|
case VectorX:
|
|
@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const {
|
|
#ifndef PRODUCT
|
|
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
|
|
switch (base()) {
|
|
+ case VectorA:
|
|
+ st->print("vectora["); break;
|
|
case VectorS:
|
|
st->print("vectors["); break;
|
|
case VectorD:
|
|
diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp
|
|
index 27d042d94..82ee2dfcb 100644
|
|
--- a/src/hotspot/share/opto/type.hpp
|
|
+++ b/src/hotspot/share/opto/type.hpp
|
|
@@ -53,6 +53,7 @@ class TypeNarrowKlass;
|
|
class TypeAry;
|
|
class TypeTuple;
|
|
class TypeVect;
|
|
+class TypeVectA;
|
|
class TypeVectS;
|
|
class TypeVectD;
|
|
class TypeVectX;
|
|
@@ -87,6 +88,7 @@ public:
|
|
|
|
Tuple, // Method signature or object layout
|
|
Array, // Array types
|
|
+ VectorA, // (Scalable) Vector types for vector length agnostic
|
|
VectorS, // 32bit Vector types
|
|
VectorD, // 64bit Vector types
|
|
VectorX, // 128bit Vector types
|
|
@@ -769,6 +771,7 @@ public:
|
|
virtual const Type *xmeet( const Type *t) const;
|
|
virtual const Type *xdual() const; // Compute dual right now.
|
|
|
|
+ static const TypeVect *VECTA;
|
|
static const TypeVect *VECTS;
|
|
static const TypeVect *VECTD;
|
|
static const TypeVect *VECTX;
|
|
@@ -780,6 +783,11 @@ public:
|
|
#endif
|
|
};
|
|
|
|
+class TypeVectA : public TypeVect {
|
|
+ friend class TypeVect;
|
|
+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
|
|
+};
|
|
+
|
|
class TypeVectS : public TypeVect {
|
|
friend class TypeVect;
|
|
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
|
|
@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const {
|
|
}
|
|
|
|
inline const TypeVect *Type::is_vect() const {
|
|
- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
|
|
+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
|
|
return (TypeVect*)this;
|
|
}
|
|
|
|
inline const TypeVect *Type::isa_vect() const {
|
|
- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
|
+ return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
|
}
|
|
|
|
inline const TypePtr *Type::is_ptr() const {
|
|
diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp
|
|
index de22591ba..b82d631f4 100644
|
|
--- a/src/hotspot/share/opto/vectornode.cpp
|
|
+++ b/src/hotspot/share/opto/vectornode.cpp
|
|
@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
|
|
(vlen > 1) && is_power_of_2(vlen) &&
|
|
Matcher::vector_size_supported(bt, vlen)) {
|
|
int vopc = VectorNode::opcode(opc, bt);
|
|
- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen);
|
|
+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
|
}
|
|
return false;
|
|
}
|
|
@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
|
(vlen > 1) && is_power_of_2(vlen) &&
|
|
Matcher::vector_size_supported(bt, vlen)) {
|
|
int vopc = ReductionNode::opcode(opc, bt);
|
|
- return vopc != opc && Matcher::match_rule_supported(vopc);
|
|
+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
|
}
|
|
return false;
|
|
}
|
|
diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp
|
|
index c46247f2b..ee769634f 100644
|
|
--- a/src/hotspot/share/runtime/abstract_vm_version.cpp
|
|
+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp
|
|
@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false;
|
|
#ifdef ZERO
|
|
#define VMTYPE "Zero"
|
|
#else // ZERO
|
|
- #define VMTYPE COMPILER1_PRESENT("Client") \
|
|
- COMPILER2_PRESENT("Server")
|
|
+ #ifdef COMPILER2
|
|
+ #define VMTYPE "Server"
|
|
+ #elif defined(COMPILER1)
|
|
+ #define VMTYPE "Client"
|
|
+ #else
|
|
+ #define VMTYPE "Core"
|
|
+ #endif // COMPILER2
|
|
#endif // ZERO
|
|
#endif // TIERED
|
|
#endif
|
|
@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() {
|
|
IA32_ONLY("x86") \
|
|
IA64_ONLY("ia64") \
|
|
S390_ONLY("s390") \
|
|
- SPARC_ONLY("sparc")
|
|
+ SPARC_ONLY("sparc") \
|
|
+ RISCV64_ONLY("riscv64")
|
|
#endif // !ZERO
|
|
#endif // !CPU
|
|
|
|
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
|
|
index 0a9c45f85..a96c2dd81 100644
|
|
--- a/src/hotspot/share/runtime/thread.hpp
|
|
+++ b/src/hotspot/share/runtime/thread.hpp
|
|
@@ -1234,7 +1234,7 @@ class JavaThread: public Thread {
|
|
address last_Java_pc(void) { return _anchor.last_Java_pc(); }
|
|
|
|
// Safepoint support
|
|
-#if !(defined(PPC64) || defined(AARCH64))
|
|
+#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64))
|
|
JavaThreadState thread_state() const { return _thread_state; }
|
|
void set_thread_state(JavaThreadState s) {
|
|
assert(current_or_null() == NULL || current_or_null() == this,
|
|
diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp
|
|
index dee8534f7..aa71d7655 100644
|
|
--- a/src/hotspot/share/runtime/thread.inline.hpp
|
|
+++ b/src/hotspot/share/runtime/thread.inline.hpp
|
|
@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) {
|
|
set_has_async_exception();
|
|
}
|
|
|
|
-#if defined(PPC64) || defined (AARCH64)
|
|
+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64)
|
|
inline JavaThreadState JavaThread::thread_state() const {
|
|
return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state);
|
|
}
|
|
diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp
|
|
index 0b898dcc3..7f76486ae 100644
|
|
--- a/src/hotspot/share/utilities/debug.cpp
|
|
+++ b/src/hotspot/share/utilities/debug.cpp
|
|
@@ -632,6 +632,7 @@ void help() {
|
|
tty->print_cr(" pns($sp, $rbp, $pc) on Linux/amd64 and Solaris/amd64 or");
|
|
tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or");
|
|
tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or");
|
|
+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or");
|
|
tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or");
|
|
tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC");
|
|
tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()");
|
|
diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp
|
|
index cf8025386..e8ab3097a 100644
|
|
--- a/src/hotspot/share/utilities/macros.hpp
|
|
+++ b/src/hotspot/share/utilities/macros.hpp
|
|
@@ -597,6 +597,32 @@
|
|
|
|
#define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x))
|
|
|
|
+#if defined(RISCV32) || defined(RISCV64)
|
|
+#define RISCV
|
|
+#define RISCV_ONLY(code) code
|
|
+#define NOT_RISCV(code)
|
|
+#else
|
|
+#undef RISCV
|
|
+#define RISCV_ONLY(code)
|
|
+#define NOT_RISCV(code) code
|
|
+#endif
|
|
+
|
|
+#ifdef RISCV32
|
|
+#define RISCV32_ONLY(code) code
|
|
+#define NOT_RISCV32(code)
|
|
+#else
|
|
+#define RISCV32_ONLY(code)
|
|
+#define NOT_RISCV32(code) code
|
|
+#endif
|
|
+
|
|
+#ifdef RISCV64
|
|
+#define RISCV64_ONLY(code) code
|
|
+#define NOT_RISCV64(code)
|
|
+#else
|
|
+#define RISCV64_ONLY(code)
|
|
+#define NOT_RISCV64(code) code
|
|
+#endif
|
|
+
|
|
#ifdef VM_LITTLE_ENDIAN
|
|
#define LITTLE_ENDIAN_ONLY(code) code
|
|
#define BIG_ENDIAN_ONLY(code)
|
|
diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java
|
|
index 063a5ef3a..50e9cdb57 100644
|
|
--- a/src/java.base/share/classes/java/lang/StringLatin1.java
|
|
+++ b/src/java.base/share/classes/java/lang/StringLatin1.java
|
|
@@ -209,6 +209,11 @@ final class StringLatin1 {
|
|
// Note: fromIndex might be near -1>>>1.
|
|
return -1;
|
|
}
|
|
+ return indexOfChar(value, ch, fromIndex, max);
|
|
+ }
|
|
+
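+ // The raw scan loop is factored into its own method so the VM may replace it
+ // with a platform intrinsic; @HotSpotIntrinsicCandidate only marks it as a
+ // candidate, and the Java loop below remains the fallback behaviour.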
|
|
+ @HotSpotIntrinsicCandidate
|
|
+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) {
|
|
byte c = (byte)ch;
|
|
for (int i = fromIndex; i < max; i++) {
|
|
if (value[i] == c) {
|
|
diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
|
|
index 0d834302c..55a7b96f7 100644
|
|
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
|
|
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
|
|
@@ -58,6 +58,10 @@
|
|
#include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h"
|
|
#endif
|
|
|
|
+#ifdef riscv64
|
|
+#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h"
|
|
+#endif
|
|
+
|
|
static jfieldID p_ps_prochandle_ID = 0;
|
|
static jfieldID threadList_ID = 0;
|
|
static jfieldID loadObjectList_ID = 0;
|
|
@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
|
|
return (err == PS_OK)? array : 0;
|
|
}
|
|
|
|
-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64)
|
|
+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64)
|
|
JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0
|
|
(JNIEnv *env, jobject this_obj, jint lwp_id) {
|
|
|
|
@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
|
|
#ifdef aarch64
|
|
#define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG
|
|
#endif
|
|
+#ifdef riscv64
|
|
+#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG
|
|
+#endif
|
|
#if defined(sparc) || defined(sparcv9)
|
|
#define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG
|
|
#endif
|
|
@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
|
|
}
|
|
#endif /* aarch64 */
|
|
|
|
+#if defined(riscv64)
|
|
+
|
|
+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg
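+
+ // Copy the kernel-supplied register block into the SA's fixed slot order:
+ // pc, ra, sp, gp, tp, t0-t2, s0-s1, a0-a7, s2-s11, t3-t6, matching the
+ // indices defined in RISCV64ThreadContext.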
|
|
+
|
|
+ {
|
|
+ regs[REG_INDEX(PC)] = gregs.pc;
|
|
+ regs[REG_INDEX(LR)] = gregs.ra;
|
|
+ regs[REG_INDEX(SP)] = gregs.sp;
|
|
+ regs[REG_INDEX(R3)] = gregs.gp;
|
|
+ regs[REG_INDEX(R4)] = gregs.tp;
|
|
+ regs[REG_INDEX(R5)] = gregs.t0;
|
|
+ regs[REG_INDEX(R6)] = gregs.t1;
|
|
+ regs[REG_INDEX(R7)] = gregs.t2;
|
|
+ regs[REG_INDEX(R8)] = gregs.s0;
|
|
+ regs[REG_INDEX(R9)] = gregs.s1;
|
|
+ regs[REG_INDEX(R10)] = gregs.a0;
|
|
+ regs[REG_INDEX(R11)] = gregs.a1;
|
|
+ regs[REG_INDEX(R12)] = gregs.a2;
|
|
+ regs[REG_INDEX(R13)] = gregs.a3;
|
|
+ regs[REG_INDEX(R14)] = gregs.a4;
|
|
+ regs[REG_INDEX(R15)] = gregs.a5;
|
|
+ regs[REG_INDEX(R16)] = gregs.a6;
|
|
+ regs[REG_INDEX(R17)] = gregs.a7;
|
|
+ regs[REG_INDEX(R18)] = gregs.s2;
|
|
+ regs[REG_INDEX(R19)] = gregs.s3;
|
|
+ regs[REG_INDEX(R20)] = gregs.s4;
|
|
+ regs[REG_INDEX(R21)] = gregs.s5;
|
|
+ regs[REG_INDEX(R22)] = gregs.s6;
|
|
+ regs[REG_INDEX(R23)] = gregs.s7;
|
|
+ regs[REG_INDEX(R24)] = gregs.s8;
|
|
+ regs[REG_INDEX(R25)] = gregs.s9;
|
|
+ regs[REG_INDEX(R26)] = gregs.s10;
|
|
+ regs[REG_INDEX(R27)] = gregs.s11;
|
|
+ regs[REG_INDEX(R28)] = gregs.t3;
|
|
+ regs[REG_INDEX(R29)] = gregs.t4;
|
|
+ regs[REG_INDEX(R30)] = gregs.t5;
|
|
+ regs[REG_INDEX(R31)] = gregs.t6;
|
|
+ }
|
|
+#endif /* riscv64 */
|
|
+
|
|
#if defined(ppc64) || defined(ppc64le)
|
|
#define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg
|
|
|
|
diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
|
|
index 8318e8e02..9d7fda8a6 100644
|
|
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
|
|
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
|
|
@@ -43,6 +43,8 @@
|
|
#elif defined(arm)
|
|
#include <asm/ptrace.h>
|
|
#define user_regs_struct pt_regs
|
|
+#elif defined(riscv64)
|
|
+#include <asm/ptrace.h>
|
|
#endif
|
|
|
|
// This C bool type must be int for compatibility with Linux calls and
|
|
diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c
|
|
index de5254d85..12eafc455 100644
|
|
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c
|
|
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c
|
|
@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use
|
|
#define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr)
|
|
#endif
|
|
|
|
+// The riscv kernel does not implement the compat_arch_ptrace function that handles the PT_GETREGS
|
|
+// case on other platforms, so call ptrace with PTRACE_GETREGSET here.
|
|
+#ifndef riscv64
|
|
#if defined(_LP64) && defined(PTRACE_GETREGS64)
|
|
#define PTRACE_GETREGS_REQ PTRACE_GETREGS64
|
|
#elif defined(PTRACE_GETREGS)
|
|
@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use
|
|
#elif defined(PT_GETREGS)
|
|
#define PTRACE_GETREGS_REQ PT_GETREGS
|
|
#endif
|
|
+#endif
|
|
|
|
#ifdef PTRACE_GETREGS_REQ
|
|
if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) {
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
|
|
index 0f5f0119c..82c083055 100644
|
|
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
|
|
@@ -1,6 +1,7 @@
|
|
/*
|
|
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionAMD64;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionPPC64;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionAArch64;
|
|
+import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit;
|
|
import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit;
|
|
@@ -592,6 +594,8 @@ public class HotSpotAgent {
|
|
machDesc = new MachineDescriptionPPC64();
|
|
} else if (cpu.equals("aarch64")) {
|
|
machDesc = new MachineDescriptionAArch64();
|
|
+ } else if (cpu.equals("riscv64")) {
|
|
+ machDesc = new MachineDescriptionRISCV64();
|
|
} else if (cpu.equals("sparc")) {
|
|
if (LinuxDebuggerLocal.getAddressSize()==8) {
|
|
machDesc = new MachineDescriptionSPARC64Bit();
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java
|
|
new file mode 100644
|
|
index 000000000..4221937f1
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java
|
|
@@ -0,0 +1,40 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger;
|
|
+
|
|
+public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription {
|
|
+ public long getAddressSize() {
|
|
+ return 8;
|
|
+ }
|
|
+
|
|
+ public boolean isLP64() {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ public boolean isBigEndian() {
|
|
+ return false;
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
|
|
index 5e5a6bb71..acd5844ca 100644
|
|
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
|
|
@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*;
|
|
import sun.jvm.hotspot.debugger.x86.*;
|
|
import sun.jvm.hotspot.debugger.amd64.*;
|
|
import sun.jvm.hotspot.debugger.aarch64.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
import sun.jvm.hotspot.debugger.sparc.*;
|
|
import sun.jvm.hotspot.debugger.ppc64.*;
|
|
import sun.jvm.hotspot.debugger.linux.x86.*;
|
|
@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*;
|
|
import sun.jvm.hotspot.debugger.linux.sparc.*;
|
|
import sun.jvm.hotspot.debugger.linux.ppc64.*;
|
|
import sun.jvm.hotspot.debugger.linux.aarch64.*;
|
|
+import sun.jvm.hotspot.debugger.linux.riscv64.*;
|
|
import sun.jvm.hotspot.utilities.*;
|
|
|
|
class LinuxCDebugger implements CDebugger {
|
|
@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger {
|
|
Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC);
|
|
if (pc == null) return null;
|
|
return new LinuxAARCH64CFrame(dbg, fp, pc);
|
|
- } else {
|
|
+ } else if (cpu.equals("riscv64")) {
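+ // Mirror the AArch64 case above: rebuild the top native frame from the
+ // thread's saved frame pointer and pc.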
|
|
+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext();
|
|
+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP);
|
|
+ if (fp == null) return null;
|
|
+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC);
|
|
+ if (pc == null) return null;
|
|
+ return new LinuxRISCV64CFrame(dbg, fp, pc);
|
|
+ } else {
|
|
// Runtime exception thrown by LinuxThreadContextFactory if unknown cpu
|
|
ThreadContext context = (ThreadContext) thread.getContext();
|
|
return context.getTopFrame(dbg);
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java
|
|
new file mode 100644
|
|
index 000000000..eaef586b4
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java
|
|
@@ -0,0 +1,90 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.linux.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.linux.*;
|
|
+import sun.jvm.hotspot.debugger.cdbg.*;
|
|
+import sun.jvm.hotspot.debugger.cdbg.basic.*;
|
|
+
|
|
+public final class LinuxRISCV64CFrame extends BasicCFrame {
|
|
+ private static final int C_FRAME_LINK_OFFSET = -2;
|
|
+ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1;
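+
+ // Offsets are in address-sized (8-byte) slots relative to fp: the caller's
+ // saved fp is read from fp - 16 and the return address from fp - 8 (see sender()).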
|
|
+
|
|
+ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) {
|
|
+ super(dbg.getCDebugger());
|
|
+ this.fp = fp;
|
|
+ this.pc = pc;
|
|
+ this.dbg = dbg;
|
|
+ }
|
|
+
|
|
+ // override base class impl to avoid ELF parsing
|
|
+ public ClosestSymbol closestSymbolToPC() {
|
|
+ // try native lookup in debugger.
|
|
+ return dbg.lookup(dbg.getAddressValue(pc()));
|
|
+ }
|
|
+
|
|
+ public Address pc() {
|
|
+ return pc;
|
|
+ }
|
|
+
|
|
+ public Address localVariableBase() {
|
|
+ return fp;
|
|
+ }
|
|
+
|
|
+ public CFrame sender(ThreadProxy thread) {
|
|
+ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext();
|
|
+ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP);
|
|
+
|
|
+ if ((fp == null) || fp.lessThan(rsp)) {
|
|
+ return null;
|
|
+ }
|
|
+
|
|
+ // Check alignment of fp
|
|
+ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) {
|
|
+ return null;
|
|
+ }
|
|
+
|
|
+ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE);
|
|
+ if (nextFP == null || nextFP.lessThanOrEqual(fp)) {
|
|
+ return null;
|
|
+ }
|
|
+ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE);
|
|
+ if (nextPC == null) {
|
|
+ return null;
|
|
+ }
|
|
+ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC);
|
|
+ }
|
|
+
|
|
+ // package/class internals only
|
|
+ private static final int ADDRESS_SIZE = 8;
|
|
+ private Address pc;
|
|
+ private Address sp;
|
|
+ private Address fp;
|
|
+ private LinuxDebugger dbg;
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java
|
|
new file mode 100644
|
|
index 000000000..4789e664c
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java
|
|
@@ -0,0 +1,48 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.linux.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.linux.*;
|
|
+
|
|
+public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext {
|
|
+ private LinuxDebugger debugger;
|
|
+
|
|
+ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) {
|
|
+ super();
|
|
+ this.debugger = debugger;
|
|
+ }
|
|
+
|
|
+ public void setRegisterAsAddress(int index, Address value) {
|
|
+ setRegister(index, debugger.getAddressValue(value));
|
|
+ }
|
|
+
|
|
+ public Address getRegisterAsAddress(int index) {
|
|
+ return debugger.newAddress(getRegister(index));
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java
|
|
index 74e957d94..1f44d75ee 100644
|
|
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java
|
|
@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*;
|
|
import sun.jvm.hotspot.debugger.cdbg.*;
|
|
import sun.jvm.hotspot.debugger.proc.amd64.*;
|
|
import sun.jvm.hotspot.debugger.proc.aarch64.*;
|
|
+import sun.jvm.hotspot.debugger.proc.riscv64.*;
|
|
import sun.jvm.hotspot.debugger.proc.sparc.*;
|
|
import sun.jvm.hotspot.debugger.proc.ppc64.*;
|
|
import sun.jvm.hotspot.debugger.proc.x86.*;
|
|
import sun.jvm.hotspot.debugger.ppc64.*;
|
|
import sun.jvm.hotspot.debugger.amd64.*;
|
|
import sun.jvm.hotspot.debugger.aarch64.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
import sun.jvm.hotspot.debugger.sparc.*;
|
|
import sun.jvm.hotspot.debugger.x86.*;
|
|
import sun.jvm.hotspot.utilities.*;
|
|
@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger {
|
|
threadFactory = new ProcAARCH64ThreadFactory(this);
|
|
pcRegIndex = AARCH64ThreadContext.PC;
|
|
fpRegIndex = AARCH64ThreadContext.FP;
|
|
+ } else if (cpu.equals("riscv64")) {
|
|
+ threadFactory = new ProcRISCV64ThreadFactory(this);
|
|
+ pcRegIndex = RISCV64ThreadContext.PC;
|
|
+ fpRegIndex = RISCV64ThreadContext.FP;
|
|
} else if (cpu.equals("ppc64")) {
|
|
threadFactory = new ProcPPC64ThreadFactory(this);
|
|
pcRegIndex = PPC64ThreadContext.PC;
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java
|
|
new file mode 100644
|
|
index 000000000..c1cf1fb0f
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java
|
|
@@ -0,0 +1,88 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.proc.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.proc.*;
|
|
+import sun.jvm.hotspot.utilities.*;
|
|
+
|
|
+public class ProcRISCV64Thread implements ThreadProxy {
|
|
+ private ProcDebugger debugger;
|
|
+ private int id;
|
|
+
|
|
+ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) {
|
|
+ this.debugger = debugger;
|
|
+
|
|
+ // FIXME: the size here should be configurable. However, making it
|
|
+ // so would produce a dependency on the "types" package from the
|
|
+ // debugger package, which is not desired.
|
|
+ this.id = (int) addr.getCIntegerAt(0, 4, true);
|
|
+ }
|
|
+
|
|
+ public ProcRISCV64Thread(ProcDebugger debugger, long id) {
|
|
+ this.debugger = debugger;
|
|
+ this.id = (int) id;
|
|
+ }
|
|
+
|
|
+ public ThreadContext getContext() throws IllegalThreadStateException {
|
|
+ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger);
|
|
+ long[] regs = debugger.getThreadIntegerRegisterSet(id);
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch");
|
|
+ }
|
|
+ for (int i = 0; i < regs.length; i++) {
|
|
+ context.setRegister(i, regs[i]);
|
|
+ }
|
|
+ return context;
|
|
+ }
|
|
+
|
|
+ public boolean canSetContext() throws DebuggerException {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ public void setContext(ThreadContext context)
|
|
+ throws IllegalThreadStateException, DebuggerException {
|
|
+ throw new DebuggerException("Unimplemented");
|
|
+ }
|
|
+
|
|
+ public String toString() {
|
|
+ return "t@" + id;
|
|
+ }
|
|
+
|
|
+ public boolean equals(Object obj) {
|
|
+ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return (((ProcRISCV64Thread) obj).id == id);
|
|
+ }
|
|
+
|
|
+ public int hashCode() {
|
|
+ return id;
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java
|
|
new file mode 100644
|
|
index 000000000..498fa0dc6
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java
|
|
@@ -0,0 +1,48 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.proc.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.proc.*;
|
|
+
|
|
+public class ProcRISCV64ThreadContext extends RISCV64ThreadContext {
|
|
+ private ProcDebugger debugger;
|
|
+
|
|
+ public ProcRISCV64ThreadContext(ProcDebugger debugger) {
|
|
+ super();
|
|
+ this.debugger = debugger;
|
|
+ }
|
|
+
|
|
+ public void setRegisterAsAddress(int index, Address value) {
|
|
+ setRegister(index, debugger.getAddressValue(value));
|
|
+ }
|
|
+
|
|
+ public Address getRegisterAsAddress(int index) {
|
|
+ return debugger.newAddress(getRegister(index));
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java
|
|
new file mode 100644
|
|
index 000000000..81afd8fdc
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java
|
|
@@ -0,0 +1,46 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.proc.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.proc.*;
|
|
+
|
|
+public class ProcRISCV64ThreadFactory implements ProcThreadFactory {
|
|
+ private ProcDebugger debugger;
|
|
+
|
|
+ public ProcRISCV64ThreadFactory(ProcDebugger debugger) {
|
|
+ this.debugger = debugger;
|
|
+ }
|
|
+
|
|
+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) {
|
|
+ return new ProcRISCV64Thread(debugger, threadIdentifierAddr);
|
|
+ }
|
|
+
|
|
+ public ThreadProxy createThreadWrapper(long id) {
|
|
+ return new ProcRISCV64Thread(debugger, id);
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java
|
|
new file mode 100644
|
|
index 000000000..ab92e3e74
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java
|
|
@@ -0,0 +1,55 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.remote.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.remote.*;
|
|
+import sun.jvm.hotspot.utilities.*;
|
|
+
|
|
+public class RemoteRISCV64Thread extends RemoteThread {
|
|
+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) {
|
|
+ super(debugger, addr);
|
|
+ }
|
|
+
|
|
+ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) {
|
|
+ super(debugger, id);
|
|
+ }
|
|
+
|
|
+ public ThreadContext getContext() throws IllegalThreadStateException {
|
|
+ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger);
|
|
+ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) :
|
|
+ debugger.getThreadIntegerRegisterSet(id);
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match");
|
|
+ }
|
|
+ for (int i = 0; i < regs.length; i++) {
|
|
+ context.setRegister(i, regs[i]);
|
|
+ }
|
|
+ return context;
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java
|
|
new file mode 100644
|
|
index 000000000..1e8cd19b2
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java
|
|
@@ -0,0 +1,48 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.remote.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.debugger.remote.*;
|
|
+
|
|
+public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext {
|
|
+ private RemoteDebuggerClient debugger;
|
|
+
|
|
+ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) {
|
|
+ super();
|
|
+ this.debugger = debugger;
|
|
+ }
|
|
+
|
|
+ public void setRegisterAsAddress(int index, Address value) {
|
|
+ setRegister(index, debugger.getAddressValue(value));
|
|
+ }
|
|
+
|
|
+ public Address getRegisterAsAddress(int index) {
|
|
+ return debugger.newAddress(getRegister(index));
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java
|
|
new file mode 100644
|
|
index 000000000..eecb6e029
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java
|
|
@@ -0,0 +1,46 @@
|
|
+/*
|
|
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.remote.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.remote.*;
|
|
+
|
|
+public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory {
|
|
+ private RemoteDebuggerClient debugger;
|
|
+
|
|
+ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) {
|
|
+ this.debugger = debugger;
|
|
+ }
|
|
+
|
|
+ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) {
|
|
+ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr);
|
|
+ }
|
|
+
|
|
+ public ThreadProxy createThreadWrapper(long id) {
|
|
+ return new RemoteRISCV64Thread(debugger, id);
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java
|
|
new file mode 100644
|
|
index 000000000..426ff0580
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java
|
|
@@ -0,0 +1,172 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.debugger.riscv64;
|
|
+
|
|
+import java.lang.annotation.Native;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.cdbg.*;
|
|
+
|
|
+/** Specifies the thread context on riscv64 platforms; only a sub-portion
|
|
+ * of the context is guaranteed to be present on all operating
|
|
+ * systems. */
|
|
+
|
|
+public abstract class RISCV64ThreadContext implements ThreadContext {
|
|
+ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64.
|
|
+
|
|
+ // /*
|
|
+ // * Signal context structure - contains all info to do with the state
|
|
+ // * before the signal handler was invoked.
|
|
+ // */
|
|
+ // struct sigcontext {
|
|
+ // struct user_regs_struct sc_regs;
|
|
+ // union __riscv_fp_state sc_fpregs;
|
|
+ // };
|
|
+ //
|
|
+ // struct user_regs_struct {
|
|
+ // unsigned long pc;
|
|
+ // unsigned long ra;
|
|
+ // unsigned long sp;
|
|
+ // unsigned long gp;
|
|
+ // unsigned long tp;
|
|
+ // unsigned long t0;
|
|
+ // unsigned long t1;
|
|
+ // unsigned long t2;
|
|
+ // unsigned long s0;
|
|
+ // unsigned long s1;
|
|
+ // unsigned long a0;
|
|
+ // unsigned long a1;
|
|
+ // unsigned long a2;
|
|
+ // unsigned long a3;
|
|
+ // unsigned long a4;
|
|
+ // unsigned long a5;
|
|
+ // unsigned long a6;
|
|
+ // unsigned long a7;
|
|
+ // unsigned long s2;
|
|
+ // unsigned long s3;
|
|
+ // unsigned long s4;
|
|
+ // unsigned long s5;
|
|
+ // unsigned long s6;
|
|
+ // unsigned long s7;
|
|
+ // unsigned long s8;
|
|
+ // unsigned long s9;
|
|
+ // unsigned long s10;
|
|
+ // unsigned long s11;
|
|
+ // unsigned long t3;
|
|
+ // unsigned long t4;
|
|
+ // unsigned long t5;
|
|
+ // unsigned long t6;
|
|
+ // };
|
|
+
|
|
+ // NOTE: the indices for the various registers must be maintained as
|
|
+ // listed across various operating systems. However, only a small
|
|
+ // subset of the registers' values are guaranteed to be present (and
|
|
+ // must be present for the SA's stack walking to work)
|
|
+
|
|
+ // One instance of the Native annotation is enough to trigger header generation
|
|
+ // for this file.
|
|
+ @Native
|
|
+ public static final int R0 = 0;
|
|
+ public static final int R1 = 1;
|
|
+ public static final int R2 = 2;
|
|
+ public static final int R3 = 3;
|
|
+ public static final int R4 = 4;
|
|
+ public static final int R5 = 5;
|
|
+ public static final int R6 = 6;
|
|
+ public static final int R7 = 7;
|
|
+ public static final int R8 = 8;
|
|
+ public static final int R9 = 9;
|
|
+ public static final int R10 = 10;
|
|
+ public static final int R11 = 11;
|
|
+ public static final int R12 = 12;
|
|
+ public static final int R13 = 13;
|
|
+ public static final int R14 = 14;
|
|
+ public static final int R15 = 15;
|
|
+ public static final int R16 = 16;
|
|
+ public static final int R17 = 17;
|
|
+ public static final int R18 = 18;
|
|
+ public static final int R19 = 19;
|
|
+ public static final int R20 = 20;
|
|
+ public static final int R21 = 21;
|
|
+ public static final int R22 = 22;
|
|
+ public static final int R23 = 23;
|
|
+ public static final int R24 = 24;
|
|
+ public static final int R25 = 25;
|
|
+ public static final int R26 = 26;
|
|
+ public static final int R27 = 27;
|
|
+ public static final int R28 = 28;
|
|
+ public static final int R29 = 29;
|
|
+ public static final int R30 = 30;
|
|
+ public static final int R31 = 31;
|
|
+
|
|
+ public static final int NPRGREG = 32;
|
|
+
|
|
+ public static final int PC = R0;
|
|
+ public static final int LR = R1;
|
|
+ public static final int SP = R2;
|
|
+ public static final int FP = R8;
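+
+ // The aliases mirror the user_regs_struct layout above: slot 0 holds pc,
+ // x1 (ra) is the link register, x2 is sp, and x8 (s0) doubles as the frame pointer.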
|
|
+
|
|
+ private long[] data;
|
|
+
|
|
+ public RISCV64ThreadContext() {
|
|
+ data = new long[NPRGREG];
|
|
+ }
|
|
+
|
|
+ public int getNumRegisters() {
|
|
+ return NPRGREG;
|
|
+ }
|
|
+
|
|
+ public String getRegisterName(int index) {
|
|
+ switch (index) {
|
|
+ case LR: return "lr";
|
|
+ case SP: return "sp";
|
|
+ case PC: return "pc";
|
|
+ default:
|
|
+ return "r" + index;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public void setRegister(int index, long value) {
|
|
+ data[index] = value;
|
|
+ }
|
|
+
|
|
+ public long getRegister(int index) {
|
|
+ return data[index];
|
|
+ }
|
|
+
|
|
+ public CFrame getTopFrame(Debugger dbg) {
|
|
+ return null;
|
|
+ }
|
|
+
|
|
+ /** This can't be implemented in this class since we would have to
|
|
+ * tie the implementation to, for example, the debugging system */
|
|
+ public abstract void setRegisterAsAddress(int index, Address value);
|
|
+
|
|
+ /** This can't be implemented in this class since we would have to
|
|
+ * tie the implementation to, for example, the debugging system */
|
|
+ public abstract Address getRegisterAsAddress(int index);
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
|
|
index 190062785..74bd614d3 100644
|
|
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
|
|
@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess;
|
|
+import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess;
|
|
import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess;
|
|
@@ -99,6 +100,8 @@ public class Threads {
|
|
access = new LinuxPPC64JavaThreadPDAccess();
|
|
} else if (cpu.equals("aarch64")) {
|
|
access = new LinuxAARCH64JavaThreadPDAccess();
|
|
+ } else if (cpu.equals("riscv64")) {
|
|
+ access = new LinuxRISCV64JavaThreadPDAccess();
|
|
} else {
|
|
try {
|
|
access = (JavaThreadPDAccess)
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java
|
|
new file mode 100644
|
|
index 000000000..2df0837b6
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java
|
|
@@ -0,0 +1,132 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.runtime.linux_riscv64;
|
|
+
|
|
+import java.io.*;
|
|
+import java.util.*;
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.runtime.*;
|
|
+import sun.jvm.hotspot.runtime.riscv64.*;
|
|
+import sun.jvm.hotspot.types.*;
|
|
+import sun.jvm.hotspot.utilities.*;
|
|
+
|
|
+public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess {
|
|
+ private static AddressField lastJavaFPField;
|
|
+ private static AddressField osThreadField;
|
|
+
|
|
+ // Field from OSThread
|
|
+ private static CIntegerField osThreadThreadIDField;
|
|
+
|
|
+ // This is currently unneeded but is being kept in case we change
|
|
+ // the currentFrameGuess algorithm
|
|
+ private static final long GUESS_SCAN_RANGE = 128 * 1024;
|
|
+
|
|
+ static {
|
|
+ VM.registerVMInitializedObserver(new Observer() {
|
|
+ public void update(Observable o, Object data) {
|
|
+ initialize(VM.getVM().getTypeDataBase());
|
|
+ }
|
|
+ });
|
|
+ }
|
|
+
|
|
+ private static synchronized void initialize(TypeDataBase db) {
|
|
+ Type type = db.lookupType("JavaThread");
|
|
+ osThreadField = type.getAddressField("_osthread");
|
|
+
|
|
+ Type anchorType = db.lookupType("JavaFrameAnchor");
|
|
+ lastJavaFPField = anchorType.getAddressField("_last_Java_fp");
|
|
+
|
|
+ Type osThreadType = db.lookupType("OSThread");
|
|
+ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id");
|
|
+ }
|
|
+
|
|
+ public Address getLastJavaFP(Address addr) {
|
|
+ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset()));
|
|
+ }
|
|
+
|
|
+ public Address getLastJavaPC(Address addr) {
|
|
+ return null;
|
|
+ }
|
|
+
|
|
+ public Address getBaseOfStackPointer(Address addr) {
|
|
+ return null;
|
|
+ }
|
|
+
|
|
+ public Frame getLastFramePD(JavaThread thread, Address addr) {
|
|
+ Address fp = thread.getLastJavaFP();
|
|
+ if (fp == null) {
|
|
+ return null; // no information
|
|
+ }
|
|
+ return new RISCV64Frame(thread.getLastJavaSP(), fp);
|
|
+ }
|
|
+
|
|
+ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
|
|
+ return new RISCV64RegisterMap(thread, updateMap);
|
|
+ }
|
|
+
|
|
+ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) {
|
|
+ ThreadProxy t = getThreadProxy(addr);
|
|
+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext();
|
|
+ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread);
|
|
+ if (!guesser.run(GUESS_SCAN_RANGE)) {
|
|
+ return null;
|
|
+ }
|
|
+ if (guesser.getPC() == null) {
|
|
+ return new RISCV64Frame(guesser.getSP(), guesser.getFP());
|
|
+ } else {
|
|
+ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public void printThreadIDOn(Address addr, PrintStream tty) {
|
|
+ tty.print(getThreadProxy(addr));
|
|
+ }
|
|
+
|
|
+ public void printInfoOn(Address threadAddr, PrintStream tty) {
|
|
+ tty.print("Thread id: ");
|
|
+ printThreadIDOn(threadAddr, tty);
|
|
+ }
|
|
+
|
|
+ public Address getLastSP(Address addr) {
|
|
+ ThreadProxy t = getThreadProxy(addr);
|
|
+ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext();
|
|
+ return context.getRegisterAsAddress(RISCV64ThreadContext.SP);
|
|
+ }
|
|
+
|
|
+ public ThreadProxy getThreadProxy(Address addr) {
|
|
+ // Addr is the address of the JavaThread.
|
|
+ // Fetch the OSThread (for now and for simplicity, not making a
|
|
+ // separate "OSThread" class in this package)
|
|
+ Address osThreadAddr = osThreadField.getValue(addr);
|
|
+ // Get the address of the _thread_id from the OSThread
|
|
+ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset());
|
|
+
|
|
+ JVMDebugger debugger = VM.getVM().getDebugger();
|
|
+ return debugger.getThreadForIdentifierAddress(threadIdAddr);
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java
new file mode 100644
index 000000000..a3bbf1ad1
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java
@@ -0,0 +1,223 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, 2019, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.runtime.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.debugger.riscv64.*;
|
|
+import sun.jvm.hotspot.code.*;
|
|
+import sun.jvm.hotspot.interpreter.*;
|
|
+import sun.jvm.hotspot.runtime.*;
|
|
+import sun.jvm.hotspot.runtime.riscv64.*;
|
|
+
|
|
+/** <P> Should be able to be used on all riscv64 platforms we support
|
|
+ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()"
|
|
+ functionality. Input is an RISCV64ThreadContext; output is SP, FP,
|
|
+ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is
|
|
+ left to the caller, since we may need to subclass RISCV64Frame to
|
|
+ support signal handler frames on Unix platforms. </P>
|
|
+
|
|
+ <P> Algorithm is to walk up the stack within a given range (say,
|
|
+ 512K at most) looking for a plausible PC and SP for a Java frame,
|
|
+ also considering those coming in from the context. If we find a PC
|
|
+ that belongs to the VM (i.e., in generated code like the
|
|
+ interpreter or CodeCache) then we try to find an associated FP.
|
|
+ We repeat this until we either find a complete frame or run out of
|
|
+ stack to look at. </P> */
|
|
+
|
|
+public class RISCV64CurrentFrameGuess {
|
|
+ private RISCV64ThreadContext context;
|
|
+ private JavaThread thread;
|
|
+ private Address spFound;
|
|
+ private Address fpFound;
|
|
+ private Address pcFound;
|
|
+
|
|
+ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG")
|
|
+ != null;
|
|
+
|
|
+ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context,
|
|
+ JavaThread thread) {
|
|
+ this.context = context;
|
|
+ this.thread = thread;
|
|
+ }
|
|
+
|
|
+ /** Returns false if not able to find a frame within a reasonable range. */
|
|
+ public boolean run(long regionInBytesToSearch) {
|
|
+ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP);
|
|
+ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC);
|
|
+ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP);
|
|
+ if (sp == null) {
|
|
+ // Bail out if no last java frame either
|
|
+ if (thread.getLastJavaSP() != null) {
|
|
+ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null);
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+ }
|
|
+ Address end = sp.addOffsetTo(regionInBytesToSearch);
|
|
+ VM vm = VM.getVM();
|
|
+
|
|
+ setValues(null, null, null); // Assume we're not going to find anything
|
|
+
|
|
+ if (vm.isJavaPCDbg(pc)) {
|
|
+ if (vm.isClientCompiler()) {
|
|
+ // If the topmost frame is a Java frame, we are (pretty much)
|
|
+ // guaranteed to have a viable FP. We should be more robust
|
|
+ // than this (we have the potential for losing entire threads'
|
|
+ // stack traces) but need to see how much work we really have
|
|
+ // to do here. Searching the stack for an (SP, FP) pair is
|
|
+ // hard since it's easy to misinterpret inter-frame stack
|
|
+ // pointers as base-of-frame pointers; we also don't know the
|
|
+ // sizes of C1 frames (not registered in the nmethod) so can't
|
|
+ // derive them from SP.
|
|
+
|
|
+ setValues(sp, fp, pc);
|
|
+ return true;
|
|
+ } else {
|
|
+ if (vm.getInterpreter().contains(pc)) {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " +
|
|
+ sp + ", fp = " + fp + ", pc = " + pc);
|
|
+ }
|
|
+ setValues(sp, fp, pc);
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ // For the server compiler, FP is not guaranteed to be valid
|
|
+ // for compiled code. In addition, an earlier attempt at a
|
|
+ // non-searching algorithm (see below) failed because the
|
|
+ // stack pointer from the thread context was pointing
|
|
+ // (considerably) beyond the ostensible end of the stack, into
|
|
+ // garbage; walking from the topmost frame back caused a crash.
|
|
+ //
|
|
+ // This algorithm takes the current PC as a given and tries to
|
|
+ // find the correct corresponding SP by walking up the stack
|
|
+ // and repeatedly performing stackwalks (very inefficient).
|
|
+ //
|
|
+ // FIXME: there is something wrong with stackwalking across
|
|
+ // adapter frames...this is likely to be the root cause of the
|
|
+ // failure with the simpler algorithm below.
|
|
+
|
|
+ for (long offset = 0;
|
|
+ offset < regionInBytesToSearch;
|
|
+ offset += vm.getAddressSize()) {
|
|
+ try {
|
|
+ Address curSP = sp.addOffsetTo(offset);
|
|
+ Frame frame = new RISCV64Frame(curSP, null, pc);
|
|
+ RegisterMap map = thread.newRegisterMap(false);
|
|
+ while (frame != null) {
|
|
+ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) {
|
|
+ // We were able to traverse all the way to the
|
|
+ // bottommost Java frame.
|
|
+ // This sp looks good. Keep it.
|
|
+ if (DEBUG) {
|
|
+ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc);
|
|
+ }
|
|
+ setValues(curSP, null, pc);
|
|
+ return true;
|
|
+ }
|
|
+ frame = frame.sender(map);
|
|
+ }
|
|
+ } catch (Exception e) {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset);
|
|
+ }
|
|
+ // Bad SP. Try another.
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // We were not able to find a plausible SP to go with this PC.
|
|
+ // Bail out.
|
|
+ return false;
|
|
+ }
|
|
+ } else {
|
|
+ // If the current program counter was not known to us as a Java
|
|
+ // PC, we currently assume that we are in the run-time system
|
|
+ // and attempt to look to thread-local storage for saved SP and
|
|
+ // FP. Note that if these are null (because we were, in fact,
|
|
+ // in Java code, i.e., vtable stubs or similar, and the SA
|
|
+ // didn't have enough insight into the target VM to understand
|
|
+ // that) then we are going to lose the entire stack trace for
|
|
+ // the thread, which is sub-optimal. FIXME.
|
|
+
|
|
+ if (DEBUG) {
|
|
+ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " +
|
|
+ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP());
|
|
+ }
|
|
+ if (thread.getLastJavaSP() == null) {
|
|
+ return false; // No known Java frames on stack
|
|
+ }
|
|
+
|
|
+ // The runtime has a nasty habit of not saving fp in the frame
|
|
+ // anchor, leaving us to grovel about in the stack to find a
|
|
+ // plausible address. Fortunately, this only happens in
|
|
+ // compiled code; there we always have a valid PC, and we always
|
|
+ // push RA and FP onto the stack as a pair, with FP at the lower
|
|
+ // address.
|
|
+ pc = thread.getLastJavaPC();
|
|
+ fp = thread.getLastJavaFP();
|
|
+ sp = thread.getLastJavaSP();
|
|
+
|
|
+ if (fp == null) {
|
|
+ CodeCache cc = vm.getCodeCache();
|
|
+ if (cc.contains(pc)) {
|
|
+ CodeBlob cb = cc.findBlob(pc);
|
|
+ if (DEBUG) {
|
|
+ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize());
|
|
+ }
|
|
+ // See if we can derive a frame pointer from SP and PC
|
|
+ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize();
|
|
+ if (link_offset >= 0) {
|
|
+ fp = sp.addOffsetTo(link_offset);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // We found a PC in the frame anchor. Check that it's plausible, and
|
|
+ // if it is, use it.
|
|
+ if (vm.isJavaPCDbg(pc)) {
|
|
+ setValues(sp, fp, pc);
|
|
+ } else {
|
|
+ setValues(sp, fp, null);
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public Address getSP() { return spFound; }
|
|
+ public Address getFP() { return fpFound; }
|
|
+ /** May be null if getting values from thread-local storage; take
|
|
+ care to call the correct RISCV64Frame constructor to recover this if
|
|
+ necessary */
|
|
+ public Address getPC() { return pcFound; }
|
|
+
|
|
+ private void setValues(Address sp, Address fp, Address pc) {
|
|
+ spFound = sp;
|
|
+ fpFound = fp;
|
|
+ pcFound = pc;
|
|
+ }
|
|
+}
|
|
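The fallback at the end of run() above derives a frame pointer when the frame anchor holds none: for a compiled blob it computes link_offset = frameSize - 2 words and reads FP at sp + link_offset. A minimal sketch of that arithmetic, assuming the riscv64 convention that the prologue stores the RA/FP pair in the two topmost slots of a fixed-size frame and an 8-byte word; the class and method names below are illustrative only and are not part of the patch.

```java
// Illustrative only: mirrors the link_offset computation in
// RISCV64CurrentFrameGuess.run() for a blob with a fixed frame size.
public final class FrameLayoutSketch {
    private static final long WORD_SIZE = 8; // riscv64 address size in bytes

    /** Byte offset from SP of the saved FP slot, or -1 if the frame is too small. */
    static long linkOffset(long frameSizeInBytes) {
        long offset = frameSizeInBytes - 2 * WORD_SIZE; // RA sits one word above FP
        return offset >= 0 ? offset : -1;
    }

    public static void main(String[] args) {
        // A compiled frame of 64 bytes keeps the caller's FP 48 bytes above SP
        // (and the return address 56 bytes above SP).
        System.out.println(linkOffset(64)); // 48
    }
}
```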
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java
|
|
new file mode 100644
|
|
index 000000000..c04def5a1
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java
|
|
@@ -0,0 +1,554 @@
|
|
+/*
|
|
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, 2019, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.runtime.riscv64;
|
|
+
|
|
+import java.util.*;
|
|
+import sun.jvm.hotspot.code.*;
|
|
+import sun.jvm.hotspot.compiler.*;
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.oops.*;
|
|
+import sun.jvm.hotspot.runtime.*;
|
|
+import sun.jvm.hotspot.types.*;
|
|
+import sun.jvm.hotspot.utilities.*;
|
|
+
|
|
+/** Specialization of and implementation of abstract methods of the
|
|
+ Frame class for the riscv64 family of CPUs. */
|
|
+
|
|
+public class RISCV64Frame extends Frame {
|
|
+ private static final boolean DEBUG;
|
|
+ static {
|
|
+ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") != null;
|
|
+ }
|
|
+
|
|
+ // Java frames
|
|
+ private static final int LINK_OFFSET = -2;
|
|
+ private static final int RETURN_ADDR_OFFSET = -1;
|
|
+ private static final int SENDER_SP_OFFSET = 0;
|
|
+
|
|
+ // Interpreter frames
|
|
+ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3;
|
|
+ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1;
|
|
+ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1;
|
|
+ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only
|
|
+ private static int INTERPRETER_FRAME_PADDING_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_MIRROR_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_CACHE_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_LOCALS_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_BCX_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET;
|
|
+ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET;
|
|
+
|
|
+ // Entry frames
|
|
+ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10;
|
|
+
|
|
+ // Native frames
|
|
+ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2;
|
|
+
|
|
+ private static VMReg fp = new VMReg(8);
|
|
+
|
|
+ static {
|
|
+ VM.registerVMInitializedObserver(new Observer() {
|
|
+ public void update(Observable o, Object data) {
|
|
+ initialize(VM.getVM().getTypeDataBase());
|
|
+ }
|
|
+ });
|
|
+ }
|
|
+
|
|
+ private static synchronized void initialize(TypeDataBase db) {
|
|
+ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1;
|
|
+ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
|
|
+ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
|
|
+ }
|
|
+
|
|
+
|
|
+ // an additional field beyond sp and pc:
|
|
+ Address raw_fp; // frame pointer
|
|
+ private Address raw_unextendedSP;
|
|
+
|
|
+ private RISCV64Frame() {
|
|
+ }
|
|
+
|
|
+ private void adjustForDeopt() {
|
|
+ if ( pc != null) {
|
|
+ // Look for a deopt pc and if it is deopted convert to original pc
|
|
+ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
|
|
+ if (cb != null && cb.isJavaMethod()) {
|
|
+ NMethod nm = (NMethod) cb;
|
|
+ if (pc.equals(nm.deoptHandlerBegin())) {
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame");
|
|
+ }
|
|
+ // adjust pc if frame is deoptimized.
|
|
+ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset());
|
|
+ deoptimized = true;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) {
|
|
+ this.raw_sp = raw_sp;
|
|
+ this.raw_unextendedSP = raw_sp;
|
|
+ this.raw_fp = raw_fp;
|
|
+ this.pc = pc;
|
|
+ adjustUnextendedSP();
|
|
+
|
|
+ // Frame must be fully constructed before this call
|
|
+ adjustForDeopt();
|
|
+
|
|
+ if (DEBUG) {
|
|
+ System.out.println("RISCV64Frame(sp, fp, pc): " + this);
|
|
+ dumpStack();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public RISCV64Frame(Address raw_sp, Address raw_fp) {
|
|
+ this.raw_sp = raw_sp;
|
|
+ this.raw_unextendedSP = raw_sp;
|
|
+ this.raw_fp = raw_fp;
|
|
+
|
|
+ // We cannot assume SP[-1] always contains a valid return PC (e.g. if
|
|
+ // the callee is a C/C++ compiled frame). If the PC is not known to
|
|
+ // Java then this.pc is null.
|
|
+ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
|
|
+ if (VM.getVM().isJavaPCDbg(savedPC)) {
|
|
+ this.pc = savedPC;
|
|
+ }
|
|
+
|
|
+ adjustUnextendedSP();
|
|
+
|
|
+ // Frame must be fully constructed before this call
|
|
+ adjustForDeopt();
|
|
+
|
|
+ if (DEBUG) {
|
|
+ System.out.println("RISCV64Frame(sp, fp): " + this);
|
|
+ dumpStack();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) {
|
|
+ this.raw_sp = raw_sp;
|
|
+ this.raw_unextendedSP = raw_unextendedSp;
|
|
+ this.raw_fp = raw_fp;
|
|
+ this.pc = pc;
|
|
+ adjustUnextendedSP();
|
|
+
|
|
+ // Frame must be fully constructed before this call
|
|
+ adjustForDeopt();
|
|
+
|
|
+ if (DEBUG) {
|
|
+ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this);
|
|
+ dumpStack();
|
|
+ }
|
|
+
|
|
+ }
|
|
+
|
|
+ public Object clone() {
|
|
+ RISCV64Frame frame = new RISCV64Frame();
|
|
+ frame.raw_sp = raw_sp;
|
|
+ frame.raw_unextendedSP = raw_unextendedSP;
|
|
+ frame.raw_fp = raw_fp;
|
|
+ frame.pc = pc;
|
|
+ frame.deoptimized = deoptimized;
|
|
+ return frame;
|
|
+ }
|
|
+
|
|
+ public boolean equals(Object arg) {
|
|
+ if (arg == null) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (!(arg instanceof RISCV64Frame)) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ RISCV64Frame other = (RISCV64Frame) arg;
|
|
+
|
|
+ return (AddressOps.equal(getSP(), other.getSP()) &&
|
|
+ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) &&
|
|
+ AddressOps.equal(getFP(), other.getFP()) &&
|
|
+ AddressOps.equal(getPC(), other.getPC()));
|
|
+ }
|
|
+
|
|
+ public int hashCode() {
|
|
+ if (raw_sp == null) {
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return raw_sp.hashCode();
|
|
+ }
|
|
+
|
|
+ public String toString() {
|
|
+ return "sp: " + (getSP() == null? "null" : getSP().toString()) +
|
|
+ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) +
|
|
+ ", fp: " + (getFP() == null? "null" : getFP().toString()) +
|
|
+ ", pc: " + (pc == null? "null" : pc.toString());
|
|
+ }
|
|
+
|
|
+ // accessors for the instance variables
|
|
+ public Address getFP() { return raw_fp; }
|
|
+ public Address getSP() { return raw_sp; }
|
|
+ public Address getID() { return raw_sp; }
|
|
+
|
|
+ // FIXME: not implemented yet
|
|
+ public boolean isSignalHandlerFrameDbg() { return false; }
|
|
+ public int getSignalNumberDbg() { return 0; }
|
|
+ public String getSignalNameDbg() { return null; }
|
|
+
|
|
+ public boolean isInterpretedFrameValid() {
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(isInterpretedFrame(), "Not an interpreted frame");
|
|
+ }
|
|
+
|
|
+ // These are reasonable sanity checks
|
|
+ if (getFP() == null || getFP().andWithMask(0x3) != null) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (getSP() == null || getSP().andWithMask(0x3) != null) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ // These are hacks to keep us out of trouble.
|
|
+ // The problem with these is that they mask other problems
|
|
+ if (getFP().lessThanOrEqual(getSP())) {
|
|
+ // this attempts to deal with unsigned comparison above
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) {
|
|
+ // stack frames shouldn't be large.
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ public Frame sender(RegisterMap regMap, CodeBlob cb) {
|
|
+ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap;
|
|
+
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(map != null, "map must be set");
|
|
+ }
|
|
+
|
|
+ // Default is we don't have to follow them. The sender_for_xxx will
|
|
+ // update it accordingly
|
|
+ map.setIncludeArgumentOops(false);
|
|
+
|
|
+ if (isEntryFrame()) return senderForEntryFrame(map);
|
|
+ if (isInterpretedFrame()) return senderForInterpreterFrame(map);
|
|
+
|
|
+ if(cb == null) {
|
|
+ cb = VM.getVM().getCodeCache().findBlob(getPC());
|
|
+ } else {
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (cb != null) {
|
|
+ return senderForCompiledFrame(map, cb);
|
|
+ }
|
|
+
|
|
+ // Must be native-compiled frame, i.e. the marshaling code for native
|
|
+ // methods that exists in the core system.
|
|
+ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC());
|
|
+ }
|
|
+
|
|
+ private Frame senderForEntryFrame(RISCV64RegisterMap map) {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("senderForEntryFrame");
|
|
+ }
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(map != null, "map must be set");
|
|
+ }
|
|
+ // Java frame called from C; skip all C frames and return top C
|
|
+ // frame of that chunk as the sender
|
|
+ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper();
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero");
|
|
+ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack");
|
|
+ }
|
|
+ RISCV64Frame fr;
|
|
+ if (jcw.getLastJavaPC() != null) {
|
|
+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC());
|
|
+ } else {
|
|
+ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP());
|
|
+ }
|
|
+ map.clear();
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(map.getIncludeArgumentOops(), "should be set by clear");
|
|
+ }
|
|
+ return fr;
|
|
+ }
|
|
+
|
|
+ //------------------------------------------------------------------------------
|
|
+ // frame::adjust_unextended_sp
|
|
+ private void adjustUnextendedSP() {
|
|
+ // If we are returning to a compiled MethodHandle call site, the
|
|
+ // saved_fp will in fact be a saved value of the unextended SP. The
|
|
+ // simplest way to tell whether we are returning to such a call site
|
|
+ // is as follows:
|
|
+
|
|
+ CodeBlob cb = cb();
|
|
+ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
|
|
+ if (senderNm != null) {
|
|
+ // If the sender PC is a deoptimization point, get the original
|
|
+ // PC. For MethodHandle call site the unextended_sp is stored in
|
|
+ // saved_fp.
|
|
+ if (senderNm.isDeoptMhEntry(getPC())) {
|
|
+ raw_unextendedSP = getFP();
|
|
+ }
|
|
+ else if (senderNm.isDeoptEntry(getPC())) {
|
|
+ }
|
|
+ else if (senderNm.isMethodHandleReturn(getPC())) {
|
|
+ raw_unextendedSP = getFP();
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("senderForInterpreterFrame");
|
|
+ }
|
|
+ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
|
|
+ Address sp = addressOfStackSlot(SENDER_SP_OFFSET);
|
|
+ // We do not need to update the callee-save register mapping because above
|
|
+ // us is either another interpreter frame or a converter-frame, but never
|
|
+ // directly a compiled frame.
|
|
+ // 11/24/04 SFG. With the removal of adapter frames this is no longer true.
|
|
+ // However c2 no longer uses callee save register for java calls so there
|
|
+ // are no callee register to find.
|
|
+
|
|
+ if (map.getUpdateMap())
|
|
+ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET));
|
|
+
|
|
+ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC());
|
|
+ }
|
|
+
|
|
+ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) {
|
|
+ map.setLocation(fp, savedFPAddr);
|
|
+ }
|
|
+
|
|
+ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("senderForCompiledFrame");
|
|
+ }
|
|
+
|
|
+ //
|
|
+ // NOTE: some of this code is (unfortunately) duplicated in RISCV64CurrentFrameGuess
|
|
+ //
|
|
+
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(map != null, "map must be set");
|
|
+ }
|
|
+
|
|
+ // frame owned by optimizing compiler
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(cb.getFrameSize() >= 0, "must have non-negative frame size");
|
|
+ }
|
|
+ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize());
|
|
+
|
|
+ // The return_address is always the word on the stack
|
|
+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize());
|
|
+
|
|
+ // This is the saved value of FP which may or may not really be an FP.
|
|
+ // It is only an FP if the sender is an interpreter frame.
|
|
+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize());
|
|
+
|
|
+ if (map.getUpdateMap()) {
|
|
+ // Tell GC to use argument oopmaps for some runtime stubs that need it.
|
|
+ // For C1, the runtime stub might not have oop maps, so set this flag
|
|
+ // outside of update_register_map.
|
|
+ map.setIncludeArgumentOops(cb.callerMustGCArguments());
|
|
+
|
|
+ if (cb.getOopMaps() != null) {
|
|
+ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true);
|
|
+ }
|
|
+
|
|
+ // Since the prolog does the save and restore of FP there is no oopmap
|
|
+ // for it so we must fill in its location as if there was an oopmap entry
|
|
+ // since if our caller was compiled code there could be live jvm state in it.
|
|
+ updateMapWithSavedLink(map, savedFPAddr);
|
|
+ }
|
|
+
|
|
+ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC);
|
|
+ }
|
|
+
|
|
+ protected boolean hasSenderPD() {
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ public long frameSize() {
|
|
+ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize());
|
|
+ }
|
|
+
|
|
+ public Address getLink() {
|
|
+ try {
|
|
+ if (DEBUG) {
|
|
+ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET)
|
|
+ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0));
|
|
+ }
|
|
+ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0);
|
|
+ } catch (Exception e) {
|
|
+ if (DEBUG)
|
|
+ System.out.println("Returning null");
|
|
+ return null;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public Address getUnextendedSP() { return raw_unextendedSP; }
|
|
+
|
|
+ // Return address:
|
|
+ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); }
|
|
+ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); }
|
|
+
|
|
+ // Return the address of the parameter at the given zero-origin index.
|
|
+ public Address getNativeParamAddr(int idx) {
|
|
+ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx);
|
|
+ }
|
|
+
|
|
+ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); }
|
|
+
|
|
+ public Address addressOfInterpreterFrameLocals() {
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET);
|
|
+ }
|
|
+
|
|
+ private Address addressOfInterpreterFrameBCX() {
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET);
|
|
+ }
|
|
+
|
|
+ public int getInterpreterFrameBCI() {
|
|
+ // FIXME: this is not atomic with respect to GC and is unsuitable
|
|
+ // for use in a non-debugging, or reflective, system. Need to
|
|
+ // figure out how to express this.
|
|
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0);
|
|
+ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0);
|
|
+ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle);
|
|
+ return bcpToBci(bcp, method);
|
|
+ }
|
|
+
|
|
+ public Address addressOfInterpreterFrameMDX() {
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET);
|
|
+ }
|
|
+
|
|
+ // expression stack
|
|
+ // (the max_stack arguments are used by the GC; see class FrameClosure)
|
|
+
|
|
+ public Address addressOfInterpreterFrameExpressionStack() {
|
|
+ Address monitorEnd = interpreterFrameMonitorEnd().address();
|
|
+ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize());
|
|
+ }
|
|
+
|
|
+ public int getInterpreterFrameExpressionStackDirection() { return -1; }
|
|
+
|
|
+ // top of expression stack
|
|
+ public Address addressOfInterpreterFrameTOS() {
|
|
+ return getSP();
|
|
+ }
|
|
+
|
|
+ /** Expression stack from top down */
|
|
+ public Address addressOfInterpreterFrameTOSAt(int slot) {
|
|
+ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize());
|
|
+ }
|
|
+
|
|
+ public Address getInterpreterFrameSenderSP() {
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ Assert.that(isInterpretedFrame(), "interpreted frame expected");
|
|
+ }
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
|
|
+ }
|
|
+
|
|
+ // Monitors
|
|
+ public BasicObjectLock interpreterFrameMonitorBegin() {
|
|
+ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET));
|
|
+ }
|
|
+
|
|
+ public BasicObjectLock interpreterFrameMonitorEnd() {
|
|
+ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0);
|
|
+ if (Assert.ASSERTS_ENABLED) {
|
|
+ // make sure the pointer points inside the frame
|
|
+ Assert.that(AddressOps.gt(getFP(), result), "result must be < frame pointer");
|
|
+ Assert.that(AddressOps.lte(getSP(), result), "result must be >= stack pointer");
|
|
+ }
|
|
+ return new BasicObjectLock(result);
|
|
+ }
|
|
+
|
|
+ public int interpreterFrameMonitorSize() {
|
|
+ return BasicObjectLock.size();
|
|
+ }
|
|
+
|
|
+ // Method
|
|
+ public Address addressOfInterpreterFrameMethod() {
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET);
|
|
+ }
|
|
+
|
|
+ // Constant pool cache
|
|
+ public Address addressOfInterpreterFrameCPCache() {
|
|
+ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET);
|
|
+ }
|
|
+
|
|
+ // Entry frames
|
|
+ public JavaCallWrapper getEntryFrameCallWrapper() {
|
|
+ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0));
|
|
+ }
|
|
+
|
|
+ protected Address addressOfSavedOopResult() {
|
|
+ // offset is 2 for compiler2 and 3 for compiler1
|
|
+ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) *
|
|
+ VM.getVM().getAddressSize());
|
|
+ }
|
|
+
|
|
+ protected Address addressOfSavedReceiver() {
|
|
+ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
|
|
+ }
|
|
+
|
|
+ private void dumpStack() {
|
|
+ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
|
|
+ AddressOps.lt(addr, getSP());
|
|
+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
|
|
+ System.out.println(addr + ": " + addr.getAddressAt(0));
|
|
+ }
|
|
+ System.out.println("-----------------------");
|
|
+ for (Address addr = getSP();
|
|
+ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize()));
|
|
+ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
|
|
+ System.out.println(addr + ": " + addr.getAddressAt(0));
|
|
+ }
|
|
+ }
|
|
+}
|
|
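The slot constants near the top of RISCV64Frame above (LINK_OFFSET = -2, RETURN_ADDR_OFFSET = -1, SENDER_SP_OFFSET = 0, and the interpreter-frame slots chained below them) are word offsets relative to FP; addressOfStackSlot() in the shared SA Frame code is assumed to resolve them as fp + offset * wordSize. A small sketch of that address arithmetic with hypothetical names and an example FP value, not part of the patch.

```java
// Illustrative only: how the fp-relative word offsets above turn into addresses.
public final class SlotOffsetSketch {
    private static final long WORD = 8; // riscv64 address size in bytes

    /** Byte address of a stack slot, assuming slotAddress = fp + offset * word. */
    static long slotAddress(long fp, int slotOffset) {
        return fp + slotOffset * WORD;
    }

    public static void main(String[] args) {
        long fp = 0x7f0000001000L; // example frame pointer
        System.out.printf("saved FP (LINK_OFFSET)       @ 0x%x%n", slotAddress(fp, -2));
        System.out.printf("return address               @ 0x%x%n", slotAddress(fp, -1));
        System.out.printf("sender SP (SENDER_SP_OFFSET) = 0x%x%n", slotAddress(fp, 0));
        System.out.printf("interp. sender SP slot       @ 0x%x%n", slotAddress(fp, -3));
    }
}
```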
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java
|
|
new file mode 100644
|
|
index 000000000..4d79e3ee4
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java
|
|
@@ -0,0 +1,58 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.runtime.riscv64;
|
|
+
|
|
+import java.util.*;
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.types.*;
|
|
+import sun.jvm.hotspot.runtime.*;
|
|
+
|
|
+public class RISCV64JavaCallWrapper extends JavaCallWrapper {
|
|
+ private static AddressField lastJavaFPField;
|
|
+
|
|
+ static {
|
|
+ VM.registerVMInitializedObserver(new Observer() {
|
|
+ public void update(Observable o, Object data) {
|
|
+ initialize(VM.getVM().getTypeDataBase());
|
|
+ }
|
|
+ });
|
|
+ }
|
|
+
|
|
+ private static synchronized void initialize(TypeDataBase db) {
|
|
+ Type type = db.lookupType("JavaFrameAnchor");
|
|
+
|
|
+ lastJavaFPField = type.getAddressField("_last_Java_fp");
|
|
+ }
|
|
+
|
|
+ public RISCV64JavaCallWrapper(Address addr) {
|
|
+ super(addr);
|
|
+ }
|
|
+
|
|
+ public Address getLastJavaFP() {
|
|
+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset()));
|
|
+ }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java
|
|
new file mode 100644
|
|
index 000000000..d7187a5f8
|
|
--- /dev/null
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java
|
|
@@ -0,0 +1,53 @@
|
|
+/*
|
|
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2015, Red Hat Inc.
|
|
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+package sun.jvm.hotspot.runtime.riscv64;
|
|
+
|
|
+import sun.jvm.hotspot.debugger.*;
|
|
+import sun.jvm.hotspot.runtime.*;
|
|
+
|
|
+public class RISCV64RegisterMap extends RegisterMap {
|
|
+
|
|
+ /** This is the only public constructor */
|
|
+ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) {
|
|
+ super(thread, updateMap);
|
|
+ }
|
|
+
|
|
+ protected RISCV64RegisterMap(RegisterMap map) {
|
|
+ super(map);
|
|
+ }
|
|
+
|
|
+ public Object clone() {
|
|
+ RISCV64RegisterMap retval = new RISCV64RegisterMap(this);
|
|
+ return retval;
|
|
+ }
|
|
+
|
|
+ // no PD state to clear or copy:
|
|
+ protected void clearPD() {}
|
|
+ protected void initializePD() {}
|
|
+ protected void initializeFromPD(RegisterMap map) {}
|
|
+ protected Address getLocationPD(VMReg reg) { return null; }
|
|
+}
|
|
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
|
|
index 7d7a6107c..948eabcab 100644
|
|
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
|
|
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
|
|
@@ -54,7 +54,7 @@ public class PlatformInfo {
|
|
|
|
public static boolean knownCPU(String cpu) {
|
|
final String[] KNOWN =
|
|
- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"};
|
|
+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"};
|
|
|
|
for(String s : KNOWN) {
|
|
if(s.equals(cpu))
|
|
diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c
|
|
index d0a6f4ea8..a29c7bf8b 100644
|
|
--- a/src/utils/hsdis/hsdis.c
|
|
+++ b/src/utils/hsdis/hsdis.c
|
|
@@ -28,9 +28,6 @@
|
|
*/
|
|
|
|
#include <config.h> /* required by bfd.h */
|
|
-#include <errno.h>
|
|
-#include <inttypes.h>
|
|
-#include <string.h>
|
|
|
|
#include <libiberty.h>
|
|
#include <bfd.h>
|
|
@@ -479,6 +476,9 @@ static const char* native_arch_name() {
|
|
#endif
|
|
#ifdef LIBARCH_s390x
|
|
res = "s390:64-bit";
|
|
+#endif
|
|
+#ifdef LIBARCH_riscv64
|
|
+ res = "riscv:rv64";
|
|
#endif
|
|
if (res == NULL)
|
|
res = "architecture not set in Makefile!";
|
|
diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java
|
|
index 7805918c2..a21307083 100644
|
|
--- a/test/hotspot/jtreg/compiler/c2/TestBit.java
|
|
+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools;
|
|
*
|
|
* @run driver compiler.c2.TestBit
|
|
*
|
|
- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le"
|
|
+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64"
|
|
* @requires vm.debug == true & vm.compiler2.enabled
|
|
*/
|
|
public class TestBit {
|
|
@@ -54,7 +55,8 @@ public class TestBit {
|
|
String expectedTestBitInstruction =
|
|
"ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" :
|
|
"aarch64".equals(System.getProperty("os.arch")) ? "tb" :
|
|
- "amd64".equals(System.getProperty("os.arch")) ? "test" : null;
|
|
+ "amd64".equals(System.getProperty("os.arch")) ? "test" :
|
|
+ "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null;
|
|
|
|
if (expectedTestBitInstruction != null) {
|
|
output.shouldContain(expectedTestBitInstruction);
|
|
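The chained ternary in the TestBit hunk above maps os.arch to the instruction substring the test expects in the compiler's output, now including riscv64 -> "andi". The same mapping written as a switch, shown purely as a readability sketch (the class name is hypothetical, and the test itself keeps the ternary form).

```java
// Illustrative only: the os.arch -> expected-instruction mapping from TestBit.
public final class ExpectedTestBitInstruction {
    static String expectedFor(String osArch) {
        switch (osArch) {
            case "ppc64le": return "ANDI";
            case "aarch64": return "tb";
            case "amd64":   return "test";
            case "riscv64": return "andi";
            default:        return null; // no instruction check on other architectures
        }
    }

    public static void main(String[] args) {
        System.out.println(expectedFor(System.getProperty("os.arch")));
    }
}
```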
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
|
|
index 558b4218f..9d875e33f 100644
|
|
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli;
|
|
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
|
|
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
|
|
@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU {
|
|
SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
|
|
new GenericTestCaseForUnsupportedAArch64CPU(
|
|
SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
|
|
+ new GenericTestCaseForUnsupportedRISCV64CPU(
|
|
+ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
|
|
new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
|
|
SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
|
|
new GenericTestCaseForOtherCPU(
|
|
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
|
|
index 3ed72bf0a..a7e277060 100644
|
|
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli;
|
|
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
|
|
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
|
|
@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU {
|
|
SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
|
|
new GenericTestCaseForUnsupportedAArch64CPU(
|
|
SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
|
|
+ new GenericTestCaseForUnsupportedRISCV64CPU(
|
|
+ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
|
|
new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
|
|
SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
|
|
new GenericTestCaseForOtherCPU(
|
|
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
|
|
index c05cf309d..e714fcc59 100644
|
|
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli;
|
|
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
|
|
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
|
|
@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU {
|
|
SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
|
|
new GenericTestCaseForUnsupportedAArch64CPU(
|
|
SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
|
|
+ new GenericTestCaseForUnsupportedRISCV64CPU(
|
|
+ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
|
|
new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
|
|
SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
|
|
new GenericTestCaseForOtherCPU(
|
|
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
|
|
index 58ce5366b..d52d81e26 100644
|
|
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli;
|
|
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
|
|
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
|
|
import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
|
|
import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU;
|
|
@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU {
|
|
SHAOptionsBase.USE_SHA_OPTION),
|
|
new GenericTestCaseForUnsupportedAArch64CPU(
|
|
SHAOptionsBase.USE_SHA_OPTION),
|
|
+ new GenericTestCaseForUnsupportedRISCV64CPU(
|
|
+ SHAOptionsBase.USE_SHA_OPTION),
|
|
new UseSHASpecificTestCaseForUnsupportedCPU(
|
|
SHAOptionsBase.USE_SHA_OPTION),
|
|
new GenericTestCaseForOtherCPU(
|
|
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
|
|
index faa9fdbae..50e549069 100644
|
|
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate;
|
|
|
|
/**
|
|
* Generic test case for SHA-related options targeted to any CPU except
|
|
- * AArch64, PPC, S390x, SPARC and X86.
|
|
+ * AArch64, RISCV64, PPC, S390x, SPARC and X86.
|
|
*/
|
|
public class GenericTestCaseForOtherCPU extends
|
|
SHAOptionsBase.TestCase {
|
|
public GenericTestCaseForOtherCPU(String optionName) {
|
|
- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86.
|
|
+ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86.
|
|
super(optionName, new NotPredicate(
|
|
new OrPredicate(Platform::isAArch64,
|
|
+ new OrPredicate(Platform::isRISCV64,
|
|
new OrPredicate(Platform::isS390x,
|
|
new OrPredicate(Platform::isSparc,
|
|
new OrPredicate(Platform::isPPC,
|
|
new OrPredicate(Platform::isX64,
|
|
- Platform::isX86)))))));
|
|
+ Platform::isX86))))))));
|
|
}
|
|
|
|
@Override
|
|
protected void verifyWarnings() throws Throwable {
|
|
String shouldPassMessage = String.format("JVM should start with "
|
|
+ "option '%s' without any warnings", optionName);
|
|
- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of
|
|
+ // Verify that on non-x86, non-SPARC, non-AArch64 and non-RISCV64 CPUs, usage of
|
|
// SHA-related options will not cause any warnings.
|
|
CommandLineOptionTest.verifySameJVMStartup(null,
|
|
new String[] { ".*" + optionName + ".*" }, shouldPassMessage,
|
|
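The OrPredicate chain in the hunk above gains one nesting level for Platform::isRISCV64, which is why the closing line now ends with one extra parenthesis. For easier auditing only, the equivalent flat condition is sketched below with a generic BooleanSupplier stand-in; the test must keep the predicate-object form expected by SHAOptionsBase.TestCase, so this class is illustrative and not part of the patch.

```java
import java.util.function.BooleanSupplier;

// Illustrative only: flat form of the NotPredicate(OrPredicate(...)) chain used above.
public final class OtherCpuPredicateSketch {
    static boolean isOtherCpu(BooleanSupplier isAArch64, BooleanSupplier isRISCV64,
                              BooleanSupplier isS390x, BooleanSupplier isSparc,
                              BooleanSupplier isPPC, BooleanSupplier isX64,
                              BooleanSupplier isX86) {
        return !(isAArch64.getAsBoolean() || isRISCV64.getAsBoolean()
              || isS390x.getAsBoolean()  || isSparc.getAsBoolean()
              || isPPC.getAsBoolean()    || isX64.getAsBoolean()
              || isX86.getAsBoolean());
    }

    public static void main(String[] args) {
        // On a riscv64 host the "other CPU" case does not apply.
        System.out.println(isOtherCpu(() -> false, () -> true, () -> false,
                                      () -> false, () -> false, () -> false,
                                      () -> false)); // false
    }
}
```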
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java
|
|
new file mode 100644
|
|
index 000000000..d81b5b53f
|
|
--- /dev/null
|
|
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java
|
|
@@ -0,0 +1,102 @@
|
|
+/*
|
|
+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+
|
|
+package compiler.intrinsics.sha.cli.testcases;
|
|
+
|
|
+import compiler.intrinsics.sha.cli.SHAOptionsBase;
|
|
+import jdk.test.lib.process.ExitCode;
|
|
+import jdk.test.lib.Platform;
|
|
+import jdk.test.lib.cli.CommandLineOptionTest;
|
|
+import jdk.test.lib.cli.predicate.AndPredicate;
|
|
+import jdk.test.lib.cli.predicate.NotPredicate;
|
|
+
|
|
+/**
|
|
+ * Generic test case for SHA-related options targeted to RISCV64 CPUs
|
|
+ * which don't support instruction required by the tested option.
|
|
+ */
|
|
+public class GenericTestCaseForUnsupportedRISCV64CPU extends
|
|
+ SHAOptionsBase.TestCase {
|
|
+ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) {
|
|
+ super(optionName, new AndPredicate(Platform::isRISCV64,
|
|
+ new NotPredicate(SHAOptionsBase.getPredicateForOption(
|
|
+ optionName))));
|
|
+ }
|
|
+
|
|
+ @Override
|
|
+ protected void verifyWarnings() throws Throwable {
|
|
+ String shouldPassMessage = String.format("JVM startup should pass with"
|
|
+ + "option '-XX:-%s' without any warnings", optionName);
|
|
+ // Verify that the option can be disabled without any warnings.
|
|
+ CommandLineOptionTest.verifySameJVMStartup(null, new String[] {
|
|
+ SHAOptionsBase.getWarningForUnsupportedCPU(optionName)
|
|
+ }, shouldPassMessage, shouldPassMessage, ExitCode.OK,
|
|
+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
|
|
+ CommandLineOptionTest.prepareBooleanFlag(optionName, false));
|
|
+
|
|
+ shouldPassMessage = String.format("If JVM is started with '-XX:-"
|
|
+ + "%s' '-XX:+%s', output should contain warning.",
|
|
+ SHAOptionsBase.USE_SHA_OPTION, optionName);
|
|
+
|
|
+ // Verify that when the tested option is enabled, then
|
|
+ // a warning will occur in VM output if UseSHA is disabled.
|
|
+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) {
|
|
+ CommandLineOptionTest.verifySameJVMStartup(
|
|
+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) },
|
|
+ null,
|
|
+ shouldPassMessage,
|
|
+ shouldPassMessage,
|
|
+ ExitCode.OK,
|
|
+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
|
|
+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false),
|
|
+ CommandLineOptionTest.prepareBooleanFlag(optionName, true));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ @Override
|
|
+ protected void verifyOptionValues() throws Throwable {
|
|
+ // Verify that option is disabled by default.
|
|
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
|
|
+ String.format("Option '%s' should be disabled by default",
|
|
+ optionName),
|
|
+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS);
|
|
+
|
|
+ // Verify that option is disabled even if it was explicitly enabled
|
|
+ // using CLI options.
|
|
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
|
|
+ String.format("Option '%s' should be off on unsupported "
|
|
+ + "RISCV64CPU even if set to true directly", optionName),
|
|
+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
|
|
+ CommandLineOptionTest.prepareBooleanFlag(optionName, true));
|
|
+
|
|
+ // Verify that option is disabled when +UseSHA was passed to JVM.
|
|
+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
|
|
+ String.format("Option '%s' should be off on unsupported "
|
|
+ + "RISCV64CPU even if %s flag set to JVM",
|
|
+ optionName, CommandLineOptionTest.prepareBooleanFlag(
|
|
+ SHAOptionsBase.USE_SHA_OPTION, true)),
|
|
+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
|
|
+ CommandLineOptionTest.prepareBooleanFlag(
|
|
+ SHAOptionsBase.USE_SHA_OPTION, true));
|
|
+ }
|
|
+}
|
|
diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java
new file mode 100644
index 000000000..d3aafec8e
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8173585
+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that
+ * differing code paths are taken contingent upon the length of the input String.
+ * Hence we must test against differing string lengths in order to validate
+ * correct functionality. We also ensure the strings are long enough to trigger
+ * the looping conditions of the individual code paths.
+ *
+ * Run with varying levels of AVX and SSE support, and also without the intrinsic at all.
+ *
+ * @library /compiler/patches /test/lib
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar
+ */
+
+package compiler.intrinsics.string;
+
+import jdk.test.lib.Asserts;
+
+public class TestStringLatin1IndexOfChar{
+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions
+
+ public static void main(String[] args) throws Exception {
+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code...
+ findOneItem();
+ withOffsetTest();
+ testEmpty();
+ }
+ }
+
+ private static void testEmpty(){
+ Asserts.assertEQ("".indexOf('a'), -1);
+ }
+
+ private final static char SEARCH_CHAR = 'z';
+ private final static char INVERLEAVING_CHAR = 'a';
+ private final static char MISSING_CHAR = 'd';
+
+ private static void findOneItem(){
+ //test strings of varying length ensuring that for all lengths one instance of the
+ //search char can be found. We check what happens when the search character is in
+ //each position of the search string (including first and last positions)
+ for(int strLength : new int[]{1, 15, 31, 32, 79}){
+ for(int searchPos = 0; searchPos < strLength; searchPos++){
+ String totest = makeOneItemStringLatin1(strLength, searchPos);
+
+ int intri = totest.indexOf(SEARCH_CHAR);
+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0);
+ Asserts.assertEQ(intri, nonintri);
+ }
+ }
+ }
+
+ private static String makeOneItemStringLatin1(int length, int searchPos){
+ StringBuilder sb = new StringBuilder(length);
+
+ for(int n =0; n < length; n++){
+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR);
+ }
+
+ return sb.toString();
+ }
+
+ private static void withOffsetTest(){
+ //progressively move through the string, checking that indexes and the starting offset are processed correctly
+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc
+ //we find n s.t. maxlength = (n*3) + 2
+ int maxaInstances = (MAX_LENGTH-2)/3;
+
+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){
+ String totest = makeWithOffsetStringLatin1(aInstances);
+
+ int startoffset;
+ {
+ int intri = totest.indexOf(SEARCH_CHAR);
+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0);
+
+ Asserts.assertEQ(intri, nonintri);
+ startoffset = intri+1;
+ }
+
+ {
+ int intri = totest.indexOf(SEARCH_CHAR, startoffset);
+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset);
+
+ Asserts.assertEQ(intri, nonintri);
+ startoffset = intri+1;
+ }
+
+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string
+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1);
+ }
+ }
+
+ private static String makeWithOffsetStringLatin1(int aInstances){
+ StringBuilder sb = new StringBuilder((aInstances*3) + 2);
+ for(int n =0; n < aInstances; n++){
+ sb.append(INVERLEAVING_CHAR);
+ }
+
+ sb.append(SEARCH_CHAR);
+
+ for(int n =0; n < aInstances; n++){
+ sb.append(INVERLEAVING_CHAR);
+ }
+
+ sb.append(SEARCH_CHAR);
+
+ for(int n =0; n < aInstances; n++){
+ sb.append(INVERLEAVING_CHAR);
+ }
+ return sb.toString();
+ }
+
+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) {
+ //non intrinsic version of indexOfChar
+ byte c = (byte)ch;
+ for (int i = fromIndex; i < value.length(); i++) {
+ if (value.charAt(i) == c) {
+ return i;
+ }
+ }
+ return -1;
+ }
+}
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
index 2e3e2717a..8093d6598 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
index 0e06a9e43..1ff9f36e1 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
index c3cdbf374..f3531ea74 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
index d33bd411f..589209447 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions
* -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
index 992fa4b51..907e21371 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
@@ -25,7 +25,7 @@
* @test
* @bug 8138583
* @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test
- * @requires os.arch=="aarch64"
+ * @requires os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
index 3e79b3528..c41c0b606 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
@@ -25,7 +25,7 @@
* @test
* @bug 8138583
* @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test
- * @requires os.arch=="aarch64"
+ * @requires os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
index 6603dd224..b626da40d 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
@@ -25,7 +25,7 @@
* @test
* @bug 8135028
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
index d9a0c9880..92cd84a2f 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
index 722db95ae..e72345799 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
index f58f21feb..f4f67cf52 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
@@ -25,7 +25,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
* -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
index acb86812d..c5e38ba72 100644
--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
@@ -24,7 +24,7 @@

/* @test
* @bug 8167409
- * @requires (os.arch != "aarch64") & (os.arch != "arm")
+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64")
* @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs
*/
package compiler.runtime.criticalnatives.argumentcorruption;
diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
index eab36f931..4437367b6 100644
--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
@@ -24,7 +24,7 @@

/* @test
* @bug 8167408
- * @requires (os.arch != "aarch64") & (os.arch != "arm")
+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64")
* @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp
*/
package compiler.runtime.criticalnatives.lookup;
diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
index 7774dabcb..284b51019 100644
--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
@@ -61,15 +61,17 @@ public class IntrinsicPredicates {

public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null),
+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null),
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null),
new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null),
// x86 variants
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null),
new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null),
- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))));
+ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))));

public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null),
+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null),
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null),
new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null),
new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null),
@@ -79,10 +81,11 @@ public class IntrinsicPredicates {
new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null),
new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null),
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null),
- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))));
+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))));

public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null),
+ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null),
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null),
new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null),
new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null),
@@ -92,7 +95,7 @@ public class IntrinsicPredicates {
new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null),
new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null),
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null),
- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))));
+ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))));

public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE
= new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,
diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
index 57256aa5a..16c199e37 100644
--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace {
// It's ok for ARM not to have symbols, because it does not support NMT detail
// when targeting thumb2. It's also ok for Windows not to have symbols, because
// they are only available if the symbols file is included with the build.
- if (Platform.isWindows() || Platform.isARM()) {
+ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) {
return; // we are done
}
output.reportDiagnosticSummary();
diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
index 127bb6abc..46be4dc98 100644
--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -239,7 +240,7 @@ public class ReservedStackTest {
return Platform.isAix() ||
(Platform.isLinux() &&
(Platform.isPPC() || Platform.isS390x() || Platform.isX64() ||
- Platform.isX86())) ||
+ Platform.isX86() || Platform.isRISCV64())) ||
Platform.isOSX() ||
Platform.isSolaris();
}
diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh
index 0c300d4fd..7f3698c47 100644
--- a/test/hotspot/jtreg/test_env.sh
+++ b/test/hotspot/jtreg/test_env.sh
@@ -185,6 +185,11 @@ if [ $? = 0 ]
then
VM_CPU="arm"
fi
+grep "riscv64" vm_version.out > ${NULL}
+if [ $? = 0 ]
+then
+ VM_CPU="riscv64"
+fi
grep "ppc" vm_version.out > ${NULL}
if [ $? = 0 ]
then
diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
index 77458554b..73e92855d 100644
--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -45,7 +46,7 @@ import java.util.Set;
*/
public class TestMutuallyExclusivePlatformPredicates {
private static enum MethodGroup {
- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"),
+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"),
BITNESS("is32bit", "is64bit"),
OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"),
VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"),
diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java
index cb3348a0f..bc0d1a743 100644
--- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java
+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java
@@ -63,13 +63,13 @@ public class thrinfo001 {
try {
t_a.join();
} catch (InterruptedException e) {}
+ checkInfo(t_a, t_a.getThreadGroup(), 1);

thrinfo001b t_b = new thrinfo001b();
t_b.setPriority(Thread.MIN_PRIORITY);
t_b.setDaemon(true);
checkInfo(t_b, t_b.getThreadGroup(), 2);
t_b.start();
- checkInfo(t_b, t_b.getThreadGroup(), 2);
try {
t_b.join();
} catch (InterruptedException e) {}
diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
index 7990c49a1..bb8c79cdd 100644
--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -54,8 +55,8 @@ public class TestCPUInformation {
Events.assertField(event, "hwThreads").atLeast(1);
Events.assertField(event, "cores").atLeast(1);
Events.assertField(event, "sockets").atLeast(1);
- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390");
- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390");
+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64");
+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64");
}
}
}
diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java
index f4ee0546c..a9cd63db9 100644
--- a/test/lib/jdk/test/lib/Platform.java
+++ b/test/lib/jdk/test/lib/Platform.java
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -202,6 +203,10 @@ public class Platform {
return isArch("arm.*");
}

+ public static boolean isRISCV64() {
+ return isArch("riscv64");
+ }
+
public static boolean isPPC() {
return isArch("ppc.*");
}
diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java
new file mode 100644
index 000000000..6852c0540
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java
@@ -0,0 +1,221 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import java.util.Random;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of
+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified.
+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class StringIndexOfChar {
+ private static final int loops = 100000;
+ private static final Random rng = new Random(1999);
+ private static final int pathCnt = 1000;
+ private static final String [] latn1_short = new String[pathCnt];
+ private static final String [] latn1_sse4 = new String[pathCnt];
+ private static final String [] latn1_avx2 = new String[pathCnt];
+ private static final String [] latn1_mixedLength = new String[pathCnt];
+ private static final String [] utf16_short = new String[pathCnt];
+ private static final String [] utf16_sse4 = new String[pathCnt];
+ private static final String [] utf16_avx2 = new String[pathCnt];
+ private static final String [] utf16_mixedLength = new String[pathCnt];
+ static {
+ for (int i = 0; i < pathCnt; i++) {
+ latn1_short[i] = makeRndString(false, 15);
+ latn1_sse4[i] = makeRndString(false, 16);
+ latn1_avx2[i] = makeRndString(false, 32);
+ utf16_short[i] = makeRndString(true, 7);
+ utf16_sse4[i] = makeRndString(true, 8);
+ utf16_avx2[i] = makeRndString(true, 16);
+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65));
+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65));
+ }
+ }
+
+ private static String makeRndString(boolean isUtf16, int length) {
+ StringBuilder sb = new StringBuilder(length);
+ if(length > 0){
+ sb.append(isUtf16?'☺':'b');
+
+ for (int i = 1; i < length-1; i++) {
+ sb.append((char)('b' + rng.nextInt(26)));
+ }
+
+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string
+ }
+ return sb.toString();
+ }
+
+
+ @Benchmark
+ public static void latin1_mixed_char() {
+ int ret = 0;
+ for (String what : latn1_mixedLength) {
+ ret += what.indexOf('a');
+ }
+ }
+
+ @Benchmark
+ public static void utf16_mixed_char() {
+ int ret = 0;
+ for (String what : utf16_mixedLength) {
+ ret += what.indexOf('a');
+ }
+ }
+
+ @Benchmark
+ public static void latin1_mixed_String() {
+ int ret = 0;
+ for (String what : latn1_mixedLength) {
+ ret += what.indexOf("a");
+ }
+ }
+
+ @Benchmark
+ public static void utf16_mixed_String() {
+ int ret = 0;
+ for (String what : utf16_mixedLength) {
+ ret += what.indexOf("a");
+ }
+ }
+
+ ////////// more detailed code path dependent tests //////////
+
+ @Benchmark
+ public static void latin1_Short_char() {
+ int ret = 0;
+ for (String what : latn1_short) {
+ ret += what.indexOf('a');
+ }
+ }
+
+ @Benchmark
+ public static void latin1_SSE4_char() {
+ int ret = 0;
+ for (String what : latn1_sse4) {
+ ret += what.indexOf('a');
+ }
+ }
+
+ @Benchmark
+ public static void latin1_AVX2_char() {
+ int ret = 0;
+ for (String what : latn1_avx2) {
+ ret += what.indexOf('a');
+ }
+ }
+
+ @Benchmark
+ public static int utf16_Short_char() {
+ int ret = 0;
+ for (String what : utf16_short) {
+ ret += what.indexOf('a');
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int utf16_SSE4_char() {
+ int ret = 0;
+ for (String what : utf16_sse4) {
+ ret += what.indexOf('a');
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int utf16_AVX2_char() {
+ int ret = 0;
+ for (String what : utf16_avx2) {
+ ret += what.indexOf('a');
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int latin1_Short_String() {
+ int ret = 0;
+ for (String what : latn1_short) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int latin1_SSE4_String() {
+ int ret = 0;
+ for (String what : latn1_sse4) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int latin1_AVX2_String() {
+ int ret = 0;
+ for (String what : latn1_avx2) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int utf16_Short_String() {
+ int ret = 0;
+ for (String what : utf16_short) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int utf16_SSE4_String() {
+ int ret = 0;
+ for (String what : utf16_sse4) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+
+ @Benchmark
+ public static int utf16_AVX2_String() {
+ int ret = 0;
+ for (String what : utf16_avx2) {
+ ret += what.indexOf("a");
+ }
+ return ret;
+ }
+}
--
2.40.0.windows.1