diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml
index 385b097b9f..4eebe79871 100644
--- a/.github/workflows/build-cross-compile.yml
+++ b/.github/workflows/build-cross-compile.yml
@@ -54,28 +54,39 @@ jobs:
           - arm
           - s390x
           - ppc64le
+          - riscv64
         include:
           - target-cpu: aarch64
             gnu-arch: aarch64
             debian-arch: arm64
             debian-repository: https://httpredir.debian.org/debian/
             debian-version: bullseye
+            tolerate-sysroot-errors: false
           - target-cpu: arm
             gnu-arch: arm
             debian-arch: armhf
             debian-repository: https://httpredir.debian.org/debian/
             debian-version: bullseye
+            tolerate-sysroot-errors: false
             gnu-abi: eabihf
           - target-cpu: s390x
             gnu-arch: s390x
             debian-arch: s390x
             debian-repository: https://httpredir.debian.org/debian/
             debian-version: bullseye
+            tolerate-sysroot-errors: false
           - target-cpu: ppc64le
             gnu-arch: powerpc64le
             debian-arch: ppc64el
             debian-repository: https://httpredir.debian.org/debian/
             debian-version: bullseye
+            tolerate-sysroot-errors: false
+          - target-cpu: riscv64  # matrix entry describing the riscv64 cross-compile target
+            gnu-arch: riscv64
+            debian-arch: riscv64  # passed to debootstrap as --arch when the sysroot is created
+            debian-repository: https://snapshot.debian.org/archive/debian/20240228T034848Z/  # timestamped snapshot.debian.org archive, unlike the httpredir mirror used by the other targets
+            debian-version: sid
+            tolerate-sysroot-errors: true  # becomes continue-on-error of 'Create sysroot'; later steps test that step's outcome
 
     steps:
       - name: 'Checkout the JDK source'
@@ -113,6 +124,7 @@ jobs:
         if: steps.get-cached-sysroot.outputs.cache-hit != 'true'
 
       - name: 'Create sysroot'
+        id: create-sysroot
         run: >
           sudo debootstrap
           --arch=${{ matrix.debian-arch }}
@@ -123,6 +135,7 @@ jobs:
           ${{ matrix.debian-version }}
           sysroot
           ${{ matrix.debian-repository }}
+        continue-on-error: ${{ matrix.tolerate-sysroot-errors }}
         if: steps.get-cached-sysroot.outputs.cache-hit != 'true'
 
       - name: 'Prepare sysroot'
@@ -134,7 +147,12 @@ jobs:
           rm -rf sysroot/usr/{sbin,bin,share}
           rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd}
           rm -rf sysroot/usr/libexec/gcc
-        if: steps.get-cached-sysroot.outputs.cache-hit != 'true'
+        if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true'
+
+      - name: 'Remove broken sysroot'  # runs only when there was no cache hit and 'Create sysroot' did not succeed
+        run: |
+          sudo rm -rf sysroot/
+        if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true'
 
       - name: 'Configure'
         run: >
@@ -153,6 +171,7 @@ jobs:
           echo "Dumping config.log:" &&
           cat config.log &&
           exit 1)
+        if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true'
 
       - name: 'Build'
         id: build
@@ -160,3 +179,4 @@ jobs:
         with:
           make-target: 'hotspot ${{ inputs.make-arguments }}'
           platform: linux-${{ matrix.target-cpu }}
+        if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true'
diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml
index 4186c451b7..678f5a038e 100644
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -55,7 +55,7 @@ on:
 jobs:
   build-macos:
     name: build
-    runs-on: macos-13
+    runs-on: macos-12
 
     strategy:
       fail-fast: false
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 78a8e1e0d4..46cae3afbf 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -223,7 +223,7 @@ jobs:
     uses: ./.github/workflows/build-macos.yml
     with:
       platform: macos-x64
-      xcode-toolset-version: '14.3.1'
+      xcode-toolset-version: '13.4.1'
       configure-arguments: ${{ github.event.inputs.configure-arguments }}
       make-arguments: ${{ github.event.inputs.make-arguments }}
     if: needs.select.outputs.macos-x64 == 'true'
@@ -234,7 +234,7 @@ jobs:
     uses: ./.github/workflows/build-macos.yml
     with:
       platform: macos-aarch64
-      xcode-toolset-version: '14.3.1'
+      xcode-toolset-version: '13.4.1'
       extra-conf-options: '--openjdk-target=aarch64-apple-darwin'
       configure-arguments: ${{ github.event.inputs.configure-arguments }}
       make-arguments: ${{ github.event.inputs.make-arguments }}
@@ -298,7 +298,7 @@ jobs:
     with:
       platform: macos-x64
       bootjdk-platform: macos-x64
-      runs-on: macos-13
+      runs-on: macos-12
 
   test-windows-x64:
     name: windows-x64
@@ -341,7 +341,7 @@ jobs:
               -H 'Accept: application/vnd.github+json' \
               -H 'Authorization: Bearer ${{ github.token }}' \
               -H 'X-GitHub-Api-Version: 2022-11-28' \
-              '${{ github.api_url }}/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts')"
+              '${{ github.api_url }}/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts?per_page=100')"
           BUNDLE_ARTIFACT_IDS="$(echo "$ALL_ARTIFACT_IDS" | jq -r -c '.artifacts | map(select(.name|startswith("bundles-"))) | .[].id')"
           for id in $BUNDLE_ARTIFACT_IDS; do
             echo "Removing $id"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c3560f2135..dacf8eaba1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -127,7 +127,7 @@ jobs:
         run: |
           # On macOS we need to install some dependencies for testing
           brew install make
-          sudo xcode-select --switch /Applications/Xcode_14.3.1.app/Contents/Developer
+          sudo xcode-select --switch /Applications/Xcode_13.4.1.app/Contents/Developer
           # This will make GNU make available as 'make' and not only as 'gmake'
           echo '/usr/local/opt/make/libexec/gnubin' >> $GITHUB_PATH
         if: runner.os == 'macOS'
diff --git a/.jcheck/conf b/.jcheck/conf
index 5636278120..d13b1bf5e8 100644
--- a/.jcheck/conf
+++ b/.jcheck/conf
@@ -1,5 +1,5 @@
 [general]
-project=jdk-updates
+project=riscv-port
 jbs=JDK
 version=11.0.25
 
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000000..f4c5e7e67c
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,3 @@
+# JDK Vulnerabilities
+
+Please follow the process outlined in the [OpenJDK Vulnerability Policy](https://openjdk.org/groups/vulnerability/report) to disclose vulnerabilities in the JDK.
diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub
index 3c280ac7c0..6c66c221e0 100644
--- a/make/autoconf/build-aux/config.sub
+++ b/make/autoconf/build-aux/config.sub
@@ -1,6 +1,6 @@
 #!/bin/sh
 #
-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then
     exit
 fi
 
+# Canonicalize riscv32/riscv64 "<cpu>-linux..." triplets here, inserting the "unknown" vendor field, because autoconf-config.sub doesn't handle riscv
+if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then
+    result=`echo $@ | sed 's/linux/unknown-linux/'`  # no g flag: only the first "linux" is rewritten
+    echo $result
+    exit
+fi
+
 # First, filter out everything that doesn't begin with "aarch64-"
 if ! echo $* | grep '^aarch64-' >/dev/null ; then
     . $DIR/autoconf-config.sub "$@"
diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4
index 9bb34363e5..f84e8f84c6 100644
--- a/make/autoconf/hotspot.m4
+++ b/make/autoconf/hotspot.m4
@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
   AC_MSG_CHECKING([if shenandoah can be built])
   if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then
     if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \
-       test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
+       test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \
+       test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then
       AC_MSG_RESULT([yes])
     else
       DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc"
diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4
index 5d1d9efa39..565ca18e20 100644
--- a/make/autoconf/platform.m4
+++ b/make/autoconf/platform.m4
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
     HOTSPOT_$1_CPU_DEFINE=PPC64
   elif test "x$OPENJDK_$1_CPU" = xppc64le; then
     HOTSPOT_$1_CPU_DEFINE=PPC64
+  elif test "x$OPENJDK_$1_CPU" = xriscv64; then
+    HOTSPOT_$1_CPU_DEFINE=RISCV64
 
   # The cpu defines below are for zero, we don't support them directly.
   elif test "x$OPENJDK_$1_CPU" = xsparc; then
diff --git a/make/autoconf/version-numbers b/make/autoconf/version-numbers
index fe5e0d9850..c02b769bf2 100644
--- a/make/autoconf/version-numbers
+++ b/make/autoconf/version-numbers
@@ -37,7 +37,7 @@ DEFAULT_VERSION_DATE=2024-10-15
 DEFAULT_VERSION_CLASSFILE_MAJOR=55  # "`$EXPR $DEFAULT_VERSION_FEATURE + 44`"
 DEFAULT_VERSION_CLASSFILE_MINOR=0
 DEFAULT_ACCEPTABLE_BOOT_VERSIONS="10 11"
-DEFAULT_PROMOTED_VERSION_PRE=
+DEFAULT_PROMOTED_VERSION_PRE=ea
 
 LAUNCHER_NAME=openjdk
 PRODUCT_NAME=OpenJDK
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index c5a3ac5724..51137b99db 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
       $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \
     )))
 
+  ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv)
+    AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+        $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \
+    )))
+  endif
+
   ifeq ($(call check-jvm-feature, shenandoahgc), true)
     AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
         $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
diff --git a/make/hotspot/lib/JvmFlags.gmk b/make/hotspot/lib/JvmFlags.gmk
index 3246c83155..1a91eb0079 100644
--- a/make/hotspot/lib/JvmFlags.gmk
+++ b/make/hotspot/lib/JvmFlags.gmk
@@ -67,10 +67,12 @@ JVM_CFLAGS_TARGET_DEFINES += \
     #
 
 ifeq ($(DEBUG_LEVEL), release)
+  # Release builds define NDEBUG, which disables the assert macro from <assert.h>.
+  JVM_CFLAGS_DEBUGLEVEL := -DNDEBUG
   # For hotspot, release builds differ internally between "optimized" and "product"
   # in that "optimize" does not define PRODUCT.
   ifneq ($(HOTSPOT_DEBUG_LEVEL), optimized)
-    JVM_CFLAGS_DEBUGLEVEL := -DPRODUCT
+    JVM_CFLAGS_DEBUGLEVEL += -DPRODUCT
   endif
 else ifeq ($(DEBUG_LEVEL), fastdebug)
   JVM_CFLAGS_DEBUGLEVEL := -DASSERT
diff --git a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java
index f02537c305..ef94e3879c 100644
--- a/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java
+++ b/make/jdk/src/classes/build/tools/tzdb/TzdbZoneRulesProvider.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -363,33 +363,35 @@ class TzdbZoneRulesProvider {
         }
 
         Month parseMonth(String mon) {
-            switch (mon) {
-            case "Jan": return Month.JANUARY;
-            case "Feb": return Month.FEBRUARY;
-            case "Mar": return Month.MARCH;
-            case "Apr": return Month.APRIL;
-            case "May": return Month.MAY;
-            case "Jun": return Month.JUNE;
-            case "Jul": return Month.JULY;
-            case "Aug": return Month.AUGUST;
-            case "Sep": return Month.SEPTEMBER;
-            case "Oct": return Month.OCTOBER;
-            case "Nov": return Month.NOVEMBER;
-            case "Dec": return Month.DECEMBER;
-            }
+            int len = mon.length(); // mon may be abbreviated: it is matched as a case-insensitive prefix of each full name, first match wins
+            if (len == 0) throw new IllegalArgumentException("Unknown month: " + mon); // a zero-length region matches every name, so "" would otherwise parse as JANUARY
+            if (mon.regionMatches(true, 0, "January", 0, len)) return Month.JANUARY;
+            if (mon.regionMatches(true, 0, "February", 0, len)) return Month.FEBRUARY;
+            if (mon.regionMatches(true, 0, "March", 0, len)) return Month.MARCH;
+            if (mon.regionMatches(true, 0, "April", 0, len)) return Month.APRIL;
+            if (mon.regionMatches(true, 0, "May", 0, len)) return Month.MAY;
+            if (mon.regionMatches(true, 0, "June", 0, len)) return Month.JUNE;
+            if (mon.regionMatches(true, 0, "July", 0, len)) return Month.JULY;
+            if (mon.regionMatches(true, 0, "August", 0, len)) return Month.AUGUST;
+            if (mon.regionMatches(true, 0, "September", 0, len)) return Month.SEPTEMBER;
+            if (mon.regionMatches(true, 0, "October", 0, len)) return Month.OCTOBER;
+            if (mon.regionMatches(true, 0, "November", 0, len)) return Month.NOVEMBER;
+            if (mon.regionMatches(true, 0, "December", 0, len)) return Month.DECEMBER;
+
             throw new IllegalArgumentException("Unknown month: " + mon);
         }
 
         DayOfWeek parseDayOfWeek(String dow) {
-            switch (dow) {
-            case "Mon": return DayOfWeek.MONDAY;
-            case "Tue": return DayOfWeek.TUESDAY;
-            case "Wed": return DayOfWeek.WEDNESDAY;
-            case "Thu": return DayOfWeek.THURSDAY;
-            case "Fri": return DayOfWeek.FRIDAY;
-            case "Sat": return DayOfWeek.SATURDAY;
-            case "Sun": return DayOfWeek.SUNDAY;
-            }
+            int len = dow.length(); // dow may be abbreviated: it is matched as a case-insensitive prefix of each full name, first match wins
+            if (len == 0) throw new IllegalArgumentException("Unknown day-of-week: " + dow); // a zero-length region matches every name, so "" would otherwise parse as MONDAY
+            if (dow.regionMatches(true, 0, "Monday", 0, len)) return DayOfWeek.MONDAY;
+            if (dow.regionMatches(true, 0, "Tuesday", 0, len)) return DayOfWeek.TUESDAY;
+            if (dow.regionMatches(true, 0, "Wednesday", 0, len)) return DayOfWeek.WEDNESDAY;
+            if (dow.regionMatches(true, 0, "Thursday", 0, len)) return DayOfWeek.THURSDAY;
+            if (dow.regionMatches(true, 0, "Friday", 0, len)) return DayOfWeek.FRIDAY;
+            if (dow.regionMatches(true, 0, "Saturday", 0, len)) return DayOfWeek.SATURDAY;
+            if (dow.regionMatches(true, 0, "Sunday", 0, len)) return DayOfWeek.SUNDAY;
+
             throw new IllegalArgumentException("Unknown day-of-week: " + dow);
         }
 
diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp
new file mode 100644
index 0000000000..31c63abe71
--- /dev/null
+++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/method.hpp"
+#include "runtime/frame.inline.hpp"
+#include "utilities/align.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {  // maps a BasicType to an index in [0, number_of_result_handlers)
+  int i = 0;
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : i = 9; break;
+    case T_ARRAY  : i = 9; break;  // arrays share the T_OBJECT index
+    default       : ShouldNotReachHere();  // any other BasicType is not expected here
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers,
+         "index out of bounds");
+  return i;
+}
+
+// How much stack, in words, an activation of the given method needs: fixed overhead + locals and expression stack + entry-frame words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+  const int entry_size = frame::interpreter_frame_monitor_size();  // the per-monitor size, counted once here
+
+  // Total overhead size: entry_size + (saved fp through expression stack
+  // bottom). Be sure to change this if you add/subtract anything
+  // to/from the overhead area.
+  const int overhead_size =
+    -(frame::interpreter_frame_initial_sp_offset) + entry_size;
+
+  const int stub_code = frame::entry_frame_after_call_words;
+  assert_cond(method != NULL);
+  const int method_stack = (method->max_locals() + method->max_stack()) *
+                           Interpreter::stackElementWords;  // locals plus expression stack, scaled by stackElementWords
+  return (overhead_size + method_stack + stub_code);
+}
+
+// asm based interpreter deoptimization helpers: size (in words) of one interpreter activation
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in TemplateInterpreterGenerator::generate_method_entry.
+
+  // fixed size of an interpreter frame:
+  int overhead = frame::sender_sp_offset -
+                 frame::interpreter_frame_initial_sp_offset;
+  // Our locals were accounted for by the caller (or last_frame_adjust
+  // on the transition). Since the callee parameters already account
+  // for the callee's params we only need to account for the extra
+  // locals.
+  int size = overhead +
+             (callee_locals - callee_params) +
+             monitors * frame::interpreter_frame_monitor_size() +
+             // On the top frame, at all times SP <= ESP, and SP is
+             // 16-aligned.  We ensure this by adjusting SP on method
+             // entry and re-entry to allow room for the maximum size of
+             // the expression stack.  When we call another method we bump
+             // SP so that no stack space is wasted.  So, only on the top
+             // frame do we need to allow max_stack words.
+             (is_top_frame ? max_stack : temps + extra_args);
+
+  // On riscv we always keep the stack pointer 16-aligned, so we
+  // must round up here.
+  size = align_up(size, 2);  // size is in words; 2 words are 16 bytes with 8-byte words
+
+  return size;
+}
+
+void AbstractInterpreter::layout_activation(Method* method,  // fills in method, locals, monitor end, last_sp, sender_sp, cache and mirror of a skeletal interpreter frame
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,  // not read by this implementation
+                                            int callee_locals,  // not read by this implementation
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,  // not read by this implementation
+                                            bool is_bottom_frame) {  // not read by this implementation
+  // The frame interpreter_frame is guaranteed to be the right size,
+  // as determined by a previous call to the size_activation() method.
+  // It is also guaranteed to be walkable even though it is in a
+  // skeletal state.
+  assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL);
+  int max_locals = method->max_locals() * Interpreter::stackElementWords;
+  int extra_locals = (method->max_locals() - method->size_of_parameters()) *
+    Interpreter::stackElementWords;  // locals that are not incoming parameters
+
+#ifdef ASSERT
+  assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable");
+#endif
+
+  interpreter_frame->interpreter_frame_set_method(method);
+  // NOTE the difference between sender_sp and interpreter_frame_sender_sp:
+  // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp)
+  // and sender_sp is fp.
+  intptr_t* locals = NULL;
+  if (caller->is_interpreted_frame()) {
+    locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1;  // derived from the caller's last_sp and actual parameter count
+  } else {
+    locals = interpreter_frame->sender_sp() + max_locals - 1;  // non-interpreted caller: derived from this frame's sender_sp and max_locals
+  }
+
+#ifdef ASSERT
+  if (caller->is_interpreted_frame()) {
+    assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement");
+  }
+#endif
+
+  interpreter_frame->interpreter_frame_set_locals(locals);
+  BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin();
+  BasicObjectLock* monbot = montop - moncount;  // monitor end lies moncount entries below monitor begin
+  interpreter_frame->interpreter_frame_set_monitor_end(monbot);
+
+  // Set last_sp: below the monitors, the temps and any popframe extra args
+  intptr_t* last_sp = (intptr_t*) monbot -
+    tempcount*Interpreter::stackElementWords -
+    popframe_extra_args;
+  interpreter_frame->interpreter_frame_set_last_sp(last_sp);
+
+  // All frames but the initial (oldest) interpreter frame we fill in have
+  // a value for sender_sp that allows walking the stack but isn't
+  // truly correct. Correct the value here.
+  if (extra_locals != 0 &&
+      interpreter_frame->sender_sp() ==
+      interpreter_frame->interpreter_frame_sender_sp()) {
+    interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() +
+                                                       extra_locals);
+  }
+
+  *interpreter_frame->interpreter_frame_cache_addr() =
+    method->constants()->cache();  // store the method's constant pool cache in the frame
+  *interpreter_frame->interpreter_frame_mirror_addr() =
+    method->method_holder()->java_mirror();  // store the holder class's java mirror in the frame
+}
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp
new file mode 100644
index 0000000000..67c6f1eccb
--- /dev/null
+++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+int AbstractAssembler::code_fill_byte() {  // riscv reports 0 as its code fill byte
+  return 0;
+}
+
+void Assembler::add(Register Rd, Register Rn, int64_t increment, Register temp) {  // emits Rd = Rn + increment
+  if (is_imm_in_range(increment, 12, 0)) {  // increment passes the 12-bit range check: a single addi suffices
+    addi(Rd, Rn, increment);
+  } else {
+    assert_different_registers(Rn, temp);  // temp is overwritten by li before Rn is read
+    li(temp, increment);
+    add(Rd, Rn, temp);
+  }
+}
+
+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) {  // word variant of add(): uses addiw/addw
+  if (is_imm_in_range(increment, 12, 0)) {  // increment passes the 12-bit range check: a single addiw suffices
+    addiw(Rd, Rn, increment);
+  } else {
+    assert_different_registers(Rn, temp);  // temp is overwritten by li before Rn is read
+    li(temp, increment);
+    addw(Rd, Rn, temp);
+  }
+}
+
+void Assembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) {  // emits Rd = Rn - decrement
+  if (is_imm_in_range(-decrement, 12, 0)) {  // the negated value passes the 12-bit range check: emit addi with -decrement
+    addi(Rd, Rn, -decrement);
+  } else {
+    assert_different_registers(Rn, temp);  // temp is overwritten by li before Rn is read
+    li(temp, decrement);
+    sub(Rd, Rn, temp);
+  }
+}
+
+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) {  // word variant of sub(): uses addiw/subw
+  if (is_imm_in_range(-decrement, 12, 0)) {  // the negated value passes the 12-bit range check: emit addiw with -decrement
+    addiw(Rd, Rn, -decrement);
+  } else {
+    assert_different_registers(Rn, temp);  // temp is overwritten by li before Rn is read
+    li(temp, decrement);
+    subw(Rd, Rn, temp);
+  }
+}
+
+void Assembler::zext_w(Register Rd, Register Rs) {  // implemented as add_uw with zr as the second source
+  add_uw(Rd, Rs, zr);
+}
+
+void Assembler::_li(Register Rd, int64_t imm) {  // materializes the 64-bit constant imm in Rd
+  // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff
+  int shift = 12;
+  int64_t upper = imm, lower = imm;
+  // Split imm to a lower 12-bit sign-extended part and the remainder,
+  // because addi will sign-extend the lower imm.
+  lower = ((int32_t)imm << 20) >> 20;
+  upper -= lower;  // NOTE(review): for imm near INT64_MAX with a negative lower this exceeds int64_t; confirm wraparound is relied upon
+
+  // Test whether imm is a 32-bit integer.
+  if (!(((imm) & ~(int64_t)0x7fffffff) == 0 ||
+        (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) {
+    while (((upper >> shift) & 1) == 0) { shift++; }  // find the lowest set bit of upper at or above bit 12
+    upper >>= shift;
+    li(Rd, upper);  // load the shifted-down upper part (li is declared elsewhere; presumably dispatches back here, confirm)
+    slli(Rd, Rd, shift);
+    if (lower != 0) {
+      addi(Rd, Rd, lower);
+    }
+  } else {
+    // 32-bit integer
+    Register hi_Rd = zr;
+    if (upper != 0) {
+      lui(Rd, (int32_t)upper);
+      hi_Rd = Rd;
+    }
+    if (lower != 0 || hi_Rd == zr) {  // also taken when no lui was emitted, so Rd is always written
+      addiw(Rd, hi_Rd, lower);
+    }
+  }
+}
+
+void Assembler::li32(Register Rd, int32_t imm) {  // always emits both lui and addiw, whatever the value of imm
+  // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit
+  int64_t upper = imm, lower = imm;
+  lower = (imm << 20) >> 20;  // low 12 bits of imm, sign-extended
+  upper -= lower;  // remainder; compensates for addiw sign-extending lower
+  upper = (int32_t)upper;
+  // lui Rd, imm[31:12] + imm[11]
+  lui(Rd, upper);
+  addiw(Rd, Rd, lower);
+}
+
+#define INSN(NAME, REGISTER)                                       \
+  void Assembler::NAME(const address &dest, Register temp) {       \
+    assert_cond(dest != NULL);                                     \
+    int64_t distance = dest - pc();                                \
+    if (is_imm_in_range(distance, 20, 1)) {                        \
+      jal(REGISTER, distance);                                     \
+    } else {                                                       \
+      assert(temp != noreg, "temp must not be empty register!");   \
+      int32_t offset = 0;                                          \
+      movptr_with_offset(temp, dest, offset);                      \
+      jalr(REGISTER, temp, offset);                                \
+    }                                                              \
+  }                                                                \
+  void Assembler::NAME(Label &l, Register temp) {                  \
+    jal(REGISTER, l, temp);                                        \
+  }                                                                \
+
+  INSN(j,   x0);  // j(dest|label, temp): links into x0; one jal when the distance passes the range check, else movptr_with_offset into temp + jalr
+  INSN(jal, x1);  // jal(dest|label, temp): same code shape, links into x1
+
+#undef INSN
+
+#define INSN(NAME, REGISTER)                                       \
+  void Assembler::NAME(Register Rs) {                              \
+    jalr(REGISTER, Rs, 0);                                         \
+  }
+
+  INSN(jr,   x0);  // jr(Rs): jalr to Rs with offset 0, linking into x0
+  INSN(jalr, x1);  // jalr(Rs): jalr to Rs with offset 0, linking into x1
+
+#undef INSN
+
+void Assembler::ret() {  // return: jump to the address held in x1, linking into x0
+  jalr(x0, x1, 0);
+}
+
+#define INSN(NAME, REGISTER)                                      \
+  void Assembler::NAME(const address &dest, Register temp) {      \
+    assert_cond(dest != NULL);                                    \
+    assert(temp != noreg, "temp must not be empty register!");    \
+    int64_t distance = dest - pc();                               \
+    if (is_offset_in_range(distance, 32)) {                       \
+      auipc(temp, distance + 0x800);                              \
+      jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20);      \
+    } else {                                                      \
+      int32_t offset = 0;                                         \
+      movptr_with_offset(temp, dest, offset);                     \
+      jalr(REGISTER, temp, offset);                               \
+    }                                                             \
+  }
+
+  INSN(call, x1);  // call(dest, temp): auipc+jalr when the distance passes the 32-bit offset check, else movptr_with_offset+jalr; links into x1
+  INSN(tail, x0);  // tail(dest, temp): same sequences, linking into x0
+
+#undef INSN
+
+#define INSN(NAME, REGISTER)                                   \
+  void Assembler::NAME(const Address &adr, Register temp) {    \
+    switch (adr.getMode()) {                                   \
+      case Address::literal: {                                 \
+        relocate(adr.rspec());                                 \
+        NAME(adr.target(), temp);                              \
+        break;                                                 \
+      }                                                        \
+      case Address::base_plus_offset: {                        \
+        int32_t offset = 0;                                    \
+        baseOffset(temp, adr, offset);                         \
+        jalr(REGISTER, temp, offset);                          \
+        break;                                                 \
+      }                                                        \
+      default:                                                 \
+        ShouldNotReachHere();                                  \
+    }                                                          \
+  }
+
+  INSN(j,    x0);  // Address overloads: a literal address is relocated, then forwarded to the address overload of the same name
+  INSN(jal,  x1);  // base_plus_offset addresses go through baseOffset() into temp, followed by jalr
+  INSN(call, x1);  // any other addressing mode hits ShouldNotReachHere()
+  INSN(tail, x0);
+
+#undef INSN
+
+void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn,  // emits a compare-and-branch to label L
+                           compare_and_branch_label_insn neg_insn, bool is_far) {
+  if (is_far) {
+    Label done;
+    (this->*neg_insn)(r1, r2, done, /* is_far */ false);  // far form: neg_insn branches over the unconditional jump
+    j(L);
+    bind(done);
+  } else {
+    if (L.is_bound()) {
+      (this->*insn)(r1, r2, target(L));
+    } else {
+      L.add_patch_at(code(), locator());  // unbound label: record a patch site, then emit with pc() as the provisional target
+      (this->*insn)(r1, r2, pc());
+    }
+  }
+}
+
+void Assembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {  // applies insn(Rt, address, tmp) to label L
+  if (L.is_bound()) {
+    (this->*insn)(Rt, target(L), tmp);
+  } else {
+    L.add_patch_at(code(), locator());  // unbound label: record a patch site, then emit with pc() as the provisional address
+    (this->*insn)(Rt, pc(), tmp);
+  }
+}
+
+void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) {  // applies insn(Rt, address) to label L
+  if (L.is_bound()) {
+    (this->*insn)(Rt, target(L));
+  } else {
+    L.add_patch_at(code(), locator());  // unbound label: record a patch site, then emit with pc() as the provisional address
+    (this->*insn)(Rt, pc());
+  }
+}
+
+void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) {  // loads addr minus its low 6 bits into Rd; the low 6 bits are returned in offset
+  int64_t imm64 = (int64_t)addr;
+#ifndef PRODUCT
+  {
+    char buffer[64];
+    snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64);
+    block_comment(buffer);  // non-PRODUCT builds attach the address in hex as a block comment
+  }
+#endif
+  assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (int64_t)-1),
+         "bit 47 overflows in address constant");
+  // Load upper 31 bits
+  int64_t imm = imm64 >> 17;
+  int64_t upper = imm, lower = imm;
+  lower = (lower << 52) >> 52;  // low 12 bits of imm, sign-extended
+  upper -= lower;  // remainder; compensates for addi sign-extending lower
+  upper = (int32_t)upper;
+  lui(Rd, upper);
+  addi(Rd, Rd, lower);
+
+  // Load the next 11 bits (bits 16..6); the final 6 bits are left to the caller via offset.
+  slli(Rd, Rd, 11);
+  addi(Rd, Rd, (imm64 >> 6) & 0x7ff);
+  slli(Rd, Rd, 6);
+
+  // This offset will be used by following jalr/ld.
+  offset = imm64 & 0x3f;
+}
+
+void Assembler::movptr(Register Rd, uintptr_t imm64) {  // convenience overload: forwards to movptr(Register, address)
+  movptr(Rd, (address)imm64);
+}
+
+void Assembler::movptr(Register Rd, address addr) {  // loads the complete address into Rd
+  int32_t offset = 0;  // int32_t to match movptr_with_offset's int32_t& parameter and its other call sites
+  movptr_with_offset(Rd, addr, offset);
+  addi(Rd, Rd, offset);  // add back the low 6 bits that movptr_with_offset returned separately
+}
+
+#define INSN(NAME, NEG_INSN)                                                         \
+  void Assembler::NAME(Register Rs, Register Rt, const address &dest) {              \
+    NEG_INSN(Rt, Rs, dest);                                                          \
+  }                                                                                  \
+  void Assembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) {            \
+    NEG_INSN(Rt, Rs, l, is_far);                                                     \
+  }
+
+  INSN(bgt,  blt);   // bgt(Rs, Rt, ...) is emitted as blt(Rt, Rs, ...): same branch with the operands swapped
+  INSN(ble,  bge);   // ble(Rs, Rt, ...) is emitted as bge(Rt, Rs, ...)
+  INSN(bgtu, bltu);  // bgtu(Rs, Rt, ...) is emitted as bltu(Rt, Rs, ...)
+  INSN(bleu, bgeu);  // bleu(Rs, Rt, ...) is emitted as bgeu(Rt, Rs, ...)
+#undef INSN
+
+#undef __
+
+Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _offset(0), _mode(literal) {  // literal-mode Address; _rspec is chosen from rtype
+  _target = target;
+  switch (rtype) {
+    case relocInfo::oop_type:
+    case relocInfo::metadata_type:
+      // Oops are a special case. Normally they would be their own section
+      // but in cases like icBuffer they are literals in the code stream that
+      // we don't have a section for. We use none so that we get a literal address
+      // which is always patchable.
+      break;  // _rspec is not assigned for these two types
+    case relocInfo::external_word_type:
+      _rspec = external_word_Relocation::spec(target);
+      break;
+    case relocInfo::internal_word_type:
+      _rspec = internal_word_Relocation::spec(target);
+      break;
+    case relocInfo::opt_virtual_call_type:
+      _rspec = opt_virtual_call_Relocation::spec();
+      break;
+    case relocInfo::static_call_type:
+      _rspec = static_call_Relocation::spec();
+      break;
+    case relocInfo::runtime_call_type:
+      _rspec = runtime_call_Relocation::spec();
+      break;
+    case relocInfo::poll_type:
+    case relocInfo::poll_return_type:
+      _rspec = Relocation::spec_simple(rtype);
+      break;
+    case relocInfo::none:
+      _rspec = RelocationHolder::none;
+      break;
+    default:
+      ShouldNotReachHere();  // any other relocation type is not expected here
+  }
+}
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
new file mode 100644
index 0000000000..9f6c477afa
--- /dev/null
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -0,0 +1,3056 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP
+#define CPU_RISCV_ASSEMBLER_RISCV_HPP
+
+#include "asm/register.hpp"
+#include "assembler_riscv.inline.hpp"
+#include "metaprogramming/enableIf.hpp"
+
+#define XLEN 64
+
+// definitions of various symbolic names for machine registers
+
+// First, the calling conventions between C and Java: each passes up to
+// 8 integer and 8 floating-point arguments in registers.
+
+class Argument {  // how many arguments each calling convention passes in registers
+ public:
+  enum {
+    n_int_register_parameters_c   = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...)
+    n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... )
+
+    n_int_register_parameters_j   = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...)
+    n_float_register_parameters_j = 8  // f10, f11, ... f17 (j_farg0, j_farg1, ...)
+  };
+};
+
+// C integer argument registers (caller-save)
+REGISTER_DECLARATION(Register, c_rarg0, x10);
+REGISTER_DECLARATION(Register, c_rarg1, x11);
+REGISTER_DECLARATION(Register, c_rarg2, x12);
+REGISTER_DECLARATION(Register, c_rarg3, x13);
+REGISTER_DECLARATION(Register, c_rarg4, x14);
+REGISTER_DECLARATION(Register, c_rarg5, x15);
+REGISTER_DECLARATION(Register, c_rarg6, x16);
+REGISTER_DECLARATION(Register, c_rarg7, x17);
+
+REGISTER_DECLARATION(FloatRegister, c_farg0, f10);  // C floating-point argument registers
+REGISTER_DECLARATION(FloatRegister, c_farg1, f11);
+REGISTER_DECLARATION(FloatRegister, c_farg2, f12);
+REGISTER_DECLARATION(FloatRegister, c_farg3, f13);
+REGISTER_DECLARATION(FloatRegister, c_farg4, f14);
+REGISTER_DECLARATION(FloatRegister, c_farg5, f15);
+REGISTER_DECLARATION(FloatRegister, c_farg6, f16);
+REGISTER_DECLARATION(FloatRegister, c_farg7, f17);
+
+// Symbolically name the register arguments used by the Java calling convention.
+// We have control over the convention for java so we can do what we please.
+// What pleases us is to offset the java calling convention so that when
+// we call a suitable jni method the arguments are lined up and we don't
+// have to do much shuffling. A suitable jni method is non-static and
+// takes a small number of arguments.
+//
+// |------------------------------------------------------------------------|
+// | c_rarg0  c_rarg1  c_rarg2  c_rarg3  c_rarg4  c_rarg5  c_rarg6  c_rarg7 |
+// |------------------------------------------------------------------------|
+// | x10      x11      x12      x13      x14      x15      x16      x17     |
+// |------------------------------------------------------------------------|
+// | j_rarg7  j_rarg0  j_rarg1  j_rarg2  j_rarg3  j_rarg4  j_rarg5  j_rarg6 |
+// |------------------------------------------------------------------------|
+
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
+REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
+REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
+REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
+REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);
+
+// Java floating-point arguments use the same registers as C
+
+REGISTER_DECLARATION(FloatRegister, j_farg0, f10);
+REGISTER_DECLARATION(FloatRegister, j_farg1, f11);
+REGISTER_DECLARATION(FloatRegister, j_farg2, f12);
+REGISTER_DECLARATION(FloatRegister, j_farg3, f13);
+REGISTER_DECLARATION(FloatRegister, j_farg4, f14);
+REGISTER_DECLARATION(FloatRegister, j_farg5, f15);
+REGISTER_DECLARATION(FloatRegister, j_farg6, f16);
+REGISTER_DECLARATION(FloatRegister, j_farg7, f17);
+
+// zero register
+REGISTER_DECLARATION(Register, zr,        x0);
+// global pointer
+REGISTER_DECLARATION(Register, gp,        x3);
+// thread pointer
+REGISTER_DECLARATION(Register, tp,        x4);
+
+// registers used to hold VM data either temporarily within a method
+// or across method calls
+
+// volatile (caller-save) registers
+
+// current method -- must be in a call-clobbered register
+REGISTER_DECLARATION(Register, xmethod,   x31);
+// return address
+REGISTER_DECLARATION(Register, ra,        x1);
+
+// non-volatile (callee-save) registers
+
+// stack pointer
+REGISTER_DECLARATION(Register, sp,        x2);
+// frame pointer
+REGISTER_DECLARATION(Register, fp,        x8);
+// base of heap
+REGISTER_DECLARATION(Register, xheapbase, x27);
+// constant pool cache
+REGISTER_DECLARATION(Register, xcpool,    x26);
+// monitors allocated on stack
+REGISTER_DECLARATION(Register, xmonitors, x25);
+// locals on stack
+REGISTER_DECLARATION(Register, xlocals,   x24);
+
+// java thread pointer
+REGISTER_DECLARATION(Register, xthread,   x23);
+// bytecode pointer
+REGISTER_DECLARATION(Register, xbcp,      x22);
+// Dispatch table base
+REGISTER_DECLARATION(Register, xdispatch, x21);
+// Java stack pointer
+REGISTER_DECLARATION(Register, esp,       x20);
+
+// temporary registers (caller-save)
+REGISTER_DECLARATION(Register, t0, x5);
+REGISTER_DECLARATION(Register, t1, x6);
+REGISTER_DECLARATION(Register, t2, x7);
+
+const Register g_INTArgReg[Argument::n_int_register_parameters_c] = {  // C integer argument registers, c_rarg0 first
+  c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
+};
+
+const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = {  // C floating-point argument registers, c_farg0 first
+  c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7
+};
+
+#define assert_cond(ARG1) assert(ARG1, #ARG1)  // assert that passes the stringified condition as the second argument
+
+// An assembler operand: a base register plus offset, or a literal target address with its relocation.
+class Address {
+ public:
+
+  enum mode { no_mode, base_plus_offset, pcrel, literal };
+
+ private:
+  Register _base;
+  Register _index;   // every constructor in this class body sets it to noreg
+  int64_t _offset;
+  enum mode _mode;
+
+  RelocationHolder _rspec;
+
+  // If the target is far we'll need to load the ea of this to a
+  // register to reach it. Otherwise if near we can do PC-relative
+  // addressing.
+  address          _target;
+
+ public:
+  Address()
+    : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { }
+  Address(Register r)
+    : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, int o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, long o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, long long o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, unsigned int o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, unsigned long o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+  Address(Register r, unsigned long long o)
+    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { }
+#ifdef ASSERT
+  Address(Register r, ByteSize disp)
+    : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { }
+#endif
+  Address(address target, RelocationHolder const& rspec)
+    : _base(noreg),
+      _index(noreg),
+      _offset(0),
+      _mode(literal),
+      _rspec(rspec),
+      _target(target) { }
+  Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
+
+  const Register base() const {
+    guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode");  // no_mode has no base
+    return _base;
+  }
+  long offset() const {
+    return _offset;
+  }
+  Register index() const {
+    return _index;
+  }
+  mode getMode() const {
+    return _mode;
+  }
+
+  bool uses(Register reg) const { return _base == reg; }  // only the base register is compared
+  const address target() const { return _target; }
+  const RelocationHolder& rspec() const { return _rspec; }
+  ~Address() {
+    _target = NULL;
+    _base = noreg;  // noreg rather than NULL: _base is a Register, not a raw pointer
+  }
+};
+
+// Convenience classes
+// Literal address constructed with relocInfo::runtime_call_type.
+class RuntimeAddress : public Address {
+ public:
+  RuntimeAddress(address target)
+    : Address(target, relocInfo::runtime_call_type) { }
+  ~RuntimeAddress() { }
+};
+
+// Literal address constructed with relocInfo::oop_type.
+class OopAddress : public Address {
+ public:
+  OopAddress(address target)
+    : Address(target, relocInfo::oop_type) { }
+  ~OopAddress() { }
+};
+
+class ExternalAddress : public Address {
+ private:
+  // ExternalAddress is sometimes used for values that are not really
+  // addresses, such as the card table base. external_word_type cannot be
+  // used for values in the first page, so those get no reloc at all.
+  static relocInfo::relocType reloc_for_target(address target) {
+    if (external_word_Relocation::can_be_relocated(target)) {
+      return relocInfo::external_word_type;
+    }
+    return relocInfo::none;
+  }
+ public:
+  ExternalAddress(address target) : Address(target, reloc_for_target(target)) { }
+  ~ExternalAddress() { }
+};
+
+// Literal address constructed with relocInfo::internal_word_type.
+class InternalAddress : public Address {
+ public:
+  InternalAddress(address target)
+    : Address(target, relocInfo::internal_word_type) { }
+  ~InternalAddress() { }
+};
+
+class Assembler : public AbstractAssembler {
+public:
+
+  enum { instruction_size = 4 };  // presumably bytes: emit() writes one 32-bit word per instruction
+
+  enum RoundingMode {
+    rne = 0b000,     // round to Nearest, ties to Even
+    rtz = 0b001,     // round towards Zero
+    rdn = 0b010,     // round Down (towards negative infinity)
+    rup = 0b011,     // round Up (towards positive infinity)
+    rmm = 0b100,     // round to Nearest, ties to Max Magnitude
+    rdy = 0b111,     // in an instruction's rm field, selects the dynamic rounding mode; invalid in the rounding mode register
+  };
+
+  void baseOffset32(Register Rd, const Address &adr, int32_t &offset) {
+    // Puts adr.base() plus the upper part of adr.offset() in Rd; the remaining low part comes back in 'offset'.
+    assert(Rd != noreg, "Rd must not be empty register!");
+    guarantee(Rd != adr.base(), "should use different registers!");
+    if (is_offset_in_range(adr.offset(), 32)) {
+      const int32_t imm = adr.offset();
+      // sextract shifts unsigned bits: left-shifting a negative int32_t is undefined before C++20.
+      const int32_t lower = sextract(imm, 11, 0);
+      lui(Rd, imm - lower);
+      offset = lower;
+    } else {
+      movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset);
+    }
+    add(Rd, Rd, adr.base());
+  }
+
+  void baseOffset(Register Rd, const Address &adr, int32_t &offset) {
+    if (!is_offset_in_range(adr.offset(), 12)) {
+      baseOffset32(Rd, adr, offset);  // offset fails the 12-bit check: defer to baseOffset32
+      return;
+    }
+    assert(Rd != noreg, "Rd must not be empty register!");
+    addi(Rd, adr.base(), adr.offset());
+    offset = 0;  // the whole displacement was passed to addi
+  }
+
+  void _li(Register Rd, int64_t imm);  // optimized load immediate
+  void li32(Register Rd, int32_t imm);
+  void movptr(Register Rd, address addr);
+  void movptr_with_offset(Register Rd, address addr, int32_t &offset);
+  void movptr(Register Rd, uintptr_t imm64);
+  void j(const address &dest, Register temp = t0);
+  void j(const Address &adr, Register temp = t0);
+  void j(Label &l, Register temp = t0);
+  void jal(Label &l, Register temp = t0);
+  void jal(const address &dest, Register temp = t0);
+  void jal(const Address &adr, Register temp = t0);
+  void jr(Register Rs);
+  void jalr(Register Rs);
+  void ret();
+  void call(const address &dest, Register temp = t0);
+  void call(const Address &adr, Register temp = t0);
+  void tail(const address &dest, Register temp = t0);
+  void tail(const Address &adr, Register temp = t0);
+  // Label flavours of call() and tail(): the label is resolved through
+  // target() and the result is passed on to the other call()/tail()
+  // overloads declared above. Unlike those overloads, 'temp' has no
+  // default value here.
+  void call(Label &l, Register temp) { call(target(l), temp); }
+  void tail(Label &l, Register temp) { tail(target(l), temp); }
+
+  // Returns bits [msb:lsb] of val, zero-extended (bit 0 is the least significant bit).
+  static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 31);
+    const unsigned nbits = msb - lsb + 1;
+    // Build the mask in 64 bits: 1U << 32 is undefined, so a full-width field (31, 0) would break.
+    const uint32_t mask = (uint32_t)(((uint64_t)1 << nbits) - 1);
+    return (val >> lsb) & mask;
+  }
+
+  static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 31);
+    // Signed field [msb:lsb]: park bit msb in bit 31, then shift back down as a signed value.
+    const int32_t lifted = val << (31 - msb);
+    return lifted >> (31 - msb + lsb);
+  }
+
+  // Overwrites bits [msb:lsb] of the unsigned word at 'a' with val; all other bits are kept.
+  static void patch(address a, unsigned msb, unsigned lsb, unsigned val) {
+    assert_cond(a != NULL);
+    assert_cond(msb >= lsb && msb <= 31);
+    const unsigned nbits = msb - lsb + 1;
+    // 64-bit limit: 1U << 32 is undefined, which would break a full-word field (31, 0).
+    const uint64_t limit = (uint64_t)1 << nbits;
+    guarantee(val < limit, "Field too big for insn");
+    const unsigned mask = (unsigned)(limit - 1) << lsb;
+    unsigned target = *(unsigned *)a;
+    target = (target & ~mask) | (val << lsb);
+    *(unsigned *)a = target;
+  }
+
+  // Single-bit flavour of patch(); the register fields below are five bits wide, starting at lsb.
+  static void patch(address a, unsigned bit, unsigned val) { patch(a, bit, bit, val); }
+  // One patch_reg overload per register file; the encoding comes from encoding_nocheck().
+  static void patch_reg(address a, unsigned lsb, Register reg) {
+    const unsigned msb = lsb + 4;
+    patch(a, msb, lsb, reg->encoding_nocheck());
+  }
+  static void patch_reg(address a, unsigned lsb, FloatRegister reg) {
+    const unsigned msb = lsb + 4;
+    patch(a, msb, lsb, reg->encoding_nocheck());
+  }
+  static void patch_reg(address a, unsigned lsb, VectorRegister reg) {
+    const unsigned msb = lsb + 4;
+    patch(a, msb, lsb, reg->encoding_nocheck());
+  }
+
+  // Hands one instruction word to emit_int32().
+  void emit(unsigned insn) { emit_int32(static_cast<jint>(insn)); }
+
+  // Emits an all-zero word.
+  void _halt() {
+    emit_int32(0);
+  }
+
+// Register-register instructions: opcode in bits 6:0, funct3 in 14:12, funct7 in 31:25; Rd, Rs1, Rs2 at bits 7, 15, 20.
+#define INSN(NAME, op, funct3, funct7)                          \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {          \
+    unsigned insn = 0;                                          \
+    patch((address)&insn, 6,  0, op);                           \
+    patch((address)&insn, 14, 12, funct3);                      \
+    patch((address)&insn, 31, 25, funct7);                      \
+    patch_reg((address)&insn, 7, Rd);                           \
+    patch_reg((address)&insn, 15, Rs1);                         \
+    patch_reg((address)&insn, 20, Rs2);                         \
+    emit(insn);                                                 \
+  }
+
+  INSN(_add,  0b0110011, 0b000, 0b0000000);
+  INSN(_sub,  0b0110011, 0b000, 0b0100000);
+  INSN(_andr, 0b0110011, 0b111, 0b0000000);
+  INSN(_orr,  0b0110011, 0b110, 0b0000000);
+  INSN(_xorr, 0b0110011, 0b100, 0b0000000);
+  INSN(sll,   0b0110011, 0b001, 0b0000000);
+  INSN(sra,   0b0110011, 0b101, 0b0100000);
+  INSN(srl,   0b0110011, 0b101, 0b0000000);
+  INSN(slt,   0b0110011, 0b010, 0b0000000);
+  INSN(sltu,  0b0110011, 0b011, 0b0000000);
+  INSN(_addw, 0b0111011, 0b000, 0b0000000);
+  INSN(_subw, 0b0111011, 0b000, 0b0100000);
+  INSN(sllw,  0b0111011, 0b001, 0b0000000);
+  INSN(sraw,  0b0111011, 0b101, 0b0100000);
+  INSN(srlw,  0b0111011, 0b101, 0b0000000);
+  INSN(mul,   0b0110011, 0b000, 0b0000001);
+  INSN(mulh,  0b0110011, 0b001, 0b0000001);
+  INSN(mulhsu,0b0110011, 0b010, 0b0000001);
+  INSN(mulhu, 0b0110011, 0b011, 0b0000001);
+  INSN(mulw,  0b0111011, 0b000, 0b0000001);
+  INSN(div,   0b0110011, 0b100, 0b0000001);
+  INSN(divu,  0b0110011, 0b101, 0b0000001);
+  INSN(divw,  0b0111011, 0b100, 0b0000001);
+  INSN(divuw, 0b0111011, 0b101, 0b0000001);
+  INSN(rem,   0b0110011, 0b110, 0b0000001);
+  INSN(remu,  0b0110011, 0b111, 0b0000001);
+  INSN(remw,  0b0111011, 0b110, 0b0000001);
+  INSN(remuw, 0b0111011, 0b111, 0b0000001);
+
+#undef INSN
+
+#define INSN_ENTRY_RELOC(result_type, header)                               \
+  result_type header {                                                      \
+    guarantee(rtype == relocInfo::internal_word_type,                       \
+              "only internal_word_type relocs make sense here");            \
+    relocate(InternalAddress(dest).rspec());  // body is left open: each user adds its statements and the closing brace; header must declare rtype and dest
+
+  // I-type integer loads: Rd at bit 7, Rs at bit 15, low 12 bits of offset in 31:20; offset must pass is_offset_in_range(offset, 12).
+#define INSN(NAME, op, funct3)                                                                     \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                      \
+    guarantee(is_offset_in_range(offset, 12), "offset is invalid.");                               \
+    unsigned insn = 0;                                                                             \
+    int32_t val = offset & 0xfff;                                                                  \
+    patch((address)&insn, 6, 0, op);                                                               \
+    patch((address)&insn, 14, 12, funct3);                                                         \
+    patch_reg((address)&insn, 15, Rs);                                                             \
+    patch_reg((address)&insn, 7, Rd);                                                              \
+    patch((address)&insn, 31, 20, val);                                                            \
+    emit(insn);                                                                                    \
+  }
+
+  INSN(lb,  0b0000011, 0b000);
+  INSN(lbu, 0b0000011, 0b100);
+  INSN(lh,  0b0000011, 0b001);
+  INSN(lhu, 0b0000011, 0b101);
+  INSN(_lw, 0b0000011, 0b010);
+  INSN(lwu, 0b0000011, 0b110);
+  INSN(_ld, 0b0000011, 0b011);
+
+#undef INSN
+
+#define INSN(NAME)  /* loads from an address, an Address operand or a Label */                     \
+  void NAME(Register Rd, address dest) {                                                           \
+    assert_cond(dest != NULL);                                                                     \
+    int64_t distance = (dest - pc());                                                              \
+    if (is_offset_in_range(distance, 32)) {                                                        \
+      auipc(Rd, (int32_t)distance + 0x800);  /* +0x800: the low 12 bits below are signed */        \
+      NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                               \
+    } else {                                                                                       \
+      int32_t offset = 0;                                                                          \
+      movptr_with_offset(Rd, dest, offset);                                                        \
+      NAME(Rd, Rd, offset);                                                                        \
+    }                                                                                              \
+  }                                                                                                \
+  INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
+    NAME(Rd, dest);                                                                                \
+  }                                                                                                \
+  void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
+    switch (adr.getMode()) {                                                                       \
+      case Address::literal: {                                                                     \
+        relocate(adr.rspec());                                                                     \
+        NAME(Rd, adr.target());                                                                    \
+        break;                                                                                     \
+      }                                                                                            \
+      case Address::base_plus_offset: {                                                            \
+        if (is_offset_in_range(adr.offset(), 12)) {                                                \
+          NAME(Rd, adr.base(), adr.offset());                                                      \
+        } else {                                                                                   \
+          int32_t offset = 0;                                                                      \
+          if (Rd == adr.base()) {  /* baseOffset32 needs Rd != base, so go through temp */         \
+            baseOffset32(temp, adr, offset);                                                       \
+            NAME(Rd, temp, offset);                                                                \
+          } else {                                                                                 \
+            baseOffset32(Rd, adr, offset);                                                         \
+            NAME(Rd, Rd, offset);                                                                  \
+          }                                                                                        \
+        }                                                                                          \
+        break;                                                                                     \
+      }                                                                                            \
+      default:                                                                                     \
+        ShouldNotReachHere();                                                                      \
+    }                                                                                              \
+  }                                                                                                \
+  void NAME(Register Rd, Label &L) {                                                               \
+    wrap_label(Rd, L, &Assembler::NAME);                                                           \
+  }
+
+  INSN(lb);
+  INSN(lbu);
+  INSN(lh);
+  INSN(lhu);
+  INSN(lw);
+  INSN(lwu);
+  INSN(ld);
+
+#undef INSN
+
+#define INSN(NAME, op, funct3)  /* FP loads: I-type layout with a FloatRegister destination */    \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) {                                 \
+    guarantee(is_offset_in_range(offset, 12), "offset is invalid.");                               \
+    unsigned insn = 0;                                                                             \
+    uint32_t val = offset & 0xfff;                                                                 \
+    patch((address)&insn, 6, 0, op);                                                               \
+    patch((address)&insn, 14, 12, funct3);                                                         \
+    patch_reg((address)&insn, 15, Rs);                                                             \
+    patch_reg((address)&insn, 7, Rd);                                                              \
+    patch((address)&insn, 31, 20, val);                                                            \
+    emit(insn);                                                                                    \
+  }
+
+  INSN(flw,  0b0000111, 0b010);
+  INSN(_fld, 0b0000111, 0b011);
+
+#undef INSN
+
+#define INSN(NAME)  /* FP loads from an address or Address operand; temp holds the address */      \
+  void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
+    assert_cond(dest != NULL);                                                                     \
+    int64_t distance = (dest - pc());                                                              \
+    if (is_offset_in_range(distance, 32)) {                                                        \
+      auipc(temp, (int32_t)distance + 0x800);  /* +0x800: the low 12 bits below are signed */      \
+      NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                             \
+    } else {                                                                                       \
+      int32_t offset = 0;                                                                          \
+      movptr_with_offset(temp, dest, offset);                                                      \
+      NAME(Rd, temp, offset);                                                                      \
+    }                                                                                              \
+  }                                                                                                \
+  INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \
+    NAME(Rd, dest, temp);                                                                          \
+  }                                                                                                \
+  void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
+    switch (adr.getMode()) {                                                                       \
+      case Address::literal: {                                                                     \
+        relocate(adr.rspec());                                                                     \
+        NAME(Rd, adr.target(), temp);                                                              \
+        break;                                                                                     \
+      }                                                                                            \
+      case Address::base_plus_offset: {                                                            \
+        if (is_offset_in_range(adr.offset(), 12)) {                                                \
+          NAME(Rd, adr.base(), adr.offset());                                                      \
+        } else {                                                                                   \
+          int32_t offset = 0;                                                                      \
+          baseOffset32(temp, adr, offset);                                                         \
+          NAME(Rd, temp, offset);                                                                  \
+        }                                                                                          \
+        break;                                                                                     \
+      }                                                                                            \
+      default:                                                                                     \
+        ShouldNotReachHere();                                                                      \
+    }                                                                                              \
+  }
+
+  INSN(flw);
+  INSN(fld);
+#undef INSN
+
+#define INSN(NAME, op, funct3)  /* B-type branch; offset bits 12:1 are scattered over the word */        \
+  void NAME(Register Rs1, Register Rs2, const int64_t offset) {                                          \
+    guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid.");                                     \
+    unsigned insn = 0;                                                                                   \
+    uint32_t val  = offset & 0x1fff;                                                                     \
+    uint32_t val11 = (val >> 11) & 0x1;                                                                  \
+    uint32_t val12 = (val >> 12) & 0x1;                                                                  \
+    uint32_t low  = (val >> 1) & 0xf;                                                                    \
+    uint32_t high = (val >> 5) & 0x3f;                                                                   \
+    patch((address)&insn, 6, 0, op);                                                                     \
+    patch((address)&insn, 14, 12, funct3);                                                               \
+    patch_reg((address)&insn, 15, Rs1);                                                                  \
+    patch_reg((address)&insn, 20, Rs2);                                                                  \
+    patch((address)&insn, 7, val11);      /* offset bit 11   */                                          \
+    patch((address)&insn, 11, 8, low);    /* offset bits 4:1 */                                          \
+    patch((address)&insn, 30, 25, high);  /* offset bits 10:5 */                                         \
+    patch((address)&insn, 31, val12);     /* offset bit 12   */                                          \
+    emit(insn);                                                                                          \
+  }
+
+  INSN(_beq, 0b1100011, 0b000);
+  INSN(_bne, 0b1100011, 0b001);
+  INSN(bge,  0b1100011, 0b101);
+  INSN(bgeu, 0b1100011, 0b111);
+  INSN(blt,  0b1100011, 0b100);
+  INSN(bltu, 0b1100011, 0b110);
+
+#undef INSN
+
+#define INSN(NAME)  /* branch to an absolute address: the offset is dest - pc() */                       \
+  void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
+    assert_cond(dest != NULL);                                                                           \
+    int64_t offset = (dest - pc());                                                                      \
+    guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid.");                                     \
+    NAME(Rs1, Rs2, offset);                                                                              \
+  }                                                                                                      \
+  INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
+    NAME(Rs1, Rs2, dest);                                                                                \
+  }
+
+  INSN(beq);
+  INSN(bne);
+  INSN(bge);
+  INSN(bgeu);
+  INSN(blt);
+  INSN(bltu);
+
+#undef INSN
+
+#define INSN(NAME, NEG_INSN)  /* branch to a Label; NEG_INSN is the inverse condition */    \
+  void NAME(Register Rs1, Register Rs2, Label &L, bool is_far = false) {                    \
+    wrap_label(Rs1, Rs2, L, &Assembler::NAME, &Assembler::NEG_INSN, is_far);                \
+  }
+
+  INSN(beq,  bne);
+  INSN(bne,  beq);
+  INSN(blt,  bge);
+  INSN(bge,  blt);
+  INSN(bltu, bgeu);
+  INSN(bgeu, bltu);
+
+#undef INSN
+
+#define INSN(NAME, REGISTER, op, funct3)  /* S-type store: Rs1 -> bits 24:20, Rs2 -> bits 19:15 */          \
+  void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) {                                             \
+    guarantee(is_offset_in_range(offset, 12), "offset is invalid.");                                        \
+    unsigned insn = 0;                                                                                      \
+    uint32_t val  = offset & 0xfff;                                                                         \
+    uint32_t low  = val & 0x1f;                                                                             \
+    uint32_t high = (val >> 5) & 0x7f;                                                                      \
+    patch((address)&insn, 6, 0, op);                                                                        \
+    patch((address)&insn, 14, 12, funct3);                                                                  \
+    patch_reg((address)&insn, 15, Rs2);                                                                     \
+    patch_reg((address)&insn, 20, Rs1);                                                                     \
+    patch((address)&insn, 11, 7, low);    /* offset bits 4:0  */                                            \
+    patch((address)&insn, 31, 25, high);  /* offset bits 11:5 */                                            \
+    emit(insn);                                                                                             \
+  }                                                                                                         \
+
+  INSN(sb,   Register,      0b0100011, 0b000);
+  INSN(sh,   Register,      0b0100011, 0b001);
+  INSN(_sw,  Register,      0b0100011, 0b010);
+  INSN(_sd,  Register,      0b0100011, 0b011);
+  INSN(fsw,  FloatRegister, 0b0100111, 0b010);
+  INSN(_fsd, FloatRegister, 0b0100111, 0b011);
+
+#undef INSN
+
+#define INSN(NAME, REGISTER) /* store overload taking a relocType; the function prologue comes from INSN_ENTRY_RELOC, defined outside this section */ \
+  INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0))   \
+    NAME(Rs, dest, temp); /* delegates to the (Rs, address, temp) overload of the same name */              \
+  }
+
+  INSN(sb,  Register);
+  INSN(sh,  Register);
+  INSN(sw,  Register);
+  INSN(sd,  Register);
+  INSN(fsw, FloatRegister);
+  INSN(fsd, FloatRegister);
+
+#undef INSN
+
+#define INSN(NAME) /* integer stores to a raw address or an Address operand; temp is the scratch register used to form the address */ \
+  void NAME(Register Rs, address dest, Register temp = t0) {                                       \
+    assert_cond(dest != NULL);                                                                     \
+    assert_different_registers(Rs, temp); /* temp is overwritten before Rs is stored */            \
+    int64_t distance = (dest - pc());                                                              \
+    if (is_offset_in_range(distance, 32)) { /* near target: auipc + store with a 12-bit displacement */ \
+      auipc(temp, (int32_t)distance + 0x800); /* +0x800 compensates for the sign-extended low 12 bits added by the store */ \
+      NAME(Rs, temp, ((int32_t)distance << 20) >> 20); /* low 12 bits of distance, sign-extended */ \
+    } else { /* far target: movptr_with_offset (defined elsewhere) sets temp and offset */         \
+      int32_t offset = 0;                                                                          \
+      movptr_with_offset(temp, dest, offset);                                                      \
+      NAME(Rs, temp, offset);                                                                      \
+    }                                                                                              \
+  }                                                                                                \
+  void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
+    switch (adr.getMode()) {                                                                       \
+      case Address::literal: { /* record the relocation, then store to the literal target */       \
+        assert_different_registers(Rs, temp);                                                      \
+        relocate(adr.rspec());                                                                     \
+        NAME(Rs, adr.target(), temp);                                                              \
+        break;                                                                                     \
+      }                                                                                            \
+      case Address::base_plus_offset: {                                                            \
+        if (is_offset_in_range(adr.offset(), 12)) { /* displacement fits the 12-bit store form */  \
+          NAME(Rs, adr.base(), adr.offset());                                                      \
+        } else { /* too large: baseOffset32 (defined elsewhere) sets temp and the residual offset */ \
+          int32_t offset= 0;                                                                       \
+          assert_different_registers(Rs, temp);                                                    \
+          baseOffset32(temp, adr, offset);                                                         \
+          NAME(Rs, temp, offset);                                                                  \
+        }                                                                                          \
+        break;                                                                                     \
+      }                                                                                            \
+      default:                                                                                     \
+        ShouldNotReachHere();                                                                      \
+    }                                                                                              \
+  }
+
+  INSN(sb);
+  INSN(sh);
+  INSN(sw);
+  INSN(sd);
+
+#undef INSN
+
+#define INSN(NAME) /* float stores to a raw address or an Address operand; temp is the integer scratch register used to form the address */ \
+  void NAME(FloatRegister Rs, address dest, Register temp = t0) {                                  \
+    assert_cond(dest != NULL);                                                                     \
+    int64_t distance = (dest - pc());                                                              \
+    if (is_offset_in_range(distance, 32)) { /* near target: auipc + store with a 12-bit displacement */ \
+      auipc(temp, (int32_t)distance + 0x800); /* +0x800 compensates for the sign-extended low 12 bits added by the store */ \
+      NAME(Rs, temp, ((int32_t)distance << 20) >> 20); /* low 12 bits of distance, sign-extended */ \
+    } else { /* far target: movptr_with_offset (defined elsewhere) sets temp and offset */         \
+      int32_t offset = 0;                                                                          \
+      movptr_with_offset(temp, dest, offset);                                                      \
+      NAME(Rs, temp, offset);                                                                      \
+    }                                                                                              \
+  }                                                                                                \
+  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {                            \
+    switch (adr.getMode()) {                                                                       \
+      case Address::literal: { /* record the relocation, then store to the literal target */       \
+        relocate(adr.rspec());                                                                     \
+        NAME(Rs, adr.target(), temp);                                                              \
+        break;                                                                                     \
+      }                                                                                            \
+      case Address::base_plus_offset: {                                                            \
+        if (is_offset_in_range(adr.offset(), 12)) { /* displacement fits the 12-bit store form */  \
+          NAME(Rs, adr.base(), adr.offset());                                                      \
+        } else { /* too large: baseOffset32 (defined elsewhere) sets temp and the residual offset */ \
+          int32_t offset = 0;                                                                      \
+          baseOffset32(temp, adr, offset);                                                         \
+          NAME(Rs, temp, offset);                                                                  \
+        }                                                                                          \
+        break;                                                                                     \
+      }                                                                                            \
+      default:                                                                                     \
+        ShouldNotReachHere();                                                                      \
+    }                                                                                              \
+  }
+
+  INSN(fsw);
+  INSN(fsd);
+
+#undef INSN
+
+#define INSN(NAME, op, funct3) /* CSR access with a register operand: Rd at bit 7, Rs1 at bit 15, csr number in bits 31:20 */ \
+  void NAME(Register Rd, const uint32_t csr, Register Rs1) {                  \
+    guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid");        \
+    unsigned encoding = 0;                                                    \
+    patch((address)&encoding, 6, 0, op);                                      \
+    patch_reg((address)&encoding, 7, Rd);                                     \
+    patch((address)&encoding, 14, 12, funct3);                                \
+    patch_reg((address)&encoding, 15, Rs1);                                   \
+    patch((address)&encoding, 31, 20, csr);                                   \
+    emit(encoding);                                                           \
+  }
+
+  INSN(csrrw, 0b1110011, 0b001);
+  INSN(csrrs, 0b1110011, 0b010);
+  INSN(csrrc, 0b1110011, 0b011);
+
+#undef INSN
+
+#define INSN(NAME, op, funct3) /* CSR access with a 5-bit unsigned immediate in bits 19:15 instead of a source register */ \
+  void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) {           \
+    guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid");        \
+    guarantee(is_unsigned_imm_in_range(uimm, 5, 0), "uimm is invalid");       \
+    const uint32_t uimm5 = uimm & 0x1f;                                       \
+    unsigned encoding = 0;                                                    \
+    patch((address)&encoding, 6, 0, op);                                      \
+    patch_reg((address)&encoding, 7, Rd);                                     \
+    patch((address)&encoding, 14, 12, funct3);                                \
+    patch((address)&encoding, 19, 15, uimm5);                                 \
+    patch((address)&encoding, 31, 20, csr);                                   \
+    emit(encoding);                                                           \
+  }
+
+  INSN(csrrwi, 0b1110011, 0b101);
+  INSN(csrrsi, 0b1110011, 0b110);
+  INSN(csrrci, 0b1110011, 0b111);
+
+#undef INSN
+
+#define INSN(NAME, op) /* jump-and-link with an immediate offset; the offset bits are scattered over bits 31:12 */ \
+  void NAME(Register Rd, const int32_t offset) {                                                  \
+    guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid.");                              \
+    unsigned encoding = 0;                                                                        \
+    patch((address)&encoding, 6, 0, op);                                                          \
+    patch_reg((address)&encoding, 7, Rd);                                                         \
+    patch((address)&encoding, 19, 12, (uint32_t)((offset >> 12) & 0xff));  /* offset[19:12] */    \
+    patch((address)&encoding, 20, (uint32_t)((offset >> 11) & 0x1));       /* offset[11]    */    \
+    patch((address)&encoding, 30, 21, (uint32_t)((offset >> 1) & 0x3ff));  /* offset[10:1]  */    \
+    patch((address)&encoding, 31, (uint32_t)((offset >> 20) & 0x1));       /* offset[20]    */    \
+    emit(encoding);                                                                               \
+  }
+
+  INSN(_jal, 0b1101111);
+
+#undef INSN
+
+#define INSN(NAME) /* jump-and-link to a raw address or a Label; temp is only needed when the target is out of direct range */ \
+  void NAME(Register Rd, const address dest, Register temp = t0) {                            \
+    assert_cond(dest != NULL);                                                                \
+    int64_t offset = dest - pc();                                                             \
+    if (is_imm_in_range(offset, 20, 1)) { /* same range test that _jal guarantees: use the direct form */ \
+      NAME(Rd, offset);                                                                       \
+    } else { /* out of range: form the target in temp (movptr_with_offset is defined elsewhere), then jalr */ \
+      assert_different_registers(Rd, temp);                                                   \
+      int32_t off = 0;                                                                        \
+      movptr_with_offset(temp, dest, off);                                                    \
+      jalr(Rd, temp, off);                                                                    \
+    }                                                                                         \
+  }                                                                                           \
+  void NAME(Register Rd, Label &L, Register temp = t0) {                                      \
+    assert_different_registers(Rd, temp);                                                     \
+    wrap_label(Rd, L, temp, &Assembler::NAME); /* wrap_label is defined outside this section */ \
+  }
+
+  INSN(jal);
+
+#undef INSN
+
+#undef INSN_ENTRY_RELOC
+
+#define INSN(NAME, op, funct) /* register-indirect jump-and-link: Rd at bit 7, Rs at bit 15, 12-bit offset in bits 31:20 */ \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                        \
+    guarantee(is_offset_in_range(offset, 12), "offset is invalid.");                 \
+    const int32_t imm12 = offset & 0xfff;                                            \
+    unsigned encoding = 0;                                                           \
+    patch((address)&encoding, 6, 0, op);                                             \
+    patch_reg((address)&encoding, 7, Rd);                                            \
+    patch((address)&encoding, 14, 12, funct);                                        \
+    patch_reg((address)&encoding, 15, Rs);                                           \
+    patch((address)&encoding, 31, 20, imm12);                                        \
+    emit(encoding);                                                                  \
+  }
+
+  INSN(_jalr, 0b1100111, 0b000);
+
+#undef INSN
+
+  enum barrier {  // 4-bit masks; presumably the predecessor/successor arguments of fence(), which requires values < 16
+    i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001,  // single-bit flags
+    ir = i | r, ow = o | w, iorw = i | o | r | w      // combinations of the flags above
+  };
+
+  void fence(const uint32_t predecessor, const uint32_t successor) {  // predecessor -> bits 27:24, successor -> bits 23:20
+    guarantee(predecessor < 16, "predecessor is invalid");
+    guarantee(successor < 16, "successor is invalid");
+    unsigned encoding = 0;
+    patch((address)&encoding, 6, 0, 0b001111);
+    patch((address)&encoding, 11, 7, 0b00000);    // bits 11:7 are zero
+    patch((address)&encoding, 14, 12, 0b000);
+    patch((address)&encoding, 19, 15, 0b00000);   // bits 19:15 are zero
+    patch((address)&encoding, 23, 20, successor);
+    patch((address)&encoding, 27, 24, predecessor);
+    patch((address)&encoding, 31, 28, 0b0000);    // bits 31:28 are zero
+    emit(encoding);
+  }
+
+#define INSN(NAME, op, funct3, funct12) /* operand-less instruction: only op, funct3 and the 12-bit code in bits 31:20 vary */ \
+  void NAME() {                                                  \
+    unsigned encoding = 0;                                       \
+    patch((address)&encoding, 6, 0, op);                         \
+    patch((address)&encoding, 11, 7, 0b00000);                   \
+    patch((address)&encoding, 14, 12, funct3);                   \
+    patch((address)&encoding, 19, 15, 0b00000);                  \
+    patch((address)&encoding, 31, 20, funct12);                  \
+    emit(encoding);                                              \
+  }
+
+  INSN(ecall,   0b1110011, 0b000, 0b000000000000);
+  INSN(_ebreak, 0b1110011, 0b000, 0b000000000001);
+
+#undef INSN
+
+enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11};  // 2-bit ordering value patched into bits 26:25 by the AMO, lr_* and sc_* emitters
+
+#define INSN(NAME, op, funct3, funct5) /* atomic memory operation: funct5 fills bits 31:27, memory_order fills bits 26:25 */ \
+  void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) {     \
+    unsigned encoding = 0;                                                           \
+    patch((address)&encoding, 6, 0, op);                                             \
+    patch_reg((address)&encoding, 7, Rd);                                            \
+    patch((address)&encoding, 14, 12, funct3);                                       \
+    patch_reg((address)&encoding, 15, Rs1);                                          \
+    patch_reg((address)&encoding, 20, Rs2);                                          \
+    patch((address)&encoding, 26, 25, memory_order);                                 \
+    patch((address)&encoding, 31, 27, funct5);                                       \
+    emit(encoding);                                                                  \
+  }
+
+  INSN(amoswap_w, 0b0101111, 0b010, 0b00001);
+  INSN(amoadd_w,  0b0101111, 0b010, 0b00000);
+  INSN(amoxor_w,  0b0101111, 0b010, 0b00100);
+  INSN(amoand_w,  0b0101111, 0b010, 0b01100);
+  INSN(amoor_w,   0b0101111, 0b010, 0b01000);
+  INSN(amomin_w,  0b0101111, 0b010, 0b10000);
+  INSN(amomax_w,  0b0101111, 0b010, 0b10100);
+  INSN(amominu_w, 0b0101111, 0b010, 0b11000);
+  INSN(amomaxu_w, 0b0101111, 0b010, 0b11100);
+  INSN(amoswap_d, 0b0101111, 0b011, 0b00001);
+  INSN(amoadd_d,  0b0101111, 0b011, 0b00000);
+  INSN(amoxor_d,  0b0101111, 0b011, 0b00100);
+  INSN(amoand_d,  0b0101111, 0b011, 0b01100);
+  INSN(amoor_d,   0b0101111, 0b011, 0b01000);
+  INSN(amomin_d,  0b0101111, 0b011, 0b10000);
+  INSN(amomax_d,  0b0101111, 0b011, 0b10100);
+  INSN(amominu_d, 0b0101111, 0b011, 0b11000);
+  INSN(amomaxu_d, 0b0101111, 0b011, 0b11100);
+#undef INSN
+
+enum operand_size { int8, int16, int32, uint32, int64 };  // operand width/signedness tags; no use appears in this part of the file
+
+#define INSN(NAME, op, funct3, funct5) /* load-reserved: Rd at bit 7, Rs1 at bit 15, bits 24:20 zero, ordering in 26:25, funct5 in 31:27 */ \
+  void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) {               \
+    unsigned encoding = 0;                                                          \
+    const uint32_t order = memory_order & 0x3;                                      \
+    patch((address)&encoding, 6, 0, op);                                            \
+    patch_reg((address)&encoding, 7, Rd);                                           \
+    patch((address)&encoding, 14, 12, funct3);                                      \
+    patch_reg((address)&encoding, 15, Rs1);                                         \
+    patch((address)&encoding, 24, 20, 0b00000); /* was 25:20, which overlapped the ordering field and relied on patch order */ \
+    patch((address)&encoding, 26, 25, order);                                       \
+    patch((address)&encoding, 31, 27, funct5);                                      \
+    emit(encoding);                                                                 \
+  }
+
+  INSN(lr_w, 0b0101111, 0b010, 0b00010);
+  INSN(lr_d, 0b0101111, 0b011, 0b00010);
+
+#undef INSN
+
+#define INSN(NAME, op, funct3, funct5) /* store-conditional: note Rs2 is patched at bit 15 and Rs1 at bit 20; ordering in 26:25, funct5 in 31:27 */ \
+  void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) {     \
+    const uint32_t order = memory_order & 0x3;                                          \
+    unsigned encoding = 0;                                                              \
+    patch((address)&encoding, 6, 0, op);                                                \
+    patch_reg((address)&encoding, 7, Rd);                                               \
+    patch((address)&encoding, 14, 12, funct3);                                          \
+    patch_reg((address)&encoding, 15, Rs2);                                             \
+    patch_reg((address)&encoding, 20, Rs1);                                             \
+    patch((address)&encoding, 26, 25, order);                                           \
+    patch((address)&encoding, 31, 27, funct5);                                          \
+    emit(encoding);                                                                     \
+  }
+
+  INSN(sc_w, 0b0101111, 0b010, 0b00011);
+  INSN(sc_d, 0b0101111, 0b011, 0b00011);
+#undef INSN
+
+#define INSN(NAME, op, funct5, funct7) /* one float source to a float destination; rm in bits 14:12, funct5 in 24:20, funct7 in 31:25 */ \
+  void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) {            \
+    unsigned encoding = 0;                                                           \
+    patch((address)&encoding, 6, 0, op);                                             \
+    patch_reg((address)&encoding, 7, Rd);                                            \
+    patch((address)&encoding, 14, 12, rm);                                           \
+    patch_reg((address)&encoding, 15, Rs1);                                          \
+    patch((address)&encoding, 24, 20, funct5);                                       \
+    patch((address)&encoding, 31, 25, funct7);                                       \
+    emit(encoding);                                                                  \
+  }
+
+  INSN(fsqrt_s,   0b1010011, 0b00000, 0b0101100);
+  INSN(fsqrt_d,   0b1010011, 0b00000, 0b0101101);
+  INSN(fcvt_s_d,  0b1010011, 0b00001, 0b0100000);
+  INSN(fcvt_d_s,  0b1010011, 0b00000, 0b0100001);
+#undef INSN
+
+// Immediate Instruction: Rd at bit 7, Rs1 at bit 15, signed 12-bit immediate in bits 31:20
+#define INSN(NAME, op, funct3)                                                          \
+  void NAME(Register Rd, Register Rs1, int32_t imm) {                                   \
+    guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity");             \
+    unsigned encoding = 0;                                                              \
+    patch((address)&encoding, 6, 0, op);                                                \
+    patch_reg((address)&encoding, 7, Rd);                                               \
+    patch((address)&encoding, 14, 12, funct3);                                          \
+    patch_reg((address)&encoding, 15, Rs1);                                             \
+    patch((address)&encoding, 31, 20, imm & 0x00000fff);                                \
+    emit(encoding);                                                                     \
+  }
+
+  INSN(_addi,      0b0010011, 0b000);
+  INSN(slti,       0b0010011, 0b010);
+  INSN(_addiw,     0b0011011, 0b000);
+  INSN(_and_imm12, 0b0010011, 0b111);
+  INSN(ori,        0b0010011, 0b110);
+  INSN(xori,       0b0010011, 0b100);
+
+#undef INSN
+
+#define INSN(NAME, op, funct3) /* same layout as the signed-immediate form, but the immediate is taken and range-checked as unsigned 12-bit */ \
+  void NAME(Register Rd, Register Rs1, uint32_t imm) {                                  \
+    guarantee(is_unsigned_imm_in_range(imm, 12, 0), "Immediate is out of validity");    \
+    unsigned encoding = 0;                                                              \
+    patch((address)&encoding, 6, 0, op);                                                \
+    patch_reg((address)&encoding, 7, Rd);                                               \
+    patch((address)&encoding, 14, 12, funct3);                                          \
+    patch_reg((address)&encoding, 15, Rs1);                                             \
+    patch((address)&encoding, 31, 20, imm & 0x00000fff);                                \
+    emit(encoding);                                                                     \
+  }
+
+  INSN(sltiu, 0b0010011, 0b011);
+
+#undef INSN
+
+// Shift Immediate Instruction: 6-bit shift amount in bits 25:20, funct6 in bits 31:26
+#define INSN(NAME, op, funct3, funct6)                                   \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {                 \
+    guarantee(shamt <= 0x3f, "Shamt is invalid");                        \
+    unsigned encoding = 0;                                               \
+    patch((address)&encoding, 6, 0, op);                                 \
+    patch_reg((address)&encoding, 7, Rd);                                \
+    patch((address)&encoding, 14, 12, funct3);                           \
+    patch_reg((address)&encoding, 15, Rs1);                              \
+    patch((address)&encoding, 25, 20, shamt);                            \
+    patch((address)&encoding, 31, 26, funct6);                           \
+    emit(encoding);                                                      \
+  }
+
+  INSN(_slli, 0b0010011, 0b001, 0b000000);
+  INSN(_srai, 0b0010011, 0b101, 0b010000);
+  INSN(_srli, 0b0010011, 0b101, 0b000000);
+
+#undef INSN
+
+// Shift Word Immediate Instruction: 5-bit shift amount in bits 24:20, funct7 in bits 31:25
+#define INSN(NAME, op, funct3, funct7)                                  \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {                \
+    guarantee(shamt <= 0x1f, "Shamt is invalid");                       \
+    unsigned encoding = 0;                                              \
+    patch((address)&encoding, 6, 0, op);                                \
+    patch_reg((address)&encoding, 7, Rd);                               \
+    patch((address)&encoding, 14, 12, funct3);                          \
+    patch_reg((address)&encoding, 15, Rs1);                             \
+    patch((address)&encoding, 24, 20, shamt);                           \
+    patch((address)&encoding, 31, 25, funct7);                          \
+    emit(encoding);                                                     \
+  }
+
+  INSN(slliw, 0b0011011, 0b001, 0b0000000);
+  INSN(sraiw, 0b0011011, 0b101, 0b0100000);
+  INSN(srliw, 0b0011011, 0b101, 0b0000000);
+
+#undef INSN
+
+// Upper Immediate Instruction: bits 31:12 of imm are encoded in place; the low 12 bits of imm are dropped
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rd, int32_t imm) {                                 \
+    /* shift out the low 12 bits, then keep a 20-bit field */           \
+    const int32_t upper20 = (imm >> 12) & 0x000fffff;                   \
+    unsigned encoding = 0;                                              \
+    patch((address)&encoding, 6, 0, op);                                \
+    patch_reg((address)&encoding, 7, Rd);                               \
+    patch((address)&encoding, 31, 12, upper20);                         \
+    emit(encoding);                                                     \
+  }
+
+  INSN(_lui,  0b0110111);
+  INSN(auipc, 0b0010111);
+
+#undef INSN
+
+// Float and Double Register Instruction with three sources: Rs1 at bit 15, Rs2 at bit 20, Rs3 at bit 27, funct2 in bits 26:25
+#define INSN(NAME, op, funct2)                                                                                     \
+  void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) {    \
+    unsigned encoding = 0;                                                                                         \
+    patch((address)&encoding, 6, 0, op);                                                                           \
+    patch_reg((address)&encoding, 7, Rd);                                                                          \
+    patch((address)&encoding, 14, 12, rm);                                                                         \
+    patch_reg((address)&encoding, 15, Rs1);                                                                        \
+    patch_reg((address)&encoding, 20, Rs2);                                                                        \
+    patch((address)&encoding, 26, 25, funct2);                                                                     \
+    patch_reg((address)&encoding, 27, Rs3);                                                                        \
+    emit(encoding);                                                                                                \
+  }
+
+  INSN(fmadd_s,   0b1000011,  0b00);
+  INSN(fmsub_s,   0b1000111,  0b00);
+  INSN(fnmsub_s,  0b1001011,  0b00);
+  INSN(fnmadd_s,  0b1001111,  0b00);
+  INSN(fmadd_d,   0b1000011,  0b01);
+  INSN(fmsub_d,   0b1000111,  0b01);
+  INSN(fnmsub_d,  0b1001011,  0b01);
+  INSN(fnmadd_d,  0b1001111,  0b01);
+
+#undef INSN
+
+// Float and Double Register Instruction with two sources and no rounding mode (sign-injection, min, max)
+#define INSN(NAME, op, funct3, funct7)                                        \
+  void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) {         \
+    unsigned encoding = 0;                                                    \
+    patch((address)&encoding, 6, 0, op);                                      \
+    patch_reg((address)&encoding, 7, Rd);                                     \
+    patch((address)&encoding, 14, 12, funct3);                                \
+    patch_reg((address)&encoding, 15, Rs1);                                   \
+    patch_reg((address)&encoding, 20, Rs2);                                   \
+    patch((address)&encoding, 31, 25, funct7);                                \
+    emit(encoding);                                                           \
+  }
+
+  INSN(fsgnj_s,  0b1010011, 0b000, 0b0010000);
+  INSN(fsgnjn_s, 0b1010011, 0b001, 0b0010000);
+  INSN(fsgnjx_s, 0b1010011, 0b010, 0b0010000);
+  INSN(fmin_s,   0b1010011, 0b000, 0b0010100);
+  INSN(fmax_s,   0b1010011, 0b001, 0b0010100);
+  INSN(fsgnj_d,  0b1010011, 0b000, 0b0010001);
+  INSN(fsgnjn_d, 0b1010011, 0b001, 0b0010001);
+  INSN(fsgnjx_d, 0b1010011, 0b010, 0b0010001);
+  INSN(fmin_d,   0b1010011, 0b000, 0b0010101);
+  INSN(fmax_d,   0b1010011, 0b001, 0b0010101);
+
+#undef INSN
+
+// Float and Double Register Compare Instruction: two float sources, destination is an integer register
+#define INSN(NAME, op, funct3, funct7)                                    \
+  void NAME(Register Rd, FloatRegister Rs1, FloatRegister Rs2) {          \
+    unsigned encoding = 0;                                                \
+    patch((address)&encoding, 6, 0, op);                                  \
+    patch_reg((address)&encoding, 7, Rd);                                 \
+    patch((address)&encoding, 14, 12, funct3);                            \
+    patch_reg((address)&encoding, 15, Rs1);                               \
+    patch_reg((address)&encoding, 20, Rs2);                               \
+    patch((address)&encoding, 31, 25, funct7);                            \
+    emit(encoding);                                                       \
+  }
+
+  INSN(feq_s,    0b1010011, 0b010, 0b1010000);
+  INSN(flt_s,    0b1010011, 0b001, 0b1010000);
+  INSN(fle_s,    0b1010011, 0b000, 0b1010000);
+  INSN(feq_d,    0b1010011, 0b010, 0b1010001);
+  INSN(fle_d,    0b1010011, 0b000, 0b1010001);
+  INSN(flt_d,    0b1010011, 0b001, 0b1010001);
+#undef INSN
+
+// Float and Double Arith Instruction: two float sources, rounding mode in bits 14:12, funct7 in bits 31:25
+#define INSN(NAME, op, funct7)                                                                  \
+  void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) {    \
+    unsigned encoding = 0;                                                                      \
+    patch((address)&encoding, 6, 0, op);                                                        \
+    patch_reg((address)&encoding, 7, Rd);                                                       \
+    patch((address)&encoding, 14, 12, rm);                                                      \
+    patch_reg((address)&encoding, 15, Rs1);                                                     \
+    patch_reg((address)&encoding, 20, Rs2);                                                     \
+    patch((address)&encoding, 31, 25, funct7);                                                  \
+    emit(encoding);                                                                             \
+  }
+
+  INSN(fadd_s,   0b1010011, 0b0000000);
+  INSN(fsub_s,   0b1010011, 0b0000100);
+  INSN(fmul_s,   0b1010011, 0b0001000);
+  INSN(fdiv_s,   0b1010011, 0b0001100);
+  INSN(fadd_d,   0b1010011, 0b0000001);
+  INSN(fsub_d,   0b1010011, 0b0000101);
+  INSN(fmul_d,   0b1010011, 0b0001001);
+  INSN(fdiv_d,   0b1010011, 0b0001101);
+
+#undef INSN
+
+// Integer to Float and Double Conversion Instructions
+// Generates NAME(Rd, Rs1, rm): the source Rs1 is a general-purpose Register
+// and the destination Rd is a FloatRegister. funct5 is written to bits 24:20
+// and funct7 to bits 31:25; rm (bits 14:12) defaults to rne.
+#define INSN(NAME, op, funct5, funct7)                                  \
+  void NAME(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) {    \
+    unsigned insn = 0;                                                  \
+    patch((address)&insn, 6, 0, op);                                    \
+    patch((address)&insn, 14, 12, rm);                                  \
+    patch((address)&insn, 24, 20, funct5);                              \
+    patch((address)&insn, 31, 25, funct7);                              \
+    patch_reg((address)&insn, 7, Rd);                                   \
+    patch_reg((address)&insn, 15, Rs1);                                 \
+    emit(insn);                                                         \
+  }
+
+  INSN(fcvt_s_w,   0b1010011, 0b00000, 0b1101000);
+  INSN(fcvt_s_wu,  0b1010011, 0b00001, 0b1101000);
+  INSN(fcvt_s_l,   0b1010011, 0b00010, 0b1101000);
+  INSN(fcvt_s_lu,  0b1010011, 0b00011, 0b1101000);
+  INSN(fcvt_d_w,   0b1010011, 0b00000, 0b1101001);
+  INSN(fcvt_d_wu,  0b1010011, 0b00001, 0b1101001);
+  INSN(fcvt_d_l,   0b1010011, 0b00010, 0b1101001);
+  INSN(fcvt_d_lu,  0b1010011, 0b00011, 0b1101001);
+
+#undef INSN
+
+// Float and Double to Integer Conversion Instructions
+// Generates NAME(Rd, Rs1, rm): the source Rs1 is a FloatRegister and the
+// destination Rd is a general-purpose Register. funct5 is written to bits
+// 24:20 and funct7 to bits 31:25.
+// Note: rm (bits 14:12) defaults to rtz in this group, not rne.
+#define INSN(NAME, op, funct5, funct7)                                  \
+  void NAME(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) {    \
+    unsigned insn = 0;                                                  \
+    patch((address)&insn, 6, 0, op);                                    \
+    patch((address)&insn, 14, 12, rm);                                  \
+    patch((address)&insn, 24, 20, funct5);                              \
+    patch((address)&insn, 31, 25, funct7);                              \
+    patch_reg((address)&insn, 7, Rd);                                   \
+    patch_reg((address)&insn, 15, Rs1);                                 \
+    emit(insn);                                                         \
+  }
+
+  INSN(fcvt_w_s,   0b1010011, 0b00000, 0b1100000);
+  INSN(fcvt_l_s,   0b1010011, 0b00010, 0b1100000);
+  INSN(fcvt_wu_s,  0b1010011, 0b00001, 0b1100000);
+  INSN(fcvt_lu_s,  0b1010011, 0b00011, 0b1100000);
+  INSN(fcvt_w_d,   0b1010011, 0b00000, 0b1100001);
+  INSN(fcvt_wu_d,  0b1010011, 0b00001, 0b1100001);
+  INSN(fcvt_l_d,   0b1010011, 0b00010, 0b1100001);
+  INSN(fcvt_lu_d,  0b1010011, 0b00011, 0b1100001);
+
+#undef INSN
+
+// Float and Double Move Instruction (integer register to float register)
+// Generates NAME(Rd, Rs1): Rs1 is a general-purpose Register, Rd is a
+// FloatRegister. Fields written: bits 6:0 = op, 14:12 = funct3,
+// 24:20 = funct5, 31:25 = funct7; Rd and Rs1 are patched at bits 7 and 15.
+// funct5 is written as a full 5-bit field (bits 24:20), the same way the
+// fcvt encoders do it, so that a non-zero funct5 would be encoded correctly
+// instead of being squeezed into the single bit 20.
+#define INSN(NAME, op, funct3, funct5, funct7)       \
+  void NAME(FloatRegister Rd, Register Rs1) {        \
+    unsigned insn = 0;                               \
+    patch((address)&insn, 6, 0, op);                 \
+    patch((address)&insn, 14, 12, funct3);           \
+    patch((address)&insn, 24, 20, funct5);           \
+    patch((address)&insn, 31, 25, funct7);           \
+    patch_reg((address)&insn, 7, Rd);                \
+    patch_reg((address)&insn, 15, Rs1);              \
+    emit(insn);                                      \
+  }
+
+  INSN(fmv_w_x,  0b1010011, 0b000, 0b00000, 0b1111000);
+  INSN(fmv_d_x,  0b1010011, 0b000, 0b00000, 0b1111001);
+
+#undef INSN
+
+// Float and Double Classify and Move Instructions (float register to
+// integer register)
+// Generates NAME(Rd, Rs1): Rs1 is a FloatRegister, Rd is a general-purpose
+// Register. Fields written: bits 6:0 = op, 14:12 = funct3, 24:20 = funct5,
+// 31:25 = funct7; Rd and Rs1 are patched at bits 7 and 15.
+// funct5 is written as a full 5-bit field (bits 24:20), the same way the
+// fcvt encoders do it, so that a non-zero funct5 would be encoded correctly
+// instead of being squeezed into the single bit 20.
+#define INSN(NAME, op, funct3, funct5, funct7)            \
+  void NAME(Register Rd, FloatRegister Rs1) {             \
+    unsigned insn = 0;                                    \
+    patch((address)&insn, 6, 0, op);                      \
+    patch((address)&insn, 14, 12, funct3);                \
+    patch((address)&insn, 24, 20, funct5);                \
+    patch((address)&insn, 31, 25, funct7);                \
+    patch_reg((address)&insn, 7, Rd);                     \
+    patch_reg((address)&insn, 15, Rs1);                   \
+    emit(insn);                                           \
+  }
+
+  INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000);
+  INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001);
+  INSN(fmv_x_w,  0b1010011, 0b000, 0b00000, 0b1110000);
+  INSN(fmv_x_d,  0b1010011, 0b000, 0b00000, 0b1110001);
+
+#undef INSN
+
+// ==========================
+// RISC-V Vector Extension
+// ==========================
+// Element-width selector written into the vsew field of vtype.
+// e8..e64 take the consecutive values 0..3; RESERVED is the next value.
+enum SEW {
+  e8       = 0,
+  e16      = 1,
+  e32      = 2,
+  e64      = 3,
+  RESERVED = 4
+};
+
+// Register-group multiplier written into the 3-bit vlmul field of vtype.
+// Listed in ascending encoding order; the mf* enumerators use 0b101..0b111.
+enum LMUL {
+  m1  = 0b000,
+  m2  = 0b001,
+  m4  = 0b010,
+  m8  = 0b011,
+  mf8 = 0b101,
+  mf4 = 0b110,
+  mf2 = 0b111
+};
+
+// Value of the single-bit vma field of vtype.
+enum VMA {
+  mu = 0, // undisturbed
+  ma = 1  // agnostic
+};
+
+// Value of the single-bit vta field of vtype.
+enum VTA {
+  tu = 0, // undisturbed
+  ta = 1  // agnostic
+};
+
+// Maps an element size in bytes to a SEW value: the result is the exact
+// base-2 logarithm of ebytes, reinterpreted as an Assembler::SEW.
+// Asserts that ebytes lies in the range 1..8.
+static Assembler::SEW elembytes_to_sew(int ebytes) {
+  assert(ebytes > 0 && ebytes <= 8, "unsupported element size");
+  const int log2_bytes = exact_log2(ebytes);
+  return (Assembler::SEW) log2_bytes;
+}
+
+// Maps a BasicType to a SEW value by looking up its array-element size in
+// bytes (type2aelembytes) and converting that size with elembytes_to_sew.
+static Assembler::SEW elemtype_to_sew(BasicType etype) {
+  const int ebytes = type2aelembytes(etype);
+  return Assembler::elembytes_to_sew(ebytes);
+}
+
+// Writes a vtype immediate into the local variable `insn` of the enclosing
+// encoder. lsb is the bit position of the field's lowest bit and hsb the
+// position of its highest bit:
+//   bits lsb+2 : lsb     vlmul
+//   bits lsb+5 : lsb+3   vsew
+//   bit  lsb+6           vta
+//   bit  lsb+7           vma
+//   bits hsb-1 : lsb+8   zero
+//   bit  hsb             vill
+// When vill is set, every other vtype field must be zero. The OR of the
+// fields is parenthesised before the comparison: `==` binds tighter than
+// `|`, so without the parentheses only `vma == 0` would be compared.
+// The expansion ends without a semicolon; the caller supplies it.
+#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill)   \
+    if (vill == 1) {                                         \
+      guarantee(((vlmul | vsew | vta | vma) == 0),           \
+                "the other bits in vtype shall be zero");    \
+    }                                                        \
+    patch((address)&insn, lsb + 2, lsb, vlmul);              \
+    patch((address)&insn, lsb + 5, lsb + 3, vsew);           \
+    patch((address)&insn, lsb + 6, vta);                     \
+    patch((address)&insn, lsb + 7, vma);                     \
+    patch((address)&insn, hsb - 1, lsb + 8, 0);              \
+    patch((address)&insn, hsb, vill)
+
+// Generates NAME(Rd, Rs1, sew, lmul, vma, vta, vill).
+// The vtype immediate is assembled by patch_vtype into bits 30:20 (vill at
+// bit 30, bits 29:28 zero) and bit 31 is set to 0.
+// Defaults: lmul = m1, vma = mu, vta = tu, vill = false.
+#define INSN(NAME, op, funct3)                                            \
+  void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1,           \
+            VMA vma = mu, VTA vta = tu, bool vill = false) {              \
+    unsigned insn = 0;                                                    \
+    patch((address)&insn, 6, 0, op);                                      \
+    patch((address)&insn, 14, 12, funct3);                                \
+    patch_vtype(30, 20, lmul, sew, vta, vma, vill);                       \
+    patch((address)&insn, 31, 0);                                         \
+    patch_reg((address)&insn, 7, Rd);                                     \
+    patch_reg((address)&insn, 15, Rs1);                                   \
+    emit(insn);                                                           \
+  }
+
+  INSN(vsetvli, 0b1010111, 0b111);
+
+#undef INSN
+
+// Generates NAME(Rd, imm, sew, lmul, vma, vta, vill).
+// imm is checked with is_unsigned_imm_in_range(imm, 5, 0) and written to
+// bits 19:15. The vtype immediate is assembled by patch_vtype into bits
+// 29:20 (vill at bit 29, bit 28 zero) and bits 31:30 are set to 0b11.
+// Defaults: lmul = m1, vma = mu, vta = tu, vill = false.
+#define INSN(NAME, op, funct3)                                            \
+  void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1,           \
+            VMA vma = mu, VTA vta = tu, bool vill = false) {              \
+    unsigned insn = 0;                                                    \
+    guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid");     \
+    patch((address)&insn, 6, 0, op);                                      \
+    patch((address)&insn, 14, 12, funct3);                                \
+    patch((address)&insn, 19, 15, imm);                                   \
+    patch_vtype(29, 20, lmul, sew, vta, vma, vill);                       \
+    patch((address)&insn, 31, 30, 0b11);                                  \
+    patch_reg((address)&insn, 7, Rd);                                     \
+    emit(insn);                                                           \
+  }
+
+  INSN(vsetivli, 0b1010111, 0b111);
+
+#undef INSN
+
+// patch_vtype is only needed by the two encoders above.
+#undef patch_vtype
+
+// Generates NAME(Rd, Rs1, Rs2) with all three operands in general-purpose
+// Registers. Fields written: bits 6:0 = op, 14:12 = funct3, 31:25 = funct7;
+// Rd, Rs1 and Rs2 are patched at bits 7, 15 and 20.
+#define INSN(NAME, op, funct3, funct7)                          \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {          \
+    unsigned insn = 0;                                          \
+    patch((address)&insn, 6,  0, op);                           \
+    patch((address)&insn, 14, 12, funct3);                      \
+    patch((address)&insn, 31, 25, funct7);                      \
+    patch_reg((address)&insn, 7, Rd);                           \
+    patch_reg((address)&insn, 15, Rs1);                         \
+    patch_reg((address)&insn, 20, Rs2);                         \
+    emit(insn);                                                 \
+  }
+
+  // Vector Configuration Instruction
+  INSN(vsetvl, 0b1010111, 0b111, 0b1000000);
+
+#undef INSN
+
+// Value of the single-bit vm field (bit 25) of the vector encoders below.
+// `unmasked` is the default argument of every encoder that takes a mask.
+enum VectorMask {
+  unmasked = 0b1,
+  v0_t     = 0b0
+};
+
+// Shared body of the vector arithmetic encoders below. The expansion declares
+// the local `insn`, fills in its fields and emits it, so it can be used only
+// once per scope. The expansion ends without a semicolon.
+//   bits 6:0   op             bits 14:12  funct3
+//   bits 19:15 Reg_or_Imm5    bit  25     vm
+//   bits 31:26 funct6
+// Reg (the destination) is written with patch_reg at bit 7 and Vs2 with
+// patch_reg at bit 20. Reg_or_Imm5 is written with patch(), i.e. as a plain
+// number, so callers pass either a register's encoding_nocheck() or an
+// immediate already reduced to 5 bits.
+#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6)            \
+    unsigned insn = 0;                                                         \
+    patch((address)&insn, 6, 0, op);                                           \
+    patch((address)&insn, 14, 12, funct3);                                     \
+    patch((address)&insn, 19, 15, Reg_or_Imm5);                                \
+    patch((address)&insn, 25, vm);                                             \
+    patch((address)&insn, 31, 26, funct6);                                     \
+    patch_reg((address)&insn, 7, Reg);                                         \
+    patch_reg((address)&insn, 20, Vs2);                                        \
+    emit(insn)
+
+// r2_vm
+// Generates NAME(Rd, Vs2, vm): one vector source, result in a general-purpose
+// Register. The Vs1 macro argument is not a register operand; it is a fixed
+// 5-bit selector from the table below, written to bits 19:15.
+#define INSN(NAME, op, funct3, Vs1, funct6)                                    \
+  void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) {       \
+    patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6);                        \
+  }
+
+  // Vector Mask
+  INSN(vpopc_m,  0b1010111, 0b010, 0b10000, 0b010000);
+  INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000);
+#undef INSN
+
+// Generates NAME(Vd, Vs2, vm): one vector source, vector destination.
+// The Vs1 macro argument is not a register operand; it is a fixed 5-bit
+// selector from the table below, written to bits 19:15. Several entries
+// share op/funct3/funct6 and differ only in that selector.
+#define INSN(NAME, op, funct3, Vs1, funct6)                                    \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
+    patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6);                        \
+  }
+
+  // Vector Integer Extension
+  INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010);
+  INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010);
+  INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010);
+  INSN(vsext_vf2, 0b1010111, 0b010, 0b00111, 0b010010);
+  INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010);
+  INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010);
+
+  // Vector Mask
+  INSN(vmsbf_m,   0b1010111, 0b010, 0b00001, 0b010100);
+  INSN(vmsif_m,   0b1010111, 0b010, 0b00011, 0b010100);
+  INSN(vmsof_m,   0b1010111, 0b010, 0b00010, 0b010100);
+  INSN(viota_m,   0b1010111, 0b010, 0b10000, 0b010100);
+
+  // Vector Single-Width Floating-Point/Integer Type-Convert Instructions
+  INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010);
+  INSN(vfcvt_x_f_v,  0b1010111, 0b001, 0b00001, 0b010010);
+  INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010);
+  INSN(vfcvt_f_x_v,  0b1010111, 0b001, 0b00011, 0b010010);
+  INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010);
+  INSN(vfcvt_rtz_x_f_v,  0b1010111, 0b001, 0b00111, 0b010010);
+
+  // Vector Floating-Point Instruction
+  INSN(vfsqrt_v,  0b1010111, 0b001, 0b00000, 0b010011);
+  INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011);
+
+#undef INSN
+
+// r2rd
+// Generates NAME(Vd, Vs2) with no mask parameter: both the simm5 value
+// (bits 19:15) and the vm bit (bit 25) are constants taken from the table.
+#define INSN(NAME, op, funct3, simm5, vm, funct6)         \
+  void NAME(VectorRegister Vd, VectorRegister Vs2) {      \
+    patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \
+  }
+
+  // Vector Whole Vector Register Move
+  INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111);
+  INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111);
+  INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111);
+  INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111);
+
+#undef INSN
+
+// Generates NAME(Rd, Vs2): vector source, FloatRegister destination.
+// The Vs1 field (bits 19:15) and the vm bit are constants from the table.
+#define INSN(NAME, op, funct3, Vs1, vm, funct6)           \
+  void NAME(FloatRegister Rd, VectorRegister Vs2) {       \
+    patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6);   \
+  }
+
+  // Vector Floating-Point Move Instruction
+  INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000);
+
+#undef INSN
+
+// Generates NAME(Rd, Vs2): vector source, general-purpose Register
+// destination. The Vs1 field (bits 19:15) and the vm bit are constants from
+// the table.
+#define INSN(NAME, op, funct3, Vs1, vm, funct6)          \
+  void NAME(Register Rd, VectorRegister Vs2) {           \
+    patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6);  \
+  }
+
+  // Vector Integer Scalar Move Instructions
+  INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 0b010000);
+
+#undef INSN
+
+// r_vm
+// Generates NAME(Vd, Vs2, imm, vm) taking an unsigned immediate. imm must
+// pass is_unsigned_imm_in_range(imm, 5, 0); it is then masked with 0x1f and
+// written to bits 19:15.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {       \
+    guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid");                              \
+    patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6);                         \
+  }
+
+  // Vector Single-Width Bit Shift Instructions
+  INSN(vsra_vi,    0b1010111, 0b011, 0b101001);
+  INSN(vsrl_vi,    0b1010111, 0b011, 0b101000);
+  INSN(vsll_vi,    0b1010111, 0b011, 0b100101);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs1, Vs2, vm) with three vector operands.
+// Operand order: Vs1 comes BEFORE Vs2 in this group. Vs1's
+// encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \
+    patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011);
+  INSN(vfmsub_vv,  0b1010111, 0b001, 0b101010);
+  INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001);
+  INSN(vfmadd_vv,  0b1010111, 0b001, 0b101000);
+  INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111);
+  INSN(vfmsac_vv,  0b1010111, 0b001, 0b101110);
+  INSN(vfmacc_vv,  0b1010111, 0b001, 0b101100);
+  INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101);
+
+  // Vector Single-Width Integer Multiply-Add Instructions
+  INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011);
+  INSN(vmadd_vv,  0b1010111, 0b010, 0b101001);
+  INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111);
+  INSN(vmacc_vv,  0b1010111, 0b010, 0b101101);
+
+#undef INSN
+
+// Generates NAME(Vd, Rs1, Vs2, vm): scalar operand in a general-purpose
+// Register. Operand order: Rs1 comes BEFORE Vs2 in this group. Rs1's
+// encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) {       \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Single-Width Integer Multiply-Add Instructions
+  INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011);
+  INSN(vmadd_vx,  0b1010111, 0b110, 0b101001);
+  INSN(vnmsac_vx, 0b1010111, 0b110, 0b101111);
+  INSN(vmacc_vx,  0b1010111, 0b110, 0b101101);
+
+  // Vector Integer Reverse Subtract (scalar operand first, like vrsub_vi)
+  INSN(vrsub_vx,  0b1010111, 0b100, 0b000011);
+
+#undef INSN
+
+// Generates NAME(Vd, Rs1, Vs2, vm): scalar operand in a FloatRegister.
+// Operand order: Rs1 comes BEFORE Vs2 in this group. Rs1's
+// encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) {  \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+  INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011);
+  INSN(vfmsub_vf,  0b1010111, 0b101, 0b101010);
+  INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001);
+  INSN(vfmadd_vf,  0b1010111, 0b101, 0b101000);
+  INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111);
+  INSN(vfmsac_vf,  0b1010111, 0b101, 0b101110);
+  INSN(vfmacc_vf,  0b1010111, 0b101, 0b101100);
+  INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs2, Vs1, vm) with three vector operands.
+// Operand order: Vs2 comes BEFORE Vs1 in this group (the multiply-add
+// encoders take Vs1 first). Vs1's encoding_nocheck() is written to bits
+// 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \
+    patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Single-Width Floating-Point Reduction Instructions
+  INSN(vfredsum_vs,   0b1010111, 0b001, 0b000001);
+  INSN(vfredosum_vs,  0b1010111, 0b001, 0b000011);
+  INSN(vfredmin_vs,   0b1010111, 0b001, 0b000101);
+  INSN(vfredmax_vs,   0b1010111, 0b001, 0b000111);
+
+  // Vector Single-Width Integer Reduction Instructions
+  INSN(vredsum_vs,    0b1010111, 0b010, 0b000000);
+  INSN(vredand_vs,    0b1010111, 0b010, 0b000001);
+  INSN(vredor_vs,     0b1010111, 0b010, 0b000010);
+  INSN(vredxor_vs,    0b1010111, 0b010, 0b000011);
+  INSN(vredminu_vs,   0b1010111, 0b010, 0b000100);
+  INSN(vredmin_vs,    0b1010111, 0b010, 0b000101);
+  INSN(vredmaxu_vs,   0b1010111, 0b010, 0b000110);
+  INSN(vredmax_vs,    0b1010111, 0b010, 0b000111);
+
+  // Vector Floating-Point Compare Instructions
+  INSN(vmfle_vv, 0b1010111, 0b001, 0b011001);
+  INSN(vmflt_vv, 0b1010111, 0b001, 0b011011);
+  INSN(vmfne_vv, 0b1010111, 0b001, 0b011100);
+  INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000);
+
+  // Vector Floating-Point Sign-Injection Instructions
+  INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010);
+  INSN(vfsgnjn_vv, 0b1010111, 0b001, 0b001001);
+  INSN(vfsgnj_vv,  0b1010111, 0b001, 0b001000);
+
+  // Vector Floating-Point MIN/MAX Instructions
+  INSN(vfmax_vv,   0b1010111, 0b001, 0b000110);
+  INSN(vfmin_vv,   0b1010111, 0b001, 0b000100);
+
+  // Vector Single-Width Floating-Point Multiply/Divide Instructions
+  INSN(vfdiv_vv,   0b1010111, 0b001, 0b100000);
+  INSN(vfmul_vv,   0b1010111, 0b001, 0b100100);
+
+  // Vector Single-Width Floating-Point Add/Subtract Instructions
+  INSN(vfsub_vv, 0b1010111, 0b001, 0b000010);
+  INSN(vfadd_vv, 0b1010111, 0b001, 0b000000);
+
+  // Vector Single-Width Fractional Multiply with Rounding and Saturation
+  INSN(vsmul_vv, 0b1010111, 0b000, 0b100111);
+
+  // Vector Integer Divide Instructions
+  INSN(vrem_vv,  0b1010111, 0b010, 0b100011);
+  INSN(vremu_vv, 0b1010111, 0b010, 0b100010);
+  INSN(vdiv_vv,  0b1010111, 0b010, 0b100001);
+  INSN(vdivu_vv, 0b1010111, 0b010, 0b100000);
+
+  // Vector Single-Width Integer Multiply Instructions
+  INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110);
+  INSN(vmulhu_vv,  0b1010111, 0b010, 0b100100);
+  INSN(vmulh_vv,   0b1010111, 0b010, 0b100111);
+  INSN(vmul_vv,    0b1010111, 0b010, 0b100101);
+
+  // Vector Integer Min/Max Instructions
+  INSN(vmax_vv,  0b1010111, 0b000, 0b000111);
+  INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110);
+  INSN(vmin_vv,  0b1010111, 0b000, 0b000101);
+  INSN(vminu_vv, 0b1010111, 0b000, 0b000100);
+
+  // Vector Integer Comparison Instructions
+  INSN(vmsle_vv,  0b1010111, 0b000, 0b011101);
+  INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100);
+  INSN(vmslt_vv,  0b1010111, 0b000, 0b011011);
+  INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010);
+  INSN(vmsne_vv,  0b1010111, 0b000, 0b011001);
+  INSN(vmseq_vv,  0b1010111, 0b000, 0b011000);
+
+  // Vector Single-Width Bit Shift Instructions
+  INSN(vsra_vv, 0b1010111, 0b000, 0b101001);
+  INSN(vsrl_vv, 0b1010111, 0b000, 0b101000);
+  INSN(vsll_vv, 0b1010111, 0b000, 0b100101);
+
+  // Vector Bitwise Logical Instructions
+  INSN(vxor_vv, 0b1010111, 0b000, 0b001011);
+  INSN(vor_vv,  0b1010111, 0b000, 0b001010);
+  INSN(vand_vv, 0b1010111, 0b000, 0b001001);
+
+  // Vector Single-Width Integer Add and Subtract
+  INSN(vsub_vv, 0b1010111, 0b000, 0b000010);
+  INSN(vadd_vv, 0b1010111, 0b000, 0b000000);
+
+#undef INSN
+
+
+// Generates NAME(Vd, Vs2, Rs1, vm): scalar operand in a general-purpose
+// Register. Operand order: Vs2 comes BEFORE Rs1 in this group. Rs1's
+// encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) {       \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Integer Divide Instructions
+  INSN(vrem_vx,  0b1010111, 0b110, 0b100011);
+  INSN(vremu_vx, 0b1010111, 0b110, 0b100010);
+  INSN(vdiv_vx,  0b1010111, 0b110, 0b100001);
+  INSN(vdivu_vx, 0b1010111, 0b110, 0b100000);
+
+  // Vector Single-Width Integer Multiply Instructions
+  INSN(vmulhsu_vx, 0b1010111, 0b110, 0b100110);
+  INSN(vmulhu_vx,  0b1010111, 0b110, 0b100100);
+  INSN(vmulh_vx,   0b1010111, 0b110, 0b100111);
+  INSN(vmul_vx,    0b1010111, 0b110, 0b100101);
+
+  // Vector Integer Min/Max Instructions
+  INSN(vmax_vx,  0b1010111, 0b100, 0b000111);
+  INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110);
+  INSN(vmin_vx,  0b1010111, 0b100, 0b000101);
+  INSN(vminu_vx, 0b1010111, 0b100, 0b000100);
+
+  // Vector Integer Comparison Instructions
+  INSN(vmsgt_vx,  0b1010111, 0b100, 0b011111);
+  INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110);
+  INSN(vmsle_vx,  0b1010111, 0b100, 0b011101);
+  INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100);
+  INSN(vmslt_vx,  0b1010111, 0b100, 0b011011);
+  INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010);
+  INSN(vmsne_vx,  0b1010111, 0b100, 0b011001);
+  INSN(vmseq_vx,  0b1010111, 0b100, 0b011000);
+
+  // Vector Narrowing Integer Right Shift Instructions
+  INSN(vnsra_wx, 0b1010111, 0b100, 0b101101);
+  INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100);
+
+  // Vector Single-Width Bit Shift Instructions
+  INSN(vsra_vx, 0b1010111, 0b100, 0b101001);
+  INSN(vsrl_vx, 0b1010111, 0b100, 0b101000);
+  INSN(vsll_vx, 0b1010111, 0b100, 0b100101);
+
+  // Vector Bitwise Logical Instructions
+  INSN(vxor_vx, 0b1010111, 0b100, 0b001011);
+  INSN(vor_vx,  0b1010111, 0b100, 0b001010);
+  INSN(vand_vx, 0b1010111, 0b100, 0b001001);
+
+  // Vector Single-Width Integer Add and Subtract
+  INSN(vsub_vx, 0b1010111, 0b100, 0b000010);
+  INSN(vadd_vx, 0b1010111, 0b100, 0b000000);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs2, Rs1, vm): scalar operand in a FloatRegister.
+// Operand order: Vs2 comes BEFORE Rs1 in this group. Rs1's
+// encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) {  \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6);                        \
+  }
+
+  // Vector Floating-Point Compare Instructions
+  INSN(vmfge_vf, 0b1010111, 0b101, 0b011111);
+  INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101);
+  INSN(vmfle_vf, 0b1010111, 0b101, 0b011001);
+  INSN(vmflt_vf, 0b1010111, 0b101, 0b011011);
+  INSN(vmfne_vf, 0b1010111, 0b101, 0b011100);
+  INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000);
+
+  // Vector Floating-Point Sign-Injection Instructions
+  INSN(vfsgnjx_vf, 0b1010111, 0b101, 0b001010);
+  INSN(vfsgnjn_vf, 0b1010111, 0b101, 0b001001);
+  INSN(vfsgnj_vf,  0b1010111, 0b101, 0b001000);
+
+  // Vector Floating-Point MIN/MAX Instructions
+  INSN(vfmax_vf, 0b1010111, 0b101, 0b000110);
+  INSN(vfmin_vf, 0b1010111, 0b101, 0b000100);
+
+  // Vector Single-Width Floating-Point Multiply/Divide Instructions
+  INSN(vfdiv_vf,  0b1010111, 0b101, 0b100000);
+  INSN(vfmul_vf,  0b1010111, 0b101, 0b100100);
+  INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001);
+
+  // Vector Single-Width Floating-Point Add/Subtract Instructions
+  INSN(vfsub_vf,  0b1010111, 0b101, 0b000010);
+  INSN(vfadd_vf,  0b1010111, 0b101, 0b000000);
+  INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs2, imm, vm) taking a signed immediate. imm must pass
+// is_imm_in_range(imm, 5, 0); its low five bits (imm & 0x1f after conversion
+// to uint32_t) are written to bits 19:15.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) {        \
+    guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid");                                       \
+    patch_VArith(op, Vd, funct3, (uint32_t)imm & 0x1f, Vs2, vm, funct6);                           \
+  }
+
+  // Vector Integer Comparison, Bitwise Logical and Add Instructions
+  // (vector-immediate forms)
+  INSN(vmsgt_vi,  0b1010111, 0b011, 0b011111);
+  INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110);
+  INSN(vmsle_vi,  0b1010111, 0b011, 0b011101);
+  INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100);
+  INSN(vmsne_vi,  0b1010111, 0b011, 0b011001);
+  INSN(vmseq_vi,  0b1010111, 0b011, 0b011000);
+  INSN(vxor_vi,   0b1010111, 0b011, 0b001011);
+  INSN(vor_vi,    0b1010111, 0b011, 0b001010);
+  INSN(vand_vi,   0b1010111, 0b011, 0b001001);
+  INSN(vadd_vi,   0b1010111, 0b011, 0b000000);
+
+#undef INSN
+
+// Generates NAME(Vd, imm, Vs2, vm). Operand order: the signed immediate
+// comes BEFORE Vs2 here. imm must pass is_imm_in_range(imm, 5, 0); its low
+// five bits are written to bits 19:15.
+#define INSN(NAME, op, funct3, funct6)                                                             \
+  void NAME(VectorRegister Vd, int32_t imm, VectorRegister Vs2, VectorMask vm = unmasked) {        \
+    guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid");                                       \
+    patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6);                         \
+  }
+
+  // Vector Integer Reverse Subtract (immediate form)
+  INSN(vrsub_vi, 0b1010111, 0b011, 0b000011);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs2, Vs1) with no mask parameter: the vm bit (bit 25)
+// is a constant taken from the table (0b1 for every entry below).
+// Vs1's encoding_nocheck() is written to bits 19:15; Vs2 is patched at bit 20.
+#define INSN(NAME, op, funct3, vm, funct6)                                   \
+  void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) {     \
+    patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6);  \
+  }
+
+  // Vector Compress Instruction
+  INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111);
+
+  // Vector Mask-Register Logical Instructions
+  INSN(vmxnor_mm,   0b1010111, 0b010, 0b1, 0b011111);
+  INSN(vmornot_mm,  0b1010111, 0b010, 0b1, 0b011100);
+  INSN(vmnor_mm,    0b1010111, 0b010, 0b1, 0b011110);
+  INSN(vmor_mm,     0b1010111, 0b010, 0b1, 0b011010);
+  INSN(vmxor_mm,    0b1010111, 0b010, 0b1, 0b011011);
+  INSN(vmandnot_mm, 0b1010111, 0b010, 0b1, 0b011000);
+  INSN(vmnand_mm,   0b1010111, 0b010, 0b1, 0b011101);
+  INSN(vmand_mm,    0b1010111, 0b010, 0b1, 0b011001);
+
+#undef INSN
+
+// Generates NAME(Vd, imm). The Vs2 field is not an operand: the table passes
+// the fixed register v0, which patch_VArith writes at bit 20; vm is likewise
+// a table constant. imm must pass is_imm_in_range(imm, 5, 0); its low five
+// bits are written to bits 19:15.
+#define INSN(NAME, op, funct3, Vs2, vm, funct6)                            \
+  void NAME(VectorRegister Vd, int32_t imm) {                              \
+    guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid");               \
+    patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \
+  }
+
+  // Vector Integer Move Instructions
+  INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111);
+
+#undef INSN
+
+// Generates NAME(Vd, Rs1) with Rs1 in a FloatRegister. The Vs2 field is not
+// an operand: the table passes the fixed register v0, and vm is a table
+// constant. Rs1's encoding_nocheck() is written to bits 19:15.
+#define INSN(NAME, op, funct3, Vs2, vm, funct6)                             \
+  void NAME(VectorRegister Vd, FloatRegister Rs1) {                         \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
+  }
+
+  // Floating-Point Scalar Move Instructions
+  INSN(vfmv_s_f, 0b1010111, 0b101, v0, 0b1, 0b010000);
+  // Vector Floating-Point Move Instruction
+  INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111);
+
+#undef INSN
+
+// Generates NAME(Vd, Vs1) with a vector source. The Vs2 field is not an
+// operand: the table passes the fixed register v0, and vm is a table
+// constant. Vs1's encoding_nocheck() is written to bits 19:15.
+#define INSN(NAME, op, funct3, Vs2, vm, funct6)                             \
+  void NAME(VectorRegister Vd, VectorRegister Vs1) {                        \
+    patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \
+  }
+
+  // Vector Integer Move Instructions
+  INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111);
+
+#undef INSN
+
+// Generates NAME(Vd, Rs1) with Rs1 in a general-purpose Register. The Vs2
+// field is not an operand: the table passes the fixed register v0, and vm is
+// a table constant. Rs1's encoding_nocheck() is written to bits 19:15.
+#define INSN(NAME, op, funct3, Vs2, vm, funct6)                             \
+   void NAME(VectorRegister Vd, Register Rs1) {                             \
+    patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \
+   }
+
+  // Integer Scalar Move Instructions
+  INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000);
+
+  // Vector Integer Move Instructions
+  INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111);
+
+#undef INSN
+// Last user of patch_VArith; the helper macro is retired here.
+#undef patch_VArith
+
+// Generates NAME(Vd, vm): an encoder whose only register operand is the
+// destination. funct13 fills bits 24:12 in a single patch, covering the
+// positions that the other vector encoders split into funct3 (14:12), the
+// bits 19:15 field and the Vs2 field (24:20). vm goes to bit 25 and funct6
+// to bits 31:26.
+#define INSN(NAME, op, funct13, funct6)                    \
+  void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \
+    unsigned insn = 0;                                     \
+    patch((address)&insn, 6, 0, op);                       \
+    patch((address)&insn, 24, 12, funct13);                \
+    patch((address)&insn, 25, vm);                         \
+    patch((address)&insn, 31, 26, funct6);                 \
+    patch_reg((address)&insn, 7, Vd);                      \
+    emit(insn);                                            \
+  }
+
+  // Vector Element Index Instruction
+  INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100);
+
+#undef INSN
+
+// Value of the 3-bit nf field (bits 31:29) of the vector load/store
+// encoders. gN is encoded as N - 1.
+enum Nf {
+  g1 = 0,
+  g2 = 1,
+  g3 = 2,
+  g4 = 3,
+  g5 = 4,
+  g6 = 5,
+  g7 = 6,
+  g8 = 7
+};
+
+// Shared body of the vector load/store encoders below. The expansion
+// declares the local `insn`, fills in its fields and emits it, so it can be
+// used only once per scope. The expansion ends without a semicolon.
+//   bits 6:0   op             bits 14:12  width
+//   bits 24:20 Reg_or_umop    bit  25     vm
+//   bits 27:26 mop            bit  28     mew
+//   bits 31:29 nf
+// VReg is written with patch_reg at bit 7 and Rs1 with patch_reg at bit 15.
+// Reg_or_umop is written with patch(), i.e. as a plain number, so callers
+// pass either a register's encoding_nocheck() or a 5-bit constant.
+#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \
+    unsigned insn = 0;                                                   \
+    patch((address)&insn, 6, 0, op);                                     \
+    patch((address)&insn, 14, 12, width);                                \
+    patch((address)&insn, 24, 20, Reg_or_umop);                          \
+    patch((address)&insn, 25, vm);                                       \
+    patch((address)&insn, 27, 26, mop);                                  \
+    patch((address)&insn, 28, mew);                                      \
+    patch((address)&insn, 31, 29, nf);                                   \
+    patch_reg((address)&insn, 7, VReg);                                  \
+    patch_reg((address)&insn, 15, Rs1);                                  \
+    emit(insn)
+
+// Generates NAME(Vd, Rs1, width, mew). width (default 0) must pass
+// is_unsigned_imm_in_range(width, 3, 0) and is written to bits 14:12; mew
+// (default false) goes to bit 28. lumop, vm, mop and nf are table constants.
+#define INSN(NAME, op, lumop, vm, mop, nf)                                           \
+  void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 0, bool mew = false) { \
+    guarantee(is_unsigned_imm_in_range(width, 3, 0), "width is invalid");            \
+    patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf);                        \
+  }
+
+  // Vector Load/Store Instructions
+  INSN(vl1r_v, 0b0000111, 0b01000, 0b1, 0b00, g1);
+
+#undef INSN
+
+// Generates NAME(Vs3, Rs1). Every encoding field other than the two
+// registers (width, sumop, vm, mop, mew, nf) is a table constant.
+#define INSN(NAME, op, width, sumop, vm, mop, mew, nf)           \
+  void NAME(VectorRegister Vs3, Register Rs1) {                  \
+    patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf);   \
+  }
+
+  // Vector Load/Store Instructions
+  INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1);
+
+#undef INSN
+
+// r2_nfvm
+// Generates NAME(Vd_or_Vs3, Rs1, nf) with no mask parameter: the vm bit is
+// hard-wired to 1 in the patch_VLdSt call. nf defaults to g1.
+#define INSN(NAME, op, width, umop, mop, mew)                         \
+  void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) {     \
+    patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf);    \
+  }
+
+  // Vector Unit-Stride Instructions
+  INSN(vle1_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0);
+  INSN(vse1_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0);
+
+#undef INSN
+
+// Generates NAME(Vd_or_Vs3, Rs1, vm, nf). The same macro serves loads and
+// stores; the table's op value differs between them (0b0000111 for the vle*
+// entries, 0b0100111 for the vse* entries). umop is a table constant
+// written to bits 24:20. Defaults: vm = unmasked, nf = g1.
+#define INSN(NAME, op, width, umop, mop, mew)                                               \
+  void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \
+    patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf);                         \
+  }
+
+  // Vector Unit-Stride Instructions
+  INSN(vle8_v,    0b0000111, 0b000, 0b00000, 0b00, 0b0);
+  INSN(vle16_v,   0b0000111, 0b101, 0b00000, 0b00, 0b0);
+  INSN(vle32_v,   0b0000111, 0b110, 0b00000, 0b00, 0b0);
+  INSN(vle64_v,   0b0000111, 0b111, 0b00000, 0b00, 0b0);
+
+  // Vector unit-stride fault-only-first Instructions
+  INSN(vle8ff_v,  0b0000111, 0b000, 0b10000, 0b00, 0b0);
+  INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0);
+  INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0);
+  INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0);
+
+  // Vector Unit-Stride Store Instructions
+  INSN(vse8_v,  0b0100111, 0b000, 0b00000, 0b00, 0b0);
+  INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0);
+  INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0);
+  INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0);
+
+#undef INSN
+
+// Generates NAME(Vd, Rs1, Vs2, vm, nf): Vs2 is a vector register whose
+// encoding_nocheck() is written to bits 24:20. The two groups below differ
+// only in mop (0b01 vs 0b11). Defaults: vm = unmasked, nf = g1.
+#define INSN(NAME, op, width, mop, mew)                                                                  \
+  void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \
+    patch_VLdSt(op, Vd, width, Rs1, Vs2->encoding_nocheck(), vm, mop, mew, nf);                          \
+  }
+
+  // Vector unordered indexed load instructions
+  INSN(vluxei8_v,  0b0000111, 0b000, 0b01, 0b0);
+  INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0);
+  INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0);
+  INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0);
+
+  // Vector ordered indexed load instructions
+  INSN(vloxei8_v,  0b0000111, 0b000, 0b11, 0b0);
+  INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0);
+  INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0);
+  INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0);
+#undef INSN
+
+// Generates NAME(Vd, Rs1, Rs2, vm, nf): Rs2 is a general-purpose Register
+// whose encoding_nocheck() is written to bits 24:20.
+// Defaults: vm = unmasked, nf = g1.
+#define INSN(NAME, op, width, mop, mew)                                                                  \
+  void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) {       \
+    patch_VLdSt(op, Vd, width, Rs1, Rs2->encoding_nocheck(), vm, mop, mew, nf);                          \
+  }
+
+  // Vector Strided Instructions
+  INSN(vlse8_v,  0b0000111, 0b000, 0b10, 0b0);
+  INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0);
+  INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0);
+  INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0);
+
+#undef INSN
+// Last user of patch_VLdSt; the helper macro is retired here.
+#undef patch_VLdSt
+
+// ====================================
+// RISC-V Bit-Manipulation Extension
+// Currently only the Zba, Zbb and Zbs bitmanip extensions are supported.
+// ====================================
+// Emitter for three-register bit-manipulation instructions NAME(Rd, Rs1, Rs2).
+// Field layout, low to high: op in bits 6:0, Rd from bit 7, funct3 in bits 14:12,
+// Rs1 from bit 15, Rs2 from bit 20, funct7 in bits 31:25.
+#define INSN(NAME, op, funct3, funct7)                  \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {  \
+    unsigned bits = 0;                                  \
+    address enc = (address)&bits;                       \
+    patch(enc, 6, 0, op);                               \
+    patch_reg(enc, 7, Rd);                              \
+    patch(enc, 14, 12, funct3);                         \
+    patch_reg(enc, 15, Rs1);                            \
+    patch_reg(enc, 20, Rs2);                            \
+    patch(enc, 31, 25, funct7);                         \
+    emit(bits);                                         \
+  }
+
+  INSN(add_uw,    0b0111011, 0b000, 0b0000100);
+  INSN(rol,       0b0110011, 0b001, 0b0110000);
+  INSN(rolw,      0b0111011, 0b001, 0b0110000);
+  INSN(ror,       0b0110011, 0b101, 0b0110000);
+  INSN(rorw,      0b0111011, 0b101, 0b0110000);
+  INSN(sh1add,    0b0110011, 0b010, 0b0010000);
+  INSN(sh2add,    0b0110011, 0b100, 0b0010000);
+  INSN(sh3add,    0b0110011, 0b110, 0b0010000);
+  INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000);
+  INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000);
+  INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000);
+  INSN(andn,      0b0110011, 0b111, 0b0100000);
+  INSN(orn,       0b0110011, 0b110, 0b0100000);
+  INSN(xnor,      0b0110011, 0b100, 0b0100000);
+  INSN(max,       0b0110011, 0b110, 0b0000101);
+  INSN(maxu,      0b0110011, 0b111, 0b0000101);
+  INSN(min,       0b0110011, 0b100, 0b0000101);
+  INSN(minu,      0b0110011, 0b101, 0b0000101);
+
+#undef INSN
+
+// Emitter for two-register bit-manipulation instructions NAME(Rd, Rs1).
+// Field layout, low to high: op in bits 6:0, Rd from bit 7, funct3 in bits 14:12,
+// Rs1 from bit 15, and the fixed 12-bit function code funct12 in bits 31:20.
+#define INSN(NAME, op, funct3, funct12)                 \
+  void NAME(Register Rd, Register Rs1) {                \
+    unsigned bits = 0;                                  \
+    address enc = (address)&bits;                       \
+    patch(enc, 6, 0, op);                               \
+    patch_reg(enc, 7, Rd);                              \
+    patch(enc, 14, 12, funct3);                         \
+    patch_reg(enc, 15, Rs1);                            \
+    patch(enc, 31, 20, funct12);                        \
+    emit(bits);                                         \
+  }
+
+  INSN(rev8,   0b0010011, 0b101, 0b011010111000);
+  INSN(sext_b, 0b0010011, 0b001, 0b011000000100);
+  INSN(sext_h, 0b0010011, 0b001, 0b011000000101);
+  INSN(zext_h, 0b0111011, 0b100, 0b000010000000);
+  INSN(clz,    0b0010011, 0b001, 0b011000000000);
+  INSN(clzw,   0b0011011, 0b001, 0b011000000000);
+  INSN(ctz,    0b0010011, 0b001, 0b011000000001);
+  INSN(ctzw,   0b0011011, 0b001, 0b011000000001);
+  INSN(cpop,   0b0010011, 0b001, 0b011000000010);
+  INSN(cpopw,  0b0011011, 0b001, 0b011000000010);
+  INSN(orc_b,  0b0010011, 0b101, 0b001010000111);
+
+#undef INSN
+
+// Emitter for bit-manipulation instructions NAME(Rd, Rs1, shamt) with a 6-bit immediate.
+// shamt is checked against 0x3f before anything is encoded. Field layout, low to high:
+// op in bits 6:0, Rd from bit 7, funct3 in bits 14:12, Rs1 from bit 15, shamt in
+// bits 25:20, funct6 in bits 31:26.
+#define INSN(NAME, op, funct3, funct6)                  \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {\
+    guarantee(shamt <= 0x3f, "Shamt is invalid");       \
+    unsigned bits = 0;                                  \
+    address enc = (address)&bits;                       \
+    patch(enc, 6, 0, op);                               \
+    patch_reg(enc, 7, Rd);                              \
+    patch(enc, 14, 12, funct3);                         \
+    patch_reg(enc, 15, Rs1);                            \
+    patch(enc, 25, 20, shamt);                          \
+    patch(enc, 31, 26, funct6);                         \
+    emit(bits);                                         \
+  }
+
+  INSN(rori,    0b0010011, 0b101, 0b011000);
+  INSN(slli_uw, 0b0011011, 0b001, 0b000010);
+  INSN(bexti,   0b0010011, 0b101, 0b010010);
+
+#undef INSN
+
+// Emitter for bit-manipulation instructions NAME(Rd, Rs1, shamt) with a 5-bit immediate.
+// shamt is checked against 0x1f before anything is encoded. Field layout, low to high:
+// op in bits 6:0, Rd from bit 7, funct3 in bits 14:12, Rs1 from bit 15, shamt in
+// bits 24:20, funct7 in bits 31:25.
+#define INSN(NAME, op, funct3, funct7)                  \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {\
+    guarantee(shamt <= 0x1f, "Shamt is invalid");       \
+    unsigned bits = 0;                                  \
+    address enc = (address)&bits;                       \
+    patch(enc, 6, 0, op);                               \
+    patch_reg(enc, 7, Rd);                              \
+    patch(enc, 14, 12, funct3);                         \
+    patch_reg(enc, 15, Rs1);                            \
+    patch(enc, 24, 20, shamt);                          \
+    patch(enc, 31, 25, funct7);                         \
+    emit(bits);                                         \
+  }
+
+  INSN(roriw, 0b0011011, 0b101, 0b0110000);
+
+#undef INSN
+
+// ========================================
+// RISC-V Compressed Instructions Extension
+// ========================================
+// Note:
+// 1. When UseRVC is enabled, 32-bit instructions inside a 'CompressibleRegion' are
+//    transformed to 16-bit instructions if they are compressible.
+// 2. RVC instructions in Assembler always begin with the 'c_' prefix, e.g. 'c_li',
+//    but most of the time there is no need to use these instructions explicitly.
+// 3. 'CompressibleRegion' marks a scope: instructions emitted during the lifetime of the
+//    region object are eligible to be compressed to their 2-byte versions.
+//    An example:
+//
+//      CompressibleRegion cr(_masm);
+//      __ andr(...);      // this instruction may be emitted as c.and if it is compressible
+//
+// 4. -XX:PrintAssemblyOptions=no-aliases can be used to distinguish RVC instructions from
+//    normal ones in the disassembly.
+//
+
+private:
+  // Flag toggled by CompressibleRegion: set to true for the lifetime of a region object and
+  // restored to its previous value when the region is destroyed.
+  bool _in_compressible_region;
+public:
+  // Returns the current value of the compressible-region flag.
+  bool in_compressible_region() const { return _in_compressible_region; }
+  // Overwrites the compressible-region flag with b.
+  void set_in_compressible_region(bool b) { _in_compressible_region = b; }
+public:
+
+  // a compressible region
+  // Scope guard for a compressible region: the constructor remembers the assembler's current
+  // in_compressible_region() value and switches the flag on; the destructor puts the
+  // remembered value back, so regions can be nested.
+  class CompressibleRegion : public StackObj {
+  protected:
+    Assembler *_masm;                     // assembler whose flag is being managed
+    bool _saved_in_compressible_region;   // flag value observed on construction
+  public:
+    CompressibleRegion(Assembler *masm)
+    : _masm(masm)
+    , _saved_in_compressible_region(masm->in_compressible_region()) {
+      _masm->set_in_compressible_region(true);
+    }
+    ~CompressibleRegion() {
+      _masm->set_in_compressible_region(_saved_in_compressible_region);
+    }
+  };
+
+  // patch a 16-bit instruction.
+  // Overwrites bits [msb:lsb] of the 16-bit instruction stored at 'a' with 'val'; all other
+  // bits are preserved. 'a' must be non-NULL, msb must be in [lsb, 15], and 'val' must fit
+  // in the field, otherwise the guarantee fires with "Field too big for insn".
+  static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) {
+    assert_cond(a != NULL);
+    assert_cond(msb >= lsb && msb <= 15);
+    const unsigned width = msb - lsb + 1;
+    guarantee(val < (1U << width), "Field too big for insn");
+    // Both the mask and the value are moved into position before touching the instruction.
+    const uint16_t field_mask = (uint16_t)(((1U << width) - 1) << lsb);
+    const uint16_t field_val  = (uint16_t)(val << lsb);
+    uint16_t *insn = (uint16_t *)a;
+    *insn = (uint16_t)((*insn & ~field_mask) | field_val);
+  }
+
+  // Single-bit convenience overload: patches the one-bit field [bit:bit] of the 16-bit
+  // instruction at 'a' with 'val' by delegating to the range overload.
+  static void c_patch(address a, unsigned bit, uint16_t val) {
+    c_patch(a, bit, bit, val);
+  }
+
+  // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits):
+  // the value returned by reg->encoding_nocheck() is written to bits [lsb+4:lsb] at 'a'.
+  static void c_patch_reg(address a, unsigned lsb, Register reg) {
+    c_patch(a, lsb + 4, lsb, reg->encoding_nocheck());
+  }
+
+  // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits):
+  // the value returned by reg->compressed_encoding_nocheck() is written to bits
+  // [lsb+2:lsb] at 'a'.
+  static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) {
+    c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck());
+  }
+
+  // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits):
+  // the value returned by reg->encoding_nocheck() is written to bits [lsb+4:lsb] at 'a'.
+  static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) {
+    c_patch(a, lsb + 4, lsb, reg->encoding_nocheck());
+  }
+
+  // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits):
+  // the value returned by reg->compressed_encoding_nocheck() is written to bits
+  // [lsb+2:lsb] at 'a'.
+  static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) {
+    c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck());
+  }
+
+// --------------  RVC Instruction Definitions  --------------
+
+  // Compressed no-op: emitted as c_addi with x0 as the register and a zero immediate.
+  void c_nop() {
+    c_addi(x0, 0);
+  }
+
+// Emitter NAME(Rd_Rs1, imm) for compressed add-immediate instructions.
+// Field layout, low to high: op in bits 1:0, imm[4:0] in bits 6:2, the 5-bit register
+// from bit 7, imm[5] in bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd_Rs1, int32_t imm) {                                                  \
+    assert_cond(is_imm_in_range(imm, 6, 0));                                                 \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (imm & right_n_bits(5)));                                             \
+    c_patch_reg(enc, 7, Rd_Rs1);                                                             \
+    c_patch(enc, 12, 12, (imm & nth_bit(5)) >> 5);                                           \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_addi,   0b000, 0b01);
+  INSN(c_addiw,  0b001, 0b01);
+
+#undef INSN
+
+// Emitter NAME(imm) that always encodes sp as the register operand. imm must be non-zero
+// and have its low four bits clear. Field layout, low to high: op in bits 1:0, imm[5] in
+// bit 2, imm[8:7] in bits 4:3, imm[6] in bit 5, imm[4] in bit 6, sp from bit 7, imm[9] in
+// bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(int32_t imm) {                                                                   \
+    assert_cond(is_imm_in_range(imm, 10, 0));                                                \
+    assert_cond((imm & 0b1111) == 0);                                                        \
+    assert_cond(imm != 0);                                                                   \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 2, 2, (imm & nth_bit(5)) >> 5);                                             \
+    c_patch(enc, 4, 3, (imm & right_n_bits(9)) >> 7);                                        \
+    c_patch(enc, 5, 5, (imm & nth_bit(6)) >> 6);                                             \
+    c_patch(enc, 6, 6, (imm & nth_bit(4)) >> 4);                                             \
+    c_patch_reg(enc, 7, sp);                                                                 \
+    c_patch(enc, 12, 12, (imm & nth_bit(9)) >> 9);                                           \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_addi16sp, 0b011, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd, uimm) with a 3-bit compressed destination register. uimm must be
+// non-zero and a multiple of 4. Field layout, low to high: op in bits 1:0, Rd from bit 2,
+// uimm[3] in bit 5, uimm[2] in bit 6, uimm[9:6] in bits 10:7, uimm[5:4] in bits 12:11,
+// funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd, uint32_t uimm) {                                                    \
+    assert_cond(is_unsigned_imm_in_range(uimm, 10, 0));                                      \
+    assert_cond((uimm & 0b11) == 0);                                                         \
+    assert_cond(uimm != 0);                                                                  \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_compressed_reg(enc, 2, Rd);                                                      \
+    c_patch(enc, 5, 5, (uimm & nth_bit(3)) >> 3);                                            \
+    c_patch(enc, 6, 6, (uimm & nth_bit(2)) >> 2);                                            \
+    c_patch(enc, 10, 7, (uimm & right_n_bits(10)) >> 6);                                     \
+    c_patch(enc, 12, 11, (uimm & right_n_bits(6)) >> 4);                                     \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_addi4spn, 0b000, 0b00);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs1, shamt) for the compressed left shift. shamt must be non-zero and
+// Rd_Rs1 must not be x0. Field layout, low to high: op in bits 1:0, shamt[4:0] in bits 6:2,
+// the 5-bit register from bit 7, shamt[5] in bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd_Rs1, uint32_t shamt) {                                               \
+    assert_cond(is_unsigned_imm_in_range(shamt, 6, 0));                                      \
+    assert_cond(shamt != 0);                                                                 \
+    assert_cond(Rd_Rs1 != x0);                                                               \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (shamt & right_n_bits(5)));                                           \
+    c_patch_reg(enc, 7, Rd_Rs1);                                                             \
+    c_patch(enc, 12, 12, (shamt & nth_bit(5)) >> 5);                                         \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_slli, 0b000, 0b10);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs1, shamt) for the compressed right shifts, using a 3-bit compressed
+// register. shamt must be non-zero. Field layout, low to high: op in bits 1:0, shamt[4:0]
+// in bits 6:2, the register from bit 7, funct2 in bits 11:10, shamt[5] in bit 12, funct3
+// in bits 15:13.
+#define INSN(NAME, funct3, funct2, op)                                                       \
+  void NAME(Register Rd_Rs1, uint32_t shamt) {                                               \
+    assert_cond(is_unsigned_imm_in_range(shamt, 6, 0));                                      \
+    assert_cond(shamt != 0);                                                                 \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (shamt & right_n_bits(5)));                                           \
+    c_patch_compressed_reg(enc, 7, Rd_Rs1);                                                  \
+    c_patch(enc, 11, 10, funct2);                                                            \
+    c_patch(enc, 12, 12, (shamt & nth_bit(5)) >> 5);                                         \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_srli, 0b100, 0b00, 0b01);
+  INSN(c_srai, 0b100, 0b01, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs1, imm) for the compressed and-immediate, using a 3-bit compressed
+// register. Field layout, low to high: op in bits 1:0, imm[4:0] in bits 6:2, the register
+// from bit 7, funct2 in bits 11:10, imm[5] in bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, funct2, op)                                                       \
+  void NAME(Register Rd_Rs1, int32_t imm) {                                                  \
+    assert_cond(is_imm_in_range(imm, 6, 0));                                                 \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (imm & right_n_bits(5)));                                             \
+    c_patch_compressed_reg(enc, 7, Rd_Rs1);                                                  \
+    c_patch(enc, 11, 10, funct2);                                                            \
+    c_patch(enc, 12, 12, (imm & nth_bit(5)) >> 5);                                           \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_andi, 0b100, 0b10, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs1, Rs2) for compressed register-register arithmetic; both operands
+// use the 3-bit compressed register encoding. Field layout, low to high: op in bits 1:0,
+// Rs2 from bit 2, funct2 in bits 6:5, Rd_Rs1 from bit 7, funct6 in bits 15:10.
+#define INSN(NAME, funct6, funct2, op)                                                       \
+  void NAME(Register Rd_Rs1, Register Rs2) {                                                 \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_compressed_reg(enc, 2, Rs2);                                                     \
+    c_patch(enc, 6, 5, funct2);                                                              \
+    c_patch_compressed_reg(enc, 7, Rd_Rs1);                                                  \
+    c_patch(enc, 15, 10, funct6);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_sub,  0b100011, 0b00, 0b01);
+  INSN(c_xor,  0b100011, 0b01, 0b01);
+  INSN(c_or,   0b100011, 0b10, 0b01);
+  INSN(c_and,  0b100011, 0b11, 0b01);
+  INSN(c_subw, 0b100111, 0b00, 0b01);
+  INSN(c_addw, 0b100111, 0b01, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs1, Rs2) for the compressed move/add; both operands use the full 5-bit
+// register encoding. Field layout, low to high: op in bits 1:0, Rs2 from bit 2, Rd_Rs1
+// from bit 7, funct4 in bits 15:12.
+// Neither register may be x0. In particular, with Rs2 == x0 the emitted bits would be
+// exactly what c_jr/c_jalr produce (same funct4/op values, x0 in the Rs2 field), i.e. a
+// jump instead of a move/add, so that case is rejected up front.
+#define INSN(NAME, funct4, op)                                                               \
+  void NAME(Register Rd_Rs1, Register Rs2) {                                                 \
+    assert_cond(Rd_Rs1 != x0);                                                               \
+    assert_cond(Rs2 != x0);                                                                  \
+    uint16_t insn = 0;                                                                       \
+    c_patch((address)&insn, 1, 0, op);                                                       \
+    c_patch_reg((address)&insn, 2, Rs2);                                                     \
+    c_patch_reg((address)&insn, 7, Rd_Rs1);                                                  \
+    c_patch((address)&insn, 15, 12, funct4);                                                 \
+    emit_int16(insn);                                                                        \
+  }
+
+  INSN(c_mv,  0b1000, 0b10);
+  INSN(c_add, 0b1001, 0b10);
+
+#undef INSN
+
+// Emitter NAME(Rs1) for compressed register-indirect jumps. Rs1 must not be x0.
+// Field layout, low to high: op in bits 1:0, x0 in the 5-bit field from bit 2, Rs1 from
+// bit 7, funct4 in bits 15:12.
+#define INSN(NAME, funct4, op)                                                               \
+  void NAME(Register Rs1) {                                                                  \
+    assert_cond(Rs1 != x0);                                                                  \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_reg(enc, 2, x0);                                                                 \
+    c_patch_reg(enc, 7, Rs1);                                                                \
+    c_patch(enc, 15, 12, funct4);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_jr,   0b1000, 0b10);
+  INSN(c_jalr, 0b1001, 0b10);
+
+#undef INSN
+
+  // Pointer-to-member types for the address-taking overloads of the compressed jump and
+  // compare-and-branch emitters; used by the wrap_label() helpers.
+  typedef void (Assembler::* j_c_insn)(address dest);
+  typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest);
+
+  // Emits the jump 'insn' towards label L. For an unbound label a patch site is recorded
+  // at the current position and the instruction is emitted with pc() as a placeholder
+  // destination; for a bound label the instruction is emitted with target(L).
+  void wrap_label(Label &L, j_c_insn insn) {
+    if (!L.is_bound()) {
+      L.add_patch_at(code(), locator());
+      (this->*insn)(pc());
+      return;
+    }
+    (this->*insn)(target(L));
+  }
+
+  // Emits the compare-and-branch 'insn' on register r towards label L. For an unbound
+  // label a patch site is recorded at the current position and the instruction is emitted
+  // with pc() as a placeholder destination; for a bound label it is emitted with target(L).
+  void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) {
+    if (!L.is_bound()) {
+      L.add_patch_at(code(), locator());
+      (this->*insn)(r, pc());
+      return;
+    }
+    (this->*insn)(r, target(L));
+  }
+
+// Compressed unconditional jump. Three overloads are generated:
+//   NAME(int32_t offset) - encodes the offset directly. Field layout, low to high: op in
+//                          bits 1:0, offset[5] in bit 2, offset[3:1] in bits 5:3, offset[7]
+//                          in bit 6, offset[6] in bit 7, offset[10] in bit 8, offset[9:8]
+//                          in bits 10:9, offset[4] in bit 11, offset[11] in bit 12, funct3
+//                          in bits 15:13.
+//   NAME(address dest)   - computes the distance from the current pc(), range-checks it and
+//                          forwards to the offset overload.
+//   NAME(Label &L)       - resolves the label through wrap_label().
+// The address overload forwards through NAME (not a hard-coded instruction name), matching
+// the compare-and-branch macro, so the macro stays correct for any instantiation.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(int32_t offset) {                                                                \
+    assert_cond(is_imm_in_range(offset, 11, 1));                                             \
+    uint16_t insn = 0;                                                                       \
+    c_patch((address)&insn, 1, 0, op);                                                       \
+    c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5);                               \
+    c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1);                          \
+    c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7);                               \
+    c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6);                               \
+    c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10);                             \
+    c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8);                        \
+    c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4);                             \
+    c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11);                           \
+    c_patch((address)&insn, 15, 13, funct3);                                                 \
+    emit_int16(insn);                                                                        \
+  }                                                                                          \
+  void NAME(address dest) {                                                                  \
+    assert_cond(dest != NULL);                                                               \
+    int64_t distance = dest - pc();                                                          \
+    assert_cond(is_imm_in_range(distance, 11, 1));                                           \
+    NAME(distance);                                                                          \
+  }                                                                                          \
+  void NAME(Label &L) {                                                                      \
+    wrap_label(L, &Assembler::NAME);                                                         \
+  }
+
+  INSN(c_j, 0b101, 0b01);
+
+#undef INSN
+
+// Compressed compare-with-zero branches on a 3-bit compressed register. Three overloads
+// are generated:
+//   NAME(Rs1, int32_t imm)  - encodes the offset directly. Field layout, low to high: op
+//                             in bits 1:0, imm[5] in bit 2, imm[2:1] in bits 4:3, imm[7:6]
+//                             in bits 6:5, Rs1 from bit 7, imm[4:3] in bits 11:10, imm[8]
+//                             in bit 12, funct3 in bits 15:13.
+//   NAME(Rs1, address dest) - computes the distance from the current pc(), range-checks it
+//                             and forwards to the offset overload.
+//   NAME(Rs1, Label &L)     - resolves the label through wrap_label().
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rs1, int32_t imm) {                                                     \
+    assert_cond(is_imm_in_range(imm, 8, 1));                                                 \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 2, 2, (imm & nth_bit(5)) >> 5);                                             \
+    c_patch(enc, 4, 3, (imm & right_n_bits(3)) >> 1);                                        \
+    c_patch(enc, 6, 5, (imm & right_n_bits(8)) >> 6);                                        \
+    c_patch_compressed_reg(enc, 7, Rs1);                                                     \
+    c_patch(enc, 11, 10, (imm & right_n_bits(5)) >> 3);                                      \
+    c_patch(enc, 12, 12, (imm & nth_bit(8)) >> 8);                                           \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }                                                                                          \
+  void NAME(Register Rs1, address dest) {                                                    \
+    assert_cond(dest != NULL);                                                               \
+    int64_t distance = dest - pc();                                                          \
+    assert_cond(is_imm_in_range(distance, 8, 1));                                            \
+    NAME(Rs1, distance);                                                                     \
+  }                                                                                          \
+  void NAME(Register Rs1, Label &L) {                                                        \
+    wrap_label(L, Rs1, &Assembler::NAME);                                                    \
+  }
+
+  INSN(c_beqz, 0b110, 0b01);
+  INSN(c_bnez, 0b111, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd, imm) for the compressed load-upper-immediate. imm must be non-zero with
+// its low 12 bits clear, and Rd must be neither x0 nor x2. Field layout, low to high: op
+// in bits 1:0, imm[16:12] in bits 6:2, the 5-bit register from bit 7, imm[17] in bit 12,
+// funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd, int32_t imm) {                                                      \
+    assert_cond(is_imm_in_range(imm, 18, 0));                                                \
+    assert_cond((imm & 0xfff) == 0);                                                         \
+    assert_cond(imm != 0);                                                                   \
+    assert_cond(Rd != x0 && Rd != x2);                                                       \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (imm & right_n_bits(17)) >> 12);                                      \
+    c_patch_reg(enc, 7, Rd);                                                                 \
+    c_patch(enc, 12, 12, (imm & nth_bit(17)) >> 17);                                         \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_lui, 0b011, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd, imm) for the compressed load-immediate. Rd must not be x0.
+// Field layout, low to high: op in bits 1:0, imm[4:0] in bits 6:2, the 5-bit register
+// from bit 7, imm[5] in bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd, int32_t imm) {                                                      \
+    assert_cond(is_imm_in_range(imm, 6, 0));                                                 \
+    assert_cond(Rd != x0);                                                                   \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 6, 2, (imm & right_n_bits(5)));                                             \
+    c_patch_reg(enc, 7, Rd);                                                                 \
+    c_patch(enc, 12, 12, (imm & right_n_bits(6)) >> 5);                                      \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_li, 0b010, 0b01);
+
+#undef INSN
+
+// Emitter NAME(Rd, uimm) for the compressed stack-pointer-relative doubleword load into a
+// general purpose register. uimm must be a multiple of 8 and Rd must not be x0.
+// Field layout, low to high: op in bits 1:0, uimm[8:6] in bits 4:2, uimm[4:3] in bits 6:5,
+// the 5-bit register from bit 7, uimm[5] in bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd, uint32_t uimm) {                                                    \
+    assert_cond(is_unsigned_imm_in_range(uimm, 9, 0));                                       \
+    assert_cond((uimm & 0b111) == 0);                                                        \
+    assert_cond(Rd != x0);                                                                   \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 4, 2, (uimm & right_n_bits(9)) >> 6);                                       \
+    c_patch(enc, 6, 5, (uimm & right_n_bits(5)) >> 3);                                       \
+    c_patch_reg(enc, 7, Rd);                                                                 \
+    c_patch(enc, 12, 12, (uimm & nth_bit(5)) >> 5);                                          \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_ldsp,  0b011, 0b10);
+
+#undef INSN
+
+// Emitter NAME(Rd, uimm) for the compressed stack-pointer-relative doubleword load into a
+// float register. uimm must be a multiple of 8. Field layout, low to high: op in bits 1:0,
+// uimm[8:6] in bits 4:2, uimm[4:3] in bits 6:5, the 5-bit register from bit 7, uimm[5] in
+// bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(FloatRegister Rd, uint32_t uimm) {                                               \
+    assert_cond(is_unsigned_imm_in_range(uimm, 9, 0));                                       \
+    assert_cond((uimm & 0b111) == 0);                                                        \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 4, 2, (uimm & right_n_bits(9)) >> 6);                                       \
+    c_patch(enc, 6, 5, (uimm & right_n_bits(5)) >> 3);                                       \
+    c_patch_reg(enc, 7, Rd);                                                                 \
+    c_patch(enc, 12, 12, (uimm & nth_bit(5)) >> 5);                                          \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_fldsp, 0b001, 0b10);
+
+#undef INSN
+
+// Emitter NAME(Rd_Rs2, Rs1, uimm) for compressed doubleword loads/stores relative to a
+// base register; REGISTER_TYPE selects a general purpose or float data register. Both
+// registers use the 3-bit compressed encoding and uimm must be a multiple of 8.
+// Field layout, low to high: op in bits 1:0, Rd_Rs2 from bit 2, uimm[7:6] in bits 6:5,
+// Rs1 from bit 7, uimm[5:3] in bits 12:10, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op, REGISTER_TYPE)                                                \
+  void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) {                             \
+    assert_cond(is_unsigned_imm_in_range(uimm, 8, 0));                                       \
+    assert_cond((uimm & 0b111) == 0);                                                        \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_compressed_reg(enc, 2, Rd_Rs2);                                                  \
+    c_patch(enc, 6, 5, (uimm & right_n_bits(8)) >> 6);                                       \
+    c_patch_compressed_reg(enc, 7, Rs1);                                                     \
+    c_patch(enc, 12, 10, (uimm & right_n_bits(6)) >> 3);                                     \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_ld,  0b011, 0b00, Register);
+  INSN(c_sd,  0b111, 0b00, Register);
+  INSN(c_fld, 0b001, 0b00, FloatRegister);
+  INSN(c_fsd, 0b101, 0b00, FloatRegister);
+
+#undef INSN
+
+// Emitter NAME(Rs2, uimm) for compressed stack-pointer-relative doubleword stores;
+// REGISTER_TYPE selects a general purpose or float source register (5-bit encoding).
+// uimm must be a multiple of 8. Field layout, low to high: op in bits 1:0, Rs2 from bit 2,
+// uimm[8:6] in bits 9:7, uimm[5:3] in bits 12:10, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op, REGISTER_TYPE)                                                \
+  void NAME(REGISTER_TYPE Rs2, uint32_t uimm) {                                              \
+    assert_cond(is_unsigned_imm_in_range(uimm, 9, 0));                                       \
+    assert_cond((uimm & 0b111) == 0);                                                        \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_reg(enc, 2, Rs2);                                                                \
+    c_patch(enc, 9, 7, (uimm & right_n_bits(9)) >> 6);                                       \
+    c_patch(enc, 12, 10, (uimm & right_n_bits(6)) >> 3);                                     \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_sdsp,  0b111, 0b10, Register);
+  INSN(c_fsdsp, 0b101, 0b10, FloatRegister);
+
+#undef INSN
+
+// Emitter NAME(Rs2, uimm) for the compressed stack-pointer-relative word store. uimm must
+// be a multiple of 4. Field layout, low to high: op in bits 1:0, the 5-bit Rs2 from bit 2,
+// uimm[7:6] in bits 8:7, uimm[5:2] in bits 12:9, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rs2, uint32_t uimm) {                                                   \
+    assert_cond(is_unsigned_imm_in_range(uimm, 8, 0));                                       \
+    assert_cond((uimm & 0b11) == 0);                                                         \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch_reg(enc, 2, Rs2);                                                                \
+    c_patch(enc, 8, 7, (uimm & right_n_bits(8)) >> 6);                                       \
+    c_patch(enc, 12, 9, (uimm & right_n_bits(6)) >> 2);                                      \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_swsp, 0b110, 0b10);
+
+#undef INSN
+
+// Emitter NAME(Rd, uimm) for the compressed stack-pointer-relative word load. uimm must be
+// a multiple of 4 and Rd must not be x0. Field layout, low to high: op in bits 1:0,
+// uimm[7:6] in bits 3:2, uimm[4:2] in bits 6:4, the 5-bit register from bit 7, uimm[5] in
+// bit 12, funct3 in bits 15:13.
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd, uint32_t uimm) {                                                    \
+    assert_cond(is_unsigned_imm_in_range(uimm, 8, 0));                                       \
+    assert_cond((uimm & 0b11) == 0);                                                         \
+    assert_cond(Rd != x0);                                                                   \
+    uint16_t bits = 0;                                                                       \
+    address enc = (address)&bits;                                                            \
+    c_patch(enc, 1, 0, op);                                                                  \
+    c_patch(enc, 3, 2, (uimm & right_n_bits(8)) >> 6);                                       \
+    c_patch(enc, 6, 4, (uimm & right_n_bits(5)) >> 2);                                       \
+    c_patch_reg(enc, 7, Rd);                                                                 \
+    c_patch(enc, 12, 12, (uimm & nth_bit(5)) >> 5);                                          \
+    c_patch(enc, 15, 13, funct3);                                                            \
+    emit_int16(bits);                                                                        \
+  }
+
+  INSN(c_lwsp, 0b010, 0b10);
+
+#undef INSN
+
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) {  /* 16-bit c.lw/c.sw encoder */  \
+    assert_cond(is_unsigned_imm_in_range(uimm, 7, 0));  /* uimm < 128 */                     \
+    assert_cond((uimm & 0b11) == 0);                    /* multiple of 4 */                  \
+    uint16_t insn = 0;                                                                       \
+    c_patch((address)&insn, 1, 0, op);                              /* insn[1:0] = op */     \
+    c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);              /* Rd_Rs2 at bit 2 */    \
+    c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6);        /* [5] = uimm[6] */      \
+    c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2);        /* [6] = uimm[2] */      \
+    c_patch_compressed_reg((address)&insn, 7, Rs1);                 /* Rs1 at bit 7 */       \
+    c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); /* [12:10]=uimm[5:3] */  \
+    c_patch((address)&insn, 15, 13, funct3);                        /* [15:13] = funct3 */   \
+    emit_int16(insn);                                                                        \
+  }
+
+  INSN(c_lw, 0b010, 0b00);
+  INSN(c_sw, 0b110, 0b00);
+
+#undef INSN
+
+#define INSN(NAME, funct3, op)                                                               \
+  void NAME() {  /* emit the 16-bit c.ebreak encoding */                                     \
+    uint16_t insn = 0;                                                                       \
+    c_patch((address)&insn, 1, 0, op);                              /* insn[1:0] = op */     \
+    c_patch((address)&insn, 11, 2, 0x0);                            /* [11:2] = 0 */         \
+    c_patch((address)&insn, 12, 12, 0b1);                           /* [12] = 1 */           \
+    c_patch((address)&insn, 15, 13, funct3);                        /* [15:13] = funct3 */   \
+    emit_int16(insn);                                                                        \
+  }
+
+  INSN(c_ebreak, 0b100, 0b10);
+
+#undef INSN
+
+// --------------  RVC Transformation Functions  --------------
+
+// --------------------------
+// Register instructions
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {                                       \
+    /* add -> c.add: Rd must double as a source and neither source may be x0 */              \
+    const bool rvc = do_compress() && Rs1 != x0 && Rs2 != x0;                                \
+    if (rvc && Rs2 == Rd) {                                                                  \
+      c_add(Rd, Rs1);                                                                        \
+    } else if (rvc && Rs1 == Rd) {                                                           \
+      c_add(Rd, Rs2);                                                                        \
+    } else {                                                                                 \
+      _add(Rd, Rs1, Rs2);                                                                    \
+    }                                                                                        \
+  }
+
+  INSN(add);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME)                                                      \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {                                       \
+    /* sub/subw -> c.sub/c.subw; only the Rd == Rs1 operand order qualifies */               \
+    const bool in_place = do_compress() && Rd == Rs1;                                        \
+    if (in_place && Rd->is_compressed_valid() && Rs2->is_compressed_valid()) {               \
+      C_NAME(Rd, Rs2);                                                                       \
+    } else {                                                                                 \
+      NORMAL_NAME(Rd, Rs1, Rs2);                                                             \
+    }                                                                                        \
+  }
+
+  INSN(sub,  c_sub,  _sub);
+  INSN(subw, c_subw, _subw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME)                                                      \
+  void NAME(Register Rd, Register Rs1, Register Rs2) {                                       \
+    /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw; either source may coincide with Rd */     \
+    const bool rvc = do_compress() &&                                                        \
+                     Rs1->is_compressed_valid() && Rs2->is_compressed_valid();               \
+    if (rvc && Rs2 == Rd) {                                                                  \
+      C_NAME(Rd, Rs1);                                                                       \
+    } else if (rvc && Rs1 == Rd) {                                                           \
+      C_NAME(Rd, Rs2);                                                                       \
+    } else {                                                                                 \
+      NORMAL_NAME(Rd, Rs1, Rs2);                                                             \
+    }                                                                                        \
+  }
+
+  INSN(andr, c_and,  _andr);
+  INSN(orr,  c_or,   _orr);
+  INSN(xorr, c_xor,  _xorr);
+  INSN(addw, c_addw, _addw);
+
+#undef INSN
+
+private:
+// some helper functions
+  bool do_compress() const {  // true only when UseRVC is set and in_compressible_region() agrees
+    return UseRVC ? in_compressible_region() : false;
+  }
+
+#define FUNC(NAME, align_mask, bits)                                                         \
+  bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) {                         \
+    /* sp base; 0 <= imm12 < 2^bits with no align_mask bit set; loads also need rd != x0 */  \
+    if (rs1 != sp || !is_unsigned_imm_in_range(imm12, bits, 0)) return false;                \
+    if ((intx(imm12) & align_mask) != 0x0) return false;                                     \
+    return !ld || rd_rs2 != x0;                                                              \
+  }
+
+  FUNC(is_c_ldsdsp,  0b111, 9);
+  FUNC(is_c_lwswsp,  0b011, 8);
+
+#undef FUNC
+
+#define FUNC(NAME, align_mask, bits)                                                         \
+  bool NAME(Register rs1, int32_t imm12) {                                                   \
+    /* sp base; 0 <= imm12 < 2^bits with no align_mask bit set */                            \
+    if (rs1 != sp) return false;                                                             \
+    return is_unsigned_imm_in_range(imm12, bits, 0) && (intx(imm12) & align_mask) == 0x0;    \
+  }
+
+  FUNC(is_c_fldsdsp, 0b111, 9);
+
+#undef FUNC
+
+#define FUNC(NAME, REG_TYPE, align_mask, bits)                                               \
+  bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) {                                  \
+    /* both regs pass is_compressed_valid(); 0 <= imm12 < 2^bits, align_mask bits clear */   \
+    if (!rs1->is_compressed_valid() || !rd_rs2->is_compressed_valid()) return false;         \
+    if (!is_unsigned_imm_in_range(imm12, bits, 0)) return false;                             \
+    return (intx(imm12) & align_mask) == 0x0;                                                \
+  }
+
+  FUNC(is_c_ldsd,  Register,      0b111, 8);
+  FUNC(is_c_lwsw,  Register,      0b011, 7);
+  FUNC(is_c_fldsd, FloatRegister, 0b111, 8);
+
+#undef FUNC
+
+public:
+// --------------------------
+// Load/store register
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
+    /* lw -> c.lwsp when is_c_lwswsp() accepts, c.lw when is_c_lwsw() accepts, else _lw */   \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_lwswsp(Rs, Rd, offset, true)) {                                          \
+      /* sp-based form: the base register is implicit */                                     \
+      c_lwsp(Rd, offset);                                                                    \
+    } else if (rvc && is_c_lwsw(Rs, Rd, offset)) {                                           \
+      /* register-based form with an explicit base */                                        \
+      c_lw(Rd, Rs, offset);                                                                  \
+    } else {                                                                                 \
+      _lw(Rd, Rs, offset);                                                                   \
+    }                                                                                        \
+  }
+
+  INSN(lw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
+    /* ld -> c.ldsp when is_c_ldsdsp() accepts, c.ld when is_c_ldsd() accepts, else _ld */   \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_ldsdsp(Rs, Rd, offset, true)) {                                          \
+      /* sp-based form: the base register is implicit */                                     \
+      c_ldsp(Rd, offset);                                                                    \
+    } else if (rvc && is_c_ldsd(Rs, Rd, offset)) {                                           \
+      /* register-based form with an explicit base */                                        \
+      c_ld(Rd, Rs, offset);                                                                  \
+    } else {                                                                                 \
+      _ld(Rd, Rs, offset);                                                                   \
+    }                                                                                        \
+  }
+
+  INSN(ld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) {                           \
+    /* fld -> c.fldsp if is_c_fldsdsp() accepts, c.fld if is_c_fldsd() accepts, else _fld */ \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_fldsdsp(Rs, offset)) {                                                   \
+      /* sp-based form: the base register is implicit */                                     \
+      c_fldsp(Rd, offset);                                                                   \
+    } else if (rvc && is_c_fldsd(Rs, Rd, offset)) {                                          \
+      /* register-based form with an explicit base */                                        \
+      c_fld(Rd, Rs, offset);                                                                 \
+    } else {                                                                                 \
+      _fld(Rd, Rs, offset);                                                                  \
+    }                                                                                        \
+  }
+
+  INSN(fld);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
+    /* sd -> c.sdsp when is_c_ldsdsp() accepts, c.sd when is_c_ldsd() accepts, else _sd */   \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_ldsdsp(Rs, Rd, offset, false)) {                                         \
+      /* sp-based form: the base register is implicit */                                     \
+      c_sdsp(Rd, offset);                                                                    \
+    } else if (rvc && is_c_ldsd(Rs, Rd, offset)) {                                           \
+      /* register-based form with an explicit base */                                        \
+      c_sd(Rd, Rs, offset);                                                                  \
+    } else {                                                                                 \
+      _sd(Rd, Rs, offset);                                                                   \
+    }                                                                                        \
+  }
+
+  INSN(sd);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
+    /* sw -> c.swsp when is_c_lwswsp() accepts, c.sw when is_c_lwsw() accepts, else _sw */   \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_lwswsp(Rs, Rd, offset, false)) {                                         \
+      /* sp-based form; Rd (the value to store) is passed as c_swsp's Rs2 */                 \
+      c_swsp(Rd, offset);                                                                    \
+    } else if (rvc && is_c_lwsw(Rs, Rd, offset)) {                                           \
+      /* register-based form with an explicit base */                                        \
+      c_sw(Rd, Rs, offset);                                                                  \
+    } else {                                                                                 \
+      _sw(Rd, Rs, offset);                                                                   \
+    }                                                                                        \
+  }
+
+  INSN(sw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(FloatRegister Rd, Register Rs, const int32_t offset) {                           \
+    /* fsd -> c.fsdsp if is_c_fldsdsp() accepts, c.fsd if is_c_fldsd() accepts, else _fsd */ \
+    const bool rvc = do_compress();                                                          \
+    if (rvc && is_c_fldsdsp(Rs, offset)) {                                                   \
+      /* sp-based form: the base register is implicit */                                     \
+      c_fsdsp(Rd, offset);                                                                   \
+    } else if (rvc && is_c_fldsd(Rs, Rd, offset)) {                                          \
+      /* register-based form with an explicit base */                                        \
+      c_fsd(Rd, Rs, offset);                                                                 \
+    } else {                                                                                 \
+      _fsd(Rd, Rs, offset);                                                                  \
+    }                                                                                        \
+  }
+
+  INSN(fsd);
+
+#undef INSN
+
+// --------------------------
+// Conditional branch instructions
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME)                                                      \
+  void NAME(Register Rs1, Register Rs2, const int64_t offset) {                              \
+    /* beq/bne against x0 -> c.beqz/c.bnez when Rs1 and the non-zero offset qualify */       \
+    if (do_compress() &&                                                                     \
+        (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() &&                           \
+        is_imm_in_range(offset, 8, 1))) {                                                    \
+      C_NAME(Rs1, offset);                                                                   \
+      return;                                                                                \
+    }                                                                                        \
+    NORMAL_NAME(Rs1, Rs2, offset);                                                           \
+  }
+
+  INSN(beq, c_beqz, _beq);
+  INSN(bne, c_bnez, _bne);  // must pair with c.bnez: c.beqz would invert the branch condition
+
+#undef INSN
+
+// --------------------------
+// Unconditional branch instructions
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, const int32_t offset) {                                             \
+    /* jal -> c.j: only for Rd == x0 with a non-zero offset that passes the range check */   \
+    if (!do_compress() || offset == 0 || Rd != x0 || !is_imm_in_range(offset, 11, 1)) {      \
+      _jal(Rd, offset);                                                                      \
+    } else {                                                                                 \
+      c_j(offset);                                                                           \
+    }                                                                                        \
+  }
+
+  INSN(jal);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
+    /* jalr -> c.jalr when Rd == x1, c.jr when Rd == x0. */                                  \
+    /* Either form requires offset == 0 and a base register other than x0. */                \
+    /* Any other Rd is emitted through _jalr. */                                             \
+    const bool rvc = do_compress() && offset == 0 && Rs != x0;                               \
+    if (rvc && Rd == x1) {                                                                   \
+      c_jalr(Rs);                                                                            \
+    } else if (rvc && Rd == x0) {                                                            \
+      c_jr(Rs);                                                                              \
+    } else {                                                                                 \
+      _jalr(Rd, Rs, offset);                                                                 \
+    }                                                                                        \
+  }
+
+  INSN(jalr);
+
+#undef INSN
+
+// --------------------------
+// Miscellaneous Instructions
+// --------------------------
+#define INSN(NAME)                                                     \
+  void NAME() {                                                        \
+    /* ebreak -> c.ebreak whenever do_compress() allows it */          \
+    if (do_compress()) {                                               \
+      c_ebreak();                                                      \
+    } else {                                                           \
+      _ebreak();                                                       \
+    }                                                                  \
+  }
+
+  INSN(ebreak);
+
+#undef INSN
+
+#define INSN(NAME)                                                      \
+  void NAME() {                                                         \
+    /* The illegal instruction in RVC is represented by a 16-bit 0. */  \
+    if (do_compress()) {                                                \
+      emit_int16(0);                                                    \
+    } else {                                                            \
+      _halt();                                                          \
+    }                                                                   \
+  }
+
+  INSN(halt);
+
+#undef INSN
+
+// --------------------------
+// Immediate Instructions
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, int64_t imm) {                                                      \
+    /* li -> c.li when imm passes is_imm_in_range(imm, 6, 0) and Rd is not x0 */             \
+    if (!do_compress() || !is_imm_in_range(imm, 6, 0) || Rd == x0) {                         \
+      _li(Rd, imm);                                                                          \
+    } else {                                                                                 \
+      c_li(Rd, imm);                                                                         \
+    }                                                                                        \
+  }
+
+  INSN(li);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs1, int32_t imm) {                                        \
+    /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn, first match wins */                  \
+    if (!do_compress()) {                                                                    \
+      _addi(Rd, Rs1, imm);                                                                   \
+      return;                                                                                \
+    }                                                                                        \
+    const bool in_place = (Rd == Rs1);                                                       \
+    if (in_place && is_imm_in_range(imm, 6, 0)) {                                            \
+      c_addi(Rd, imm);                                                                       \
+    } else if (imm == 0 && Rd != x0 && Rs1 != x0) {                                          \
+      c_mv(Rd, Rs1);                                                                         \
+    } else if (Rs1 == sp && imm != 0 && in_place &&                                          \
+               (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) {                       \
+      c_addi16sp(imm);                                                                       \
+    } else if (Rs1 == sp && imm != 0 && Rd->is_compressed_valid() &&                         \
+               (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) {                \
+      c_addi4spn(Rd, imm);                                                                   \
+    } else {                                                                                 \
+      _addi(Rd, Rs1, imm);                                                                   \
+    }                                                                                        \
+  }
+
+  INSN(addi);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs1, int32_t imm) {                                        \
+    /* addiw -> c.addiw: Rd == Rs1, Rd != x0 and imm fits a signed 6-bit field */            \
+    if (!do_compress() || Rd != Rs1 || Rd == x0 || !is_imm_in_range(imm, 6, 0)) {            \
+      _addiw(Rd, Rs1, imm);                                                                  \
+    } else {                                                                                 \
+      c_addiw(Rd, imm);                                                                      \
+    }                                                                                        \
+  }
+
+  INSN(addiw);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs1, int32_t imm) {                                        \
+    /* and_imm12 -> c.andi: in place on an is_compressed_valid() Rd, signed 6-bit imm */     \
+    const bool in_place = do_compress() && Rd == Rs1;                                        \
+    if (in_place && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0)) {               \
+      c_andi(Rd, imm);                                                                       \
+    } else {                                                                                 \
+      _and_imm12(Rd, Rs1, imm);                                                              \
+    }                                                                                        \
+  }
+
+  INSN(and_imm12);
+
+#undef INSN
+
+// --------------------------
+// Shift Immediate Instructions
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {                                     \
+    /* slli -> c.slli: in-place shift of a non-x0 register by a non-zero amount */           \
+    if (!do_compress() || Rd != Rs1 || Rd == x0 || shamt == 0) {                             \
+      _slli(Rd, Rs1, shamt);                                                                 \
+    } else {                                                                                 \
+      c_slli(Rd, shamt);                                                                     \
+    }                                                                                        \
+  }
+
+  INSN(slli);
+
+#undef INSN
+
+// --------------------------
+#define INSN(NAME, C_NAME, NORMAL_NAME)                                                      \
+  void NAME(Register Rd, Register Rs1, unsigned shamt) {                                     \
+    /* srai/srli -> c.srai/c.srli: in place on an is_compressed_valid() Rd, shamt != 0 */    \
+    if (!do_compress() || Rd != Rs1 || !Rd->is_compressed_valid() || shamt == 0) {           \
+      NORMAL_NAME(Rd, Rs1, shamt);                                                           \
+    } else {                                                                                 \
+      C_NAME(Rd, shamt);                                                                     \
+    }                                                                                        \
+  }
+
+  INSN(srai, c_srai, _srai);
+  INSN(srli, c_srli, _srli);
+
+#undef INSN
+
+// --------------------------
+// Upper Immediate Instruction
+// --------------------------
+#define INSN(NAME)                                                                           \
+  void NAME(Register Rd, int32_t imm) {                                                      \
+    /* lui -> c.lui: Rd is neither x0 nor x2, imm is non-zero and passes the 18-bit check */ \
+    if (!do_compress() || Rd == x0 || Rd == x2 || imm == 0 || !is_imm_in_range(imm, 18, 0)) { \
+      _lui(Rd, imm);                                                                         \
+    } else {                                                                                 \
+      c_lui(Rd, imm);                                                                        \
+    }                                                                                        \
+  }
+
+  INSN(lui);
+
+#undef INSN
+
+// ---------------------------------------------------------------------------------------
+
+  void bgt(Register Rs, Register Rt, const address &dest);
+  void ble(Register Rs, Register Rt, const address &dest);
+  void bgtu(Register Rs, Register Rt, const address &dest);
+  void bleu(Register Rs, Register Rt, const address &dest);
+  void bgt(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void ble(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
+  void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
+
+  typedef void (Assembler::* jal_jalr_insn)(Register Rt, address dest);
+  typedef void (Assembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
+  typedef void (Assembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
+  typedef void (Assembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
+
+  void wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn,
+                  compare_and_branch_label_insn neg_insn, bool is_far);
+  void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
+  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
+
+  // Computational pseudo instructions
+  void add(Register Rd, Register Rn, int64_t increment, Register temp = t0);
+  void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
+
+  void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0);
+  void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);
+
+  // RVB pseudo instructions
+  // zero extend word
+  void zext_w(Register Rd, Register Rs);
+
+  Assembler(CodeBuffer* code)  // forwards code to AbstractAssembler; _in_compressible_region starts false
+    : AbstractAssembler(code), _in_compressible_region(false) {}
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset) {
+    ShouldNotCallThis();          // arguments are ignored; any call is flagged via ShouldNotCallThis()
+    return RegisterOrConstant();  // default-constructed value, present to satisfy the return type
+  }
+
+  // Stack overflow checking: no implementation at this level, the body only calls Unimplemented().
+  virtual void bang_stack_with_offset(int offset) { Unimplemented(); }
+
+  static bool operand_valid_for_add_immediate(long imm) {
+    return is_imm_in_range(imm, 12, 0);  // accepted when imm passes the 12-bit check with no alignment bits
+  }
+
+  // The maximum range of a branch is fixed for the RISCV architecture.
+  static const unsigned long branch_range = 1 * M;
+
+  static bool reachable_from_branch_at(address branch, address target) {
+    return uabs(target - branch) < branch_range;  // uabs of the distance must be strictly below branch_range
+  }
+
+  virtual ~Assembler() {}  // empty body; declared virtual
+};
+
+class BiasedLockingCounters;
+
+#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
new file mode 100644
index 0000000000..7ffe880398
--- /dev/null
+++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
+#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+inline bool is_imm_in_range(long value, unsigned bits, unsigned align_bits) {  // Signed range check: value must have its low align_bits bits clear and everything from bit (bits + align_bits - 1) upward must be all zeros or all ones.
+  intx sign_bits = (value >> (bits + align_bits - 1));  // shifts out the payload bits; NOTE(review): for negative values this presumably relies on >> being an arithmetic shift
+  return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1));  // right_n_bits(align_bits) is presumably a mask of the low align_bits bits - confirm
+}
+
+inline bool is_unsigned_imm_in_range(intx value, unsigned bits, unsigned align_bits) {  // Unsigned range check: value must be non-negative, pass the right_n_bits(align_bits) mask test, and have no bits set at position (align_bits + bits) or above.
+  return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0);  // the three conditions are evaluated left to right and short-circuit
+}
+
+inline bool is_offset_in_range(intx offset, unsigned bits) {  // Convenience wrapper: same check as is_imm_in_range with align_bits fixed to 0.
+  return is_imm_in_range(offset, bits, 0);
+}
+
+#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp
new file mode 100644
index 0000000000..485a5f9355
--- /dev/null
+++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_BYTES_RISCV_HPP
+#define CPU_RISCV_BYTES_RISCV_HPP
+
+class Bytes: AllStatic {  // Static helpers that read and write u2/u4/u8 values at addresses of any alignment, plus Java-order variants built on swap_u2/u4/u8.
+ public:
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // RISCV needs to check for alignment.
+
+  // Forward declarations of the compiler-dependent implementation
+  static inline u2 swap_u2(u2 x);
+  static inline u4 swap_u4(u4 x);
+  static inline u8 swap_u8(u8 x);
+
+  static inline u2 get_native_u2(address p) {  // Reads a u2 at p: one 16-bit load when p is even, otherwise assembled from two single elements of p.
+    if ((intptr_t(p) & 1) == 0) {
+      return *(u2*)p;
+    } else {
+      return ((u2)(p[1]) << 8) |  // p[1] lands in bits 8..15, p[0] in bits 0..7 (lowest address = least significant)
+             ((u2)(p[0]));
+    }
+  }
+
+  static inline u4 get_native_u4(address p) {  // Reads a u4 at p using the widest loads the low two address bits allow.
+    switch (intptr_t(p) & 3) {
+      case 0:  // 4-byte aligned: single u4 load
+        return *(u4*)p;
+
+      case 2:  // 2-byte aligned only: two u2 loads, higher address is the upper half
+        return ((u4)(((u2*)p)[1]) << 16) |
+               ((u4)(((u2*)p)[0]));
+
+      default:  // odd address (1 or 3): four single-element loads
+        return ((u4)(p[3]) << 24) |
+               ((u4)(p[2]) << 16) |
+               ((u4)(p[1]) <<  8) |
+               ((u4)(p[0]));
+    }
+  }
+
+  static inline u8 get_native_u8(address p) {  // Reads a u8 at p using the widest loads the low three address bits allow.
+    switch (intptr_t(p) & 7) {
+      case 0:  // 8-byte aligned: single u8 load
+        return *(u8*)p;
+
+      case 4:  // 4-byte aligned only: two u4 loads
+        return ((u8)(((u4*)p)[1]) << 32) |
+               ((u8)(((u4*)p)[0]));
+
+      case 2:
+      case 6:  // 2-byte aligned only: four u2 loads
+        return ((u8)(((u2*)p)[3]) << 48) |
+               ((u8)(((u2*)p)[2]) << 32) |
+               ((u8)(((u2*)p)[1]) << 16) |
+               ((u8)(((u2*)p)[0]));
+
+      default:  // odd address: eight single-element loads
+        return ((u8)(p[7]) << 56) |
+               ((u8)(p[6]) << 48) |
+               ((u8)(p[5]) << 40) |
+               ((u8)(p[4]) << 32) |
+               ((u8)(p[3]) << 24) |
+               ((u8)(p[2]) << 16) |
+               ((u8)(p[1]) <<  8) |
+               ((u8)(p[0]));
+    }
+  }
+
+  static inline void put_native_u2(address p, u2 x) {  // Writes x at p: one 16-bit store when p is even, otherwise two single-element stores.
+    if ((intptr_t(p) & 1) == 0) {
+      *(u2*)p = x;
+    } else {
+      p[1] = x >> 8;  // upper 8 bits go to the higher address
+      p[0] = x;  // assignment to an element of p keeps only what fits in that element
+    }
+  }
+
+  static inline void put_native_u4(address p, u4 x) {  // Writes x at p; mirrors the alignment cases of get_native_u4.
+    switch (intptr_t(p) & 3) {
+      case 0:  // 4-byte aligned: single u4 store
+        *(u4*)p = x;
+        break;
+
+      case 2:  // 2-byte aligned only: two u2 stores
+        ((u2*)p)[1] = x >> 16;
+        ((u2*)p)[0] = x;
+        break;
+
+      default:  // odd address: four u1 stores
+        ((u1*)p)[3] = x >> 24;
+        ((u1*)p)[2] = x >> 16;
+        ((u1*)p)[1] = x >>  8;
+        ((u1*)p)[0] = x;
+        break;
+    }
+  }
+
+  static inline void put_native_u8(address p, u8 x) {  // Writes x at p; mirrors the alignment cases of get_native_u8.
+    switch (intptr_t(p) & 7) {
+      case 0:  // 8-byte aligned: single u8 store
+        *(u8*)p = x;
+        break;
+
+      case 4:  // 4-byte aligned only: two u4 stores
+        ((u4*)p)[1] = x >> 32;
+        ((u4*)p)[0] = x;
+        break;
+
+      case 2:
+      case 6:  // 2-byte aligned only: four u2 stores
+        ((u2*)p)[3] = x >> 48;
+        ((u2*)p)[2] = x >> 32;
+        ((u2*)p)[1] = x >> 16;
+        ((u2*)p)[0] = x;
+        break;
+
+      default:  // odd address: eight u1 stores
+        ((u1*)p)[7] = x >> 56;
+        ((u1*)p)[6] = x >> 48;
+        ((u1*)p)[5] = x >> 40;
+        ((u1*)p)[4] = x >> 32;
+        ((u1*)p)[3] = x >> 24;
+        ((u1*)p)[2] = x >> 16;
+        ((u1*)p)[1] = x >>  8;
+        ((u1*)p)[0] = x;
+        break;
+    }
+  }
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
+  static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }  // native read, then swap
+  static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
+  static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
+
+  static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }  // swap, then native write
+  static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
+  static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); }
+};
+
+#include OS_CPU_HEADER(bytes)
+
+#endif // CPU_RISCV_BYTES_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
new file mode 100644
index 0000000000..9729e16c96
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/javaClasses.hpp"
+#include "nativeInst_riscv.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+
+#define __ ce->masm()->
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: stores the method metadata and bci as parameters, far-calls the Runtime1 counter_overflow entry, then jumps to _continuation.
+  __ bind(_entry);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ mov_metadata(t0, m);  // metadata is staged in t0 before being stored
+  ce->store_parameter(t0, 1);  // parameter index 1: method metadata
+  ce->store_parameter(_bci, 0);  // parameter index 0: bci
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ j(_continuation);
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)  // Index-plus-array variant: records both operands and clears _throw_index_out_of_bounds_exception.
+  : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);  // keeps a new CodeEmitInfo constructed from the one passed in
+}
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index)  // Index-only variant: _array is NULL and _throw_index_out_of_bounds_exception is set.
+  : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);  // keeps a new CodeEmitInfo constructed from the one passed in
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {  // Emits either a call to the predicate_failed_trap entry (when _info asks to deoptimize) or a call to one of the two range/index exception-throwing entries.
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {  // deoptimizing path: no index/array registers are set up
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    __ far_call(RuntimeAddress(a));
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+
+  if (_index->is_cpu_register()) {  // the index goes into t0 either from its register or as a jint constant
+    __ mv(t0, _index->as_register());
+  } else {
+    __ mv(t0, _index->as_jint());
+  }
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    assert(_array != NULL, "sanity");
+    __ mv(t1, _array->as_pointer_register());  // the array goes into t1 only for the range_check_failed entry
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  int32_t off = 0;
+  __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off);  // presumably leaves the remaining low part of the address in off - confirm in MacroAssembler
+  __ jalr(ra, ra, off);  // jump-and-link through ra using the offset from the previous line
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {  // Stores a new CodeEmitInfo constructed from info; unlike RangeCheckStub, there is no NULL assert here.
+  _info = new CodeEmitInfo(info);
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {  // Emits a far call to the Runtime1 predicate_failed_trap entry and records call info for it.
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  __ far_call(RuntimeAddress(a));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());  // debug builds only: emits a should_not_reach_here after the call
+}
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {  // Emits a far call to the Runtime1 throw_div0_exception entry, optionally registering an implicit-exception table entry first.
+  if (_offset != -1) {  // -1 means no entry is added to the implicit exception table
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());  // pairs _offset with the current code offset, taken before _entry is bound
+  }
+  __ bind(_entry);
+  __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+#ifdef ASSERT
+  __ should_not_reach_here();  // emitted only when ASSERT is defined
+#endif
+}
+
+// Implementation of NewInstanceStub
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {  // Records the operands, a new CodeEmitInfo built from info, and the stub id, which must be one of the three new_instance ids.
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: moves the klass register into x13, far-calls the Runtime1 entry selected by _stub_id, then jumps to _continuation.
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  __ mv(x13, _klass_reg->as_register());  // klass is passed in x13
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");  // no move is emitted for the result; the operand is required to already be x10
+  __ j(_continuation);
+}
+
+// Implementation of NewTypeArrayStub
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {  // Records the klass, length and result operands and a new CodeEmitInfo built from info.
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {  // Emits a far call to the Runtime1 new_type_array entry and a jump to _continuation; no register moves are emitted because the operands are asserted to be in fixed registers.
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == x9, "length must in x9");  // length is required to already be in x9
+  assert(_klass_reg->as_register() == x13, "klass_reg must in x13");  // klass is required to already be in x13
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");  // result operand is required to be x10
+  __ j(_continuation);
+}
+
+// Implementation of NewObjectArrayStub
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {  // Records the klass, result and length operands and a new CodeEmitInfo built from info.
+  _klass_reg = klass_reg;
+  _result = result;
+  _length = length;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {  // Same shape as NewTypeArrayStub::emit_code, but targets the Runtime1 new_object_array entry.
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  assert(_length->as_register() == x9, "length must in x9");  // length is required to already be in x9
+  assert(_klass_reg->as_register() == x13, "klass_reg must in x13");  // klass is required to already be in x13
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == x10, "result must in x10");  // result operand is required to be x10
+  __ j(_continuation);
+}
+
+// Implementation of MonitorAccessStubs
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)  // Passes both operands to MonitorAccessStub and stores a new CodeEmitInfo built from info.
+: MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: stores object and lock as parameters, far-calls the monitorenter entry (fpu or nofpu flavour), then jumps to _continuation.
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+  __ bind(_entry);
+  ce->store_parameter(_obj_reg->as_register(),  1);  // parameter index 1: object
+  ce->store_parameter(_lock_reg->as_register(), 0);  // parameter index 0: lock
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {  // the nofpu entry is chosen when the compilation reports no FPU code
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id)));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ j(_continuation);
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: stores the lock as parameter 0 and jumps (rather than calls) to the monitorexit entry with ra preloaded with _continuation.
+  __ bind(_entry);
+  if (_compute_lock) {
+    // lock_reg was destroyed by fast unlocking attempt => recompute it
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  ce->store_parameter(_lock_reg->as_register(), 0);
+  // note: non-blocking leaf routine => no call info needed
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {  // the nofpu entry is chosen when the compilation reports no FPU code
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  __ la(ra, _continuation);  // ra receives the address of _continuation; presumably the runtime routine returns there - confirm
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id)));  // a jump, so nothing is emitted after it in this stub
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes)
+// - Replace original code with a call to the stub
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+
+int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size;  // initialised to the negated size of a NativeGeneralJump
+
+void PatchingStub::align_patch_site(MacroAssembler* masm) {}  // intentionally empty: nothing is emitted to align the patch site
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {  // Emits nothing; the assert fires if this path is ever taken in a debug build.
+  assert(false, "RISCV should not use C1 runtime patching");
+}
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: stores _trap_request as parameter 0 and far-calls the Runtime1 deoptimize entry.
+  __ bind(_entry);
+  ce->store_parameter(_trap_request, 0);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id)));
+  ce->add_call_info_here(_info);  // note: unlike most stubs in this file, verify_oop_map is not called here
+  DEBUG_ONLY(__ should_not_reach_here());
+}
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {  // Registers an implicit-exception table entry, then emits a far call to either the predicate_failed_trap or the throw_null_pointer_exception entry.
+  address a = NULL;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+  }
+
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());  // pairs _offset with the current code offset, taken before _entry is bound
+  __ bind(_entry);
+  __ far_call(RuntimeAddress(a));
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {  // Emits the stub body: optionally copies the object into t0, then far-calls the Runtime1 entry identified by _stub.
+  assert(__ rsp_offset() == 0, "frame size should be fixed");
+
+  __ bind(_entry);
+  // pass the object in a tmp register because all other registers
+  // must be preserved
+  if (_obj->is_cpu_register()) {  // when _obj is not a CPU register, t0 is left untouched
+    __ mv(t0, _obj->as_register());
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1);  // t1 is passed as the third argument, presumably the scratch register so that t0 stays intact - confirm against far_call
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {  // Emits the slow path: places the five arraycopy arguments per the Java calling convention, emits a static call through the resolve stub, then jumps to _continuation.
+  // ---------------slow case: call to native-----------------
+  __ bind(_entry);
+  // Figure out where the args should go
+  // This should really convert the IntrinsicID to the Method* and signature
+  // but I don't know how to do that.
+  const int args_num = 5;
+  VMRegPair args[args_num];
+  BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT };  // same order as r[] below: src, src_pos, dst, dst_pos, length
+  SharedRuntime::java_calling_convention(signature, args, args_num, true);  // fills args[] with the location assigned to each argument
+
+  // push parameters
+  Register r[args_num];
+  r[0] = src()->as_register();
+  r[1] = src_pos()->as_register();
+  r[2] = dst()->as_register();
+  r[3] = dst_pos()->as_register();
+  r[4] = length()->as_register();
+
+  // next registers will get stored on the stack
+  for (int j = 0; j < args_num; j++) {
+    VMReg r_1 = args[j].first();
+    if (r_1->is_stack()) {
+      int st_off = r_1->reg2stack() * wordSize;  // stack slot index scaled by wordSize gives the sp-relative byte offset used below
+      __ sd(r[j], Address(sp, st_off));
+    } else {
+      assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg");  // register arguments are expected to be in place already; no move is emitted
+    }
+  }
+
+  ce->align_call(lir_static_call);
+
+  ce->emit_static_call_stub();
+  if (ce->compilation()->bailed_out()) {
+    return; // CodeCache is full
+  }
+  Address resolve(SharedRuntime::get_resolve_static_call_stub(),
+                  relocInfo::static_call_type);
+  address call = __ trampoline_call(resolve);
+  if (call == NULL) {  // a NULL result from trampoline_call is treated as failure and bails out the compilation
+    ce->bailout("trampoline stub overflow");
+    return;
+  }
+  ce->add_call_info_here(info());
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {  // non-PRODUCT builds only: emitted code increments Runtime1::_arraycopy_slowcase_cnt
+    __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt));
+    __ incrementw(Address(t1));
+  }
+#endif
+
+  __ j(_continuation);
+}
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp
new file mode 100644
index 0000000000..4417ad6309
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_DEFS_RISCV_HPP
+#define CPU_RISCV_C1_DEFS_RISCV_HPP
+
+// native word offsets from memory address (little endian)
+enum {
+  pd_lo_word_offset_in_bytes = 0,  // low word at the base address
+  pd_hi_word_offset_in_bytes = BytesPerWord  // high word one BytesPerWord above the low word
+};
+
+// whether explicit rounding operations are required to implement the strictFP mode
+enum {
+  pd_strict_fp_requires_explicit_rounding = false  // set to false for this port
+};
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers,       // number of registers used during code emission
+  pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers,  // number of float registers used during code emission
+
+  // caller saved
+  pd_nof_caller_save_cpu_regs_frame_map = 13, // number of registers killed by calls
+  pd_nof_caller_save_fpu_regs_frame_map = 32, // number of float registers killed by calls
+
+  pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map,  // = 13: callee-saved numbering starts right after the caller-saved block
+  pd_last_callee_saved_reg = 21,  // callee-saved numbers therefore span 13..21
+
+  pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1,  // = 12
+
+  pd_nof_cpu_regs_reg_alloc
+    = pd_nof_caller_save_cpu_regs_frame_map,  // number of registers that are visible to register allocator
+  pd_nof_fpu_regs_reg_alloc = 32,  // number of float registers that are visible to register allocator
+
+  pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan
+  pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan
+  pd_nof_xmm_regs_linearscan = 0, // don't have vector registers
+
+  pd_first_cpu_reg  = 0,
+  pd_last_cpu_reg   = pd_nof_cpu_regs_reg_alloc - 1,  // = 12
+  pd_first_byte_reg = 0,
+  pd_last_byte_reg  = pd_nof_cpu_regs_reg_alloc - 1,  // = 12, same range as the cpu registers above
+
+  pd_first_fpu_reg  = pd_nof_cpu_regs_frame_map,  // fpu numbering begins directly after the cpu register numbers
+  pd_last_fpu_reg   = pd_first_fpu_reg + 31,  // 32 consecutive fpu numbers
+
+  pd_first_callee_saved_fpu_reg_1 = 8 + pd_first_fpu_reg,  // first callee-saved fpu range: offsets 8..9 (presumably f8-f9, i.e. fs0-fs1 - confirm against the RISC-V calling convention)
+  pd_last_callee_saved_fpu_reg_1  = 9 + pd_first_fpu_reg,
+  pd_first_callee_saved_fpu_reg_2 = 18 + pd_first_fpu_reg,  // second callee-saved fpu range: offsets 18..27 (presumably f18-f27, i.e. fs2-fs11 - confirm)
+  pd_last_callee_saved_fpu_reg_2  = 27 + pd_first_fpu_reg
+};
+
+
+// Encoding of float value in debug info.  This is true on x86 where
+// floats are extended to doubles when stored in the stack, false for
+// RISCV where floats and doubles are stored in their native form.
+enum {
+  pd_float_saved_as_double = false  // floats are not widened to doubles in debug info on this port
+};
+
+#endif // CPU_RISCV_C1_DEFS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp
new file mode 100644
index 0000000000..e3a2606c53
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+//--------------------------------------------------------
+//               FpuStackSim
+//--------------------------------------------------------
+
+// No FPU stack on RISCV
diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp
new file mode 100644
index 0000000000..7bc3d31150
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
+#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
+
+// No FPU stack on RISCV
+class FpuStackSim;
+
+#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp
new file mode 100644
index 0000000000..682ebe8262
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) {  // the trailing bool parameter is unnamed and unused
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert the stack slot to a byte offset from SP.
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value,
+    // so it must be added in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type));
+  } else if (r_1->is_Register()) {
+    Register reg1 = r_1->as_Register();
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      Register reg2 = r_2->as_Register();
+      assert(reg2 == reg1, "must be same register");  // both halves of the pair must name one register
+      opr = as_long_opr(reg1);
+    } else if (is_reference_type(type)) {
+      opr = as_oop_opr(reg1);
+    } else if (type == T_METADATA) {
+      opr = as_metadata_opr(reg1);
+    } else if (type == T_ADDRESS) {
+      opr = as_address_opr(reg1);
+    } else {
+      opr = as_opr(reg1);  // every other type held in a general register
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    int num = r_1->as_FloatRegister()->encoding();  // the register encoding is used as the fpu operand number
+    if (type == T_FLOAT) {
+      opr = LIR_OprFact::single_fpu(num);
+    } else {
+      opr = LIR_OprFact::double_fpu(num);
+    }
+  } else {
+    ShouldNotReachHere();  // location is neither a stack slot, a general register nor a float register
+  }
+  return opr;
+}
+
+LIR_Opr FrameMap::zr_opr;
+LIR_Opr FrameMap::r1_opr;
+LIR_Opr FrameMap::r2_opr;
+LIR_Opr FrameMap::r3_opr;
+LIR_Opr FrameMap::r4_opr;
+LIR_Opr FrameMap::r5_opr;
+LIR_Opr FrameMap::r6_opr;
+LIR_Opr FrameMap::r7_opr;
+LIR_Opr FrameMap::r8_opr;
+LIR_Opr FrameMap::r9_opr;
+LIR_Opr FrameMap::r10_opr;
+LIR_Opr FrameMap::r11_opr;
+LIR_Opr FrameMap::r12_opr;
+LIR_Opr FrameMap::r13_opr;
+LIR_Opr FrameMap::r14_opr;
+LIR_Opr FrameMap::r15_opr;
+LIR_Opr FrameMap::r16_opr;
+LIR_Opr FrameMap::r17_opr;
+LIR_Opr FrameMap::r18_opr;
+LIR_Opr FrameMap::r19_opr;
+LIR_Opr FrameMap::r20_opr;
+LIR_Opr FrameMap::r21_opr;
+LIR_Opr FrameMap::r22_opr;
+LIR_Opr FrameMap::r23_opr;
+LIR_Opr FrameMap::r24_opr;
+LIR_Opr FrameMap::r25_opr;
+LIR_Opr FrameMap::r26_opr;
+LIR_Opr FrameMap::r27_opr;
+LIR_Opr FrameMap::r28_opr;
+LIR_Opr FrameMap::r29_opr;
+LIR_Opr FrameMap::r30_opr;
+LIR_Opr FrameMap::r31_opr;
+
+LIR_Opr FrameMap::fp_opr;
+LIR_Opr FrameMap::sp_opr;
+
+LIR_Opr FrameMap::receiver_opr;
+
+LIR_Opr FrameMap::zr_oop_opr;
+LIR_Opr FrameMap::r1_oop_opr;
+LIR_Opr FrameMap::r2_oop_opr;
+LIR_Opr FrameMap::r3_oop_opr;
+LIR_Opr FrameMap::r4_oop_opr;
+LIR_Opr FrameMap::r5_oop_opr;
+LIR_Opr FrameMap::r6_oop_opr;
+LIR_Opr FrameMap::r7_oop_opr;
+LIR_Opr FrameMap::r8_oop_opr;
+LIR_Opr FrameMap::r9_oop_opr;
+LIR_Opr FrameMap::r10_oop_opr;
+LIR_Opr FrameMap::r11_oop_opr;
+LIR_Opr FrameMap::r12_oop_opr;
+LIR_Opr FrameMap::r13_oop_opr;
+LIR_Opr FrameMap::r14_oop_opr;
+LIR_Opr FrameMap::r15_oop_opr;
+LIR_Opr FrameMap::r16_oop_opr;
+LIR_Opr FrameMap::r17_oop_opr;
+LIR_Opr FrameMap::r18_oop_opr;
+LIR_Opr FrameMap::r19_oop_opr;
+LIR_Opr FrameMap::r20_oop_opr;
+LIR_Opr FrameMap::r21_oop_opr;
+LIR_Opr FrameMap::r22_oop_opr;
+LIR_Opr FrameMap::r23_oop_opr;
+LIR_Opr FrameMap::r24_oop_opr;
+LIR_Opr FrameMap::r25_oop_opr;
+LIR_Opr FrameMap::r26_oop_opr;
+LIR_Opr FrameMap::r27_oop_opr;
+LIR_Opr FrameMap::r28_oop_opr;
+LIR_Opr FrameMap::r29_oop_opr;
+LIR_Opr FrameMap::r30_oop_opr;
+LIR_Opr FrameMap::r31_oop_opr;
+
+LIR_Opr FrameMap::t0_opr;
+LIR_Opr FrameMap::t1_opr;
+LIR_Opr FrameMap::t0_long_opr;
+LIR_Opr FrameMap::t1_long_opr;
+
+LIR_Opr FrameMap::r10_metadata_opr;
+LIR_Opr FrameMap::r11_metadata_opr;
+LIR_Opr FrameMap::r12_metadata_opr;
+LIR_Opr FrameMap::r13_metadata_opr;
+LIR_Opr FrameMap::r14_metadata_opr;
+LIR_Opr FrameMap::r15_metadata_opr;
+
+LIR_Opr FrameMap::long10_opr;
+LIR_Opr FrameMap::long11_opr;
+LIR_Opr FrameMap::fpu10_float_opr;
+LIR_Opr FrameMap::fpu10_double_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+//--------------------------------------------------------
+//               FrameMap
+//--------------------------------------------------------
+// |---f31--|
+// |---..---|
+// |---f28--|
+// |---f27--|<---pd_last_callee_saved_fpu_reg_2
+// |---..---|
+// |---f18--|<---pd_first_callee_saved_fpu_reg_2
+// |---f17--|
+// |---..---|
+// |---f10--|
+// |---f9---|<---pd_last_callee_saved_fpu_reg_1
+// |---f8---|<---pd_first_callee_saved_fpu_reg_1
+// |---f7---|
+// |---..---|
+// |---f0---|
+// |---x27--|
+// |---x23--|
+// |---x8---|
+// |---x4---|
+// |---x3---|
+// |---x2---|
+// |---x1---|
+// |---x0---|
+// |---x26--|<---pd_last_callee_saved_reg
+// |---..---|
+// |---x18--|
+// |---x9---|<---pd_first_callee_saved_reg
+// |---x31--|
+// |---..---|
+// |---x28--|
+// |---x17--|
+// |---..---|
+// |---x10--|
+// |---x7---|
+
+void FrameMap::initialize() {  // one-time setup of the register map and the static operands
+  assert(!_init_done, "once");
+
+  int i = 0;  // next LIR cpu register number to hand out
+
+  // caller-saved registers
+  map_register(i, x7);  r7_opr  = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x10); r10_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x11); r11_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x12); r12_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x13); r13_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x14); r14_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x15); r15_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x16); r16_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x17); r17_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x28); r28_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x29); r29_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x30); r30_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x31); r31_opr = LIR_OprFact::single_cpu(i); i++;
+
+  // callee-saved registers
+  map_register(i, x9);  r9_opr  = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x18); r18_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x19); r19_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x20); r20_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x21); r21_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x22); r22_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x24); r24_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x25); r25_opr = LIR_OprFact::single_cpu(i); i++;
+  map_register(i, x26); r26_opr = LIR_OprFact::single_cpu(i); i++;
+
+  // special-purpose registers
+  map_register(i, x0);  zr_opr  = LIR_OprFact::single_cpu(i); i++;  // zr
+  map_register(i, x1);  r1_opr  = LIR_OprFact::single_cpu(i); i++;  // ra
+  map_register(i, x2);  r2_opr  = LIR_OprFact::single_cpu(i); i++;  // sp
+  map_register(i, x3);  r3_opr  = LIR_OprFact::single_cpu(i); i++;  // gp
+  map_register(i, x4);  r4_opr  = LIR_OprFact::single_cpu(i); i++;  // thread
+  map_register(i, x8);  r8_opr  = LIR_OprFact::single_cpu(i); i++;  // fp
+  map_register(i, x23); r23_opr = LIR_OprFact::single_cpu(i); i++;  // java thread
+  map_register(i, x27); r27_opr = LIR_OprFact::single_cpu(i); i++;  // heapbase
+
+  // temporary registers
+  map_register(i, x5);  r5_opr  = LIR_OprFact::single_cpu(i); i++;  // t0
+  map_register(i, x6);  r6_opr  = LIR_OprFact::single_cpu(i); i++;  // t1
+
+  t0_opr = r5_opr;
+  t1_opr = r6_opr;
+  t0_long_opr = LIR_OprFact::double_cpu(r5_opr->cpu_regnr(), r5_opr->cpu_regnr());
+  t1_long_opr = LIR_OprFact::double_cpu(r6_opr->cpu_regnr(), r6_opr->cpu_regnr());
+
+  long10_opr  = LIR_OprFact::double_cpu(r10_opr->cpu_regnr(), r10_opr->cpu_regnr());
+  long11_opr  = LIR_OprFact::double_cpu(r11_opr->cpu_regnr(), r11_opr->cpu_regnr());
+
+  fpu10_float_opr   = LIR_OprFact::single_fpu(10);
+  fpu10_double_opr  = LIR_OprFact::double_fpu(10);
+
+  i = 0;  // i is reused as the index into _caller_save_cpu_regs (same order as the mapping above)
+  _caller_save_cpu_regs[i++]  = r7_opr;
+  _caller_save_cpu_regs[i++]  = r10_opr;
+  _caller_save_cpu_regs[i++]  = r11_opr;
+  _caller_save_cpu_regs[i++]  = r12_opr;
+  _caller_save_cpu_regs[i++]  = r13_opr;
+  _caller_save_cpu_regs[i++]  = r14_opr;
+  _caller_save_cpu_regs[i++]  = r15_opr;
+  _caller_save_cpu_regs[i++]  = r16_opr;
+  _caller_save_cpu_regs[i++]  = r17_opr;
+  _caller_save_cpu_regs[i++]  = r28_opr;
+  _caller_save_cpu_regs[i++]  = r29_opr;
+  _caller_save_cpu_regs[i++]  = r30_opr;
+  _caller_save_cpu_regs[i++]  = r31_opr;
+
+  _init_done = true;  // NOTE(review): presumably must precede the as_*_opr() lookups below - confirm
+
+  zr_oop_opr  = as_oop_opr(x0);
+  r1_oop_opr  = as_oop_opr(x1);
+  r2_oop_opr  = as_oop_opr(x2);
+  r3_oop_opr  = as_oop_opr(x3);
+  r4_oop_opr  = as_oop_opr(x4);
+  r5_oop_opr  = as_oop_opr(x5);
+  r6_oop_opr  = as_oop_opr(x6);
+  r7_oop_opr  = as_oop_opr(x7);
+  r8_oop_opr  = as_oop_opr(x8);
+  r9_oop_opr  = as_oop_opr(x9);
+  r10_oop_opr = as_oop_opr(x10);
+  r11_oop_opr = as_oop_opr(x11);
+  r12_oop_opr = as_oop_opr(x12);
+  r13_oop_opr = as_oop_opr(x13);
+  r14_oop_opr = as_oop_opr(x14);
+  r15_oop_opr = as_oop_opr(x15);
+  r16_oop_opr = as_oop_opr(x16);
+  r17_oop_opr = as_oop_opr(x17);
+  r18_oop_opr = as_oop_opr(x18);
+  r19_oop_opr = as_oop_opr(x19);
+  r20_oop_opr = as_oop_opr(x20);
+  r21_oop_opr = as_oop_opr(x21);
+  r22_oop_opr = as_oop_opr(x22);
+  r23_oop_opr = as_oop_opr(x23);
+  r24_oop_opr = as_oop_opr(x24);
+  r25_oop_opr = as_oop_opr(x25);
+  r26_oop_opr = as_oop_opr(x26);
+  r27_oop_opr = as_oop_opr(x27);
+  r28_oop_opr = as_oop_opr(x28);
+  r29_oop_opr = as_oop_opr(x29);
+  r30_oop_opr = as_oop_opr(x30);
+  r31_oop_opr = as_oop_opr(x31);
+
+  r10_metadata_opr = as_metadata_opr(x10);
+  r11_metadata_opr = as_metadata_opr(x11);
+  r12_metadata_opr = as_metadata_opr(x12);
+  r13_metadata_opr = as_metadata_opr(x13);
+  r14_metadata_opr = as_metadata_opr(x14);
+  r15_metadata_opr = as_metadata_opr(x15);
+
+  sp_opr = as_pointer_opr(sp);
+  fp_opr = as_pointer_opr(fp);
+
+  VMRegPair regs;
+  BasicType sig_bt = T_OBJECT;
+  SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true);  // query the location of a single T_OBJECT argument
+  receiver_opr = as_oop_opr(regs.first()->as_Register());  // receiver lives in that first argument register
+
+  for (i = 0; i < nof_caller_save_fpu_regs; i++) {
+    _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);  // fpu operands are numbered 0..nof_caller_save_fpu_regs-1
+  }
+}
+
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  return Address(sp, in_bytes(sp_offset));  // sp-relative address at the given byte offset
+}
+
+
+// ----------------mapping-----------------------
+// all mapping is based on fp addressing, except for simple leaf methods where we access
+// the locals sp based (and no frame is built)
+
+
+// Frame for simple leaf methods (quick entries)
+//
+//   +----------+
+//   | ret addr |   <- TOS
+//   +----------+
+//   | args     |
+//   | ......   |
+
+// Frame for standard methods
+//
+//   | .........|  <- TOS
+//   | locals   |
+//   +----------+
+//   |  old fp, |
+//   +----------+
+//   | ret addr |
+//   +----------+
+//   |  args    |  <- FP
+//   | .........|
+
+
+// For OopMaps, map a local variable or spill index to a VMRegImpl name.
+// This is the offset from sp() in the frame of the slot for the index,
+// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register).
+//
+//           framesize +
+//           stack0         stack0          0  <- VMReg
+//             |              | <registers> |
+//  ...........|..............|.............|
+//      0 1 2 3 x x 4 5 6 ... |                <- local indices
+//      ^           ^        sp()                 ( x x indicate link
+//      |           |                               and return addr)
+//  arguments   non-argument locals
+
+
+VMReg FrameMap::fpu_regname (int n) {
+  // Return the VMReg name of floating-point register number "n".
+  // There is no FPU stack on RISC-V, so n is used directly as the float register number.
+  return as_FloatRegister(n)->as_VMReg();
+}
+
+LIR_Opr FrameMap::stack_pointer() {
+  return FrameMap::sp_opr;  // the static sp operand set up in FrameMap::initialize()
+}
+
+// JSR 292: no SP-save operand is provided for method handle invokes
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  return LIR_OprFact::illegalOpr;  // Not needed on riscv
+}
+
+bool FrameMap::validate_frame() {
+  return true;  // unconditionally reports the frame as valid; no checks are performed here
+}
diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp
new file mode 100644
index 0000000000..01281f5c9e
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
+#define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
+
+//  On RISCV the frame looks as follows:
+//
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+//  | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling .
+//  +-----------------------------+---------+----------------------------------------+----------------+-----------
+
+ public:
+  static const int pd_c_runtime_reserved_arg_size;
+
+  enum {
+    first_available_sp_in_frame = 0,
+    frame_pad_in_bytes = 16,
+    nof_reg_args = 8
+  };
+
+ public:
+  static LIR_Opr receiver_opr;
+
+  static LIR_Opr zr_opr;
+  static LIR_Opr r1_opr;
+  static LIR_Opr r2_opr;
+  static LIR_Opr r3_opr;
+  static LIR_Opr r4_opr;
+  static LIR_Opr r5_opr;
+  static LIR_Opr r6_opr;
+  static LIR_Opr r7_opr;
+  static LIR_Opr r8_opr;
+  static LIR_Opr r9_opr;
+  static LIR_Opr r10_opr;
+  static LIR_Opr r11_opr;
+  static LIR_Opr r12_opr;
+  static LIR_Opr r13_opr;
+  static LIR_Opr r14_opr;
+  static LIR_Opr r15_opr;
+  static LIR_Opr r16_opr;
+  static LIR_Opr r17_opr;
+  static LIR_Opr r18_opr;
+  static LIR_Opr r19_opr;
+  static LIR_Opr r20_opr;
+  static LIR_Opr r21_opr;
+  static LIR_Opr r22_opr;
+  static LIR_Opr r23_opr;
+  static LIR_Opr r24_opr;
+  static LIR_Opr r25_opr;
+  static LIR_Opr r26_opr;
+  static LIR_Opr r27_opr;
+  static LIR_Opr r28_opr;
+  static LIR_Opr r29_opr;
+  static LIR_Opr r30_opr;
+  static LIR_Opr r31_opr;
+  static LIR_Opr fp_opr;
+  static LIR_Opr sp_opr;
+
+  static LIR_Opr zr_oop_opr;
+  static LIR_Opr r1_oop_opr;
+  static LIR_Opr r2_oop_opr;
+  static LIR_Opr r3_oop_opr;
+  static LIR_Opr r4_oop_opr;
+  static LIR_Opr r5_oop_opr;
+  static LIR_Opr r6_oop_opr;
+  static LIR_Opr r7_oop_opr;
+  static LIR_Opr r8_oop_opr;
+  static LIR_Opr r9_oop_opr;
+  static LIR_Opr r10_oop_opr;
+  static LIR_Opr r11_oop_opr;
+  static LIR_Opr r12_oop_opr;
+  static LIR_Opr r13_oop_opr;
+  static LIR_Opr r14_oop_opr;
+  static LIR_Opr r15_oop_opr;
+  static LIR_Opr r16_oop_opr;
+  static LIR_Opr r17_oop_opr;
+  static LIR_Opr r18_oop_opr;
+  static LIR_Opr r19_oop_opr;
+  static LIR_Opr r20_oop_opr;
+  static LIR_Opr r21_oop_opr;
+  static LIR_Opr r22_oop_opr;
+  static LIR_Opr r23_oop_opr;
+  static LIR_Opr r24_oop_opr;
+  static LIR_Opr r25_oop_opr;
+  static LIR_Opr r26_oop_opr;
+  static LIR_Opr r27_oop_opr;
+  static LIR_Opr r28_oop_opr;
+  static LIR_Opr r29_oop_opr;
+  static LIR_Opr r30_oop_opr;
+  static LIR_Opr r31_oop_opr;
+
+  static LIR_Opr t0_opr;
+  static LIR_Opr t1_opr;
+  static LIR_Opr t0_long_opr;
+  static LIR_Opr t1_long_opr;
+
+  static LIR_Opr r10_metadata_opr;
+  static LIR_Opr r11_metadata_opr;
+  static LIR_Opr r12_metadata_opr;
+  static LIR_Opr r13_metadata_opr;
+  static LIR_Opr r14_metadata_opr;
+  static LIR_Opr r15_metadata_opr;
+
+  static LIR_Opr long10_opr;
+  static LIR_Opr long11_opr;
+  static LIR_Opr fpu10_float_opr;
+  static LIR_Opr fpu10_double_opr;
+
+  static LIR_Opr as_long_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));  // both halves use the same register number
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));  // built exactly like as_long_opr
+  }
+
+  // VMReg name for FPU register n (there is no FPU stack on RISC-V)
+  static VMReg fpu_regname(int n);
+
+  static bool is_caller_save_register(LIR_Opr opr) { return true; }  // always true; the operand is not inspected
+  static bool is_caller_save_register(Register r)  { return true; }  // always true; the register is not inspected
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }  // platform-defined constant
+  static int last_cpu_reg()             { return pd_last_cpu_reg; }  // platform-defined constant
+
+#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
new file mode 100644
index 0000000000..2a99d49c94
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+#define __ _masm->
+
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal,
+                                    LIR_Opr result, CodeEmitInfo* info) {  // 'illegal' and 'info' are not read in this function
+  // opcode check
+  assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem");
+  bool is_irem = (code == lir_irem);
+  // operand check
+  assert(left->is_single_cpu(), "left must be a register");
+  assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant");
+  assert(result->is_single_cpu(), "result must be a register");
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  // constant divisor (must be a positive power of 2): emit shift/mask code instead of a divide
+  if (right->is_constant()) {
+    int c = right->as_constant_ptr()->as_jint();
+    assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
+    if (is_irem) {
+      if (c == 1) {
+        // move 0 to dreg if divisor is 1
+        __ mv(dreg, zr);
+      } else {
+        unsigned int shift = exact_log2(c);
+        __ sraiw(t0, lreg, 0x1f);               // t0 = sign mask: all ones if lreg < 0, else 0
+        __ srliw(t0, t0, BitsPerInt - shift);   // t0 = c - 1 if lreg < 0, else 0 (the bias)
+        __ addw(t1, lreg, t0);                  // t1 = lreg + bias
+        if (is_imm_in_range(c - 1, 12, 0)) {
+          __ andi(t1, t1, c - 1);               // keep the low 'shift' bits
+        } else {
+          __ zero_extend(t1, t1, shift);        // presumably the same masking when c - 1 is not a valid immediate - confirm
+        }
+        __ subw(dreg, t1, t0);                  // remove the bias again
+      }
+    } else {
+      if (c == 1) {
+        // move lreg to dreg if divisor is 1
+        __ mv(dreg, lreg);
+      } else {
+        unsigned int shift = exact_log2(c);
+        __ sraiw(t0, lreg, 0x1f);               // t0 = sign mask: all ones if lreg < 0, else 0
+        if (is_imm_in_range(c - 1, 12, 0)) {
+          __ andi(t0, t0, c - 1);               // t0 = c - 1 if lreg < 0, else 0 (the bias)
+        } else {
+          __ zero_extend(t0, t0, shift);        // presumably the same masking when c - 1 is not a valid immediate - confirm
+        }
+        __ addw(dreg, t0, lreg);                // bias negative dividends so the shift rounds toward zero
+        __ sraiw(dreg, dreg, shift);            // arithmetic shift right by log2(c)
+      }
+    }
+  } else {
+    Register rreg = right->as_register();
+    __ corrected_idivl(dreg, lreg, rreg, is_irem);  // register divisor: delegate to the macro assembler
+  }
+}
+
+void LIR_Assembler::arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right,
+                                                       Register lreg, Register dreg) {
+  // add/sub of a constant (right) to a cpu register (lreg), result in dreg
+  jlong c;
+
+  switch (right->type()) {
+    case T_LONG:
+      c = right->as_constant_ptr()->as_jlong(); break;
+    case T_INT:     // fall through
+    case T_ADDRESS:
+      c = right->as_constant_ptr()->as_jint(); break;  // jint value is widened to jlong
+    default:
+      ShouldNotReachHere();
+      c = 0;   // unreachable
+  }
+
+  assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+  if (c == 0 && dreg == lreg) {  // adding or subtracting 0 in place: nothing to emit
+    COMMENT("effective nop elided");
+    return;
+  }
+  switch (left->type()) {
+    case T_INT:
+      switch (code) {
+        case lir_add: __ addw(dreg, lreg, c); break;  // word-sized (w) form for T_INT
+        case lir_sub: __ subw(dreg, lreg, c); break;
+        default:      ShouldNotReachHere();
+      }
+    break;
+    case T_OBJECT:  // fall through
+    case T_ADDRESS:
+      switch (code) {
+        case lir_add: __ add(dreg, lreg, c); break;  // full-width form for oops and addresses
+        case lir_sub: __ sub(dreg, lreg, c); break;
+        default:      ShouldNotReachHere();
+      }
+    break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  Register lreg = left->as_register();
+  Register dreg = as_reg(dest);
+
+  if (right->is_single_cpu()) {
+    // cpu register <op> cpu register: all three operands must be T_INT
+    assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be");
+    Register rreg = right->as_register();
+    switch (code) {
+      case lir_add: __ addw(dest->as_register(), lreg, rreg); break;
+      case lir_sub: __ subw(dest->as_register(), lreg, rreg); break;
+      case lir_mul: __ mulw(dest->as_register(), lreg, rreg); break;
+      default:      ShouldNotReachHere();
+    }
+  } else if (right->is_double_cpu()) {
+    Register rreg = right->as_register_lo();
+    // single_cpu + double_cpu; can happen with obj_long
+    assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
+    switch (code) {
+      case lir_add: __ add(dreg, lreg, rreg); break;
+      case lir_sub: __ sub(dreg, lreg, rreg); break;
+      default:      ShouldNotReachHere();
+    }
+  } else if (right->is_constant()) {
+    arith_op_single_cpu_right_constant(code, left, right, lreg, dreg);  // constant right operand handled separately
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  Register lreg_lo = left->as_register_lo();
+
+  if (right->is_double_cpu()) {
+    // cpu register <op> cpu register
+    Register rreg_lo = right->as_register_lo();
+    switch (code) {
+      case lir_add: __ add(dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_sub: __ sub(dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_mul: __ mul(dest->as_register_lo(), lreg_lo, rreg_lo); break;
+      case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false); break;
+      case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true); break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else if (right->is_constant()) {
+    jlong c = right->as_constant_ptr()->as_jlong();
+    Register dreg = as_reg(dest);
+    switch (code) {
+      case lir_add: // fall through
+      case lir_sub:
+        if (c == 0 && dreg == lreg_lo) {  // adding or subtracting 0 in place: nothing to emit
+          COMMENT("effective nop elided");
+          return;
+        }
+        code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c);
+        break;
+      case lir_div:
+        assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+        if (c == 1) {
+          // move lreg_lo to dreg if divisor is 1
+          __ mv(dreg, lreg_lo);
+        } else {
+          unsigned int shift = exact_log2_long(c);
+          // use t0 as intermediate result register
+          __ srai(t0, lreg_lo, 0x3f);             // t0 = sign mask: all ones if lreg_lo < 0, else 0
+          if (is_imm_in_range(c - 1, 12, 0)) {
+            __ andi(t0, t0, c - 1);               // t0 = c - 1 if lreg_lo < 0, else 0 (the bias)
+          } else {
+            __ zero_extend(t0, t0, shift);        // presumably the same masking when c - 1 is not a valid immediate - confirm
+          }
+          __ add(dreg, t0, lreg_lo);              // bias negative dividends so the shift rounds toward zero
+          __ srai(dreg, dreg, shift);             // arithmetic shift right by log2(c)
+        }
+        break;
+      case lir_rem:
+        assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+        if (c == 1) {
+          // move 0 to dreg if divisor is 1
+          __ mv(dreg, zr);
+        } else {
+          unsigned int shift = exact_log2_long(c);
+          __ srai(t0, lreg_lo, 0x3f);             // t0 = sign mask: all ones if lreg_lo < 0, else 0
+          __ srli(t0, t0, BitsPerLong - shift);   // t0 = c - 1 if lreg_lo < 0, else 0 (the bias)
+          __ add(t1, lreg_lo, t0);                // t1 = lreg_lo + bias
+          if (is_imm_in_range(c - 1, 12, 0)) {
+            __ andi(t1, t1, c - 1);               // keep the low 'shift' bits
+          } else {
+            __ zero_extend(t1, t1, shift);        // presumably the same masking when c - 1 is not a valid immediate - confirm
+          }
+          __ sub(dreg, t1, t0);                   // remove the bias again
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
+  switch (code) {
+    case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_mul_strictfp: // fall through: same instruction as lir_mul
+    case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    case lir_div_strictfp: // fall through: same instruction as lir_div
+    case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  if (right->is_double_fpu()) {
+    // fpu register <op> fpu register
+    switch (code) {
+      case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_mul_strictfp: // fall through: same instruction as lir_mul
+      case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      case lir_div_strictfp: // fall through: same instruction as lir_div
+      case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();  // only a double fpu register is accepted as the right operand
+  }
+}
+
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                             CodeEmitInfo* info, bool pop_fpu_stack) {  // pop_fpu_stack is not read in this function
+  assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+  if (left->is_single_cpu()) {  // dispatch on the kind of the left operand
+    arith_op_single_cpu(code, left, right, dest);
+  } else if (left->is_double_cpu()) {
+    arith_op_double_cpu(code, left, right, dest);
+  } else if (left->is_single_fpu()) {
+    arith_op_single_fpu(code, left, right, dest);
+  } else if (left->is_double_fpu()) {
+    arith_op_double_fpu(code, left, right, dest);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp
new file mode 100644
index 0000000000..ab0a9963fc
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
+#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
+
+  // arith_op sub functions
+  void arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);  // left is a single cpu register
+  void arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);  // left is a double cpu register
+  void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);  // left is a single fpu register
+  void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest);  // left is a double fpu register
+  void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg);
+  void arithmetic_idiv(LIR_Op3* op, bool is_irem);  // NOTE(review): the .cpp defines only a 6-argument arithmetic_idiv - confirm this overload is needed
+
+#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
new file mode 100644
index 0000000000..e6b95d3b7f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/stubRoutines.hpp"
+
+#define __ _masm->
+
+
+void LIR_Assembler::generic_arraycopy(Register src, Register src_pos, Register length,
+                                      Register dst, Register dst_pos, CodeStub *stub) {
+  assert(src == x11 && src_pos == x12, "mismatch in calling convention");
+  // Emits a call to the generic arraycopy stub routine. Save the arguments
+  // first, in case it fails and we have to fall back to the code at 'stub'.
+  arraycopy_store_args(src, src_pos, length, dst, dst_pos);
+
+  address copyfunc_addr = StubRoutines::generic_arraycopy();
+  assert(copyfunc_addr != NULL, "generic arraycopy stub required");
+
+  // The arguments are in java calling convention so we shift them
+  // to C convention
+  assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); // a mv must not clobber a java arg still to be moved
+  __ mv(c_rarg0, j_rarg0);
+  assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4);
+  __ mv(c_rarg1, j_rarg1);
+  assert_different_registers(c_rarg2, j_rarg3, j_rarg4);
+  __ mv(c_rarg2, j_rarg2);
+  assert_different_registers(c_rarg3, j_rarg4);
+  __ mv(c_rarg3, j_rarg3);
+  __ mv(c_rarg4, j_rarg4);
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt));
+  }
+#endif
+  __ far_call(RuntimeAddress(copyfunc_addr));
+  __ beqz(x10, *stub->continuation()); // x10 == 0: branch to the continuation, skipping the fallback below
+  // Reload values from the stack so they are where the stub
+  // expects them.
+  arraycopy_load_args(src, src_pos, length, dst, dst_pos);
+
+  // x10 is -1^K where K == partial copied count; the xori leaves K in t0
+  __ xori(t0, x10, -1);
+  // adjust length down and src/dst pos up by partial copied count
+  __ subw(length, length, t0);
+  __ addw(src_pos, src_pos, t0);
+  __ addw(dst_pos, dst_pos, t0);
+  __ j(*stub->entry()); // hand the adjusted arguments to the code at 'stub'
+
+  __ bind(*stub->continuation());
+}
+
+void LIR_Assembler::arraycopy_simple_check(Register src, Register src_pos, Register length,
+                                           Register dst, Register dst_pos, Register tmp,
+                                           CodeStub *stub, int flags) {
+  // Emit the checks selected by 'flags'; every failing check branches to stub->entry(). First: NULL tests.
+  if (flags & LIR_OpArrayCopy::src_null_check) {
+    __ beqz(src, *stub->entry(), /* is_far */ true);
+  }
+  if (flags & LIR_OpArrayCopy::dst_null_check) {
+    __ beqz(dst, *stub->entry(), /* is_far */ true);
+  }
+
+  // If the compiler was not able to prove that exact type of the source or the destination
+  // of the arraycopy is an array type, check at runtime if the source or the destination is
+  // an instance type.
+  if (flags & LIR_OpArrayCopy::type_check) {
+    if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+      __ load_klass(tmp, dst);
+      __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ mv(t1, Klass::_lh_neutral_value);
+      __ bge(t0, t1, *stub->entry(), /* is_far */ true); // layout helper >= neutral value: take the stub
+    }
+
+    if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+      __ load_klass(tmp, src);
+      __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset())));
+      __ mv(t1, Klass::_lh_neutral_value);
+      __ bge(t0, t1, *stub->entry(), /* is_far */ true); // same test for the source
+    }
+  }
+
+  // check if negative
+  if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+    __ bltz(src_pos, *stub->entry(), /* is_far */ true);
+  }
+  if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+    __ bltz(dst_pos, *stub->entry(), /* is_far */ true);
+  }
+  if (flags & LIR_OpArrayCopy::length_positive_check) {
+    __ bltz(length, *stub->entry(), /* is_far */ true);
+  }
+
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    __ addw(tmp, src_pos, length);
+    __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes()));
+    __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); // unsigned: src_pos + length > src array length
+  }
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    __ addw(tmp, dst_pos, length);
+    __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+    __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); // unsigned: dst_pos + length > dst array length
+  }
+}
+
+void LIR_Assembler::arraycopy_checkcast(Register src, Register src_pos, Register length,
+                                        Register dst, Register dst_pos, Register tmp,
+                                        CodeStub *stub, BasicType basic_type,
+                                        address copyfunc_addr, int flags) {
+  // src is not a sub class of dst so we have to do a
+  // per-element check (done by the routine at copyfunc_addr).
+  int mask = LIR_OpArrayCopy::src_objarray | LIR_OpArrayCopy::dst_objarray;
+  if ((flags & mask) != mask) {
+    // Only one of them is known to be an object array: check the other one at runtime.
+    assert(flags & mask, "one of the two should be known to be an object array");
+
+    if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+      __ load_klass(tmp, src);
+    } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+      __ load_klass(tmp, dst);
+    }
+    int lh_offset = in_bytes(Klass::layout_helper_offset());
+    Address klass_lh_addr(tmp, lh_offset);
+    jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+    __ lw(t0, klass_lh_addr);
+    __ mvw(t1, objArray_lh);
+    __ bne(t0, t1, *stub->entry(), /* is_far */ true); // layout helper differs from the T_OBJECT array one: take the stub
+  }
+
+  // Spill because stubs can use any register they like and it's
+  // easier to restore just those that we care about.
+  arraycopy_store_args(src, src_pos, length, dst, dst_pos);
+  arraycopy_checkcast_prepare_params(src, src_pos, length, dst, dst_pos, basic_type);
+  __ far_call(RuntimeAddress(copyfunc_addr));
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    Label failed;
+    __ bnez(x10, failed);
+    __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt));
+    __ bind(failed);
+  }
+#endif
+
+  __ beqz(x10, *stub->continuation()); // x10 == 0: branch to the continuation
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt));
+  }
+#endif
+  assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0);
+
+  // Restore previously spilled arguments
+  arraycopy_load_args(src, src_pos, length, dst, dst_pos);
+
+  // return value is -1^K where K is partial copied count; the xori leaves K in t0
+  __ xori(t0, x10, -1);
+  // adjust length down and src/dst pos up by partial copied count
+  __ subw(length, length, t0);
+  __ addw(src_pos, src_pos, t0);
+  __ addw(dst_pos, dst_pos, t0);
+}
+
+void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Register length,
+                                         Register dst, Register dst_pos, Register tmp,
+                                         CodeStub *stub, BasicType basic_type, int flags) {
+  // We don't know whether the array types are compatible
+  if (basic_type != T_OBJECT) {
+    // Simple test for basic type arrays: the two klass fields must be equal
+    if (UseCompressedClassPointers) {
+      __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
+      __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+    } else {
+      __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes()));
+      __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+    }
+    __ bne(tmp, t0, *stub->entry(), /* is_far */ true);
+  } else {
+    // For object arrays, if src is a sub class of dst then we can
+    // safely do the copy.
+    Label cont, slow;
+
+#define PUSH(r1, r2)                                     \
+    __ addi(sp, sp, -2 * wordSize);                      \
+    __ sd(r1, Address(sp, 1 * wordSize));                \
+    __ sd(r2, Address(sp, 0));
+
+#define POP(r1, r2)                                      \
+    __ ld(r1, Address(sp, 1 * wordSize));                \
+    __ ld(r2, Address(sp, 0));                           \
+    __ addi(sp, sp, 2 * wordSize);
+
+    PUSH(src, dst); // save the array oops; src/dst hold their klasses from here on
+    __ load_klass(src, src);
+    __ load_klass(dst, dst);
+    __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL);
+
+    PUSH(src, dst); // push both klasses around the slow_subtype_check call
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    POP(src, dst);  // pops both slots again; dst is tested as the outcome just below
+    __ bnez(dst, cont);
+
+    __ bind(slow);
+    POP(src, dst);  // pops the pair pushed first, i.e. the original array oops
+
+    address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+    if (copyfunc_addr != NULL) { // use stub if available
+      arraycopy_checkcast(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, copyfunc_addr, flags);
+    }
+
+    __ j(*stub->entry()); // reached when no checkcast routine exists or it did not branch to the continuation
+    __ bind(cont);
+    POP(src, dst);  // pops the pair pushed first, i.e. the original array oops
+  }
+}
+
+void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) {
+  assert(default_type != NULL, "NULL default_type!");
+  BasicType basic_type = default_type->element_type()->basic_type();
+
+  if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } // same normalization as emit_arraycopy
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class.  For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type.  For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    Label known_ok, halt;
+    __ mov_metadata(tmp, default_type->constant_encoding());
+    if (UseCompressedClassPointers) {
+      __ encode_klass_not_null(tmp); // compare in the same (narrow) form that the lwu loads below produce
+    }
+
+    if (basic_type != T_OBJECT) {
+      if (UseCompressedClassPointers) {
+        __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+      } else {
+        __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+      }
+      __ bne(tmp, t0, halt);     // dst klass must be exactly the expected one
+      if (UseCompressedClassPointers) {
+        __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes()));
+      } else {
+        __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes()));
+      }
+      __ beq(tmp, t0, known_ok); // ... and so must the src klass
+    } else {
+      if (UseCompressedClassPointers) {
+        __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+      } else {
+        __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes()));
+      }
+      __ beq(tmp, t0, known_ok); // dst is exactly of the expected type
+      __ beq(src, dst, known_ok); // or src and dst are the same array
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+}
+
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  ciArrayKlass *default_type = op->expected_type();
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+
+  CodeStub* stub = op->stub();
+  int flags = op->flags();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } // every reference element type is handled as T_OBJECT
+
+  // if we don't know anything, just go through the generic arraycopy
+  if (default_type == NULL) {
+    generic_arraycopy(src, src_pos, length, dst, dst_pos, stub);
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(),
+         "must be true at this point");
+
+  arraycopy_simple_check(src, src_pos, length, dst, dst_pos, tmp, stub, flags); // checks selected by 'flags'; failures branch to the stub
+
+  if (flags & LIR_OpArrayCopy::type_check) {
+    arraycopy_type_check(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, flags);
+  }
+
+#ifdef ASSERT
+  arraycopy_assert(src, dst, tmp, default_type, flags); // debug builds only: emits a check of the klasses against default_type
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)));
+  }
+#endif
+  arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); // c_rarg0..c_rarg2 = from address, to address, length
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name = NULL;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+  CodeBlob *cb = CodeCache::find_blob(entry);
+  if (cb != NULL) { // a code blob was found for 'entry': call it with far_call
+    __ far_call(RuntimeAddress(entry));
+  } else {          // no code blob found: call 'entry' through call_VM_leaf instead
+    const int args_num = 3;
+    __ call_VM_leaf(entry, args_num);
+  }
+
+  __ bind(*stub->continuation());
+}
+
+
+void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length,
+                                             Register dst, Register dst_pos, BasicType basic_type) {
+  int scale = array_element_size(basic_type); // NOTE(review): used as the shadd shift amount, so presumably log2 of the element size -- confirm
+  __ shadd(c_rarg0, src_pos, src, t0, scale); // c_rarg0: address of the first source element to copy ...
+  __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); // ... once the array base offset is added
+  assert_different_registers(c_rarg0, dst, dst_pos, length); // inputs still needed below must not have been clobbered
+  __ shadd(c_rarg1, dst_pos, dst, t0, scale); // c_rarg1: address of the first destination element, built the same way
+  __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type));
+  assert_different_registers(c_rarg1, dst, length);
+  __ mv(c_rarg2, length);                     // c_rarg2: length, copied unchanged
+  assert_different_registers(c_rarg2, dst);
+}
+
+void LIR_Assembler::arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length,
+                                                       Register dst, Register dst_pos, BasicType basic_type) {
+  arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); // c_rarg0..c_rarg2 as for a plain copy
+  __ load_klass(c_rarg4, dst);
+  __ ld(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); // c_rarg4: element klass of the destination array
+  __ lwu(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset()));   // c_rarg3: that klass' super_check_offset
+}
+
+void LIR_Assembler::arraycopy_store_args(Register src, Register src_pos, Register length,
+                                         Register dst, Register dst_pos) { // spills to sp[0..4]; slots mirror arraycopy_load_args
+  __ sd(dst_pos, Address(sp, 0));                // word 0: dst_pos
+  __ sd(dst, Address(sp, 1 * BytesPerWord));     // word 1: dst
+  __ sd(length, Address(sp, 2 * BytesPerWord));  // word 2: length
+  __ sd(src_pos, Address(sp, 3 * BytesPerWord)); // word 3: src_pos
+  __ sd(src, Address(sp, 4 * BytesPerWord));     // word 4: src
+}
+
+void LIR_Assembler::arraycopy_load_args(Register src, Register src_pos, Register length,
+                                        Register dst, Register dst_pos) { // reloads from sp[0..4]; slots mirror arraycopy_store_args
+  __ ld(dst_pos, Address(sp, 0));                // word 0: dst_pos
+  __ ld(dst, Address(sp, 1 * BytesPerWord));     // word 1: dst
+  __ ld(length, Address(sp, 2 * BytesPerWord));  // word 2: length
+  __ ld(src_pos, Address(sp, 3 * BytesPerWord)); // word 3: src_pos
+  __ ld(src, Address(sp, 4 * BytesPerWord));     // word 4: src
+}
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
new file mode 100644
index 0000000000..06a0f248ca
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
+#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
+
+  // arraycopy sub functions (declarations only; defined in c1_LIRAssembler_arraycopy_riscv.cpp)
+  void generic_arraycopy(Register src, Register src_pos, Register length,
+                         Register dst, Register dst_pos, CodeStub *stub);
+  void arraycopy_simple_check(Register src, Register src_pos, Register length,
+                              Register dst, Register dst_pos, Register tmp,
+                              CodeStub *stub, int flags);
+  void arraycopy_checkcast(Register src, Register src_pos, Register length,
+                           Register dst, Register dst_pos, Register tmp,
+                           CodeStub *stub, BasicType basic_type,
+                           address copyfunc_addr, int flags);
+  void arraycopy_type_check(Register src, Register src_pos, Register length,
+                            Register dst, Register dst_pos, Register tmp,
+                            CodeStub *stub, BasicType basic_type, int flags);
+  void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags);
+  void arraycopy_prepare_params(Register src, Register src_pos, Register length,
+                                Register dst, Register dst_pos, BasicType basic_type);
+  void arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length,
+                                          Register dst, Register dst_pos, BasicType basic_type);
+  void arraycopy_store_args(Register src, Register src_pos, Register length,
+                            Register dst, Register dst_pos);
+  void arraycopy_load_args(Register src, Register src_pos, Register length,
+                           Register dst, Register dst_pos);
+
+#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
new file mode 100644
index 0000000000..fb6a60fb49
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -0,0 +1,2258 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "code/compiledIC.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+NEEDS_CLEANUP // remove these definitions?
+const Register IC_Klass    = t1;    // where the IC klass is cached
+const Register SYNC_header = x10;   // synchronization header
+const Register SHIFT_count = x10;   // where the count for shift operations must be
+
+#define __ _masm->
+
+// Redirect whichever temp aliases 'preserve' (tmp1 is checked first) to 'extra'.
+static void select_different_registers(Register preserve, Register extra,
+                                       Register &tmp1, Register &tmp2) {
+  const bool clash1 = (tmp1 == preserve);
+  const bool clash2 = !clash1 && (tmp2 == preserve);
+  if (clash1 || clash2) {
+    // 'extra' must differ from both temps before it is swapped in.
+    assert_different_registers(tmp1, tmp2, extra);
+    Register &victim = clash1 ? tmp1 : tmp2;
+    victim = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2);
+}
+
+// Three-temp variant: redirect the first of tmp1, tmp2, tmp3 that aliases
+// 'preserve' to 'extra'; at most one temp is replaced.
+static void select_different_registers(Register preserve, Register extra,
+                                       Register &tmp1, Register &tmp2,
+                                       Register &tmp3) {
+  Register* victim = NULL;
+  if (tmp1 == preserve) { victim = &tmp1; }
+  else if (tmp2 == preserve) { victim = &tmp2; }
+  else if (tmp3 == preserve) { victim = &tmp3; }
+
+  if (victim != NULL) {
+    // 'extra' must differ from all three temps before it is swapped in.
+    assert_different_registers(tmp1, tmp2, tmp3, extra);
+    *victim = extra;
+  }
+  assert_different_registers(preserve, tmp1, tmp2, tmp3);
+}
+
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } // not implemented here; the return only satisfies the signature
+
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::receiver_opr; // the operand FrameMap designates for the receiver
+}
+
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::as_pointer_opr(receiverOpr()->as_register()); // the receiver register, viewed as a pointer operand
+}
+
+void LIR_Assembler::breakpoint() { Unimplemented(); } // not implemented here
+
+void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } // not implemented here
+
+void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } // not implemented here
+
+// Widen an integer LIR constant to jlong. Only T_INT and T_LONG operands are
+// expected; any other type trips ShouldNotReachHere().
+static jlong as_long(LIR_Opr data) {
+  const BasicType kind = data->type();
+  if (kind == T_INT) {
+    return data->as_jint();
+  }
+  if (kind == T_LONG) {
+    return data->as_jlong();
+  }
+
+  // No other operand type is expected here.
+  ShouldNotReachHere();
+  return 0;  // unreachable
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { // 'tmp' is overwritten whenever an address must be computed
+  if (addr->base()->is_illegal()) {
+    assert(addr->index()->is_illegal(), "must be illegal too");
+    __ movptr(tmp, addr->disp()); // no base register: the displacement alone is loaded into tmp
+    return Address(tmp, 0);
+  }
+
+  Register base = addr->base()->as_pointer_register();
+  LIR_Opr index_opr = addr->index();
+
+  if (index_opr->is_illegal()) {
+    return Address(base, addr->disp()); // base + displacement; no code is emitted
+  }
+
+  int scale = addr->scale();
+  if (index_opr->is_cpu_register()) {
+    Register index;
+    if (index_opr->is_single_cpu()) {
+      index = index_opr->as_register();
+    } else {
+      index = index_opr->as_register_lo();
+    }
+    if (scale != 0) {
+      __ shadd(tmp, index, base, tmp, scale); // NOTE(review): presumably tmp = base + (index << scale) -- confirm shadd semantics
+    } else {
+      __ add(tmp, base, index);
+    }
+    return Address(tmp, addr->disp());
+  } else if (index_opr->is_constant()) {
+    intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); // constant index folded into the offset
+    return Address(base, addr_offset);
+  }
+
+  Unimplemented(); // index is neither a cpu register nor a constant
+  return Address();
+}
+
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  ShouldNotReachHere(); // never expected to be called; the return only satisfies the signature
+  return Address();
+}
+
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  return as_Address(addr, t0); // t0 serves as the temp for any address computation
+}
+
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  return as_Address(addr); // identical to as_Address(addr)
+}
+
+// Return the Address of a stack slot as computed by the frame map. The
+// preconditions check that it is sp-based, in base_plus_offset mode, with a positive
+// offset that is a multiple of 'size'. No address computation is emitted here.
+Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) {
+  precond(size == 4 || size == 8);
+  Address addr = frame_map()->address_for_slot(index, adjust);
+  precond(addr.getMode() == Address::base_plus_offset);
+  precond(addr.base() == sp);
+  precond(addr.offset() > 0);
+  uint mask = size - 1; // size is 4 or 8, so this masks the low offset bits that must be zero
+  assert((addr.offset() & mask) == 0, "scaled offsets only");
+
+  return addr;
+}
+
+void LIR_Assembler::osr_entry() {
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); // record where the OSR entry code starts
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  guarantee(osr_entry != NULL, "NULL osr_entry!");
+  ValueStack* entry_state = osr_entry->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // we jump here if osr happens with the interpreter
+  // state set up to continue at the beginning of the
+  // loop that triggered osr - in particular, we have
+  // the following registers setup:
+  //
+  // x12: osr buffer
+  //
+
+  // Build the frame of the compiled activation; the frame size and the
+  // stack bang size are both handed to build_frame.
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[0..number_of_locks]
+  //
+  // locals is a direct copy of the interpreter frame, so in the osr buffer
+  // the first slot in the local array is the last local from the interpreter
+  // and the last slot is local[0] (receiver) from the interpreter
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method)
+
+  // Initialize monitors in the compiled activation.
+  //   x12: pointer to osr buffer
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_pointer_register();
+  {
+    assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1);
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
+    for (int i = 0; i < number_of_locks; i++) {
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); // monitor i sits 2 * i words below monitor_offset
+#ifdef ASSERT
+      // verify the interpreter's monitor has a non-null object
+      {
+        Label L;
+        __ ld(t0, Address(OSR_buf, slot_offset + 1 * BytesPerWord));
+        __ bnez(t0, L);
+        __ stop("locked object is NULL");
+        __ bind(L);
+      }
+#endif // ASSERT
+      __ ld(x9, Address(OSR_buf, slot_offset + 0)); // x9 is the scratch register for both copies
+      __ sd(x9, frame_map()->address_for_monitor_lock(i));
+      __ ld(x9, Address(OSR_buf, slot_offset + 1 * BytesPerWord));
+      __ sd(x9, frame_map()->address_for_monitor_object(i));
+    }
+  }
+}
+
+// inline cache check; done before the frame is built. Returns the code offset at which the check starts.
+int LIR_Assembler::check_icache() {
+  Register receiver = FrameMap::receiver_opr->as_register();
+  Register ic_klass = IC_Klass;
+  int start_offset = __ offset();
+  Label dont;
+  __ inline_cache_check(receiver, ic_klass, dont);
+
+  // if icache check fails, then jump to runtime routine
+  // Note: RECEIVER must still contain the receiver!
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+  // Align the verified entry point, except for accessor methods whose inline
+  // cache check advanced the code offset by at most 4 * 4.
+  // NOTE(review): this comment used to cite a single 64-byte icache line -- confirm the intended limit.
+  if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
+    // force alignment after the cache check.
+    __ align(CodeEntryAlignment);
+  }
+
+  __ bind(dont);
+  return start_offset;
+}
+
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  if (o != NULL) {
+    __ movoop(reg, o, /* immediate */ true);
+  } else {
+    __ mv(reg, zr);  // a NULL jobject is materialized from the zero register
+  }
+}
+
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  deoptimize_trap(info); // 'reg' is never written: a deoptimization trap is emitted in place of a patchable load
+}
+
+// This specifies the stack pointer decrement, in bytes, needed to build the frame
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+  // if rounding, must let FrameMap know!
+
+  return in_bytes(frame_map()->framesize_in_bytes());
+}
+
+int LIR_Assembler::emit_exception_handler() { // returns the handler's code offset, or -1 after a bailout
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci ==> add a nop
+  // (was bug 5/14/1999 -gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(exception_handler_size());
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  // the exception oop and pc are in x10 and x13;
+  // no other registers need to be preserved, so invalidate them
+  __ invalidate_registers(false, true, true, false, true, true);
+
+  // check that there is really an exception
+  __ verify_not_null_oop(x10);
+
+  // search an exception handler (x10: exception oop, x13: throwing pc)
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id)));
+  __ should_not_reach_here();
+  guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); // emitted code must fit the size reserved above
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path. Returns the code offset at which the handler starts.
+int LIR_Assembler::emit_unwind_handler() {
+#ifndef PRODUCT
+  if (CommentedAssembly) {
+    _masm->block_comment("Unwind handler");
+  }
+#endif // !PRODUCT
+
+  int offset = code_offset();
+
+  // Fetch the exception from TLS and clear out exception related thread state
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(x10);
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mv(x9, x10);   // Preserve the exception
+  }
+
+  // Perform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::r10_opr);
+    stub = new MonitorExitStub(FrameMap::r10_opr, true, 0);
+    __ unlock_object(x15, x14, x10, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    __ mv(c_rarg0, xthread);
+    __ mov_metadata(c_rarg1, method()->constant_encoding());
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), c_rarg0, c_rarg1);
+  }
+
+  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
+    __ mv(x10, x9);   // Restore the exception
+  }
+
+  // remove the activation and dispatch to the unwind handler
+  __ block_comment("remove_frame and dispatch to the unwind handler");
+  __ remove_frame(initial_frame_size_in_bytes());
+  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
+
+  // Emit the slow path assembly
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+// Emit the deoptimization handler stub.
+//
+// Returns the code offset of the handler, or -1 after calling bailout()
+// when there is no room left for the stub.
+int LIR_Assembler::emit_deopt_handler() {
+  // if the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri)
+  __ nop();
+
+  // generate code for exception handler
+  address handler_base = __ start_a_stub(deopt_handler_size());
+  if (handler_base == NULL) {
+    // not enough space left for the handler
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+
+  // auipc with a zero immediate puts the address of this instruction in ra
+  // before control is transferred to the deopt blob's unpack entry.
+  __ auipc(ra, 0);
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+  // The emitted sequence must fit in the space reserved by start_a_stub.
+  guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit the method return sequence: pop the frame, run the reserved-stack
+// check when the compilation needs it, poll for a return safepoint and
+// return. 'result' is only examined by the assert below.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10");
+
+  // Pop the stack before the safepoint code
+  __ remove_frame(initial_frame_size_in_bytes());
+
+  if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
+    __ reserved_stack_check();
+  }
+
+  // t0 is the register handed to read_polling_page for the poll.
+  address polling_page(os::get_polling_page());
+  __ read_polling_page(t0, polling_page, relocInfo::poll_return_type);
+  __ ret();
+}
+
+// Emit a safepoint poll and attach 'info' (which must be non-NULL) to it.
+// Returns the assembler offset once the poll has been emitted.
+// 'tmp' is not used here; t0 serves as the polling-page register.
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  address polling_page(os::get_polling_page());
+  guarantee(info != NULL, "Shouldn't be NULL");
+  assert(os::is_poll_address(polling_page), "should be");
+  // get_polling_page fills in 'offset', which the read below then uses.
+  int32_t offset = 0;
+  __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type);
+  add_debug_info_for_branch(info);  // This isn't just debug info:
+                                    // it's the oop map
+  __ read_polling_page(t0, offset, relocInfo::poll_type);
+  return __ offset();
+}
+
+// Emit a register-to-register move: to_reg := from_reg.
+// Note the argument order: source first here, destination first in mv.
+void LIR_Assembler::move_regs(Register from_reg, Register to_reg) {
+  __ mv(to_reg, from_reg);
+}
+
+// Register swap is not provided by this port; any call ends in Unimplemented().
+void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); }
+
+// Materialize the constant 'src' in the register operand 'dest'.
+//
+// Integer-like constants are loaded as immediates; oops and metadata go
+// through their dedicated helpers, or through the *_with_patching
+// variants when 'patch_code' requests patching; float and double
+// constants are placed in the constant table and loaded from there.
+// 'info' is only used by the patching variants.
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+  address const_addr = NULL;
+
+  switch (c->type()) {
+    case T_INT:
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mvw(dest->as_register(), c->as_jint());
+      break;
+
+    case T_ADDRESS:
+      // The address constant is read through as_jint() and loaded with mv.
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mv(dest->as_register(), c->as_jint());
+      break;
+
+    case T_LONG:
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ mv(dest->as_register_lo(), (intptr_t)c->as_jlong());
+      break;
+
+    case T_OBJECT:
+    case T_ARRAY:
+      if (patch_code == lir_patch_none) {
+        jobject2reg(c->as_jobject(), dest->as_register());
+      } else {
+        jobject2reg_with_patching(dest->as_register(), info);
+      }
+      break;
+
+    case T_METADATA:
+      if (patch_code != lir_patch_none) {
+        klass2reg_with_patching(dest->as_register(), info);
+      } else {
+        __ mov_metadata(dest->as_register(), c->as_metadata());
+      }
+      break;
+
+    case T_FLOAT:
+      // float_constant() returns the constant-table address of the value.
+      const_addr = float_constant(c->as_jfloat());
+      assert(const_addr != NULL, "must create float constant in the constant table");
+      __ flw(dest->as_float_reg(), InternalAddress(const_addr));
+      break;
+
+    case T_DOUBLE:
+      // double_constant() returns the constant-table address of the value.
+      const_addr = double_constant(c->as_jdouble());
+      assert(const_addr != NULL, "must create double constant in the constant table");
+      __ fld(dest->as_double_reg(), InternalAddress(const_addr));
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Store the constant 'src' into the stack slot 'dest'.
+//
+// Zero values are stored straight from zr. Non-zero object and address
+// constants are materialized in t1 through const2reg and spilled with
+// reg2stack; non-zero int/float and long/double bit patterns are loaded
+// into t1 and stored with sw / sd respectively.
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+  switch (c->type()) {
+    case T_OBJECT:
+      if (c->as_jobject() == NULL) {
+        __ sd(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      } else {
+        const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL);
+        reg2stack(FrameMap::t1_opr, dest, c->type(), false);
+      }
+      break;
+    case T_ADDRESS:
+      const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL);
+      reg2stack(FrameMap::t1_opr, dest, c->type(), false);
+      // reg2stack has already written the whole slot with a 64-bit store.
+      // Do not fall into the T_INT/T_FLOAT path, which would emit a second,
+      // redundant 32-bit store to the same slot.
+      break;
+    case T_INT:       // fall through
+    case T_FLOAT:
+      if (c->as_jint_bits() == 0) {
+        __ sw(zr, frame_map()->address_for_slot(dest->single_stack_ix()));
+      } else {
+        __ mvw(t1, c->as_jint_bits());
+        __ sw(t1, frame_map()->address_for_slot(dest->single_stack_ix()));
+      }
+      break;
+    case T_LONG:      // fall through
+    case T_DOUBLE:
+      if (c->as_jlong_bits() == 0) {
+        __ sd(zr, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                lo_word_offset_in_bytes));
+      } else {
+        __ mv(t1, (intptr_t)c->as_jlong_bits());
+        __ sd(t1, frame_map()->address_for_slot(dest->double_stack_ix(),
+                                                lo_word_offset_in_bytes));
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Store a zero constant of the given 'type' to the memory operand 'dest'.
+//
+// Every case asserts that the constant is zero, so the store always uses
+// zr as its source; the switch only selects the store width. 'wide'
+// forces a 64-bit store for oops even when compressed oops are enabled.
+// When 'info' is non-NULL it is recorded as null-check debug info just
+// before the store is emitted.
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+  assert(src->is_constant(), "should not call otherwise");
+  assert(dest->is_address(), "should not call otherwise");
+  LIR_Const* c = src->as_constant_ptr();
+  LIR_Address* to_addr = dest->as_address_ptr();
+  // Pointer to the Assembler store member function chosen below.
+  void (Assembler::* insn)(Register Rt, const Address &adr, Register temp);
+  switch (type) {
+    case T_ADDRESS:
+      assert(c->as_jint() == 0, "should be");
+      insn = &Assembler::sd; break;
+    case T_LONG:
+      assert(c->as_jlong() == 0, "should be");
+      insn = &Assembler::sd; break;
+    case T_DOUBLE:
+      assert(c->as_jdouble() == 0.0, "should be");
+      insn = &Assembler::sd; break;
+    case T_INT:
+      assert(c->as_jint() == 0, "should be");
+      insn = &Assembler::sw; break;
+    case T_FLOAT:
+      assert(c->as_jfloat() == 0.0f, "should be");
+      insn = &Assembler::sw; break;
+    case T_OBJECT:    // fall through
+    case T_ARRAY:
+      assert(c->as_jobject() == 0, "should be");
+      if (UseCompressedOops && !wide) {
+        insn = &Assembler::sw;
+      } else {
+        insn = &Assembler::sd;
+      }
+      break;
+    case T_CHAR:      // fall through
+    case T_SHORT:
+      assert(c->as_jint() == 0, "should be");
+      insn = &Assembler::sh;
+      break;
+    case T_BOOLEAN:   // fall through
+    case T_BYTE:
+      assert(c->as_jint() == 0, "should be");
+      insn = &Assembler::sb; break;
+    default:
+      ShouldNotReachHere();
+      insn = &Assembler::sd;  // unreachable
+  }
+  if (info != NULL) {
+    add_debug_info_for_null_check_here(info);
+  }
+  // t0 is passed as the store's temp register argument.
+  (_masm->*insn)(zr, as_Address(to_addr), t0);
+}
+
+// Move a value between two register operands, picking the move
+// instruction from the kind of the destination register.
+void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) {
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  // Destination is a single cpu register.
+  if (dest->is_single_cpu()) {
+    if (src->type() == T_LONG) {
+      // Can do LONG -> OBJECT
+      move_regs(src->as_register_lo(), dest->as_register());
+    } else {
+      assert(src->is_single_cpu(), "must match");
+      if (src->type() == T_OBJECT) {
+        __ verify_oop(src->as_register());
+      }
+      move_regs(src->as_register(), dest->as_register());
+    }
+    return;
+  }
+
+  // Destination is a double (long) cpu register.
+  if (dest->is_double_cpu()) {
+    if (is_reference_type(src->type())) {
+      // A reference held in a single register goes to the low register.
+      __ verify_oop(src->as_register());
+      move_regs(src->as_register(), dest->as_register_lo());
+    } else {
+      assert(src->is_double_cpu(), "must match");
+      Register src_lo = src->as_register_lo();
+      Register src_hi = src->as_register_hi();
+      Register dst_lo = dest->as_register_lo();
+      Register dst_hi = dest->as_register_hi();
+      assert(src_hi == src_lo, "must be same");
+      assert(dst_hi == dst_lo, "must be same");
+      move_regs(src_lo, dst_lo);
+    }
+    return;
+  }
+
+  // Destination is a single-precision fpu register.
+  if (dest->is_single_fpu()) {
+    assert(src->is_single_fpu(), "expect single fpu");
+    __ fmv_s(dest->as_float_reg(), src->as_float_reg());
+    return;
+  }
+
+  // Destination is a double-precision fpu register.
+  if (dest->is_double_fpu()) {
+    assert(src->is_double_fpu(), "expect double fpu");
+    __ fmv_d(dest->as_double_reg(), src->as_double_reg());
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// Spill the register operand 'src' to the stack slot 'dest'.
+//
+// The store width follows the register kind and, for single cpu
+// registers, 'type': references, metadata, T_DOUBLE and addresses are
+// stored as 64 bits, everything else as 32 bits. 'pop_fpu_stack' is not
+// used by this implementation.
+void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  precond(src->is_register() && dest->is_stack());
+
+  // Access sizes handed to stack_slot_address.
+  uint const c_sz32 = sizeof(uint32_t);
+  uint const c_sz64 = sizeof(uint64_t);
+
+  assert(src->is_register(), "should not call otherwise");
+  assert(dest->is_stack(), "should not call otherwise");
+  if (src->is_single_cpu()) {
+    int index = dest->single_stack_ix();
+    if (is_reference_type(type)) {
+      __ sd(src->as_register(), stack_slot_address(index, c_sz64));
+      __ verify_oop(src->as_register());
+    } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) {
+      __ sd(src->as_register(), stack_slot_address(index, c_sz64));
+    } else {
+      __ sw(src->as_register(), stack_slot_address(index, c_sz32));
+    }
+  } else if (src->is_double_cpu()) {
+    // Only the low register is stored; it is written as a full 64-bit word.
+    int index = dest->double_stack_ix();
+    Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes);
+    __ sd(src->as_register_lo(), dest_addr_LO);
+  } else if (src->is_single_fpu()) {
+    int index = dest->single_stack_ix();
+    __ fsw(src->as_float_reg(), stack_slot_address(index, c_sz32));
+  } else if (src->is_double_fpu()) {
+    int index = dest->double_stack_ix();
+    __ fsd(src->as_double_reg(), stack_slot_address(index, c_sz64));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Store the register operand 'src' to the memory operand 'dest'.
+//
+// When 'patch_code' requests patching, no store is emitted: a
+// deoptimization trap is emitted instead and the function returns.
+// References are verified and, with compressed oops and !wide, encoded
+// into t1 before being stored as 32 bits. When 'info' is non-NULL, null
+// check debug info is recorded for the offset captured just before the
+// store switch. 'pop_fpu_stack' and the unnamed 'unaligned' flag are
+// not used.
+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) {
+  LIR_Address* to_addr = dest->as_address_ptr();
+  // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src
+  Register compressed_src = t1;
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (is_reference_type(type)) {
+    __ verify_oop(src->as_register());
+
+    if (UseCompressedOops && !wide) {
+      __ encode_heap_oop(compressed_src, src->as_register());
+    } else {
+      // No encoding needed: store straight from the source register.
+      compressed_src = src->as_register();
+    }
+  }
+
+  // Captured after any oop encoding, so the encode instructions are not
+  // covered by the recorded null-check offset.
+  int null_check_here = code_offset();
+
+  switch (type) {
+    case T_FLOAT:
+      __ fsw(src->as_float_reg(), as_Address(to_addr));
+      break;
+
+    case T_DOUBLE:
+      __ fsd(src->as_double_reg(), as_Address(to_addr));
+      break;
+
+    case T_ARRAY:      // fall through
+    case T_OBJECT:
+      if (UseCompressedOops && !wide) {
+        __ sw(compressed_src, as_Address(to_addr));
+      } else {
+        __ sd(compressed_src, as_Address(to_addr));
+      }
+      break;
+    case T_METADATA:
+      // We get here to store a method pointer to the stack to pass to
+      // a dtrace runtime call. This can't work on 64 bit with
+      // compressed klass ptrs: T_METADATA can be compressed klass
+      // ptr or a 64 bit method pointer.
+      ShouldNotReachHere();
+      __ sd(src->as_register(), as_Address(to_addr));
+      break;
+    case T_ADDRESS:
+      __ sd(src->as_register(), as_Address(to_addr));
+      break;
+    case T_INT:
+      __ sw(src->as_register(), as_Address(to_addr));
+      break;
+    case T_LONG:
+      __ sd(src->as_register_lo(), as_Address(to_addr));
+      break;
+    case T_BYTE:    // fall through
+    case T_BOOLEAN:
+      __ sb(src->as_register(), as_Address(to_addr));
+      break;
+    case T_CHAR:    // fall through
+    case T_SHORT:
+      __ sh(src->as_register(), as_Address(to_addr));
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  if (info != NULL) {
+    add_debug_info_for_null_check(null_check_here, info);
+  }
+}
+
+// Reload the stack slot 'src' into the register operand 'dest'.
+//
+// For single cpu registers 'type' picks the load: T_INT uses lw,
+// references, metadata and addresses use a 64-bit ld, and every
+// remaining type uses lwu.
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  precond(src->is_stack() && dest->is_register());
+
+  // Access sizes handed to stack_slot_address.
+  uint const c_sz32 = sizeof(uint32_t);
+  uint const c_sz64 = sizeof(uint64_t);
+
+  if (dest->is_single_cpu()) {
+    int index = src->single_stack_ix();
+    if (type == T_INT) {
+      __ lw(dest->as_register(), stack_slot_address(index, c_sz32));
+    } else if (is_reference_type(type)) {
+      __ ld(dest->as_register(), stack_slot_address(index, c_sz64));
+      __ verify_oop(dest->as_register());
+    } else if (type == T_METADATA || type == T_ADDRESS) {
+      __ ld(dest->as_register(), stack_slot_address(index, c_sz64));
+    } else {
+      __ lwu(dest->as_register(), stack_slot_address(index, c_sz32));
+    }
+  } else if (dest->is_double_cpu()) {
+    // Only the low register is loaded, as a full 64-bit word.
+    int index = src->double_stack_ix();
+    Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes);
+    __ ld(dest->as_register_lo(), src_addr_LO);
+  } else if (dest->is_single_fpu()) {
+    int index = src->single_stack_ix();
+    __ flw(dest->as_float_reg(), stack_slot_address(index, c_sz32));
+  } else if (dest->is_double_fpu()) {
+    int index = src->double_stack_ix();
+    __ fld(dest->as_double_reg(), stack_slot_address(index, c_sz64));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Patching variant of a klass load: nothing is loaded into 'reg' here;
+// a deoptimization trap carrying 'info' is emitted instead.
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
+  deoptimize_trap(info);
+}
+
+// Copy one stack slot to another by loading it into a t1 operand and
+// storing it back out. T_LONG and T_DOUBLE go through t1_long_opr, all
+// other types through t1_opr. The load and store use the operands' own
+// types rather than 'type'.
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  const bool two_word = (type == T_LONG || type == T_DOUBLE);
+  LIR_Opr scratch = two_word ? FrameMap::t1_long_opr : FrameMap::t1_opr;
+
+  stack2reg(src, scratch, src->type());
+  reg2stack(scratch, dest, dest->type(), false);
+}
+
+// Load from the memory operand 'src' into the register operand 'dest'.
+//
+// When 'patch_code' requests patching, no load is emitted: a
+// deoptimization trap is emitted instead and the function returns.
+// When 'info' is non-NULL, null-check debug info is recorded at the
+// current offset before the load. After the load, compressed oops are
+// decoded (unless 'wide'), and a T_ADDRESS load at the klass offset is
+// decoded as a compressed class pointer when those are enabled.
+// The unnamed 'unaligned' flag is not used.
+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) {
+  assert(src->is_address(), "should not call otherwise");
+  assert(dest->is_register(), "should not call otherwise");
+
+  // Both locals refer to the same LIR_Address.
+  LIR_Address* addr = src->as_address_ptr();
+  LIR_Address* from_addr = src->as_address_ptr();
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(addr->base()->as_pointer_register());
+  }
+
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
+  if (info != NULL) {
+    add_debug_info_for_null_check_here(info);
+  }
+
+  // NOTE(review): null_check_here is not read again in this function.
+  int null_check_here = code_offset();
+  switch (type) {
+    case T_FLOAT:
+      __ flw(dest->as_float_reg(), as_Address(from_addr));
+      break;
+    case T_DOUBLE:
+      __ fld(dest->as_double_reg(), as_Address(from_addr));
+      break;
+    case T_ARRAY:     // fall through
+    case T_OBJECT:
+      if (UseCompressedOops && !wide) {
+        __ lwu(dest->as_register(), as_Address(from_addr));
+      } else {
+        __ ld(dest->as_register(), as_Address(from_addr));
+      }
+      break;
+    case T_METADATA:
+      // We get here to store a method pointer to the stack to pass to
+      // a dtrace runtime call. This can't work on 64 bit with
+      // compressed klass ptrs: T_METADATA can be a compressed klass
+      // ptr or a 64 bit method pointer.
+      ShouldNotReachHere();
+      __ ld(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_ADDRESS:
+      // FIXME: OMG this is a horrible kludge.  Any offset from an
+      // address that matches klass_offset_in_bytes() will be loaded
+      // as a word, not a long.
+      if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+        __ lwu(dest->as_register(), as_Address(from_addr));
+      } else {
+        __ ld(dest->as_register(), as_Address(from_addr));
+      }
+      break;
+    case T_INT:
+      __ lw(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_LONG:
+      __ ld(dest->as_register_lo(), as_Address_lo(from_addr));
+      break;
+    case T_BYTE:
+      __ lb(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_BOOLEAN:
+      __ lbu(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_CHAR:
+      __ lhu(dest->as_register(), as_Address(from_addr));
+      break;
+    case T_SHORT:
+      __ lh(dest->as_register(), as_Address(from_addr));
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Post-process the loaded value: decode narrow oops / narrow klass.
+  if (is_reference_type(type)) {
+    if (UseCompressedOops && !wide) {
+      __ decode_heap_oop(dest->as_register());
+    }
+    __ verify_oop(dest->as_register());
+  } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) {
+    if (UseCompressedClassPointers) {
+      __ decode_klass_not_null(dest->as_register());
+    }
+  }
+}
+
+// Emit a three-operand LIR op: integer divide/remainder is delegated to
+// arithmetic_idiv, and the fused multiply-add ops map onto fmadd_d /
+// fmadd_s. Any other op code is a bug.
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  switch (op->code()) {
+    case lir_idiv: // fall through
+    case lir_irem:
+      arithmetic_idiv(op->code(),
+                      op->in_opr1(),
+                      op->in_opr2(),
+                      op->in_opr3(),
+                      op->result_opr(),
+                      op->info());
+      break;
+    case lir_fmad:
+      // double-precision fused multiply-add
+      __ fmadd_d(op->result_opr()->as_double_reg(),
+                 op->in_opr1()->as_double_reg(),
+                 op->in_opr2()->as_double_reg(),
+                 op->in_opr3()->as_double_reg());
+      break;
+    case lir_fmaf:
+      // single-precision fused multiply-add
+      __ fmadd_s(op->result_opr()->as_float_reg(),
+                 op->in_opr1()->as_float_reg(),
+                 op->in_opr2()->as_float_reg(),
+                 op->in_opr3()->as_float_reg());
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Conditional move implemented with a branch: 'result' receives opr1
+// when the comparison of cmp_opr1 and cmp_opr2 satisfies 'condition',
+// and opr2 otherwise.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type,
+                          LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) {
+  // Every condition except greater / greaterEqual is emitted as unordered.
+  const bool is_unordered = (condition != lir_cond_greaterEqual) && (condition != lir_cond_greater);
+
+  Label take_opr1;
+  Label finished;
+
+  emit_branch(condition, cmp_opr1, cmp_opr2, take_opr1, /* is_far */ false, is_unordered);
+
+  // Condition not met: result := opr2.
+  move_op(opr2, result, type, lir_patch_none, NULL,
+          false,   // pop_fpu_stack
+          false,   // unaligned
+          false);  // wide
+  __ j(finished);
+
+  // Condition met: result := opr1.
+  __ bind(take_opr1);
+  move_op(opr1, result, type, lir_patch_none, NULL,
+          false,   // pop_fpu_stack
+          false,   // unaligned
+          false);  // wide
+
+  __ bind(finished);
+}
+
+// Emit the branch for a LIR_OpBranch. Unconditional branches first
+// record their debug info, if any; conditional branches must have two
+// legal operands. The branch itself is always emitted as a far branch.
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+  const LIR_Condition cond = op->cond();
+  const bool unconditional = (cond == lir_cond_always);
+
+  assert(unconditional ||
+         (op->in_opr1() != LIR_OprFact::illegalOpr && op->in_opr2() != LIR_OprFact::illegalOpr),
+         "conditional branches must have legal operands");
+
+  if (unconditional && op->info() != NULL) {
+    add_debug_info_for_branch(op->info());
+  }
+
+  // The branch is unordered when the op's unordered block is its own target block.
+  const bool is_unordered = (op->ublock() == op->block());
+  emit_branch(cond, op->in_opr1(), op->in_opr2(), *op->label(), /* is_far */ true, is_unordered);
+}
+
+// Emit a compare-and-branch to 'label' for the given LIR condition.
+// lir_cond_always becomes a plain jump; otherwise the kind of cmp1
+// selects between the integer and the float/double compare-branch
+// helpers of the C1 macro assembler.
+void LIR_Assembler::emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label,
+                                bool is_far, bool is_unordered) {
+  // Unconditional: the operands are not looked at.
+  if (cmp_flag == lir_cond_always) {
+    __ j(label);
+    return;
+  }
+
+  // Integer / pointer compare.
+  if (cmp1->is_cpu_register()) {
+    Register lhs = as_reg(cmp1);
+    if (cmp2->is_cpu_register()) {
+      Register rhs = as_reg(cmp2);
+      __ c1_cmp_branch(cmp_flag, lhs, rhs, label, cmp1->type(), is_far);
+      return;
+    }
+    if (cmp2->is_constant()) {
+      // The constant is materialized by the helper and then compared via t0.
+      const2reg_helper(cmp2);
+      __ c1_cmp_branch(cmp_flag, lhs, t0, label, cmp2->type(), is_far);
+      return;
+    }
+    ShouldNotReachHere();
+    return;
+  }
+
+  // Single-precision compare.
+  if (cmp1->is_single_fpu()) {
+    assert(cmp2->is_single_fpu(), "expect single float register");
+    __ c1_float_cmp_branch(cmp_flag, cmp1->as_float_reg(), cmp2->as_float_reg(), label, is_far, is_unordered);
+    return;
+  }
+
+  // Double-precision compare: same helper, flagged with the double mask.
+  if (cmp1->is_double_fpu()) {
+    assert(cmp2->is_double_fpu(), "expect double float register");
+    __ c1_float_cmp_branch(cmp_flag | C1_MacroAssembler::c1_double_branch_mask,
+                           cmp1->as_double_reg(), cmp2->as_double_reg(), label, is_far, is_unordered);
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+// Emit a primitive type conversion selected by the op's bytecode.
+// Integer-to-float and float-to-float conversions map onto fcvt
+// instructions, integer narrowing uses zero_extend / sign_extend with
+// the target bit width, and float-to-integer conversions go through the
+// *_safe macro-assembler helpers.
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  LIR_Opr src  = op->in_opr();
+  LIR_Opr dest = op->result_opr();
+
+  switch (op->bytecode()) {
+    case Bytecodes::_i2f:
+      __ fcvt_s_w(dest->as_float_reg(), src->as_register()); break;
+    case Bytecodes::_i2d:
+      __ fcvt_d_w(dest->as_double_reg(), src->as_register()); break;
+    case Bytecodes::_l2d:
+      __ fcvt_d_l(dest->as_double_reg(), src->as_register_lo()); break;
+    case Bytecodes::_l2f:
+      __ fcvt_s_l(dest->as_float_reg(), src->as_register_lo()); break;
+    case Bytecodes::_f2d:
+      __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); break;
+    case Bytecodes::_d2f:
+      __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break;
+    case Bytecodes::_i2c:
+      __ zero_extend(dest->as_register(), src->as_register(), 16); break;
+    case Bytecodes::_i2l:
+      // addw with zr sign-extends the low 32 bits into the 64-bit result.
+      __ addw(dest->as_register_lo(), src->as_register(), zr); break;
+    case Bytecodes::_i2s:
+      __ sign_extend(dest->as_register(), src->as_register(), 16); break;
+    case Bytecodes::_i2b:
+      __ sign_extend(dest->as_register(), src->as_register(), 8); break;
+    case Bytecodes::_l2i:
+      // Same addw-with-zr idiom as _i2l, applied to the long's low register.
+      _masm->block_comment("FIXME: This coulde be no-op");
+      __ addw(dest->as_register(), src->as_register_lo(), zr); break;
+    case Bytecodes::_d2l:
+      __ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break;
+    case Bytecodes::_f2i:
+      __ fcvt_w_s_safe(dest->as_register(), src->as_float_reg()); break;
+    case Bytecodes::_f2l:
+      __ fcvt_l_s_safe(dest->as_register_lo(), src->as_float_reg()); break;
+    case Bytecodes::_d2i:
+      __ fcvt_w_d_safe(dest->as_register(), src->as_double_reg()); break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Emit an object allocation. When the op requests an init check, the
+// klass's init state is compared against fully_initialized first and
+// the slow-path stub is taken on mismatch. The stub is also the slow
+// path handed to allocate_object; its continuation is bound at the end.
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  if (op->init_check()) {
+    // t0 := klass init state, t1 := expected state
+    __ lbu(t0, Address(op->klass()->as_register(),
+                       InstanceKlass::init_state_offset()));
+    __ mvw(t1, InstanceKlass::fully_initialized);
+    // Debug info is recorded at the offset of the branch emitted next.
+    add_debug_info_for_null_check_here(op->stub()->info());
+    __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true);
+  }
+
+  __ allocate_object(op->obj()->as_register(),
+                     op->tmp1()->as_register(),
+                     op->tmp2()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     op->klass()->as_register(),
+                     *op->stub()->entry());
+
+  __ bind(*op->stub()->continuation());
+}
+
+// Emit an array allocation. When the fast path is disabled for this
+// element kind (or UseSlowPath is set) the code jumps straight to the
+// slow-path stub; otherwise allocate_array is emitted with the stub as
+// its slow path. The stub's continuation is bound at the end either way.
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  Register len = op->len()->as_register();
+
+  if (UseSlowPath ||
+      (!UseFastNewObjectArray && is_reference_type(op->type())) ||
+      (!UseFastNewTypeArray   && !is_reference_type(op->type()))) {
+    __ j(*op->stub()->entry());
+  } else {
+    Register tmp1 = op->tmp1()->as_register();
+    Register tmp2 = op->tmp2()->as_register();
+    Register tmp3 = op->tmp3()->as_register();
+    // Keep the temps handed to allocate_array distinct from len: if len
+    // aliases tmp1 or tmp2, tmp3 takes that temp's place. Otherwise, unless
+    // len already is tmp3, len is copied into tmp3 (presumably for the
+    // slow-path stub -- confirm against the stub's register expectations).
+    if (len == tmp1) {
+      tmp1 = tmp3;
+    } else if (len == tmp2) {
+      tmp2 = tmp3;
+    } else if (len == tmp3) {
+      // everything is ok
+    } else {
+      __ mv(tmp3, len);
+    }
+    __ allocate_array(op->obj()->as_register(),
+                      len,
+                      tmp1,
+                      tmp2,
+                      arrayOopDesc::header_size(op->type()),
+                      array_element_size(op->type()),
+                      op->klass()->as_register(),
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Emit the receiver-type profile update for 'recv' in the profile rows
+// of 'data' inside the method data object addressed by 'mdo'.
+//
+// The emitted code first looks for a row that already holds 'recv' and
+// bumps its counter; failing that, it claims the first empty row. Both
+// outcomes jump to 'update_done'. If every row is taken by another
+// receiver, control falls through past the emitted code. t1 is
+// clobbered.
+void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data,
+                                        Register recv, Label* update_done) {
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    // See if the receiver is receiver[n].
+    __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
+    __ bne(recv, t1, next_test);
+    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
+    __ increment(data_addr, DataLayout::counter_increment);
+    __ j(*update_done);
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in
+  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
+    Label next_test;
+    Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
+    __ ld(t1, recv_addr);
+    __ bnez(t1, next_test);   // row already in use
+    __ sd(recv, recv_addr);
+    // A freshly claimed row starts with a count of one increment.
+    __ mv(t1, DataLayout::counter_increment);
+    __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
+    __ j(*update_done);
+    __ bind(next_test);
+  }
+}
+
+// Look up the profiling data for a type-check op.
+// On return *md is the method data of the op's profiled method and
+// *data is the profile entry at the op's profiled bci. The method data
+// must exist (guarantee); the entry must be ReceiverTypeData (assert).
+void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) {
+  ciMethod* profiled = op->profiled_method();
+  assert(profiled != NULL, "Should have method");
+  const int profiled_bci = op->profiled_bci();
+
+  ciMethodData* method_data = profiled->method_data_or_null();
+  *md = method_data;
+  guarantee(method_data != NULL, "Sanity");
+
+  ciProfileData* entry = method_data->bci_to_data(profiled_bci);
+  *data = entry;
+  assert(entry != NULL, "need data for type check");
+  assert(entry->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+}
+
+// Emit the non-fast-check part of a type check: is the class of 'obj'
+// a subtype of the class held in k_RInfo?
+//
+// klass_RInfo receives the object's klass. For a loaded 'k' the super
+// check offset is known at compile time and is probed directly, with
+// the Runtime1 slow_subtype_check stub only used when that offset is
+// the secondary super cache. For an unloaded 'k' the generic fast path
+// is emitted followed by the same stub call. Failure branches to
+// 'failure_target'; success either branches to 'success_target' or
+// falls through. t0 is clobbered, and k_RInfo is overwritten with the
+// stub's boolean result on the slow path.
+void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Register Rtmp1,
+                                               Register k_RInfo, Register klass_RInfo,
+                                               Label *failure_target, Label *success_target) {
+  // get object class
+  // not a safepoint as obj null check happens earlier
+  __ load_klass(klass_RInfo, obj);
+  if (k->is_loaded()) {
+    // See if we get an immediate positive hit
+    __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset())));
+    if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
+      // The offset is not the secondary super cache, so a mismatch is final.
+      __ bne(k_RInfo, t0, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    } else {
+      // The probed slot is the secondary super cache: a hit is a success,
+      // a miss needs the slow path.
+      __ beq(k_RInfo, t0, *success_target);
+      // check for self
+      __ beq(klass_RInfo, k_RInfo, *success_target);
+
+      __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+      __ sd(k_RInfo, Address(sp, 0));             // super klass
+      __ sd(klass_RInfo, Address(sp, wordSize));  // sub klass
+      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+      // load result to k_RInfo
+      __ ld(k_RInfo, Address(sp, 0));
+      __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+      // result is a boolean
+      __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+      // successful cast, fall through to profile or jump
+    }
+  } else {
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...)
+    __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+    __ sd(klass_RInfo, Address(sp, wordSize));  // sub klass
+    __ sd(k_RInfo, Address(sp, 0));             // super klass
+    __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+    // load result to k_RInfo
+    __ ld(k_RInfo, Address(sp, 0));
+    __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+    // result is a boolean
+    __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+    // successful cast, fall through to profile or jump
+  }
+}
+
+// Emit the null-object part of type-check profiling: when 'obj' is
+// null, set the null_seen bits in the flags byte of 'data' and jump to
+// 'obj_is_null'; when it is non-null, fall through without touching the
+// profile. klass_RInfo is reused as the MDO register on the null path;
+// t0 is clobbered there and t1 is handed to form_address as a temp.
+void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Register obj,
+                                   Register klass_RInfo, Label* obj_is_null) {
+  Label not_null;
+  __ bnez(obj, not_null);
+  // Object is null, update MDO and exit
+  Register mdo = klass_RInfo;
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset()));
+  // Read-modify-write of the flags byte.
+  __ lbu(t0, data_addr);
+  __ ori(t0, t0, BitData::null_seen_byte_constant());
+  __ sb(t0, data_addr);
+  __ j(*obj_is_null);
+  __ bind(not_null);
+}
+
+// Put the klass 'k' into k_RInfo. A loaded klass is materialized as a
+// metadata constant; an unloaded one goes through the patching variant
+// with the op's patch info.
+void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) {
+  if (k->is_loaded()) {
+    __ mov_metadata(k_RInfo, k->constant_encoding());
+    return;
+  }
+  klass2reg_with_patching(k_RInfo, op->info_for_patch());
+}
+
+// Emit the shared body of checkcast / instanceof.
+//
+// Control ends up at 'success', 'failure' or 'obj_is_null'. When the op
+// is profiled, the check first branches to local profile labels, and
+// type_profile is handed those together with the caller's labels. The
+// emitted code always ends with a jump to 'success'.
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) {
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  Register Rtmp1 = noreg;
+
+  // check if it needs to be profiled
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  const bool should_profile = op->should_profile();
+  if (should_profile) {
+    data_check(op, &md, &data);
+  }
+  // With profiling, the check targets the local labels instead of the
+  // caller's.
+  Label profile_cast_success, profile_cast_failure;
+  Label *success_target = should_profile ? &profile_cast_success : success;
+  Label *failure_target = should_profile ? &profile_cast_failure : failure;
+
+  // If obj aliases one of the temps, use dst in that temp's place.
+  if (obj == k_RInfo) {
+    k_RInfo = dst;
+  } else if (obj == klass_RInfo) {
+    klass_RInfo = dst;
+  }
+  // The third temp is only fetched when the klass is unloaded or
+  // compressed class pointers are in use.
+  if (k->is_loaded() && !UseCompressedClassPointers) {
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo);
+  } else {
+    Rtmp1 = op->tmp3()->as_register();
+    select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1);
+  }
+
+  assert_different_registers(obj, k_RInfo, klass_RInfo);
+
+  // Null handling: either record the null in the profile or just branch.
+  if (should_profile) {
+    profile_object(md, data, obj, klass_RInfo, obj_is_null);
+  } else {
+    __ beqz(obj, *obj_is_null);
+  }
+
+  typecheck_loaded(op, k, k_RInfo);
+  __ verify_oop(obj);
+
+  if (op->fast_check()) {
+    // get object class
+    // not a safepoint as obj null check happens earlier
+    __ load_klass(t0, obj);
+    // Fast check: the object's klass must be exactly k_RInfo.
+    __ bne(t0, k_RInfo, *failure_target, /* is_far */ true);
+    // successful cast, fall through to profile or jump
+  } else {
+    typecheck_helper_slowcheck(k, obj, Rtmp1, k_RInfo, klass_RInfo, failure_target, success_target);
+  }
+  if (should_profile) {
+    type_profile(obj, md, klass_RInfo, k_RInfo, data, success, failure, profile_cast_success, profile_cast_failure);
+  }
+  __ j(*success);
+}
+
+// Emit a type-check LIR op: store check, checkcast or instanceof.
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  const bool should_profile = op->should_profile();
+
+  switch (op->code()) {
+    case lir_store_check:
+      typecheck_lir_store(op, should_profile);
+      break;
+
+    case lir_checkcast: {
+      Register obj = op->object()->as_register();
+      Register dst = op->result_opr()->as_register();
+      // Null objects and successful casts share one label; a failed cast
+      // goes to the op's stub.
+      Label passed;
+      emit_typecheck_helper(op, &passed, op->stub()->entry(), &passed);
+      __ bind(passed);
+      if (dst != obj) {
+        __ mv(dst, obj);
+      }
+      break;
+    }
+
+    case lir_instanceof: {
+      Register obj = op->object()->as_register();
+      Register dst = op->result_opr()->as_register();
+      // Null objects are treated like a failed check: the result is 0.
+      Label is_instance, not_instance, finished;
+      emit_typecheck_helper(op, &is_instance, &not_instance, &not_instance);
+      __ bind(not_instance);
+      __ mv(dst, zr);
+      __ j(finished);
+      __ bind(is_instance);
+      __ mv(dst, 1);
+      __ bind(finished);
+      break;
+    }
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Emits a compare-and-swap on the location named by op->addr().
+// Depending on op->code() and UseCompressedOops this dispatches to casw
+// (lir_cas_int), caswu (lir_cas_obj with compressed oops) or casl (all
+// remaining cases).
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  assert(VM_Version::supports_cx8(), "wrong machine");
+  Register addr;
+  if (op->addr()->is_register()) {
+    addr = as_reg(op->addr());
+  } else {
+    // An address operand is only accepted in plain [base] form:
+    // zero displacement and no index register.
+    assert(op->addr()->is_address(), "what else?");
+    LIR_Address* addr_ptr = op->addr()->as_address_ptr();
+    assert(addr_ptr->disp() == 0, "need 0 disp");
+    assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index");
+    addr = as_reg(addr_ptr->base());
+  }
+  Register newval = as_reg(op->new_value());
+  Register cmpval = as_reg(op->cmp_value());
+
+  if (op->code() == lir_cas_obj) {
+    if (UseCompressedOops) {
+      // Encode both oops into scratch registers (tmp1 and t1) and compare
+      // and swap the encoded values, leaving the original registers intact.
+      Register tmp1 = op->tmp1()->as_register();
+      assert(op->tmp1()->is_valid(), "must be");
+      __ encode_heap_oop(tmp1, cmpval);
+      cmpval = tmp1;
+      __ encode_heap_oop(t1, newval);
+      newval = t1;
+      caswu(addr, newval, cmpval);
+    } else {
+      casl(addr, newval, cmpval);
+    }
+  } else if (op->code() == lir_cas_int) {
+    casw(addr, newval, cmpval);
+  } else {
+    casl(addr, newval, cmpval);
+  }
+}
+
+// Emits a double-precision math intrinsic: absolute value (lir_abs) or
+// square root (lir_sqrt) of `value` into `dest`. No other code is handled.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+  if (code == lir_abs) {
+    __ fabs_d(dest->as_double_reg(), value->as_double_reg());
+  } else if (code == lir_sqrt) {
+    __ fsqrt_d(dest->as_double_reg(), value->as_double_reg());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Emits a bitwise logic operation `dst = left <code> right`.
+// The width is chosen by dst: a single-cpu destination uses the 32-bit
+// helpers, anything else the 64-bit ones. A constant right operand is used
+// as an immediate when operand_valid_for_add_immediate() accepts it and is
+// otherwise materialized in t0.
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+  assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
+  Register Rleft = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  const bool is_32bit = dst->is_single_cpu();
+  Register Rdst = is_32bit ? dst->as_register() : dst->as_register_lo();
+
+  // Register-register form.
+  if (!right->is_constant()) {
+    Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo();
+    if (is_32bit) {
+      logic_op_reg32(Rdst, Rleft, Rright, code);
+    } else {
+      logic_op_reg(Rdst, Rleft, Rright, code);
+    }
+    return;
+  }
+
+  // Register-constant form.
+  if (is_32bit) {
+    int imm32 = right->as_jint();
+    if (!Assembler::operand_valid_for_add_immediate(imm32)) {
+      __ mv(t0, imm32);
+      logic_op_reg32(Rdst, Rleft, t0, code);
+    } else {
+      logic_op_imm(Rdst, Rleft, imm32, code);
+      // NOTE(review): presumably re-normalizes the result to a
+      // sign-extended 32-bit value - confirm.
+      __ addw(Rdst, Rdst, zr);
+    }
+  } else {
+    long imm64 = right->as_jlong();
+    if (!Assembler::operand_valid_for_add_immediate(imm64)) {
+      __ mv(t0, imm64);
+      logic_op_reg(Rdst, Rleft, t0, code);
+    } else {
+      logic_op_imm(Rdst, Rleft, imm64, code);
+    }
+  }
+}
+
+// Must never be called: the body is an unconditional ShouldNotCallThis().
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr src, LIR_Opr result, LIR_Op2* op) {
+  ShouldNotCallThis();
+}
+
+// Emits a three-way comparison whose integer result is written to dst.
+//  - lir_cmp_fd2i / lir_ucmp_fd2i: float or double compare; the value passed
+//    to float_cmp for the unordered case is -1 for the "ucmp" flavour and 1
+//    otherwise.
+//  - lir_cmp_l2i: long compare.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+  switch (code) {
+    case lir_cmp_fd2i:
+    case lir_ucmp_fd2i: {
+      const int unordered_result = (code == lir_ucmp_fd2i) ? -1 : 1;
+      if (left->is_single_fpu()) {
+        __ float_cmp(true, unordered_result,
+                     left->as_float_reg(), right->as_float_reg(), dst->as_register());
+      } else if (left->is_double_fpu()) {
+        __ float_cmp(false, unordered_result,
+                     left->as_double_reg(), right->as_double_reg(), dst->as_register());
+      } else {
+        ShouldNotReachHere();
+      }
+      break;
+    }
+    case lir_cmp_l2i:
+      __ cmp_l2i(dst->as_register(), left->as_register_lo(), right->as_register_lo());
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Aligns the current code position to NativeInstruction::instruction_size.
+// The `code` argument is not consulted.
+void LIR_Assembler::align_call(LIR_Code code) {
+  // With RVC a call instruction may get 2-byte aligned.
+  // The address of the call instruction needs to be 4-byte aligned to
+  // ensure that it does not span a cache line so that it can be patched.
+  __ align(NativeInstruction::instruction_size);
+}
+
+// Emits a trampoline call to op->addr() with relocation type `rtype` and
+// records the call's debug info. If the trampoline call cannot be emitted
+// (NULL result) the compilation bails out instead.
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  const address call_pc = __ trampoline_call(Address(op->addr(), rtype));
+  if (call_pc != NULL) {
+    add_call_info(code_offset(), op->info());
+  } else {
+    bailout("trampoline stub overflow");
+  }
+}
+
+// Emits an inline-cache call to op->addr() and records the call's debug
+// info. If the call cannot be emitted (NULL result) the compilation bails
+// out instead.
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  const address call_pc = __ ic_call(op->addr());
+  if (call_pc != NULL) {
+    add_call_info(code_offset(), op->info());
+  } else {
+    bailout("trampoline stub overflow");
+  }
+}
+
+/* Currently, vtable-dispatch is only enabled for sparc platforms */
+// Consequently this entry point must never be reached here.
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere();
+}
+
+// Emits the stub that belongs to the static call at the current pc.
+// The stub is tied to that call site through a static_stub_Relocation.
+// Bails out of the compilation when no stub space can be started.
+void LIR_Assembler::emit_static_call_stub() {
+  address call_pc = __ pc();
+  MacroAssembler::assert_alignment(call_pc);
+  address stub = __ start_a_stub(call_stub_size());
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+  __ emit_static_call_stub();
+
+  // The emitted stub plus the trampoline stub size must fit into the
+  // space reserved by call_stub_size().
+  assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size()
+         <= call_stub_size(), "stub too big");
+  __ end_a_stub();
+}
+
+// Emits an athrow: loads the throwing pc into exceptionPC (x13) and calls
+// the Runtime1 exception-handling stub with the exception oop in x10.
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  assert(exceptionOop->as_register() == x10, "must match");
+  assert(exceptionPC->as_register() == x13, "must match");
+
+  // exception object is not added to oop map by LinearScan
+  // (LinearScan assumes that no oops are in fixed registers)
+  info->add_register_oop(exceptionOop);
+  Runtime1::StubID unwind_id;
+
+  // get current pc information
+  // pc is only needed if the method has an exception handler, the unwind code does not need it.
+  if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) {
+    // As no instructions have been generated yet for this LIR node it's
+    // possible that an oop map already exists for the current offset.
+    // In that case insert a dummy NOP here to ensure all oop map PCs
+    // are unique. See JDK-8237483.
+    __ nop();
+  }
+  int pc_for_athrow_offset = __ offset();
+  InternalAddress pc_for_athrow(__ pc());
+  int32_t off = 0;
+  // la_patchable reports the remaining offset through `off`; the addi
+  // below applies it to obtain the full address.
+  __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off);
+  __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  __ verify_not_null_oop(x10);
+  // search an exception handler (x10: exception oop, x13: throwing pc)
+  if (compilation()->has_fpu_code()) {
+    unwind_id = Runtime1::handle_exception_id;
+  } else {
+    unwind_id = Runtime1::handle_exception_nofpu_id;
+  }
+  __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id)));
+  __ nop();
+}
+
+// Emits a jump to the method's unwind handler entry. The exception oop is
+// required to be in x10.
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  assert(exceptionOop->as_register() == x10, "must match");
+  __ j(_unwind_handler_entry);
+}
+
+// Emits a shift (lir_shl / lir_shr / lir_ushr) whose count is held in a
+// register. The count is masked into t0 first (5 bits for an int result,
+// 6 bits for a long result) and t0 is used as the shift amount.
+// The `tmp` operand is not used.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  Register count_reg = count->as_register();
+  if (dest->is_single_cpu()) {
+    assert (dest->type() == T_INT, "unexpected result type");
+    assert (left->type() == T_INT, "unexpected left type");
+    __ andi(t0, count_reg, 31); // should not shift more than 31 bits
+    switch (code) {
+      case lir_shl:  __ sllw(dest_reg, left_reg, t0); break;
+      case lir_shr:  __ sraw(dest_reg, left_reg, t0); break;
+      case lir_ushr: __ srlw(dest_reg, left_reg, t0); break;
+      default: ShouldNotReachHere();
+    }
+  } else if (dest->is_double_cpu()) {
+    __ andi(t0, count_reg, 63); // should not shift more than 63 bits
+    switch (code) {
+      case lir_shl:  __ sll(dest_reg, left_reg, t0); break;
+      case lir_shr:  __ sra(dest_reg, left_reg, t0); break;
+      case lir_ushr: __ srl(dest_reg, left_reg, t0); break;
+      default: ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Emits a shift (lir_shl / lir_shr / lir_ushr) by a compile-time constant.
+// The count is masked to 5 bits for an int result and 6 bits for a long
+// result; a masked count of zero degenerates into a register move.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+  Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo();
+  Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo();
+  if (dest->is_single_cpu()) {
+    assert (dest->type() == T_INT, "unexpected result type");
+    assert (left->type() == T_INT, "unexpected left type");
+    count &= 0x1f;
+    if (count != 0) {
+      switch (code) {
+        case lir_shl:  __ slliw(dest_reg, left_reg, count); break;
+        case lir_shr:  __ sraiw(dest_reg, left_reg, count); break;
+        case lir_ushr: __ srliw(dest_reg, left_reg, count); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      move_regs(left_reg, dest_reg);
+    }
+  } else if (dest->is_double_cpu()) {
+    count &= 0x3f;
+    if (count != 0) {
+      switch (code) {
+        case lir_shl:  __ slli(dest_reg, left_reg, count); break;
+        case lir_shr:  __ srai(dest_reg, left_reg, count); break;
+        case lir_ushr: __ srli(dest_reg, left_reg, count); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      move_regs(left->as_register_lo(), dest->as_register_lo());
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Emits monitor enter (lir_lock) or monitor exit (lir_unlock).
+// Without UseFastLocking the code jumps straight to the stub; otherwise the
+// inline lock_object / unlock_object sequence is emitted with the stub entry
+// as its out-of-line target. The stub's continuation label is bound at the end.
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register();  // may not be an oop
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  if (!UseFastLocking) {
+    __ j(*op->stub()->entry());
+  } else if (op->code() == lir_lock) {
+    // The scratch register is only requested when biased locking is on.
+    Register scratch = noreg;
+    if (UseBiasedLocking) {
+      scratch = op->scratch_opr()->as_register();
+    }
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    // add debug info for NullPointerException only if one is possible
+    int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry());
+    if (op->info() != NULL) {
+      add_debug_info_for_null_check(null_check_offset, op->info());
+    }
+  } else if (op->code() == lir_unlock) {
+    assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ unlock_object(hdr, obj, lock, *op->stub()->entry());
+  } else {
+    Unimplemented();
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+// Emits the profile update for a call site in the method's MethodData.
+// When receiver-type profiling is not requested only the call counter is
+// incremented. With receiver-type profiling the receiver rows of the
+// VirtualCallData are updated, either statically (known holder klass) or
+// dynamically from the receiver's klass.
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+
+  // Update counter for all call types
+  ciMethodData* md = method->method_data_or_null();
+  guarantee(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data != NULL && data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo  = op->mdo()->as_register();
+  __ mov_metadata(mdo, md->constant_encoding());
+  Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if (op->should_profile_receiver_type()) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ increment(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(t1, known_klass->constant_encoding());
+          __ sd(t1, recv_addr);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ increment(data_addr, DataLayout::counter_increment);
+          return;
+        }
+      }
+      // If neither loop returned, no code is emitted for this call site.
+    } else {
+      // recv is overwritten with the receiver's klass.
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ increment(counter_addr, DataLayout::counter_increment);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call
+    __ increment(counter_addr, DataLayout::counter_increment);
+  }
+}
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::emit_delay(LIR_OpDelay*) { Unimplemented(); }
+
+// Loads into dst the frame address of the lock slot for monitor number
+// `monitor_no`, as reported by the frame map.
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
+  __ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
+}
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); }
+
+// Type-profile update for the case where a conflict with the recorded type
+// is possible. Compares the klass in `tmp` (or the exact klass, if known)
+// against the profile cell at mdo_addr and either leaves the cell alone
+// (jumping to `next`), records the klass for the first time (`none` path),
+// or marks the cell with TypeEntries::type_unknown.
+// Uses t0 and t1 as scratch; `tmp` is clobbered.
+void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass,
+                                   Register tmp, Label &next, Label &none,
+                                   Address mdo_addr) {
+  if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+    if (exact_klass != NULL) {
+      __ mov_metadata(tmp, exact_klass->constant_encoding());
+    } else {
+      __ load_klass(tmp, tmp);
+    }
+
+    // tmp = klass ^ profile cell; the klass bits of tmp are zero exactly
+    // when the cell already holds this klass.
+    __ ld(t1, mdo_addr);
+    __ xorr(tmp, tmp, t1);
+    __ andi(t0, tmp, TypeEntries::type_klass_mask);
+    // klass seen before, nothing to do. The unknown bit may have been
+    // set already but no need to check.
+    __ beqz(t0, next);
+
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+
+    if (TypeEntries::is_type_none(current_klass)) {
+      // Cell is empty (zero) or holds only the null_seen flag: first klass.
+      __ beqz(t1, none);
+      __ mv(t0, (u1)TypeEntries::null_seen);
+      __ beq(t0, t1, none);
+      // There is a chance that the checks above (re-reading profiling
+      // data from memory) fail if another thread has just set the
+      // profiling to this obj's klass
+      __ membar(MacroAssembler::LoadLoad);
+      __ ld(t1, mdo_addr);
+      __ xorr(tmp, tmp, t1);
+      __ andi(t0, tmp, TypeEntries::type_klass_mask);
+      __ beqz(t0, next);
+    }
+  } else {
+    assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+           ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+    __ ld(tmp, mdo_addr);
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+  }
+
+  // different than before. Cannot keep accurate profile.
+  __ ld(t1, mdo_addr);
+  __ ori(t1, t1, TypeEntries::type_unknown);
+  __ sd(t1, mdo_addr);
+
+  if (TypeEntries::is_type_none(current_klass)) {
+    __ j(next);
+
+    __ bind(none);
+    // first time here. Set profile type.
+    __ sd(tmp, mdo_addr);
+  }
+}
+
+// Type-profile update for a profile point with a single possible klass
+// (exact_klass must be non-NULL). If no type was recorded at compile time
+// the klass is stored into the cell at mdo_addr unless it is already there;
+// otherwise the cell is marked TypeEntries::type_unknown unless it already is.
+// Uses t0 and t1 as scratch; `tmp` is clobbered.
+void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
+                                      Address mdo_addr, Label &next) {
+  // There's a single possible klass at this profile point
+  assert(exact_klass != NULL, "should be");
+  if (TypeEntries::is_type_none(current_klass)) {
+    __ mov_metadata(tmp, exact_klass->constant_encoding());
+    __ ld(t1, mdo_addr);
+    __ xorr(tmp, tmp, t1);
+    __ andi(t0, tmp, TypeEntries::type_klass_mask);
+    __ beqz(t0, next);
+#ifdef ASSERT
+  {
+    // Debug-only check: the cell must be empty, hold only null_seen, or
+    // already hold exact_klass; anything else stops the VM.
+    Label ok;
+    __ ld(t0, mdo_addr);
+    __ beqz(t0, ok);
+    __ mv(t1, (u1)TypeEntries::null_seen);
+    __ beq(t0, t1, ok);
+    // may have been set by another thread
+    __ membar(MacroAssembler::LoadLoad);
+    __ mov_metadata(t0, exact_klass->constant_encoding());
+    __ ld(t1, mdo_addr);
+    __ xorr(t1, t0, t1);
+    __ andi(t1, t1, TypeEntries::type_mask);
+    __ beqz(t1, ok);
+
+    __ stop("unexpected profiling mismatch");
+    __ bind(ok);
+  }
+#endif
+    // first time here. Set profile type.
+    __ sd(tmp, mdo_addr);
+  } else {
+    assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+           ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+    __ ld(tmp, mdo_addr);
+    // already unknown. Nothing to do anymore.
+    __ andi(t0, tmp, TypeEntries::type_unknown);
+    __ bnez(t0, next);
+
+    __ ori(tmp, tmp, TypeEntries::type_unknown);
+    __ sd(tmp, mdo_addr);
+  }
+}
+
+// Null handling for type profiling. A non-zero `tmp` branches to `update`.
+// For a zero `tmp` the null_seen flag is OR-ed into the cell at mdo_addr
+// (unless the compile-time profile already has it), and, when a type update
+// would follow, control jumps over it to `next`. Uses t1 as scratch.
+void LIR_Assembler::check_null(Register tmp, Label &update, intptr_t current_klass,
+                               Address mdo_addr, bool do_update, Label &next) {
+  __ bnez(tmp, update);
+  if (!TypeEntries::was_null_seen(current_klass)) {
+    __ ld(t1, mdo_addr);
+    __ ori(t1, t1, TypeEntries::null_seen);
+    __ sd(t1, mdo_addr);
+  }
+  if (do_update) {
+    __ j(next);
+  }
+}
+
+// Emits the type-profile update for the object in op->obj() against the
+// profile cell addressed by op->mdp(). Null handling is delegated to
+// check_null and the klass update to check_conflict / check_no_conflict,
+// depending on op->no_conflict().
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  COMMENT("emit_profile_type {");
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  // do_null:   the object may be null, so a null check is emitted.
+  // do_update: the recorded type is neither "unknown" nor already equal to
+  //            exact_klass, so the cell may still need to change.
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+  assert_different_registers(tmp, t0, t1, mdo_addr.base());
+
+  __ verify_oop(obj);
+
+  // Work on a copy so that obj itself is preserved.
+  if (tmp != obj) {
+    __ mv(tmp, obj);
+  }
+  if (do_null) {
+    check_null(tmp, update, current_klass, mdo_addr, do_update, next);
+#ifdef ASSERT
+  } else {
+    // Debug-only: a null object here contradicts op->not_null().
+    __ bnez(tmp, update);
+    __ stop("unexpected null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      check_exact_klass(tmp, exact_klass);
+    }
+#endif
+    if (!no_conflict) {
+      check_conflict(exact_klass, current_klass, tmp, next, none, mdo_addr);
+    } else {
+      check_no_conflict(exact_klass, current_klass, tmp, mdo_addr, next);
+    }
+
+    __ bind(next);
+  }
+  COMMENT("} emit_profile_type");
+}
+
+// Intentionally empty: no alignment is emitted for backward branch targets.
+void LIR_Assembler::align_backward_branch_target() { }
+
+// Emits `dest = -left` for int, long, float and double operands. The
+// instruction is selected by the kind of `left`; `dest` must be of the same
+// kind. No temporary may be allocated for this operation.
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+
+  // 32-bit integer
+  if (left->is_single_cpu()) {
+    assert(dest->is_single_cpu(), "expect single result reg");
+    __ negw(dest->as_register(), left->as_register());
+    return;
+  }
+
+  // 64-bit integer
+  if (left->is_double_cpu()) {
+    assert(dest->is_double_cpu(), "expect double result reg");
+    __ neg(dest->as_register_lo(), left->as_register_lo());
+    return;
+  }
+
+  // single-precision float
+  if (left->is_single_fpu()) {
+    assert(dest->is_single_fpu(), "expect single float result reg");
+    __ fneg_s(dest->as_float_reg(), left->as_float_reg());
+    return;
+  }
+
+  // everything else must be a double-precision float
+  assert(left->is_double_fpu(), "expect double float operand reg");
+  assert(dest->is_double_fpu(), "expect double float result reg");
+  __ fneg_d(dest->as_double_reg(), left->as_double_reg());
+}
+
+
+// Loads the effective address described by `addr` into `dest`.
+// Patching is not supported; with Shenandoah GC a patching request is
+// turned into a deoptimization trap instead.
+void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+#if INCLUDE_SHENANDOAHGC
+  if (UseShenandoahGC && patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+#endif
+
+  assert(patch_code == lir_patch_none, "Patch code not supported");
+  LIR_Address* adr = addr->as_address_ptr();
+  Register dst = dest->as_register_lo();
+
+  assert_different_registers(dst, t0);
+  // Special case: dst is also the base register and there is no index
+  // register. Compute the total constant offset (displacement plus any
+  // scaled constant index).
+  if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) {
+    int scale = adr->scale();
+    intptr_t offset = adr->disp();
+    LIR_Opr index_op = adr->index();
+    if (index_op->is_constant()) {
+      offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale;
+    }
+
+    // An offset that does not fit a 12-bit immediate is resolved through t0
+    // and then copied into dst.
+    // NOTE(review): presumably this avoids clobbering the base (== dst)
+    // while the large offset is being materialized - confirm.
+    if (!is_imm_in_range(offset, 12, 0)) {
+      __ la(t0, as_Address(adr));
+      __ mv(dst, t0);
+      return;
+    }
+  }
+
+  __ la(dst, as_Address(adr));
+}
+
+
+// Emits a call to the runtime entry `dest`. A destination that belongs to a
+// code blob is reached with far_call; any other destination is called
+// indirectly through t0 with x1 as the link register. Call info is recorded
+// when `info` is supplied. `result`, `args` are not consulted and `tmp` must
+// not be allocated.
+void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  assert(!tmp->is_valid(), "don't need temporary");
+
+  if (CodeCache::find_blob(dest) == NULL) {
+    // No code blob contains dest: load the address and jump-and-link.
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(dest), offset);
+    __ jalr(x1, t0, offset);
+  } else {
+    __ far_call(RuntimeAddress(dest));
+  }
+
+  if (info != NULL) {
+    add_call_info_here(info);
+  }
+}
+
+// Emits a volatile move. At least one side must be a memory address; the
+// move itself is delegated to move_op without patching.
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  const bool touches_memory = dest->is_address() || src->is_address();
+  if (!touches_memory) {
+    ShouldNotReachHere();
+  } else {
+    move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false);
+  }
+}
+
+#ifdef ASSERT
+// Emits a run-time assertion (debug builds only).
+// When operands are supplied, a branch to `ok` is emitted for the op's
+// condition, skipping the failure code below. Without operands the
+// condition must be lir_cond_always and the failure code is reached
+// unconditionally. The failure code is either a stop with the op's message
+// (op->halt()) or a breakpoint.
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  assert(op->code() == lir_assert, "must be");
+
+  Label ok;
+  if (op->in_opr1()->is_valid()) {
+    assert(op->in_opr2()->is_valid(), "both operands must be valid");
+    LIR_Condition cond = op->condition();
+    // Every condition except greater / greaterEqual is emitted with the
+    // is_unordered flag set.
+    const bool is_unordered = !(cond == lir_cond_greaterEqual || cond == lir_cond_greater);
+    emit_branch(cond, op->in_opr1(), op->in_opr2(), ok, /* is_far */ false, is_unordered);
+  } else {
+    assert(op->in_opr2()->is_illegal(), "both operands must be illegal");
+    assert(op->condition() == lir_cond_always, "no other conditions allowed");
+  }
+
+  if (op->halt()) {
+    const char* str = __ code_string(op->msg());
+    __ stop(str);
+  } else {
+    breakpoint();
+  }
+  __ bind(ok);
+}
+#endif
+
+// COMMENT(x) attaches a block comment to the generated code in non-PRODUCT
+// builds and expands to nothing in PRODUCT builds.
+#ifndef PRODUCT
+#define COMMENT(x)   do { __ block_comment(x); } while (0)
+#else
+#define COMMENT(x)
+#endif
+
+// Emits a full memory barrier (MacroAssembler::AnyAny).
+void LIR_Assembler::membar() {
+  COMMENT("membar");
+  __ membar(MacroAssembler::AnyAny);
+}
+
+// Emits an acquire barrier, expressed as LoadLoad | LoadStore.
+void LIR_Assembler::membar_acquire() {
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+}
+
+// Emits a release barrier, expressed as LoadStore | StoreStore.
+void LIR_Assembler::membar_release() {
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+}
+
+// Emits a LoadLoad barrier.
+void LIR_Assembler::membar_loadload() {
+  __ membar(MacroAssembler::LoadLoad);
+}
+
+// Emits a StoreStore barrier.
+void LIR_Assembler::membar_storestore() {
+  __ membar(MacroAssembler::StoreStore);
+}
+
+// Emits a LoadStore barrier.
+void LIR_Assembler::membar_loadstore() {
+  __ membar(MacroAssembler::LoadStore);
+}
+
+// Emits a StoreLoad barrier.
+void LIR_Assembler::membar_storeload() {
+  __ membar(MacroAssembler::StoreLoad);
+}
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::on_spin_wait() {
+  Unimplemented();
+}
+
+// Copies the xthread register into result_reg.
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  __ mv(result_reg->as_register(), xthread);
+}
+
+// Intentionally empty: no peephole optimization is performed on the LIR list.
+void LIR_Assembler::peephole(LIR_List *lir) {}
+
+// Emits an atomic fetch-and-add (lir_xadd) or exchange (lir_xchg) on the
+// memory location `src`, leaving the result in `dest`. The concrete
+// MacroAssembler routine is selected by get_op() from the operand type and
+// invoked through the `add` / `xchg` member pointers. A full memory barrier
+// is emitted afterwards.
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) {
+  Address addr = as_Address(src->as_address_ptr());
+  BasicType type = src->type();
+  bool is_oop = is_reference_type(type);
+
+  // Sets the `add` and `xchg` member pointers for this type.
+  get_op(type);
+
+  switch (code) {
+    case lir_xadd:
+      {
+        RegisterOrConstant inc;
+        Register tmp = as_reg(tmp_op);
+        Register dst = as_reg(dest);
+        if (data->is_constant()) {
+          inc = RegisterOrConstant(as_long(data));
+          assert_different_registers(dst, addr.base(), tmp);
+          assert_different_registers(tmp, t0);
+        } else {
+          inc = RegisterOrConstant(as_reg(data));
+          assert_different_registers(inc.as_register(), dst, addr.base(), tmp);
+        }
+        // tmp holds the effective address for the atomic operation.
+        __ la(tmp, addr);
+        (_masm->*add)(dst, inc, tmp);
+        break;
+      }
+    case lir_xchg:
+      {
+        Register tmp = tmp_op->as_register();
+        Register obj = as_reg(data);
+        Register dst = as_reg(dest);
+        if (is_oop && UseCompressedOops) {
+          // Exchange the encoded form; t0 carries the encoded new value.
+          __ encode_heap_oop(t0, obj);
+          obj = t0;
+        }
+        assert_different_registers(obj, addr.base(), tmp, dst);
+        __ la(tmp, addr);
+        (_masm->*xchg)(dst, obj, tmp);
+        if (is_oop && UseCompressedOops) {
+          // The previous value comes back encoded; decode it in place.
+          __ decode_heap_oop(dst);
+        }
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  __ membar(MacroAssembler::AnyAny);
+}
+
+// Returns the base-2 logarithm of the array element size (in bytes) for
+// `type`, i.e. a shift amount rather than a byte count.
+int LIR_Assembler::array_element_size(BasicType type) const {
+  return exact_log2(type2aelembytes(type));
+}
+
+// Constant-section helpers. Each one checks for overflow of the constant
+// section and records a bailout if it occurs. The returned pointer is
+// always valid to embed, but after a bailout it does not refer to unique
+// storage (the start of the constant section is returned instead).
+address LIR_Assembler::float_constant(float f) {
+  address entry = __ float_constant(f);
+  if (entry != NULL) {
+    return entry;
+  }
+  bailout("const section overflow");
+  return __ code()->consts()->start();
+}
+
+// Places the double `d` in the constant section and returns its address.
+// On overflow a bailout is recorded and the start of the constant section
+// is returned as a valid, but not unique, address.
+address LIR_Assembler::double_constant(double d) {
+  address entry = __ double_constant(d);
+  if (entry != NULL) {
+    return entry;
+  }
+  bailout("const section overflow");
+  return __ code()->consts()->start();
+}
+
+// Places the value `n` in the constant section (stored as a long constant)
+// and returns its address. On overflow a bailout is recorded and the start
+// of the constant section is returned as a valid, but not unique, address.
+address LIR_Assembler::int_constant(jlong n) {
+  address entry = __ long_constant(n);
+  if (entry != NULL) {
+    return entry;
+  }
+  bailout("const section overflow");
+  return __ code()->consts()->start();
+}
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); }
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::reset_FPU() { Unimplemented(); }
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::fpop() { Unimplemented(); }
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::fxch(int i) { Unimplemented(); }
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::fld(int i) { Unimplemented(); }
+
+// Not implemented here; any call reaches Unimplemented().
+void LIR_Assembler::ffree(int i) { Unimplemented(); }
+
+// 32-bit (Assembler::int32) compare-and-swap of [addr]: cmpval is the
+// expected value, newval the replacement. The boolean cmpxchg result in t0
+// is inverted by seqz, then a full memory barrier is emitted.
+void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
+
+// Unsigned 32-bit (Assembler::uint32) compare-and-swap of [addr]: cmpval is
+// the expected value, newval the replacement. The boolean cmpxchg result in
+// t0 is inverted by seqz, then a full memory barrier is emitted.
+void LIR_Assembler::caswu(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::uint32, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
+
+// 64-bit (Assembler::int64) compare-and-swap of [addr]: cmpval is the
+// expected value, newval the replacement. The boolean cmpxchg result in t0
+// is inverted by seqz, then a full memory barrier is emitted.
+void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
+  __ cmpxchg(addr, cmpval, newval, Assembler::int64, Assembler::aq /* acquire */,
+             Assembler::rl /* release */, t0, true /* result as bool */);
+  __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1
+  __ membar(MacroAssembler::AnyAny);
+}
+
+// Emits a call to the Runtime1 patching entry that corresponds to the
+// patching id derived from `info`, and records call info for it.
+void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
+  address target = NULL;
+
+  // Map each PatchingStub id onto its Runtime1 patching stub.
+  switch (patching_id(info)) {
+    case PatchingStub::access_field_id:
+      target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+      break;
+    case PatchingStub::load_klass_id:
+      target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+      break;
+    case PatchingStub::load_mirror_id:
+      target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+      break;
+    case PatchingStub::load_appendix_id:
+      target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  __ far_call(RuntimeAddress(target));
+  add_call_info_here(info);
+}
+
+// Emits a check that the klass of the object in `tmp` equals exact_klass
+// and stops the VM otherwise. `tmp` is overwritten with the loaded klass;
+// t0 is used as scratch.
+void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) {
+  Label ok;
+  __ load_klass(tmp, tmp);
+  __ mov_metadata(t0, exact_klass->constant_encoding());
+  __ beq(tmp, t0, ok);
+  __ stop("exact klass and actual klass differ");
+  __ bind(ok);
+}
+
+// Selects the MacroAssembler atomic exchange / add routines for `type` and
+// stores them in the `xchg` and `add` members (consumed by atomic_op).
+// T_INT uses the word variants, T_LONG the 64-bit variants; references use
+// the unsigned-word exchange with compressed oops and the 64-bit variants
+// otherwise. Other types are not supported.
+void LIR_Assembler::get_op(BasicType type) {
+  switch (type) {
+    case T_INT:
+      xchg = &MacroAssembler::atomic_xchgalw;
+      add = &MacroAssembler::atomic_addalw;
+      break;
+    case T_LONG:
+      xchg = &MacroAssembler::atomic_xchgal;
+      add = &MacroAssembler::atomic_addal;
+      break;
+    case T_OBJECT:
+    case T_ARRAY:
+      if (UseCompressedOops) {
+        xchg = &MacroAssembler::atomic_xchgalwu;
+        add = &MacroAssembler::atomic_addalw;
+      } else {
+        xchg = &MacroAssembler::atomic_xchgal;
+        add = &MacroAssembler::atomic_addal;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// emit_opTypeCheck sub functions
+// Emits the array store check: a non-null `value` must be a subtype of the
+// element klass of `array`; on failure control goes to the op's stub.
+// With profiling enabled, the success path records the value's klass in the
+// MethodData and the failure path decrements the counter before jumping to
+// the stub.
+void LIR_Assembler::typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile) {
+  Register value = op->object()->as_register();
+  Register array = op->array()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register Rtmp1 = op->tmp3()->as_register();
+
+  CodeStub* stub = op->stub();
+
+  // check if it needs to be profiled
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
+
+  if (should_profile) {
+    data_check(op, &md, &data);
+  }
+  // When profiling, success/failure first go through the profiling code
+  // below; otherwise they go straight to `done` / the stub entry.
+  Label profile_cast_success, profile_cast_failure, done;
+  Label *success_target = should_profile ? &profile_cast_success : &done;
+  Label *failure_target = should_profile ? &profile_cast_failure : stub->entry();
+
+  // A null value needs no type check.
+  if (should_profile) {
+    profile_object(md, data, value, klass_RInfo, &done);
+  } else {
+    __ beqz(value, done);
+  }
+
+  add_debug_info_for_null_check_here(op->info_for_exception());
+  __ load_klass(k_RInfo, array);
+  __ load_klass(klass_RInfo, value);
+
+  lir_store_slowcheck(k_RInfo, klass_RInfo, Rtmp1, success_target, failure_target);
+
+  // fall through to the success case
+  if (should_profile) {
+    // The klass temporaries are free again and are reused for profiling.
+    Register mdo = klass_RInfo;
+    Register recv = k_RInfo;
+    __ bind(profile_cast_success);
+    __ mov_metadata(mdo, md->constant_encoding());
+    __ load_klass(recv, value);
+    type_profile_helper(mdo, md, data, recv, &done);
+    __ j(done);
+
+    __ bind(profile_cast_failure);
+    __ mov_metadata(mdo, md->constant_encoding());
+    Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+    __ ld(t1, counter_addr);
+    __ addi(t1, t1, -DataLayout::counter_increment);
+    __ sd(t1, counter_addr);
+    __ j(*stub->entry());
+  }
+
+  __ bind(done);
+}
+
+// Emits the profiling tails of a type check.
+//  - profile_cast_success: records the klass of `obj` in the MethodData via
+//    type_profile_helper, then continues at *success.
+//  - profile_cast_failure: decrements the counter slot of `data`, then
+//    continues at *failure.
+// klass_RInfo and k_RInfo are reused as scratch (MethodData pointer and
+// receiver klass); t0 and t1 are clobbered on the failure path.
+void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo,
+                                 ciProfileData* data, Label* success, Label* failure,
+                                 Label& profile_cast_success, Label& profile_cast_failure) {
+  Register mdo = klass_RInfo;
+  Register recv = k_RInfo;
+  __ bind(profile_cast_success);
+  __ mov_metadata(mdo, md->constant_encoding());
+  __ load_klass(recv, obj);
+  type_profile_helper(mdo, md, data, recv, success);
+  __ j(*success);
+
+  __ bind(profile_cast_failure);
+  __ mov_metadata(mdo, md->constant_encoding());
+  // form_address is given t1 as a temporary for building the counter address.
+  Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
+  __ ld(t0, counter_addr);
+  __ addi(t0, t0, -DataLayout::counter_increment);
+  __ sd(t0, counter_addr);
+  __ j(*failure);
+}
+
+void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1,
+                                        Label* success_target, Label* failure_target) {
+  // get instance klass (it's already uncompressed)
+  __ ld(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset()));
+  // perform the fast part of the checking logic
+  __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
+  // call out-of-line instance of __ check_klass_subtype_slow_path(...)
+  __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
+  __ sd(klass_RInfo, Address(sp, wordSize));  // sub klass
+  __ sd(k_RInfo, Address(sp, 0));             // super klass
+  __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
+  // load result to k_RInfo
+  __ ld(k_RInfo, Address(sp, 0));
+  __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
+  // result is a boolean
+  __ beqz(k_RInfo, *failure_target, /* is_far */ true);
+}
+
+void LIR_Assembler::const2reg_helper(LIR_Opr src) {
+  switch (src->as_constant_ptr()->type()) {
+    case T_INT:
+    case T_ADDRESS:
+    case T_OBJECT:
+    case T_ARRAY:
+    case T_METADATA:
+        const2reg(src, FrameMap::t0_opr, lir_patch_none, NULL);
+        break;
+    case T_LONG:
+        const2reg(src, FrameMap::t0_long_opr, lir_patch_none, NULL);
+        break;
+    case T_FLOAT:
+    case T_DOUBLE:
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op_reg32(Register dst, Register left, Register right, LIR_Code code) {
+  switch (code) {
+    case lir_logic_and: __ andrw(dst, left, right); break;
+    case lir_logic_or:  __ orrw (dst, left, right); break;
+    case lir_logic_xor: __ xorrw(dst, left, right); break;
+    default:            ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op_reg(Register dst, Register left, Register right, LIR_Code code) {
+  switch (code) {
+    case lir_logic_and: __ andr(dst, left, right); break;
+    case lir_logic_or:  __ orr (dst, left, right); break;
+    case lir_logic_xor: __ xorr(dst, left, right); break;
+    default:            ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::logic_op_imm(Register dst, Register left, int right, LIR_Code code) {
+  switch (code) {
+    case lir_logic_and: __ andi(dst, left, right); break;
+    case lir_logic_or:  __ ori (dst, left, right); break;
+    case lir_logic_xor: __ xori(dst, left, right); break;
+    default:            ShouldNotReachHere();
+  }
+}
+
+void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ sd(r, Address(sp, offset_from_rsp_in_bytes));
+}
+
+void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ mv(t0, c);
+  __ sd(t0, Address(sp, offset_from_rsp_in_bytes));
+}
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
new file mode 100644
index 0000000000..2afd61a3db
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
+#define CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
+
+// ArrayCopyStub needs access to bailout
+friend class ArrayCopyStub;
+
+private:
+
+#include "c1_LIRAssembler_arith_riscv.hpp"
+#include "c1_LIRAssembler_arraycopy_riscv.hpp"
+
+  int array_element_size(BasicType type) const;
+
+  static Register as_reg(LIR_Opr op) {
+    return op->is_double_cpu() ? op->as_register_lo() : op->as_register();
+  }
+
+  Address as_Address(LIR_Address* addr, Register tmp);
+
+  // helper functions which checks for overflow and sets bailout if it
+  // occurs.  Always returns a valid embeddable pointer but in the
+  // bailout case the pointer won't be to unique storage.
+  address float_constant(float f);
+  address double_constant(double d);
+  address int_constant(jlong n);
+
+  // Ensure we have a valid Address (base + offset) to a stack-slot.
+  Address stack_slot_address(int index, uint shift, int adjust = 0);
+
+  // Record the type of the receiver in ReceiverTypeData
+  void type_profile_helper(Register mdo,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Label* update_done);
+
+  void casw(Register addr, Register newval, Register cmpval);
+  void caswu(Register addr, Register newval, Register cmpval);
+  void casl(Register addr, Register newval, Register cmpval);
+
+  void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL);
+
+  void deoptimize_trap(CodeEmitInfo *info);
+
+  enum {
+    // See emit_static_call_stub for detail
+    // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address)
+    _call_stub_size = 14 * NativeInstruction::instruction_size +
+                      (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size),
+    _call_aot_stub_size = 0,
+    // See emit_exception_handler for detail
+    // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
+    _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller
+    // See emit_deopt_handler for detail
+    // auipc (1) + far_jump (6 or 2)
+    _deopt_handler_size = 1 * NativeInstruction::instruction_size +
+                          6 * NativeInstruction::instruction_size // or smaller
+  };
+
+  void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp,
+                      Label &next, Label &none, Address mdo_addr);
+  void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next);
+
+  void check_exact_klass(Register tmp, ciKlass* exact_klass);
+
+  void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next);
+
+  void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr);
+  void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr);
+
+  void get_op(BasicType type);
+
+  // emit_typecheck_helper sub functions
+  void data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data);
+  void typecheck_helper_slowcheck(ciKlass* k, Register obj, Register Rtmp1,
+                                  Register k_RInfo, Register klass_RInfo,
+                                  Label* failure_target, Label* success_target);
+  void profile_object(ciMethodData* md, ciProfileData* data, Register obj,
+                      Register klass_RInfo, Label* obj_is_null);
+  void typecheck_loaded(LIR_OpTypeCheck* op, ciKlass* k, Register k_RInfo);
+
+  // emit_opTypeCheck sub functions
+  void typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile);
+
+  void type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo,
+                    ciProfileData* data, Label* success, Label* failure,
+                    Label& profile_cast_success, Label& profile_cast_failure);
+
+  void lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1,
+                           Label* success_target, Label* failure_target);
+
+  void const2reg_helper(LIR_Opr src);
+
+  void emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, bool is_far, bool is_unordered);
+
+  void logic_op_reg32(Register dst, Register left, Register right, LIR_Code code);
+  void logic_op_reg(Register dst, Register left, Register right, LIR_Code code);
+  void logic_op_imm(Register dst, Register left, int right, LIR_Code code);
+
+public:
+
+  void emit_cmove(LIR_Op4* op);
+
+  void store_parameter(Register r, int offset_from_rsp_in_words);
+  void store_parameter(jint c, int offset_from_rsp_in_words);
+
+#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
new file mode 100644
index 0000000000..c41819fc2a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp
@@ -0,0 +1,1094 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->
+#else
+#define __ gen()->lir()->
+#endif
+
+// Item will be loaded into a byte register; Intel only
+void LIRItem::load_byte_item() {
+  load_item();
+}
+
+
+void LIRItem::load_nonconstant() {
+  LIR_Opr r = value()->operand();
+  if (r->is_constant()) {
+    _result = r;
+  } else {
+    load_item();
+  }
+}
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+
+LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r10_oop_opr; }
+LIR_Opr LIRGenerator::exceptionPcOpr()  { return FrameMap::r13_opr; }
+LIR_Opr LIRGenerator::divInOpr()        { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::divOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::remOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::shiftCountOpr()   { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
+LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::r10_opr; }
+LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
+
+
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  LIR_Opr opr;
+  switch (type->tag()) {
+    case intTag:     opr = FrameMap::r10_opr;          break;
+    case objectTag:  opr = FrameMap::r10_oop_opr;      break;
+    case longTag:    opr = FrameMap::long10_opr;       break;
+    case floatTag:   opr = FrameMap::fpu10_float_opr;  break;
+    case doubleTag:  opr = FrameMap::fpu10_double_opr; break;
+
+    case addressTag: // fall through
+    default:
+      ShouldNotReachHere();
+      return LIR_OprFact::illegalOpr;
+  }
+
+  assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return opr;
+}
+
+
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  LIR_Opr reg = new_register(T_INT);
+  set_vreg_flag(reg, LIRGenerator::byte_reg);
+  return reg;
+}
+
+//--------- loading items into registers --------------------------------
+
+
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    return v->type()->as_IntConstant()->value() == 0;
+  } else if (v->type()->as_LongConstant() != NULL) {
+    return v->type()->as_LongConstant()->value() == 0;
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else if (v->type()->as_FloatConstant() != NULL) {
+    return jint_cast(v->type()->as_FloatConstant()->value()) == 0;    // integer compare of the cast result
+  } else if (v->type()->as_DoubleConstant() != NULL) {
+    return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0;  // integer compare of the cast result
+  }
+  return false;  // any other kind of value cannot be stored as a constant
+}
+
+bool LIRGenerator::can_inline_as_constant(Value v) const {
+  if (v->type()->as_IntConstant() != NULL) {
+    int value = v->type()->as_IntConstant()->value();
+    // both value and -value must be valid add immediates, because value may also be used by a sub
+    return Assembler::operand_valid_for_add_immediate(value) &&
+           Assembler::operand_valid_for_add_immediate(- value);
+  } else if (v->type()->as_ObjectConstant() != NULL) {
+    return v->type()->as_ObjectConstant()->value()->is_null_object();
+  } else if (v->type()->as_LongConstant() != NULL) {
+    long value = v->type()->as_LongConstant()->value();
+    // both value and -value must be valid add immediates, because value may also be used by a sub
+    return Assembler::operand_valid_for_add_immediate(value) &&
+           Assembler::operand_valid_for_add_immediate(- value);
+  } else if (v->type()->as_FloatConstant() != NULL) {
+    return v->type()->as_FloatConstant()->value() == 0.0f;
+  } else if (v->type()->as_DoubleConstant() != NULL) {
+    return v->type()->as_DoubleConstant()->value() == 0.0;
+  }
+  return false;
+}
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  if (c->as_constant() != NULL) {
+    long constant = 0;
+    switch (c->type()) {
+      case T_INT:  constant = c->as_jint();   break;
+      case T_LONG: constant = c->as_jlong();  break;
+      default:     return false;  // only T_INT and T_LONG constants are considered
+    }
+    // both constant and -constant must be valid add immediates, because c may also be used by a sub
+    return Assembler::operand_valid_for_add_immediate(constant) &&
+           Assembler::operand_valid_for_add_immediate(- constant);
+  }
+  return false;
+}
+
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return LIR_OprFact::illegalOpr;
+}
+
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  if (index->is_constant()) {  // fold a constant index into one offset: (index << shift) + disp
+    LIR_Const *constant = index->as_constant_ptr();
+    jlong c;
+    if (constant->type() == T_INT) {
+      c = (jlong(index->as_jint()) << shift) + disp;
+    } else {
+      assert(constant->type() == T_LONG, "should be");
+      c = (index->as_jlong() << shift) + disp;
+    }
+    if ((jlong)((jint)c) == c) {  // offset fits in 32 bits: use it as the displacement
+      return new LIR_Address(base, (jint)c, type);
+    } else {
+      LIR_Opr tmp = new_register(T_LONG);
+      __ move(LIR_OprFact::longConst(c), tmp);  // load the full offset, not the raw index (keeps shift and disp)
+      return new LIR_Address(base, tmp, type);
+    }
+  }
+
+  return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type);
+}
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type) {
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+  return generate_address(array_opr, index_opr, shift, offset_in_bytes, type);
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr r;
+  switch (type) {
+    case T_LONG:
+      r = LIR_OprFact::longConst(x);
+      break;
+    case T_INT:
+      r = LIR_OprFact::intConst(x);
+      break;
+    default:
+      ShouldNotReachHere();
+      r = NULL;
+  }
+  return r;
+}
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr reg = new_register(addr->type());
+  __ load(addr, reg);
+  __ add(reg, load_immediate(step, addr->type()), reg);
+  __ store(reg, addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr reg = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), reg, info);
+  __ cmp(condition, reg, LIR_OprFact::intConst(c));
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr reg1 = new_register(T_INT);
+  __ load(generate_address(base, disp, type), reg1, info);
+  __ cmp(condition, reg, reg1);
+}
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
+  if (tmp->is_valid() && c > 0 && c < max_jint) {
+    if (is_power_of_2(c - 1)) {
+      __ shift_left(left, exact_log2(c - 1), tmp);
+      __ add(tmp, left, result);
+      return true;
+    } else if (is_power_of_2(c + 1)) {
+      __ shift_left(left, exact_log2(c + 1), tmp);
+      __ sub(tmp, left, result);
+      return true;
+    }
+  }
+  return false;
+}
+
+void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
+  BasicType type = item->type();
+  __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type));
+}
+
+void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info,
+                                     ciMethod* profiled_method, int profiled_bci) {
+    LIR_Opr tmp1 = new_register(objectType);
+    LIR_Opr tmp2 = new_register(objectType);
+    LIR_Opr tmp3 = new_register(objectType);
+    __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci);
+}
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(), "");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // "lock" stores the address of the monitor stack slot, so this is not an oop
+  LIR_Opr lock = new_register(T_INT);
+  // Need a scratch register for biased locking
+  LIR_Opr scratch = LIR_OprFact::illegalOpr;
+  if (UseBiasedLocking) {
+    scratch = new_register(T_INT);
+  }
+
+  CodeEmitInfo* info_for_exception = NULL;
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x);
+  }
+  // this CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expect object to be unlocked)
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, syncTempOpr(), scratch,
+                x->monitor_no(), info_for_exception, info);
+}
+
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(), "");
+
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item();
+
+  LIR_Opr lock = new_register(T_INT);
+  LIR_Opr obj_temp = new_register(T_INT);
+  set_no_result(x);
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+// neg
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem from(x->x(), this);
+  from.load_item();
+  LIR_Opr result = rlock_result(x);
+  __ negate(from.result(), result);
+}
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) {
+
+    // float remainder is implemented as a direct call into the runtime
+    BasicTypeList signature(2);
+    if (x->op() == Bytecodes::_frem) {
+      signature.append(T_FLOAT);
+      signature.append(T_FLOAT);
+    } else {
+      signature.append(T_DOUBLE);
+      signature.append(T_DOUBLE);
+    }
+    CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+    const LIR_Opr result_reg = result_register_for(x->type());
+
+    left.load_item();
+    __ move(left.result(), cc->at(0));
+    right.load_item_force(cc->at(1));
+
+    address entry;
+    if (x->op() == Bytecodes::_frem) {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+    } else {
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+    }
+
+    LIR_Opr result = rlock_result(x);
+    __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
+    __ move(result_reg, result);
+
+    return;
+  }
+
+  if (!left.is_register()) {
+    left.load_item();
+  }
+  // Always load right hand side.
+  right.load_item();
+
+  LIR_Opr reg = rlock(x);
+  // No scratch register is allocated for strictfp dmul/ddiv: the
+  // arithmetic_op_fpu call below is not given one, so reserving a
+  // T_DOUBLE register here would only leave an unused register
+  // behind.
+
+  arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp());
+
+  set_result(x, round_item(reg));
+}
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+
+  // missing test if instr is commutative and if we should swap
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
+
+    left.load_item();
+
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jlong c = right.get_jlong_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) { need_zero_check = false; }
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2_long(c)) {
+        right.dont_load_item();
+      } else {
+        right.load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+    }
+
+    rlock_result(x);
+    switch (x->op()) {
+      case Bytecodes::_lrem:
+        __ rem(left.result(), right.result(), x->operand());
+        break;
+      case Bytecodes::_ldiv:
+        __ div(left.result(), right.result(), x->operand());
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else {
+    assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub,
+           "expect lmul, ladd or lsub");
+    // add, sub, mul
+    left.load_item();
+    if (!right.is_register()) {
+      if (x->op() == Bytecodes::_lmul ||
+          !right.is_constant() ||
+          (x->op() == Bytecodes::_ladd &&
+          !Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) ||
+          (x->op() == Bytecodes::_lsub &&
+          !Assembler::operand_valid_for_add_immediate(-right.get_jlong_constant()))) {
+            right.load_item();
+      } else { // add, sub
+        assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expected ladd or lsub");
+        // don't load constants to save register
+        right.load_nonconstant();
+      }
+    }
+    rlock_result(x);
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+// for: _iadd, _imul, _isub, _idiv, _irem
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+
+  // Test if instr is commutative and if we should swap
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+  LIRItem* left_arg = &left;
+  LIRItem* right_arg = &right;
+  if (x->is_commutative() && left.is_stack() && right.is_register()) {
+    // swap them if left is real stack (or cached) and right is real register(not cached)
+    left_arg = &right;
+    right_arg = &left;
+  }
+  left_arg->load_item();
+  // do not need to load right, as we can handle stack and constants
+  if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
+
+    rlock_result(x);
+
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jint c = right.get_jint_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) { need_zero_check = false; }
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2(c)) {
+        right_arg->dont_load_item();
+      } else {
+        right_arg->load_item();
+      }
+    } else {
+      right_arg->load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+    }
+
+    LIR_Opr ill = LIR_OprFact::illegalOpr;
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
+    }
+
+  } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {
+    if (right.is_constant() &&
+        ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) ||
+         (x->op() == Bytecodes::_isub && !Assembler::operand_valid_for_add_immediate(-right.get_jint_constant())))) {
+      right.load_nonconstant();
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr);
+  } else {
+    assert (x->op() == Bytecodes::_imul, "expect imul");
+    if (right.is_constant()) {
+      jint c = right.get_jint_constant();
+      if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) {
+        right_arg->dont_load_item();
+      } else {
+        // Cannot use constant op.
+        right_arg->load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    rlock_result(x);
+    arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT));
+  }
+}
+
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  // when an operand with use count 1 is the left operand, then it is
+  // likely that no move for 2-operand-LIR-form is necessary
+  if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) {
+    x->swap_operands();
+  }
+
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag:  do_ArithmeticOp_FPU(x);  return;
+    case longTag:    do_ArithmeticOp_Long(x); return;
+    case intTag:     do_ArithmeticOp_Int(x);  return;
+    default:         ShouldNotReachHere();    return;
+  }
+}
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  LIRItem value(x->x(), this);
+  LIRItem count(x->y(), this);
+
+  value.load_item();
+  if (count.is_constant()) {
+    assert(count.type()->as_IntConstant() != NULL || count.type()->as_LongConstant() != NULL , "should be");
+    count.dont_load_item();
+  } else {
+    count.load_item();
+  }
+
+  LIR_Opr res = rlock_result(x);
+  shift_op(x->op(), res, value.result(), count.result(), LIR_OprFact::illegalOpr);
+}
+
+
+// _iand, _land, _ior, _lor, _ixor, _lxor
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+
+  LIRItem left(x->x(),  this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+  rlock_result(x);
+  ValueTag tag = right.type()->tag();
+  if (right.is_constant() &&
+     ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) ||
+      (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant()))))  {
+    right.dont_load_item();
+  } else {
+    right.load_item();
+  }
+
+  switch (x->op()) {
+    case Bytecodes::_iand:  // fall through
+    case Bytecodes::_land:
+      __ logical_and(left.result(), right.result(), x->operand()); break;
+    case Bytecodes::_ior:   // fall through
+    case Bytecodes::_lor:
+      __ logical_or(left.result(), right.result(), x->operand()); break;
+    case Bytecodes::_ixor:  // fall through
+    case Bytecodes::_lxor:
+      __ logical_xor(left.result(), right.result(), x->operand()); break;
+    default: Unimplemented();
+  }
+}
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  ValueTag tag = x->x()->type()->tag();
+  if (tag == longTag) {
+    left.set_destroys_register();
+  }
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();
+  }
+}
+
+LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) {
+  LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
+  new_value.load_item();  // new_value is loaded before cmp_value
+  cmp_value.load_item();
+  LIR_Opr result = new_register(T_INT);  // returned to the caller; written by the xor at the end
+  if (is_reference_type(type)) {
+    __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result);
+  } else if (type == T_INT) {
+    __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill);  // base of addr only; no temps
+  } else if (type == T_LONG) {
+    __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill);  // same shape as the int case
+  } else {
+    ShouldNotReachHere();  // only reference, T_INT and T_LONG are handled
+  }
+  __ logical_xor(FrameMap::r5_opr, LIR_OprFact::intConst(1), result);  // result = r5 ^ 1; NOTE(review): assumes the CAS leaves its status in r5 -- confirm
+  return result;
+}
+
+LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) {
+  // Emits an xchg on addr and returns the register that was passed to it as the result operand.
+  LIR_Opr dst = new_register(type);
+  value.load_item();
+  assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
+  LIR_Opr scratch = new_register(T_INT);
+  __ xchg(addr, value.result(), dst, scratch);
+  return dst;
+}
+
+LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) {
+  LIR_Opr dst = new_register(type);       // passed to xadd as its result operand, then returned
+  value.load_item();
+  assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type");
+  LIR_Opr scratch = new_register(T_INT);  // temporary operand for xadd
+  __ xadd(addr, value.result(), dst, scratch);
+  return dst;
+}
+
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow),
+         "wrong type");
+
+  // Two families are handled: intrinsics forwarded to do_LibmIntrinsic, and
+  // _dabs/_dsqrt, each of which is emitted below as a single LIR operation.
+  switch (x->id()) {
+    case vmIntrinsics::_dexp:
+    case vmIntrinsics::_dlog:
+    case vmIntrinsics::_dlog10:
+    case vmIntrinsics::_dpow:
+    case vmIntrinsics::_dsin:
+    case vmIntrinsics::_dcos:
+    case vmIntrinsics::_dtan:
+      do_LibmIntrinsic(x);
+      return;
+
+    case vmIntrinsics::_dabs:
+    case vmIntrinsics::_dsqrt:
+      break;  // emitted after the switch
+
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+
+  assert(x->number_of_arguments() == 1, "wrong type");
+  LIRItem arg(x->argument_at(0), this);
+  arg.load_item();
+  LIR_Opr dst = rlock_result(x);
+
+  if (x->id() == vmIntrinsics::_dsqrt) {
+    __ sqrt(arg.result(), dst, LIR_OprFact::illegalOpr);
+  } else if (x->id() == vmIntrinsics::_dabs) {
+    __ abs(arg.result(), dst, LIR_OprFact::illegalOpr);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
+  // Emits a leaf call for the math intrinsic x->id(): the StubRoutines entry is
+  // used when it is non-NULL, otherwise the SharedRuntime function of the same name.
+  LIRItem value(x->argument_at(0), this);
+  value.set_destroys_register();
+
+  LIR_Opr calc_result = rlock_result(x);
+  LIR_Opr result_reg = result_register_for(x->type());
+
+  // Force the argument(s) into the locations given by the C calling convention.
+  const bool two_args = (x->id() == vmIntrinsics::_dpow);
+  CallingConvention* cc = NULL;
+  if (two_args) {
+    LIRItem value1(x->argument_at(1), this);
+    value1.set_destroys_register();
+
+    BasicTypeList signature(2);
+    signature.append(T_DOUBLE);
+    signature.append(T_DOUBLE);
+    cc = frame_map()->c_calling_convention(&signature);
+    value.load_item_force(cc->at(0));
+    value1.load_item_force(cc->at(1));
+  } else {
+    BasicTypeList signature(1);
+    signature.append(T_DOUBLE);
+    cc = frame_map()->c_calling_convention(&signature);
+    value.load_item_force(cc->at(0));
+  }
+
+  // Pick the call target for this intrinsic.
+  address entry = NULL;
+  switch (x->id()) {
+    case vmIntrinsics::_dexp:
+      entry = (StubRoutines::dexp() != NULL) ? StubRoutines::dexp() : CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+      break;
+    case vmIntrinsics::_dlog:
+      entry = (StubRoutines::dlog() != NULL) ? StubRoutines::dlog() : CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+      break;
+    case vmIntrinsics::_dlog10:
+      entry = (StubRoutines::dlog10() != NULL) ? StubRoutines::dlog10() : CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+      break;
+    case vmIntrinsics::_dsin:
+      entry = (StubRoutines::dsin() != NULL) ? StubRoutines::dsin() : CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+      break;
+    case vmIntrinsics::_dcos:
+      entry = (StubRoutines::dcos() != NULL) ? StubRoutines::dcos() : CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+      break;
+    case vmIntrinsics::_dtan:
+      entry = (StubRoutines::dtan() != NULL) ? StubRoutines::dtan() : CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+      break;
+    case vmIntrinsics::_dpow:
+      entry = (StubRoutines::dpow() != NULL) ? StubRoutines::dpow() : CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args());
+  __ move(result_reg, calc_result);
+}
+
+
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {  // five arguments: src, src_pos, dst, dst_pos, length
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Make all state_for calls early since they can emit code
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem src(x->argument_at(0), this);
+  LIRItem src_pos(x->argument_at(1), this);
+  LIRItem dst(x->argument_at(2), this);
+  LIRItem dst_pos(x->argument_at(3), this);
+  LIRItem length(x->argument_at(4), this);
+
+  // operands for arraycopy must use fixed registers, otherwise
+  // LinearScan will fail allocation (because arraycopy always needs a
+  // call)
+
+  // The java calling convention will give us enough registers
+  // so that on the stub side the args will be perfect already.
+  // On the other slow/special case side we call C and the arg
+  // positions are not similar enough to pick one as the best.
+  // Also because the java calling convention is a "shifted" version
+  // of the C convention we can process the java args trivially into C
+  // args without worry of overwriting during the xfer
+
+  src.load_item_force     (FrameMap::as_oop_opr(j_rarg0));
+  src_pos.load_item_force (FrameMap::as_opr(j_rarg1));
+  dst.load_item_force     (FrameMap::as_oop_opr(j_rarg2));
+  dst_pos.load_item_force (FrameMap::as_opr(j_rarg3));
+  length.load_item_force  (FrameMap::as_opr(j_rarg4));
+
+  LIR_Opr tmp = FrameMap::as_opr(j_rarg5);  // j_rarg5 is handed to arraycopy as its temp operand
+
+  set_no_result(x);  // x is marked as producing no result
+
+  int flags;  // written by arraycopy_helper below
+  ciArrayKlass* expected_type = NULL;  // passed by address; arraycopy_helper may replace it
+  arraycopy_helper(x, &flags, &expected_type);
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp,
+               expected_type, flags, info); // does add_safepoint
+}
+
+void LIRGenerator::do_update_CRC32(Intrinsic* x) {
+  ShouldNotReachHere();  // no LIR is generated here; being called at all is treated as an error
+}
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  ShouldNotReachHere();  // no LIR is generated here; being called at all is treated as an error
+}
+
+void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
+  assert(x->number_of_arguments() == 3, "wrong type");
+  assert(UseFMA, "Needs FMA instructions support.");
+  LIRItem arg0(x->argument_at(0), this);
+  LIRItem arg1(x->argument_at(1), this);
+  LIRItem arg2(x->argument_at(2), this);
+  arg0.load_item();  // operands are loaded in argument order
+  arg1.load_item();
+  arg2.load_item();
+  LIR_Opr in0 = arg0.result();
+  LIR_Opr in1 = arg1.result();
+  LIR_Opr in2 = arg2.result();
+  LIR_Opr dst = rlock_result(x);
+
+  if (x->id() == vmIntrinsics::_fmaD) {
+    __ fmad(in0, in1, in2, dst);
+  } else if (x->id() == vmIntrinsics::_fmaF) {
+    __ fmaf(in0, in1, in2, dst);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // unconditional: x is never inspected
+}
+
+// Conversion bytecodes: _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
+// and _i2b, _i2c, _i2s
+void LIRGenerator::do_Convert(Convert* x) {
+  // Load the source value and take its operand, then reserve
+  // the operand that will hold the converted value.
+  LIRItem src(x->value(), this);
+  src.load_item();
+  LIR_Opr from = src.result();
+  LIR_Opr to = rlock(x);
+
+  // Source and destination are handed to lir_convert unchanged.
+  __ convert(x->op(), from, to);
+
+  // The destination is published as the result of x only after the conversion is emitted.
+  assert(to->is_virtual(), "result must be virtual register");
+  set_result(x, to);
+}
+
+void LIRGenerator::do_NewInstance(NewInstance* x) {
+#ifndef PRODUCT
+  if (PrintNotLoaded && !x->klass()->is_loaded()) {  // diagnostic print, compiled only into non-product builds
+    tty->print_cr("   ###class not loaded at new bci %d", x->printable_bci());
+  }
+#endif
+  CodeEmitInfo* info = state_for(x, x->state());
+  LIR_Opr reg = result_register_for(x->type());  // fixed register passed to new_instance as its first operand
+  new_instance(reg, x->klass(), x->is_unresolved(),
+               FrameMap::r12_oop_opr,  // NOTE(review): r12/r15/r14 look like scratch registers -- confirm against new_instance()
+               FrameMap::r15_oop_opr,
+               FrameMap::r14_oop_opr,
+               LIR_OprFact::illegalOpr,  // this operand slot is passed as illegal
+               FrameMap::r13_metadata_opr,  // NOTE(review): presumably the klass register -- confirm
+               info);
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);  // copy from the fixed register into the operand locked for x
+}
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {  // emits allocate_array for an array of x->elt_type()
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item_force(FrameMap::r9_opr);  // the length is forced into r9
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r12_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r14_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r15_oop_opr;
+  LIR_Opr tmp4 = reg;  // the fourth temp aliases the result register
+  LIR_Opr klass_reg = FrameMap::r13_metadata_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);  // klass constant for elem_type
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);  // handed to allocate_array as its stub
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);  // copy from the fixed register into the operand locked for x
+}
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
+  LIRItem length(x->length(), this);
+  // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction
+  // and therefore provide the state before the parameters have been consumed
+  CodeEmitInfo* patching_info = NULL;  // stays NULL when the klass is loaded and PatchALot is off
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info =  state_for(x, x->state_before());
+  }
+
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr tmp1 = FrameMap::r12_oop_opr;
+  LIR_Opr tmp2 = FrameMap::r14_oop_opr;
+  LIR_Opr tmp3 = FrameMap::r15_oop_opr;
+  LIR_Opr tmp4 = reg;  // the fourth temp aliases the result register
+  LIR_Opr klass_reg = FrameMap::r13_metadata_opr;
+
+  length.load_item_force(FrameMap::r9_opr);  // the length is forced into r9
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);  // handed to allocate_array as its stub
+  ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass());  // array klass for the element klass of x
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  klass2reg_with_patching(klass_reg, obj, patching_info);
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);  // copy from the fixed register into the operand locked for x
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
+  Values* dims = x->dims();
+  int i = dims->length();
+  LIRItemList* items = new LIRItemList(i, i, NULL);
+  while (i-- > 0) {  // wrap every dimension in a LIRItem, last dimension first
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  i = dims->length();
+  while (i-- > 0) {  // load each dimension and store it at stack offset i * BytesPerInt
+    LIRItem* size = items->at(i);
+    size->load_item();
+
+    store_stack_parameter(size->result(), in_ByteSize(i * BytesPerInt));
+  }
+
+  LIR_Opr klass_reg = FrameMap::r10_metadata_opr;
+  klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::r9_opr;
+  __ move(LIR_OprFact::intConst(x->rank()), rank);  // rank is passed as an int constant in r9
+  LIR_Opr varargs = FrameMap::r12_opr;
+  __ move(FrameMap::sp_opr, varargs);  // r12 receives sp, the base the dimensions were stored against
+  LIR_OprList* args = new LIR_OprList(3);  // runtime call arguments, in order: klass, rank, varargs
+  args->append(klass_reg);
+  args->append(rank);
+  args->append(varargs);
+  LIR_Opr reg = result_register_for(x->type());
+  __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result);  // copy from the fixed register into the operand locked for x
+}
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // Intentionally empty: nothing to do for now
+}
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+
+  CodeEmitInfo* patching_info = NULL;  // stays NULL unless the condition below holds
+  if (!x->klass()->is_loaded() ||
+      (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) {
+    // must do this before locking the destination register as an oop register,
+    // and before the obj is loaded (the latter is for deoptimization)
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+
+  // info for exceptions
+  CodeEmitInfo* info_for_exception =
+      (x->needs_exception_state() ? state_for(x) :
+                                    state_for(x, x->state_before(), true /*ignore_xhandler*/ ));
+
+  CodeStub* stub = NULL;  // which stub is built depends on the kind of check x represents
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr,
+                                   info_for_exception);
+  } else if (x->is_invokespecial_receiver_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new DeoptimizeStub(info_for_exception,
+                              Deoptimization::Reason_class_check,
+                              Deoptimization::Action_none);
+  } else {
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  LIR_Opr reg = rlock_result(x);
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;  // a third temp is allocated only under the condition below
+  if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
+    tmp3 = new_register(objectType);
+  }
+  __ checkcast(reg, obj.result(), x->klass(),
+               new_register(objectType), new_register(objectType), tmp3,
+               x->direct_compare(), info_for_exception, patching_info, stub,
+               x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_InstanceOf(InstanceOf* x) {
+  LIRItem obj(x->obj(), this);
+
+  // result and test object may not be in same register
+  LIR_Opr reg = rlock_result(x);  // the result operand is locked before obj is loaded
+  CodeEmitInfo* patching_info = NULL;  // stays NULL when the klass is loaded and PatchALot is off
+  if ((!x->klass()->is_loaded() || PatchALot)) {
+    // must do this before locking the destination register as an oop register
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+  LIR_Opr tmp3 = LIR_OprFact::illegalOpr;  // a third temp is allocated only under the condition below
+  if (!x->klass()->is_loaded() || UseCompressedClassPointers) {
+    tmp3 = new_register(objectType);
+  }
+  __ instanceof(reg, obj.result(), x->klass(),
+                new_register(objectType), new_register(objectType), tmp3,
+                x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci());
+}
+
+void LIRGenerator::do_If(If* x) {
+  // If should have two successors
+  assert(x->number_of_sux() == 2, "inconsistency");
+  ValueTag tag = x->x()->type()->tag();
+  bool is_safepoint = x->is_safepoint();  // NOTE(review): never read below; the later check calls x->is_safepoint() again
+
+  If::Condition cond = x->cond();
+
+  LIRItem xitem(x->x(), this);
+  LIRItem yitem(x->y(), this);
+  LIRItem* xin = &xitem;  // xin/yin select which item becomes the left/right compare operand
+  LIRItem* yin = &yitem;
+
+  if (tag == longTag) {
+    // for longs, only conditions "eql", "neq", "lss", "geq" are valid;
+    // mirror for other conditions
+    if (cond == If::gtr || cond == If::leq) {
+      cond = Instruction::mirror(cond);
+      xin = &yitem;  // the operands are swapped together with the mirrored condition
+      yin = &xitem;
+    }
+    xin->set_destroys_register();
+  }
+  xin->load_item();
+  yin->load_item();
+
+  set_no_result(x);
+
+  LIR_Opr left = xin->result();
+  LIR_Opr right = yin->result();
+
+  // add safepoint before generating condition code so it can be recomputed
+  if (x->is_safepoint()) {
+    // increment backedge counter if needed
+    increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()),
+                                             x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci());
+    __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before()));  // uses its own, second state_for result
+  }
+
+  // Generate branch profiling. Profiling code doesn't kill flags.
+  __ cmp(lir_cond(cond), left, right);
+  profile_branch(x, cond);
+  move_to_phi(x->state());
+  if (x->x()->type()->is_float_kind()) {
+    __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux());  // float compares also pass the usux() successor
+  } else {
+    __ branch(lir_cond(cond), right->type(), x->tsux());
+  }
+  assert(x->default_sux() == x->fsux(), "wrong destination above");
+  __ jump(x->default_sux());  // emitted after the conditional branch; targets the default successor
+}
+
+LIR_Opr LIRGenerator::getThreadPointer() {
+   return FrameMap::as_pointer_opr(xthread);  // the xthread register wrapped as a pointer operand
+}
+
+void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); }  // block-entry tracing has no implementation here
+
+void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address,
+                                        CodeEmitInfo* info) {
+  __ volatile_store_mem_reg(value, address, info);  // straight delegation; no extra LIR is added here
+}
+
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
+                                       CodeEmitInfo* info) {
+  __ volatile_load_mem_reg(address, result, info);  // straight delegation; no extra LIR is added here
+}
diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp
new file mode 100644
index 0000000000..0317ed9003
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "c1/c1_LIR.hpp"
+
+FloatRegister LIR_OprDesc::as_float_reg() const {
+  return as_FloatRegister(fpu_regnr());  // maps this operand's fpu_regnr() to a FloatRegister
+}
+
+FloatRegister LIR_OprDesc::as_double_reg() const {
+  return as_FloatRegister(fpu_regnrLo());  // only the "Lo" register number is used for doubles
+}
+
+// Builds a double FPU operand with reg1 encoded in both register fields. Reg2 unused.
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
+  assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
+  const int reg_fields = (reg1 << LIR_OprDesc::reg1_shift) | (reg1 << LIR_OprDesc::reg2_shift);
+  return (LIR_Opr)(intptr_t)(reg_fields                |
+                             LIR_OprDesc::double_type  |
+                             LIR_OprDesc::fpu_register |
+                             LIR_OprDesc::double_size);
+}
+
+#ifndef PRODUCT
+void LIR_Address::verify() const {  // non-product builds only: sanity checks on the base and index operands
+  assert(base()->is_cpu_register(), "wrong base operand");
+  assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand");
+  assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG ||
+         base()->type() == T_METADATA, "wrong type for addresses");
+}
+#endif // PRODUCT
diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
new file mode 100644
index 0000000000..78a61128bd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LinearScan.hpp"
+#include "utilities/bitMap.inline.hpp"
+
+void LinearScan::allocate_fpu_stack() {
+  // Intentionally empty: No FPU stack on RISCV
+}
diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
new file mode 100644
index 0000000000..d7ca7b0fd0
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
+#define CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
+
+inline bool LinearScan::is_processed_reg_num(int reg_num)
+{
+  return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;  // false only for numbers strictly between the two bounds
+}
+
+inline int LinearScan::num_physical_regs(BasicType type) {
+  return 1;  // the same answer for every BasicType; 'type' is not inspected
+}
+
+inline bool LinearScan::requires_adjacent_regs(BasicType type) {
+  return false;  // the same answer for every BasicType; 'type' is not inspected
+}
+
+inline bool LinearScan::is_caller_save(int assigned_reg) {
+  assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers");
+
+  // True when assigned_reg is below pd_first_callee_saved_reg or lies strictly
+  // inside one of the three gaps tested below; false for every other number.
+  // NOTE(review): the last gap ends at '< pd_last_fpu_reg', so pd_last_fpu_reg itself yields false -- confirm intended.
+  const bool below_cpu_callee_saved = assigned_reg < pd_first_callee_saved_reg;
+  const bool between_cpu_and_fpu_1  = assigned_reg > pd_last_callee_saved_reg &&
+                                      assigned_reg < pd_first_callee_saved_fpu_reg_1;
+  const bool between_fpu_1_and_2    = assigned_reg > pd_last_callee_saved_fpu_reg_1 &&
+                                      assigned_reg < pd_first_callee_saved_fpu_reg_2;
+  const bool above_fpu_2            = assigned_reg > pd_last_callee_saved_fpu_reg_2 &&
+                                      assigned_reg < pd_last_fpu_reg;
+
+  return below_cpu_callee_saved || between_cpu_and_fpu_1 || between_fpu_1_and_2 || above_fpu_2;
+}
+
+inline void LinearScan::pd_add_temps(LIR_Op* op) {
+  // Intentionally empty: no special case behaviours yet, so 'op' is not inspected
+}
+
+
+// Implementation of LinearScanWalker
+
+inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
+    assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
+    _first_reg = pd_first_callee_saved_reg;  // callee_saved vregs: callee-saved range only
+    _last_reg = pd_last_callee_saved_reg;
+    return true;
+  }
+  const BasicType t = cur->type();
+  if (t != T_INT && t != T_LONG && t != T_OBJECT && t != T_ADDRESS && t != T_METADATA) {
+    return false;  // any other type: the range is left untouched
+  }
+  _first_reg = pd_first_cpu_reg;  // the listed types: the full allocatable cpu range
+  _last_reg = pd_last_allocatable_cpu_reg;
+  return true;
+}
+
+#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
new file mode 100644
index 0000000000..957bfa1127
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LIR.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+
+void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result,
+                                  FloatRegister freg0, FloatRegister freg1,
+                                  Register result)
+{
+  if (!is_float) {
+    double_compare(result, freg0, freg1, unordered_result);  // is_float == false selects double_compare
+    return;
+  }
+  float_compare(result, freg0, freg1, unordered_result);
+}
+
+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) {
+  const int aligned_mask = BytesPerWord - 1;
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done;
+  int null_check_offset = -1;
+
+  verify_oop(obj);
+
+  // save object being locked into the BasicObjectLock
+  sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+  if (UseBiasedLocking) {
+    assert(scratch != noreg, "should have scratch register at this point");
+    null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case);
+  } else {
+    null_check_offset = offset();  // code offset of the header load emitted just below
+  }
+
+  // Load object header
+  ld(hdr, Address(obj, hdr_offset));
+  // and mark it as unlocked
+  ori(hdr, hdr, markOopDesc::unlocked_value);
+  // save unlocked object header into the displaced header location on the stack
+  sd(hdr, Address(disp_hdr, 0));
+  // test if object header is still the same (i.e. unlocked), and if so, store the
+  // displaced header address in the object header - if it is not the same, get the
+  // object header instead
+  la(t1, Address(obj, hdr_offset));
+  cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthrough*/NULL);
+  // if the object header was the same, we're done
+  // if the object header was not the same, it is now in the hdr register
+  // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
+  //
+  // 1) (hdr & aligned_mask) == 0
+  // 2) sp <= hdr
+  // 3) hdr <= sp + page_size
+  //
+  // these 3 tests can be done by evaluating the following expression:
+  //
+  // (hdr - sp) & (aligned_mask - page_size)
+  //
+  // assuming both the stack pointer and page_size have the low bits covered
+  // by aligned_mask cleared and page_size is a power of 2
+  sub(hdr, hdr, sp);
+  mv(t0, aligned_mask - os::vm_page_size());
+  andr(hdr, hdr, t0);
+  // for recursive locking, the result is zero => save it in the displaced header
+  // location (NULL in the displaced hdr location indicates recursive locking)
+  sd(hdr, Address(disp_hdr, 0));
+  // otherwise we don't care about the result and handle locking via runtime call
+  bnez(hdr, slow_case, /* is_far */ true);
+  bind(done);
+  if (PrintBiasedLockingStatistics) {
+    la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+    incrementw(Address(t1, 0));  // bump the fast-path entry counter
+  }
+  return null_check_offset;
+}
+
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int aligned_mask = BytesPerWord - 1;  // NOTE(review): not referenced anywhere else in this function
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
+  Label done;
+
+  if (UseBiasedLocking) {
+    // load object
+    ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // load displaced header
+  ld(hdr, Address(disp_hdr, 0));
+  // if the loaded hdr is NULL we had recursive locking
+  // if we had recursive locking, we are done
+  beqz(hdr, done);
+  if (!UseBiasedLocking) {
+    // load object (with UseBiasedLocking it was already loaded above)
+    ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object - if the object header is not pointing to
+  // the displaced header, get the object header instead
+  // if the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call
+  if (hdr_offset) {
+    la(t0, Address(obj, hdr_offset));  // non-zero mark offset: form the header address in t0 first
+    cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case);
+  } else {
+    cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case);  // mark word is at offset 0: obj is the header address
+  }
+  bind(done);
+}
+
+// Defines obj (via the TLAB or the eden allocator), preserves var_size_in_bytes
+void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) {
+  if (!UseTLAB) {
+    eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, /* is_far */ true);
+    return;
+  }
+  tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, /* is_far */ true);
+}
+
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) {
+  assert_different_registers(obj, klass, len);
+  if (UseBiasedLocking && !len->is_valid()) {  // biased locking and no length register (non-array case)
+    assert_different_registers(obj, klass, len, tmp1, tmp2);
+    ld(tmp1, Address(klass, Klass::prototype_header_offset()));  // take the mark word from the klass' prototype header
+  } else {
+    // This assumes that all prototype bits fit in an int32_t
+    mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype());
+  }
+  sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes()));  // store the mark word
+
+  if (UseCompressedClassPointers) { // Take care not to kill klass
+    encode_klass_not_null(tmp1, klass);
+    sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes()));  // 32-bit store of the encoded klass
+  } else {
+    sd(klass, Address(obj, oopDesc::klass_offset_in_bytes()));  // 64-bit store of the raw klass pointer
+  }
+
+  if (len->is_valid()) {
+    sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));  // array: store the 32-bit length
+  } else if (UseCompressedClassPointers) {
+    store_klass_gap(obj, zr);  // non-array with compressed klass: zero the klass gap
+  }
+}
+
+// preserves obj, destroys len_in_bytes; zeroes the bytes between the header and len_in_bytes
+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) {
+  assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
+  Label done;
+
+  // len_in_bytes is positive and ptr sized
+  sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes);  // number of body bytes left to clear
+  beqz(len_in_bytes, done);  // nothing beyond the header: skip the clearing
+
+  // Preserve obj: advance it past the header for zero_memory, then move it back
+  if (hdr_size_in_bytes) {
+    add(obj, obj, hdr_size_in_bytes);
+  }
+  zero_memory(obj, len_in_bytes, tmp);
+  if (hdr_size_in_bytes) {
+    sub(obj, obj, hdr_size_in_bytes);
+  }
+
+  bind(done);
+}
+
+void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case) {
+  assert_different_registers(obj, tmp1, tmp2);
+  assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
+
+  try_allocate(obj, noreg, object_size * BytesPerWord, tmp1, tmp2, slow_case);  // constant size, so no size register (noreg)
+
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, tmp1, tmp2, UseTLAB);  // NOTE(review): scaled by HeapWordSize here but BytesPerWord above - presumably equal, confirm
+}
+
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, bool is_tlab_allocated) {
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+         "con_size_in_bytes is not multiple of alignment");
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  initialize_header(obj, klass, noreg, tmp1, tmp2);  // noreg length: this is not an array
+
+  if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {  // skipped only for TLAB allocations when ZeroTLAB is set
+    // clear rest of allocated space
+    const Register index = tmp2;
+    // 16: multiplier for threshold
+    const int threshold = 16 * BytesPerWord;    // approximate break even point for code size (see comments below)
+    if (var_size_in_bytes != noreg) {
+      mv(index, var_size_in_bytes);  // work on a copy: initialize_body destroys its length register
+      initialize_body(obj, index, hdr_size_in_bytes, tmp1);
+    } else if (con_size_in_bytes <= threshold) {
+      // use explicit null stores
+      int i = hdr_size_in_bytes;
+      if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { // 2: multiplier for BytesPerWord
+        sd(zr, Address(obj, i));
+        i += BytesPerWord;
+      }
+      for (; i < con_size_in_bytes; i += BytesPerWord) {
+        sd(zr, Address(obj, i));
+      }
+    } else if (con_size_in_bytes > hdr_size_in_bytes) {
+      block_comment("zero memory");
+      // use loop to null out the fields
+      int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
+      const int unroll = 8; // Number of sd(zr) instructions we'll unroll
+      int remainder = words % unroll;
+
+      mv(index, words / unroll); // number of full passes through the unrolled loop body
+      la(t0, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
+
+      Label entry_point, loop;
+      j(entry_point);  // enter mid-body so that only the last `remainder` stores run on the first pass
+
+      bind(loop);
+      sub(index, index, 1);
+      for (int i = -unroll; i < 0; i++) {
+        if (-i == remainder) {
+          bind(entry_point);
+        }
+        sd(zr, Address(t0, i * wordSize));
+      }
+      if (remainder == 0) {
+        bind(entry_point);  // no partial pass needed: enter just after the stores
+      }
+      add(t0, t0, unroll * wordSize);
+      bnez(index, loop);
+    }
+  }
+
+  membar(MacroAssembler::StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == x10, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+
+void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) {
+  assert_different_registers(obj, len, tmp1, tmp2, klass);
+
+  // determine alignment mask
+  assert(!(BytesPerWord & 1), "must be multiple of 2 for masking code to work");
+
+  // check for negative or excessive length
+  mv(t0, (int32_t)max_array_allocation_length);
+  bgeu(len, t0, slow_case, /* is_far */ true);  // unsigned compare, so a negative len also goes to slow_case
+
+  const Register arr_size = tmp2; // okay to be the same
+  // align object end: arr_size = (header bytes + (len << f) + alignment mask) & ~alignment mask
+  mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
+  shadd(arr_size, len, arr_size, t0, f);
+  andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);
+
+  try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case);  // variable size is passed in arr_size, constant size is 0
+
+  initialize_header(obj, klass, len, tmp1, tmp2);
+
+  // clear rest of allocated space; len is reused as the temp register from here on
+  const Register len_zero = len;
+  initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
+
+  membar(MacroAssembler::StoreStore);
+
+  if (CURRENT_ENV->dtrace_alloc_probes()) {
+    assert(obj == x10, "must be");
+    far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
+  }
+
+  verify_oop(obj);
+}
+
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, Label &L) {
+  verify_oop(receiver);
+  // explicit NULL check not needed since load from [klass_offset] causes a trap
+  // check against inline cache
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
+  cmp_klass(receiver, iCache, t0, L);  // compares the receiver's klass with iCache using t0 as temp; L is the label handed to cmp_klass
+}
+
+void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump. For this action to be legal we
+  // must ensure that this first instruction is a J, JAL or NOP.
+  // Make it a NOP.
+  nop();
+
+  assert(bang_size_in_bytes >= framesize, "stack bang size incorrect");
+  // Make sure there is enough stack space for this method's activation.
+  // Note that we do this before creating a frame.
+  generate_stack_overflow_check(bang_size_in_bytes);
+  MacroAssembler::build_frame(framesize);  // the frame itself is built by the base-class implementation
+}
+
+void C1_MacroAssembler::remove_frame(int framesize) {
+  MacroAssembler::remove_frame(framesize);  // plain forward to the base-class implementation
+}
+
+
+void C1_MacroAssembler::verified_entry() {
+  assert_alignment(pc());  // emits no code here; only checks the alignment of the current pc
+}
+
+void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
+  //  fp + -2: link
+  //     + -1: return address
+  //     +  0: argument with offset 0
+  //     +  1: argument with offset 1
+  //     +  2: ...
+  ld(reg, Address(fp, offset_in_words * BytesPerWord));  // word offset scaled to bytes, addressed relative to fp
+}
+
+#ifndef PRODUCT
+
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  // The check is emitted only when VerifyOops is enabled
+  if (VerifyOops) {
+    verify_oop_addr(Address(sp, stack_offset), "oop");
+  }
+}
+
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) return;  // nothing is emitted unless VerifyOops is enabled
+  Label not_null;
+  bnez(r, not_null);  // non-zero register: skip the stop
+  stop("non-null oop required");
+  bind(not_null);
+  verify_oop(r);
+}
+
+void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) {
+#ifdef ASSERT
+  static int nn;  // running counter: each emitted x12 invalidation gets the next value
+  if (inv_x10) { mv(x10, 0xDEAD); }  // selected registers are overwritten with a marker value
+  if (inv_x9)  { mv(x9, 0xDEAD);  }
+  if (inv_x12) { mv(x12, nn++);   }  // x12 receives the counter instead of 0xDEAD
+  if (inv_x13) { mv(x13, 0xDEAD); }
+  if (inv_x14) { mv(x14, 0xDEAD); }
+  if (inv_x15) { mv(x15, 0xDEAD); }
+#endif // ASSERT
+}
+#endif // ifndef PRODUCT
+
+typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far);  // element type of c1_cond_branch
+typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2,
+              Label& label, bool is_far, bool is_unordered);  // element type of c1_float_cond_branch
+
+static c1_cond_branch_insn c1_cond_branch[] =
+{ // indexed by cmpFlag in c1_cmp_branch; NOTE(review): entry order presumably mirrors the lir_cond_* values - confirm
+  /* SHORT branches */
+  (c1_cond_branch_insn)&Assembler::beq,
+  (c1_cond_branch_insn)&Assembler::bne,
+  (c1_cond_branch_insn)&Assembler::blt,
+  (c1_cond_branch_insn)&Assembler::ble,
+  (c1_cond_branch_insn)&Assembler::bge,
+  (c1_cond_branch_insn)&Assembler::bgt,
+  (c1_cond_branch_insn)&Assembler::bleu, // lir_cond_belowEqual
+  (c1_cond_branch_insn)&Assembler::bgeu  // lir_cond_aboveEqual
+};
+
+static c1_float_cond_branch_insn c1_float_cond_branch[] =
+{ // indexed by cmpFlag in c1_float_cmp_branch: 8 FLOAT slots followed by 8 DOUBLE slots
+  /* FLOAT branches */
+  (c1_float_cond_branch_insn)&MacroAssembler::float_beq,
+  (c1_float_cond_branch_insn)&MacroAssembler::float_bne,
+  (c1_float_cond_branch_insn)&MacroAssembler::float_blt,
+  (c1_float_cond_branch_insn)&MacroAssembler::float_ble,
+  (c1_float_cond_branch_insn)&MacroAssembler::float_bge,
+  (c1_float_cond_branch_insn)&MacroAssembler::float_bgt,
+  NULL, // lir_cond_belowEqual (no float branch provided for this slot)
+  NULL, // lir_cond_aboveEqual (no float branch provided for this slot)
+
+  /* DOUBLE branches */
+  (c1_float_cond_branch_insn)&MacroAssembler::double_beq,
+  (c1_float_cond_branch_insn)&MacroAssembler::double_bne,
+  (c1_float_cond_branch_insn)&MacroAssembler::double_blt,
+  (c1_float_cond_branch_insn)&MacroAssembler::double_ble,
+  (c1_float_cond_branch_insn)&MacroAssembler::double_bge,
+  (c1_float_cond_branch_insn)&MacroAssembler::double_bgt,
+  NULL, // lir_cond_belowEqual (no double branch provided for this slot)
+  NULL  // lir_cond_aboveEqual (no double branch provided for this slot)
+};
+
+void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label,
+                                      BasicType type, bool is_far) {
+  const bool is_oop_compare = (type == T_OBJECT || type == T_ARRAY);
+  if (!is_oop_compare) {
+    // Non-oop operands: dispatch through the c1_cond_branch table, indexed by cmpFlag.
+    assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])),
+           "invalid c1 conditional branch index");
+    (this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far);
+    return;
+  }
+  // Oop operands may only be compared for equality or inequality.
+  assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual");
+  if (cmpFlag == lir_cond_equal) { beq(op1, op2, label, is_far); return; }
+  bne(op1, op2, label, is_far);
+}
+
+void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label,
+                                            bool is_far, bool is_unordered) {
+  assert(cmpFlag >= 0 &&  // index must be in range AND name a populated slot: the table holds NULL entries
+         cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])) &&
+         c1_float_cond_branch[cmpFlag] != NULL, "invalid c1 float conditional branch index");
+  (this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered);  // dispatch through the member-function table
+}
diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
new file mode 100644
index 0000000000..1950cee5dd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
+#define CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
+
+using MacroAssembler::build_frame;
+using MacroAssembler::null_check;
+
+// C1_MacroAssembler contains high-level macros for C1
+
+ private:
+  int _rsp_offset;    // track rsp changes (NOTE(review): "rsp" naming looks inherited from another port - confirm it is still needed)
+  // initialization: resets _rsp_offset to 0
+  void pd_init() { _rsp_offset = 0; }
+
+
+ public:
+  void try_allocate(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if known at compile time
+    Register tmp1,                     // temp register
+    Register tmp2,                     // temp register
+    Label&   slow_case                 // continuation point if fast allocation fails
+  );
+
+  void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2);
+  void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp);
+
+  void float_cmp(bool is_float, int unordered_result,
+                 FloatRegister f0, FloatRegister f1,
+                 Register result);
+
+  // locking
+  // hdr     : contents destroyed (NOTE(review): earlier text said "must be x10"; the definition does not assert this)
+  // obj     : must point to the object to lock, contents preserved
+  // disp_hdr: must point to the displaced header location, contents preserved
+  // scratch : scratch register, contents destroyed
+  // returns code offset at which to add null check debug information
+  int lock_object  (Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case);
+
+  // unlocking
+  // hdr     : contents destroyed
+  // obj     : overwritten with the object loaded from the BasicObjectLock at disp_hdr (on the non-recursive path, or always with UseBiasedLocking)
+  // disp_hdr: must point to the displaced header location, contents destroyed (NOTE(review): earlier text said "must be x10"; the definition does not assert this)
+  void unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case);
+
+  void initialize_object(
+    Register obj,                      // result: pointer to object after successful allocation
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register tmp1,                     // temp register
+    Register tmp2,                     // temp register
+    bool     is_tlab_allocated         // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB
+  );
+
+  // allocation of fixed-size objects
+  // (can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards)
+  // obj        : will contain pointer to allocated object
+  // tmp1, tmp2 : temp registers - contents destroyed
+  // header_size: size of object header in words
+  // object_size: total size of object in words
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case);
+
+  enum {
+    max_array_allocation_length = 0x00FFFFFF
+  };
+
+  // allocation of arrays
+  // obj        : will contain pointer to allocated object
+  // len        : array length in number of elements
+  // tmp1, tmp2 : temp registers - contents destroyed
+  // header_size: size of object header in words
+  // f          : element scale factor
+  // slow_case  : exit to slow case implementation if fast allocation fails
+  void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case);
+
+  int  rsp_offset() const { return _rsp_offset; }  // read-only accessor for _rsp_offset
+
+  void invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) PRODUCT_RETURN;  // each flag selects the RISC-V register it is named after
+
+  // This platform only uses signal-based null checks, so Lnull is accepted but ignored.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
+  void load_parameter(int offset_in_words, Register reg);
+
+  void inline_cache_check(Register receiver, Register iCache, Label &L);
+
+  static const int c1_double_branch_mask = 1 << 3; // depend on c1_float_cond_branch
+  void c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, BasicType type, bool is_far);
+  void c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label,
+                           bool is_far, bool is_unordered = false);
+
+#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
new file mode 100644
index 0000000000..ffcca64e0b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp
@@ -0,0 +1,1210 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "compiler/disassembler.hpp"
+#include "compiler/oopMap.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/universe.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_riscv.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframe.hpp"
+#include "runtime/vframeArray.hpp"
+#include "vmreg_riscv.inline.hpp"
+
+
+// Implementation of StubAssembler
+
+int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) {
+  // setup registers
+  assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result,
+         "registers must be different");
+  assert(oop_result != xthread && metadata_result != xthread, "registers must be different");
+  assert(args_size >= 0, "illegal args_size");
+  // args_size is only validated here; the Register overloads fill c_rarg1..c_rarg3 before calling in
+
+  mv(c_rarg0, xthread);  // the thread is always passed as the first C argument
+  set_num_rt_args(0); // Nothing on stack
+
+  Label retaddr;
+  set_last_Java_frame(sp, fp, retaddr, t0);
+
+  // do the call
+  int32_t off = 0;
+  la_patchable(t0, RuntimeAddress(entry), off);
+  jalr(x1, t0, off);
+  bind(retaddr);
+  int call_offset = offset();  // code offset right after the call; this is the return value
+  // verify callee-saved register
+#ifdef ASSERT
+  push_reg(x10, sp);
+  { Label L;
+    get_thread(x10);
+    beq(xthread, x10, L);
+    stop("StubAssembler::call_RT: xthread not callee saved?");
+    bind(L);
+  }
+  pop_reg(x10, sp);
+#endif
+  reset_last_Java_frame(true);
+
+  // check for pending exceptions
+  { Label L;
+    // check for pending exceptions (java_thread is set upon return)
+    ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+    beqz(t0, L);
+    // exception pending => remove activation and forward to exception handler
+    // make sure that the vm_results are cleared
+    if (oop_result->is_valid()) {
+      sd(zr, Address(xthread, JavaThread::vm_result_offset()));
+    }
+    if (metadata_result->is_valid()) {
+      sd(zr, Address(xthread, JavaThread::vm_result_2_offset()));
+    }
+    if (frame_size() == no_frame_size) {
+      leave();
+      far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here();
+    } else {
+      far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+    }
+    bind(L);
+  }
+  // get oop results if there are any and reset the values in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, xthread);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result, xthread);
+  }
+  return call_offset;
+}
+
+int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) {
+  mv(c_rarg1, arg1);  // c_rarg0 is filled with xthread by the args_size overload
+  return call_RT(oop_result, metadata_result, entry, 1);
+}
+
+int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) {
+  const int arg_num = 2;
+  if (c_rarg1 == arg2) {  // arg2 sits in arg1's target register
+    if (c_rarg2 == arg1) {  // ...and arg1 sits in arg2's target: the two must be exchanged
+      xorr(arg1, arg1, arg2);  // three-XOR swap, no temporary register needed
+      xorr(arg2, arg1, arg2);
+      xorr(arg1, arg1, arg2);
+    } else {
+      mv(c_rarg2, arg2);  // move arg2 out of c_rarg1 first, before c_rarg1 is overwritten
+      mv(c_rarg1, arg1);
+    }
+  } else {
+    mv(c_rarg1, arg1);  // c_rarg1 does not hold arg2, so it can be written first
+    mv(c_rarg2, arg2);
+  }
+  return call_RT(oop_result, metadata_result, entry, arg_num);
+}
+
+int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  const int arg_num = 3;
+  // if there is any conflict use the stack
+  if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
+      arg2 == c_rarg1 || arg2 == c_rarg3 ||
+      arg3 == c_rarg1 || arg3 == c_rarg2) {
+    const int arg1_sp_offset = 0;
+    const int arg2_sp_offset = 1;
+    const int arg3_sp_offset = 2;
+    addi(sp, sp, -(arg_num + 1) * wordSize);  // reserves one word more than is stored; NOTE(review): presumably for stack alignment - confirm
+    sd(arg1, Address(sp, arg1_sp_offset * wordSize));  // spill all three sources first...
+    sd(arg2, Address(sp, arg2_sp_offset * wordSize));
+    sd(arg3, Address(sp, arg3_sp_offset * wordSize));
+
+    ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize));  // ...then reload each into its target register
+    ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize));
+    ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize));
+    addi(sp, sp, (arg_num + 1) * wordSize);  // release the temporary stack area
+  } else {
+    mv(c_rarg1, arg1);  // no source sits in another argument's target register: plain moves suffice
+    mv(c_rarg2, arg2);
+    mv(c_rarg3, arg3);
+  }
+  return call_RT(oop_result, metadata_result, entry, arg_num);
+}
+
+// Implementation of StubFrame
+
+class StubFrame: public StackObj {  // scoped helper: the constructor emits the stub prologue, the destructor the epilogue
+ private:
+  StubAssembler* _sasm;  // assembler the prologue and epilogue are emitted into
+
+ public:
+  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
+  void load_argument(int offset_in_words, Register reg);  // forwards to StubAssembler::load_parameter
+
+  ~StubFrame();
+};
+
+void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
+  set_info(name, must_gc_arguments);  // record the stub name and the must_gc_arguments flag
+  enter();  // paired with leave() in epilogue()
+}
+
+void StubAssembler::epilogue() {
+  leave();  // paired with enter() in prologue()
+  ret();
+}
+
+#define __ _sasm->
+
+StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
+  _sasm = sasm;  // must be set first: the __ macro below expands to _sasm->
+  __ prologue(name, must_gc_arguments);
+}
+
+// load parameters that were stored with LIR_Assembler::store_parameter
+// Note: offsets for store_parameter and load_argument must match
+void StubFrame::load_argument(int offset_in_words, Register reg) {
+  __ load_parameter(offset_in_words, reg);  // forwards unchanged to the assembler's load_parameter
+}
+
+
+StubFrame::~StubFrame() {
+  __ epilogue();  // emitted when the StubFrame object is destroyed
+  _sasm = NULL;
+}
+
+#undef __
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
+
+// Stack layout for saving/restoring all the registers needed during a runtime
+// call (this includes deoptimization)
+// Note: users of this frame may well have arguments to some runtime call
+// while these values are on the stack. These positions neglect those arguments
+// but the code in save_live_registers will take the argument count into
+// account.
+//
+
+enum reg_save_layout {
+  reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */
+};  // counted in words: generate_oop_map multiplies it by BytesPerWord
+
+// Save off registers which might be killed by calls into the runtime.
+// Tries to be smart about FPU registers.  In particular we separate
+// saving and describing the FPU registers for deoptimization since we
+// have to save the FPU registers twice if we describe them.  The
+// deopt blob is the only thing which needs to describe FPU registers.
+// In all other cases it should be sufficient to simply save their
+// current value.
+
+static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs];  // read by generate_oop_map, indexed by Register::encoding()
+static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs];  // read by generate_oop_map, indexed by float register number
+
+static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {
+  int frame_size_in_bytes = reg_save_frame_size * BytesPerWord;
+  sasm->set_frame_size(frame_size_in_bytes / BytesPerWord);  // frame size is handed to the assembler in words
+  int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);  // the OopMap is sized in jint-sized slots
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+  assert_cond(oop_map != NULL);
+
+  // caller save registers only, see FrameMap::initialize
+  // in c1_FrameMap_riscv.cpp for detail.
+  const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {
+    x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31
+  };
+
+  for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) {
+    Register r = caller_save_cpu_regs[i];
+    int sp_offset = cpu_reg_save_offsets[r->encoding()];  // save slot previously recorded for this register
+    oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                              r->as_VMReg());
+  }
+
+  // fpu_regs: described only when the caller asked for it
+  if (save_fpu_registers) {
+    for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
+      FloatRegister r = as_FloatRegister(i);
+      int sp_offset = fpu_reg_save_offsets[i];
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+                                r->as_VMReg());
+    }
+  }
+  return oop_map;
+}
+
+static OopMap* save_live_registers(StubAssembler* sasm,
+                                   bool save_fpu_registers = true) {
+  sasm->block_comment("save_live_registers");
+
+  // x5..x31: every integer register except ra(x1), sp(x2), gp(x3), tp(x4); an odd count reserves one alignment slot
+  sasm->push_reg(RegSet::range(x5, x31), sp);
+
+  if (!save_fpu_registers) {
+    // The save area has a fixed size (reg_save_frame_size), so the room for the
+    // 32 float registers is reserved even though nothing is stored into it.
+    sasm->addi(sp, sp, -32 * wordSize);
+  } else {
+    // Reserve the float area, then store every float register into it.
+    sasm->addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize));
+    for (int fpu = 0; fpu < FrameMap::nof_fpu_regs; fpu++) {
+      sasm->fsd(as_FloatRegister(fpu), Address(sp, fpu * wordSize));
+    }
+  }
+
+  return generate_oop_map(sasm, save_fpu_registers);
+}
+
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (!restore_fpu_registers) {
+    // Nothing to reload, but the fixed-size save area (reg_save_frame_size) still
+    // contains the 32-word float region reserved by save_live_registers; drop it.
+    sasm->addi(sp, sp, 32 * wordSize);
+  } else {
+    for (int fpu = 0; fpu < FrameMap::nof_fpu_regs; fpu++) {
+      sasm->fld(as_FloatRegister(fpu), Address(sp, fpu * wordSize));
+    }
+    sasm->addi(sp, sp, FrameMap::nof_fpu_regs * wordSize);
+  }
+
+  // x5..x31 (not ra, sp, gp, tp); with an odd register count the alignment slot is removed too
+  sasm->pop_reg(RegSet::range(x5, x31), sp);
+}
+
+static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (!restore_fpu_registers) {
+    // The fixed-size save area (reg_save_frame_size) always holds a 32-word float
+    // region, even when save_live_registers stored nothing into it; drop it.
+    sasm->addi(sp, sp, 32 * wordSize);
+  } else {
+    for (int fpu = 0; fpu < FrameMap::nof_fpu_regs; fpu++) {
+      sasm->fld(as_FloatRegister(fpu), Address(sp, fpu * wordSize));
+    }
+    sasm->addi(sp, sp, FrameMap::nof_fpu_regs * wordSize);
+  }
+
+  // The integer registers (all but ra, sp, gp, tp) are popped in two ranges so
+  // that x10 keeps its current value instead of the one saved on the stack.
+  sasm->pop_reg(RegSet::range(x5, x9), sp);    // x5 ~ x9 plus the alignment slot reserved in save_live_registers()
+  sasm->pop_reg(RegSet::range(x11, x31), sp);  // x11 ~ x31; x10 will be automatically skipped here
+}
+
+void Runtime1::initialize_pd() {
+  // SP offsets are counted in halfwords: every saved register advances the offset by two.
+  const int slots_per_reg = 2;
+  int next_slot = 0;
+
+  // All float registers are saved explicitly and come first.
+  for (int fpu = 0; fpu < FrameMap::nof_fpu_regs; fpu++) {
+    fpu_reg_save_offsets[fpu] = next_slot;
+    next_slot += slots_per_reg;
+  }
+
+  // Skip the slot reserved for 16-byte stack alignment, see MacroAssembler::push_reg.
+  next_slot += slots_per_reg;
+  // x0 ~ x4 are not saved, so the integer entries start at x5.
+  for (int cpu = 5; cpu < FrameMap::nof_cpu_regs; cpu++) {
+    cpu_reg_save_offsets[cpu] = next_slot;
+    next_slot += slots_per_reg;
+  }
+}
+
+// target: the entry point of the method that creates and posts the exception oop
+// has_argument: true if the exception needs arguments (passed in t0 and t1)
+
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* map = save_live_registers(sasm);
+  assert_cond(map != NULL);
+  if (has_argument) {
+    // the exception arguments arrive in t0/t1 and are handed on in c_rarg1/c_rarg2
+    sasm->mv(c_rarg1, t0);
+    sasm->mv(c_rarg2, t1);
+  }
+  const int call_offset = sasm->call_RT(noreg, noreg, target);
+
+  // Register the oop map at the offset of the runtime call.
+  OopMapSet* map_set = new OopMapSet();
+  assert_cond(map_set != NULL);
+  map_set->add_gc_map(call_offset, map);
+
+  sasm->should_not_reach_here();  // the runtime call is not expected to return
+  return map_set;
+}
+
+OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) {
+  __ block_comment("generate_handle_exception");
+  // Shared body of the forward_exception / handle_exception* stubs; returns the oop maps for the emitted runtime call.
+  // incoming parameters
+  const Register exception_oop = x10;
+  const Register exception_pc  = x13;
+
+  OopMapSet* oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  OopMap* oop_map = NULL;
+
+  switch (id) {
+    case forward_exception_id:
+      // We're handling an exception in the context of a compiled frame.
+      // The registers have been saved in the standard places.  Perform
+      // an exception lookup in the caller and dispatch to the handler
+      // if found.  Otherwise unwind and dispatch to the callers
+      // exception handler.
+      oop_map = generate_oop_map(sasm, true /* save_fpu_registers */);
+
+      // load and clear pending exception oop into x10
+      __ ld(exception_oop, Address(xthread, Thread::pending_exception_offset()));
+      __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
+
+      // load issuing PC (the return address for this stub) into x13
+      __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord));
+
+      // make sure that the vm_results are cleared (may be unnecessary)
+      __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
+      __ sd(zr, Address(xthread, JavaThread::vm_result_2_offset()));
+      break;
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      // At this point all registers MAY be live.
+      oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id);
+      break;
+    case handle_exception_from_callee_id: {
+      // At this point all registers except exception oop (x10) and
+      // exception pc (ra) are dead.
+      const int frame_size = 2 /* fp, return address */;
+      oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0);
+      sasm->set_frame_size(frame_size);
+      break;
+    }
+    default:
+      __ should_not_reach_here();
+      break;
+  }
+
+  // verify that only x10 and x13 are valid at this time
+  __ invalidate_registers(false, true, true, false, true, true);
+  // verify that x10 contains a valid exception
+  __ verify_not_null_oop(exception_oop);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are
+  // empty before writing to them
+  Label oop_empty;
+  __ ld(t0, Address(xthread, JavaThread::exception_oop_offset()));
+  __ beqz(t0, oop_empty);
+  __ stop("exception oop already set");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
+  __ beqz(t0, pc_empty);
+  __ stop("exception pc already set");
+  __ bind(pc_empty);
+#endif
+
+  // save exception oop and issuing pc into JavaThread
+  // (exception handler will load it from here)
+  __ sd(exception_oop, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset()));
+
+  // patch throwing pc into return address (has bci & oop map)
+  __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord));
+
+  // compute the exception handler.
+  // the exception oop and the throwing pc are read from the fields in JavaThread
+  int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc));
+  guarantee(oop_map != NULL, "NULL oop_map!");  // every valid id above must have produced a map
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // x10: handler address
+  //      will be the deopt blob if nmethod was deoptimized while we looked up
+  //      handler regardless of whether handler existed in the nmethod.
+
+  // only x10 is valid at this time, all other registers have been destroyed by the runtime call
+  __ invalidate_registers(false, true, true, true, true, true);
+
+  // patch the return address, this stub will directly return to the exception handler
+  __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord));
+
+  switch (id) {
+    case forward_exception_id:
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      // Restore the registers that were saved at the beginning.
+      restore_live_registers(sasm, id != handle_exception_nofpu_id);
+      break;
+    case handle_exception_from_callee_id:
+      // Pop the return address.
+      __ leave();
+      __ ret();  // jump to exception handler
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  return oop_maps;
+}
+
+
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+  // Emits the unwind_exception stub: looks up the caller's handler and jumps to it. Incoming parameters:
+  const Register exception_oop = x10;
+  // other registers used in this stub
+  const Register handler_addr = x11;
+
+  // verify that only x10 is valid at this time
+  __ invalidate_registers(false, true, true, true, true, true);
+
+#ifdef ASSERT
+  // check that fields in JavaThread for exception oop and issuing pc are empty
+  Label oop_empty;
+  __ ld(t0, Address(xthread, JavaThread::exception_oop_offset()));
+  __ beqz(t0, oop_empty);
+  __ stop("exception oop must be empty");
+  __ bind(oop_empty);
+
+  Label pc_empty;
+  __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
+  __ beqz(t0, pc_empty);
+  __ stop("exception pc must be empty");
+  __ bind(pc_empty);
+#endif
+
+  // Save our return address because
+  // exception_handler_for_return_address will destroy it.  We also
+  // save exception_oop
+  __ addi(sp, sp, -2 * wordSize);
+  __ sd(exception_oop, Address(sp, wordSize));
+  __ sd(ra, Address(sp));
+
+  // search the exception handler address of the caller (using the return address)
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra);
+  // x10: exception handler address of the caller
+
+  // Only x10 is valid at this time; all other registers have been
+  // destroyed by the call.
+  __ invalidate_registers(false, true, true, true, false, true);
+
+  // move result of call into correct register
+  __ mv(handler_addr, x10);
+
+  // get throwing pc (= return address).
+  // ra has been destroyed by the call
+  __ ld(ra, Address(sp));
+  __ ld(exception_oop, Address(sp, wordSize));
+  __ addi(sp, sp, 2 * wordSize);
+  __ mv(x13, ra);  // x13 carries the throwing pc to the handler
+
+  __ verify_not_null_oop(exception_oop);
+
+  // continue at exception handler (return address removed)
+  // note: do *not* remove arguments when unwinding the
+  //       activation since the caller assumes having
+  //       all arguments on the stack when entering the
+  //       runtime to determine the exception handler
+  //       (GC happens at call site with arguments!)
+  // x10: exception oop
+  // x13: throwing pc
+  // x11: exception handler
+  __ jr(handler_addr);
+}
+
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // Emits the shared body of the *_patching stubs: save registers, call `target`, then
+  // return, forward a pending exception, or continue in the deopt blob. Returns the oop maps.
+  // Note: the maximum number of runtime-arguments is used because it is difficult to distinguish
+  //       each RT-Call; this also affects generate_handle_exception since the oop-map is shared.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  OopMap* oop_map = save_live_registers(sasm);
+  assert_cond(oop_map != NULL);
+
+  __ mv(c_rarg0, xthread);
+  Label retaddr;
+  __ set_last_Java_frame(sp, fp, retaddr, t0);
+  // do the call
+  int32_t off = 0;
+  __ la_patchable(t0, RuntimeAddress(target), off);
+  __ jalr(x1, t0, off);
+  __ bind(retaddr);
+  OopMapSet* oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  oop_maps->add_gc_map(__ offset(), oop_map);  // the map is registered at the call's return address
+  // verify callee-saved register
+#ifdef ASSERT
+  { Label L;
+    __ get_thread(t0);
+    __ beq(xthread, t0, L);
+    __ stop("StubAssembler::call_RT: xthread not callee saved?");
+    __ bind(L);
+  }
+#endif
+  __ reset_last_Java_frame(true);
+
+  // check for pending exceptions
+  { Label L;
+    __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+    __ beqz(t0, L);
+    // exception pending => remove activation and forward to exception handler
+
+    { Label L1;
+      __ bnez(x10, L1);                                 // have we deoptimized?
+      __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id)));
+      __ bind(L1);
+    }
+
+    // the deopt blob expects exceptions in the special fields of
+    // JavaThread, so copy and clear pending exception.
+
+    // load and clear pending exception
+    __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+    __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
+
+    // check that there is really a valid exception
+    __ verify_not_null_oop(x10);
+
+    // load throwing pc: this is the return address of the stub
+    __ ld(x13, Address(fp, wordSize));
+
+#ifdef ASSERT
+    // Check that fields in JavaThread for exception oop and issuing pc are empty
+    Label oop_empty;
+    __ ld(t0, Address(xthread, JavaThread::exception_oop_offset()));  // not pending_exception, which was just cleared above
+    __ beqz(t0, oop_empty);
+    __ stop("exception oop must be empty");
+    __ bind(oop_empty);
+
+    Label pc_empty;
+    __ ld(t0, Address(xthread, JavaThread::exception_pc_offset()));
+    __ beqz(t0, pc_empty);
+    __ stop("exception pc must be empty");
+    __ bind(pc_empty);
+#endif
+
+    // store exception oop and throwing pc to JavaThread
+    __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+    __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+
+    restore_live_registers(sasm);
+
+    __ leave();
+
+    // Forward the exception directly to deopt blob. We can blow no
+    // registers and must leave throwing pc on the stack.  A patch may
+    // have values live in registers so the entry point with the
+    // exception in tls.
+    __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls()));
+
+    __ bind(L);
+  }
+
+  // Runtime will return true if the nmethod has been deoptimized during
+  // the patching process. In that case we must do a deopt reexecute instead.
+  Label cont;
+
+  __ beqz(x10, cont);                                 // have we deoptimized?
+
+  // Will reexecute. Proper return address is already on the stack we just restore
+  // registers, pop all of our frame but the return address and jump to the deopt blob
+
+  restore_live_registers(sasm);
+  __ leave();
+  __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+
+  __ bind(cont);
+  restore_live_registers(sasm);
+  __ leave();
+  __ ret();
+
+  return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+  // Emits the code of the C1 runtime stub selected by id; returns its oop maps (NULL when the stub sets none).
+  const bool dont_gc_arguments = false;  // for better readability
+
+  // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu
+  bool save_fpu_registers = true;
+
+  // stub code & info for the different stubs
+  OopMapSet* oop_maps = NULL;
+  switch (id) {
+    {  // NOTE(review): extra scope around all cases; it is closed right before the end of the switch
+    case forward_exception_id:
+      {
+        oop_maps = generate_handle_exception(id, sasm);
+        __ leave();
+        __ ret();
+      }
+      break;
+
+    case throw_div0_exception_id:
+      {
+        StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false);
+      }
+      break;
+
+    case throw_null_pointer_exception_id:
+      { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false);
+      }
+      break;
+
+    case new_instance_id:
+    case fast_new_instance_id:
+    case fast_new_instance_init_check_id:
+      {
+        Register klass = x13; // Incoming
+        Register obj   = x10; // Result
+
+        if (id == new_instance_id) {
+          __ set_info("new_instance", dont_gc_arguments);
+        } else if (id == fast_new_instance_id) {
+          __ set_info("fast new_instance", dont_gc_arguments);
+        } else {
+          assert(id == fast_new_instance_init_check_id, "bad StubID");
+          __ set_info("fast new_instance init check", dont_gc_arguments);
+        }
+
+        // If TLAB is disabled, see if there is support for inlining contiguous
+        // allocations.
+        // Otherwise, just go to the slow path.
+        if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) &&
+            !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
+          Label slow_path;
+          Register obj_size   = x12;
+          Register tmp1       = x9;
+          Register tmp2       = x14;
+          assert_different_registers(klass, obj, obj_size, tmp1, tmp2);
+
+          const int sp_offset = 2;
+          const int x9_offset = 1;
+          const int zr_offset = 0;
+          __ addi(sp, sp, -(sp_offset * wordSize));
+          __ sd(x9, Address(sp, x9_offset * wordSize));
+          __ sd(zr, Address(sp, zr_offset * wordSize));
+
+          if (id == fast_new_instance_init_check_id) {
+            // make sure the klass is initialized
+            __ lbu(t0, Address(klass, InstanceKlass::init_state_offset()));
+            __ mv(t1, InstanceKlass::fully_initialized);
+            __ bne(t0, t1, slow_path);
+          }
+
+#ifdef ASSERT
+          // assert object can be fast path allocated
+          {
+            Label ok, not_ok;
+            __ lw(obj_size, Address(klass, Klass::layout_helper_offset()));
+            // make sure it's an instance. For instances, layout helper is a positive number.
+            // For arrays, layout helper is a negative number
+            __ blez(obj_size, not_ok);
+            __ andi(t0, obj_size, Klass::_lh_instance_slow_path_bit);
+            __ beqz(t0, ok);
+            __ bind(not_ok);
+            __ stop("assert(can be fast path allocated)");
+            __ should_not_reach_here();
+            __ bind(ok);
+          }
+#endif // ASSERT
+
+          // get the instance size
+          __ lwu(obj_size, Address(klass, Klass::layout_helper_offset()));
+
+          __ eden_allocate(obj, obj_size, 0, tmp1, slow_path);
+
+          __ initialize_object(obj, klass, obj_size, 0, tmp1, tmp2, /* is_tlab_allocated */ false);
+          __ verify_oop(obj);
+          __ ld(x9, Address(sp, x9_offset * wordSize));
+          __ ld(zr, Address(sp, zr_offset * wordSize));  // destination is zr, so the loaded value is discarded
+          __ addi(sp, sp, sp_offset * wordSize);
+          __ ret();
+
+          __ bind(slow_path);
+          __ ld(x9, Address(sp, x9_offset * wordSize));
+          __ ld(zr, Address(sp, zr_offset * wordSize));  // destination is zr, so the loaded value is discarded
+          __ addi(sp, sp, sp_offset * wordSize);
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        assert_cond(map != NULL);
+        int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass);
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r10(sasm);
+        __ verify_oop(obj);
+        __ leave();
+        __ ret();
+
+        // x10: new instance
+      }
+
+      break;
+
+    case counter_overflow_id:
+      {
+        Register bci = x10;
+        Register method = x11;
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        assert_cond(map != NULL);
+
+        const int bci_off = 0;
+        const int method_off = 1;
+        // Retrieve bci
+        __ lw(bci, Address(fp, bci_off * BytesPerWord));
+        // And a pointer to the Method*
+        __ ld(method, Address(fp, method_off * BytesPerWord));
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method);
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        __ ret();
+      }
+      break;
+
+    case new_type_array_id:
+    case new_object_array_id:
+      {
+        Register length   = x9;  // Incoming
+        Register klass    = x13; // Incoming
+        Register obj      = x10; // Result
+
+        if (id == new_type_array_id) {
+          __ set_info("new_type_array", dont_gc_arguments);
+        } else {
+          __ set_info("new_object_array", dont_gc_arguments);
+        }
+
+#ifdef ASSERT
+        // assert object type is really an array of the proper kind
+        {
+          Label ok;
+          Register tmp = obj;
+          __ lwu(tmp, Address(klass, Klass::layout_helper_offset()));
+          __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift);
+          int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value);
+          __ mv(t0, tag);
+          __ beq(t0, tmp, ok);
+          __ stop("assert(is an array klass)");
+          __ should_not_reach_here();
+          __ bind(ok);
+        }
+#endif // ASSERT
+
+        // If TLAB is disabled, see if there is support for inlining contiguous
+        // allocations.
+        // Otherwise, just go to the slow path.
+        if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) {
+          Register arr_size   = x14;
+          Register tmp1       = x12;
+          Register tmp2       = x15;
+          Label slow_path;
+          assert_different_registers(length, klass, obj, arr_size, tmp1, tmp2);
+
+          // check that array length is small enough for fast path.
+          __ mv(t0, C1_MacroAssembler::max_array_allocation_length);
+          __ bgtu(length, t0, slow_path);
+
+          // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
+          __ lwu(tmp1, Address(klass, Klass::layout_helper_offset()));
+          __ andi(t0, tmp1, 0x1f);
+          __ sll(arr_size, length, t0);
+          int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
+          int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
+          __ slli(tmp1, tmp1, XLEN - lh_header_size_msb);
+          __ srli(tmp1, tmp1, XLEN - lh_header_size_width);
+          __ add(arr_size, arr_size, tmp1);
+          __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up
+          __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);
+
+          __ eden_allocate(obj, arr_size, 0, tmp1, slow_path); // preserves arr_size
+
+          __ initialize_header(obj, klass, length, tmp1, tmp2);
+          __ lbu(tmp1, Address(klass,
+                               in_bytes(Klass::layout_helper_offset()) +
+                               (Klass::_lh_header_size_shift / BitsPerByte)));
+          assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise");
+          assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise");
+          __ andi(tmp1, tmp1, Klass::_lh_header_size_mask);
+          __ sub(arr_size, arr_size, tmp1); // body length
+          __ add(tmp1, tmp1, obj);       // body start
+          __ initialize_body(tmp1, arr_size, 0, tmp2);
+          __ membar(MacroAssembler::StoreStore);
+          __ verify_oop(obj);
+
+          __ ret();
+
+          __ bind(slow_path);
+        }
+
+        __ enter();
+        OopMap* map = save_live_registers(sasm);
+        assert_cond(map != NULL);
+        int call_offset = 0;
+        if (id == new_type_array_id) {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length);
+        } else {
+          call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length);
+        }
+
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r10(sasm);
+
+        __ verify_oop(obj);
+        __ leave();
+        __ ret();
+
+        // x10: new array
+      }
+      break;
+
+    case new_multi_array_id:
+      {
+        StubFrame f(sasm, "new_multi_array", dont_gc_arguments);
+        // x10: klass
+        // x9: rank
+        // x12: address of 1st dimension
+        OopMap* map = save_live_registers(sasm);
+        assert_cond(map != NULL);
+        __ mv(c_rarg1, x10);
+        __ mv(c_rarg3, x12);
+        __ mv(c_rarg2, x9);
+        int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, new_multi_array), x11, x12, x13);
+
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers_except_r10(sasm);
+
+        // x10: new multi array
+        __ verify_oop(x10);
+      }
+      break;
+
+    case register_finalizer_id:
+      {
+        __ set_info("register_finalizer", dont_gc_arguments);
+
+        // This is called via call_runtime so the arguments
+        // will be placed in C abi locations
+        __ verify_oop(c_rarg0);
+
+        // load the klass and check the has finalizer flag
+        Label register_finalizer;
+        Register t = x15;
+        __ load_klass(t, x10);
+        __ lwu(t, Address(t, Klass::access_flags_offset()));
+        __ andi(t0, t, JVM_ACC_HAS_FINALIZER);
+        __ bnez(t0, register_finalizer);
+        __ ret();
+
+        __ bind(register_finalizer);
+        __ enter();
+        OopMap* oop_map = save_live_registers(sasm);
+        assert_cond(oop_map != NULL);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), x10);
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, oop_map);
+
+        // Now restore all the live registers
+        restore_live_registers(sasm);
+
+        __ leave();
+        __ ret();
+      }
+      break;
+
+    case throw_class_cast_exception_id:
+      {
+        StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true);
+      }
+      break;
+
+    case throw_incompatible_class_change_error_id:
+      {
+        StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments);  // NOTE(review): name says "class_cast" but the id is incompatible_class_change_error
+        oop_maps = generate_exception_throw(sasm,
+                                            CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false);
+      }
+      break;
+
+    case slow_subtype_check_id:
+      {
+        // Typical calling sequence:
+        // push klass_RInfo (object klass or other subclass)
+        // push sup_k_RInfo (array element klass or other superclass)
+        // jump to slow_subtype_check
+        // Note that the subclass is pushed first, and is therefore deepest.
+        enum layout {
+          x10_off, x10_off_hi,
+          x12_off, x12_off_hi,
+          x14_off, x14_off_hi,
+          x15_off, x15_off_hi,
+          sup_k_off, sup_k_off_hi,
+          klass_off, klass_off_hi,
+          framesize,
+          result_off = sup_k_off
+        };
+
+        __ set_info("slow_subtype_check", dont_gc_arguments);
+        __ push_reg(RegSet::of(x10, x12, x14, x15), sp);
+
+        __ ld(x14, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // sub klass
+        __ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass
+
+        Label miss;
+        __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss);
+
+        // fallthrough on success:
+        __ mv(t0, 1);
+        __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop_reg(RegSet::of(x10, x12, x14, x15), sp);
+        __ ret();
+
+        __ bind(miss);
+        __ sd(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result
+        __ pop_reg(RegSet::of(x10, x12, x14, x15), sp);
+        __ ret();
+      }
+      break;
+
+    case monitorenter_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorenter_id:
+      {
+        StubFrame f(sasm, "monitorenter", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+        assert_cond(map != NULL);
+
+        // Called with store_parameter and not C abi
+        f.load_argument(1, x10); // x10: object
+        f.load_argument(0, x11); // x11: lock address
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), x10, x11);
+
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case monitorexit_nofpu_id:
+      save_fpu_registers = false;
+      // fall through
+    case monitorexit_id:
+      {
+        StubFrame f(sasm, "monitorexit", dont_gc_arguments);
+        OopMap* map = save_live_registers(sasm, save_fpu_registers);
+        assert_cond(map != NULL);
+
+        // Called with store_parameter and not C abi
+        f.load_argument(0, x10); // x10: lock address
+
+        // note: really a leaf routine but must setup last java sp
+        //       => use call_RT for now (speed can be improved by
+        //       doing last java sp setup manually)
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), x10);
+
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm, save_fpu_registers);
+      }
+      break;
+
+    case deoptimize_id:
+      {
+        StubFrame f(sasm, "deoptimize", dont_gc_arguments);
+        OopMap* oop_map = save_live_registers(sasm);
+        assert_cond(oop_map != NULL);
+        f.load_argument(0, c_rarg1);
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1);
+
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, oop_map);
+        restore_live_registers(sasm);
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+        __ leave();
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+      }
+      break;
+
+    case throw_range_check_failed_id:
+      {
+        StubFrame f(sasm, "range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true);
+      }
+      break;
+
+    case unwind_exception_id:
+      {
+        __ set_info("unwind_exception", dont_gc_arguments);
+        // note: no stubframe since we are about to leave the current
+        //       activation and we are calling a leaf VM function only.
+        generate_unwind_exception(sasm);
+      }
+      break;
+
+    case access_field_patching_id:
+      {
+        StubFrame f(sasm, "access_field_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching));
+      }
+      break;
+
+    case load_klass_patching_id:
+      {
+        StubFrame f(sasm, "load_klass_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching));
+      }
+      break;
+
+    case load_mirror_patching_id:
+      {
+        StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching));
+      }
+      break;
+
+    case load_appendix_patching_id:
+      {
+        StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments);
+        // we should set up register map
+        oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching));
+      }
+      break;
+
+    case handle_exception_nofpu_id:
+    case handle_exception_id:
+      {
+        StubFrame f(sasm, "handle_exception", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case handle_exception_from_callee_id:
+      {
+        StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments);
+        oop_maps = generate_handle_exception(id, sasm);
+      }
+      break;
+
+    case throw_index_exception_id:
+      {
+        StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments);
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true);
+      }
+      break;
+
+    case throw_array_store_exception_id:
+      {
+        StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments);
+        // tos + 0: link
+        //     + 1: return address
+        oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true);
+      }
+      break;
+
+    case predicate_failed_trap_id:
+      {
+        StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments);
+
+        OopMap* map = save_live_registers(sasm);
+        assert_cond(map != NULL);
+
+        int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap));
+        oop_maps = new OopMapSet();
+        assert_cond(oop_maps != NULL);
+        oop_maps->add_gc_map(call_offset, map);
+        restore_live_registers(sasm);
+        __ leave();
+        DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+        assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+        __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution()));
+      }
+      break;
+
+    case dtrace_object_alloc_id:
+      { // c_rarg0: object
+        StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments);
+        save_live_registers(sasm);  // returned OopMap is unused: no gc map is recorded for this stub
+
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0);
+
+        restore_live_registers(sasm);
+      }
+      break;
+
+    default:
+      {
+        StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
+        __ mv(x10, (int)id);
+        __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10);
+        __ should_not_reach_here();
+      }
+      break;
+    }
+  }
+  return oop_maps;
+}
+
+#undef __
+
+const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return NULL; }  // no platform-specific names on this port
diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
new file mode 100644
index 0000000000..9316d4be02
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C1_GLOBALS_RISCV_HPP
+#define CPU_RISCV_C1_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the client compiler
+// (see c1_globals.hpp).
+// The first group is compiled only when TIERED is not defined; the flags after #endif are always defined.
+#ifndef TIERED
+define_pd_global(bool, BackgroundCompilation,        true );
+define_pd_global(bool, UseTLAB,                      true );
+define_pd_global(bool, ResizeTLAB,                   true );
+define_pd_global(bool, InlineIntrinsics,             true );
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 false);
+define_pd_global(bool, UseOnStackReplacement,        true );
+define_pd_global(bool, TieredCompilation,            false);
+define_pd_global(intx, CompileThreshold,             1500 );
+
+define_pd_global(intx, OnStackReplacePercentage,     933  );
+define_pd_global(intx, FreqInlineSize,               325  );
+define_pd_global(intx, NewSizeThreadIncrease,        4*K  );
+define_pd_global(intx, InitialCodeCacheSize,         160*K);
+define_pd_global(intx, ReservedCodeCacheSize,        32*M );  // equals the sum of the three code heap sizes below (13M + 14M + 5M)
+define_pd_global(intx, NonProfiledCodeHeapSize,      13*M );
+define_pd_global(intx, ProfiledCodeHeapSize,         14*M );
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M  );
+define_pd_global(bool, ProfileInterpreter,           false);
+define_pd_global(intx, CodeCacheExpansionSize,       32*K );
+define_pd_global(uintx, CodeCacheMinBlockLength,     1);
+define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);
+define_pd_global(uintx, MetaspaceSize,               12*M );
+define_pd_global(bool, NeverActAsServerClassMachine, true );
+define_pd_global(uint64_t, MaxRAM,                   1ULL*G);
+define_pd_global(bool, CICompileOSR,                 true );
+#endif // !TIERED
+define_pd_global(bool, UseTypeProfile,               false);
+define_pd_global(bool, RoundFPResults,               true );
+
+define_pd_global(bool, LIRFillDelaySlots,            false);
+define_pd_global(bool, OptimizeSinglePrecision,      true );
+define_pd_global(bool, CSEArrayLength,               false);
+define_pd_global(bool, TwoOperandLIRForm,            false);
+
+#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
new file mode 100644
index 0000000000..3da1f1c6d8
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP
+#define CPU_RISCV_C2_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the server compiler
+// (see c2_globals.hpp).  NOTE: the entries below are not in alphabetical order.
+
+define_pd_global(bool, BackgroundCompilation,        true);
+define_pd_global(bool, UseTLAB,                      true);
+define_pd_global(bool, ResizeTLAB,                   true);
+define_pd_global(bool, CICompileOSR,                 true);
+define_pd_global(bool, InlineIntrinsics,             true);
+define_pd_global(bool, PreferInterpreterNativeStubs, false);
+define_pd_global(bool, ProfileTraps,                 true);
+define_pd_global(bool, UseOnStackReplacement,        true);
+define_pd_global(bool, ProfileInterpreter,           true);
+define_pd_global(bool, TieredCompilation,            trueInTiered);
+define_pd_global(intx, CompileThreshold,             10000);
+
+define_pd_global(intx, OnStackReplacePercentage,     140);
+define_pd_global(intx, ConditionalMoveLimit,         0);
+define_pd_global(intx, FLOATPRESSURE,                32);
+define_pd_global(intx, FreqInlineSize,               325);
+define_pd_global(intx, MinJumpTableSize,             10);
+define_pd_global(intx, INTPRESSURE,                  24);
+define_pd_global(intx, InteriorEntryAlignment,       16);
+define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
+define_pd_global(intx, LoopUnrollLimit,              60);
+define_pd_global(intx, LoopPercentProfileLimit,      10);
+// InitialCodeCacheSize derived from specjbb2000 run.
+define_pd_global(intx, InitialCodeCacheSize,         2496*K); // Integral multiple of CodeCacheExpansionSize (39 * 64K)
+define_pd_global(intx, CodeCacheExpansionSize,       64*K);
+
+// Ergonomics related flags
+define_pd_global(uint64_t,MaxRAM,                    128ULL*G);
+define_pd_global(intx, RegisterCostAreaRatio,        16000);
+
+// Peephole and CISC spilling both break the graph, and so make the
+// scheduler sick.
+define_pd_global(bool, OptoPeephole,                 false);
+define_pd_global(bool, UseCISCSpill,                 false);
+define_pd_global(bool, OptoScheduling,               true);
+define_pd_global(bool, OptoBundling,                 false);
+define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  true);
+define_pd_global(bool, IdealizeClearArrayNode,       true);
+// Code cache sizing: the three code heap sizes add up to ReservedCodeCacheSize (21M + 22M + 5M = 48M).
+define_pd_global(intx, ReservedCodeCacheSize,        48*M);
+define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
+define_pd_global(intx, ProfiledCodeHeapSize,         22*M);
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M );
+define_pd_global(uintx, CodeCacheMinBlockLength,     6);
+define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);
+
+// Heap related flags
+define_pd_global(uintx,MetaspaceSize,    ScaleForWordSize(16*M));
+
+// Ergonomics related flags
+define_pd_global(bool, NeverActAsServerClassMachine, false);
+
+define_pd_global(bool, TrapBasedRangeChecks,         false); // Not needed.
+
+#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
new file mode 100644
index 0000000000..cdbd69807b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+
+// processor dependent initialization for riscv
+
+extern void reg_mask_init();
+// Validates the entry-alignment flags (with a descriptive failure message), then calls reg_mask_init().
+void Compile::pd_compiler2_init() {
+  guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "CodeEntryAlignment must be at least InteriorEntryAlignment");
+  reg_mask_init();
+}
diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
new file mode 100644
index 0000000000..14a68b4502
--- /dev/null
+++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP
+#define CPU_RISCV_CODEBUFFER_RISCV_HPP
+
+private:
+  void pd_initialize() {}  // empty body: no platform-specific state is set up here
+
+public:
+  void flush_bundle(bool start_new_bundle) {}  // empty body: the argument is ignored
+
+#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
new file mode 100644
index 0000000000..a4de342a93
--- /dev/null
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+
+// ----------------------------------------------------------------------------
+
+#define __ _masm.
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+  precond(cbuf.stubs()->start() != badAddress);
+  precond(cbuf.stubs()->end() != badAddress);
+  // The stub emitted here is a placeholder of the form
+  //   mv xmethod, 0
+  //   jalr -4 # to self
+  // and is fixed up when the corresponding call is converted from
+  // calling compiled code to calling interpreted code.
+
+  // Without an explicit mark, use the mark within the main instrs section.
+  const address call_mark = (mark != NULL) ? mark : cbuf.insts_mark();
+
+  // The code buffer's insts_mark is always relative to insts, which is
+  // why the stub has to be generated through a macroassembler.
+  MacroAssembler _masm(&cbuf);
+
+  const address stub_start = _masm.start_a_stub(to_interp_stub_size());
+  int offset = _masm.offset();
+  if (stub_start == NULL) {
+    return NULL;  // CodeBuffer::expand failed
+  }
+
+  // The static stub relocation stores the instruction address of the call.
+  _masm.relocate(static_stub_Relocation::spec(call_mark));
+  _masm.emit_static_call_stub();
+
+  assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
+  _masm.end_a_stub();
+  return stub_start;
+}
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+  const int stub_insts = 6 + 5 + 1;  // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
+  return stub_insts * NativeInstruction::instruction_size;
+}
+
+int CompiledStaticCall::to_trampoline_stub_size() {
+  // Deliberately pessimistic: trampoline stubs are always word aligned,
+  // so an alignment nop is sometimes emitted. Budget one extra
+  // instruction on top of the trampoline stub itself.
+  return NativeCallTrampolineStub::instruction_size + NativeInstruction::instruction_size;
+}
+
+// Number of relocation entries for the call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 3 /* in emit_to_interp_stub */ + 1 /* in emit_call */;
+}
+
+void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) {
+  address stub = find_stub(false /* is_aot */);
+  guarantee(stub != NULL, "stub not found");
+  // From here on: patch the stub with callee/entry, then redirect the call to the stub.
+  if (TraceICs) {
+    ResourceMark rm;
+    tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+                  p2i(instruction_address()),
+                  callee->name_and_sig_as_C_string());
+  }
+
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+#ifndef PRODUCT
+  NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
+  // The stub must be either untouched (data == 0) or already set to this same callee and entry.
+  // read the value once
+  volatile intptr_t data = method_holder->data();
+  assert(data == 0 || data == (intptr_t)callee(),
+         "a) MT-unsafe modification of inline cache");
+  assert(data == 0 || jump->jump_destination() == entry,
+         "b) MT-unsafe modification of inline cache");
+#endif
+  // Update stub: store the callee, plant the jump to entry, then invalidate the icache range of the stub.
+  method_holder->set_data((intptr_t)callee());
+  NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
+  ICache::invalidate_range(stub, to_interp_stub_size());
+  // Update jump to call: only after the stub is patched is the call pointed at it.
+  set_destination_mt_safe(stub);
+}
+
+void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+  // Callers must hold CompiledIC_lock or be at a safepoint.
+  assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+  address stub = static_stub->addr();
+  assert(stub != NULL, "stub not found");
+  // Creating the NativeMovConstReg view also verifies the object;
+  // zeroing its data is what resets the stub.
+  nativeMovConstReg_at(stub)->set_data(0);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledDirectStaticCall::verify() {
+  // 1. The call instruction itself and its alignment.
+  _call->verify();
+  _call->verify_alignment();
+
+  // 2. The stub: it must exist, and creating the native views of its mov
+  //    and of the instruction that follows also verifies those objects.
+  address stub = find_stub(false /* is_aot */);
+  assert(stub != NULL, "no stub found for static call");
+  address jump_addr = nativeMovConstReg_at(stub)->next_instruction_address();
+  (void)nativeJump_at(jump_addr);
+
+  // 3. The call must be in one of its three known states.
+  assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp
new file mode 100644
index 0000000000..05da242e35
--- /dev/null
+++ b/src/hotspot/cpu/riscv/copy_riscv.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_COPY_RISCV_HPP
+#define CPU_RISCV_COPY_RISCV_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#include OS_CPU_HEADER_INLINE(copy)
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  const julong pattern = ((julong) value << 32) | value;  // value replicated into both 32-bit halves
+  julong* const words = (julong*) tohw;
+  for (size_t i = 0; i < count; i++) {
+    words[i] = pattern;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);  // aligned input takes the same path as the general word fill
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);  // sets count bytes starting at 'to' to value; memset's result is discarded
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);  // zeroing is a word fill with the value 0
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);  // clears count bytes starting at 'to'; memset's result is discarded
+}
+
+#endif // CPU_RISCV_COPY_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp
new file mode 100644
index 0000000000..e9ff307b64
--- /dev/null
+++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
+#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
+
+// Nothing to do on riscv
+
+#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
new file mode 100644
index 0000000000..06bca5298c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP
+#define CPU_RISCV_DISASSEMBLER_RISCV_HPP
+
+static int pd_instruction_alignment() {
+  return 1;  // NOTE(review): presumably a byte granularity; confirm against the shared Disassembler code
+}
+
+static const char* pd_cpu_opts() {
+  return "";  // no CPU-specific option string is supplied
+}
+
+#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp
new file mode 100644
index 0000000000..d4fcbdcbbd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.cpp
@@ -0,0 +1,694 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/oopMap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {  // compiled under ASSERT only; performs no check here
+}
+#endif
+
+
+// Profiling/safepoint support
+// Conservatively checks whether this frame's sender can be derived for 'thread'; any doubtful value yields false.
+bool frame::safe_for_sender(JavaThread *thread) {
+  address   sp = (address)_sp;
+  address   fp = (address)_fp;
+  address   unextended_sp = (address)_unextended_sp;
+
+  // consider stack guards when trying to determine "safe" stack pointers
+  static size_t stack_guard_size = os::uses_stack_guard_pages() ?
+                                   (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0;
+  size_t usable_stack_size = thread->stack_size() - stack_guard_size;
+
+  // sp must be within the usable part of the stack (not in guards)
+  bool sp_safe = (sp < thread->stack_base()) &&
+                 (sp >= thread->stack_base() - usable_stack_size);
+
+
+  if (!sp_safe) {
+    return false;
+  }
+
+  // When we are running interpreted code the machine stack pointer, SP, is
+  // set low enough so that the Java expression stack can grow and shrink
+  // without ever exceeding the machine stack bounds.  So, ESP >= SP.
+
+  // When we call out of an interpreted method, SP is incremented so that
+  // the space between SP and ESP is removed.  The SP saved in the callee's
+  // frame is the SP *before* this increment.  So, when we walk a stack of
+  // interpreter frames the sender's SP saved in a frame might be less than
+  // the SP at the point of call.
+
+  // So unextended sp must be within the stack but we need not check
+  // that unextended sp >= sp
+
+  bool unextended_sp_safe = (unextended_sp < thread->stack_base());
+
+  if (!unextended_sp_safe) {
+    return false;
+  }
+
+  // an fp must be within the stack and above (but not equal) sp
+  // second evaluation on fp+ is added to handle situation where fp is -1
+  bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base())));
+
+  // We know sp/unextended_sp are safe only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt
+  // to construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get in frame construction code
+
+  if (_cb != NULL) {
+
+    // First check if frame is complete and tester is reliable
+    // Unfortunately we can only check frame complete for runtime stubs and nmethod
+    // other generic buffer blobs are more problematic so we just assume they are
+    // ok. adapter blobs never have a frame complete and are never ok.
+
+    if (!_cb->is_frame_complete_at(_pc)) {
+      if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+        return false;
+      }
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!_cb->code_contains(_pc)) {
+      return false;
+    }
+
+    // Entry frame checks
+    if (is_entry_frame()) {
+      // an entry frame must have a valid fp.
+      return fp_safe && is_entry_frame_valid(thread);
+    }
+
+    intptr_t* sender_sp = NULL;
+    intptr_t* sender_unextended_sp = NULL;
+    address   sender_pc = NULL;
+    intptr_t* saved_fp =  NULL;
+
+    if (is_interpreted_frame()) {
+      // fp must be safe
+      if (!fp_safe) {
+        return false;
+      }
+
+      sender_pc = (address)this->fp()[return_addr_offset];
+      // for interpreted frames, the value below is the sender "raw" sp,
+      // which can be different from the sender unextended sp (the sp seen
+      // by the sender) because of current frame local variables
+      sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+      sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset];
+      saved_fp = (intptr_t*) this->fp()[link_offset];
+    } else {
+      // must be some sort of compiled/runtime frame
+      // fp does not have to be safe (although it could be checked for c1?)
+
+      // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc
+      if (_cb->frame_size() <= 0) {
+        return false;
+      }
+
+      sender_sp = _unextended_sp + _cb->frame_size();
+      // Is sender_sp safe?
+      if ((address)sender_sp >= thread->stack_base()) {
+        return false;
+      }
+
+      sender_unextended_sp = sender_sp;
+      sender_pc = (address) *(sender_sp - 1);  // read from the word directly below sender_sp
+      saved_fp = (intptr_t*) *(sender_sp - 2);  // read from the word below that
+    }
+
+
+    // If the potential sender is the interpreter then we can do some more checking
+    if (Interpreter::contains(sender_pc)) {
+
+      // fp is always saved in a recognizable place in any code we generate. However
+      // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp
+      // is really a frame pointer.
+
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+      frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+      return sender.is_interpreted_frame_valid(thread);
+    }
+
+    // We must always be able to find a recognizable pc
+    CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+    if (sender_pc == NULL || sender_blob == NULL) {
+      return false;
+    }
+
+    // Could be a zombie method
+    if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
+      return false;
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!sender_blob->code_contains(sender_pc)) {
+      return false;
+    }
+
+    // We should never be able to see an adapter if the current frame is something from code cache
+    if (sender_blob->is_adapter_blob()) {
+      return false;
+    }
+
+    // Could be the call_stub
+    if (StubRoutines::returns_to_call_stub(sender_pc)) {
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+      frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc);
+
+      // Validate the JavaCallWrapper an entry frame must have
+      address jcw = (address)sender.entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp());
+
+      return jcw_safe;
+    }
+
+    CompiledMethod* nm = sender_blob->as_compiled_method_or_null();
+    if (nm != NULL) {
+      if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+          nm->method()->is_method_handle_intrinsic()) {
+        return false;
+      }
+    }
+
+    // If the frame size is 0 (or less) something is bad because every nmethod has a non-zero frame size
+    // because the return address counts against the callee's frame.
+    if (sender_blob->frame_size() <= 0) {
+      assert(!sender_blob->is_compiled(), "should count return address at least");
+      return false;
+    }
+
+    // We should never be able to see anything here except an nmethod. If something in the
+    // code cache (current frame) is called by an entity within the code cache that entity
+    // should not be anything but the call stub (already covered), the interpreter (already covered)
+    // or an nmethod.
+    if (!sender_blob->is_compiled()) {
+        return false;
+    }
+
+    // Could put some more validation for the potential non-interpreted sender
+    // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+    // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+    // We've validated the potential sender that would be created
+    return true;
+  }
+
+  // Must be native-compiled frame. Since sender will try and use fp to find
+  // linkages it must be safe
+  if (!fp_safe) {
+    return false;
+  }
+
+  // Will the pc we fetch be non-zero (which we'll find at the oldest frame)
+  if ((address)this->fp()[return_addr_offset] == NULL) { return false; }
+
+  return true;
+}
+
+void frame::patch_pc(Thread* thread, address pc) {  // note: thread is not used in this body
+  address* pc_addr = &(((address*) sp())[-1]);  // the word just below sp() is the slot being patched
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
+                  p2i(pc_addr), p2i(*pc_addr), p2i(pc));
+  }
+  // Either the return address is the original one or we are going to
+  // patch in the same address that's already there.
+  assert(_pc == *pc_addr || pc == *pc_addr, "must be");
+  *pc_addr = pc;
+  _cb = CodeCache::find_blob(pc);  // re-resolve the code blob for the new pc
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    assert(original_pc == _pc, "expected original PC to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // leave _pc as is: the assert above checks it already equals original_pc
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+bool frame::is_interpreted_frame() const  {
+  return Interpreter::contains(pc());  // classification is made purely from this frame's pc
+}
+
+int frame::frame_size(RegisterMap* map) const {
+  // Distance, in stack words, from this frame's sp up to its sender's sp.
+  return this->sender(map).sp() - sp();
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // Turn the expression-stack byte offset into a word index (to deal with tsi).
+  const int word_index = Interpreter::expr_offset_in_bytes(offset) / wordSize;
+  // Arguments of an entry frame are always addressed relative to unextended_sp().
+  return unextended_sp() + word_index;
+}
+
+// sender_sp: the value stored in this interpreter frame's sender_sp slot
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);  // overwrite the saved sender_sp slot
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);  // the slot's own address, not its contents
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);  // pointer stored in the slot
+  // make sure the pointer points inside the frame, i.e. within [sp, fp)
+  assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer");
+  assert((intptr_t*) result < fp(),  "monitor end should be strictly below the frame pointer");
+  return result;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+  *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;  // same slot interpreter_frame_monitor_end() reads
+}
+
+// Used by template based interpreter deoptimization
+void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) {
+  *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp;  // store into the frame's last_sp slot
+}
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
+  assert(map != NULL, "map must be set");
+  // Java frame called from C; skip all C frames and return top C
+  // frame of that chunk as the sender
+  JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();  // anchor held by this entry frame's JavaCallWrapper
+  assert(!entry_frame_is_first(), "next Java fp must be non zero");
+  assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+  // Since we are walking the stack now this nested anchor is obviously walkable
+  // even if it wasn't when it was stacked.
+  if (!jfa->walkable()) {
+    // Capture _last_Java_pc (if needed) and mark anchor walkable.
+    jfa->capture_last_Java_pc();
+  }
+  map->clear();
+  assert(map->include_argument_oops(), "should be set by clear");
+  vmassert(jfa->last_Java_pc() != NULL, "not walkable");
+  frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());  // sender is rebuilt from the anchor's sp/fp/pc
+  return fr;
+}
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) {
+  frame fr;  // scratch frame; only _unextended_sp is filled in below
+
+  // This is ugly but it's better than to change {get,set}_original_pc
+  // to take an SP value as argument.  And it's only a debugging
+  // method anyway.
+  fr._unextended_sp = unextended_sp;
+
+  assert_cond(nm != NULL);
+  address original_pc = nm->get_original_pc(&fr);  // original pc is looked up via the scratch frame
+  assert(nm->insts_contains_inclusive(original_pc),
+         "original PC must be in the main code section of the the compiled method (or must be immediately following it)");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+  // Method handle intrinsic and lambda form call sites are ordinary call
+  // sites on riscv, so returning to one of them needs no sp adjustment;
+  // the only work left is a debug-only check of deoptimization pcs.
+  if (_cb == NULL) {
+    return;
+  }
+  CompiledMethod* const cm = _cb->as_compiled_method_or_null();
+  if (cm == NULL) {
+    return;
+  }
+  const bool at_deopt_pc = cm->is_deopt_entry(_pc) || cm->is_deopt_mh_entry(_pc);
+  if (at_deopt_pc) {
+    DEBUG_ONLY(verify_deopt_original_pc(cm, _unextended_sp));
+  }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+  // The interpreter and compiler(s) always save fp in a known
+  // location on entry. We must record where that location is
+  // so that if fp was live on callout from c2 we can find
+  // the saved copy no matter what it called.
+
+  // Since the interpreter always saves fp, if we record where it is then
+  // we don't have to always save fp on entry and exit to c2 compiled
+  // code; on entry will be enough.
+  assert(map != NULL, "map must be set");
+  map->set_location(::fp->as_VMReg(), (address) link_addr);
+  // This is weird: the "H" half ought to be at a higher address, however
+  // the oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+  map->set_location(::fp->as_VMReg()->next(), (address) link_addr);
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_interpreter_frame
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+  // SP is the raw SP from the sender after adapter or interpreter
+  // extension.
+  intptr_t* sender_sp = this->sender_sp();
+
+  // This is the sp before any possible extension (adapter/locals).
+  intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+#ifdef COMPILER2
+  assert(map != NULL, "map must be set");
+  if (map->update_map()) {
+    update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));  // record where this frame saved the caller's fp
+  }
+#endif // COMPILER2
+
+  return frame(sender_sp, unextended_sp, link(), sender_pc());  // sender's fp and pc come from this frame's link and return-pc slots
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_compiled_frame
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+  // we cannot rely upon the last fp having been saved to the thread
+  // in C2 code but it will have been pushed onto the stack. so we
+  // have to find it relative to the unextended sp
+
+  assert(_cb->frame_size() >= 0, "must have non-negative frame size");
+  intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
+  intptr_t* unextended_sp = l_sender_sp;  // sender's unextended sp is taken to be its sp here
+
+  // the return_address is always the word on the stack
+  address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset);
+
+  intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset);  // slot holding the sender's fp
+
+  assert(map != NULL, "map must be set");
+  if (map->update_map()) {
+    // Tell GC to use argument oopmaps for some runtime stubs that need it.
+    // For C1, the runtime stub might not have oop maps, so set this flag
+    // outside of update_register_map.
+    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    if (_cb->oop_maps() != NULL) {
+      OopMapSet::update_register_map(this, map);
+    }
+
+    // Since the prolog does the save and restore of FP there is no
+    // oopmap for it so we must fill in its location as if there was
+    // an oopmap entry since if our caller was compiled code there
+    // could be live jvm state in it.
+    update_map_with_saved_link(map, saved_fp_addr);
+  }
+
+  return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
+//------------------------------------------------------------------------------
+// frame::sender
+frame frame::sender(RegisterMap* map) const {
+  // Default is we don't have to follow them. The sender_for_xxx will
+  // update it accordingly
+  assert(map != NULL, "map must be set");
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame()) {
+    return sender_for_entry_frame(map);
+  }
+  if (is_interpreted_frame()) {
+    return sender_for_interpreter_frame(map);
+  }
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+  // This test looks odd: why is it not is_compiled_frame() ?  That's
+  // because stubs also have OOP maps.
+  if (_cb != NULL) {
+    return sender_for_compiled_frame(map);
+  }
+
+  // Must be native-compiled frame, i.e. the marshaling code for native
+  // methods that exists in the core system.
+  return frame(sender_sp(), link(), sender_pc());
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  // These are reasonable sanity checks: fp and sp must be non-NULL and word aligned
+  if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+    return false;
+  }
+  // These are hacks to keep us out of trouble.
+  // The problem with these is that they mask other problems
+  if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
+    return false;
+  }
+
+  // do some validation of frame elements
+
+  // first the method
+  Method* m = *interpreter_frame_method_addr();
+  // validate the method we'd find in this potential sender
+  if (!Method::is_valid_method(m)) {
+    return false;
+  }
+
+  // stack frames shouldn't be much larger than max_stack elements
+  // this test requires the use of unextended_sp which is the sp as seen by
+  // the current frame, and not sp which is the "raw" sp which could point
+  // further because of local variables of the callee method inserted after
+  // method arguments
+  if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
+    return false;
+  }
+
+  // validate bci/bcx
+  address bcp = interpreter_frame_bcp();
+  if (m->validate_bci_from_bcp(bcp) < 0) {
+    return false;
+  }
+
+  // validate constantPoolCache*
+  ConstantPoolCache* cp = *interpreter_frame_cache_addr();
+  if (MetaspaceObj::is_valid(cp) == false) {
+    return false;
+  }
+
+  // validate locals
+  address locals = (address) *interpreter_frame_locals_addr();
+  if (locals > thread->stack_base()) {
+    return false;
+  }
+
+  if (m->max_locals() > 0 && locals < (address) fp()) {
+    // fp in an interpreter frame on RISC-V is higher than on AArch64:
+    // it points to sender_sp rather than to sender_sp-2.
+    // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp,
+    // pointing to sender_sp-1 (with one padding slot).
+    // So we verify the 'locals' pointer only if max_locals > 0.
+    return false;
+  }
+
+  // We'd have to be pretty unlucky to be misled at this point
+  return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  Method* method = interpreter_frame_method();
+  BasicType type = method->result_type();
+
+  intptr_t* tos_addr = NULL;  // where a non-oop result is read from
+  if (method->is_native()) {
+    tos_addr = (intptr_t*)sp();
+    if (type == T_FLOAT || type == T_DOUBLE) {
+      // This is because we do a push(ltos) after push(dtos) in generate_native_entry.
+      tos_addr += 2 * Interpreter::stackElementWords;
+    }
+  } else {
+    tos_addr = (intptr_t*)interpreter_frame_tos_address();
+  }
+
+  switch (type) {
+    case T_OBJECT  :
+    case T_ARRAY   : {
+      oop obj;
+      if (method->is_native()) {
+        obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));  // native oop results come from the oop temp slot
+      } else {
+        oop* obj_p = (oop*)tos_addr;
+        obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
+      }
+      assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+      *oop_result = obj;  // oop results go to oop_result; value_result is left untouched
+      break;
+    }
+    case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
+    case T_BYTE    : value_result->b = *(jbyte*)tos_addr; break;
+    case T_CHAR    : value_result->c = *(jchar*)tos_addr; break;
+    case T_SHORT   : value_result->s = *(jshort*)tos_addr; break;
+    case T_INT     : value_result->i = *(jint*)tos_addr; break;
+    case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+    case T_FLOAT   : {
+        value_result->f = *(jfloat*)tos_addr;
+      break;
+    }
+    case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+    case T_VOID    : /* Nothing to do */ break;
+    default        : ShouldNotReachHere();
+  }
+
+  return type;  // the method's declared result type, whichever branch ran
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  const int word_index = Interpreter::expr_offset_in_bytes(offset) / wordSize;  // bytes -> words
+  return interpreter_frame_tos_address() + word_index;
+}
+
+#ifndef PRODUCT
+
+#define DESCRIBE_FP_OFFSET(name) \
+  values.describe(frame_no, fp() + frame::name##_offset, #name)
+
+void frame::describe_pd(FrameValues& values, int frame_no) {  // labels fp-relative slots; only interpreted frames get entries
+  if (is_interpreted_frame()) {
+    DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_method);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mirror);
+    DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+    DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+    DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  }
+}
+#endif
+
+intptr_t *frame::initial_deoptimization_info() {
+  // Not used on riscv, but we must return something; NULL is the placeholder.
+  return NULL;
+}
+
+intptr_t* frame::real_fp() const {
+  // A code blob with a positive frame size lets us derive the result
+  // directly from the unextended sp.
+  const int blob_frame_words = (_cb != NULL) ? _cb->frame_size() : 0;
+  if (blob_frame_words > 0) {
+    return unextended_sp() + blob_frame_words;
+  }
+  // Otherwise rely on fp(); a compiled frame is not expected to get here
+  // since its size should be known.
+  assert(!is_compiled_frame(), "unknown compiled frame size");
+  return fp();
+}
+
+#undef DESCRIBE_FP_OFFSET
+
+#ifndef PRODUCT
+// This is a generic constructor which is only used by pns() in debug.cpp; it casts the raw pointers and defers to init().
+frame::frame(void* ptr_sp, void* ptr_fp, void* pc) {
+  init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc);
+}
+
+void frame::pd_ps() {}  // intentionally empty in this port
+#endif
+
+void JavaFrameAnchor::make_walkable(JavaThread* thread) {
+  // last frame set?
+  if (last_Java_sp() == NULL) { return; }
+  // already walkable?
+  if (walkable()) { return; }
+  vmassert(Thread::current() == (Thread*)thread, "not current thread");
+  vmassert(last_Java_sp() != NULL, "not called from Java code?");  // already implied by the first early return
+  vmassert(last_Java_pc() == NULL, "already walkable");
+  capture_last_Java_pc();  // fills in _last_Java_pc from the stack
+  vmassert(walkable(), "something went wrong");
+}
+
+void JavaFrameAnchor::capture_last_Java_pc() {
+  vmassert(_last_Java_sp != NULL, "no last frame set");
+  vmassert(_last_Java_pc == NULL, "already walkable");
+  _last_Java_pc = (address)_last_Java_sp[-1];  // pc is read from the word just below last_Java_sp
+}
diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp
new file mode 100644
index 0000000000..18e021dcb9
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.hpp
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_HPP
+#define CPU_RISCV_FRAME_RISCV_HPP
+
+#include "runtime/synchronizer.hpp"
+
+// A frame represents a physical stack frame (an activation).  Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+//    [expression stack      ] * <- sp
+
+//    [monitors[0]           ]   \
+//     ...                        | monitor block size = k
+//    [monitors[k-1]         ]   /
+//    [frame initial esp     ] ( == &monitors[0], initially here)       initial_sp_offset
+//    [byte code index/pointr]                   = bcx()                bcx_offset
+
+//    [pointer to locals     ]                   = locals()             locals_offset
+//    [constant pool cache   ]                   = cache()              cache_offset
+
+//    [klass of method       ]                   = mirror()             mirror_offset
+//    [padding               ]
+
+//    [methodData            ]                   = mdp()                mdx_offset
+//    [Method                ]                   = method()             method_offset
+
+//    [last esp              ]                   = last_sp()            last_sp_offset
+//    [old stack pointer     ]                     (sender_sp)          sender_sp_offset
+
+//    [old frame pointer     ]
+//    [return pc             ]
+
+//    [last sp               ]   <- fp           = link()
+//    [oop temp              ]                     (only for native calls)
+
+//    [padding               ]                     (to preserve machine SP alignment)
+//    [locals and parameters ]
+//                               <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C Frame ------------------------------------------------
+// Stack: gcc with -fno-omit-frame-pointer
+//                    .
+//                    .
+//       +->          .
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       |   |   previous fp ------+
+//       |   | saved registers |
+//       |   | local variables |
+//       |   |       ...       | <-+
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       +------ previous fp   |   |
+//           | saved registers |   |
+//           | local variables |   |
+//       +-> |       ...       |   |
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       |   |   previous fp ------+
+//       |   | saved registers |
+//       |   | local variables |
+//       |   |       ...       | <-+
+//       |   +-----------------+   |
+//       |   | return address  |   |
+//       +------ previous fp   |   |
+//           | saved registers |   |
+//           | local variables |   |
+//   $fp --> |       ...       |   |
+//           +-----------------+   |
+//           | return address  |   |
+//           |   previous fp ------+
+//           | saved registers |
+//   $sp --> | local variables |
+//           +-----------------+
+// ------------------------------ C Frame ------------------------------------------------
+
+ public:
+  enum {
+    pc_return_offset                                 =  0,
+    // All frames (word offsets relative to fp)
+    link_offset                                      = -2,
+    return_addr_offset                               = -1,
+    sender_sp_offset                                 =  0,
+    // Interpreter frames
+    interpreter_frame_oop_temp_offset                =  1, // for native calls only
+
+    interpreter_frame_sender_sp_offset               = -3,
+    // outgoing sp before a call to an invoked method
+    interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
+    interpreter_frame_method_offset                  = interpreter_frame_last_sp_offset - 1,
+    interpreter_frame_mdp_offset                     = interpreter_frame_method_offset - 1,
+    interpreter_frame_padding_offset                 = interpreter_frame_mdp_offset - 1,
+    interpreter_frame_mirror_offset                  = interpreter_frame_padding_offset - 1,
+    interpreter_frame_cache_offset                   = interpreter_frame_mirror_offset - 1,
+    interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
+    interpreter_frame_bcp_offset                     = interpreter_frame_locals_offset - 1,
+    interpreter_frame_initial_sp_offset              = interpreter_frame_bcp_offset - 1,
+
+    interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset, // both monitor block ends
+    interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset, // alias the initial_sp slot
+
+    // Entry frames
+    // n.b. these values are determined by the layout defined in
+    // stubGenerator for the Java call stub
+    entry_frame_after_call_words                     =  34,
+    entry_frame_call_wrapper_offset                  = -10,
+
+    // we don't need a save area
+    arg_reg_save_area_bytes                          =  0
+  };
+
+  intptr_t ptr_at(int offset) const {  // read the word in the slot at the given offset
+    return *ptr_at_addr(offset);
+  }
+
+  void ptr_at_put(int offset, intptr_t value) {  // write a word into the slot at the given offset
+    *ptr_at_addr(offset) = value;
+  }
+
+ private:
+  // an additional field beyond _sp and _pc:
+  intptr_t*   _fp; // frame pointer
+  // The interpreter and adapters will extend the frame of the caller.
+  // Since oopMaps are based on the sp of the caller before extension
+  // we need to know that value. However in order to compute the address
+  // of the return address we need the real "raw" sp. Since sparc already
+  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+  // original sp we use that convention.
+
+  intptr_t*     _unextended_sp;
+  void adjust_unextended_sp();
+
+  intptr_t* ptr_at_addr(int offset) const {  // slot address shared by ptr_at() and ptr_at_put()
+    return (intptr_t*) addr_at(offset);
+  }
+
+#ifdef ASSERT
+  // Used in frame::adjust_unextended_sp (debug builds only)
+  static void verify_deopt_original_pc(   CompiledMethod* nm, intptr_t* unextended_sp);
+#endif
+
+ public:
+  // Constructors
+
+  frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+  frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc);
+
+  frame(intptr_t* ptr_sp, intptr_t* ptr_fp);
+
+  void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc);
+
+  // accessors for the instance variables
+  // Note: not necessarily the real 'frame pointer' (see real_fp)
+  intptr_t*   fp() const { return _fp; }
+
+  inline address* sender_pc_addr() const;
+
+  // expression stack tos if we are nested in a java call
+  intptr_t* interpreter_frame_last_sp() const;
+
+  // helper to update a map with the location of the saved fp (frame pointer)
+  static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+  // deoptimization support
+  void interpreter_frame_set_last_sp(intptr_t* last_sp);
+
+  static jint interpreter_frame_expression_stack_direction() { return -1; }
+
+#endif // CPU_RISCV_FRAME_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
new file mode 100644
index 0000000000..abd5bda7e4
--- /dev/null
+++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP
+#define CPU_RISCV_FRAME_RISCV_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for RISCV frames:
+
+// Constructors:
+
+inline frame::frame() {  // empty frame: every pointer NULL, deopt state unknown
+  _pc = NULL;
+  _sp = NULL;
+  _unextended_sp = NULL;
+  _fp = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+static int spin;
+
+inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+  // Plain (sp, fp, pc) initialization: no separate caller-side sp is
+  // supplied, so the unextended sp starts out equal to sp.
+  _sp = ptr_sp;
+  _unextended_sp = ptr_sp;
+  _fp = ptr_fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;  // record the original pc instead of the one passed in
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) {
+  init(ptr_sp, ptr_fp, pc);  // all the work is done by init()
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) {
+  // Same as the (sp, fp, pc) form, except that the caller supplies the
+  // unextended sp explicitly instead of it defaulting to sp.
+  _sp = ptr_sp;
+  _unextended_sp = unextended_sp;
+  _fp = ptr_fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;  // record the original pc instead of the one passed in
+    assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc),
+           "original PC must be in the main code section of the the compiled method (or must be immediately following it)");
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) {
+  // No pc is supplied to this form: it is fetched from the word just
+  // below sp, and the unextended sp starts out equal to sp.
+  _sp = ptr_sp;
+  _unextended_sp = ptr_sp;
+  _fp = ptr_fp;
+  _pc = (address)(ptr_sp[-1]);
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky the junk value could be to a zombied method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specialized frame constructor instead of this one.
+  // Then we could use the assert below. However this assert is of somewhat dubious
+  // value.
+
+  _cb = CodeCache::find_blob(_pc);
+  adjust_unextended_sp();
+
+  address original_pc = CompiledMethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;  // record the original pc instead of the fetched one
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  // Frames are equal when sp, unextended sp, fp and pc all match.
+  bool ret =  sp() == other.sp() &&
+              unextended_sp() == other.unextended_sp() &&
+              fp() == other.fp() && pc() == other.pc();
+  // Equal frames must also agree on code blob and deopt state (debug check only).
+  assert(!ret || (cb() == other.cb() && _deopt_state == other._deopt_state), "inconsistent construction");
+  return ret;
+}
+
+// Return unique id for this frame (its unextended sp). The id must have a value where we
+// can distinguish identity and younger/older relationship. NULL represents an invalid
+// (incomparable) frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id, i.e. its id is the higher address
+inline bool frame::is_older(intptr_t* id) const   { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() > id ; }
+
+inline intptr_t* frame::link() const              { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }  // unchecked read of the link slot; see link_or_null()
+
+inline intptr_t* frame::link_or_null() const {
+  intptr_t** const link_slot = (intptr_t**) addr_at(link_offset);  // slot that link() reads
+  if (!os::is_readable_pointer(link_slot)) { return NULL; } else { return *link_slot; }
+}
+
+inline intptr_t* frame::unextended_sp() const     { return _unextended_sp; }  // plain accessor for _unextended_sp
+
+// Return address: the slot's address, the pc stored in it, and the sender's sp (the address at sender_sp_offset)
+inline address* frame::sender_pc_addr() const     { return (address*) addr_at(return_addr_offset); }
+inline address  frame::sender_pc() const          { return *sender_pc_addr(); }
+inline intptr_t* frame::sender_sp() const         { return addr_at(sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**)addr_at(interpreter_frame_locals_offset);  // address of the locals slot, not the locals pointer itself
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+  return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);  // value stored in the last_sp slot
+}
+
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_bcp_offset);  // address of the bcp slot
+}
+
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_mdp_offset);  // address of the mdp slot
+}
+
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+  return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);  // address of the cache slot
+}
+
+// Method
+
+inline Method** frame::interpreter_frame_method_addr() const {
+  return (Method**)addr_at(interpreter_frame_method_offset);  // address of the Method* slot
+}
+
+// Mirror
+
+inline oop* frame::interpreter_frame_mirror_addr() const {
+  return (oop*)addr_at(interpreter_frame_mirror_offset);  // address of the mirror slot
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  intptr_t* last_sp = interpreter_frame_last_sp();
+  if (last_sp != NULL) {
+    // A recorded last_sp wins over sp(), which an adapter may have
+    // extended or shrunk.  At least check that we don't fall behind the
+    // legal region.
+    // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+    assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+    return last_sp;
+  }
+  return sp();  // no last_sp recorded: fall back to the raw sp
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);  // fp-relative address of the oop temp slot
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  return BasicObjectLock::size();  // one monitor entry is one BasicObjectLock
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  // The expression stack starts one word below the end of the monitor block.
+  return ((intptr_t*) interpreter_frame_monitor_end()) - 1;
+}
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);  // address of the slot holding the JavaCallWrapper*
+}
+
+
+// Compiled frames
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  oop* result_adr = (oop *)map->location(x10->as_VMReg());  // where the map says x10 was saved
+  guarantee(result_adr != NULL, "bad register save location");
+  return (*result_adr);
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  oop* result_adr = (oop *)map->location(x10->as_VMReg());  // same save slot saved_oop_result() reads
+  guarantee(result_adr != NULL, "bad register save location");
+  *result_adr = obj;
+}
+
+#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000..c5ccf040c7
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1BarrierSetAssembler.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+#include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
+#include "gc/g1/heapRegion.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/g1/c1/g1BarrierSetC1.hpp"
+#endif
+
+#define __ masm->
+
+void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                            Register addr, Register count, RegSet saved_regs) {
+  // SATB pre-barrier for an oop array store: when marking is active, hands
+  // (addr, count) to the G1 runtime; saved_regs are preserved around the call.
+  if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
+    return;  // nothing is emitted for an uninitialized destination
+  }
+
+  Label done;
+  Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  // Is marking active?  The load width follows the declared width of the flag.
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) != 4) {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ lbu(t0, in_progress);
+  } else {
+    __ lwu(t0, in_progress);
+  }
+  __ beqz(t0, done);
+
+  __ push_reg(saved_regs, sp);
+  // Place (addr, count) in (c_rarg0, c_rarg1) without overwriting a source early.
+  if (count != c_rarg0) {
+    __ mv(c_rarg0, addr);
+    __ mv(c_rarg1, count);
+  } else if (addr != c_rarg1) {
+    __ mv(c_rarg1, count);
+    __ mv(c_rarg0, addr);
+  } else {
+    // exactly backwards!!  Swap the two through t0.
+    __ mv(t0, c_rarg0);
+    __ mv(c_rarg0, c_rarg1);
+    __ mv(c_rarg1, t0);
+  }
+  address pre_entry = UseCompressedOops
+      ? CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry)
+      : CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry);
+  __ call_VM_leaf(pre_entry, 2);
+  __ pop_reg(saved_regs, sp);
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                             Register start, Register count, Register tmp, RegSet saved_regs) {
+  __ push_reg(saved_regs, sp);  // preserve the caller-specified registers across the leaf call
+  assert_different_registers(start, count, tmp);
+  assert_different_registers(c_rarg0, count);  // the mv into c_rarg0 below must not overwrite count
+  __ mv(c_rarg0, start);
+  __ mv(c_rarg1, count);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2);  // unconditional runtime call with (start, count)
+  __ pop_reg(saved_regs, sp);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,        // address to load the previous value from, or noreg if pre_val already holds it
+                                                 Register pre_val,    // previous value (loaded here when obj != noreg); must not be noreg
+                                                 Register thread,     // must be xthread
+                                                 Register tmp,        // scratch, clobbered; must not be noreg
+                                                 bool tosca_live,     // not referenced in this body
+                                                 bool expand_call) {  // selects super_call_VM_leaf over call_VM_leaf on the slow path
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+  // Emitted code also uses t0 as scratch, hence the register checks below.
+  assert(thread == xthread, "must be");
+
+  Label done;
+  Label runtime;
+
+  assert_different_registers(obj, pre_val, tmp, t0);
+  assert(pre_val != noreg &&  tmp != noreg, "expecting a register");
+
+  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
+    __ lwu(tmp, in_progress);
+  } else {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ lbu(tmp, in_progress);
+  }
+  __ beqz(tmp, done);                      // marking inactive: the barrier does nothing
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+  }
+
+  // Is the previous value null?
+  __ beqz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  __ ld(tmp, index);                       // tmp := *index_adr
+  __ beqz(tmp, runtime);                   // tmp == 0?
+                                           // If yes, goto runtime
+
+  __ sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
+  __ sd(tmp, index);                       // *index_adr := tmp
+  __ ld(t0, buffer);
+  __ add(tmp, tmp, t0);                    // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  __ sd(pre_val, Address(tmp, 0));
+  __ j(done);
+
+  __ bind(runtime);                        // slow path: index was 0, let the runtime handle pre_val
+
+  __ push_call_clobbered_registers();
+  if (expand_call) {
+    assert(pre_val != c_rarg1, "smashed arg");
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+  }
+  __ pop_call_clobbered_registers();
+
+  __ bind(done);
+
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,  // address that was stored to
+                                                  Register new_val,     // value that was stored (compared against store_addr)
+                                                  Register thread,      // must be xthread
+                                                  Register tmp,         // scratch; ends up holding the card address
+                                                  Register tmp2) {      // scratch
+  assert(thread == xthread, "must be");
+  assert_different_registers(store_addr, new_val, thread, tmp, tmp2,
+                             t0);
+  assert(store_addr != noreg && new_val != noreg && tmp != noreg &&
+         tmp2 != noreg, "expecting a register");
+
+  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  __ xorr(tmp, store_addr, new_val);
+  __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
+  __ beqz(tmp, done);
+
+  // crosses regions, storing NULL?
+
+  __ beqz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  // The card table base is materialized by load_byte_map_base() below, and the
+  // one-byte card entry width was already asserted above.
+  const Register card_addr = tmp;
+
+  __ srli(card_addr, store_addr, CardTable::card_shift);
+
+  // get the address of the card
+  __ load_byte_map_base(tmp2);
+  __ add(card_addr, card_addr, tmp2);
+  __ lbu(tmp2, Address(card_addr));
+  __ mv(t0, (int)G1CardTable::g1_young_card_val());
+  __ beq(tmp2, t0, done);                  // young card: nothing to do
+
+  assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+
+  __ membar(MacroAssembler::StoreLoad);    // fence before re-reading the card
+
+  __ lbu(tmp2, Address(card_addr));
+  __ beqz(tmp2, done);                     // 0 == dirty (asserted above): already dirty
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  __ sb(zr, Address(card_addr));
+
+  __ ld(t0, queue_index);
+  __ beqz(t0, runtime);                    // index == 0: take the runtime path
+  __ sub(t0, t0, wordSize);
+  __ sd(t0, queue_index);
+
+  __ ld(tmp2, buffer);
+  __ add(t0, tmp2, t0);
+  __ sd(card_addr, Address(t0, 0));        // record the card address at buffer + index
+  __ j(done);
+
+  __ bind(runtime);
+  // save the live input values
+  RegSet saved = RegSet::of(store_addr, new_val);
+  __ push_reg(saved, sp);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
+  __ pop_reg(saved, sp);
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_oop = is_reference_type(type);
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;  // weak or phantom reference access
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);  // the load itself is delegated
+  if (on_oop && on_reference) {
+    // RA is live.  It must be saved around calls.
+    __ enter(); // barrier may call runtime
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm /* masm */,
+                         noreg /* obj */,
+                         dst /* pre_val */,
+                         xthread /* thread */,
+                         tmp1 /* tmp */,
+                         true /* tosca_live */,
+                         true /* expand_call */);
+    __ leave();  // pairs with the enter() above
+  }
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  // flatten object address if needed (x13 holds the destination address from here on)
+  if (dst.offset() == 0) {
+    if (dst.base() != x13) {
+      __ mv(x13, dst.base());
+    }
+  } else {
+    __ la(x13, dst);
+  }
+
+  g1_write_barrier_pre(masm,
+                       x13 /* obj */,
+                       tmp2 /* pre_val */,
+                       xthread /* thread */,
+                       tmp1  /* tmp */,
+                       val != noreg /* tosca_live */,
+                       false /* expand_call */);
+
+  if (val == noreg) {
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg);  // noreg value: no post-barrier is emitted
+  } else {
+    // G1 barrier needs uncompressed oop for region cross check.
+    Register new_val = val;
+    if (UseCompressedOops) {
+      new_val = t1;  // keep an unencoded copy in t1 for the post-barrier
+      __ mv(new_val, val);
+    }
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg);
+    g1_write_barrier_post(masm,
+                          x13 /* store_adr */,
+                          new_val /* new_val */,
+                          xthread /* thread */,
+                          tmp1 /* tmp */,
+                          tmp2 /* tmp2 */);
+  }
+}
+
+#ifdef COMPILER1
+
+#undef __
+#define __ ce->masm()->
+
+void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) {
+  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+  __ bind(*stub->entry());
+
+  assert(stub->pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = stub->pre_val()->as_register();
+
+  if (stub->do_load()) {
+    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/);
+  }
+  __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);  // zero previous value: skip the runtime stub
+  ce->store_parameter(stub->pre_val()->as_register(), 0);  // previous value becomes stub parameter 0
+  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));  // call the pre-barrier runtime code blob
+  __ j(*stub->continuation());  // resume at the stub's continuation
+}
+
+void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
+  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+  __ bind(*stub->entry());
+  assert(stub->addr()->is_register(), "Precondition");
+  assert(stub->new_val()->is_register(), "Precondition");
+  Register new_val_reg = stub->new_val()->as_register();
+  __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);  // zero new value: skip the runtime stub
+  ce->store_parameter(stub->addr()->as_pointer_register(), 0);  // store address becomes stub parameter 0
+  __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));  // call the post-barrier runtime code blob
+  __ j(*stub->continuation());  // resume at the stub's continuation
+}
+
+#undef __
+
+#define __ sasm->
+
+void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
+  __ prologue("g1_pre_barrier", false);
+
+  // Out-of-line pre-barrier stub: enqueues stub parameter 0 in the thread's SATB buffer.
+
+  // arg0 : previous value of memory
+  const Register pre_val = x10;
+  const Register thread = xthread;
+  const Register tmp = t0;
+
+  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  Label done;
+  Label runtime;
+
+  // Is marking still active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {  // 4-byte width
+    __ lwu(tmp, in_progress);
+  } else {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ lbu(tmp, in_progress);
+  }
+  __ beqz(tmp, done);
+
+  // Can we store original value in the thread's buffer?
+  __ ld(tmp, queue_index);
+  __ beqz(tmp, runtime);       // index == 0: take the runtime path
+
+  __ sub(tmp, tmp, wordSize);  // tmp := index - wordSize
+  __ sd(tmp, queue_index);     // write the new index back
+  __ ld(t1, buffer);
+  __ add(tmp, tmp, t1);        // tmp := buffer + index
+  __ load_parameter(0, t1);    // t1 := previous value (stub parameter 0)
+  __ sd(t1, Address(tmp, 0));  // record it in the buffer
+  __ j(done);
+
+  __ bind(runtime);
+  __ push_call_clobbered_registers();
+  __ load_parameter(0, pre_val);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
+  __ pop_call_clobbered_registers();
+  __ bind(done);
+
+  __ epilogue();
+}
+
+void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
+  __ prologue("g1_post_barrier", false);
+
+  // arg0 : store_address
+  Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp; not referenced below, load_parameter(0, ...) fetches the argument
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // At this point we know new_value is non-NULL and the new_value crosses regions.
+  // Must check to see if card is already dirty
+  const Register thread = xthread;
+
+  Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  const Register card_offset = t1;
+  // RA is free here, so we can use it to hold the byte_map_base.
+  const Register byte_map_base = ra;
+
+  assert_different_registers(card_offset, byte_map_base, t0);
+
+  __ load_parameter(0, card_offset);                         // card_offset := store address (stub parameter 0)
+  __ srli(card_offset, card_offset, CardTable::card_shift);  // card_offset := store address >> card_shift
+  __ load_byte_map_base(byte_map_base);
+
+  // Convert card offset into an address in card_addr
+  Register card_addr = card_offset;
+  __ add(card_addr, byte_map_base, card_addr);
+
+  __ lbu(t0, Address(card_addr, 0));
+  __ sub(t0, t0, (int)G1CardTable::g1_young_card_val());  // t0 == 0 iff the card holds the young value
+  __ beqz(t0, done);
+
+  assert((int)CardTable::dirty_card_val() == 0, "must be 0");
+
+  __ membar(MacroAssembler::StoreLoad);  // fence before re-reading the card
+  __ lbu(t0, Address(card_addr, 0));
+  __ beqz(t0, done);                     // 0 == dirty (asserted above): already dirty
+
+  // storing region crossing non-NULL, card is clean.
+  // dirty card and log.
+  __ sb(zr, Address(card_addr, 0));
+
+  __ ld(t0, queue_index);
+  __ beqz(t0, runtime);                  // index == 0: take the runtime path
+  __ sub(t0, t0, wordSize);
+  __ sd(t0, queue_index);
+
+  // Reuse RA to hold buffer_addr
+  const Register buffer_addr = ra;
+
+  __ ld(buffer_addr, buffer);
+  __ add(t0, buffer_addr, t0);
+  __ sd(card_addr, Address(t0, 0));      // record the card address at buffer + index
+  __ j(done);
+
+  __ bind(runtime);
+  __ push_call_clobbered_registers();
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread);
+  __ pop_call_clobbered_registers();
+  __ bind(done);
+  __ epilogue();
+}
+
+#undef __
+
+#endif // COMPILER1
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000..37bc183f39
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+#include "utilities/macros.hpp"
+
+#ifdef COMPILER1
+class LIR_Assembler;
+#endif
+class StubAssembler;
+class G1PreBarrierStub;
+class G1PostBarrierStub;
+
+class G1BarrierSetAssembler: public ModRefBarrierSetAssembler {  // RISC-V code generation for G1 barriers
+protected:
+  void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                       Register addr, Register count, RegSet saved_regs);  // pre-barrier for oop array writes
+  void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                        Register start, Register count, Register tmp, RegSet saved_regs);  // post-barrier for oop array writes
+
+  void g1_write_barrier_pre(MacroAssembler* masm,  // SATB pre-barrier: logs the previous value
+                            Register obj,
+                            Register pre_val,
+                            Register thread,
+                            Register tmp,
+                            bool tosca_live,
+                            bool expand_call);
+
+  void g1_write_barrier_post(MacroAssembler* masm,  // card-marking post-barrier for a single store
+                             Register store_addr,
+                             Register new_val,
+                             Register thread,
+                             Register tmp,
+                             Register tmp2);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);  // oop store wrapped in pre/post barriers
+
+public:
+#ifdef COMPILER1
+  void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);    // C1 slow-path stub for the pre-barrier
+  void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub);  // C1 slow-path stub for the post-barrier
+
+  void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);   // runtime stub called by gen_pre_barrier_stub code
+  void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm);  // runtime stub called by gen_post_barrier_stub code
+#endif
+
+  void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+               Register dst, Address src, Register tmp1, Register tmp_thread);  // load; weak/phantom oop loads also run the pre-barrier
+};
+
+#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
new file mode 100644
index 0000000000..8735fd014f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
+#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
+
+const size_t G1MergeHeapRootsPrefetchCacheSize = 16;  // RISC-V value; presumably sizes G1's merge-heap-roots prefetch cache - TODO confirm against shared G1 code
+
+#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000..2b439280fa
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/classLoaderData.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "memory/universe.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register dst, Address src, Register tmp1, Register tmp_thread) {
+  // RA is live. It must be saved around calls.
+  // Plain typed load from src, no GC barrier; tmp1 and tmp_thread are not referenced here.
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
+  bool is_not_null = (decorators & IS_NOT_NULL) != 0;
+  switch (type) {
+    case T_OBJECT:  // fall through
+    case T_ARRAY: {
+      if (in_heap) {
+        if (UseCompressedOops) {
+          __ lwu(dst, src);  // 32-bit load of the compressed oop, decoded below
+          if (is_not_null) {
+            __ decode_heap_oop_not_null(dst);
+          } else {
+            __ decode_heap_oop(dst);
+          }
+        } else {
+          __ ld(dst, src);
+        }
+      } else {
+        assert(in_native, "why else?");
+        __ ld(dst, src);  // IN_NATIVE oops are loaded as full 64-bit values
+      }
+      break;
+    }
+    case T_BOOLEAN: __ load_unsigned_byte (dst, src); break;
+    case T_BYTE:    __ load_signed_byte   (dst, src); break;
+    case T_CHAR:    __ load_unsigned_short(dst, src); break;
+    case T_SHORT:   __ load_signed_short  (dst, src); break;
+    case T_INT:     __ lw                 (dst, src); break;
+    case T_LONG:    __ ld                 (dst, src); break;
+    case T_ADDRESS: __ ld                 (dst, src); break;
+    case T_FLOAT:   __ flw                (f10, src); break;  // result goes to f10, not dst
+    case T_DOUBLE:  __ fld                (f10, src); break;  // result goes to f10, not dst
+    default: Unimplemented();
+  }
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Address dst, Register val, Register tmp1, Register tmp2) {  // plain typed store, no GC barrier; tmp1/tmp2 not referenced
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_native = (decorators & IN_NATIVE) != 0;
+  switch (type) {
+    case T_OBJECT: // fall through
+    case T_ARRAY: {
+      val = val == noreg ? zr : val;  // noreg means "store zero"
+      if (in_heap) {
+        if (UseCompressedOops) {
+          assert(!dst.uses(val), "not enough registers");
+          if (val != zr) {
+            __ encode_heap_oop(val);  // encodes in place, so val is clobbered
+          }
+          __ sw(val, dst);
+        } else {
+          __ sd(val, dst);
+        }
+      } else {
+        assert(in_native, "why else?");
+        __ sd(val, dst);  // IN_NATIVE oops are stored as full 64-bit values
+      }
+      break;
+    }
+    case T_BOOLEAN:
+      __ andi(val, val, 0x1);  // boolean is true if LSB is 1
+      __ sb(val, dst);
+      break;
+    case T_BYTE:    __ sb(val, dst); break;
+    case T_CHAR:    __ sh(val, dst); break;
+    case T_SHORT:   __ sh(val, dst); break;
+    case T_INT:     __ sw(val, dst); break;
+    case T_LONG:    __ sd(val, dst); break;
+    case T_ADDRESS: __ sd(val, dst); break;
+    case T_FLOAT:   __ fsw(f10,  dst); break;  // value is taken from f10, not val
+    case T_DOUBLE:  __ fsd(f10,  dst); break;  // value is taken from f10, not val
+    default: Unimplemented();
+  }
+
+}
+
+void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                                        Register obj, Register tmp, Label& slowpath) {  // jni_env, tmp and slowpath are not referenced here
+  // If mask changes we need to ensure that the inverse is still encodable as an immediate
+  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1);
+  __ andi(obj, obj, ~JNIHandles::weak_tag_mask);  // clear the weak tag bit of the handle
+  __ ld(obj, Address(obj, 0));             // *obj
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
+                                        Register var_size_in_bytes,
+                                        int con_size_in_bytes,
+                                        Register tmp1,        // not referenced in this body
+                                        Register tmp2,        // scratch: holds the new top ("end")
+                                        Label& slow_case,     // taken when the new top exceeds the TLAB end
+                                        bool is_far) {
+  assert_different_registers(obj, tmp2);
+  assert_different_registers(obj, var_size_in_bytes);
+  Register end = tmp2;
+
+  __ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));  // obj := current TLAB top
+  if (var_size_in_bytes == noreg) {
+    __ la(end, Address(obj, con_size_in_bytes));  // constant size
+  } else {
+    __ add(end, obj, var_size_in_bytes);          // size held in a register
+  }
+  __ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));   // t0 := TLAB end (t0 is clobbered)
+  __ bgtu(end, t0, slow_case, is_far);
+
+  // update the tlab top pointer
+  __ sd(end, Address(xthread, JavaThread::tlab_top_offset()));
+
+  // recover var_size_in_bytes if necessary
+  if (var_size_in_bytes == end) {
+    __ sub(var_size_in_bytes, var_size_in_bytes, obj);  // end - obj is the original size
+  }
+}
+
+// Defines obj, preserves var_size_in_bytes; uses t0, t1 and tmp1 as scratch.
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj,
+                                        Register var_size_in_bytes,
+                                        int con_size_in_bytes,
+                                        Register tmp1,
+                                        Label& slow_case,
+                                        bool is_far) {
+  assert_cond(masm != NULL);
+  assert_different_registers(obj, var_size_in_bytes, tmp1);
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    __ j(slow_case);  // the heap has no inline contiguous allocation: always use the slow path
+  } else {
+    Register end = tmp1;
+    Label retry;
+    __ bind(retry);
+
+    // Get the current end of the heap
+    ExternalAddress address_end((address) Universe::heap()->end_addr());
+    {
+      int32_t offset;
+      __ la_patchable(t1, address_end, offset);
+      __ ld(t1, Address(t1, offset));
+    }
+
+    // Get the current top of the heap
+    ExternalAddress address_top((address) Universe::heap()->top_addr());
+    {
+      int32_t offset;
+      __ la_patchable(t0, address_top, offset);
+      __ addi(t0, t0, offset);
+      __ lr_d(obj, t0, Assembler::aqrl);  // load-reserved: obj := current heap top
+    }
+
+    // Adjust it by the size of our new object
+    if (var_size_in_bytes == noreg) {
+      __ la(end, Address(obj, con_size_in_bytes));
+    } else {
+      __ add(end, obj, var_size_in_bytes);
+    }
+
+    // if end < obj then we wrapped around high memory
+    __ bltu(end, obj, slow_case, is_far);
+
+    __ bgtu(end, t1, slow_case, is_far);  // new top would pass the heap end
+
+    // If heap_top hasn't been changed by some other thread, update it.
+    __ sc_d(t1, end, t0, Assembler::rl);  // store-conditional; t1 receives the status
+    __ bnez(t1, retry);                   // nonzero status: start over
+
+    incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1);
+  }
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
+                                               Register var_size_in_bytes,
+                                               int con_size_in_bytes,
+                                               Register tmp1) {
+  assert(tmp1->is_valid(), "need temp reg");
+  const Address allocated_bytes(xthread, in_bytes(JavaThread::allocated_bytes_offset()));
+  __ ld(tmp1, allocated_bytes);               // tmp1 := current per-thread counter
+  if (!var_size_in_bytes->is_valid()) {
+    __ add(tmp1, tmp1, con_size_in_bytes);    // constant-sized allocation
+  } else {
+    __ add(tmp1, tmp1, var_size_in_bytes);    // size held in a register
+  }
+  __ sd(tmp1, allocated_bytes);               // write the updated counter back
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000..984d94f4c3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "memory/allocation.hpp"
+#include "oops/access.hpp"
+
+class BarrierSetAssembler: public CHeapObj<mtGC> {  // base of the RISC-V GC barrier code generators declared in this port
+private:
+  void incr_allocated_bytes(MacroAssembler* masm,
+                            Register var_size_in_bytes, int con_size_in_bytes,
+                            Register tmp1 = noreg);  // scratch register; the definition asserts it is valid, so noreg is rejected
+
+public:
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                  Register src, Register dst, Register count, RegSet saved_regs) {}  // emits nothing here
+  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                  Register start, Register count, Register tmp, RegSet saved_regs) {}  // emits nothing here
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp_thread);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
+
+  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                             Register obj, Register tmp, Label& slowpath);
+
+  virtual void tlab_allocate(MacroAssembler* masm,
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register tmp1,                     // temp register
+    Register tmp2,                     // temp register
+    Label&   slow_case,                // continuation point if fast allocation fails
+    bool is_far = false                // presumably the same far-branch flag as in eden_allocate - TODO confirm
+  );
+
+  void eden_allocate(MacroAssembler* masm,
+    Register obj,                      // result: pointer to object after successful allocation
+    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,        // object size in bytes if   known at compile time
+    Register tmp1,                     // temp register
+    Label&   slow_case,                // continuation point if fast allocation fails
+    bool is_far = false                // forwarded to the conditional branches that target slow_case
+  );
+  virtual void barrier_stubs_init() {}  // emits nothing here
+
+  virtual ~BarrierSetAssembler() {}
+};
+
+#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000..671cad68b2
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/cardTableBarrierSetAssembler.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "interpreter/interp_masm.hpp"
+
+#define __ masm->
+
+
+void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) {  // emits the card mark for obj
+  assert_different_registers(obj, tmp);  // tmp is loaded with the table base while obj is still live
+  BarrierSet* bs = BarrierSet::barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
+
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");  // byte-wide cards are assumed by lbu/sb below
+
+  __ srli(obj, obj, CardTable::card_shift);  // obj is overwritten with its card index
+
+  assert(CardTable::dirty_card_val() == 0, "must be");  // lets the stores below dirty a card by writing zr
+
+  __ load_byte_map_base(tmp);
+  __ add(tmp, obj, tmp);  // tmp = byte map base + card index, i.e. the card's address
+
+  if (UseCondCardMark) {  // conditional marking: store only when the card is not already dirty
+    Label L_already_dirty;
+    __ membar(MacroAssembler::StoreLoad);
+    __ lbu(t1,  Address(tmp));  // NOTE(review): t1 is clobbered here and is not covered by the register assert above
+    __ beqz(t1, L_already_dirty);  // 0 is the dirty value, so nothing to write
+    __ sb(zr, Address(tmp));
+    __ bind(L_already_dirty);
+  } else {
+    if (ct->scanned_concurrently()) {
+      __ membar(MacroAssembler::StoreStore);  // emitted only for concurrently scanned card tables
+    }
+    __ sb(zr, Address(tmp));  // unconditional card mark
+  }
+}
+
+void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                                    Register start, Register count, Register tmp, RegSet saved_regs) {
+  assert_different_registers(start, tmp);  // start, count and tmp are all overwritten by the emitted code
+  assert_different_registers(count, tmp);
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+
+  Label L_loop, L_done;
+  const Register end = count;  // alias: count's register is reused to hold the end address
+
+  __ beqz(count, L_done); // zero count - nothing to do
+  // end = start + (count << LogBytesPerHeapOop)
+  __ shadd(end, count, start, count, LogBytesPerHeapOop);
+  __ sub(end, end, BytesPerHeapOop); // last element address to make inclusive
+
+  __ srli(start, start, CardTable::card_shift);  // card index of the first element
+  __ srli(end, end, CardTable::card_shift);  // card index of the last element
+  __ sub(count, end, start); // last card's offset from the first card (number of cards minus one)
+
+  __ load_byte_map_base(tmp);
+  __ add(start, start, tmp);  // start = address of the first card
+  if (ct->scanned_concurrently()) {
+    __ membar(MacroAssembler::StoreStore);
+  }
+
+  __ bind(L_loop);  // walks from the last card down to the first, inclusive
+  __ add(tmp, start, count);
+  __ sb(zr, Address(tmp));  // store zero into the card byte
+  __ sub(count, count, 1);
+  __ bgez(count, L_loop);  // offset 0 (the first card) is still processed
+  __ bind(L_done);
+}
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Address dst, Register val, Register tmp1, Register tmp2) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool is_array = (decorators & IS_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = is_array || on_anonymous;  // precise: mark the card of the exact slot, not of the base address
+
+  bool needs_post_barrier = val != noreg && in_heap;  // val == noreg gets no card mark
+  BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg);  // the store itself; tmp1/tmp2 are not forwarded
+  if (needs_post_barrier) {
+    // flatten object address if needed
+    if (!precise || dst.offset() == 0) {
+      store_check(masm, dst.base(), x13);  // store_check shifts its obj argument in place, so dst.base() is clobbered
+    } else {
+      __ la(x13, dst);  // materialize the exact slot address in x13
+      store_check(masm, x13, t0);  // x13 and t0 are hard-coded scratch registers here
+    }
+  }
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000..686fe8fa47
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {  // card-marking post-write barriers
+protected:
+  void store_check(MacroAssembler* masm, Register obj, Register tmp);  // marks the card for obj; obj and tmp are overwritten
+
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register start, Register count, Register tmp, RegSet saved_regs);  // marks every card spanned by the range
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2);  // store followed by a card mark when required
+};
+
+#endif // CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000..4b7982eb21
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/modRefBarrierSetAssembler.hpp"
+
+#define __ masm->
+
+void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                                   Register src, Register dst, Register count, RegSet saved_regs) {
+  // Copies of non-oop elements get no pre-barrier; src is not consulted here.
+  if (!is_oop) return;
+  gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs);
+}
+
+void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                                   Register start, Register count, Register tmp,
+                                                   RegSet saved_regs) {
+  // Copies of non-oop elements get no post-barrier.
+  if (!is_oop) return;
+  gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs);
+}
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  if (!is_reference_type(type)) {
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);  // non-reference store: base-class path
+    return;
+  }
+  oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2);  // reference store: subclass-provided barrier path
+}
diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000..00419c3163
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+
+// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other
+// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected
+// accesses, which are overridden in the concrete BarrierSetAssembler.
+
+class ModRefBarrierSetAssembler: public BarrierSetAssembler {
+protected:
+  virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                               Register addr, Register count, RegSet saved_regs) {}  // default: emits nothing
+  virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
+                                                Register start, Register count, Register tmp, RegSet saved_regs) {}  // default: emits nothing
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address dst, Register val, Register tmp1, Register tmp2) = 0;  // must be supplied by the concrete barrier set
+
+public:
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                  Register src, Register dst, Register count, RegSet saved_regs);  // pre-barrier hook, oop arrays only
+  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                  Register start, Register count, Register tmp, RegSet saved_regs);  // post-barrier hook, oop arrays only
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);  // routes reference types to oop_store_at
+};
+
+#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
new file mode 100644
index 0000000000..d19f5b859c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/shared/gc_globals.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
+
+#define __ masm->masm()->
+
+void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {  // emits the machine code for this LIR op
+  Register addr = _addr->as_register_lo();
+  Register newval = _new_value->as_register();
+  Register cmpval = _cmp_value->as_register();
+  Register tmp1 = _tmp1->as_register();
+  Register tmp2 = _tmp2->as_register();
+  Register result = result_opr()->as_register();
+
+  ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1);  // t1 is passed as the barrier's temp register
+
+  if (UseCompressedOops) {
+    __ encode_heap_oop(tmp1, cmpval);  // encoded copies go to tmp1/tmp2, which then stand in for cmpval/newval
+    cmpval = tmp1;
+    __ encode_heap_oop(tmp2, newval);
+    newval = tmp2;
+  }
+
+  ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq,
+                                                 /* release */ Assembler::rl, /* is_cae */ false, result);
+}
+
+#undef __
+
+#ifdef ASSERT
+#define __ gen->lir(__FILE__, __LINE__)->
+#else
+#define __ gen->lir()->
+#endif
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) {
+  // Oop accesses may get a Shenandoah-specific CAS; everything else falls through to the generic BarrierSetC1 path.
+  if (access.is_oop()) {
+    LIRGenerator *gen = access.gen();
+    if (ShenandoahSATBBarrier) {
+      pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(),
+                  LIR_OprFact::illegalOpr /* pre_val */);
+    }
+    if (ShenandoahCASBarrier) {
+      cmp_value.load_item();  // both operands must be in registers for the LIR op below
+      new_value.load_item();
+
+      LIR_Opr tmp1 = gen->new_register(T_OBJECT);
+      LIR_Opr tmp2 = gen->new_register(T_OBJECT);
+      LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base();
+      LIR_Opr result = gen->new_register(T_INT);
+
+      __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result));
+      return result;
+    }
+  }
+  return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value);
+}
+
+LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) {
+  LIRGenerator* gen = access.gen();
+  BasicType type = access.type();
+
+  LIR_Opr result = gen->new_register(type);
+  value.load_item();
+  LIR_Opr value_opr = value.result();
+
+  if (access.is_oop()) {
+    value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators());  // applied to the value being stored
+  }
+
+  assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type");
+  LIR_Opr tmp = gen->new_register(T_INT);
+  __ xchg(access.resolved_addr(), value_opr, result, tmp);
+
+  if (access.is_oop()) {
+    result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0));  // applied to the old value that was read
+    LIR_Opr tmp_opr = gen->new_register(type);
+    __ move(result, tmp_opr);  // the barrier's result is moved into a fresh register, which becomes the result
+    result = tmp_opr;
+    if (ShenandoahSATBBarrier) {
+      pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr,
+                  result /* pre_val */);
+    }
+  }
+
+  return result;
+}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
new file mode 100644
index 0000000000..d73ea36b24
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahForwarding.hpp"
+#include "gc/shenandoah/shenandoahHeap.hpp"
+#include "gc/shenandoah/shenandoahHeapRegion.hpp"
+#include "gc/shenandoah/shenandoahRuntime.hpp"
+#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
+#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.hpp"
+#ifdef COMPILER1
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"
+#endif
+
+#define __ masm->
+
+address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
+
+void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                                       Register src, Register dst, Register count, RegSet saved_regs) {
+  if (!is_oop) {
+    return;  // only oop arrays need a barrier
+  }
+  const bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
+  if (!ShenandoahIUBarrier && !ShenandoahLoadRefBarrier && !(ShenandoahSATBBarrier && !dest_uninitialized)) {
+    return;  // none of the configured barriers applies to this copy
+  }
+
+  Label L_done;
+  // Skip the runtime call entirely for an empty copy.
+  __ beqz(count, L_done);
+
+  // Load the thread-local GC state into t0 and leave unless a relevant bit is set.
+  assert_different_registers(src, dst, count, t0);
+  Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+  __ lbu(t0, gc_state);
+  if (ShenandoahSATBBarrier && dest_uninitialized) {
+    __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED);
+  } else {
+    __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
+  }
+  __ beqz(t0, L_done);
+
+  // Pick the runtime entry that matches the oop encoding in use.
+  address entry = UseCompressedOops
+      ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry)
+      : CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry);
+
+  // Keep saved_regs intact across the leaf call.
+  __ push_reg(saved_regs, sp);
+  __ call_VM_leaf(entry, src, dst, count);
+  __ pop_reg(saved_regs, sp);
+  __ bind(L_done);
+}
+
+void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
+                                                                 Register obj,
+                                                                 Register pre_val,
+                                                                 Register thread,
+                                                                 Register tmp,
+                                                                 bool tosca_live,
+                                                                 bool expand_call) {
+  // Emits nothing unless the SATB barrier is enabled.
+  if (!ShenandoahSATBBarrier) return;
+  satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);
+}
+
+void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
+                                                           Register obj,
+                                                           Register pre_val,
+                                                           Register thread,
+                                                           Register tmp,
+                                                           bool tosca_live,
+                                                           bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+  assert(thread == xthread, "must be");
+
+  Label done;
+  Label runtime;
+
+  assert_different_registers(obj, pre_val, tmp, t0);  // t0 is used as a second scratch register below
+  assert(pre_val != noreg &&  tmp != noreg, "expecting a register");
+
+  Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  // Is marking active?
+  if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) {
+    __ lwu(tmp, in_progress);
+  } else {
+    assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ lbu(tmp, in_progress);
+  }
+  __ beqz(tmp, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);  // with obj == noreg the caller supplies pre_val
+  }
+
+  // Is the previous value null?
+  __ beqz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+  __ ld(tmp, index);                        // tmp := *index_adr
+  __ beqz(tmp, runtime);                    // tmp == 0? If yes, goto runtime
+
+  __ sub(tmp, tmp, wordSize);               // tmp := tmp - wordSize
+  __ sd(tmp, index);                        // *index_adr := tmp
+  __ ld(t0, buffer);
+  __ add(tmp, tmp, t0);                     // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  __ sd(pre_val, Address(tmp, 0));
+  __ j(done);
+
+  __ bind(runtime);
+  // save the live input values
+  RegSet saved = RegSet::of(pre_val);
+  if (tosca_live) saved += RegSet::of(x10);
+  if (obj != noreg) saved += RegSet::of(obj);
+
+  __ push_reg(saved, sp);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the interpreter frame's last_sp slot is NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then the frame pointer might be
+  // pointing to the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+  if (expand_call) {
+    assert(pre_val != c_rarg1, "smashed arg");
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+  }
+
+  __ pop_reg(saved, sp);
+
+  __ bind(done);
+}
+
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
+  assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+  // Null-tolerant wrapper: branch around the not-null variant when dst is zero.
+  Label L_null;
+  __ beqz(dst, L_null);
+  resolve_forward_pointer_not_null(masm, dst, tmp);
+  __ bind(L_null);
+}
+
+// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitly
+// passed in.
+void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
+  assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled");
+  // The code below loads the mark word and checks whether its lowest two
+  // bits are both set. If so, it clears those two bits and writes the
+  // result to dst. Otherwise it leaves dst alone.
+  // Implementing this is surprisingly awkward. It is done here by:
+  // - Inverting the mark word
+  // - Testing that the lowest two bits of the inverted word are 0
+  // - If so, setting the lowest two bits
+  // - Inverting the result back, and copying it to dst
+  RegSet saved_regs = RegSet::of(t2);  // t2 is always used as scratch, so it is always saved
+  bool borrow_reg = (tmp == noreg);
+  if (borrow_reg) {
+    // No free registers available. Make one useful.
+    tmp = t0;
+    if (tmp == dst) {
+      tmp = t1;
+    }
+    saved_regs += RegSet::of(tmp);  // a borrowed register is restored; a caller-supplied tmp is not
+  }
+
+  assert_different_registers(tmp, dst, t2);
+  __ push_reg(saved_regs, sp);
+
+  Label done;
+  __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
+  __ xori(tmp, tmp, -1); // invert all bits (XOR with -1)
+  __ andi(t2, tmp, markOopDesc::lock_mask_in_place);
+  __ bnez(t2, done);  // some masked bit of the mark word was clear: leave dst untouched
+  __ ori(tmp, tmp, markOopDesc::marked_value);
+  __ xori(dst, tmp, -1); // invert back: dst = mark word with the marked_value bits cleared
+  __ bind(done);
+
+  __ pop_reg(saved_regs, sp);
+}
+
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm,
+                                                                    Register dst,
+                                                                    Address load_addr) {
+  assert(ShenandoahLoadRefBarrier, "Should be enabled");
+  assert(dst != t1 && load_addr.base() != t1, "need t1");  // t1 holds the GC state and may stand in for dst below
+  assert_different_registers(load_addr.base(), t0, t1);
+
+  Label done;
+  __ enter();  // paired with the leave() after 'done', so both paths undo it
+  Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+  __ lbu(t1, gc_state);
+
+  // Check for heap stability
+  __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED);
+  __ beqz(t1, done);  // HAS_FORWARDED not set: dst is left as is
+
+  // use x11 for load address
+  Register result_dst = dst;
+  if (dst == x11) {
+    __ mv(t1, dst);  // x11 is overwritten with the load address below, so carry the oop in t1
+    dst = t1;
+  }
+
+  // Save x10 and x11, unless it is an output register
+  RegSet saved_regs = RegSet::of(x10, x11) - result_dst;  // excluding result_dst keeps the pop from overwriting the result
+  __ push_reg(saved_regs, sp);
+  __ la(x11, load_addr);
+  __ mv(x10, dst);
+
+  __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
+
+  __ mv(result_dst, x10);  // whatever the call left in x10 becomes the result
+  __ pop_reg(saved_regs, sp);
+
+  __ bind(done);
+  __ leave();
+}
+
+void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) {
+  if (!ShenandoahIUBarrier) {
+    return;  // IU barrier is switched off: emit no code
+  }
+  // SATB pre-barrier on the value in dst, bracketed by a save/restore of the call-clobbered registers.
+  __ push_call_clobbered_registers();
+  satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false);
+  __ pop_call_clobbered_registers();
+}
+
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
+  // Null-tolerant front end of load_reference_barrier_not_null; emits nothing when the barrier is off.
+  if (!ShenandoahLoadRefBarrier) { return; }
+  Label skip_null;
+  __ beqz(dst, skip_null);  // a null reference bypasses the barrier
+  load_reference_barrier_not_null(masm, dst, load_addr);
+  __ bind(skip_null);
+}
+
+//
+// Arguments:
+//
+// Inputs:
+//   src:        oop location to load from, might be clobbered
+//
+// Output:
+//   dst:        oop loaded from src location
+//
+// Kill:
+//   t0, t1 on the load reference barrier path (its stub is documented to trash both)
+//
+// Alias:
+//   dst: x28, or x29 when src.base() is x28 (temporary output register, saved and restored, used when dst == src.base())
+//
+void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm,
+                                            DecoratorSet decorators,
+                                            BasicType type,
+                                            Register dst,
+                                            Address src,
+                                            Register tmp1,
+                                            Register tmp_thread) {
+  // 1: non-reference load, no additional barrier is needed
+  if (!is_reference_type(type)) {
+    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+    return;
+  }
+
+  // 2: load a reference from src location and apply LRB if needed
+  if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {
+    Register result_dst = dst;
+
+    // Preserve src location for LRB
+    RegSet saved_regs;
+    if (dst == src.base()) {
+      dst = (src.base() == x28) ? x29 : x28;  // pick a temporary that differs from src.base()
+      saved_regs = RegSet::of(dst);
+      __ push_reg(saved_regs, sp);
+    }
+    assert_different_registers(dst, src.base());
+
+    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+
+    load_reference_barrier(masm, dst, src);
+
+    if (dst != result_dst) {
+      __ mv(result_dst, dst);  // hand the result back in the caller's register
+      dst = result_dst;
+    }
+
+    if (saved_regs.bits() != 0) {
+      __ pop_reg(saved_regs, sp);  // restore the borrowed temporary
+    }
+  } else {
+    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+  }
+
+  // 3: apply keep-alive barrier if needed
+  if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {
+    __ enter();
+    __ push_call_clobbered_registers();
+    satb_write_barrier_pre(masm /* masm */,
+                           noreg /* obj */,
+                           dst /* pre_val */,
+                           xthread /* thread */,
+                           tmp1 /* tmp */,
+                           true /* tosca_live */,
+                           true /* expand_call */);
+    __ pop_call_clobbered_registers();
+    __ leave();
+  }
+}
+
+// Store with Shenandoah barriers: non-oop stores go straight to the base class; oop stores run the pre-barrier (and IU barrier) first.
+void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                             Address dst, Register val, Register tmp1, Register tmp2) {
+  bool on_oop = is_reference_type(type);
+  if (!on_oop) {
+    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
+    return;
+  }
+
+  // flatten object address if needed
+  if (dst.offset() == 0) {
+    if (dst.base() != x13) {
+      __ mv(x13, dst.base());
+    }
+  } else {
+    __ la(x13, dst);
+  }
+
+  shenandoah_write_barrier_pre(masm,
+                               x13 /* obj */,
+                               tmp2 /* pre_val */,
+                               xthread /* thread */,
+                               tmp1  /* tmp */,
+                               val != noreg /* tosca_live */,
+                               false /* expand_call */);
+
+  if (val == noreg) {
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg);
+  } else {
+    // Apply the IU barrier to the value being stored (emits nothing unless
+    // ShenandoahIUBarrier is set).
+    iu_barrier(masm, val, tmp1);
+    // No post barrier follows the store, so nothing needs an uncompressed
+    // copy of val afterwards; val is passed to the raw store directly
+    // instead of first being copied into t1.
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg);
+  }
+}
+
+void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                                                  Register obj, Register tmp, Label& slowpath) {
+  Label done;
+  // Resolve jobject via the shared implementation, then divert to slowpath while the heap is evacuating
+  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);
+
+  // Check for null.
+  __ beqz(obj, done);
+
+  assert_different_registers(obj, t0, t1);  // both scratch registers are written below while obj must survive
+  Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());
+  __ lbu(t1, gc_state);  // gc_state is addressed relative to jni_env
+
+  // Check for heap in evacuation phase
+  __ andi(t0, t1, ShenandoahHeap::EVACUATION);
+  __ bnez(t0, slowpath);
+
+  __ bind(done);
+}
+
+// Special Shenandoah CAS implementation that handles false negatives due
+// to concurrent evacuation.  The service is more complex than a
+// traditional CAS operation because the CAS operation is intended to
+// succeed if the reference at addr exactly matches expected or if the
+// reference at addr holds a pointer to a from-space object that has
+// been relocated to the location named by expected.  There are two
+// races that must be addressed:
+//  a) A parallel thread may mutate the contents of addr so that it points
+//     to a different object.  In this case, the CAS operation should fail.
+//  b) A parallel thread may heal the contents of addr, replacing a
+//     from-space pointer held in addr with the to-space pointer
+//     representing the new location of the object.
+// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL
+// or it refers to an object that is not being evacuated out of
+// from-space, or it refers to the to-space version of an object that
+// is being evacuated out of from-space.
+//
+// By default the value held in the result register following execution
+// of the generated code sequence is 0 to indicate failure of CAS,
+// non-zero to indicate success. If is_cae, the result is the value most
+// recently fetched from addr rather than a boolean success indicator.
+//
+// Clobbers t0, t1
+void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
+                                                Register addr,
+                                                Register expected,
+                                                Register new_val,
+                                                Assembler::Aqrl acquire,
+                                                Assembler::Aqrl release,
+                                                bool is_cae,
+                                                Register result) {
+  bool is_narrow = UseCompressedOops;
+  Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64;
+
+  assert_different_registers(addr, expected, t0, t1);
+  assert_different_registers(addr, new_val, t0, t1);
+
+  Label retry, success, fail, done;
+
+  __ bind(retry);
+
+  // Step 1: Try to CAS; t1 receives the value found at addr.
+  __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1);
+
+  // If success, then we are done.
+  __ beq(expected, t1, success);
+
+  // Step 2: CAS failed, check the forwarded pointer.
+  __ mv(t0, t1);
+
+  if (is_narrow) {
+    __ decode_heap_oop(t0, t0);
+  }
+  resolve_forward_pointer(masm, t0);
+
+  __ encode_heap_oop(t0, t0);  // NOTE(review): issued even when !is_narrow, unlike the decode above - confirm it is a plain move then
+
+  // Report failure when the forwarded oop was not expected.
+  __ bne(t0, expected, fail);
+
+  // Step 3: CAS again using the forwarded oop.
+  __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0);
+
+  // Retry when failed.
+  __ bne(t0, t1, retry);
+
+  __ bind(success);
+  if (is_cae) {
+    __ mv(result, expected);
+  } else {
+    __ mv(result, 1);
+  }
+  __ j(done);
+
+  __ bind(fail);
+  if (is_cae) {
+    __ mv(result, t0);  // t0 holds the resolved (and re-encoded) value that did not match expected
+  } else {
+    __ mv(result, zr);
+  }
+
+  __ bind(done);
+}
+
+#undef __
+
+#ifdef COMPILER1
+
+#define __ ce->masm()->
+
+void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {
+  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+  __ bind(*stub->entry());
+
+  assert(stub->pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = stub->pre_val()->as_register();
+
+  if (stub->do_load()) {
+    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/);
+  }
+  __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);  // a null previous value needs no barrier
+  ce->store_parameter(stub->pre_val()->as_register(), 0);  // parameter 0 of the runtime stub: the previous value
+  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
+  __ j(*stub->continuation());
+}
+
+void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce,
+                                                                    ShenandoahLoadReferenceBarrierStub* stub) {
+  ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
+  __ bind(*stub->entry());
+
+  Register obj = stub->obj()->as_register();
+  Register res = stub->result()->as_register();
+  Register addr = stub->addr()->as_pointer_register();
+  Register tmp1 = stub->tmp1()->as_register();
+  Register tmp2 = stub->tmp2()->as_register();
+
+  assert(res == x10, "result must arrive in x10");
+  assert_different_registers(tmp1, tmp2, t0);
+
+  if (res != obj) {
+    __ mv(res, obj);
+  }
+
+  // Check for null.
+  __ beqz(res, *stub->continuation(), /* is_far */ true);
+
+  // Check for object in cset.
+  __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr());
+  __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint());  // tmp1 = index derived from the object address
+  __ add(t0, tmp2, tmp1);
+  __ lb(tmp2, Address(t0));  // one table byte per index; zero means nothing to do
+  __ beqz(tmp2, *stub->continuation(), /* is_far */ true);
+
+  // Check if object is already forwarded.
+  Label slow_path;
+  __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
+  __ xori(tmp1, tmp1, -1);  // work on the inverted mark word
+  __ andi(t0, tmp1, markOopDesc::lock_mask_in_place);
+  __ bnez(t0, slow_path);  // masked bits of the mark not all set: not forwarded yet
+
+  // Decode forwarded object.
+  __ ori(tmp1, tmp1, markOopDesc::marked_value);
+  __ xori(res, tmp1, -1);  // invert back, which leaves the masked bits cleared
+  __ j(*stub->continuation());
+
+  __ bind(slow_path);
+  ce->store_parameter(res, 0);   // parameter 0: the object
+  ce->store_parameter(addr, 1);  // parameter 1: the address it was loaded from
+  __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+
+  __ j(*stub->continuation());
+}
+
+#undef __
+
+#define __ sasm->
+
+void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
+  __ prologue("shenandoah_pre_barrier", false);
+
+  // arg0 : previous value of memory
+
+  // Fast path stores arg0 into the thread's SATB buffer; the runtime is called only when the queue index is zero.
+
+  const Register pre_val = x10;
+  const Register thread = xthread;
+  const Register tmp = t0;
+
+  Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  Label done;
+  Label runtime;
+
+  // Is marking still active?
+  Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
+  __ lbu(tmp, gc_state);  // flag byte: loaded unsigned, like the other gc_state loads in this file
+  __ andi(tmp, tmp, ShenandoahHeap::MARKING);
+  __ beqz(tmp, done);
+
+  // Can we store original value in the thread's buffer?
+  __ ld(tmp, queue_index);
+  __ beqz(tmp, runtime);
+
+  __ sub(tmp, tmp, wordSize);  // step the index down by one word and publish it
+  __ sd(tmp, queue_index);
+  __ ld(t1, buffer);
+  __ add(tmp, tmp, t1);        // tmp = buffer base + new index
+  __ load_parameter(0, t1);
+  __ sd(t1, Address(tmp, 0));  // store the previous value into that slot
+  __ j(done);
+
+  __ bind(runtime);
+  __ push_call_clobbered_registers();
+  __ load_parameter(0, pre_val);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread);
+  __ pop_call_clobbered_registers();
+  __ bind(done);
+
+  __ epilogue();
+}
+
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) {
+  __ prologue("shenandoah_load_reference_barrier", false);
+  // arg0 : object to be resolved; arg1 : second parameter, loaded into x11 below
+
+  __ push_call_clobbered_registers();
+  __ load_parameter(0, x10);
+  __ load_parameter(1, x11);
+
+  if (UseCompressedOops) {
+    __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+  } else {
+    __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  }
+  __ jalr(ra);  // call the runtime entry selected above
+  __ mv(t0, x10);  // park the result in t0 while the saved registers are restored
+  __ pop_call_clobbered_registers();
+  __ mv(x10, t0);  // result is returned in x10
+
+  __ epilogue();
+}
+
+#undef __
+
+#endif // COMPILER1
+
+address ShenandoahBarrierSetAssembler::shenandoah_lrb() {  // entry address of the load reference barrier stub
+  assert(_shenandoah_lrb != NULL, "need load reference barrier stub");  // only set by barrier_stubs_init()
+  return _shenandoah_lrb;
+}
+
+#define __ cgen->assembler()->
+
+// Shenandoah load reference barrier.
+//
+// Input:
+//   x10: OOP to evacuate.  Not null.
+//   x11: load address
+//
+// Output:
+//   x10: Pointer to evacuated OOP.
+//
+// Trash t0 t1  Preserve everything else.
+address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
+  __ align(6);
+  StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
+  address start = __ pc();
+
+  Label slow_path;
+  __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr());
+  __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint());  // t0 = index derived from the object address
+  __ add(t1, t1, t0);
+  __ lbu(t1, Address(t1, 0));  // table byte for that index
+  __ andi(t0, t1, 1);
+  __ bnez(t0, slow_path);  // bit 0 set: take the runtime path
+  __ ret();                // otherwise return with x10 unchanged
+
+  __ bind(slow_path);
+  __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+  __ push_call_clobbered_registers();
+
+  if (UseCompressedOops) {
+    __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+  } else {
+    __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  }
+  __ jalr(ra);
+  __ mv(t0, x10);  // park the result in t0 while the saved registers are restored
+  __ pop_call_clobbered_registers();
+  __ mv(x10, t0);
+
+  __ leave(); // required for proper stackwalking of RuntimeStub frame
+  __ ret();
+
+  return start;
+}
+
+#undef __
+
+void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
+  if (!ShenandoahLoadRefBarrier) {
+    return;  // without the load reference barrier no stub is generated
+  }
+  ResourceMark rm;
+  BufferBlob* blob = BufferBlob::create("shenandoah_barrier_stubs", 2048 /* stub code size */);
+  CodeBuffer code(blob);
+  StubCodeGenerator generator(&code);
+  _shenandoah_lrb = generate_shenandoah_lrb(&generator);  // remember the stub entry for shenandoah_lrb()
+}
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
new file mode 100644
index 0000000000..5d75035e9d
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
+#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#ifdef COMPILER1
+class LIR_Assembler;
+class ShenandoahPreBarrierStub;
+class ShenandoahLoadReferenceBarrierStub;
+class StubAssembler;
+#endif
+class StubCodeGenerator;
+
+class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
+private:
+
+  static address _shenandoah_lrb;  // entry of the load reference barrier stub, set by barrier_stubs_init()
+
+  void satb_write_barrier_pre(MacroAssembler* masm,
+                              Register obj,
+                              Register pre_val,
+                              Register thread,
+                              Register tmp,
+                              bool tosca_live,
+                              bool expand_call);
+  void shenandoah_write_barrier_pre(MacroAssembler* masm,
+                                    Register obj,
+                                    Register pre_val,
+                                    Register thread,
+                                    Register tmp,
+                                    bool tosca_live,
+                                    bool expand_call);
+
+  void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);  // null-tolerant variant
+  void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);  // tmp == noreg: a scratch register is borrowed and restored
+  void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);  // null-tolerant variant
+  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
+
+  address generate_shenandoah_lrb(StubCodeGenerator* cgen);  // emits the stub and returns its start address
+
+public:
+
+  static address shenandoah_lrb();  // asserts that the stub has been generated
+
+  void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);  // emits code only when ShenandoahIUBarrier is set
+
+#ifdef COMPILER1
+  void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
+  void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
+  void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
+  void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm);
+#endif
+
+  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
+                                  Register src, Register dst, Register count, RegSet saved_regs);
+
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp_thread);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address dst, Register val, Register tmp1, Register tmp2);
+
+  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
+                                             Register obj, Register tmp, Label& slowpath);
+
+  virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
+                   Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result);  // clobbers t0 and t1
+
+  virtual void barrier_stubs_init();  // generates the load reference barrier stub when that barrier is enabled
+};
+
+#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
new file mode 100644
index 0000000000..bab407a8b7
--- /dev/null
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad
@@ -0,0 +1,197 @@
+//
+// Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+source_hpp %{
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
+%}
+
+instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+
+  format %{
+    "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+
+  format %{
+    "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+  predicate(needs_acquiring_load_reserved(n));
+  match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+
+  format %{
+    "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::aq /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+  predicate(needs_acquiring_load_reserved(n));
+  match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+
+  format %{
+    "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::aq /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+  effect(TEMP_DEF res, TEMP tmp, KILL cr);
+
+  format %{
+    "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   true /* is_cae */, $res$$Register);  // $res: the expected value on success, else the mismatching value
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP_DEF res, TEMP tmp, KILL cr);
+  format %{
+    "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   true /* is_cae */, $res$$Register);  // $res: the expected value on success, else the mismatching value
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+  format %{
+    "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah"
+    "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    // Weak is not currently supported by ShenandoahBarrierSet::cmpxchg_oop, so the same call as the strong CAS is emitted
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
+  match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(10 * DEFAULT_COST);
+
+  effect(TEMP tmp, KILL cr);
+  format %{
+    "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah"
+  %}
+
+  ins_encode %{
+    Register tmp = $tmp$$Register;
+    __ mv(tmp, $oldval$$Register); // Must not clobber oldval: cmpxchg_oop is given the copy in tmp as its expected value.
+    ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
+                                                   Assembler::relaxed /* acquire */, Assembler::rl /* release */,
+                                                   false /* is_cae */, $res$$Register);  // same call as the strong CAS; $res: 1 on success, 0 on failure
+  %}
+
+  ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
new file mode 100644
index 0000000000..d6ce8da07b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
+#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
+
+const int StackAlignmentInBytes = 16;  // stack alignment, in bytes
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+const bool CCallingConventionRequiresIntsAsLongs = false;
+
+// To be safe, we deoptimize when we come across an access that needs
+// patching. This is similar to what is done on aarch64.
+#define DEOPTIMIZE_WHEN_PATCHING
+
+#define SUPPORTS_NATIVE_CX8  // NOTE(review): presumably advertises a native 8-byte compare-and-exchange - confirm against shared code
+
+#define SUPPORT_RESERVED_STACK_AREA  // NOTE(review): presumably opts in to the reserved stack area feature - confirm
+
+#define THREAD_LOCAL_POLL  // NOTE(review): presumably selects thread-local safepoint polling - confirm
+
+#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp
new file mode 100644
index 0000000000..90db2f4460
--- /dev/null
+++ b/src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_GLOBALS_RISCV_HPP
+#define CPU_RISCV_GLOBALS_RISCV_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, NeedsDeoptSuspend,        false); // only register window machines need this
+
+define_pd_global(bool, ImplicitNullChecks,       true);  // Generate code for implicit null checks
+define_pd_global(bool, TrapBasedNullChecks,      false);
+define_pd_global(bool, UncommonNullCast,         true);  // Uncommon-trap NULLs passed to check cast
+
+define_pd_global(uintx, CodeCacheSegmentSize,    64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment.
+define_pd_global(intx, CodeEntryAlignment,       64);
+define_pd_global(intx, OptoLoopAlignment,        16);
+define_pd_global(intx, InlineFrequencyCount,     100);
+
+#define DEFAULT_STACK_YELLOW_PAGES (2)
+#define DEFAULT_STACK_RED_PAGES (1)
+// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
+// stack if compiled for unix and LP64. To pass stack overflow tests we need
+// 20 shadow pages.
+#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) // 5 more pages where DEBUG_ONLY expands
+#define DEFAULT_STACK_RESERVED_PAGES (1)
+
+#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
+#define MIN_STACK_RED_PAGES    DEFAULT_STACK_RED_PAGES
+#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_RESERVED_PAGES (0) // the only minimum that is below its default
+
+define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
+define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
+define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES);
+define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES);
+
+define_pd_global(bool, RewriteBytecodes,     true);
+define_pd_global(bool, RewriteFrequentPairs, true);
+
+define_pd_global(bool, UseMembar,            true);
+
+define_pd_global(bool, PreserveFramePointer, false);
+
+// GC Ergo Flags
+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
+
+define_pd_global(uintx, TypeProfileLevel, 111);
+
+define_pd_global(bool, CompactStrings, true);
+
+// Clear short arrays bigger than one word in an arch-specific way
+define_pd_global(intx, InitArrayShortSize, BytesPerLong);
+
+define_pd_global(bool, ThreadLocalHandshakes, true);
+
+define_pd_global(intx, InlineSmallCode,          1000);
+
+#define ARCH_FLAGS(develop,                                                      \
+                   product,                                                      \
+                   diagnostic,                                                   \
+                   experimental,                                                 \
+                   notproduct,                                                   \
+                   range,                                                        \
+                   constraint,                                                   \
+                   writeable)                                                    \
+  /* RISC-V specific flags, declared as (type, name, default, description). */   \
+  product(bool, NearCpool, true,                                                 \
+         "constant pool is close to instructions")                               \
+  product(intx, BlockZeroingLowLimit, 256,                                       \
+          "Minimum size in bytes when block zeroing will be used")               \
+          range(1, max_jint)                                                     \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler")         \
+  /* For now we're going to be safe and add the I/O bits to userspace fences. */ \
+  product(bool, UseConservativeFence, true,                                      \
+          "Extend i for r and o for w in the pred/succ flags of fence")          \
+  product(bool, AvoidUnalignedAccesses, true,                                    \
+          "Avoid generating unaligned memory accesses")                          \
+  experimental(bool, UseRVV, false, "Use RVV instructions")                      \
+  experimental(bool, UseZba, false, "Use Zba instructions")                      \
+  experimental(bool, UseZbb, false, "Use Zbb instructions")                      \
+  experimental(bool, UseZbs, false, "Use Zbs instructions")                      \
+  experimental(bool, UseRVC, false, "Use RVC instructions")
+
+#endif // CPU_RISCV_GLOBALS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
new file mode 100644
index 0000000000..cc93103dc5
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+
+int InlineCacheBuffer::ic_stub_code_size() {
+  // Slots: auipc + ld, the jump (auipc + jalr if far, else j), 2 for the address.
+  const int slots = MacroAssembler::far_branches() ? 6 : 5;
+  return slots * NativeInstruction::instruction_size;
+}
+
+#define __ masm->
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
+  assert_cond(code_begin != NULL && entry_point != NULL);
+  ResourceMark rm;
+  CodeBuffer      code(code_begin, ic_stub_code_size());
+  MacroAssembler* masm            = new MacroAssembler(&code);
+  // Note: even though the code contains an embedded value, we do not need reloc info
+  // because
+  // (1) the value is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+
+  address start = __ pc();
+  Label l;
+  __ ld(t1, l); // t1 = cached value, read from the data word bound at l below
+  __ far_jump(ExternalAddress(entry_point));
+  __ align(wordSize);
+  __ bind(l);
+  __ emit_int64((intptr_t)cached_value);
+  // Invalidate the whole stub range: ic_stub_code_size() bytes starting at code_begin
+  ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+  assert(__ pc() - start == ic_stub_code_size(), "must be");
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  NativeJump* jump = nativeJump_at(move->next_instruction_address()); // the jump follows the load
+  return jump->jump_destination(); // destination of the stub's jump
+}
+
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  // The cached value is the last machine word of the stub: step to the
+  // end of the stub, back up by one word and read what is stored there.
+  address stub_end = code_begin + ic_stub_code_size();
+  return (void*)*(uintptr_t*)(stub_end - wordSize);
+}
diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp
new file mode 100644
index 0000000000..d615dcfb9e
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icache_riscv.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2023, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "riscv_flush_icache.hpp"
+#include "runtime/java.hpp"
+#include "runtime/icache.hpp"
+
+#define __ _masm->
+
+static int icache_flush(address addr, int lines, int magic) {
+  // To make a store to instruction memory visible to all RISC-V harts,
+  // the writing hart has to execute a data FENCE before requesting that
+  // all remote RISC-V harts execute a FENCE.I.
+
+  // We need to make sure stores happen before the I/D cache synchronization.
+  __asm__ volatile("fence rw, rw" : : : "memory");
+
+  RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); // length = lines * line_size bytes
+
+  return magic; // the caller-supplied magic value is returned unchanged
+}
+
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+  // Only riscv_flush_icache is supported as I-cache synchronization.
+  // We must make sure the VM can execute it without error.
+  if (!RiscvFlushIcache::test()) {
+    vm_exit_during_initialization("Unable to synchronize I-cache");
+  }
+
+  address start = (address)icache_flush; // the "stub" is the C function icache_flush, not generated code
+  *flush_icache_stub = (ICache::flush_icache_stub_t)start;
+
+  // ICache::invalidate_range() contains explicit condition that the first
+  // call is invoked on the generated icache flush stub code range.
+  ICache::invalidate_range(start, 0);
+
+  {
+    StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush");
+    __ ret(); // the only generated code: a placeholder stub consisting of a return
+  }
+}
+
+#undef __
diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp
new file mode 100644
index 0000000000..5bf40ca820
--- /dev/null
+++ b/src/hotspot/cpu/riscv/icache_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_ICACHE_RISCV_HPP
+#define CPU_RISCV_ICACHE_RISCV_HPP
+
+// RISC-V parameters for the interface that updates the instruction cache.
+// Whenever the VM modifies code, part of the processor instruction cache
+// potentially has to be flushed.
+
+class ICache : public AbstractICache {
+public:
+  enum {
+    stub_size      = 16,                // Size of the icache flush stub in bytes
+    line_size      = BytesPerWord,      // conservative: flush granularity of one word
+    log2_line_size = LogBytesPerWord    // log2(line_size)
+  };
+};
+
+#endif // CPU_RISCV_ICACHE_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
new file mode 100644
index 0000000000..fa5ddc34b2
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -0,0 +1,1931 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interp_masm_riscv.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "logging/log.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
+
+void InterpreterMacroAssembler::narrow(Register result) { // narrow an int result to the method's result type
+  // Get method->_constMethod->_result_type
+  ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+  ld(t0, Address(t0, Method::const_offset()));
+  lbu(t0, Address(t0, ConstMethod::result_type_offset()));
+
+  Label done, notBool, notByte, notChar;
+
+  // common case first
+  mv(t1, T_INT);
+  beq(t0, t1, done);
+
+  // mask integer result to narrower return type.
+  mv(t1, T_BOOLEAN);
+  bne(t0, t1, notBool);
+
+  andi(result, result, 0x1); // T_BOOLEAN: keep only bit 0
+  j(done);
+
+  bind(notBool);
+  mv(t1, T_BYTE);
+  bne(t0, t1, notByte);
+  sign_extend(result, result, 8); // T_BYTE
+  j(done);
+
+  bind(notByte);
+  mv(t1, T_CHAR);
+  bne(t0, t1, notChar);
+  zero_extend(result, result, 16); // T_CHAR
+  j(done);
+
+  bind(notChar);
+  sign_extend(result, result, 16); // remaining case; NOTE(review): presumably T_SHORT - confirm
+
+  // T_INT needs no narrowing; every path still runs the addw below
+  bind(done);
+  addw(result, result, zr);
+}
+
+void InterpreterMacroAssembler::jump_to_entry(address entry) { // emit a jump to an already generated entry
+  assert(entry != NULL, "Entry must have been generated by now");
+  j(entry); // unconditional jump to entry
+}
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
+  if (JvmtiExport::can_pop_frame()) { // emits nothing otherwise; java_thread is unused, xthread is read
+    Label L;
+    // Initiate popframe handling only if it is not already being
+    // processed. If the flag has the popframe_processing bit set,
+    // it means that this code is called *during* popframe handling - we
+    // don't want to reenter.
+    // This method is only called just after the call into the vm in
+    // call_VM_base, so the arg registers are available.
+    lwu(t1, Address(xthread, JavaThread::popframe_condition_offset()));
+    andi(t0, t1, JavaThread::popframe_pending_bit);
+    beqz(t0, L); // pending bit clear: skip
+    andi(t0, t1, JavaThread::popframe_processing_bit);
+    bnez(t0, L); // processing bit set: skip
+    // Call Interpreter::remove_activation_preserving_args_entry() to get the
+    // address of the same-named entrypoint in the generated interpreter code.
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+    jr(x10); // jump to the address left in x10 by the call
+    bind(L);
+  }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+  ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); // x12 = thread's jvmti thread state
+  const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset());
+  const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset());
+  const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset());
+  switch (state) { // load the saved value into x10 or f10, depending on state
+    case atos:
+      ld(x10, oop_addr);
+      sd(zr, oop_addr); // clear the saved oop after reading it
+      verify_oop(x10);
+      break;
+    case ltos:
+      ld(x10, val_addr);
+      break;
+    case btos:  // fall through
+    case ztos:  // fall through
+    case ctos:  // fall through
+    case stos:  // fall through
+    case itos:
+      lwu(x10, val_addr); // 32-bit load
+      break;
+    case ftos:
+      flw(f10, val_addr);
+      break;
+    case dtos:
+      fld(f10, val_addr);
+      break;
+    case vtos:
+      /* nothing to do */
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  // Clean up tos value in the thread object
+  mvw(t0, (int) ilgl);
+  sw(t0, tos_addr); // saved tos state = ilgl
+  sw(zr, val_addr); // 32-bit store of zero to the saved value
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
+  if (JvmtiExport::can_force_early_return()) { // emits nothing otherwise; java_thread is unused, xthread is read
+    Label L;
+    ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+    beqz(t0, L);  // if [thread->jvmti_thread_state() == NULL] then exit
+
+    // Initiate earlyret handling only if it is not already being processed.
+    // If the flag has the earlyret_processing bit set, it means that this code
+    // is called *during* earlyret handling - we don't want to reenter.
+    lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset()));
+    mv(t1, JvmtiThreadState::earlyret_pending);
+    bne(t0, t1, L); // state is not earlyret_pending: skip
+
+    // Call Interpreter::remove_activation_early_entry() to get the address of the
+    // same-named entrypoint in the generated interpreter code.
+    ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));
+    lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); // argument: the saved tos state
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0);
+    jr(x10); // jump to the address left in x10 by the call
+    bind(L);
+  }
+}
+
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) {
+  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+  lhu(reg, Address(xbcp, bcp_offset)); // unsigned 16-bit load at xbcp + bcp_offset
+  revb_h(reg, reg); // NOTE(review): presumably swaps the two bytes of the halfword - confirm
+}
+
+void InterpreterMacroAssembler::get_dispatch() { // set xdispatch to the interpreter dispatch table address
+  int32_t offset = 0;
+  la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); // NOTE(review): offset looks like an out-parameter - confirm
+  addi(xdispatch, xdispatch, offset); // add the remaining offset to complete the address
+}
+
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       size_t index_size) { // index_size must be sizeof u1, u2 or u4
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (index_size == sizeof(u2)) {
+    load_unsigned_short(index, Address(xbcp, bcp_offset));
+  } else if (index_size == sizeof(u4)) {
+    lwu(index, Address(xbcp, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+    xori(index, index, -1); // index = ~index
+    addw(index, index, zr); // sign-extend the 32-bit result
+  } else if (index_size == sizeof(u1)) {
+    load_unsigned_byte(index, Address(xbcp, bcp_offset));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Return
+// index: the index read from the bytecode stream at bcp_offset
+// cache: address of cache entry - ConstantPoolCache::base_offset()
+//
+// A caller must add ConstantPoolCache::base_offset() to cache to get
+// the true address of the cache entry.
+//
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
+                                                           Register index,
+                                                           int bcp_offset,
+                                                           size_t index_size) {
+  assert_different_registers(cache, index);
+  assert_different_registers(cache, xcpool);
+  get_cache_index_at_bcp(index, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // Convert from field index to ConstantPoolCacheEntry
+  // riscv already has the cache in xcpool so there is no need to
+  // install it in cache. Instead we pre-add the indexed offset to
+  // xcpool and return it in cache. All clients of this method need to
+  // be modified accordingly.
+  shadd(cache, index, xcpool, cache, 5); // shift of 5 matches the 4 * wordSize entry size asserted above
+}
+
+
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register index,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  // We use a 32-bit load here since the layout of 64-bit words on
+  // little-endian machines allows that.
+  // n.b. unlike x86 cache already includes the index offset
+  la(bytecode, Address(cache,
+                       ConstantPoolCache::base_offset() +
+                       ConstantPoolCacheEntry::indices_offset()));
+  membar(MacroAssembler::AnyAny);
+  lwu(bytecode, bytecode); // load the 32-bit indices word of the entry
+  membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  const int shift_count = (1 + byte_no) * BitsPerByte; // bit position of the wanted byte
+  slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); // move that byte to the top of the register
+  srli(bytecode, bytecode, XLEN - BitsPerByte); // then down to bits 0..7, zero-filling above
+}
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+                                                               Register tmp,
+                                                               int bcp_offset,
+                                                               size_t index_size) { // cache = pointer to the entry
+  assert(cache != tmp, "must use different register");
+  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // Convert from field index to ConstantPoolCacheEntry index
+  // and from word offset to byte offset
+  assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord,
+         "else change next line");
+  ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); // cache pointer saved in the frame
+  // skip past the header
+  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
+  // construct pointer to cache entry
+  shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord);
+}
+
+// Load object from cpool->resolved_references(index); index is modified, tmp is scratch
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
+                                Register result, Register index, Register tmp) {
+  assert_different_registers(result, index);
+
+  get_constant_pool(result);
+  // Load pointer for resolved_references[] objArray
+  ld(result, Address(result, ConstantPool::cache_offset_in_bytes()));
+  ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
+  resolve_oop_handle(result, tmp);
+  // Add in the index (first biased by the array header size, in heap-oop units)
+  addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);
+  shadd(result, index, result, index, LogBytesPerHeapOop);
+  load_heap_oop(result, Address(result, 0));
+}
+
+void InterpreterMacroAssembler::load_resolved_klass_at_offset(
+                                Register cpool, Register index, Register klass, Register temp) {
+  shadd(temp, index, cpool, temp, LogBytesPerWord); // word-scaled index added to cpool
+  lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index
+  ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses
+  shadd(klass, temp, klass, temp, LogBytesPerWord); // word-scaled resolved_klass_index added to the array
+  ld(klass, Address(klass, Array<Klass*>::base_offset_in_bytes())); // klass = the selected array element
+}
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
+// subtype of super_klass; otherwise fall through after profiling the failure.
+//
+// Args:
+//      x10: superklass
+//      Rsub_klass: subklass
+//
+// Kills:
+//      x12, x15
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Label& ok_is_subtype) {
+  assert(Rsub_klass != x10, "x10 holds superklass");
+  assert(Rsub_klass != x12, "x12 holds 2ndary super array length");
+  assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr");
+
+  // Profile the not-null value's klass.
+  profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15
+
+  // Do the check.
+  check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12
+
+  // Profile the failure of the check.
+  profile_typecheck_failed(x12); // blows x12
+}
+
+// Java Expression Stack
+
+void InterpreterMacroAssembler::pop_ptr(Register r) { // pop one word from the expression stack into r
+  ld(r, Address(esp, 0)); // 64-bit load from the top slot
+  addi(esp, esp, wordSize); // release the slot
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) { // pop an int from the expression stack into r
+  lw(r, Address(esp, 0)); // lw sign-extends the loaded word
+  addi(esp, esp, wordSize); // release the slot
+}
+
+void InterpreterMacroAssembler::pop_l(Register r) { // pop a long (two stack elements) into r
+  ld(r, Address(esp, 0)); // the value is in the slot at esp
+  addi(esp, esp, 2 * Interpreter::stackElementSize); // release both slots
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) { // push r as one word onto the expression stack
+  addi(esp, esp, -wordSize); // make room for one slot
+  sd(r, Address(esp, 0)); // 64-bit store into the new top slot
+}
+
+void InterpreterMacroAssembler::push_i(Register r) { // push the int in r; note that r itself is rewritten
+  addi(esp, esp, -wordSize); // make room for one slot
+  addw(r, r, zr); // sign-extend the low 32 bits of r in place
+  sd(r, Address(esp, 0)); // store the full 64-bit register
+}
+
+void InterpreterMacroAssembler::push_l(Register r) { // push the long in r using two stack slots
+  addi(esp, esp, -2 * wordSize); // make room for two slots
+  sd(zr, Address(esp, wordSize)); // the second slot is zeroed
+  sd(r, Address(esp)); // the value goes in the slot at esp
+}
+
+void InterpreterMacroAssembler::pop_f(FloatRegister r) { // pop a float from the expression stack into r
+  flw(r, esp, 0); // single-precision load from the top slot
+  addi(esp, esp, wordSize); // release the slot
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister r) { // pop a double (two stack elements) into r
+  fld(r, esp, 0); // double-precision load from the slot at esp
+  addi(esp, esp, 2 * Interpreter::stackElementSize); // release both slots
+}
+
+void InterpreterMacroAssembler::push_f(FloatRegister r) { // push the float in r onto the expression stack
+  addi(esp, esp, -wordSize); // make room for one slot
+  fsw(r, Address(esp, 0)); // single-precision store into the new top slot
+}
+
+void InterpreterMacroAssembler::push_d(FloatRegister r) { // push the double in r using two stack slots
+  addi(esp, esp, -2 * wordSize); // make room for two slots
+  fsd(r, Address(esp, 0)); // double-precision store at esp; the second slot is left unwritten
+}
+
+void InterpreterMacroAssembler::pop(TosState state) {
+  switch (state) {
+    case vtos:
+      // nothing is popped for vtos
+      break;
+    case dtos:
+      pop_d();
+      break;
+    case ftos:
+      pop_f();
+      break;
+    case ltos:
+      pop_l();
+      break;
+    case itos:  // the sub-int states below share the int pop
+    case stos:
+    case ctos:
+    case ztos:
+    case btos:
+      pop_i();
+      break;
+    case atos:
+      pop_ptr();
+      verify_oop(x10);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void InterpreterMacroAssembler::push(TosState state) {
+  switch (state) {
+    case vtos:
+      // nothing is pushed for vtos
+      break;
+    case dtos:
+      push_d();
+      break;
+    case ftos:
+      push_f();
+      break;
+    case ltos:
+      push_l();
+      break;
+    case itos:  // the sub-int states below share the int push
+    case stos:
+    case ctos:
+    case ztos:
+    case btos:
+      push_i();
+      break;
+    case atos:
+      verify_oop(x10);
+      push_ptr();
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Helpers for swap and dup: access expression stack slot n without moving esp
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
+  ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); // val = slot n
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) { // write val to expression stack slot n
+  sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); // esp is not changed
+}
+
+void InterpreterMacroAssembler::load_float(Address src) { // load a float from src
+  flw(f10, src); // the destination is always f10
+}
+
+void InterpreterMacroAssembler::load_double(Address src) { // load a double from src
+  fld(f10, src); // the destination is always f10
+}
+
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { // bookkeeping before leaving this frame
+  // set sender sp (passed in x30)
+  mv(x30, sp);
+  // record last_sp in the interpreter frame
+  sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry
+void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
+  prepare_to_jump_from_interpreted();
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+    lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+    beqz(t0, run_compiled_code); // interp_only_mode is zero: take the normal path
+    ld(t0, Address(method, Method::interpreter_entry_offset()));
+    jr(t0); // interp_only_mode set: go to the method's interpreter entry
+    bind(run_compiled_code);
+  }
+
+  ld(t0, Address(method, Method::from_interpreted_offset()));
+  jr(t0); // jump to the method's from_interpreted entry
+}
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts.  This port emits nothing here.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { // second half of the dispatch hook
+  dispatch_next(state, step); // the whole dispatch is done here
+}
+
+void InterpreterMacroAssembler::dispatch_base(TosState state,
+                                              address* table,
+                                              bool verifyoop,
+                                              bool generate_poll,
+                                              Register Rs) {
+  // Rs holds the index that selects the table entry; when omitted it is t0.
+  if (VerifyActivationFrameSize) {
+    Unimplemented();
+  }
+  if (verifyoop && state == atos) {
+    verify_oop(x10);
+  }
+
+  Label safepoint;
+  address* const safepoint_table = Interpreter::safept_table(state);
+  bool needs_thread_local_poll = generate_poll &&
+    SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
+
+  if (needs_thread_local_poll) {
+    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+    ld(t1, Address(xthread, Thread::polling_page_offset()));
+    andi(t1, t1, SafepointMechanism::poll_bit());
+    bnez(t1, safepoint); // poll bit set: dispatch through the safepoint table below
+  }
+  if (table == Interpreter::dispatch_table(state)) {
+    mv(t1, Interpreter::distance_from_dispatch_table(state));
+    add(t1, Rs, t1);
+    shadd(t1, t1, xdispatch, t1, 3); // entry address is formed relative to xdispatch
+  } else {
+    mv(t1, (address)table);
+    shadd(t1, Rs, t1, Rs, 3); // entry address is formed relative to the given table
+  }
+  ld(t1, Address(t1));
+  jr(t1); // jump to the address loaded from the table
+
+  if (needs_thread_local_poll) {
+    bind(safepoint);
+    la(t1, ExternalAddress((address)safepoint_table));
+    shadd(t1, Rs, t1, Rs, 3);
+    ld(t1, Address(t1));
+    jr(t1);
+  }
+}
+
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) {
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs);  // active table, verifyoop on
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) {  // dispatch via the normal table, indexed by Rs
+  dispatch_base(state, Interpreter::normal_table(state), true, false, Rs);  // verifyoop on, no poll; Rs must be the 5th argument
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) {  // as dispatch_only_normal, minus the oop check
+  dispatch_base(state, Interpreter::normal_table(state), false, false, Rs);  // verifyoop off, no poll; Rs must be the 5th argument
+}
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
+  // load next bytecode (the byte at xbcp + step) into t0
+  load_unsigned_byte(t0, Address(xbcp, step));
+  add(xbcp, xbcp, step);  // advance the bytecode pointer by step
+  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);  // Rs omitted: default register is used
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+  // load current bytecode (xbcp is not advanced) into t0
+  lbu(t0, Address(xbcp, 0));
+  dispatch_base(state, table);  // remaining arguments take their defaults
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//    If throw_monitor_exception
+//       throws IllegalMonitorStateException
+//    Else if install_monitor_exception
+//       installs IllegalMonitorStateException
+//    Else
+//       no error processing
+void InterpreterMacroAssembler::remove_activation(
+                                TosState state,
+                                bool throw_monitor_exception,
+                                bool install_monitor_exception,
+                                bool notify_jvmdi) {
+  // Note: Registers x13 may be in use for the
+  // result check if synchronized method
+  Label unlocked, unlock, no_unlock;
+
+  // get the value of _do_not_unlock_if_synchronized into x13
+  const Address do_not_unlock_if_synchronized(xthread,
+    in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  lbu(x13, do_not_unlock_if_synchronized);
+  sb(zr, do_not_unlock_if_synchronized); // reset the flag
+
+  // get method access flags
+  ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+  ld(x12, Address(x11, Method::access_flags_offset()));
+  andi(t0, x12, JVM_ACC_SYNCHRONIZED);
+  beqz(t0, unlocked);  // not synchronized: no receiver monitor to release
+
+  // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+  // is set.
+  bnez(x13, no_unlock);
+
+  // unlock monitor
+  push(state); // save result
+
+  // BasicObjectLock will be first in list, since this is a
+  // synchronized method. However, need to check that the object has
+  // not been unlocked by an explicit monitorexit bytecode.
+  const Address monitor(fp, frame::interpreter_frame_initial_sp_offset *
+                        wordSize - (int) sizeof(BasicObjectLock));
+  // We use c_rarg1 so that if we go slow path it will be the correct
+  // register for unlock_object to pass to VM directly
+  la(c_rarg1, monitor); // address of first monitor
+
+  ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+  bnez(x10, unlock);  // obj field still set: the monitor is held, go release it
+
+  pop(state);  // obj field is null: restore the result saved above
+  if (throw_monitor_exception) {
+    // Entry already unlocked, need to throw exception
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::throw_illegal_monitor_state_exception));
+    should_not_reach_here();
+  } else {
+    // Monitor already unlocked during a stack unroll. If requested,
+    // install an illegal_monitor_state_exception.  Continue with
+    // stack unrolling.
+    if (install_monitor_exception) {
+      call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                      InterpreterRuntime::new_illegal_monitor_state_exception));
+    }
+    j(unlocked);
+  }
+
+  bind(unlock);
+  unlock_object(c_rarg1);
+  pop(state);  // restore the result saved before unlocking
+
+  // Check for block-structured locking (i.e., that all locked
+  // objects have been unlocked)
+  bind(unlocked);
+
+  // x10: Might contain return value
+
+  // Check that all monitors are unlocked
+  {
+    Label loop, exception, entry, restart;
+    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+    const Address monitor_block_top(
+      fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    const Address monitor_block_bot(
+      fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+    bind(restart);
+    // We use c_rarg1 so that if we go slow path it will be the correct
+    // register for unlock_object to pass to VM directly
+    ld(c_rarg1, monitor_block_top); // points to current entry, starting
+                                     // with top-most entry
+    la(x9, monitor_block_bot);  // points to word before bottom of
+                                  // monitor block
+
+    j(entry);
+
+    // Entry still locked, need to throw exception
+    bind(exception);
+
+    if (throw_monitor_exception) {
+      // Throw exception
+      MacroAssembler::call_VM(noreg,
+                              CAST_FROM_FN_PTR(address, InterpreterRuntime::
+                                               throw_illegal_monitor_state_exception));
+
+      should_not_reach_here();
+    } else {
+      // Stack unrolling. Unlock object and install illegal_monitor_exception.
+      // Unlock does not block, so don't have to worry about the frame.
+      // We don't have to preserve c_rarg1 since we are going to throw an exception.
+
+      push(state);
+      unlock_object(c_rarg1);
+      pop(state);
+
+      if (install_monitor_exception) {
+        call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                        InterpreterRuntime::
+                                        new_illegal_monitor_state_exception));
+      }
+
+      j(restart);  // rescan the monitor block from the top
+    }
+
+    bind(loop);
+    // check if current entry is used
+    add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes());
+    ld(t0, Address(t0, 0));
+    bnez(t0, exception);
+
+    add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry
+    bind(entry);
+    bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry
+  }
+
+  bind(no_unlock);
+
+  // jvmti support
+  if (notify_jvmdi) {
+    notify_method_exit(state, NotifyJVMTI);    // preserve TOSCA
+
+  } else {
+    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
+  }
+
+  // remove activation
+  // get sender esp (kept in t1 until the reserved-zone check is done)
+  ld(t1,
+     Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
+  if (StackReservedPages > 0) {
+    // testing if reserved zone needs to be re-enabled
+    Label no_reserved_zone_enabling;
+
+    ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
+    ble(t1, t0, no_reserved_zone_enabling);  // sender sp <= activation mark: nothing to re-enable
+
+    call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread);
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                    InterpreterRuntime::throw_delayed_StackOverflowError));
+    should_not_reach_here();
+
+    bind(no_reserved_zone_enabling);
+  }
+
+  // restore sender esp
+  mv(esp, t1);
+
+  // remove frame anchor
+  leave();
+  // If we're returning to interpreted code we will shortly be
+  // adjusting SP to allow some space for ESP.  If we're returning to
+  // compiled code the saved sender SP was saved in sender_sp, so this
+  // restores it.
+  andi(sp, esp, -16);  // sp = esp rounded down to a 16-byte boundary
+}
+
+// Lock object
+//
+// Args:
+//      c_rarg1: BasicObjectLock to be used for locking
+//
+// Kills:
+//      x10
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+//      t0, t1 (temp regs)
+void InterpreterMacroAssembler::lock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg = x10;
+    const Register tmp = c_rarg2;
+    const Register obj_reg = c_rarg3; // Will contain the oop
+
+    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+    const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
+    const int mark_offset = lock_offset +
+                            BasicLock::displaced_header_offset_in_bytes();
+
+    Label slow_case;
+
+    // Load object pointer into obj_reg c_rarg3
+    ld(obj_reg, Address(lock_reg, obj_offset));
+
+    if (UseBiasedLocking) {
+      biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
+    }
+
+    // Load (object->mark() | 1) into swap_reg
+    ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+    ori(swap_reg, t0, 1);
+
+    // Save (object->mark() | 1) into BasicLock's displaced header
+    sd(swap_reg, Address(lock_reg, mark_offset));
+
+    assert(lock_offset == 0,
+           "displached header must be first word in BasicObjectLock");
+
+    if (PrintBiasedLockingStatistics) {
+      Label fail, fast;
+      cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail);
+      bind(fast);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  t1, t0);
+      j(done);
+      bind(fail);
+    } else {
+      cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL);
+    }
+
+    // Test if the oopMark is an obvious stack pointer, i.e.,
+    //  1) (mark & 7) == 0, and
+    //  2) sp <= mark < sp + os::pagesize()
+    //
+    // These 3 tests can be done by evaluating the following
+    // expression: ((mark - sp) & (7 - os::vm_page_size())),
+    // assuming both stack pointer and pagesize have their
+    // least significant 3 bits clear.
+    // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg
+    sub(swap_reg, swap_reg, sp);
+    mv(t0, (int64_t)(7 - os::vm_page_size()));
+    andr(swap_reg, swap_reg, t0);
+
+    // Save the test result, for recursive case, the result is zero
+    sd(swap_reg, Address(lock_reg, mark_offset));
+
+    if (PrintBiasedLockingStatistics) {
+      bnez(swap_reg, slow_case);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  t1, t0);
+    }
+    beqz(swap_reg, done);  // zero result: recursive lock, nothing more to do
+
+    bind(slow_case);
+
+    // Call the runtime routine for slow case
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+
+    bind(done);
+  }
+}
+
+
+// Unlocks an object. Used in monitorexit bytecode and
+// remove_activation.  Throws an IllegalMonitorException if object is
+// not locked by current thread.
+//
+// Args:
+//      c_rarg1: BasicObjectLock for lock
+//
+// Kills:
+//      x10
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
+//      t0, t1 (temp regs)
+void InterpreterMacroAssembler::unlock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1");
+
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg   = x10;
+    const Register header_reg = c_rarg2;  // Will contain the old oopMark
+    const Register obj_reg    = c_rarg3;  // Will contain the oop
+
+    save_bcp(); // Save in case of exception
+
+    // Convert from BasicObjectLock structure to object and BasicLock
+    // structure.  Store the BasicLock address into x10
+    la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+
+    // Load oop into obj_reg(c_rarg3)
+    ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    // Free entry (the obj field is written back below if the slow path is taken)
+    sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    if (UseBiasedLocking) {
+      biased_locking_exit(obj_reg, header_reg, done);
+    }
+
+    // Load the old header from BasicLock structure
+    ld(header_reg, Address(swap_reg,
+                           BasicLock::displaced_header_offset_in_bytes()));
+
+    // Test for recursion: a zero displaced header means a recursive lock
+    beqz(header_reg, done);
+
+    // Atomic swap back the old header
+    cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL);
+
+    // Call the runtime routine for slow case.
+    sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+
+    bind(done);
+
+    restore_bcp();
+  }
+}
+
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
+                                                         Label& zero_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));  // mdp = value of the frame's mdp slot
+  beqz(mdp, zero_continue);  // null mdp: branch to the caller's label
+}
+
+// Set the method data pointer for the current bcp (frame slot untouched if the MDO is NULL).
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Label set_mdp;
+  push_reg(RegSet::of(x10, x11), sp); // save x10, x11
+
+  // Test MDO to avoid the call if it is NULL.
+  ld(x10, Address(xmethod, in_bytes(Method::method_data_offset())));
+  beqz(x10, set_mdp);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp);
+  // x10: mdi
+  // mdo is guaranteed to be non-zero here, we checked for it before the call.
+  ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
+  la(x11, Address(x11, in_bytes(MethodData::data_offset())));
+  add(x10, x11, x10);  // x10 = MDO data area + mdi
+  sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));
+  bind(set_mdp);  // bound after the store, so the NULL-MDO path skips it
+  pop_reg(RegSet::of(x10, x11), sp);
+}
+
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+  Label verify_continue;
+  add(sp, sp, -4 * wordSize);  // spill x10-x13; they are reloaded before returning
+  sd(x10, Address(sp, 0));
+  sd(x11, Address(sp, wordSize));
+  sd(x12, Address(sp, 2 * wordSize));
+  sd(x13, Address(sp, 3 * wordSize));
+  test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue
+  get_method(x11);
+
+  // If the mdp is valid, it will point to a DataLayout header which is
+  // consistent with the bcp.  The converse is highly probable also.
+  lh(x12, Address(x13, in_bytes(DataLayout::bci_offset())));
+  ld(t0, Address(x11, Method::const_offset()));
+  add(x12, x12, t0);
+  la(x12, Address(x12, ConstMethod::codes_offset()));
+  beq(x12, xbcp, verify_continue);  // recomputed bcp matches: the mdp is consistent
+  // x11: method
+  // xbcp: bcp // xbcp == 22
+  // x13: mdp
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
+               x11, xbcp, x13);
+  bind(verify_continue);
+  ld(x10, Address(sp, 0));
+  ld(x11, Address(sp, wordSize));
+  ld(x12, Address(sp, 2 * wordSize));
+  ld(x13, Address(sp, 3 * wordSize));
+  add(sp, sp, 4 * wordSize);
+#endif // ASSERT
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
+                                                int constant,
+                                                Register value) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // Store the 64-bit 'value' into the profile cell at mdp_in + constant.
+  sd(value, Address(mdp_in, constant));
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      int constant,
+                                                      bool decrement) {
+  increment_mdp_data_at(mdp_in, noreg, constant, decrement);  // noreg: no register index is added to the address
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      Register reg,
+                                                      int constant,
+                                                      bool decrement) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // %%% this does 64bit counters at best it is wasting space
+  // at worst it is a rare bug when counters overflow
+
+  assert_different_registers(t1, t0, mdp_in, reg);  // t0 and t1 are used as scratch below
+
+  Address addr1(mdp_in, constant);
+  Address addr2(t1, 0);
+  Address &addr = addr1;
+  if (reg != noreg) {
+    la(t1, addr1);
+    add(t1, t1, reg);  // t1 = mdp_in + constant + reg
+    addr = addr2;      // assigns through the reference: addr1 now holds a copy of addr2
+  }
+
+  if (decrement) {
+    ld(t0, addr);
+    addi(t0, t0, -DataLayout::counter_increment);
+    Label L;
+    bltz(t0, L);      // skip store if counter underflow
+    sd(t0, addr);
+    bind(L);
+  } else {
+    assert(DataLayout::counter_increment == 1,
+           "flow-free idiom only works with 1");
+    ld(t0, addr);
+    addi(t0, t0, DataLayout::counter_increment);
+    Label L;
+    blez(t0, L);       // skip store if counter overflow
+    sd(t0, addr);
+    bind(L);
+  }
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+                                                int flag_byte_constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  const Address flags_byte(mdp_in, in_bytes(DataLayout::flags_offset()));
+  // OR the constant into the DataLayout flags byte; t1 is clobbered.
+  lbu(t1, flags_byte);
+  ori(t1, t1, flag_byte_constant);
+  sb(t1, flags_byte);
+}
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+                                                 int offset,
+                                                 Register value,
+                                                 Register test_value_out,
+                                                 Label& not_equal_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // Compare 'value' with the 64-bit cell at mdp_in + offset and branch to
+  // not_equal_continue on mismatch.  The cell is loaded into
+  // test_value_out so that the caller can use it; when the caller passes
+  // noreg, the scratch register t1 receives it instead.
+  const bool caller_wants_cell = (test_value_out != noreg);
+  const Register cell = caller_wants_cell ? test_value_out : t1;
+  ld(cell, Address(mdp_in, offset));
+  bne(value, cell, not_equal_continue);
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ld(t1, Address(mdp_in, offset_of_disp));  // t1 = displacement stored at mdp_in + offset_of_disp
+  add(mdp_in, mdp_in, t1);                  // mdp_in += displacement
+  sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));  // write the new mdp to the frame slot
+}
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     Register reg,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  add(t1, mdp_in, reg);                 // reg is an extra byte offset into the profile data
+  ld(t1, Address(t1, offset_of_disp));  // t1 = displacement at mdp_in + reg + offset_of_disp
+  add(mdp_in, mdp_in, t1);              // mdp_in += displacement
+  sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));  // write the new mdp to the frame slot
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
+                                                       int constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  addi(mdp_in, mdp_in, (unsigned)constant);  // mdp_in += constant
+  sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));  // write the new mdp to the frame slot
+}
+
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+
+  // save/restore return_bci across call_VM (two stack words are reserved; the first only holds zero)
+  addi(sp, sp, -2 * wordSize);
+  sd(zr, Address(sp, 0));
+  sd(return_bci, Address(sp, wordSize));
+  call_VM(noreg,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+          return_bci);
+  ld(zr, Address(sp, 0));  // destination is zr, so the loaded value is discarded
+  ld(return_bci, Address(sp, wordSize));
+  addi(sp, sp, 2 * wordSize);
+}
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
+                                                     Register bumped_count) {
+  // Emits nothing unless the interpreter is profiling.
+  if (!ProfileInterpreter) { return; }
+  Label no_mdo, counter_saturated;
+  const Address taken_count(mdp, in_bytes(JumpData::taken_offset()));
+
+  // Load the frame's mdp into 'mdp'; skip all profiling when it is null.
+  test_method_data_pointer(mdp, no_mdo);
+
+  // Bump the taken counter, leaving the incremented value in bumped_count.
+  ld(bumped_count, taken_count);
+  assert(DataLayout::counter_increment == 1,
+          "flow-free idiom only works with 1");
+  addi(bumped_count, bumped_count, DataLayout::counter_increment);
+  // 0x7fffffffffffffff + 1 wraps negative, so "<= 0" detects overflow;
+  // the store is then skipped and the old count stays in memory.
+  blez(bumped_count, counter_saturated);
+  sd(bumped_count, taken_count);
+  bind(counter_saturated);
+
+  // Advance mdp by the displacement stored in the JumpData record.
+  update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+  bind(no_mdo);
+}
+
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+  // Emits nothing unless the interpreter is profiling.
+  if (!ProfileInterpreter) { return; }
+  Label no_mdo;
+
+  // Load the frame's mdp; skip all profiling when it is null.
+  test_method_data_pointer(mdp, no_mdo);
+
+  // The branch falls through: bump the not-taken counter.
+  increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
+  // Step mdp over this BranchData record so that it corresponds to
+  // the next bytecode.
+  update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+  bind(no_mdo);
+}
+
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+  // Emits nothing unless the interpreter is profiling.
+  if (!ProfileInterpreter) { return; }
+  Label no_mdo;
+
+  // Load the frame's mdp; skip all profiling when it is null.
+  test_method_data_pointer(mdp, no_mdo);
+
+  // Count this call in the CounterData record.
+  increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+  // Step mdp over the CounterData record.
+  update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+  bind(no_mdo);
+}
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+  // Emits nothing unless the interpreter is profiling.
+  if (!ProfileInterpreter) { return; }
+  Label no_mdo;
+
+  // Load the frame's mdp; skip all profiling when it is null.
+  test_method_data_pointer(mdp, no_mdo);
+
+  // Count this call; no receiver type is recorded here.
+  increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+  // The counter is a CounterData field, but mdp is stepped by the size
+  // of a VirtualCallData record.
+  const int record_size = in_bytes(VirtualCallData::virtual_call_data_size());
+  update_mdp_by_constant(mdp, record_size);
+  bind(no_mdo);
+}
+
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+                                                     Register mdp,
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // Profile a virtual call.  If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    Label skip_receiver_profile;
+    if (receiver_can_be_null) {
+      Label not_null;
+      bnez(receiver, not_null);  // non-null receiver: record its type below
+      // Null receiver: only increment the call count, there is no type to record.
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+      j(skip_receiver_profile);
+      bind(not_null);
+    }
+
+    // Record the receiver type.
+    record_klass_in_profile(receiver, mdp, reg2, true);
+    bind(skip_receiver_profile);
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows.  At the same time, it remembers
+// the location of the first empty row.  (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree.  Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+                                Register receiver, Register mdp,
+                                Register reg2,
+                                Label& done, bool is_virtual_call) {
+  const int total_count_offset = in_bytes(CounterData::count_offset());
+  if (TypeProfileWidth == 0) {
+    // No receiver rows at all: a virtual call only bumps the total count.
+    if (is_virtual_call) {
+      increment_mdp_data_at(mdp, total_count_offset);
+    }
+    return;
+  }
+
+  // A negative offset tells the row helper that there is no total counter
+  // to bump when the receiver fits in no row.
+  const int non_profiled_offset = is_virtual_call ? total_count_offset : -1;
+  record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
+    &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
+}
+
+void InterpreterMacroAssembler::record_item_in_profile_helper(
+  Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows,
+  OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) {
+  int last_row = total_rows - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the item and for null.
+  // Take any of three different outcomes:
+  //   1. found item => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    Label next_test;
+    bool test_for_null_also = (row == start_row);  // only the first row of this call is null-tested
+
+    // See if the item is item[n].
+    int item_offset = in_bytes(item_offset_fn(row));
+    test_mdp_data_at(mdp, item_offset, item,
+                     (test_for_null_also ? reg2 : noreg),
+                     next_test);
+    // (Reg2 now contains the item from the CallData, when it was requested above.)
+
+    // The item is item[n].  Increment count[n].
+    int count_offset = in_bytes(item_count_offset_fn(row));
+    increment_mdp_data_at(mdp, count_offset);
+    j(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      Label found_null;
+      // Failed the equality check on item[n]...  Test for null.
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        if (non_profiled_offset >= 0) {
+          beqz(reg2, found_null);
+          // Item did not match any saved item and there is no empty row for it.
+          // Increment total counter to indicate polymorphic case.
+          increment_mdp_data_at(mdp, non_profiled_offset);
+          j(done);
+          bind(found_null);
+        } else {
+          bnez(reg2, done);  // row occupied and no total counter: nothing to record
+        }
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      beqz(reg2, found_null);
+
+      // Put all the "Case 3" tests here.
+      record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
+        item_offset_fn, item_count_offset_fn, non_profiled_offset);
+
+      // Found a null.  Keep searching for a matching item,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching item, but we
+  // observed the item[start_row] is NULL.
+  // Fill in the item field and set the count to counter_increment.
+  int item_offset = in_bytes(item_offset_fn(start_row));
+  set_mdp_data_at(mdp, item_offset, item);
+  int count_offset = in_bytes(item_count_offset_fn(start_row));
+  mv(reg2, DataLayout::counter_increment);
+  set_mdp_data_at(mdp, count_offset, reg2);
+  if (start_row > 0) {
+    j(done);  // nested invocations jump to done; the start_row == 0 invocation falls through
+  }
+}
+
+// Example state machine code for three profile rows:
+//   # main copy of decision tree, rooted at row[0]
+//   if (row[0].rec == rec) then [
+//     row[0].incr()
+//     goto done
+//   ]
+//   if (row[0].rec != NULL) then [
+//     # inner copy of decision tree, rooted at row[1]
+//     if (row[1].rec == rec) then [
+//       row[1].incr()
+//       goto done
+//     ]
+//     if (row[1].rec != NULL) then [
+//       # degenerate decision tree, rooted at row[2]
+//       if (row[2].rec == rec) then [
+//         row[2].incr()
+//         goto done
+//       ]
+//       if (row[2].rec != NULL) then [
+//         count.incr()
+//         goto done
+//       ] # overflow
+//       row[2].init(rec)
+//       goto done
+//     ] else [
+//       # remember row[1] is empty
+//       if (row[2].rec == rec) then [
+//         row[2].incr()
+//         goto done
+//       ]
+//       row[1].init(rec)
+//       goto done
+//     ]
+//   ] else [
+//     # remember row[0] is empty
+//     if (row[1].rec == rec) then [
+//       row[1].incr()
+//       goto done
+//     ]
+//     if (row[2].rec == rec) then [
+//       row[2].incr()
+//       goto done
+//     ]
+//     row[0].init(rec)
+//     goto done
+//   ]
+//   done:
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register mdp, Register reg2,
+                                                        bool is_virtual_call) {
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;  // exit label handed to the helper
+
+  record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call);
+
+  bind(done);  // bound right after the code the helper emits
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) {
+  // Emits nothing unless the interpreter is profiling.
+  if (!ProfileInterpreter) { return; }
+  Label finished;
+
+  // Load the frame's mdp; skip all profiling when it is null.
+  test_method_data_pointer(mdp, finished);
+
+  // Every ret bumps the total count first.
+  increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+  // One compare-and-update sequence is emitted per RetData row; the rows
+  // are tested in order.
+  for (uint i = 0; i < RetData::row_limit(); i++) {
+    Label try_next_row;
+    const int bci_off = in_bytes(RetData::bci_offset(i));
+    const int count_off = in_bytes(RetData::bci_count_offset(i));
+    const int disp_off = in_bytes(RetData::bci_displacement_offset(i));
+
+    // Row i does not hold return_bci: go on to the next row's test.
+    test_mdp_data_at(mdp, bci_off, return_bci, noreg, try_next_row);
+
+    // Row i matches: count it, then move mdp by the row's displacement.
+    increment_mdp_data_at(mdp, count_off);
+    update_mdp_by_offset(mdp, disp_off);
+    j(finished);
+    bind(try_next_row);
+  }
+
+  // No row matched: let the runtime update the mdp.
+  update_mdp_for_ret(return_bci);
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  Label finished;
+  // Skip everything when the method has no method data.
+  test_method_data_pointer(mdp, finished);
+
+  set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+  // Step mdp past this entry (VirtualCallData-sized under TypeProfileCasts).
+  const int entry_size = TypeProfileCasts
+      ? in_bytes(VirtualCallData::virtual_call_data_size())
+      : in_bytes(BitData::bit_data_size());
+  update_mdp_by_constant(mdp, entry_size);
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+  if (!(ProfileInterpreter && TypeProfileCasts)) {
+    return;
+  }
+
+  Label finished;
+  // Skip everything when the method has no method data.
+  test_method_data_pointer(mdp, finished);
+
+  // mdp was already bumped past the entry, so back the counter offset up.
+  const int backed_up_count_offset = in_bytes(CounterData::count_offset()) -
+                                     in_bytes(VirtualCallData::virtual_call_data_size());
+  // *Decrement* the counter.  We expect to see zero or small negatives.
+  increment_mdp_data_at(mdp, backed_up_count_offset, true);
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  Label finished;
+  // Skip everything when the method has no method data.
+  test_method_data_pointer(mdp, finished);
+
+  if (TypeProfileCasts) {
+    // Record the object type, then step mdp past the VirtualCallData entry.
+    record_klass_in_profile(klass, mdp, reg2, false);
+    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+  } else {
+    // Only a BitData entry to step over.
+    update_mdp_by_constant(mdp, in_bytes(BitData::bit_data_size()));
+  }
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::profile_switch_default(Register mdp) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  Label finished;
+  // Skip everything when the method has no method data.
+  test_method_data_pointer(mdp, finished);
+
+  // Count one more hit of the default case.
+  const int default_count = in_bytes(MultiBranchData::default_count_offset());
+  increment_mdp_data_at(mdp, default_count);
+
+  // Update the method data pointer using the default displacement slot.
+  const int default_disp = in_bytes(MultiBranchData::default_displacement_offset());
+  update_mdp_by_offset(mdp, default_disp);
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::profile_switch_case(Register index,
+                                                    Register mdp,
+                                                    Register reg2) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  Label finished;
+  // Skip everything when the method has no method data.
+  test_method_data_pointer(mdp, finished);
+
+  // Turn index into a byte offset: index * per_case_size + case_array_offset.
+  // reg2 and t0 are overwritten as scratch registers.
+  const int case_size = in_bytes(MultiBranchData::per_case_size());
+  const int cases_start = in_bytes(MultiBranchData::case_array_offset());
+  mvw(reg2, case_size);
+  mvw(t0, cases_start);
+  Assembler::mul(index, index, reg2);
+  Assembler::add(index, index, t0);
+
+  // Bump the count of the selected case.
+  increment_mdp_data_at(mdp, index,
+                        in_bytes(MultiBranchData::relative_count_offset()));
+
+  // Update the method data pointer via the selected case's displacement.
+  update_mdp_by_offset(mdp, index,
+                       in_bytes(MultiBranchData::relative_displacement_offset()));
+
+  bind(finished);
+}
+
+void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { /* emits no code */ }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+  // JVMTI: while a thread is in interp_only_mode, method entry/exit events are
+  // posted to track stack depth. The check is only generated if that mode can
+  // be entered at all.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label not_interp_only;
+    lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
+    beqz(x13, not_interp_only);
+    address post_entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry);
+    call_VM(noreg, post_entry);
+    bind(not_interp_only);
+  }
+
+  {
+    // DTrace method-entry probe, guarded by SkipIfEqual on DTraceMethodProbes.
+    SkipIfEqual skip(this, &DTraceMethodProbes, false);
+    get_method(c_rarg1);
+    address dtrace_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry);
+    call_VM_leaf(dtrace_entry, xthread, c_rarg1);
+  }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (log_is_enabled(Trace, redefine, class, obsolete)) {
+    get_method(c_rarg1);
+    address rc_trace = CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry);
+    call_VM_leaf(rc_trace, xthread, c_rarg1);
+  }
+}
+
+
+void InterpreterMacroAssembler::notify_method_exit(TosState state, NotifyMethodExitMode mode) {
+  // JVMTI: while a thread is in interp_only_mode, method entry/exit events are
+  // posted to track stack depth. The check is only generated if that mode can
+  // be entered at all and the caller asked for JVMTI notification.
+  const bool post_exit_event = mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events();
+  if (post_exit_event) {
+    Label not_interp_only;
+    // Note: frame::interpreter_frame_result has a dependency on how the
+    // method result is saved across the call to post_method_exit. If this
+    // is changed then the interpreter_frame_result implementation will
+    // need to be updated too.
+    // The template interpreter leaves the result on the top of the stack.
+    push(state);
+    lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset()));
+    beqz(x13, not_interp_only);
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
+    bind(not_interp_only);
+    pop(state);
+  }
+
+  {
+    // DTrace method-exit probe; state is pushed before and popped after the leaf call.
+    SkipIfEqual skip(this, &DTraceMethodProbes, false);
+    push(state);
+    get_method(c_rarg1);
+    address dtrace_exit = CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit);
+    call_VM_leaf(dtrace_exit, xthread, c_rarg1);
+    pop(state);
+  }
+}
+
+
+// Jump to *where if ((*counter_addr += increment) & mask) == 0; tmp1/tmp2 are scratch.
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
+                                                        int increment, Address mask,
+                                                        Register tmp1, Register tmp2,
+                                                        bool preloaded, Label* where) {
+  Label done;
+  if (!preloaded) {  // when preloaded, tmp1 is used as-is (no load is emitted)
+    lwu(tmp1, counter_addr);
+  }
+  add(tmp1, tmp1, increment);
+  sw(tmp1, counter_addr);  // write the incremented counter back
+  lwu(tmp2, mask);
+  andr(tmp1, tmp1, tmp2);
+  bnez(tmp1, done);  // masked value non-zero: skip the jump
+  j(*where); // offset is too large so we have to use j instead of beqz here
+  bind(done);
+}
+
+void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
+                                                  int number_of_arguments) {
+  // Interpreter-specific wrapper around MacroAssembler::call_VM_leaf_base.
+  //
+  // Note: No need to save/restore rbcp & rlocals pointer since these
+  //       are callee saved registers and no blocking/ GC can happen
+  //       in leaf calls.
+#ifdef ASSERT
+  {
+    // Debug builds: stop unless the frame's last_sp slot is zero.
+    Label last_sp_clear;
+    ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+    beqz(t0, last_sp_clear);
+    stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL");
+    bind(last_sp_clear);
+  }
+#endif /* ASSERT */
+  // Delegate to the generic implementation.
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
+}
+
+void InterpreterMacroAssembler::call_VM_base(Register oop_result,
+                                             Register java_thread,
+                                             Register last_java_sp,
+                                             address  entry_point,
+                                             int      number_of_arguments,
+                                             bool     check_exceptions) {
+  // Interpreter-specific wrapper around MacroAssembler::call_VM_base.
+  //
+  // Note: Could avoid restoring locals ptr (callee saved) - however doesn't
+  //       really make a difference for these runtime calls, since they are
+  //       slow anyway. Btw., bcp must be saved/restored since it may change
+  //       due to GC.
+  save_bcp();
+#ifdef ASSERT
+  {
+    // Debug builds: stop unless the frame's last_sp slot is zero.
+    Label last_sp_clear;
+    ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+    beqz(t0, last_sp_clear);
+    stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL");
+    bind(last_sp_clear);
+  }
+#endif /* ASSERT */
+  // Delegate to the generic implementation (noreg is passed for java_thread).
+  MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp,
+                               entry_point, number_of_arguments,
+                               check_exceptions);
+  // Reload the interpreter state kept in the frame.
+  restore_bcp();
+  restore_locals();
+}
+
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) {
+  assert_different_registers(obj, tmp, t0, mdo_addr.base());
+  Label update, next, none;
+  // Emits code that folds obj's klass into the profile cell at mdo_addr; overwrites obj, tmp, t0.
+  verify_oop(obj);
+  // Null obj: only OR the null_seen bit into the cell.
+  bnez(obj, update);
+  orptr(mdo_addr, TypeEntries::null_seen, t0, tmp);
+  j(next);
+
+  bind(update);
+  load_klass(obj, obj);  // obj now holds the klass instead of the oop
+
+  ld(t0, mdo_addr);
+  xorr(obj, obj, t0);  // obj = klass ^ current cell contents
+  andi(t0, obj, TypeEntries::type_klass_mask);
+  beqz(t0, next); // klass seen before, nothing to
+                  // do. The unknown bit may have been
+                  // set already but no need to check.
+
+  andi(t0, obj, TypeEntries::type_unknown);
+  bnez(t0, next);
+  // already unknown. Nothing to do anymore.
+
+  ld(t0, mdo_addr);
+  beqz(t0, none);  // cell is still zero
+  mv(tmp, (u1)TypeEntries::null_seen);
+  beq(t0, tmp, none);  // cell holds exactly null_seen
+  // There is a chance that the checks above (re-reading profiling
+  // data from memory) fail if another thread has just set the
+  // profiling to this obj's klass
+  ld(t0, mdo_addr);
+  xorr(obj, obj, t0);
+  andi(t0, obj, TypeEntries::type_klass_mask);
+  beqz(t0, next);
+
+  // different than before. Cannot keep accurate profile.
+  orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp);
+  j(next);
+
+  bind(none);
+  // first time here. Set profile type.
+  sd(obj, mdo_addr);
+
+  bind(next);
+}
+
+// Emits the call-site type profiling code selected by MethodData::profile_arguments()
+// and MethodData::profile_return(); when type data is present mdp ends up right after it.
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+    test_method_data_pointer(mdp, profile_continue);
+    // Read the tag of the entry preceding mdp; skip unless it is the expected type-data tag.
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+    lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start));
+    if (is_virtual) {
+      mv(tmp, (u1)DataLayout::virtual_call_type_data_tag);
+      bne(t0, tmp, profile_continue);
+    } else {
+      mv(tmp, (u1)DataLayout::call_type_data_tag);
+      bne(t0, tmp, profile_continue);
+    }
+
+    // calculate slot step
+    static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0));
+    static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0;
+
+    // calculate type step
+    static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0));
+    static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0;
+
+    if (MethodData::profile_arguments()) {
+      Label done, loop, loopEnd, profileArgument, profileReturnType;
+      RegSet pushed_registers;
+      pushed_registers += x15;
+      pushed_registers += x16;
+      pushed_registers += x17;
+      Register mdo_addr = x15;
+      Register index = x16;
+      Register off_to_args = x17;
+      push_reg(pushed_registers, sp);  // spill x15-x17; restored by pop_reg below
+
+      mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset()));
+      mv(t0, TypeProfileArgsLimit);
+      beqz(t0, loopEnd);
+
+      mv(index, zr); // index < TypeProfileArgsLimit
+      bind(loop);
+      bgtz(index, profileReturnType);
+      mv(t0, (int)MethodData::profile_return());
+      beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false
+      bind(profileReturnType);
+      // If return value type is profiled we may have no argument to profile
+      ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
+      mv(t1, - TypeStackSlotEntries::per_arg_count());
+      mul(t1, index, t1);
+      add(tmp, tmp, t1);
+      mv(t1, TypeStackSlotEntries::per_arg_count());
+      add(t0, mdp, off_to_args);
+      blt(tmp, t1, done);
+
+      bind(profileArgument);
+
+      ld(tmp, Address(callee, Method::const_offset()));
+      load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
+      // stack offset o (zero based) from the start of the argument
+      // list, for n arguments translates into offset n - o - 1 from
+      // the end of the argument list
+      mv(t0, stack_slot_offset0);
+      mv(t1, slot_step);
+      mul(t1, index, t1);
+      add(t0, t0, t1);
+      add(t0, mdp, t0);
+      ld(t0, Address(t0));
+      sub(tmp, tmp, t0);
+      addi(tmp, tmp, -1);
+      Address arg_addr = argument_address(tmp);
+      ld(tmp, arg_addr);
+
+      mv(t0, argument_type_offset0);
+      mv(t1, type_step);
+      mul(t1, index, t1);
+      add(t0, t0, t1);
+      add(mdo_addr, mdp, t0);
+      Address mdo_arg_addr(mdo_addr, 0);
+      profile_obj_type(tmp, mdo_arg_addr, t1);
+
+      int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+      addi(off_to_args, off_to_args, to_add);
+
+      // increment index by 1
+      addi(index, index, 1);
+      mv(t1, TypeProfileArgsLimit);
+      blt(index, t1, loop);
+      bind(loopEnd);
+
+      if (MethodData::profile_return()) {
+        ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
+        addi(tmp, tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
+      }
+
+      add(t0, mdp, off_to_args);
+      bind(done);
+      mv(mdp, t0);  // t0 holds mdp + off_to_args on every path that reaches 'done'
+
+      // unspill the clobbered registers
+      pop_reg(pushed_registers, sp);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last
+        // argument. tmp is the number of cells left in the
+        // CallTypeData/VirtualCallTypeData to reach its end. Non null
+        // if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size));
+      }
+      sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize));  // store the updated mdp in its frame slot
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+    }
+
+    // mdp points right after the end of the
+    // CallTypeData/VirtualCallTypeData, right after the cells for the
+    // return value type if there's one
+
+    bind(profile_continue);
+  }
+}
+
+// Emits code that records the type of the value in 'ret' in the return-type
+// cell located just before mdp. Overwrites tmp, t0 and t1.
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
+  assert_different_registers(mdp, ret, tmp, xbcp, t0, t1);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue;
+    test_method_data_pointer(mdp, profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+      // If we don't profile all invoke bytecodes we must make sure
+      // it's a bytecode we indeed profile. We can't go back to the
+      // beginning of the ProfileData we intend to update to check its
+      // type because we're right after it and we don't know its
+      // length
+      Label do_profile;
+      lbu(t0, Address(xbcp, 0));
+      mv(tmp, (u1)Bytecodes::_invokedynamic);
+      beq(t0, tmp, do_profile);
+      mv(tmp, (u1)Bytecodes::_invokehandle);
+      beq(t0, tmp, do_profile);
+      get_method(tmp);
+      lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
+      mv(t1, vmIntrinsics::_compiledLambdaForm);
+      bne(t0, t1, profile_continue);
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
+    mv(tmp, ret);  // profile a copy: profile_obj_type overwrites its obj register
+    profile_obj_type(tmp, mdo_ret_addr, t1);
+
+    bind(profile_continue);
+  }
+}
+
+// Emits code that profiles the types of the method's parameters held in the
+// locals area, walking the profile slots from the last one down to the first.
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) {
+  assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3);
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue;
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Load the offset of the area within the MDO used for
+    // parameters. If it's negative we're not profiling any parameters
+    lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())));
+    srli(tmp2, tmp1, 31);
+    bnez(tmp2, profile_continue);  // i.e. sign bit set
+
+    // Compute a pointer to the area for parameters from the offset
+    // and move the pointer to the slot for the last
+    // parameters. Collect profiling from last parameter down.
+    // mdo start + parameters offset + array length - 1
+    add(mdp, mdp, tmp1);
+    ld(tmp1, Address(mdp, ArrayData::array_len_offset()));
+    add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
+    // Assembly-time constants used by every iteration of the emitted loop.
+    const int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+    const int type_base = in_bytes(ParametersTypeData::type_offset(0));
+    const int per_arg_scale = exact_log2(DataLayout::cell_size);
+    Label loop;
+    bind(loop);
+    add(t0, mdp, off_base);  // rebuilt each iteration: t0/t1 are overwritten below
+    add(t1, mdp, type_base);
+
+    shadd(tmp2, tmp1, t0, tmp2, per_arg_scale);
+    // load offset on the stack from the slot for this parameter
+    ld(tmp2, Address(tmp2, 0));
+    neg(tmp2, tmp2);
+
+    // read the parameter from the local area
+    shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize);
+    ld(tmp2, Address(tmp2, 0));
+
+    // profile the parameter
+    shadd(t1, tmp1, t1, t0, per_arg_scale);
+    Address arg_type(t1, 0);
+    profile_obj_type(tmp2, arg_type, tmp3);
+
+    // go to next parameter
+    add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count());
+    bgez(tmp1, loop);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+                                                    Register mcs, Label& skip) {
+  Label counters_present;
+  ld(mcs, Address(method, Method::method_counters_offset()));
+  bnez(mcs, counters_present);
+  // No counters yet: ask the runtime to build them, then reload the field.
+  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), method);
+  ld(mcs, Address(method, Method::method_counters_offset()));
+  beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory
+  bind(counters_present);
+}
+
+#ifdef ASSERT
+void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits,
+                                                    const char* msg, bool stop_by_hit) {
+  Label passed;
+  andi(t0, access_flags, flag_bits);  // t0 = access_flags & flag_bits
+  if (!stop_by_hit) {
+    bnez(t0, passed);  // stop only when none of the tested bits is set
+  } else {
+    beqz(t0, passed);  // stop when any tested bit is set
+  }
+  stop(msg);
+  bind(passed);
+}
+
+void InterpreterMacroAssembler::verify_frame_setup() {
+  Label frame_ok;
+  // esp must equal the monitor block top stored in the frame.
+  ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+  beq(esp, t0, frame_ok);
+  stop("broken stack frame setup in interpreter");
+  bind(frame_ok);
+}
+#endif
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
new file mode 100644
index 0000000000..4126e8ee70
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP
+#define CPU_RISCV_INTERP_MASM_RISCV_HPP
+
+#include "asm/macroAssembler.hpp"
+#include "interpreter/invocationCounter.hpp"
+#include "runtime/frame.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+typedef ByteSize (*OffsetFunction)(uint);
+
+// MacroAssembler extended with the helpers used when generating interpreter code.
+class InterpreterMacroAssembler: public MacroAssembler {
+ protected:
+  // Interpreter specific version of call_VM_base
+  using MacroAssembler::call_VM_leaf_base;
+
+  virtual void call_VM_leaf_base(address entry_point,
+                                 int number_of_arguments);
+
+  virtual void call_VM_base(Register oop_result,
+                            Register java_thread,
+                            Register last_java_sp,
+                            address  entry_point,
+                            int number_of_arguments,
+                            bool check_exceptions);
+
+  // base routine for all dispatches
+  void dispatch_base(TosState state, address* table, bool verifyoop = true,
+                     bool generate_poll = false, Register Rs = t0);
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+  virtual ~InterpreterMacroAssembler() {}
+
+  void load_earlyret_value(TosState state);
+
+  void jump_to_entry(address entry);
+
+  virtual void check_and_handle_popframe(Register java_thread);
+  virtual void check_and_handle_earlyret(Register java_thread);
+
+  // Interpreter-specific registers: spill/reload them via their slots in the frame at fp
+  void save_bcp() {
+    sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
+  }
+
+  void restore_bcp() {
+    ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize));
+  }
+
+  void restore_locals() {
+    ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize));
+  }
+
+  void restore_constant_pool_cache() {
+    ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize));
+  }
+
+  void get_dispatch();
+
+  // Helpers for runtime call arguments/results
+  void get_method(Register reg) {
+    ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize));
+  }
+
+  void get_const(Register reg) {
+    get_method(reg);  // reg = Method*, then follow Method::const_offset()
+    ld(reg, Address(reg, in_bytes(Method::const_offset())));
+  }
+
+  void get_constant_pool(Register reg) {
+    get_const(reg);  // reg = ConstMethod*, then follow ConstMethod::constants_offset()
+    ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset())));
+  }
+
+  void get_constant_pool_cache(Register reg) {
+    get_constant_pool(reg);  // reg = ConstantPool*, then follow its cache field
+    ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes()));
+  }
+
+  void get_cpool_and_tags(Register cpool, Register tags) {
+    get_constant_pool(cpool);
+    ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes()));
+  }
+
+  void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
+  void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_method_counters(Register method, Register mcs, Label& skip);
+
+  // Load cpool->resolved_references(index).
+  void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15);
+
+  // Load cpool->resolved_klass_at(index).
+  void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp);
+
+  void pop_ptr(Register r = x10);
+  void pop_i(Register r = x10);
+  void pop_l(Register r = x10);
+  void pop_f(FloatRegister r = f10);
+  void pop_d(FloatRegister r = f10);
+  void push_ptr(Register r = x10);
+  void push_i(Register r = x10);
+  void push_l(Register r = x10);
+  void push_f(FloatRegister r = f10);
+  void push_d(FloatRegister r = f10);
+
+  void pop(TosState state); // transition vtos -> state
+  void push(TosState state); // transition state -> vtos
+
+  void empty_expression_stack() {
+    ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
+    // NULL last_sp until next java call
+    sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  }
+
+  // Helpers for swap and dup
+  void load_ptr(int n, Register val);
+  void store_ptr(int n, Register val);
+
+  // Load a float/double from 'src'. NOTE(review): original text names FPU register v0 as the target - confirm for RISC-V.
+  void load_float(Address src);
+  void load_double(Address src);
+
+  // Generate a subtype check: branch to ok_is_subtype if sub_klass is
+  // a subtype of super_klass.
+  void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
+
+  // Dispatching
+  void dispatch_prolog(TosState state, int step = 0);
+  void dispatch_epilog(TosState state, int step = 0);
+  // dispatch via t0
+  void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0);
+  // dispatch normal table via t0 (assume t0 is loaded already)
+  void dispatch_only_normal(TosState state, Register Rs = t0);
+  void dispatch_only_noverify(TosState state, Register Rs = t0);
+  // load t0 from [xbcp + step] and dispatch via t0
+  void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
+  // load t0 from [xbcp] and dispatch via t0 and table
+  void dispatch_via (TosState state, address* table);
+
+  // jump to an invoked target
+  void prepare_to_jump_from_interpreted();
+  void jump_from_interpreted(Register method);
+
+  // Returning from interpreted functions
+  //
+  // Removes the current activation (incl. unlocking of monitors)
+  // and sets up the return address.  This code is also used for
+  // exception unwinding. In that case, we do not want to throw
+  // IllegalMonitorStateExceptions, since that might get us into an
+  // infinite rethrow exception loop.
+  // Additionally this code is used for popFrame and earlyReturn.
+  // In popFrame case we want to skip throwing an exception,
+  // installing an exception, and notifying jvmdi.
+  // In earlyReturn case we only want to skip throwing an exception
+  // and installing an exception.
+  void remove_activation(TosState state,
+                         bool throw_monitor_exception = true,
+                         bool install_monitor_exception = true,
+                         bool notify_jvmdi = true);
+
+  // FIXME: Give us a valid frame at a null check.
+  virtual void null_check(Register reg, int offset = -1) {
+        MacroAssembler::null_check(reg, offset);
+  }
+
+  // Object locking
+  void lock_object  (Register lock_reg);
+  void unlock_object(Register lock_reg);
+
+  // Interpreter profiling operations
+  void set_method_data_pointer_for_bcp();
+  void test_method_data_pointer(Register mdp, Label& zero_continue);
+  void verify_method_data_pointer();
+
+  void set_mdp_data_at(Register mdp_in, int constant, Register value);
+  void increment_mdp_data_at(Address data, bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, int constant,
+                             bool decrement = false);
+  void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
+                             bool decrement = false);
+  void increment_mask_and_jump(Address counter_addr,
+                               int increment, Address mask,
+                               Register tmp1, Register tmp2,
+                               bool preloaded, Label* where);
+
+  void set_mdp_flag_at(Register mdp_in, int flag_constant);
+  void test_mdp_data_at(Register mdp_in, int offset, Register value,
+                        Register test_value_out,
+                        Label& not_equal_continue);
+
+  void record_klass_in_profile(Register receiver, Register mdp,
+                               Register reg2, bool is_virtual_call);
+  void record_klass_in_profile_helper(Register receiver, Register mdp,
+                                      Register reg2,
+                                      Label& done, bool is_virtual_call);
+  void record_item_in_profile_helper(Register item, Register mdp,
+                                     Register reg2, int start_row, Label& done, int total_rows,
+                                     OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                     int non_profiled_offset);
+
+  void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
+  void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
+  void update_mdp_by_constant(Register mdp_in, int constant);
+  void update_mdp_for_ret(Register return_bci);
+
+  // narrow int return value
+  void narrow(Register result);
+
+  void profile_taken_branch(Register mdp, Register bumped_count);
+  void profile_not_taken_branch(Register mdp);
+  void profile_call(Register mdp);
+  void profile_final_call(Register mdp);
+  void profile_virtual_call(Register receiver, Register mdp,
+                            Register t1,
+                            bool receiver_can_be_null = false);
+  void profile_ret(Register return_bci, Register mdp);
+  void profile_null_seen(Register mdp);
+  void profile_typecheck(Register mdp, Register klass, Register temp);
+  void profile_typecheck_failed(Register mdp);
+  void profile_switch_default(Register mdp);
+  void profile_switch_case(Register index_in_scratch, Register mdp,
+                           Register temp);
+
+  void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp);
+  void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);
+  void profile_return_type(Register mdp, Register ret, Register tmp);
+  void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3);
+
+  // Debugging
+  // only if +VerifyFPU  && (state == ftos || state == dtos)
+  void verify_FPU(int stack_depth, TosState state = ftos);
+
+  typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode;
+
+  // support for jvmti/dtrace
+  void notify_method_entry();
+  void notify_method_exit(TosState state, NotifyMethodExitMode mode);
+
+  virtual void _call_Unimplemented(address call_site) {
+    save_bcp();  // store xbcp in the frame before recording the last Java frame
+    set_last_Java_frame(esp, fp, (address) pc(), t0);
+    MacroAssembler::_call_Unimplemented(call_site);
+  }
+
+#ifdef ASSERT
+  void verify_access_flags(Register access_flags, uint32_t flag_bits,
+                           const char* msg, bool stop_by_hit = true);
+  void verify_frame_setup();
+#endif
+};
+
+#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
new file mode 100644
index 0000000000..b5e6b8c512
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/signature.hpp"
+
+#define __ _masm->
+
+// Implementation of SignatureHandlerGenerator: fixed registers used by the pass_* code below
+Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } // base register for reading the Java argument slots
+Register InterpreterRuntime::SignatureHandlerGenerator::to()   { return sp; }      // base register for storing stack-passed native arguments
+Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; }      // scratch register (used by pass_object)
+
+Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() {
+  // Pre-increment on purpose: g_INTArgReg[0] is never handed out by this method.
+  const bool exhausted = _num_reg_int_args >= Argument::n_int_register_parameters_c - 1;
+  if (exhausted) { return noreg; }
+  return g_INTArgReg[++_num_reg_int_args];
+}
+
+FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() {
+  // Hand out FP argument registers in order; fnoreg once all of them are taken.
+  if (_num_reg_fp_args >= Argument::n_float_register_parameters_c) {
+    return fnoreg;
+  }
+  return g_FPArgReg[_num_reg_fp_args++];
+}
+
+int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() {
+  const int current = _stack_offset;   // offset handed back to the caller
+  _stack_offset = current + wordSize;  // advance by one native word for the next argument
+  return current;
+}
+
+InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(
+  const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+  _masm = new MacroAssembler(buffer); // allocated in the resource area by default
+  _num_reg_int_args = (method->is_static() ? 1 : 0); // static: first next_gpr() yields g_INTArgReg[2] instead of g_INTArgReg[1]
+  _num_reg_fp_args = 0; // no FP argument registers handed out yet
+  _stack_offset = 0; // first stack-passed argument is stored at offset 0 from to()
+}
+
+// The C ABI specifies:
+// "integer scalars narrower than XLEN bits are widened according to the sign
+// of their type up to 32 bits, then sign-extended to XLEN bits."
+// Applies for both passed in register and stack.
+//
+// Java uses 32-bit stack slots; jint, jshort, jchar and jbyte each use one slot.
+// Native uses 64-bit stack slots for all integer scalar types.
+//
+// lw loads the Java stack slot and sign-extends it;
+// sd stores this widened integer into a 64-bit native stack slot.
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { // emits code passing one 32-bit integer argument
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset())); // Java slot of the current argument
+
+  Register reg = next_gpr();
+  if (reg != noreg) {
+    __ lw(reg, src);                                 // register available: load straight into it
+  } else {
+    __ lw(x10, src);                                 // no register left: stage the value in x10 ...
+    __ sd(x10, Address(to(), next_stack_offset()));  // ... and store it as a full word in the next native stack slot
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { // emits code passing one 64-bit integer argument
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); // the 64-bit value is read at local index offset() + 1
+
+  Register reg = next_gpr();
+  if (reg != noreg) {
+    __ ld(reg, src);                                 // register available: 64-bit load straight into it
+  } else  {
+    __ ld(x10, src);                                 // no register left: stage the value in x10 ...
+    __ sd(x10, Address(to(), next_stack_offset()));  // ... and store it in the next native stack slot
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { // emits code passing one float argument
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset())); // Java slot of the current argument
+
+  FloatRegister reg = next_fpr();
+  if (reg != fnoreg) {
+    __ flw(reg, src);  // FP register available: single-precision load into it
+  } else {
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+    pass_int();        // re-derives the same source slot and uses a GPR or the stack
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { // emits code passing one double argument
+  const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); // the 64-bit value is read at local index offset() + 1
+
+  FloatRegister reg = next_fpr();
+  if (reg != fnoreg) {
+    __ fld(reg, src);  // FP register available: double-precision load into it
+  } else {
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+    pass_long();       // re-derives the same source slot and uses a GPR or the stack
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { // passes an object as the address of its Java slot, or zero if the slot holds null
+  Register reg = next_gpr();
+  if (reg == c_rarg1) {
+    assert(offset() == 0, "argument register 1 can only be (non-null) receiver");
+    __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); // receiver: slot address, no null test emitted
+  } else if (reg != noreg) {
+      // c_rarg2-c_rarg7: pass the slot address, or zero if the slot contains null
+      __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); // x10 = address of the Java slot
+      __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2,  2:c_rarg3...
+      __ ld(temp(), x10);   // temp = contents of the slot
+      Label L;
+      __ beqz(temp(), L);   // null contents: keep reg == zero
+      __ mv(reg, x10);      // non-null: pass the slot address
+      __ bind(L);
+  } else {
+    // to stack: same null test, result staged in x10 and stored to the next native stack slot
+    __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); // x10 = address of the Java slot
+    __ ld(temp(), x10);     // temp = contents of the slot
+    Label L;
+    __ bnez(temp(), L);     // non-null: keep the slot address in x10
+    __ mv(x10, zr);         // null contents: pass zero instead
+    __ bind(L);
+    assert(sizeof(jobject) == wordSize, "");
+    __ sd(x10, Address(to(), next_stack_offset()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { // emits the complete signature handler
+  // generate code to handle arguments (iterate() is expected to drive the pass_* methods above)
+  iterate(fingerprint);
+
+  // return result handler: the emitted code leaves its address in x10 and returns
+  __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type())));
+  __ ret();
+
+  __ flush(); // flush the assembler once all code has been emitted
+}
+
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {} // empty body: nothing is done with 'handler' on this platform
+
+
+class SlowSignatureHandler  // copies a native method's Java arguments into register/stack buffers located relative to 'to'
+  : public NativeSignatureIterator {
+ private:
+  address   _from;                  // cursor into the Java argument slots; moves downwards as arguments are consumed
+  intptr_t* _to;                    // next slot for a stack-passed native argument
+  intptr_t* _int_args;              // next slot in the integer-register argument buffer
+  intptr_t* _fp_args;               // next slot in the FP-register argument buffer
+  intptr_t* _fp_identifiers;        // bit i set => FP register argument i is a double
+  unsigned int _num_reg_int_args;   // integer argument registers consumed so far
+  unsigned int _num_reg_fp_args;    // FP argument registers consumed so far
+
+  intptr_t* single_slot_addr() {    // address of the current one-slot argument; then steps _from down one slot
+    intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0));
+    _from -= Interpreter::stackElementSize;
+    return from_addr;
+  }
+
+  intptr_t* double_slot_addr() {    // address of the current two-slot argument; then steps _from down two slots
+    intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1));
+    _from -= 2 * Interpreter::stackElementSize;
+    return from_addr;
+  }
+
+  int pass_gpr(intptr_t value) {    // stores value in the integer buffer; returns the previous count, or -1 if full
+    if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) {
+      *_int_args++ = value;
+      return _num_reg_int_args++;
+    }
+    return -1;
+  }
+
+  int pass_fpr(intptr_t value) {    // stores value in the FP buffer; returns its index, or -1 if full
+    if (_num_reg_fp_args < Argument::n_float_register_parameters_c) {
+      *_fp_args++ = value;
+      return _num_reg_fp_args++;
+    }
+    return -1;
+  }
+
+  void pass_stack(intptr_t value) { // appends value to the stack-passed arguments
+    *_to++ = value;
+  }
+
+  virtual void pass_int() {
+    jint value = *(jint*)single_slot_addr();   // widened to intptr_t (sign-extended) when passed on
+    if (pass_gpr(value) < 0) {
+      pass_stack(value);
+    }
+  }
+
+  virtual void pass_long() {
+    intptr_t value = *double_slot_addr();
+    if (pass_gpr(value) < 0) {
+      pass_stack(value);
+    }
+  }
+
+  virtual void pass_object() {
+    intptr_t* addr = single_slot_addr();
+    intptr_t value = (*addr == 0) ? (intptr_t)0 : (intptr_t)addr; // slot address, or integer zero (not the pointer constant NULL) for a null slot
+    if (pass_gpr(value) < 0) {
+      pass_stack(value);
+    }
+  }
+
+  virtual void pass_float() {
+    jint value = *(jint*) single_slot_addr();  // raw 32-bit pattern of the float
+    // a floating-point argument is passed according to the integer calling
+    // convention if no floating-point argument register is available
+    if (pass_fpr(value) < 0 && pass_gpr(value) < 0) {
+      pass_stack(value);
+    }
+  }
+
+  virtual void pass_double() {
+    intptr_t value = *double_slot_addr();      // raw 64-bit pattern of the double
+    int arg = pass_fpr(value);
+    if (0 <= arg) {
+      *_fp_identifiers |= (1ull << arg); // mark as double
+    } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack
+      pass_stack(value);
+    }
+  }
+
+ public:
+  SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to)
+    : NativeSignatureIterator(method)
+  {
+    _from = from;
+    _to   = to;
+    // The register buffers live at fixed negative offsets from 'to'.
+    _int_args = to - (method->is_static() ? 16 : 17);  // static methods start one slot later
+    _fp_args  = to - 8;
+    _fp_identifiers = to - 9;
+    *(int*) _fp_identifiers = 0;                        // int-sized store: clears only the int-wide part of this word
+    _num_reg_int_args = (method->is_static() ? 1 : 0);  // matches the one-slot shift of _int_args above
+    _num_reg_fp_args = 0;
+  }
+
+  ~SlowSignatureHandler()           // only drops the borrowed pointers; nothing is freed here
+  {
+    _from           = NULL;
+    _to             = NULL;
+    _int_args       = NULL;
+    _fp_args        = NULL;
+    _fp_identifiers = NULL;
+  }
+};
+
+
+IRT_ENTRY(address,
+          InterpreterRuntime::slow_signature_handler(JavaThread* thread,
+                                                     Method* method,
+                                                     intptr_t* from,
+                                                     intptr_t* to))
+  methodHandle m(thread, (Method*)method);
+  assert(m->is_native(), "sanity check");
+
+  // handle arguments: SlowSignatureHandler reads them starting at 'from' and writes buffers relative to 'to'
+  SlowSignatureHandler ssh(m, (address)from, to);
+  ssh.iterate(UCONST64(-1)); // all-ones fingerprint; presumably requests a full signature walk - confirm in NativeSignatureIterator
+
+  // return result handler for the method's result type
+  return Interpreter::result_handler(m->result_type());
+IRT_END
diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
new file mode 100644
index 0000000000..05df63ba2a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP
+#define CPU_RISCV_INTERPRETERRT_RISCV_HPP
+
+// This is included in the middle of class Interpreter.
+// Do not include files here.
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator { // emits code that moves Java arguments into native argument registers/stack
+ private:
+  MacroAssembler* _masm;             // assembler the handler code is emitted through
+  unsigned int _num_reg_fp_args;     // FP argument registers handed out so far
+  unsigned int _num_reg_int_args;    // integer argument registers handed out so far
+  int _stack_offset;                 // byte offset of the next stack-passed argument, relative to to()
+
+  void pass_int();                   // one pass_* method per argument kind
+  void pass_long();
+  void pass_float();
+  void pass_double();
+  void pass_object();
+
+  Register next_gpr();               // next free integer argument register, or noreg
+  FloatRegister next_fpr();          // next free FP argument register, or fnoreg
+  int next_stack_offset();           // current stack offset; advances by one word
+
+ public:
+  // Creation
+  SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer);
+  virtual ~SignatureHandlerGenerator() {
+    _masm = NULL;                    // only clears the pointer; the assembler is not deleted here
+  }
+
+  // Code generation
+  void generate(uint64_t fingerprint);
+
+  // Code generation support: fixed registers used by the emitted code
+  static Register from();
+  static Register to();
+  static Register temp();
+};
+
+#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
new file mode 100644
index 0000000000..5a0c9b812f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
+#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
+
+private:
+
+  // FP value associated with _last_Java_sp:
+  intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to
+
+public:
+  // Each arch must define reset, save, restore
+  // These are used by objects that only care about:
+  //  1 - initializing a new state (thread creation, javaCalls)
+  //  2 - saving a current state (javaCalls)
+  //  3 - restoring an old state (javaCalls)
+
+  void clear(void) {                 // resets sp, fp and pc of this anchor to NULL
+    // clearing _last_Java_sp must be first
+    _last_Java_sp = NULL;
+    OrderAccess::release();          // release fence between clearing sp and clearing fp/pc
+    _last_Java_fp = NULL;
+    _last_Java_pc = NULL;
+  }
+
+  void copy(JavaFrameAnchor* src) {  // copies fp, pc and finally sp from 'src' into this anchor
+    // In order to make sure the transition state is valid for "this",
+    // we must clear _last_Java_sp before copying the rest of the new data.
+    //
+    // Hack Alert: Temporary bugfix for 4717480/4721647
+    // To act like the previous version (pd_cache_state), don't NULL _last_Java_sp
+    // unless the value is changing.
+    //
+    assert(src != NULL, "Src should not be NULL.");
+    if (_last_Java_sp != src->_last_Java_sp) {
+      _last_Java_sp = NULL;
+      OrderAccess::release();        // release fence after clearing sp, before fp/pc are overwritten
+    }
+    _last_Java_fp = src->_last_Java_fp;
+    _last_Java_pc = src->_last_Java_pc;
+    // Must be last so profiler will always see valid frame if has_last_frame() is true
+    _last_Java_sp = src->_last_Java_sp;
+  }
+  // walkable(): true once both sp and pc are non-NULL; fp is not consulted.
+  bool walkable(void)                            { return _last_Java_sp != NULL && _last_Java_pc != NULL; }
+  void make_walkable(JavaThread* thread);
+  void capture_last_Java_pc(void);
+
+  intptr_t* last_Java_sp(void) const             { return _last_Java_sp; }
+
+  const address last_Java_pc(void)               { return _last_Java_pc; }
+
+private:
+  // Byte offset of _last_Java_fp within JavaFrameAnchor.
+  static ByteSize last_Java_fp_offset()          { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
+
+public:
+  // set_last_Java_sp(): stores the new sp, then issues a release fence.
+  void set_last_Java_sp(intptr_t* java_sp)       { _last_Java_sp = java_sp; OrderAccess::release(); }
+
+  intptr_t* last_Java_fp(void)                   { return _last_Java_fp; }
+
+  // set_last_Java_fp(): issues a release fence, then stores fp. Assert (last_Java_sp == NULL || fp == NULL)
+  void set_last_Java_fp(intptr_t* fp)            { OrderAccess::release(); _last_Java_fp = fp; }
+
+#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
new file mode 100644
index 0000000000..f6e7351c4f
--- /dev/null
+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "runtime/safepoint.hpp"
+
+#define __ masm->
+
+#define BUFFER_SIZE 30*wordSize
+
+// Instead of issuing a LoadLoad barrier we create an address
+// dependency between loads; this might be more efficient.
+
+// Common register usage:
+// x10/f10:      result
+// c_rarg0:    jni env
+// c_rarg1:    obj
+// c_rarg2:    jfield id
+
+static const Register robj          = x13;
+static const Register rcounter      = x14;
+static const Register roffset       = x15;
+static const Register rcounter_addr = x16;
+static const Register result        = x17;
+
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { // emits the fast Get<Type>Field stub for 'type'; returns its entry point
+  const char *name;
+  switch (type) {
+    case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
+    case T_BYTE:    name = "jni_fast_GetByteField";    break;
+    case T_CHAR:    name = "jni_fast_GetCharField";    break;
+    case T_SHORT:   name = "jni_fast_GetShortField";   break;
+    case T_INT:     name = "jni_fast_GetIntField";     break;
+    case T_LONG:    name = "jni_fast_GetLongField";    break;
+    case T_FLOAT:   name = "jni_fast_GetFloatField";   break;
+    case T_DOUBLE:  name = "jni_fast_GetDoubleField";  break;
+    default:        ShouldNotReachHere();
+      name = NULL;  // unreachable
+  }
+  ResourceMark rm;
+  BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
+  CodeBuffer cbuf(blob);
+  MacroAssembler* masm = new MacroAssembler(&cbuf);
+  address fast_entry = __ pc();
+
+  Label slow;
+  int32_t offset = 0;
+  __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset);
+  __ addi(rcounter_addr, rcounter_addr, offset);
+  // First read of the safepoint counter: 32-bit load, zero-extended.
+  Address safepoint_counter_addr(rcounter_addr, 0);
+  __ lwu(rcounter, safepoint_counter_addr);
+  // An even value means there are no ongoing safepoint operations
+  __ andi(t0, rcounter, 1);
+  __ bnez(t0, slow);
+  __ xorr(robj, c_rarg1, rcounter);
+  __ xorr(robj, robj, rcounter);               // obj, since
+                                               // robj ^ rcounter ^ rcounter == robj
+                                               // robj is address dependent on rcounter.
+
+  // Both robj and t0 are clobbered by try_resolve_jobject_in_native.
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  assert_cond(bs != NULL);
+  bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow);
+
+  __ srli(roffset, c_rarg2, 2);                // offset
+
+  assert(count < LIST_CAPACITY, "LIST_CAPACITY too small");
+  speculative_load_pclist[count] = __ pc();   // Used by the segfault handler
+  __ add(roffset, robj, roffset);              // roffset = address of the field
+
+  switch (type) {
+    case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break;
+    case T_BYTE:    __ lb(result, Address(roffset, 0)); break;
+    case T_CHAR:    __ lhu(result, Address(roffset, 0)); break;
+    case T_SHORT:   __ lh(result, Address(roffset, 0)); break;
+    case T_INT:     __ lw(result, Address(roffset, 0)); break;
+    case T_LONG:    __ ld(result, Address(roffset, 0)); break;
+    case T_FLOAT: {
+      __ flw(f28, Address(roffset, 0)); // f28 as temporaries
+      __ fmv_x_w(result, f28); // f{31--0}-->x
+      break;
+    }
+    case T_DOUBLE: {
+      __ fld(f28, Address(roffset, 0)); // f28 as temporaries
+      __ fmv_x_d(result, f28); // d{63--0}-->x
+      break;
+    }
+    default:        ShouldNotReachHere();
+  }
+  // Re-check the counter; the double xor makes the reload address-dependent on 'result'.
+  __ xorr(rcounter_addr, rcounter_addr, result);
+  __ xorr(rcounter_addr, rcounter_addr, result);
+  __ lwu(t0, safepoint_counter_addr);          // zero-extend like the first load, so bne compares equal values as equal even when bit 31 is set
+  __ bne(rcounter, t0, slow);
+
+  switch (type) {                              // move the value into the return register for 'type'
+    case T_FLOAT:   __ fmv_w_x(f10, result); break;
+    case T_DOUBLE:  __ fmv_d_x(f10, result); break;
+    default:        __ mv(x10, result);   break;
+  }
+  __ ret();
+
+  slowcase_entry_pclist[count++] = __ pc();
+  __ bind(slow);
+  address slow_case_addr;
+  switch (type) {
+    case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
+    case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
+    case T_CHAR:    slow_case_addr = jni_GetCharField_addr();    break;
+    case T_SHORT:   slow_case_addr = jni_GetShortField_addr();   break;
+    case T_INT:     slow_case_addr = jni_GetIntField_addr();     break;
+    case T_LONG:    slow_case_addr = jni_GetLongField_addr();    break;
+    case T_FLOAT:   slow_case_addr = jni_GetFloatField_addr();   break;
+    case T_DOUBLE:  slow_case_addr = jni_GetDoubleField_addr();  break;
+    default:        ShouldNotReachHere();
+      slow_case_addr = NULL;  // unreachable
+  }
+
+  {                                            // slow path: call the regular JNI accessor inside a frame
+    __ enter();
+    int32_t tmp_offset = 0;
+    __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset);
+    __ jalr(x1, t0, tmp_offset);
+    __ leave();
+    __ ret();
+  }
+  __ flush();
+
+  return fast_entry;
+}
+
+
+address JNI_FastGetField::generate_fast_get_boolean_field() { // each wrapper below selects one BasicType of the shared generator
+  return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+  return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+  return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+  return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+  return generate_fast_get_int_field0(T_LONG);
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {   // float and double also go through the "int" generator,
+  return generate_fast_get_int_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {  // which handles T_FLOAT/T_DOUBLE in its own switch statements
+  return generate_fast_get_int_field0(T_DOUBLE);
+}
diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
new file mode 100644
index 0000000000..df3c0267ee
--- /dev/null
+++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_JNITYPES_RISCV_HPP
+#define CPU_RISCV_JNITYPES_RISCV_HPP
+
+#include "jni.h"
+#include "oops/oop.hpp"
+
+// This file holds platform-dependent routines used to write primitive jni
+// types to the array of arguments passed into JavaCalls::call
+
+class JNITypes : private AllStatic {
+  // These functions write a Java primitive type (in native format)
+  // to a Java stack slot array to be passed as an argument to JavaCalls::call.
+  // I.e., they are functionally 'push' operations if they have a 'pos'
+  // formal parameter.  Note that jlongs and jdoubles are written
+  // _in reverse_ of the order in which they appear in the interpreter
+  // stack.  This is because call stubs (the inherited comment cites
+  // stubGenerator_sparc.cpp; TODO confirm for RISC-V) reverse the argument
+  // list constructed by JavaCallArguments (see javaCalls.hpp).
+
+public:
+  // Ints are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_int(jint  from, intptr_t *to)           { *(jint *)(to +   0  ) =  from; }
+  static inline void    put_int(jint  from, intptr_t *to, int& pos) { *(jint *)(to + pos++) =  from; }
+  static inline void    put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+  // Longs are stored in native format in one JavaCallArgument slot at
+  // *(to+1); the 'pos' overloads write at to + 1 + pos and advance pos by 2.
+  static inline void put_long(jlong  from, intptr_t *to) {
+    *(jlong*) (to + 1) = from;
+  }
+
+  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // Oops are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_obj(oop  from, intptr_t *to)                { *(oop *)(to +   0  ) =  from; }
+  static inline void    put_obj(oop  from, intptr_t *to, int& pos)      { *(oop *)(to + pos++) =  from; }
+  static inline void    put_obj(oop *from, intptr_t *to, int& pos)      { *(oop *)(to + pos++) = *from; }
+
+  // Floats are stored in native format in one JavaCallArgument slot at *to.
+  static inline void    put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to +   0  ) =  from;  }
+  static inline void    put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
+  static inline void    put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+#undef _JNI_SLOT_OFFSET
+#define _JNI_SLOT_OFFSET 1
+  // Doubles are stored in native word format in one JavaCallArgument
+  // slot at *(to+1); the 'pos' overloads write at to + 1 + pos and advance pos by 2.
+  static inline void put_double(jdouble  from, intptr_t *to) {
+    *(jdouble*) (to + 1) = from;
+  }
+
+  static inline void put_double(jdouble  from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack; longs/doubles are read at from + _JNI_SLOT_OFFSET.
+  // NOTE(review): "No need to worry about alignment on Intel." was inherited; confirm for RISC-V.
+  static inline jint    get_int   (intptr_t *from) { return *(jint *)   from; }
+  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + _JNI_SLOT_OFFSET); }
+  static inline oop     get_obj   (intptr_t *from) { return *(oop *)    from; }
+  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *) from; }
+  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); }
+#undef _JNI_SLOT_OFFSET
+};
+
+#endif // CPU_RISCV_JNITYPES_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
new file mode 100644
index 0000000000..14e07036ac
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -0,0 +1,5390 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/accessDecorators.hpp"
+#include "oops/compressedOops.inline.hpp"
+#include "oops/klass.inline.hpp"
+#include "oops/oop.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/jniHandles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.hpp"
+#ifdef COMPILER2
+#include "opto/compile.hpp"
+#include "opto/intrinsicnode.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#endif
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else // !PRODUCT
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif // PRODUCT
+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
+
+static void pass_arg0(MacroAssembler* masm, Register arg) {
+  // Value already lives in c_rarg0: nothing to emit.
+  if (arg == c_rarg0) { return; }
+  masm->mv(c_rarg0, arg);
+}
+
+static void pass_arg1(MacroAssembler* masm, Register arg) {
+  // Value already lives in c_rarg1: nothing to emit.
+  if (arg == c_rarg1) { return; }
+  masm->mv(c_rarg1, arg);
+}
+
+static void pass_arg2(MacroAssembler* masm, Register arg) {
+  // Value already lives in c_rarg2: nothing to emit.
+  if (arg == c_rarg2) { return; }
+  masm->mv(c_rarg2, arg);
+}
+
+static void pass_arg3(MacroAssembler* masm, Register arg) {
+  // Value already lives in c_rarg3: nothing to emit.
+  if (arg == c_rarg3) { return; }
+  masm->mv(c_rarg3, arg);
+}
+
+void MacroAssembler::align(int modulus, int extra_offset) {
+  CompressibleRegion cr(this);  // NOTE(review): presumably lets the nops below be emitted compressed — confirm
+  while ((offset() + extra_offset) % modulus != 0) { nop(); }  // pad until offset() + extra_offset is a multiple of modulus
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);  // noreg: call_VM_base picks xthread / esp
+}
+
+// Implementation of call_VM versions
+
+// Calls entry_point with no register arguments besides the thread.
+void MacroAssembler::call_VM(Register oop_result, address entry_point,
+                             bool check_exceptions) {
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+// One-argument form: arg_1 is moved into c_rarg1 (call_VM_base loads the
+// thread into c_rarg0).
+void MacroAssembler::call_VM(Register oop_result, address entry_point,
+                             Register arg_1, bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+// Two-argument form. arg_2 is placed first, so arg_1 must not already live in
+// c_rarg2 or it would be overwritten before being moved into c_rarg1 (the
+// assert below guards this).
+void MacroAssembler::call_VM(Register oop_result, address entry_point,
+                             Register arg_1, Register arg_2, bool check_exceptions) {
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+// Three-argument form. Arguments are placed from the highest C argument
+// register down, so an argument still waiting to be moved must not already
+// occupy a register that is written before it (see the asserts).
+void MacroAssembler::call_VM(Register oop_result, address entry_point,
+                             Register arg_1, Register arg_2, Register arg_3,
+                             bool check_exceptions) {
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+
+  pass_arg1(this, arg_1);
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
+// call_VM with an explicit last_java_sp. This form moves no arguments itself;
+// it forwards to call_VM_base with xthread as the thread register and the
+// given argument count.
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point,
+                             int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
+}
+
+// One-argument form with an explicit last_java_sp: arg_1 is moved into
+// c_rarg1, then the call is delegated to the argument-count overload of
+// call_VM.
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point,
+                             Register arg_1, bool check_exceptions) {
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
+}
+
+// Two-argument form with an explicit last_java_sp. arg_2 is placed first, so
+// arg_1 must not already live in c_rarg2 or it would be overwritten before
+// being moved into c_rarg1 (the assert below guards this). The call itself is
+// delegated to the argument-count overload of call_VM.
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp,
+                             address entry_point, Register arg_1, Register arg_2,
+                             bool check_exceptions) {
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
+}
+
+// Three-argument form with an explicit last_java_sp. Arguments are placed
+// from the highest C argument register down, so an argument still waiting to
+// be moved must not already occupy a register that is written before it (see
+// the asserts). The call is delegated to the argument-count overload.
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp,
+                             address entry_point, Register arg_1, Register arg_2,
+                             Register arg_3, bool check_exceptions) {
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  pass_arg1(this, arg_1);
+  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
+}
+
+// these are no-ops overridden by InterpreterMacroAssembler
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}  // intentionally empty here
+void MacroAssembler::check_and_handle_popframe(Register java_thread) {}  // intentionally empty here
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  const intptr_t known = *delayed_value_addr;
+  if (known != 0) {
+    return RegisterOrConstant(known + offset);  // value already set: fold it into a constant
+  }
+  // Value still zero at code-generation time: load it indirectly through the
+  // cell's address (solves the generation ordering problem).
+  ld(tmp, ExternalAddress((address) delayed_value_addr));
+  if (offset != 0) {
+    add(tmp, tmp, offset);
+  }
+  return RegisterOrConstant(tmp);
+}
+
+// Calls to C land
+//
+// When entering C land, the fp and esp of the last Java frame have to be recorded
+// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
+// has to be reset to 0. This is required to allow proper stack traversal.
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         Register last_java_pc,
+                                         Register tmp) {
+  // Stores go to the thread in xthread: pc (if valid), then sp, then fp (if valid).
+  if (last_java_pc->is_valid()) {
+      sd(last_java_pc, Address(xthread,
+                               JavaThread::frame_anchor_offset() +
+                               JavaFrameAnchor::last_Java_pc_offset()));
+  }
+
+  // determine last_java_sp register: sp is first copied into tmp; an invalid register means esp
+  if (last_java_sp == sp) {
+    mv(tmp, sp);
+    last_java_sp = tmp;
+  } else if (!last_java_sp->is_valid()) {
+    last_java_sp = esp;
+  }
+
+  sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
+
+  // last_java_fp is optional
+  if (last_java_fp->is_valid()) {
+    sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
+  }
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         address  last_java_pc,
+                                         Register tmp) {
+  assert(last_java_pc != NULL, "must provide a valid PC");
+  // Materialize the pc in tmp and store it into the frame anchor here ...
+  la(tmp, last_java_pc);
+  sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
+  // ... then let the register overload record sp and fp; noreg is passed as its pc.
+  set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
+}
+
+void MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                         Register last_java_fp,
+                                         Label &L,
+                                         Register tmp) {
+  if (L.is_bound()) {  // label already has an address: use it directly as the pc
+    set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
+  } else {
+    L.add_patch_at(code(), locator());  // unbound: record this site on the label for later patching
+    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
+  }
+}
+
+// Just like safepoint_poll, but use an acquiring load for thread-
+// local polling.
+//
+// We need an acquire here to ensure that any subsequent load of the
+// global SafepointSynchronize::_state flag is ordered after this load
+// of the local Thread::_polling page.  We don't want this poll to
+// return false (i.e. not safepointing) and a later poll of the global
+// SafepointSynchronize::_state spuriously to return true.
+//
+// This is to avoid a race when we're in a native->Java transition
+// racing the code which wakes up from a safepoint.
+//
+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    membar(MacroAssembler::AnyAny);
+    ld(t1, Address(xthread, Thread::polling_page_offset()));  // load the thread-local poll word
+    membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);  // acquire: order later accesses after the poll load
+    andi(t0, t1, SafepointMechanism::poll_bit());  // clobbers t0 and t1
+    bnez(t0, slow_path);  // poll bit set: take the slow path
+  } else {
+    safepoint_poll(slow_path);  // no thread-local poll: use the plain safepoint_poll
+  }
+}
+
+void MacroAssembler::reset_last_Java_frame(bool clear_fp) {  // clear_fp: also zero the recorded fp
+  // we must set sp to zero to clear frame
+  sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
+
+  // must clear fp, so that compiled frames are not confused; it is
+  // possible that we need it only for debugging
+  if (clear_fp) {
+    sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
+  }
+
+  // Always clear the pc because it could have been set by make_walkable()
+  sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
+}
+
+void MacroAssembler::call_VM_base(Register oop_result,
+                                  Register java_thread,
+                                  Register last_java_sp,
+                                  address  entry_point,
+                                  int      number_of_arguments,
+                                  bool     check_exceptions) {
+  // determine java_thread register (an invalid register selects xthread)
+  if (!java_thread->is_valid()) {
+    java_thread = xthread;
+  }
+  // determine last_java_sp register (an invalid register selects esp)
+  if (!last_java_sp->is_valid()) {
+    last_java_sp = esp;
+  }
+
+  // debugging support
+  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
+  assert(java_thread == xthread, "unexpected register");
+
+  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
+  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
+
+  // move the java thread into c_rarg0 (becomes first argument of C function)
+  mv(c_rarg0, java_thread);
+
+  // set last Java frame before call
+  assert(last_java_sp != fp, "can't use fp");
+
+  Label l;
+  set_last_Java_frame(last_java_sp, fp, l, t0);
+
+  // do the call; call_VM_leaf_base binds l right after the call instruction
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
+
+  // reset last Java frame
+  // Only interpreter should have to clear fp
+  reset_last_Java_frame(true);
+
+  // no-ops in MacroAssembler itself; overridden by InterpreterMacroAssembler
+  check_and_handle_popframe(java_thread);
+  check_and_handle_earlyret(java_thread);
+
+  if (check_exceptions) {
+    // check for pending exceptions (java_thread is set upon return)
+    ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
+    Label ok;
+    beqz(t0, ok);
+    int32_t offset = 0;
+    la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
+    jalr(x0, t0, offset);
+    bind(ok);
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, java_thread);
+  }
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));  // fetch the oop result from the thread
+  sd(zr, Address(java_thread, JavaThread::vm_result_offset()));          // then zero the thread's slot
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));  // fetch the second result slot
+  sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));               // then zero the thread's slot
+}
+
+void MacroAssembler::verify_oop(Register reg, const char* s) {
+  if (!VerifyOops) { return; }
+
+  // Build the diagnostic message (register name plus caller text) as a code string
+  const char* b = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("verify_oop: %s: %s", reg->name(), s);
+    b = code_string(ss.as_string());
+  }
+  BLOCK_COMMENT("verify_oop {");
+
+  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);  // save the registers overwritten below
+
+  mv(c_rarg0, reg); // c_rarg0 : x10
+  // The length of the instruction sequence emitted should be independent
+  // of the values of the local char buffer address so that the size of mach
+  // nodes for scratch emit and normal emit matches.
+  movptr(t0, (address)b);  // message pointer goes in t0
+
+  // call indirectly to solve generation ordering problem
+  int32_t offset = 0;
+  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
+  ld(t1, Address(t1, offset));  // load the subroutine entry from its address cell
+  jalr(t1);
+
+  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);  // restore the registers saved above
+
+  BLOCK_COMMENT("} verify_oop");
+}
+
+void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
+  if (!VerifyOops) {
+    return;
+  }
+
+  const char* b = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("verify_oop_addr: %s", s);
+    b = code_string(ss.as_string());
+  }
+  BLOCK_COMMENT("verify_oop_addr {");
+
+  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);  // four registers saved on the stack
+
+  if (addr.uses(sp)) {
+    la(x10, addr);
+    ld(x10, Address(x10, 4 * wordSize));  // skip the four words just pushed below the original sp
+  } else {
+    ld(x10, addr);
+  }
+
+  // The length of the instruction sequence emitted should be independent
+  // of the values of the local char buffer address so that the size of mach
+  // nodes for scratch emit and normal emit matches.
+  movptr(t0, (address)b);  // message pointer goes in t0
+
+  // call indirectly to solve generation ordering problem
+  int32_t offset = 0;
+  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
+  ld(t1, Address(t1, offset));  // load the subroutine entry from its address cell
+  jalr(t1);
+
+  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);  // restore the registers saved above
+
+  BLOCK_COMMENT("} verify_oop_addr");
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  if (arg_slot.is_constant()) {  // constant slot: fold everything into one esp-relative displacement
+    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
+  } else {
+    assert_different_registers(t0, arg_slot.as_register());
+    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));  // writes t0; the returned Address is based on it
+    return Address(t0, offset);
+  }
+}
+
+#ifndef PRODUCT
+extern "C" void findpc(intptr_t x);
+#endif
+
+void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
+  // In order to get locks to work, we need to fake an in_VM state
+  if (ShowMessageBoxOnError) {
+    JavaThread* thread = JavaThread::current();
+    JavaThreadState saved_state = thread->thread_state();
+    thread->set_thread_state(_thread_in_vm);
+#ifndef PRODUCT
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      ttyLocker ttyl;
+      BytecodeCounter::print();
+    }
+#endif
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+      ttyLocker ttyl;
+      tty->print_cr(" pc = 0x%016lx", pc);
+#ifndef PRODUCT
+      tty->cr();
+      findpc(pc);
+      tty->cr();
+#endif
+      tty->print_cr(" x0 = 0x%016lx", regs[0]);
+      tty->print_cr(" x1 = 0x%016lx", regs[1]);
+      tty->print_cr(" x2 = 0x%016lx", regs[2]);
+      tty->print_cr(" x3 = 0x%016lx", regs[3]);
+      tty->print_cr(" x4 = 0x%016lx", regs[4]);
+      tty->print_cr(" x5 = 0x%016lx", regs[5]);
+      tty->print_cr(" x6 = 0x%016lx", regs[6]);
+      tty->print_cr(" x7 = 0x%016lx", regs[7]);
+      tty->print_cr(" x8 = 0x%016lx", regs[8]);
+      tty->print_cr(" x9 = 0x%016lx", regs[9]);
+      tty->print_cr("x10 = 0x%016lx", regs[10]);
+      tty->print_cr("x11 = 0x%016lx", regs[11]);
+      tty->print_cr("x12 = 0x%016lx", regs[12]);
+      tty->print_cr("x13 = 0x%016lx", regs[13]);
+      tty->print_cr("x14 = 0x%016lx", regs[14]);
+      tty->print_cr("x15 = 0x%016lx", regs[15]);
+      tty->print_cr("x16 = 0x%016lx", regs[16]);
+      tty->print_cr("x17 = 0x%016lx", regs[17]);
+      tty->print_cr("x18 = 0x%016lx", regs[18]);
+      tty->print_cr("x19 = 0x%016lx", regs[19]);
+      tty->print_cr("x20 = 0x%016lx", regs[20]);
+      tty->print_cr("x21 = 0x%016lx", regs[21]);
+      tty->print_cr("x22 = 0x%016lx", regs[22]);
+      tty->print_cr("x23 = 0x%016lx", regs[23]);
+      tty->print_cr("x24 = 0x%016lx", regs[24]);
+      tty->print_cr("x25 = 0x%016lx", regs[25]);
+      tty->print_cr("x26 = 0x%016lx", regs[26]);
+      tty->print_cr("x27 = 0x%016lx", regs[27]);
+      tty->print_cr("x28 = 0x%016lx", regs[28]);
+      tty->print_cr("x29 = 0x%016lx", regs[29]);
+      tty->print_cr("x30 = 0x%016lx", regs[30]);
+      tty->print_cr("x31 = 0x%016lx", regs[31]);
+      BREAKPOINT;
+    }
+  }
+  fatal("DEBUG MESSAGE: %s", msg);
+}
+
+void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
+  Label done, not_weak;
+  beqz(value, done);           // Use NULL as-is.
+
+  // Test for jweak tag (uses t0 as scratch).
+  andi(t0, value, JNIHandles::weak_tag_mask);
+  beqz(t0, not_weak);
+
+  // Resolve jweak; the negative displacement subtracts the tag value from the handle.
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
+                 Address(value, -JNIHandles::weak_tag_value), tmp, thread);
+  verify_oop(value);
+  j(done);
+
+  bind(not_weak);
+  // Resolve (untagged) jobject.
+  access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
+  verify_oop(value);
+  bind(done);
+}
+
+void MacroAssembler::stop(const char* msg) {
+  address ip = pc();  // code address at which this stop sequence begins
+  pusha();
+  // The length of the instruction sequence emitted should be independent
+  // of the values of msg and ip so that the size of mach nodes for scratch
+  // emit and normal emit matches.
+  mv(c_rarg0, (address)msg);  // NOTE(review): relies on mv(Register, address) being fixed-length — confirm
+  mv(c_rarg1, (address)ip);
+  mv(c_rarg2, sp);  // sp after pusha(); debug64 receives it as its regs[] argument
+  mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+  jalr(c_rarg3);
+  ebreak();
+}
+
+void MacroAssembler::unimplemented(const char* what) {
+  const char* message = NULL;
+  {
+    ResourceMark rm;
+    stringStream text;
+    text.print("unimplemented: %s", what);
+    message = code_string(text.as_string());  // keep a copy via code_string before the stream goes away
+  }
+  stop(message);
+}
+
+void MacroAssembler::emit_static_call_stub() {
+  // CompiledDirectStaticCall::set_to_interpreted knows the
+  // exact layout of this stub.
+
+  mov_metadata(xmethod, (Metadata*)NULL);  // NULL placeholder for the Method* in xmethod
+
+  // Jump to the entry point of the i2c stub.
+  int32_t offset = 0;
+  movptr_with_offset(t0, 0, offset);  // zero placeholder for the jump target
+  jalr(x0, t0, offset);  // jump without linking (rd = x0)
+}
+
+void MacroAssembler::call_VM_leaf_base(address entry_point,
+                                       int number_of_arguments,  // not read in this body
+                                       Label *retaddr) {
+  int32_t offset = 0;
+  push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
+  movptr_with_offset(t0, entry_point, offset);
+  jalr(x1, t0, offset);  // call entry_point; the return address is written to x1
+  if (retaddr != NULL) {
+    bind(*retaddr);  // bind the caller's label at the instruction following the call
+  }
+  pop_reg(RegSet::of(t0, xmethod), sp);   // pop << t0 & xmethod >> from sp
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
+  call_VM_leaf_base(entry_point, number_of_arguments);  // moves no arguments itself
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);  // arg_0 -> c_rarg0
+  call_VM_leaf_base(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  pass_arg0(this, arg_0);  // NOTE(review): c_rarg0 is written first with no "smashed arg" assert; arg_1 in c_rarg0 would be lost
+  pass_arg1(this, arg_1);
+  call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
+                                  Register arg_1, Register arg_2) {
+  pass_arg0(this, arg_0);  // NOTE(review): moves go c_rarg0, c_rarg1, c_rarg2 with no "smashed arg" asserts — confirm callers
+  pass_arg1(this, arg_1);
+  pass_arg2(this, arg_2);
+  call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);  // explicitly qualified: always MacroAssembler's implementation
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
+  // c_rarg1 is written before arg_0 is moved, so arg_0 must not live there
+  assert(arg_0 != c_rarg1, "smashed arg");
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 2);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
+  assert(arg_0 != c_rarg2, "smashed arg");  // c_rarg2 is written first
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  assert(arg_0 != c_rarg1, "smashed arg");  // then c_rarg1
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 3);
+}
+
+void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
+  assert(arg_0 != c_rarg3, "smashed arg");  // c_rarg3 is written first
+  assert(arg_1 != c_rarg3, "smashed arg");
+  assert(arg_2 != c_rarg3, "smashed arg");
+  pass_arg3(this, arg_3);
+  assert(arg_0 != c_rarg2, "smashed arg");  // then c_rarg2
+  assert(arg_1 != c_rarg2, "smashed arg");
+  pass_arg2(this, arg_2);
+  assert(arg_0 != c_rarg1, "smashed arg");  // then c_rarg1
+  pass_arg1(this, arg_1);
+  pass_arg0(this, arg_0);
+  MacroAssembler::call_VM_leaf_base(entry_point, 4);
+}
+
+void MacroAssembler::nop() {
+  addi(x0, x0, 0);  // nop is encoded as addi x0, x0, 0
+}
+
+void MacroAssembler::mv(Register Rd, Register Rs) {
+  // Moving a register onto itself needs no instruction.
+  if (Rd == Rs) { return; }
+  addi(Rd, Rs, 0);
+}
+
+void MacroAssembler::notr(Register Rd, Register Rs) {
+  xori(Rd, Rs, -1);  // Rd = ~Rs (xor with all ones)
+}
+
+void MacroAssembler::neg(Register Rd, Register Rs) {
+  sub(Rd, x0, Rs);  // Rd = 0 - Rs
+}
+
+void MacroAssembler::negw(Register Rd, Register Rs) {
+  subw(Rd, x0, Rs);  // Rd = 0 - Rs using the 32-bit (word) subtract
+}
+
+void MacroAssembler::sext_w(Register Rd, Register Rs) {
+  addiw(Rd, Rs, 0);  // sign-extend the low 32 bits of Rs into Rd
+}
+
+void MacroAssembler::zext_b(Register Rd, Register Rs) {
+  andi(Rd, Rs, 0xFF);  // keep only the low byte of Rs
+}
+
+void MacroAssembler::seqz(Register Rd, Register Rs) {
+  sltiu(Rd, Rs, 1);  // Rd = (Rs == 0): only zero is unsigned-less-than 1
+}
+
+void MacroAssembler::snez(Register Rd, Register Rs) {
+  sltu(Rd, x0, Rs);  // Rd = (Rs != 0): 0 is unsigned-less-than any non-zero value
+}
+
+void MacroAssembler::sltz(Register Rd, Register Rs) {
+  slt(Rd, Rs, x0);  // Rd = (Rs < 0), signed
+}
+
+void MacroAssembler::sgtz(Register Rd, Register Rs) {
+  slt(Rd, x0, Rs);  // Rd = (Rs > 0), signed
+}
+
+void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
+  // Moving a register onto itself needs no instruction.
+  if (Rd == Rs) { return; }
+  fsgnj_s(Rd, Rs, Rs);
+}
+
+void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
+  fsgnjx_s(Rd, Rs, Rs);  // sign XOR sign clears the sign bit: Rd = |Rs|
+}
+
+void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
+  fsgnjn_s(Rd, Rs, Rs);  // inject the inverted sign of Rs: Rd = -Rs
+}
+
+void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
+  // Moving a register onto itself needs no instruction.
+  if (Rd == Rs) { return; }
+  fsgnj_d(Rd, Rs, Rs);
+}
+
+void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
+  fsgnjx_d(Rd, Rs, Rs);  // sign XOR sign clears the sign bit: Rd = |Rs|
+}
+
+void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
+  fsgnjn_d(Rd, Rs, Rs);  // inject the inverted sign of Rs: Rd = -Rs
+}
+
+void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
+  vmnand_mm(vd, vs, vs);  // NAND of a mask with itself is its complement
+}
+
+void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
+  vnsrl_wx(vd, vs, x0, vm);  // narrowing shift right by x0 (zero): plain narrowing of vs
+}
+
+void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
+  vfsgnjn_vv(vd, vs, vs);  // inject the inverted sign of vs: vd = -vs
+}
+
+void MacroAssembler::la(Register Rd, const address &dest) {
+  int64_t offset = dest - pc();  // distance from the current emit position to dest
+  if (is_offset_in_range(offset, 32)) {
+    auipc(Rd, (int32_t)offset + 0x800);  // +0x800 compensates for addi sign-extending its 12-bit immediate (bit 11)
+    addi(Rd, Rd, ((int64_t)offset << 52) >> 52);  // low 12 bits of offset, sign-extended
+  } else {
+    movptr(Rd, dest);  // out of range for the auipc/addi pair: load the absolute address
+  }
+}
+
+void MacroAssembler::la(Register Rd, const Address &adr) {
+  switch (adr.getMode()) {
+    case Address::literal: {
+      relocInfo::relocType rtype = adr.rspec().reloc()->type();
+      if (rtype == relocInfo::none) {  // no relocation: a plain immediate load suffices
+        mv(Rd, (intptr_t)(adr.target()));
+      } else {
+        relocate(adr.rspec());  // record the relocation for the movptr emitted next
+        movptr(Rd, adr.target());
+      }
+      break;
+    }
+    case Address::base_plus_offset: {
+      int32_t offset = 0;
+      baseOffset(Rd, adr, offset);  // NOTE(review): presumably leaves a base in Rd and the residual in offset — confirm
+      addi(Rd, Rd, offset);
+      break;
+    }
+    default:
+      ShouldNotReachHere();  // other addressing modes are not supported here
+  }
+}
+
+void MacroAssembler::la(Register Rd, Label &label) {
+  la(Rd, target(label));  // resolve the label via target(), then use the address overload
+}
+
+#define INSN(NAME)                                                                \
+  void MacroAssembler::NAME##z(Register Rs, const address &dest) {                \
+    NAME(Rs, zr, dest);                                                           \
+  }                                                                               \
+  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
+    NAME(Rs, zr, l, is_far);                                                      \
+  }                                                                               \
+
+  INSN(beq);  // beqz: Rs compared with zr using beq
+  INSN(bne);  // bnez
+  INSN(blt);  // bltz
+  INSN(ble);  // blez
+  INSN(bge);  // bgez
+  INSN(bgt);  // bgtz
+
+#undef INSN
+
+// Float compare branch instructions
+
+#define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
+  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
+    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
+    BRANCH(t0, l, is_far);                                                                                             \
+  }                                                                                                                    \
+  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
+    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
+    BRANCH(t0, l, is_far);                                                                                             \
+  }
+
+  INSN(beq, feq, bnez);  // branch when the feq result in t0 is non-zero; is_unordered is not consulted
+  INSN(bne, feq, beqz);  // branch when the feq result in t0 is zero; is_unordered is not consulted
+
+#undef INSN
+
+
+#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
+  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
+                                    bool is_far, bool is_unordered) {                 \
+    if (is_unordered) {                                                               \
+      /* jump if either source is NaN or condition is expected */                     \
+      FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
+      beqz(t0, l, is_far);                                                            \
+    } else {                                                                          \
+      /* jump if no NaN in source and condition is expected */                        \
+      FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
+      bnez(t0, l, is_far);                                                            \
+    }                                                                                 \
+  }                                                                                   \
+  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
+                                     bool is_far, bool is_unordered) {                \
+    if (is_unordered) {                                                               \
+      /* jump if either source is NaN or condition is expected */                     \
+      FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
+      beqz(t0, l, is_far);                                                            \
+    } else {                                                                          \
+      /* jump if no NaN in source and condition is expected */                        \
+      FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
+      bnez(t0, l, is_far);                                                            \
+    }                                                                                 \
+  }
+
+  INSN(ble, fle, flt);  // ordered: fle(Rs1, Rs2) != 0; unordered: flt(Rs2, Rs1) == 0
+  INSN(blt, flt, fle);  // ordered: flt(Rs1, Rs2) != 0; unordered: fle(Rs2, Rs1) == 0
+
+#undef INSN
+
+#define INSN(NAME, CMP)                                                              \
+  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
+                                    bool is_far, bool is_unordered) {                \
+    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
+  }                                                                                  \
+  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
+                                     bool is_far, bool is_unordered) {               \
+    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
+  }
+
+  INSN(bgt, blt);  // bgt(Rs1, Rs2) is blt with the operands swapped
+  INSN(bge, ble);  // bge(Rs1, Rs2) is ble with the operands swapped
+
+#undef INSN
+
+
+#define INSN(NAME, CSR)                       \
+  void MacroAssembler::NAME(Register Rd) {    \
+    csrr(Rd, CSR);                            \
+  }
+
+  INSN(rdinstret,  CSR_INSTERT);  // NOTE(review): constant is spelled INSTERT here; presumably matches its declaration — confirm
+  INSN(rdcycle,    CSR_CYCLE);
+  INSN(rdtime,     CSR_TIME);
+  INSN(frcsr,      CSR_FCSR);
+  INSN(frrm,       CSR_FRM);
+  INSN(frflags,    CSR_FFLAGS);
+
+#undef INSN
+
+void MacroAssembler::csrr(Register Rd, unsigned csr) {
+  csrrs(Rd, csr, x0);  // read the CSR into Rd; the set mask comes from x0, so no bits are set
+}
+
+#define INSN(NAME, OPFUN)                                      \
+  void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
+    OPFUN(x0, csr, Rs);                                        \
+  }
+
+  INSN(csrw, csrrw);  // register forms; destination x0 discards the previous CSR value
+  INSN(csrs, csrrs);
+  INSN(csrc, csrrc);
+
+#undef INSN
+
+#define INSN(NAME, OPFUN)                                      \
+  void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
+    OPFUN(x0, csr, imm);                                       \
+  }
+
+  INSN(csrwi, csrrwi);  // immediate forms; destination x0 discards the previous CSR value
+  INSN(csrsi, csrrsi);
+  INSN(csrci, csrrci);
+
+#undef INSN
+
+#define INSN(NAME, CSR)                                      \
+  void MacroAssembler::NAME(Register Rd, Register Rs) {      \
+    csrrw(Rd, CSR, Rs);                                      \
+  }
+
+  INSN(fscsr,   CSR_FCSR);    // csrrw: Rs is written to the CSR, Rd is the destination register
+  INSN(fsrm,    CSR_FRM);
+  INSN(fsflags, CSR_FFLAGS);
+
+#undef INSN
+
+// Write-only variants of the floating-point CSR helpers: NAME(Rs) calls the
+// two-register overload NAME(x0, Rs), i.e. with x0 as the destination.
+#define INSN(NAME)                              \
+  void MacroAssembler::NAME(Register Rs) {      \
+    NAME(x0, Rs);                               \
+  }
+
+  INSN(fscsr);
+  INSN(fsrm);
+  INSN(fsflags);
+
+#undef INSN
+
+// Emits csrrwi(Rd, CSR_FRM, imm) to install the immediate `imm` in the
+// rounding-mode CSR. The guarantee() rejects any imm >= 5 at code-generation
+// time, in product builds as well.
+void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
+  guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
+  csrrwi(Rd, CSR_FRM, imm);
+}
+
+// Emits csrrwi(Rd, CSR_FFLAGS, imm) to install the immediate `imm` in the
+// floating-point flags CSR. Unlike fsrmi(), no range check is made on imm
+// here.
+void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
+   csrrwi(Rd, CSR_FFLAGS, imm);
+}
+
+// Write-only immediate variants: NAME(imm) calls NAME(x0, imm), i.e. the
+// two-argument overload with x0 as the destination register.
+#define INSN(NAME)                             \
+  void MacroAssembler::NAME(unsigned imm) {    \
+    NAME(x0, imm);                             \
+  }
+
+  INSN(fsrmi);
+  INSN(fsflagsi);
+
+#undef INSN
+
+// Pushes a single register on the stack addressed by esp: esp is lowered by
+// one word first, then Rs is stored at the new esp.
+void MacroAssembler::push_reg(Register Rs)
+{
+  addi(esp, esp, 0 - wordSize);
+  sd(Rs, Address(esp, 0));
+}
+
+// Pops a single register from the stack addressed by esp: Rd is loaded from
+// the word at esp, then esp is raised by one word.
+void MacroAssembler::pop_reg(Register Rd)
+{
+  ld(Rd, esp, 0);
+  addi(esp, esp, wordSize);
+}
+
+// Expands a 32-bit register mask into a list of register numbers.
+// Bit n of `bitset` selects register n. The selected numbers are written to
+// `regs` from the highest register down to the lowest, so `regs` needs room
+// for up to 32 entries. Returns the number of entries written.
+int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
+  int found = 0;
+  for (int bit = 31; bit >= 0; bit--) {
+    const bool selected = ((bitset >> bit) & 1U) != 0;
+    if (selected) {
+      regs[found] = (unsigned char)bit;
+      found++;
+    }
+  }
+  return found;
+}
+
+// Push the integer registers selected by `bitset` below `stack`. Callers
+// must not include sp in the set.
+// `stack` is lowered by one word per register, plus one padding word when
+// the register count is odd, and the registers are then stored in ascending
+// register order starting just above the padding.
+// Returns the number of registers pushed (the padding word is not counted).
+int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
+  DEBUG_ONLY(int words_pushed = 0;)
+  CompressibleRegion cr(this);
+
+  unsigned char regs[32];
+  const int count = bitset_to_regs(bitset, regs);
+  // one extra slot keeps the adjustment an even number of words
+  const int pad_bytes = is_even(count) ? 0 : wordSize;
+
+  if (count != 0) {
+    addi(stack, stack, -count * wordSize - pad_bytes);
+  }
+
+  // regs[] is ordered high-to-low, so walk it backwards while the slot
+  // index walks forwards.
+  for (int slot = 0; slot < count; slot++) {
+    const int reg_idx = count - 1 - slot;
+    sd(as_Register(regs[reg_idx]), Address(stack, slot * wordSize + pad_bytes));
+    DEBUG_ONLY(words_pushed++;)
+  }
+
+  assert(words_pushed == count, "oops, pushed != count");
+
+  return count;
+}
+
+// Reload the integer registers selected by `bitset` from the frame laid out
+// by the bitset form of push_reg(), then raise `stack` past that frame,
+// including the padding word used for an odd register count.
+// Returns the number of registers popped.
+int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
+  DEBUG_ONLY(int words_popped = 0;)
+  CompressibleRegion cr(this);
+
+  unsigned char regs[32];
+  const int count = bitset_to_regs(bitset, regs);
+  // one extra slot keeps the adjustment an even number of words
+  const int pad_bytes = is_even(count) ? 0 : wordSize;
+
+  // regs[] is ordered high-to-low, so walk it backwards while the slot
+  // index walks forwards.
+  for (int slot = 0; slot < count; slot++) {
+    const int reg_idx = count - 1 - slot;
+    ld(as_Register(regs[reg_idx]), Address(stack, slot * wordSize + pad_bytes));
+    DEBUG_ONLY(words_popped++;)
+  }
+
+  if (count != 0) {
+    addi(stack, stack, count * wordSize + pad_bytes);
+  }
+  assert(words_popped == count, "oops, popped != count");
+
+  return count;
+}
+
+// Push the floating-point registers selected by `bitset` below `stack`.
+// The frame is rounded up to an even number of slots; with an odd register
+// count the lowest slot is left unused as padding.
+// Returns the number of registers pushed (the padding slot is not counted).
+int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  DEBUG_ONLY(int words_pushed = 0;)
+  unsigned char regs[32];
+  const int count = bitset_to_regs(bitset, regs);
+  const int pad_slots = count & 1;
+  const int push_slots = count + pad_slots;
+
+  if (count != 0) {
+    addi(stack, stack, -push_slots * wordSize);
+  }
+
+  // regs[] is ordered high-to-low; store the lowest register first, just
+  // above the padding slot.
+  for (int k = 0; k < count; k++) {
+    const int reg_idx = count - 1 - k;
+    fsd(as_FloatRegister(regs[reg_idx]), Address(stack, (pad_slots + k) * wordSize));
+    DEBUG_ONLY(words_pushed++;)
+  }
+
+  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
+
+  return count;
+}
+
+// Reload the floating-point registers selected by `bitset` from the frame
+// laid out by push_fp(), then raise `stack` past that frame, including the
+// padding slot used for an odd register count.
+// Returns the number of registers popped.
+int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
+  CompressibleRegion cr(this);
+  DEBUG_ONLY(int words_popped = 0;)
+  unsigned char regs[32];
+  const int count = bitset_to_regs(bitset, regs);
+  const int pad_slots = count & 1;
+  const int pop_slots = count + pad_slots;
+
+  // regs[] is ordered high-to-low; reload the lowest register first, from
+  // just above the padding slot.
+  for (int k = 0; k < count; k++) {
+    const int reg_idx = count - 1 - k;
+    fld(as_FloatRegister(regs[reg_idx]), Address(stack, (pad_slots + k) * wordSize));
+    DEBUG_ONLY(words_popped++;)
+  }
+
+  if (count != 0) {
+    addi(stack, stack, pop_slots * wordSize);
+  }
+
+  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
+
+  return count;
+}
+
+// Saves the call-clobbered registers on the sp stack.
+// Integer registers x7, x10-x17 and x28-x31 are pushed first, minus any
+// register in `exclude`. Then a 20-word area is reserved and f0-f7, f10-f17
+// and f28-f31 are stored into it in ascending register order; `exclude`
+// does not affect the float registers.
+void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
+  CompressibleRegion cr(this);
+
+  // Integer part.
+  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+
+  // Float part: 8 + 8 + 4 registers.
+  const int fp_slots = 20;
+  addi(sp, sp, - wordSize * fp_slots);
+  int slot = 0;
+  for (int enc = 0; enc < 32; enc++) {
+    const bool in_low  = enc <= f7->encoding();
+    const bool in_mid  = enc >= f10->encoding() && enc <= f17->encoding();
+    const bool in_high = enc >= f28->encoding();
+    if (!(in_low || in_mid || in_high)) {
+      continue;
+    }
+    fsd(as_FloatRegister(enc), Address(sp, wordSize * slot));
+    slot++;
+  }
+}
+
+// Restores what push_call_clobbered_registers_except() saved, in reverse:
+// f0-f7, f10-f17 and f28-f31 are reloaded from the 20-word area at sp, that
+// area is released, and the integer registers x7, x10-x17 and x28-x31 are
+// then popped, minus any register in `exclude`.
+void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
+  CompressibleRegion cr(this);
+
+  // Float part: 8 + 8 + 4 registers.
+  const int fp_slots = 20;
+  int slot = 0;
+  for (int enc = 0; enc < 32; enc++) {
+    const bool in_low  = enc <= f7->encoding();
+    const bool in_mid  = enc >= f10->encoding() && enc <= f17->encoding();
+    const bool in_high = enc >= f28->encoding();
+    if (!(in_low || in_mid || in_high)) {
+      continue;
+    }
+    fld(as_FloatRegister(enc), Address(sp, wordSize * slot));
+    slot++;
+  }
+  addi(sp, sp, wordSize * fp_slots);
+
+  // Integer part.
+  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
+}
+
+// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+// That is x1 plus x5-x31, pushed on the sp stack with push_reg().
+void MacroAssembler::pusha() {
+  CompressibleRegion cr(this);
+  push_reg(RegSet::of(x1) + RegSet::range(x5, x31), sp);
+}
+
+// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
+// Uses the same register set as pusha() (x1 plus x5-x31).
+void MacroAssembler::popa() {
+  CompressibleRegion cr(this);
+  pop_reg(RegSet::of(x1) + RegSet::range(x5, x31), sp);
+}
+
+// Saves x5-x31 and all 32 floating-point registers on the sp stack.
+// The integer registers are pushed first; a 32-word area is then reserved
+// below them and f<n> is stored at word n of that area.
+void MacroAssembler::push_CPU_state() {
+  CompressibleRegion cr(this);
+
+  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+  push_reg(RegSet::range(x5, x31), sp);
+
+  // float registers
+  const int fp_reg_count = 32;
+  addi(sp, sp, - fp_reg_count * wordSize);
+  for (int n = 0; n < fp_reg_count; n++) {
+    fsd(as_FloatRegister(n), Address(sp, n * wordSize));
+  }
+}
+
+// Restores the state saved by push_CPU_state(): f<n> is reloaded from word n
+// of the 32-word area at sp, the area is released, and x5-x31 are popped.
+void MacroAssembler::pop_CPU_state() {
+  CompressibleRegion cr(this);
+
+  // float registers
+  const int fp_reg_count = 32;
+  for (int n = 0; n < fp_reg_count; n++) {
+    fld(as_FloatRegister(n), Address(sp, n * wordSize));
+  }
+  addi(sp, sp, fp_reg_count * wordSize);
+
+  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
+  pop_reg(RegSet::range(x5, x31), sp);
+}
+
+// Rewrites the immediate of the jal instruction at `branch` so that it
+// encodes `offset`. The four patch() calls scatter the offset bits into the
+// instruction fields named in the per-line comments.
+// Debug builds assert that the offset fits the jal immediate.
+// Returns the number of bytes patched (one instruction).
+static int patch_offset_in_jal(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n");
+  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
+  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
+  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
+  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
+  return NativeInstruction::instruction_size;                                   // only one instruction
+}
+
+// Rewrites the immediate of the conditional branch at `branch` so that it
+// encodes `offset`. The four patch() calls scatter the offset bits into the
+// instruction fields named in the per-line comments.
+// Debug builds assert that the offset fits the branch immediate.
+// Returns the number of bytes patched (one instruction).
+static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
+  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n");
+  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
+  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
+  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
+  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
+  return NativeInstruction::instruction_size;                                   // only one instruction
+}
+
+// Patches a two-instruction pc-relative sequence (auipc followed by
+// addi/jalr/load) at `branch` so that it encodes `offset`.
+// The low 12 bits of the offset go into the second instruction. 0x800 is
+// added before taking the upper 20 bits for the auipc, which compensates
+// for the low part being treated as a signed 12-bit value.
+// Returns the number of bytes patched (two instructions).
+static int patch_offset_in_pc_relative(address branch, int64_t offset) {
+  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
+  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
+  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
+  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
+}
+
+// Patches the six-instruction movptr sequence at `branch` so that it
+// materializes `target`. Only the 1st, 2nd, 4th and 6th instructions (byte
+// offsets 0, 4, 12, 20) carry immediates; the slli instructions at offsets
+// 8 and 16 are left alone.
+// Returns the number of bytes in the sequence (six instructions).
+static int patch_addr_in_movptr(address branch, address target) {
+  const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
+  // lower: the low 29 bits of target, sign-extended from bit 28.
+  int32_t lower = ((intptr_t)target << 35) >> 35;
+  // upper: what remains once lower is subtracted, i.e. target[28] is carried
+  // into the lui immediate when lower is negative.
+  int64_t upper = ((intptr_t)target - lower) >> 29;
+  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[48:29] + target[28] ==> branch[31:12]
+  Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff);                 // Addi.            target[28:17] ==> branch[31:20]
+  Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);                  // Addi.            target[16: 6] ==> branch[31:20]
+  Assembler::patch(branch + 20, 31, 20, lower & 0x3f);                          // Addi/Jalr/Load.  target[ 5: 0] ==> branch[31:20]
+  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+// Patches the two-instruction li32 sequence (lui + addiw) at `branch` so
+// that it materializes the 32-bit value `target`.
+// Returns the number of bytes in the sequence (two instructions).
+static int patch_imm_in_li32(address branch, int32_t target) {
+  const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
+  int64_t upper = (intptr_t)target;
+  // lower: the low 12 bits of target, sign-extended from bit 11.
+  int32_t lower = (((int32_t)target) << 20) >> 20;
+  // Remove the signed low part so that lui + addiw add back up to target,
+  // then truncate the remainder to 32 bits.
+  upper -= lower;
+  upper = (int32_t)upper;
+  Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
+  Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
+  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
+}
+
+// Decodes the branch offset stored in the jal instruction at `insn_addr`.
+// This is the inverse of patch_offset_in_jal(): the immediate field
+// (instruction bits 31:12) is extracted and its pieces are moved back to
+// their offset bit positions.
+static long get_offset_of_jal(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  long offset = 0;
+  unsigned insn = *(unsigned*)insn_addr;
+  long val = (long)Assembler::sextract(insn, 31, 12);
+  offset |= ((val >> 19) & 0x1) << 20;     // insn[31]    ==> offset[20]
+  offset |= (val & 0xff) << 12;            // insn[19:12] ==> offset[19:12]
+  offset |= ((val >> 8) & 0x1) << 11;      // insn[20]    ==> offset[11]
+  offset |= ((val >> 9) & 0x3ff) << 1;     // insn[30:21] ==> offset[10:1]
+  offset = (offset << 43) >> 43;           // sign-extend from bit 20
+  return offset;
+}
+
+// Decodes the branch offset stored in the conditional branch instruction at
+// `insn_addr`. This is the inverse of patch_offset_in_conditional_branch().
+static long get_offset_of_conditional_branch(address insn_addr) {
+  long offset = 0;
+  assert_cond(insn_addr != NULL);
+  unsigned insn = *(unsigned*)insn_addr;
+  offset = (long)Assembler::sextract(insn, 31, 31);                                        // insn[31]    ==> offset[12] (after the shift below)
+  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);       // insn[7]     ==> offset[11]
+  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);             // insn[30:25] ==> offset[10:5]
+  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);               // insn[11:8]  ==> offset[4:1]
+  offset = (offset << 41) >> 41;
+  return offset;
+}
+
+// Decodes the offset encoded by a two-instruction pc-relative sequence at
+// `insn_addr`: the upper-immediate field of the first instruction shifted
+// left by 12, plus the 12-bit immediate field of the second instruction.
+// The sum is then sign-extended from bit 31.
+static long get_offset_of_pc_relative(address insn_addr) {
+  long offset = 0;
+  assert_cond(insn_addr != NULL);
+  offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
+  offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
+  offset = (offset << 32) >> 32;
+  return offset;
+}
+
+// Reconstructs the address materialized by the six-instruction movptr
+// sequence at `insn_addr`. It sums the immediates of instructions 0, 1, 3
+// and 5, each shifted to the position it reaches after the sequence's
+// shifts (29, 17, 6 and 0). Instructions 2 and 4 are not read.
+static address get_target_of_movptr(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29;    // Lui.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17;                        // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6;                         // Addi.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
+  return (address) target_address;
+}
+
+// Reconstructs the value materialized by the two-instruction li32 sequence
+// at `insn_addr`: the 20-bit upper immediate of the first instruction
+// (masked, then shifted left by 12) plus the immediate field of the second
+// instruction. The result is returned as an address.
+static address get_target_of_li32(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
+  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
+  return (address)target_address;
+}
+
+// Patch the instruction (or instruction sequence) at `branch` so that it
+// refers to `target`. The kind of sequence is recognized by the
+// NativeInstruction predicates, tested in a fixed order; pc-relative forms
+// are patched with the distance target - branch, absolute forms with the
+// target value itself.
+// Returns the total length (in bytes) of the patched instructions. An
+// unrecognized instruction is a fatal error (ShouldNotReachHere).
+int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
+  assert_cond(branch != NULL);
+  const int64_t distance = target - branch;
+
+  // jal
+  if (NativeInstruction::is_jal_at(branch)) {
+    return patch_offset_in_jal(branch, distance);
+  }
+  // beq/bge/bgeu/blt/bltu/bne
+  if (NativeInstruction::is_branch_at(branch)) {
+    return patch_offset_in_conditional_branch(branch, distance);
+  }
+  // auipc, addi/jalr/load
+  if (NativeInstruction::is_pc_relative_at(branch)) {
+    return patch_offset_in_pc_relative(branch, distance);
+  }
+  // movptr
+  if (NativeInstruction::is_movptr_at(branch)) {
+    return patch_addr_in_movptr(branch, target);
+  }
+  // li32
+  if (NativeInstruction::is_li32_at(branch)) {
+    const int64_t imm = (intptr_t)target;
+    return patch_imm_in_li32(branch, (int32_t)imm);
+  }
+
+#ifdef ASSERT
+  tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
+                *(unsigned*)branch, p2i(branch));
+  Disassembler::decode(branch - 16, branch + 16);
+#endif
+  ShouldNotReachHere();
+  return -1;
+}
+
+// Returns the address referred to by the instruction (sequence) at
+// `insn_addr`. For jal, conditional branches and pc-relative sequences the
+// decoded offset is added to insn_addr; for movptr and li32 the decoded
+// value is returned directly. An unrecognized instruction is a fatal error
+// (ShouldNotReachHere).
+address MacroAssembler::target_addr_for_insn(address insn_addr) {
+  assert_cond(insn_addr != NULL);
+
+  long rel = 0;
+  if (NativeInstruction::is_jal_at(insn_addr)) {
+    // jal
+    rel = get_offset_of_jal(insn_addr);
+  } else if (NativeInstruction::is_branch_at(insn_addr)) {
+    // beq/bge/bgeu/blt/bltu/bne
+    rel = get_offset_of_conditional_branch(insn_addr);
+  } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {
+    // auipc, addi/jalr/load
+    rel = get_offset_of_pc_relative(insn_addr);
+  } else if (NativeInstruction::is_movptr_at(insn_addr)) {
+    // movptr
+    return get_target_of_movptr(insn_addr);
+  } else if (NativeInstruction::is_li32_at(insn_addr)) {
+    // li32
+    return get_target_of_li32(insn_addr);
+  } else {
+    ShouldNotReachHere();
+  }
+
+  return address(((uintptr_t)insn_addr + rel));
+}
+
+// Patches the oop `o` into the instruction sequence at `insn_addr`.
+// A li32 sequence receives the narrow (compressed) encoding of the oop; a
+// movptr sequence receives the full address. Returns the length in bytes of
+// the patched sequence. Any other instruction is a fatal error
+// (ShouldNotReachHere).
+int MacroAssembler::patch_oop(address insn_addr, address o) {
+  const bool holds_narrow_oop = NativeInstruction::is_li32_at(insn_addr);
+  if (holds_narrow_oop) {
+    // Move narrow OOP
+    narrowOop n = CompressedOops::encode((oop)o);
+    return patch_imm_in_li32(insn_addr, (int32_t)n);
+  }
+
+  const bool holds_wide_oop = NativeInstruction::is_movptr_at(insn_addr);
+  if (holds_wide_oop) {
+    // Move wide OOP
+    return patch_addr_in_movptr(insn_addr, o);
+  }
+
+  ShouldNotReachHere();
+  return -1;
+}
+
+// Reloads xheapbase with the narrow-pointer base when compressed oops are in
+// use; emits nothing otherwise.
+// Once the Universe is fully initialized the base is emitted as a constant;
+// before that it is loaded at run time from Universe::narrow_ptrs_base_addr().
+void MacroAssembler::reinit_heapbase() {
+  if (!UseCompressedOops) {
+    return;
+  }
+
+  if (!Universe::is_fully_initialized()) {
+    int32_t offset = 0;
+    la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset);
+    ld(xheapbase, Address(xheapbase, offset));
+    return;
+  }
+
+  mv(xheapbase, Universe::narrow_ptrs_base());
+}
+
+// Loads the target of a literal Address into Rd.
+// The address's relocation is recorded first, then the target is
+// materialized with movptr(). Debug builds assert that `dest` is in
+// Address::literal mode.
+void MacroAssembler::mv(Register Rd, Address dest) {
+  assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
+  relocate(dest.rspec());
+  movptr(Rd, dest.target());
+}
+
+// Moves `src` into Rd, dispatching to the register or the constant overload
+// of mv() depending on what `src` holds.
+void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
+  if (!src.is_register()) {
+    mv(Rd, src.as_constant());
+    return;
+  }
+  mv(Rd, src.as_register());
+}
+
+// 32-bit AND: Rd = sign-extend32(Rs1 & Rs2).
+void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
+  andr(Rd, Rs1, Rs2);
+  // addw with zr: the result is clipped to 32 bits, then the sign bit is
+  // extended, and the result is stored in Rd
+  addw(Rd, Rd, zr);
+}
+
+// 32-bit OR: Rd = sign-extend32(Rs1 | Rs2).
+void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
+  orr(Rd, Rs1, Rs2);
+  // addw with zr: the result is clipped to 32 bits, then the sign bit is
+  // extended, and the result is stored in Rd
+  addw(Rd, Rd, zr);
+}
+
+// 32-bit XOR: Rd = sign-extend32(Rs1 ^ Rs2).
+void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
+  xorr(Rd, Rs1, Rs2);
+  // addw with zr: the result is clipped to 32 bits, then the sign bit is
+  // extended, and the result is stored in Rd
+  addw(Rd, Rd, zr);
+}
+
+// Note: load_unsigned_short used to be called load_unsigned_word.
+// Emits lhu(dst, src) and returns the code offset, as reported by offset(),
+// taken just before the load is emitted.
+int MacroAssembler::load_unsigned_short(Register dst, Address src) {
+  const int insn_offset = offset();
+  lhu(dst, src);
+  return insn_offset;
+}
+
+// Emits lbu(dst, src) and returns the code offset, as reported by offset(),
+// taken just before the load is emitted.
+int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
+  const int insn_offset = offset();
+  lbu(dst, src);
+  return insn_offset;
+}
+
+// Emits lh(dst, src) and returns the code offset, as reported by offset(),
+// taken just before the load is emitted.
+int MacroAssembler::load_signed_short(Register dst, Address src) {
+  const int insn_offset = offset();
+  lh(dst, src);
+  return insn_offset;
+}
+
+// Emits lb(dst, src) and returns the code offset, as reported by offset(),
+// taken just before the load is emitted.
+int MacroAssembler::load_signed_byte(Register dst, Address src) {
+  const int insn_offset = offset();
+  lb(dst, src);
+  return insn_offset;
+}
+
+// Loads a 1, 2, 4 or 8 byte value from `src` into `dst`. For sizes below 8
+// bytes `is_signed` selects the sign-extending or the zero-extending load.
+// Any other size is a fatal error. `dst2` is not used here.
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+    case 8:
+      ld(dst, src);
+      break;
+    case 4:
+      if (is_signed) {
+        lw(dst, src);
+      } else {
+        lwu(dst, src);
+      }
+      break;
+    case 2:
+      if (is_signed) {
+        load_signed_short(dst, src);
+      } else {
+        load_unsigned_short(dst, src);
+      }
+      break;
+    case 1:
+      if (is_signed) {
+        load_signed_byte(dst, src);
+      } else {
+        load_unsigned_byte(dst, src);
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+// Stores the low 1, 2, 4 or 8 bytes of `src` to `dst`. Any other size is a
+// fatal error. `src2` is not used here.
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  if (size_in_bytes == 8) {
+    sd(src, dst);
+  } else if (size_in_bytes == 4) {
+    sw(src, dst);
+  } else if (size_in_bytes == 2) {
+    sh(src, dst);
+  } else if (size_in_bytes == 1) {
+    sb(src, dst);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// reverse bytes in halfword in lower 16 bits and sign-extend
+// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
+// Without Zbb, tmp is clobbered and must differ from both Rs and Rd.
+void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
+  if (UseZbb) {
+    // Reverse all eight bytes, then shift the two wanted bytes down from the
+    // top with an arithmetic shift so the result is sign-extended.
+    rev8(Rd, Rs);
+    srai(Rd, Rd, 48);
+    return;
+  }
+  assert_different_registers(Rs, tmp);
+  assert_different_registers(Rd, tmp);
+  // tmp = Rs[15:8], which becomes the low byte of the result
+  srli(tmp, Rs, 8);
+  andi(tmp, tmp, 0xFF);
+  // Rd = Rs[7:0] moved to bits 15:8, sign-extended from bit 15
+  slli(Rd, Rs, 56);
+  srai(Rd, Rd, 48); // sign-extend
+  orr(Rd, Rd, tmp);
+}
+
+// reverse bytes in lower word and sign-extend
+// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
+// Without Zbb, tmp1 and tmp2 are clobbered and must differ from Rs and Rd.
+void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  if (UseZbb) {
+    // Reverse all eight bytes, then shift the four wanted bytes down from
+    // the top with an arithmetic shift so the result is sign-extended.
+    rev8(Rd, Rs);
+    srai(Rd, Rd, 32);
+    return;
+  }
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
+  // First swap the bytes inside each halfword of the low word...
+  revb_h_w_u(Rd, Rs, tmp1, tmp2);
+  // ...then swap the two halfwords: the low halfword moves to bits 31:16
+  // (sign-extended), the high halfword moves to bits 15:0.
+  slli(tmp2, Rd, 48);
+  srai(tmp2, tmp2, 32); // sign-extend
+  srli(Rd, Rd, 16);
+  orr(Rd, Rd, tmp2);
+}
+
+// reverse bytes in halfword in lower 16 bits and zero-extend
+// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
+// Without Zbb, tmp is clobbered and must differ from both Rs and Rd.
+void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
+  if (UseZbb) {
+    // Reverse all eight bytes, then shift the two wanted bytes down from the
+    // top with a logical shift so the result is zero-extended.
+    rev8(Rd, Rs);
+    srli(Rd, Rd, 48);
+    return;
+  }
+  assert_different_registers(Rs, tmp);
+  assert_different_registers(Rd, tmp);
+  // tmp = Rs[15:8], which becomes the low byte of the result
+  srli(tmp, Rs, 8);
+  andi(tmp, tmp, 0xFF);
+  // Rd = Rs[7:0] moved to bits 15:8
+  andi(Rd, Rs, 0xFF);
+  slli(Rd, Rd, 8);
+  orr(Rd, Rd, tmp);
+}
+
+// reverse bytes in halfwords in lower 32 bits and zero-extend
+// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
+// Without Zbb, tmp1 and tmp2 are clobbered and must differ from Rs and Rd.
+void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  if (UseZbb) {
+    rev8(Rd, Rs);
+    rori(Rd, Rd, 32);
+    roriw(Rd, Rd, 16);
+    zero_extend(Rd, Rd, 32);
+    return;
+  }
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
+  // tmp2 = byte-swapped upper halfword (Rs[31:16])
+  srli(tmp2, Rs, 16);
+  revb_h_h_u(tmp2, tmp2, tmp1);
+  // Rd = byte-swapped lower halfword (Rs[15:0])
+  revb_h_h_u(Rd, Rs, tmp1);
+  // put the upper halfword back at bits 31:16 and combine
+  slli(tmp2, tmp2, 16);
+  orr(Rd, Rd, tmp2);
+}
+
+// This method is only used for revb_h
+// Rd = Rs[47:0] Rs[55:48] Rs[63:56]
+// That is, Rs is shifted left by one halfword and the halfword shifted out
+// at the top re-enters at the bottom with its two bytes swapped.
+// Clobbers tmp1 and tmp2.
+void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1);
+  // tmp1 = top halfword of Rs, moved down to bits 15:0
+  srli(tmp1, Rs, 48);
+  // tmp2 = its low byte moved to bits 15:8
+  andi(tmp2, tmp1, 0xFF);
+  slli(tmp2, tmp2, 8);
+  // tmp1 = its high byte moved to bits 7:0, then combine both bytes
+  srli(tmp1, tmp1, 8);
+  orr(tmp1, tmp1, tmp2);
+  // shift the remaining three halfwords up and insert the swapped one
+  slli(Rd, Rs, 16);
+  orr(Rd, Rd, tmp1);
+}
+
+// reverse bytes in each halfword
+// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
+// With Zbb only tmp1 is used as scratch; without it both tmp1 and tmp2 are
+// clobbered. The scratch registers must differ from Rs and Rd.
+void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  if (UseZbb) {
+    assert_different_registers(Rs, tmp1);
+    assert_different_registers(Rd, tmp1);
+    rev8(Rd, Rs);
+    // tmp1: low word of the reversed value, rotated by 16 and moved to the
+    // upper word of the result
+    zero_extend(tmp1, Rd, 32);
+    roriw(tmp1, tmp1, 16);
+    slli(tmp1, tmp1, 32);
+    // Rd: high word of the reversed value, rotated by 16 and zero-extended
+    // to form the lower word of the result
+    srli(Rd, Rd, 32);
+    roriw(Rd, Rd, 16);
+    zero_extend(Rd, Rd, 32);
+    orr(Rd, Rd, tmp1);
+    return;
+  }
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
+  // Four helper steps in total, one per halfword: each step byte-swaps the
+  // top halfword and rotates it to the bottom.
+  revb_h_helper(Rd, Rs, tmp1, tmp2);
+  for (int i = 0; i < 3; ++i) {
+    revb_h_helper(Rd, Rd, tmp1, tmp2);
+  }
+}
+
+// reverse bytes in each word
+// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
+// Implemented as a full 64-bit byte reversal followed by a rotation by 32
+// bits, which swaps the two words back into place.
+// Without Zbb, tmp1 and tmp2 are clobbered and must differ from Rs and Rd;
+// ror_imm() is called with its default scratch register.
+void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  if (UseZbb) {
+    rev8(Rd, Rs);
+    rori(Rd, Rd, 32);
+    return;
+  }
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
+  revb(Rd, Rs, tmp1, tmp2);
+  ror_imm(Rd, Rd, 32);
+}
+
+// reverse bytes in doubleword
+// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
+// Without Zbb, tmp1 and tmp2 are clobbered and must differ from Rs and Rd.
+void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
+  if (UseZbb) {
+    rev8(Rd, Rs);
+    return;
+  }
+  assert_different_registers(Rs, tmp1, tmp2);
+  assert_different_registers(Rd, tmp1, tmp2);
+  // tmp1 accumulates the result: start with byte 0 and make room for the
+  // next byte.
+  andi(tmp1, Rs, 0xFF);
+  slli(tmp1, tmp1, 8);
+  // Append bytes 1..6 of Rs one at a time, shifting the accumulator left by
+  // one byte after each.
+  for (int step = 8; step < 56; step += 8) {
+    srli(tmp2, Rs, step);
+    andi(tmp2, tmp2, 0xFF);
+    orr(tmp1, tmp1, tmp2);
+    slli(tmp1, tmp1, 8);
+  }
+  // Byte 7 of Rs becomes the lowest byte of the result.
+  srli(Rd, Rs, 56);
+  andi(Rd, Rd, 0xFF);
+  orr(Rd, tmp1, Rd);
+}
+
+// rotate right with shift bits
+// dst = src rotated right by `shift` bit positions (64-bit rotate).
+// With Zbb a single rori is emitted and tmp is unused; otherwise the rotate
+// is built from two shifts and an or, clobbering tmp, which must differ
+// from both dst and src.
+// NOTE(review): the shift < 64 assert is only reached on the non-Zbb path,
+// and shift == 0 on that path asks slli for a shift of 64 - confirm callers
+// never pass 0.
+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
+{
+  if (UseZbb) {
+    rori(dst, src, shift);
+    return;
+  }
+
+  assert_different_registers(dst, tmp);
+  assert_different_registers(src, tmp);
+  assert(shift < 64, "shift amount must be < 64");
+  // tmp = the low `shift` bits of src, moved to the top
+  slli(tmp, src, 64 - shift);
+  // dst = the remaining high bits, moved down
+  srli(dst, src, shift);
+  orr(dst, dst, tmp);
+}
+
+// Rd = Rn & imm.
+// An immediate accepted by is_imm_in_range(imm, 12, 0) is encoded directly
+// in the instruction; any other value is first materialized in tmp, which
+// must then differ from Rn.
+void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
+  if (!is_imm_in_range(imm, 12, 0)) {
+    assert_different_registers(Rn, tmp);
+    mv(tmp, imm);
+    andr(Rd, Rn, tmp);
+    return;
+  }
+  and_imm12(Rd, Rn, imm);
+}
+
+// ORs `src` into the 64-bit word at `adr` (load, or, store back).
+// tmp1 always holds the loaded word. tmp2 is only used when `src` is a
+// constant rejected by is_imm_in_range(..., 12, 0), and must then differ
+// from tmp1.
+void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
+  ld(tmp1, adr);
+
+  if (src.is_register()) {
+    orr(tmp1, tmp1, src.as_register());
+  } else if (is_imm_in_range(src.as_constant(), 12, 0)) {
+    ori(tmp1, tmp1, src.as_constant());
+  } else {
+    assert_different_registers(tmp1, tmp2);
+    mv(tmp2, src.as_constant());
+    orr(tmp1, tmp1, tmp2);
+  }
+
+  sd(tmp1, adr);
+}
+
+// Loads the klass of `oop` into tmp and branches to L when it equals
+// trial_klass. With compressed class pointers the narrow klass is decoded
+// first: by a plain shift when there is no klass base, otherwise through
+// decode_klass_not_null().
+void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
+  if (!UseCompressedClassPointers) {
+    ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+  } else {
+    lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
+    if (Universe::narrow_klass_base() == NULL) {
+      slli(tmp, tmp, Universe::narrow_klass_shift());
+    } else {
+      decode_klass_not_null(tmp);
+    }
+  }
+  beq(trial_klass, tmp, L);
+}
+
+// Move an oop into a register. `immediate` is true when the instruction
+// will not be patched while another thread may be executing it; in that
+// case the oop is materialized with move-immediate instructions. Otherwise
+// it is loaded through ld_constant() from a nearby word-aligned address
+// that carries the oop relocation.
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
+  int oop_index;
+  if (obj != NULL) {
+#ifdef ASSERT
+    {
+      ThreadInVMfromUnknown tiv;
+      assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+    }
+#endif
+    oop_index = oop_recorder()->find_index(obj);
+  } else {
+    oop_index = oop_recorder()->allocate_oop_index(obj);
+  }
+
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  if (immediate) {
+    mv(dst, Address((address)obj, rspec));
+  } else {
+    address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
+    ld_constant(dst, Address(dummy, rspec));
+  }
+}
+
+// Move a metadata address into a register.
+// A NULL `obj` gets a freshly allocated metadata index; any other value is
+// looked up in the oop recorder. The address is then emitted with a
+// metadata relocation for that index.
+void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
+  const int metadata_index = (obj == NULL)
+      ? oop_recorder()->allocate_metadata_index(obj)
+      : oop_recorder()->find_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
+  mv(dst, Address((address)obj, rspec));
+}
+
+// Writes to stack successive pages until offset reached to check for
+// stack overflow + shadow pages.  This clobbers tmp, t0 and size.
+//
+// size - number of bytes to bang below sp; counted down to <= 0 here
+// tmp  - scratch register holding the address being touched
+void MacroAssembler::bang_stack_size(Register size, Register tmp) {
+  assert_different_registers(tmp, size, t0);
+  // Bang stack for total size given plus shadow page size.
+  // Bang one page at a time because large size can bang beyond yellow and
+  // red zones.
+  mv(t0, os::vm_page_size());
+  // Start from sp and move tmp one page further down on every iteration.
+  // Deriving tmp from sp inside the loop would touch the same page
+  // (sp - pagesize) over and over instead of walking down the stack.
+  mv(tmp, sp);
+  Label loop;
+  bind(loop);
+  sub(tmp, tmp, t0);
+  subw(size, size, t0);
+  sd(size, Address(tmp));
+  bgtz(size, loop);
+
+  // Bang down shadow pages too.
+  // At this point tmp is the last address touched, so don't touch it
+  // again: each iteration below first moves tmp one page further down and
+  // only then stores.  N.B.  It is important to touch all the way down
+  // through the shadow zone.
+  for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
+    // this could be any sized move but this can be a debugging crumb
+    // so the bigger the better.
+    sub(tmp, tmp, os::vm_page_size());
+    sd(size, Address(tmp, 0));
+  }
+}
+
+// Emits a test of the byte at flag_addr and a branch to _label that is
+// taken when the flag equals `value`; the code emitted between this
+// constructor and the point where _label is bound is therefore skipped in
+// that case.  Clobbers t0.
+//
+// masm      - assembler to emit into
+// flag_addr - address of the boolean flag tested at run time
+// value     - flag value for which the guarded code is skipped
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
+  int32_t offset = 0;
+  _masm = masm;
+  _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
+  _masm->lbu(t0, Address(t0, offset));
+  if (value) {
+    // skip when the flag is set
+    _masm->bnez(t0, _label);
+  } else {
+    // skip when the flag is clear
+    _masm->beqz(t0, _label);
+  }
+}
+
+// Binds the skip label at the current code position, so the branch emitted
+// by the constructor lands here, then drops the assembler pointer.
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+  _masm = NULL;
+}
+
+// Loads the java mirror of the holder class of `method` into dst.
+// Follows Method -> ConstMethod -> ConstantPool -> pool holder -> mirror
+// handle, then resolves the handle.
+//
+// dst    - receives the mirror oop; also used for the intermediate loads
+// method - register holding the Method*
+// tmp    - scratch register for resolve_oop_handle(); must differ from dst
+void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) {
+  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+  // Start from the register the caller passed, not from a hard-coded
+  // xmethod, so the `method` argument is honored.
+  ld(dst, Address(method, Method::const_offset()));
+  ld(dst, Address(dst, ConstMethod::constants_offset()));
+  ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
+  ld(dst, Address(dst, mirror_offset));
+  resolve_oop_handle(dst, tmp);
+}
+
+// Replaces the OopHandle in `result` with the oop it refers to, using an
+// IN_NATIVE object load from offset 0 of the handle. tmp is passed to the
+// load as a scratch register and must differ from result.
+void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
+  // OopHandle::resolve is an indirection.
+  assert_different_registers(result, tmp);
+  access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg);
+}
+
+// Emits a load of `type` from `src` into `dst` through the active barrier
+// set's assembler. The decorators are normalized with decorator_fixup()
+// first. With AS_RAW set, the base BarrierSetAssembler implementation is
+// called directly; otherwise the call is dispatched virtually.
+void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
+                                    Register dst, Address src,
+                                    Register tmp1, Register thread_tmp) {
+  BarrierSetAssembler* barrier_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+  decorators = AccessInternal::decorator_fixup(decorators);
+
+  if ((decorators & AS_RAW) == 0) {
+    barrier_asm->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+    return;
+  }
+  barrier_asm->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+}
+
+// Emits an explicit null check of `reg` when needs_explicit_null_check()
+// says the given offset requires one; otherwise emits nothing.
+void MacroAssembler::null_check(Register reg, int offset) {
+  if (!needs_explicit_null_check(offset)) {
+    // nothing to do, (later) access of M[reg + offset]
+    // will provoke OS NULL exception if reg = NULL
+    return;
+  }
+  // provoke OS NULL exception if reg = NULL by
+  // accessing M[reg] w/o changing any registers
+  // NOTE: this is plenty to provoke a segv
+  ld(zr, Address(reg, 0));
+}
+
+// Emits a store of `src` (of `type`) to `dst` through the active barrier
+// set's assembler. The decorators are normalized with decorator_fixup()
+// first. With AS_RAW set, the base BarrierSetAssembler implementation is
+// called directly; otherwise the call is dispatched virtually.
+void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
+                                     Address dst, Register src,
+                                     Register tmp1, Register thread_tmp) {
+  BarrierSetAssembler* barrier_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+  decorators = AccessInternal::decorator_fixup(decorators);
+
+  if ((decorators & AS_RAW) == 0) {
+    barrier_asm->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+    return;
+  }
+  barrier_asm->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
+}
+
+// Algorithm must match CompressedOops::encode.
+// Compresses the oop in `s` into `d`. With a heap base, xheapbase is
+// subtracted and a negative difference is replaced by zero before the
+// optional shift; without a base only the optional shift (or a plain move)
+// is emitted.
+void MacroAssembler::encode_heap_oop(Register d, Register s) {
+  verify_oop(s, "broken oop in encode_heap_oop");
+
+  if (Universe::narrow_oop_base() != NULL) {
+    Label notNull;
+    sub(d, s, xheapbase);
+    bgez(d, notNull);
+    // s was below the heap base: encode it as zero
+    mv(d, zr);
+    bind(notNull);
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      srli(d, d, Universe::narrow_oop_shift());
+    }
+    return;
+  }
+
+  // No heap base.
+  if (Universe::narrow_oop_shift() != 0) {
+    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+    srli(d, s, LogMinObjAlignmentInBytes);
+  } else {
+    mv(d, s);
+  }
+}
+
+// Loads the klass pointer of the object in `src` into `dst`. With
+// compressed class pointers the 32-bit field is loaded zero-extended and
+// then decoded.
+void MacroAssembler::load_klass(Register dst, Register src) {
+  if (!UseCompressedClassPointers) {
+    ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+    return;
+  }
+  lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
+  decode_klass_not_null(dst);
+}
+
+// Stores the klass pointer in `src` into the klass field of the object in
+// `dst`. With compressed class pointers `src` is encoded in place first, so
+// it is overwritten, and only 32 bits are stored.
+void MacroAssembler::store_klass(Register dst, Register src) {
+  // FIXME: Should this be a store release? concurrent gcs assumes
+  // klass length is valid if klass field is not null.
+  if (!UseCompressedClassPointers) {
+    sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+    return;
+  }
+  encode_klass_not_null(src);
+  sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
+}
+
+// Stores the low 32 bits of `src` into the klass gap of the object in
+// `dst`. Emits nothing unless compressed class pointers are in use.
+void MacroAssembler::store_klass_gap(Register dst, Register src) {
+  if (!UseCompressedClassPointers) {
+    return;
+  }
+  // Store to klass gap in destination
+  sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+}
+
+// In-place variant: decodes the narrow klass in r back into r, using the
+// three-argument overload with its default scratch register.
+void  MacroAssembler::decode_klass_not_null(Register r) {
+  decode_klass_not_null(r, r);
+}
+
+// Decodes the narrow klass in `src` into a full klass pointer in `dst`:
+// dst = base + (src << shift), where base and shift are the Universe's
+// narrow klass settings.
+// When dst and src are the same register, `tmp` holds the base; otherwise
+// dst itself is used for it. t0 is passed to shadd() as scratch when a
+// shift is needed. If the base ends up in xheapbase, xheapbase is reloaded
+// afterwards.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+
+  const bool has_shift = Universe::narrow_klass_shift() != 0;
+
+  if (Universe::narrow_klass_base() == NULL) {
+    // No base: a shift (or a plain move) is all that is needed.
+    if (has_shift) {
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      slli(dst, src, LogKlassAlignmentInBytes);
+    } else {
+      mv(dst, src);
+    }
+    return;
+  }
+
+  // Pick a register for the base that does not alias src.
+  Register xbase = (dst == src) ? tmp : dst;
+
+  assert_different_registers(src, xbase);
+  mv(xbase, (uintptr_t)Universe::narrow_klass_base());
+
+  if (has_shift) {
+    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    assert_different_registers(t0, xbase);
+    shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
+  } else {
+    add(dst, xbase, src);
+  }
+
+  if (xbase == xheapbase) {
+    reinit_heapbase();
+  }
+}
+
+// In-place variant: encodes the klass pointer in r into its narrow form in
+// r, using the three-argument overload with its default scratch register.
+void MacroAssembler::encode_klass_not_null(Register r) {
+  encode_klass_not_null(r, r);
+}
+
+// Encodes the klass pointer in `src` into its narrow form in `dst`:
+// dst = (src - base) >> shift, where base and shift are the Universe's
+// narrow klass settings.
+// Two cases avoid loading the base: no base at all, and a base whose low
+// 32 bits are zero combined with a zero shift, where keeping the low 32
+// bits of src is enough. Otherwise the base is materialized in `tmp` when
+// dst and src are the same register, else in dst; if that register is
+// xheapbase it is reloaded afterwards.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+
+  const bool has_shift = Universe::narrow_klass_shift() != 0;
+
+  if (Universe::narrow_klass_base() == NULL) {
+    // No base: a shift (or a plain move) is all that is needed.
+    if (has_shift) {
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      srli(dst, src, LogKlassAlignmentInBytes);
+    } else {
+      mv(dst, src);
+    }
+    return;
+  }
+
+  const bool base_low_word_is_zero =
+      ((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0;
+  if (base_low_word_is_zero && Universe::narrow_klass_shift() == 0) {
+    zero_extend(dst, src, 32);
+    return;
+  }
+
+  // Pick a register for the base that does not alias src.
+  Register xbase = (dst == src) ? tmp : dst;
+
+  assert_different_registers(src, xbase);
+  mv(xbase, (intptr_t)Universe::narrow_klass_base());
+  sub(dst, src, xbase);
+  if (has_shift) {
+    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    srli(dst, dst, LogKlassAlignmentInBytes);
+  }
+
+  if (xbase == xheapbase) {
+    reinit_heapbase();
+  }
+}
+
+// In-place form: decode the compressed oop held in r back into r by
+// delegating to the (dst, src) overload.
+void MacroAssembler::decode_heap_oop_not_null(Register r) {
+  decode_heap_oop_not_null(r, r);
+}
+
+// Decode the compressed oop in src into dst; no null check is emitted.
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+  assert(UseCompressedOops, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  // Cannot assert, unverified entry point counts instructions (see .ad file)
+  // vtableStubs also counts instructions in pd_code_size_limit.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_oop_shift() == 0) {
+    // Unshifted mode: the compressed value already is the address.
+    assert(Universe::narrow_oop_base() == NULL, "sanity");
+    mv(dst, src);
+    return;
+  }
+
+  assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  slli(dst, src, LogMinObjAlignmentInBytes);
+  if (Universe::narrow_oop_base() != NULL) {
+    // Heap-based mode: add the base held in xheapbase.
+    add(dst, xheapbase, dst);
+  }
+}
+
+// Decode the possibly-null compressed oop in s into d, then verify the result.
+// With a non-NULL heap base a zero input is branched around so that it
+// stays zero instead of becoming the base address.
+void MacroAssembler::decode_heap_oop(Register d, Register s) {
+  if (Universe::narrow_oop_base() != NULL) {
+    Label is_null;
+    mv(d, s);
+    beqz(s, is_null);
+    shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
+    bind(is_null);
+  } else {
+    // Zero base: a shift suffices; skip it only when it would be a no-op.
+    if (Universe::narrow_oop_shift() != 0 || d != s) {
+      slli(d, s, Universe::narrow_oop_shift());
+    }
+  }
+  verify_oop(d, "broken oop in decode_heap_oop");
+}
+
+// Store the oop in src to dst via access_store_at, with IN_HEAP added to
+// the caller-supplied decorators.
+void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
+                                    Register thread_tmp, DecoratorSet decorators) {
+  const DecoratorSet heap_decorators = IN_HEAP | decorators;
+  access_store_at(T_OBJECT, heap_decorators, dst, src, tmp1, thread_tmp);
+}
+
+// Load an oop from src into dst via access_load_at, with IN_HEAP added to
+// the caller-supplied decorators.
+void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
+                                   Register thread_tmp, DecoratorSet decorators) {
+  const DecoratorSet heap_decorators = IN_HEAP | decorators;
+  access_load_at(T_OBJECT, heap_decorators, dst, src, tmp1, thread_tmp);
+}
+
+// Load an oop that the caller knows to be non-null from src into dst via
+// access_load_at. IN_HEAP and IS_NOT_NULL are always applied; the
+// caller-supplied decorators are now honoured as well (they were previously
+// dropped, unlike in load_heap_oop/store_heap_oop).
+void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
+                                            Register thread_tmp, DecoratorSet decorators) {
+  access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
+}
+
+// Used for storing NULLs: emits an in-heap oop store to dst through
+// access_store_at with noreg as the value and for both temporaries.
+void MacroAssembler::store_heap_oop_null(Address dst) {
+  access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
+}
+
+// Java idiv / irem on 32-bit operands.
+//
+//   rs1:    dividend
+//   rs2:    divisor
+//   result: rs1 idiv rs2, or rs1 irem rs2 when want_remainder is set
+//
+// Returns the code offset of the emitted divw/remw instruction, which
+// may be needed for implicit exceptions.
+int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
+                                    bool want_remainder)
+{
+  const int div_insn_offset = offset();
+  if (want_remainder) {
+    remw(result, rs1, rs2); // result = rs1 % rs2;
+  } else {
+    divw(result, rs1, rs2);
+  }
+  return div_insn_offset;
+}
+
+// Java ldiv / lrem on 64-bit operands.
+//
+//   rs1:    dividend
+//   rs2:    divisor
+//   result: rs1 idiv rs2, or rs1 irem rs2 when want_remainder is set
+//
+// Returns the code offset of the emitted div/rem instruction, which
+// may be needed for implicit exceptions.
+int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
+                                    bool want_remainder)
+{
+  const int div_insn_offset = offset();
+  if (want_remainder) {
+    rem(result, rs1, rs2); // result = rs1 % rs2;
+  } else {
+    div(result, rs1, rs2);
+  }
+  return div_insn_offset;
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+//
+// Register usage visible in this routine:
+//   - scan_tmp walks the itableOffsetEntry list and is clobbered;
+//   - t0 is clobbered when return_method is true;
+//   - recv_klass is modified when return_method is true;
+//   - method_result is used as the scan scratch even when return_method is
+//     false, so it is clobbered either way.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_tmp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_tmp);
+  assert_different_registers(method_result, intf_klass, scan_tmp);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when mehtid isn't needed");
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must be same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
+  int vtable_base = in_bytes(Klass::vtable_start_offset());
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size_in_bytes();
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  // scan_tmp = vtable length (32-bit, zero-extended load).
+  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  // scan_tmp = recv_klass + (vtable length << 3) + vtable_base; the shift of 3
+  // matches the vte_size == wordSize assertion above.
+  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
+  add(scan_tmp, scan_tmp, vtable_base);
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    if (itable_index.is_register()) {
+      slli(t0, itable_index.as_register(), 3);
+    } else {
+      mv(t0, itable_index.as_constant() << 3);
+    }
+    add(recv_klass, recv_klass, t0);
+    if (itentry_off) {
+      add(recv_klass, recv_klass, itentry_off);
+    }
+  }
+
+  Label search, found_method;
+
+  // Peeled first iteration: test the first entry before entering the loop.
+  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+  beq(intf_klass, method_result, found_method);
+  bind(search);
+  // Check that the previous entry is non-null. A null entry means that
+  // the receiver class doesn't implement the interface, and wasn't the
+  // same as when the caller was compiled.
+  beqz(method_result, L_no_such_interface, /* is_far */ true);
+  addi(scan_tmp, scan_tmp, scan_step);
+  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
+  bne(intf_klass, method_result, search);
+
+  bind(found_method);
+
+  // Got a hit.
+  if (return_method) {
+    // Load the 32-bit offset stored in the matching entry, add it to the
+    // adjusted recv_klass, and load the method pointer from there.
+    lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
+    add(method_result, recv_klass, scan_tmp);
+    ld(method_result, Address(method_result));
+  }
+}
+
+// Virtual method calling: load into method_result the method stored in
+// recv_klass's vtable at vtable_index, which may be a register or a constant.
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert(vtableEntry::size() * wordSize == 8,
+         "adjust the scaling in the code below");
+  const int vtable_start = in_bytes(Klass::vtable_start_offset());
+  int method_slot_offset = vtable_start + vtableEntry::method_offset_in_bytes();
+
+  if (!vtable_index.is_register()) {
+    // Constant index: fold it into the displacement.
+    method_slot_offset += vtable_index.as_constant() * wordSize;
+    ld(method_result, form_address(method_result, recv_klass, method_slot_offset));
+    return;
+  }
+
+  // Register index: scale it by the word size and add it to recv_klass first.
+  shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
+  ld(method_result, Address(method_result, method_slot_offset));
+}
+
+// Emit a memory barrier for order_constraint. If the instruction emitted
+// immediately before is itself a recorded membar, no new instruction is
+// emitted: the new constraint is OR-ed into the existing one instead.
+void MacroAssembler::membar(uint32_t order_constraint) {
+  address prev = pc() - NativeMembar::instruction_size;
+  address last = code()->last_insn();
+
+  if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
+    // We are merging two memory barrier instructions.  On RISCV we
+    // can do this simply by ORing them together.
+    NativeMembar* prior = NativeMembar_at(prev);
+    prior->set_kind(prior->get_kind() | order_constraint);
+    BLOCK_COMMENT("merged membar");
+    return;
+  }
+
+  // Record this barrier so that a directly following membar can merge into it.
+  code()->set_last_insn(pc());
+
+  uint32_t pred_bits = 0;
+  uint32_t succ_bits = 0;
+  membar_mask_to_pred_succ(order_constraint, pred_bits, succ_bits);
+  fence(pred_bits, succ_bits);
+}
+
+// Form an address from base + byte_offset. Rd may or may not actually be
+// used: you must use the Address that is returned. Rd is written only when
+// byte_offset fails the 12-bit range check.
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
+  if (!is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
+    // Do it the hard way: Rd = base + byte_offset.
+    mv(Rd, byte_offset);
+    add(Rd, base, Rd);
+    return Address(Rd);
+  }
+
+  return Address(base, byte_offset);
+}
+
+// Emit a subtype check: the fast path followed by the slow path.
+// Control transfers to L_success when the check succeeds and falls through
+// past the emitted code otherwise.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register tmp_reg,
+                                         Label& L_success) {
+  Label L_not_subtype;
+  // A NULL slow-path label makes the fast path fall through into the slow path.
+  check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_not_subtype, NULL);
+  // A NULL failure label makes the slow path fall through on failure.
+  check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
+  bind(L_not_subtype);
+}
+
+// Emit a safepoint poll that branches to slow_path when a safepoint is
+// pending. Clobbers t0, and also t1 when thread-local polling is in use.
+void MacroAssembler::safepoint_poll(Label& slow_path) {
+  if (!SafepointMechanism::uses_thread_local_poll()) {
+    // Global poll: load the synchronization state word and test for non-zero.
+    int32_t lo12 = 0;
+    la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), lo12);
+    lwu(t0, Address(t0, lo12));
+    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
+    bnez(t0, slow_path);
+    return;
+  }
+
+  // Thread-local poll: test the poll bit in the thread's polling word.
+  ld(t1, Address(xthread, Thread::polling_page_offset()));
+  andi(t0, t1, SafepointMechanism::poll_bit());
+  bnez(t0, slow_path);
+}
+
+// Emit a 64-bit compare-and-exchange on the word at addr using an lr.d/sc.d
+// retry loop.
+//   oldv: expected value; on mismatch it is overwritten with the value read
+//   newv: value to store on match
+//   tmp:  scratch (loaded value, then the sc.d status)
+// On success control branches to succeed. On mismatch a full barrier is
+// emitted and control jumps to *fail, or falls through if fail is NULL.
+void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
+                                Label &succeed, Label *fail) {
+  // oldv holds comparison value
+  // newv holds value to write in exchange
+  // addr identifies memory word to compare against/update
+  Label retry_load, nope;
+  bind(retry_load);
+  // Load reserved from the memory location
+  lr_d(tmp, addr, Assembler::aqrl);
+  // Fail and exit if it is not what we expect
+  bne(tmp, oldv, nope);
+  // If the store conditional succeeds, tmp will be zero
+  sc_d(tmp, newv, addr, Assembler::rl);
+  beqz(tmp, succeed);
+  // Retry only when the store conditional failed
+  j(retry_load);
+
+  bind(nope);
+  membar(AnyAny);
+  // Report the value actually observed back to the caller through oldv.
+  mv(oldv, tmp);
+  if (fail != NULL) {
+    j(*fail);
+  }
+}
+
+// Compare-and-exchange on an object's header word. Because the mark word
+// is asserted to sit at offset 0, obj itself is passed to cmpxchgptr as the
+// address; all other arguments are forwarded unchanged.
+void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
+                                        Label &succeed, Label *fail) {
+  assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
+  cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
+}
+
+// Emit a load-reserved of the given operand size from addr into t0.
+// int64 uses lr.d; int32 and uint32 use lr.w, and uint32 additionally
+// zero-extends the loaded value to 32 bits. Any other size is a programming error.
+void MacroAssembler::load_reserved(Register addr,
+                                   enum operand_size size,
+                                   Assembler::Aqrl acquire) {
+  if (size == int64) {
+    lr_d(t0, addr, acquire);
+  } else if (size == int32 || size == uint32) {
+    lr_w(t0, addr, acquire);
+    if (size == uint32) {
+      zero_extend(t0, t0, 32);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Emit a store-conditional of new_val to addr, leaving the status in t0.
+// int64 uses sc.d; int32 and uint32 both use sc.w. Any other size is a
+// programming error.
+void MacroAssembler::store_conditional(Register addr,
+                                       Register new_val,
+                                       enum operand_size size,
+                                       Assembler::Aqrl release) {
+  if (size == int64) {
+    sc_d(t0, new_val, addr, release);
+  } else if (size == int32 || size == uint32) {
+    sc_w(t0, new_val, addr, release);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// Shared set-up for the byte/halfword compare-and-swap routines, which
+// operate on the enclosing 4-byte-aligned word. On return:
+//   t1   (aligned_addr) = addr with its low two bits cleared
+//   tmp1 (shift)        = (addr & 3) * 8, the bit offset of the value in that word
+//   tmp2 (mask)         = 0xff or 0xffff, shifted left by shift
+//   tmp3 (not_mask)     = bitwise complement of mask
+//   expected, new_val   = shifted left by shift and masked (both are clobbered)
+void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
+                                                 Register new_val,
+                                                 enum operand_size size,
+                                                 Register tmp1, Register tmp2, Register tmp3) {
+  assert(size == int8 || size == int16, "unsupported operand size");
+
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
+
+  // shift = byte offset within the word, converted to a bit count.
+  andi(shift, addr, 3);
+  slli(shift, shift, 3);
+
+  andi(aligned_addr, addr, ~3);
+
+  if (size == int8) {
+    mv(mask, 0xff);
+  } else {
+    // size == int16 case
+    // Build 0xffff as all-ones zero-extended to 16 bits.
+    mv(mask, -1);
+    zero_extend(mask, mask, 16);
+  }
+  sll(mask, mask, shift);
+
+  // not_mask = ~mask (xor with all ones).
+  xori(not_mask, mask, -1);
+
+  // Move expected and new_val into position and drop any bits outside the field.
+  sll(expected, expected, shift);
+  andr(expected, expected, mask);
+
+  sll(new_val, new_val, shift);
+  andr(new_val, new_val, mask);
+}
+
+// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
+// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
+// which are forced to work with 4-byte aligned address.
+//
+// result: with result_as_bool, 1 on success and 0 on failure; otherwise the
+//         sign-extended 8- or 16-bit value taken from the word that was loaded.
+// The store-conditional is retried until it succeeds or the comparison fails.
+void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
+                                          Register new_val,
+                                          enum operand_size size,
+                                          Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                          Register result, bool result_as_bool,
+                                          Register tmp1, Register tmp2, Register tmp3) {
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+  assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+  // Sets up aligned_addr/shift/mask/not_mask and pre-shifts expected and new_val.
+  cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+  Label retry, fail, done;
+
+  bind(retry);
+  lr_w(old, aligned_addr, acquire);
+  // Compare only the selected byte/halfword of the loaded word.
+  andr(tmp, old, mask);
+  bne(tmp, expected, fail);
+
+  // Splice new_val into the loaded word, preserving the bits outside the field.
+  andr(tmp, old, not_mask);
+  orr(tmp, tmp, new_val);
+  sc_w(tmp, tmp, aligned_addr, release);
+  // Non-zero status: the store-conditional failed, so try again.
+  bnez(tmp, retry);
+
+  if (result_as_bool) {
+    mv(result, 1);
+    j(done);
+
+    bind(fail);
+    mv(result, zr);
+
+    bind(done);
+  } else {
+    // Success path: recompute the old field so both paths reach 'fail' with
+    // the masked old value in tmp.
+    andr(tmp, old, mask);
+
+    bind(fail);
+    srl(result, tmp, shift);
+
+    if (size == int8) {
+      sign_extend(result, result, 8);
+    } else {
+      // size == int16 case
+      sign_extend(result, result, 16);
+    }
+  }
+}
+
+// weak_cmpxchg_narrow_value is the weak version of cmpxchg_narrow_value, used to
+// implement weak CAS. The major difference is that it does not retry: a failed
+// store conditional is reported as failure.
+// result is set to 1 on success and 0 on failure. Like the strong version, it
+// clobbers t0, t1, expected, new_val and the three temporaries.
+void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
+                                               Register new_val,
+                                               enum operand_size size,
+                                               Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                               Register result,
+                                               Register tmp1, Register tmp2, Register tmp3) {
+  Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
+  assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
+  // Sets up aligned_addr/shift/mask/not_mask and pre-shifts expected and new_val.
+  cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
+
+  Label fail, done;
+
+  lr_w(old, aligned_addr, acquire);
+  // Compare only the selected byte/halfword of the loaded word.
+  andr(tmp, old, mask);
+  bne(tmp, expected, fail);
+
+  // Splice new_val into the loaded word, preserving the bits outside the field.
+  andr(tmp, old, not_mask);
+  orr(tmp, tmp, new_val);
+  sc_w(tmp, tmp, aligned_addr, release);
+  // Non-zero status: the store-conditional failed; report failure without retrying.
+  bnez(tmp, fail);
+
+  // Success
+  mv(result, 1);
+  j(done);
+
+  // Fail
+  bind(fail);
+  mv(result, zr);
+
+  bind(done);
+}
+
+// Emit a strong compare-and-swap on the word at addr for the sizes supported by
+// load_reserved/store_conditional (int8 and int16 are rejected; see
+// cmpxchg_narrow_value). t0 is clobbered: it holds the loaded value and then
+// the store-conditional status.
+// result: with result_as_bool, 1 on success and 0 on failure; otherwise
+//         expected on success and the value actually loaded on failure.
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+                             Register new_val,
+                             enum operand_size size,
+                             Assembler::Aqrl acquire, Assembler::Aqrl release,
+                             Register result, bool result_as_bool) {
+  assert(size != int8 && size != int16, "unsupported operand size");
+
+  Label retry_load, done, ne_done;
+  bind(retry_load);
+  load_reserved(addr, size, acquire);
+  bne(t0, expected, ne_done);
+  store_conditional(addr, new_val, size, release);
+  // Non-zero status in t0: the store-conditional failed, so try again.
+  bnez(t0, retry_load);
+
+  // equal, succeed
+  if (result_as_bool) {
+    mv(result, 1);
+  } else {
+    mv(result, expected);
+  }
+  j(done);
+
+  // not equal, failed
+  bind(ne_done);
+  if (result_as_bool) {
+    mv(result, zr);
+  } else {
+    // t0 still holds the value observed by the load-reserved.
+    mv(result, t0);
+  }
+
+  bind(done);
+}
+
+// Emit a weak compare-and-swap on the word at addr: a single
+// load-reserved/store-conditional attempt with no retry, so a failed
+// store-conditional is reported as failure.
+// result is set to 1 on success and 0 on failure. t0 is clobbered.
+// As in cmpxchg, int8 and int16 are rejected up front (see
+// weak_cmpxchg_narrow_value for those sizes).
+void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
+                                  Register new_val,
+                                  enum operand_size size,
+                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                  Register result) {
+  assert(size != int8 && size != int16, "unsupported operand size");
+
+  Label fail, done;
+  load_reserved(addr, size, acquire);
+  bne(t0, expected, fail);
+  store_conditional(addr, new_val, size, release);
+  // Non-zero status in t0: the store-conditional failed.
+  bnez(t0, fail);
+
+  // Success
+  mv(result, 1);
+  j(done);
+
+  // Fail
+  bind(fail);
+  mv(result, zr);
+
+  bind(done);
+}
+
+// Defines MacroAssembler::atomic_<NAME>(prev, incr, addr), which emits AOP on
+// addr with ordering ACQUIRE | RELEASE. A constant incr is first loaded into
+// t0; an invalid prev is replaced by zr.
+#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
+  const Assembler::Aqrl ordering = (Assembler::Aqrl)(ACQUIRE | RELEASE);                    \
+  if (!prev->is_valid()) {                                                                  \
+    prev = zr;                                                                              \
+  }                                                                                         \
+  Register addend = t0;                                                                     \
+  if (incr.is_register()) {                                                                 \
+    addend = incr.as_register();                                                            \
+  } else {                                                                                  \
+    mv(t0, incr.as_constant());                                                             \
+  }                                                                                         \
+  AOP(prev, addr, addend, ordering);                                                        \
+}
+
+ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
+ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_OP
+
+// Defines MacroAssembler::atomic_<OP>(prev, newv, addr), which emits AOP on
+// addr with newv as the operand and ordering ACQUIRE | RELEASE. An invalid
+// prev is replaced by zr.
+#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE)                                       \
+void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {      \
+  const Assembler::Aqrl ordering = (Assembler::Aqrl)(ACQUIRE | RELEASE);             \
+  if (!prev->is_valid()) {                                                           \
+    prev = zr;                                                                       \
+  }                                                                                  \
+  AOP(prev, addr, newv, ordering);                                                   \
+}
+
+ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
+ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
+ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
+
+#undef ATOMIC_XCHG
+
+// Defines MacroAssembler::atomic_<OP1>(prev, newv, addr) as atomic_<OP2>
+// followed by a zero-extension of prev to 32 bits.
+#define ATOMIC_XCHGU(OP1, OP2)                                                       \
+void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) {     \
+  atomic_##OP2(prev, newv, addr);                                                    \
+  zero_extend(prev, prev, 32);                                                       \
+}
+
+ATOMIC_XCHGU(xchgwu, xchgw)
+ATOMIC_XCHGU(xchgalwu, xchgalw)
+
+#undef ATOMIC_XCHGU
+
+// Emit a jump (no link) to a code-cache target. When far_branches() is false
+// a plain j is emitted; otherwise the target is reached through la_patchable
+// into tmp followed by jalr. If cbuf is non-NULL its instruction mark is set
+// immediately before the jump instruction.
+void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+
+  if (!far_branches()) {
+    if (cbuf != NULL) {
+      cbuf->set_insts_mark();
+    }
+    j(entry);
+    return;
+  }
+
+  // We can use auipc + jalr here because we know that the total size of
+  // the code cache cannot exceed 2Gb.
+  int32_t lo12 = 0;
+  la_patchable(tmp, entry, lo12);
+  if (cbuf != NULL) {
+    cbuf->set_insts_mark();
+  }
+  jalr(x0, tmp, lo12);
+}
+
+// Emit a call (linking through x1) to a code-cache target. When
+// far_branches() is false a plain jal is emitted; otherwise the target is
+// reached through la_patchable into tmp followed by jalr. If cbuf is non-NULL
+// its instruction mark is set immediately before the call instruction.
+void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
+  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
+  assert(CodeCache::find_blob(entry.target()) != NULL,
+         "destination of far call not found in code cache");
+
+  if (!far_branches()) {
+    if (cbuf != NULL) {
+      cbuf->set_insts_mark();
+    }
+    jal(entry); // link
+    return;
+  }
+
+  // We can use auipc + jalr here because we know that the total size of
+  // the code cache cannot exceed 2Gb.
+  int32_t lo12 = 0;
+  la_patchable(tmp, entry, lo12);
+  if (cbuf != NULL) {
+    cbuf->set_insts_mark();
+  }
+  jalr(x1, tmp, lo12); // link
+}
+
+// Fast path of the subtype check.
+//   sub_klass, super_klass: the classes being compared (not modified here)
+//   tmp_reg:                receives the super check offset when
+//                           super_check_offset is noreg
+//   L_success / L_failure / L_slow_path: branch targets; at most one may be
+//                           NULL, meaning "fall through" for that outcome
+//   super_check_offset:     optional register already holding the offset
+// Clobbers t0 and t1.
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register tmp_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                                   Register super_check_offset) {
+  assert_different_registers(sub_klass, super_klass, tmp_reg);
+  bool must_load_sco = (super_check_offset == noreg);
+  if (must_load_sco) {
+    assert(tmp_reg != noreg, "supply either a temp or a register offset");
+  } else {
+    assert_different_registers(sub_klass, super_klass, super_check_offset);
+  }
+
+  // Replace a NULL label by the local fall-through label.
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // Hacked jmp, which may only be used just before L_fallthrough.
+#define final_jmp(label)                                                \
+  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
+  else                            j(label)             /*omit semi*/
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  beq(sub_klass, super_klass, *L_success);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    lwu(tmp_reg, super_check_offset_addr);
+    super_check_offset = tmp_reg;
+  }
+  add(t0, sub_klass, super_check_offset);
+  Address super_check_addr(t0);
+  ld(t0, super_check_addr); // load displayed supertype
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  beq(super_klass, t0, *L_success);
+  // An offset equal to the secondary super cache offset means the slow path
+  // is needed; any other offset is a definite failure.
+  mv(t1, sc_offset);
+  if (L_failure == &L_fallthrough) {
+    beq(super_check_offset, t1, *L_slow_path);
+  } else {
+    bne(super_check_offset, t1, *L_failure, /* is_far */ true);
+    final_jmp(*L_slow_path);
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jmp
+}
+
+// Scans count pointer-sized words at [addr] for an occurrence of value
+// (generic version).
+// On exit tmp holds the last word loaded; it is left untouched when count is
+// zero on entry. addr and count are advanced/decremented as the scan
+// proceeds, so both are clobbered.
+void MacroAssembler::repne_scan(Register addr, Register value, Register count,
+                                Register tmp) {
+  Label L_next_word, L_finished;
+  beqz(count, L_finished);
+  bind(L_next_word);
+  ld(tmp, addr);
+  beq(value, tmp, L_finished);
+  add(addr, addr, wordSize);
+  sub(count, count, 1);
+  bnez(count, L_next_word);
+  bind(L_finished);
+}
+
+// Slow path of the subtype check: linearly scan sub_klass's secondary
+// supers array for super_klass and, on a hit, store super_klass into
+// sub_klass's secondary super cache.
+//   tmp1_reg, tmp2_reg: registers the caller allows to be clobbered; x10, x12
+//                       and x15 are spilled around the scan unless they are
+//                       one of these temps
+//   L_success / L_failure: branch targets; at most one may be NULL, meaning
+//                       "fall through" for that outcome
+// Clobbers t0 and t1.
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register tmp1_reg,
+                                                   Register tmp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass, tmp1_reg);
+  if (tmp2_reg != noreg) {
+    assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
+  }
+#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
+
+  // Replace a NULL label by the local fall-through label.
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // A couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(     sub_klass, sc_offset);
+
+  BLOCK_COMMENT("check_klass_subtype_slow_path");
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+  // The repne_scan instruction uses fixed registers, which we must spill.
+  // Don't worry too much about pre-existing connections with the input regs.
+
+  assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
+  // NOTE(review): in this routine x12 is overwritten by the array-length load
+  // below; the counter address is materialized in t1, not x12.
+  assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
+
+  RegSet pushed_registers;
+  if (!IS_A_TEMP(x12)) {
+    pushed_registers += x12;
+  }
+  if (!IS_A_TEMP(x15)) {
+    pushed_registers += x15;
+  }
+
+  if (super_klass != x10 || UseCompressedOops) {
+    if (!IS_A_TEMP(x10)) {
+      pushed_registers += x10;
+    }
+  }
+
+  push_reg(pushed_registers, sp);
+
+  // Get super_klass value into x10 (even if it was in x15 or x12)
+  mv(x10, super_klass);
+
+#ifndef PRODUCT
+  // Non-product builds only: increment the partial-subtype counter.
+  mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
+  Address pst_counter_addr(t1);
+  ld(t0, pst_counter_addr);
+  add(t0, t0, 1);
+  sd(t0, pst_counter_addr);
+#endif // PRODUCT
+
+  // We will consult the secondary-super array.
+  ld(x15, secondary_supers_addr);
+  // Load the array length.
+  lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
+  // Skip to start of data.
+  add(x15, x15, Array<Klass*>::base_offset_in_bytes());
+
+  // Set t0 to an obvious invalid value, falling through by default
+  mv(t0, -1);
+  // Scan X12 words at [X15] for an occurrence of X10.
+  repne_scan(x15, x10, x12, t0);
+
+  // pop will restore x10, so we should use a temp register to keep its value
+  mv(t1, x10);
+
+  // Unspill the temp registers:
+  pop_reg(pushed_registers, sp);
+
+  // t0 holds the last word examined by the scan (or -1 if none was);
+  // a mismatch with the sought klass means it was not found.
+  bne(t1, t0, *L_failure);
+
+  // Success. Cache the super we found and proceed in triumph.
+  sd(super_klass, super_cache_addr);
+
+  if (L_success != &L_fallthrough) {
+    j(*L_success);
+  }
+
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
+// Pure delegation: the active barrier set's assembler emits the code.
+void MacroAssembler::tlab_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp1,
+                                   Register tmp2,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSet::barrier_set()->barrier_set_assembler()->tlab_allocate(
+      this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
+}
+
+// Defines obj, preserves var_size_in_bytes.
+// Pure delegation: the active barrier set's assembler emits the code.
+void MacroAssembler::eden_allocate(Register obj,
+                                   Register var_size_in_bytes,
+                                   int con_size_in_bytes,
+                                   Register tmp,
+                                   Label& slow_case,
+                                   bool is_far) {
+  BarrierSet::barrier_set()->barrier_set_assembler()->eden_allocate(
+      this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far);
+}
+
+
+// get_thread() can be called anywhere inside generated code so we
+// need to save whatever non-callee save context might get clobbered
+// by the call to Thread::current() or, indeed, the call setup code.
+// The result is left in 'thread'.
+void MacroAssembler::get_thread(Register thread) {
+  // Everything the call may clobber, minus the register receiving the result.
+  const RegSet call_clobbered = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
+                                RegSet::range(x28, x31) + ra - thread;
+  push_reg(call_clobbered, sp);
+
+  // Call Thread::current() through ra.
+  mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
+  jalr(ra);
+
+  // The return value arrives in c_rarg0; move it unless it is already in place.
+  if (thread != c_rarg0) {
+    mv(thread, c_rarg0);
+  }
+
+  pop_reg(call_clobbered, sp);
+}
+
+// Load the card table's byte_map_base as a 64-bit constant into reg.
+// The active barrier set is cast to CardTableBarrierSet without a check.
+void MacroAssembler::load_byte_map_base(Register reg) {
+  CardTableBarrierSet* ctbs = (CardTableBarrierSet*)(BarrierSet::barrier_set());
+  jbyte* map_base = ctbs->card_table()->byte_map_base();
+  mv(reg, (uint64_t)map_base);
+}
+
+// Load the address of the literal 'dest' in patchable form.
+// reg1 receives the upper part of the address and 'offset' receives the
+// remaining signed low part, which the caller must apply in the following
+// instruction (e.g. as the displacement of a load or jalr).
+// A relocation for dest is recorded before the first emitted instruction.
+// If dest is within PC-relative reach of the whole code cache an auipc is
+// emitted; otherwise movptr_with_offset materializes the absolute address.
+void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
+  unsigned long low_address = (uintptr_t)CodeCache::low_bound();
+  unsigned long high_address = (uintptr_t)CodeCache::high_bound();
+  unsigned long dest_address = (uintptr_t)dest.target();
+  // Distances from both ends of the code cache, so the reach test holds for
+  // any pc inside it.
+  long offset_low = dest_address - low_address;
+  long offset_high = dest_address - high_address;
+
+  assert(is_valid_riscv64_address(dest.target()), "bad address");
+  assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
+
+  relocate(dest.rspec());
+  // RISC-V doesn't compute a page-aligned address, in order to partially
+  // compensate for the use of *signed* offsets in its base+disp12
+  // addressing mode (RISC-V's PC-relative reach remains asymmetric
+  // [-(2G + 2K), 2G - 2k).
+  if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
+    int64_t distance = dest.target() - pc();
+    // Adding 0x800 rounds the upper part so that the signed low 12 bits
+    // returned in 'offset' complete the address.
+    auipc(reg1, (int32_t)distance + 0x800);
+    // Sign-extend the low 12 bits of the distance.
+    offset = ((int32_t)distance << 20) >> 20;
+  } else {
+    movptr_with_offset(reg1, dest.target(), offset);
+  }
+}
+
+// Allocate a frame of framesize bytes and save fp and ra in its two topmost
+// words. With PreserveFramePointer, fp is then set to the sp value on entry.
+void MacroAssembler::build_frame(int framesize) {
+  assert(framesize >= 2, "framesize must include space for FP/RA");
+  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+
+  const int fp_slot = framesize - 2 * wordSize;
+  const int ra_slot = framesize - wordSize;
+
+  sub(sp, sp, framesize);
+  sd(fp, Address(sp, fp_slot));
+  sd(ra, Address(sp, ra_slot));
+  if (PreserveFramePointer) {
+    add(fp, sp, framesize);
+  }
+}
+
+void MacroAssembler::remove_frame(int framesize) {  // inverse of build_frame: reload fp/ra from the top two words and release 'framesize' bytes
+  assert(framesize >= 2, "framesize must include space for FP/RA");  // NOTE(review): framesize is used as a byte count below; 2 * wordSize may be the intended bound - confirm
+  assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
+  ld(fp, Address(sp, framesize - 2 * wordSize));  // same slot build_frame stored fp into
+  ld(ra, Address(sp, framesize - wordSize));      // same slot build_frame stored ra into
+  add(sp, sp, framesize);
+}
+
+void MacroAssembler::reserved_stack_check() {  // compare sp with the thread's reserved-stack activation address; if not below it, re-enable the zone and throw the delayed StackOverflowError
+    // testing if reserved zone needs to be enabled
+    Label no_reserved_zone_enabling;
+
+    ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
+    bltu(sp, t0, no_reserved_zone_enabling);  // sp (unsigned) below the activation address: skip everything
+
+    enter();   // RA and FP are live.
+    mv(c_rarg0, xthread);
+    int32_t offset = 0;
+    la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
+    jalr(x1, t0, offset);  // call enable_stack_reserved_zone with xthread as first argument, linking through x1
+    leave();
+
+    // We have already removed our own frame.
+    // throw_delayed_StackOverflowError will think that it's been
+    // called by our caller.
+    offset = 0;
+    la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
+    jalr(x0, t0, offset);  // jump without linking: x0 as the link register discards the return address
+    should_not_reach_here();
+
+    bind(no_reserved_zone_enabling);
+}
+
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {  // atomically add 1 to the 32-bit word at counter_addr with an lr.w/sc.w retry loop; clobbers tmp
+  Label retry_load;
+  bind(retry_load);
+  // load-reserved the current 32-bit value from the memory location
+  lr_w(tmp, counter_addr);
+  addw(tmp, tmp, 1);
+  // store-conditional: tmp is reused for the status and will be zero if the store succeeded
+  sc_w(tmp, tmp, counter_addr);
+  bnez(tmp, retry_load);  // non-zero status: the reservation was lost, start over
+}
+
+void MacroAssembler::load_prototype_header(Register dst, Register src) {  // dst = prototype header of the klass of the object in src
+  load_klass(dst, src);  // dst temporarily holds the klass pointer
+  ld(dst, Address(dst, Klass::prototype_header_offset()));
+}
+
+int MacroAssembler::biased_locking_enter(Register lock_reg,  // emits the biased-locking entry sequence; returns the code offset of the mark-word load, or -1 if none was emitted
+                                         Register obj_reg,   // object being locked
+                                         Register swap_reg,  // holds (or receives) the object's mark word
+                                         Register tmp_reg,   // scratch
+                                         bool swap_reg_contains_mark,  // true: caller already loaded the mark word into swap_reg
+                                         Label& done,        // target once the bias has been acquired or confirmed
+                                         Label* slow_case,   // passed to cmpxchg_obj_header as the failure target for the bias CASes
+                                         BiasedLockingCounters* counters,  // optional statistics counters
+                                         Register flag) {    // optional result register, written only when flag->is_valid()
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert_different_registers(lock_reg, obj_reg, swap_reg);
+
+  if (PrintBiasedLockingStatistics && counters == NULL)
+    counters = BiasedLocking::counters();
+
+  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0);  // t0 is used as an extra scratch register throughout
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+  int null_check_offset = -1;
+  if (!swap_reg_contains_mark) {
+    null_check_offset = offset();  // code offset of the mark-word load emitted next
+    ld(swap_reg, mark_addr);
+  }
+  andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place);
+  li(t0, markOopDesc::biased_lock_pattern);
+  bne(t0, tmp_reg, cas_label);  // no bias pattern in the mark word: skip to the end of this sequence
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_prototype_header(tmp_reg, obj_reg);
+  orr(tmp_reg, tmp_reg, xthread);
+  xorr(tmp_reg, swap_reg, tmp_reg);
+  andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place));  // ignore the age bits in the comparison
+  if (flag->is_valid()) {
+    mv(flag, tmp_reg);
+  }
+  if (counters != NULL) {
+    Label around;
+    bnez(tmp_reg, around);
+    atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0);
+    j(done);
+    bind(around);
+  } else {
+    beqz(tmp_reg, done);  // mark word equals (prototype header | xthread) apart from age: already biased to us
+  }
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  bnez(t0, try_revoke_bias);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place);
+  bnez(t0, try_rebias);
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go in to the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+  {
+    Label cas_success;
+    Label counter;
+    mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
+    andr(swap_reg, swap_reg, t0);    // expected value: mark word with only the bias, age and epoch bits kept
+    orr(tmp_reg, swap_reg, xthread);  // new value: the same bits with the current thread installed
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
+    // cas failed here if slow_case == NULL
+    if (flag->is_valid()) {
+      mv(flag, 1);
+      j(counter);
+    }
+    // If the biasing toward our thread failed, this means that
+    // another thread succeeded in biasing it toward itself and we
+    // need to revoke that bias. The revocation will occur in the
+    // interpreter runtime in the slow case.
+    bind(cas_success);
+    if (flag->is_valid()) {
+      mv(flag, 0);
+      bind(counter);  // the flag == 1 path above rejoins here
+    }
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
+                  tmp_reg, t0);
+    }
+  }
+  j(done);
+
+  bind(try_rebias);
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label cas_success;
+    Label counter;
+    load_prototype_header(tmp_reg, obj_reg);
+    orr(tmp_reg, xthread, tmp_reg);  // new value: prototype header with the current thread installed
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case);
+    // cas failed here if slow_case == NULL
+    if (flag->is_valid()) {
+      mv(flag, 1);
+      j(counter);
+    }
+
+    // If the biasing toward our thread failed, then another thread
+    // succeeded in biasing it toward itself and we need to revoke that
+    // bias. The revocation will occur in the runtime in the slow case.
+    bind(cas_success);
+    if (flag->is_valid()) {
+      mv(flag, 0);
+      bind(counter);  // the flag == 1 path above rejoins here
+    }
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
+                  tmp_reg, t0);
+    }
+  }
+  j(done);
+
+  bind(try_revoke_bias);
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+  //
+  // FIXME: due to a lack of registers we currently blow away the age
+  // bits in this situation. Should attempt to preserve them.
+  {
+    Label cas_success, nope;
+    load_prototype_header(tmp_reg, obj_reg);
+    cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope);  // failure goes to the local 'nope', not to slow_case
+    bind(cas_success);
+
+    // Fall through to the normal CAS-based lock, because no matter what
+    // the result of the above CAS, some thread must have succeeded in
+    // removing the bias bit from the object's header.
+    if (counters != NULL) {
+      atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
+                  t0);
+    }
+    bind(nope);
+  }
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) {  // branch to 'done' when obj_reg's mark word carries the biased-lock pattern
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
+  sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern);  // zero iff the masked bits equal the biased pattern
+  if (flag->is_valid()) { mv(flag, tmp_reg); }  // optionally expose the comparison result (0 == biased) to the caller
+  beqz(tmp_reg, done);
+}
+
+// Move the address of the polling page into dest; 'offset' is only written on the la_patchable path.
+void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) {
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    ld(dest, Address(xthread, Thread::polling_page_offset()));  // thread-local polling: the address is read from the current thread; 'page' and 'rtype' are unused
+  } else {
+    uint64_t align = (uint64_t)page & 0xfff;  // low 12 bits of the page address, checked to be zero below
+    assert(align == 0, "polling page must be page aligned");
+    la_patchable(dest, Address(page, rtype), offset);
+  }
+}
+
+// Read the polling page.  The address of the polling page is first
+// computed into dest by get_polling_page; nothing needs to be preloaded.
+void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) {
+  int32_t offset = 0;  // stays 0 unless get_polling_page takes its la_patchable path
+  get_polling_page(dest, page, offset, rtype);
+  read_polling_page(dest, offset, rtype);
+}
+
+// Read the polling page.  The address of the polling page, less
+// 'offset', must already be in dest.
+void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) {
+  relocate(rtype);  // the relocation is attached to the load emitted next
+  lwu(zr, Address(dest, offset));  // destination is zr: the loaded value is discarded
+}
+
+void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {  // emit an oop-relocated 32-bit constant load into dst for 'obj'
+#ifdef ASSERT
+  {
+    ThreadInVMfromUnknown tiv;
+    assert (UseCompressedOops, "should only be used for compressed oops");
+    assert (Universe::heap() != NULL, "java heap should be initialized");
+    assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+    assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+  }
+#endif
+  int oop_index = oop_recorder()->find_index(obj);
+  relocate(oop_Relocation::spec(oop_index));
+  li32(dst, 0xDEADBEEF);  // placeholder immediate; presumably patched later through the oop relocation above - confirm
+  zero_extend(dst, dst, 32);  // keep only the low 32 bits in dst
+}
+
+void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {  // emit a metadata-relocated load of k's encoded (narrow) klass value into dst
+  assert (UseCompressedClassPointers, "should only be used for compressed headers");
+  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int index = oop_recorder()->find_index(k);
+  assert(!Universe::heap()->is_in_reserved(k), "should not be an oop");
+
+  narrowKlass nk = Klass::encode_klass(k);  // unlike set_narrow_oop, the real encoded value is emitted, not a placeholder
+  relocate(metadata_Relocation::spec(index));
+  li32(dst, nk);
+  zero_extend(dst, dst, 32);  // keep only the low 32 bits in dst
+}
+
+// Maybe emit a call via a trampoline.  If the code cache is small
+// trampolines won't be emitted.  Returns NULL if the stub could not be emitted.
+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
+  assert(JavaThread::current()->is_Compiler_thread(), "just checking");
+  assert(entry.rspec().type() == relocInfo::runtime_call_type ||
+         entry.rspec().type() == relocInfo::opt_virtual_call_type ||
+         entry.rspec().type() == relocInfo::static_call_type ||
+         entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+  // We need a trampoline if branches are far.
+  if (far_branches()) {
+    bool in_scratch_emit_size = false;
+#ifdef COMPILER2
+    // We don't want to emit a trampoline if C2 is generating dummy
+    // code during its branch shortening phase.
+    CompileTask* task = ciEnv::current()->task();
+    in_scratch_emit_size =
+      (task != NULL && is_c2_compile(task->comp_level()) &&
+       Compile::current()->in_scratch_emit_size());
+#endif
+    if (!in_scratch_emit_size) {
+      address stub = emit_trampoline_stub(offset(), entry.target());  // offset() is where the call instruction will be emitted below
+      if (stub == NULL) {
+        postcond(pc() == badAddress);
+        return NULL; // CodeCache is full
+      }
+    }
+  }
+
+  if (cbuf != NULL) { cbuf->set_insts_mark(); }  // mark the position of the call instruction in the caller's code buffer
+#ifdef ASSERT
+  if (entry.rspec().type() != relocInfo::runtime_call_type) {
+    assert_alignment(pc());
+  }
+#endif
+  relocate(entry.rspec());
+  if (!far_branches()) {
+    jal(entry.target());  // near case: call the target directly
+  } else {
+    jal(pc());  // far case: placeholder target (the call's own pc); presumably redirected to the trampoline later - confirm
+  }
+  // just need to return a non-null address
+  postcond(pc() != badAddress);
+  return pc();
+}
+
+address MacroAssembler::ic_call(address entry, jint method_index) {  // emit a virtual call: load the non-oop sentinel word into t1, then a trampoline call to 'entry'
+  RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);  // pc() here is the start of the movptr emitted next
+  movptr(t1, (address)Universe::non_oop_word());
+  assert_cond(entry != NULL);
+  return trampoline_call(Address(entry, rh));  // NULL from trampoline_call is passed straight through
+}
+
+// Emit a trampoline stub for a call to a target which is too far away.
+//
+// code sequences:
+//
+// call-site:
+//   branch-and-link to <destination> or <trampoline stub>
+//
+// Related trampoline stub for this call site in the stub section:
+//   load the call target from the 64-bit data word that follows the stub code
+//   branch (RA still points to the call site above)
+
+address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
+                                             address dest) {
+  address stub = start_a_stub(NativeInstruction::instruction_size
+                            + NativeCallTrampolineStub::instruction_size);
+  if (stub == NULL) {
+    return NULL;  // CodeBuffer::expand failed
+  }
+
+  // Create a trampoline stub relocation which relates this trampoline stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+
+  // Align the stub start so that the destination word, which follows the
+  // three stub instructions (auipc + ld + jalr), ends up wordSize-aligned.
+  // When we reach here we may only have 2-byte alignment, so align explicitly.
+  align(wordSize, NativeCallTrampolineStub::data_offset);
+
+  relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
+                                            insts_call_instruction_offset));
+  const int stub_start_offset = offset();
+
+  // Now, create the trampoline stub's code:
+  // - load the call
+  // - call
+  Label target;
+  ld(t0, target);  // auipc + ld
+  jr(t0);          // jalr
+  bind(target);    // 'target' labels the data word emitted below
+  assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+         "should be");
+  assert(offset() % wordSize == 0, "bad alignment");
+  emit_int64((intptr_t)dest);  // the call destination, stored as data right after the stub code
+
+  const address stub_start_addr = addr_at(stub_start_offset);
+
+  assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
+
+  end_a_stub();
+  return stub_start_addr;
+}
+
+Address MacroAssembler::add_memory_helper(const Address dst) {
+  // Turn dst into an address a following load/store can use; t1 is the
+  // register handed to form_address/la for any address computation.
+  if (dst.getMode() == Address::base_plus_offset) {
+    // Expected case: base register plus displacement.
+    return form_address(t1, dst.base(), dst.offset());
+  }
+  // Every other addressing form: compute the address into t1 first.
+  la(t1, dst);
+  return Address(t1);
+}
+
+void MacroAssembler::increment(const Address dst, int64_t value) {  // non-atomic 64-bit read-modify-write: *dst += value; uses t0 and t1
+  assert(((dst.getMode() == Address::base_plus_offset &&
+           is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
+          "invalid value and address mode combination");
+  Address adr = add_memory_helper(dst);  // may compute the address into t1
+  assert(!adr.uses(t0), "invalid dst for address increment");  // t0 is about to be overwritten with the loaded value
+  ld(t0, adr);
+  add(t0, t0, value, t1);  // t1 is passed as the temp for the immediate add
+  sd(t0, adr);
+}
+
+void MacroAssembler::incrementw(const Address dst, int32_t value) {  // non-atomic 32-bit read-modify-write: *dst += value; uses t0 and t1
+  assert(((dst.getMode() == Address::base_plus_offset &&
+           is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
+          "invalid value and address mode combination");
+  Address adr = add_memory_helper(dst);  // may compute the address into t1
+  assert(!adr.uses(t0), "invalid dst for address increment");  // t0 is about to be overwritten with the loaded value
+  lwu(t0, adr);
+  addw(t0, t0, value, t1);  // t1 is passed as the temp for the immediate add
+  sw(t0, adr);
+}
+
+void MacroAssembler::decrement(const Address dst, int64_t value) {  // non-atomic 64-bit read-modify-write: *dst -= value; uses t0 and t1
+  assert(((dst.getMode() == Address::base_plus_offset &&
+           is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
+          "invalid value and address mode combination");
+  Address adr = add_memory_helper(dst);  // may compute the address into t1
+  assert(!adr.uses(t0), "invalid dst for address decrement");  // t0 is about to be overwritten with the loaded value
+  ld(t0, adr);
+  sub(t0, t0, value, t1);  // t1 is passed as the temp for the immediate subtract
+  sd(t0, adr);
+}
+
+void MacroAssembler::decrementw(const Address dst, int32_t value) {  // non-atomic 32-bit read-modify-write: *dst -= value; uses t0 and t1
+  assert(((dst.getMode() == Address::base_plus_offset &&
+           is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
+          "invalid value and address mode combination");
+  Address adr = add_memory_helper(dst);  // may compute the address into t1
+  assert(!adr.uses(t0), "invalid dst for address decrement");  // t0 is about to be overwritten with the loaded value
+  lwu(t0, adr);
+  subw(t0, t0, value, t1);  // t1 is passed as the temp for the immediate subtract
+  sw(t0, adr);
+}
+
+void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {  // branch to 'equal' when src1 equals the 64-bit word stored at literal address src2; clobbers t0
+  assert_different_registers(src1, t0);
+  int32_t offset = 0;  // initialised like the other la_patchable call sites; la_patchable supplies the real value
+  la_patchable(t0, src2, offset);
+  ld(t0, Address(t0, offset));  // load the word at src2 (upper part in t0 plus low-part offset)
+  beq(src1, t0, equal);
+}
+
+// string indexof
+// compute index by trailing zeros: advance haystack/result to the match and reload ch2
+void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
+                                   Register match_mask, Register result,
+                                   Register ch2, Register tmp,
+                                   bool haystack_isL)
+{
+  int haystack_chr_shift = haystack_isL ? 0 : 1;  // log2 of the haystack character size in bytes
+  srl(match_mask, match_mask, trailing_zeros);
+  srli(match_mask, match_mask, 1);  // together with the srl above: shift the current match bit out of the mask
+  srli(tmp, trailing_zeros, LogBitsPerByte);  // bit position -> byte offset of the match
+  if (!haystack_isL) andi(tmp, tmp, 0xE);  // UTF16: clear bit 0 so the offset lands on a 2-byte boundary
+  add(haystack, haystack, tmp);
+  ld(ch2, Address(haystack));  // reload 8 bytes starting at the matched position
+  if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);  // UTF16: byte offset -> character offset
+  add(result, result, tmp);
+}
+
+// string indexof
+// Find pattern element in src, compute match mask,
+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index
+// match mask patterns and corresponding indices would be like:
+// - 0x8080808080808080 (Latin1)
+// -   7 6 5 4 3 2 1 0  (match index)
+// - 0x8000800080008000 (UTF16)
+// -   3   2   1   0    (match index)
+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
+                                        Register mask1, Register mask2)  // src is clobbered; mask1/mask2 are presumably the per-element 0x01.. and 0x7f.. constants - confirm against callers
+{
+  xorr(src, pattern, src);       // x = src ^ pattern: elements equal to the pattern become zero
+  sub(match_mask, src, mask1);   // x - mask1
+  orr(src, src, mask2);          // x | mask2
+  notr(src, src);                // ~(x | mask2)
+  andr(match_mask, match_mask, src);  // match_mask = (x - mask1) & ~(x | mask2)
+}
+
+#ifdef COMPILER2
+// Code for BigInteger::mulAdd intrinsic
+// out     = x10
+// in      = x11
+// offset  = x12  (already out.length-offset)
+// len     = x13
+// k       = x14
+// tmp     = x28
+//
+// pseudo code from java implementation:
+// long kLong = k & LONG_MASK;
+// carry = 0;
+// offset = out.length-offset - 1;
+// for (int j = len - 1; j >= 0; j--) {
+//     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
+//     out[offset--] = (int)product;
+//     carry = product >>> 32;
+// }
+// return (int)carry;
+void MacroAssembler::mul_add(Register out, Register in, Register offset,
+                             Register len, Register k, Register tmp) {
+  Label L_tail_loop, L_unroll, L_end;
+  mv(tmp, out);   // keep the out[] base in tmp ...
+  mv(out, zr);    // ... because 'out' now becomes the running carry (left in 'out' at L_end)
+  blez(len, L_end);
+  zero_extend(k, k, 32);
+  slliw(t0, offset, LogBytesPerInt);
+  add(offset, tmp, t0);  // offset is now a pointer: out base + offset * 4
+  slliw(t0, len, LogBytesPerInt);
+  add(in, in, t0);       // in is now a pointer just past in[len - 1]
+
+  const int unroll = 8;
+  mv(tmp, unroll);
+  blt(len, tmp, L_tail_loop);
+  bind(L_unroll);
+  for (int i = 0; i < unroll; i++) {  // emits the loop body 'unroll' times back to back
+    sub(in, in, BytesPerInt);
+    lwu(t0, Address(in, 0));
+    mul(t1, t0, k);
+    add(t0, t1, out);
+    sub(offset, offset, BytesPerInt);
+    lwu(t1, Address(offset, 0));
+    add(t0, t0, t1);
+    sw(t0, Address(offset, 0));
+    srli(out, t0, 32);  // carry = product >>> 32
+  }
+  subw(len, len, tmp);
+  bge(len, tmp, L_unroll);  // at least 'unroll' elements left: run the unrolled body again
+
+  bind(L_tail_loop);  // one element per iteration for the remaining len
+  blez(len, L_end);
+  sub(in, in, BytesPerInt);
+  lwu(t0, Address(in, 0));
+  mul(t1, t0, k);
+  add(t0, t1, out);
+  sub(offset, offset, BytesPerInt);
+  lwu(t1, Address(offset, 0));
+  add(t0, t0, t1);
+  sw(t0, Address(offset, 0));
+  srli(out, t0, 32);  // carry = product >>> 32
+  subw(len, len, 1);
+  j(L_tail_loop);
+
+  bind(L_end);
+}
+
+// add two unsigned input and output carry: dst = src1 + src2, carry = 1 if the sum wrapped, else 0
+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
+{
+  assert_different_registers(dst, carry);
+  assert_different_registers(dst, src2);  // src2 must survive the add: it is compared against the sum below
+  add(dst, src1, src2);
+  sltu(carry, dst, src2);  // the sum wrapped iff it is (unsigned) smaller than an addend
+}
+
+// add two input with carry: dst = src1 + src2 + carry (no carry-out is produced)
+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
+{
+  assert_different_registers(dst, carry);  // carry is read after dst has been written
+  add(dst, src1, src2);
+  add(dst, dst, carry);
+}
+
+// add two unsigned input with carry and output carry: dst = src1 + src2 + carry, carry = (dst < src2)
+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
+{
+  assert_different_registers(dst, src2);  // src2 must survive the add: it is compared against the sum below
+  adc(dst, src1, src2, carry);
+  sltu(carry, dst, src2);  // NOTE(review): misses the carry-out when src1 is all ones and the incoming carry is 1 - confirm callers exclude that case
+}
+
+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,  // final_dest_hi:dest_lo = dest_hi:dest_lo + src1 + src2
+                                     Register src1, Register src2, Register carry)  // 'carry' is scratch here, clobbered by both cad calls
+{
+  cad(dest_lo, dest_lo, src1, carry);
+  add(dest_hi, dest_hi, carry);  // propagate the first carry into the high word (dest_hi itself is modified)
+  cad(dest_lo, dest_lo, src2, carry);
+  add(final_dest_hi, dest_hi, carry);  // propagate the second carry; the result goes to final_dest_hi
+}
+
+/**
+ * Multiply 32 bit by 32 bit first loop: one 32-bit element of y per iteration; uses t0 as scratch.
+ */
+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
+                                           Register y, Register y_idx, Register z,
+                                           Register carry, Register product,
+                                           Register idx, Register kdx)
+{
+  // jlong carry, x[], y[], z[];
+  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //     long product = y[idx] * x[xstart] + carry;
+  //     z[kdx] = (int)product;
+  //     carry = product >>> 32;
+  // }
+  // z[xstart] = (int)carry;
+
+  Label L_first_loop, L_first_loop_exit;
+  blez(idx, L_first_loop_exit);  // nothing to do when idx <= 0
+
+  shadd(t0, xstart, x, t0, LogBytesPerInt);
+  lwu(x_xstart, Address(t0, 0));  // x_xstart = x[xstart] per the pseudo code, loaded once before the loop
+
+  bind(L_first_loop);
+  subw(idx, idx, 1);
+  shadd(t0, idx, y, t0, LogBytesPerInt);
+  lwu(y_idx, Address(t0, 0));
+  mul(product, x_xstart, y_idx);
+  add(product, product, carry);
+  srli(carry, product, 32);  // carry = product >>> 32
+  subw(kdx, kdx, 1);
+  shadd(t0, kdx, z, t0, LogBytesPerInt);
+  sw(product, Address(t0, 0));  // store the low 32 bits of the product at index kdx of z
+  bgtz(idx, L_first_loop);
+
+  bind(L_first_loop_exit);  // the final 'z[xstart] = (int)carry' of the pseudo code is not emitted in this function
+}
+
+/**
+ * Multiply 64 bit by 64 bit first loop: two 32-bit elements of y per iteration; uses t0 and t1 as scratch.
+ */
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                                           Register y, Register y_idx, Register z,
+                                           Register carry, Register product,
+                                           Register idx, Register kdx)
+{
+  //
+  //  jlong carry, x[], y[], z[];
+  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //    huge_128 product = y[idx] * x[xstart] + carry;
+  //    z[kdx] = (jlong)product;
+  //    carry  = (jlong)(product >>> 64);
+  //  }
+  //  z[xstart] = carry;
+  //
+
+  Label L_first_loop, L_first_loop_exit;
+  Label L_one_x, L_one_y, L_multiply;
+
+  subw(xstart, xstart, 1);
+  bltz(xstart, L_one_x);  // only a single 32-bit element of x is available
+
+  shadd(t0, xstart, x, t0, LogBytesPerInt);
+  ld(x_xstart, Address(t0, 0));
+  ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
+
+  bind(L_first_loop);
+  subw(idx, idx, 1);
+  bltz(idx, L_first_loop_exit);  // no elements of y left
+  subw(idx, idx, 1);
+  bltz(idx, L_one_y);  // exactly one 32-bit element of y left
+
+  shadd(t0, idx, y, t0, LogBytesPerInt);
+  ld(y_idx, Address(t0, 0));
+  ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
+  bind(L_multiply);
+
+  mulhu(t0, x_xstart, y_idx);     // high 64 bits of the 128-bit product
+  mul(product, x_xstart, y_idx);  // low 64 bits of the 128-bit product
+  cad(product, product, carry, t1);  // low += carry, t1 = carry-out
+  adc(carry, t0, zr, t1);            // new carry = high + carry-out
+
+  subw(kdx, kdx, 2);
+  ror_imm(product, product, 32); // back to big-endian
+  shadd(t0, kdx, z, t0, LogBytesPerInt);
+  sd(product, Address(t0, 0));
+
+  j(L_first_loop);
+
+  bind(L_one_y);
+  lwu(y_idx, Address(y, 0));  // single remaining element: load the first int of y, zero-extended
+  j(L_multiply);
+
+  bind(L_one_x);
+  lwu(x_xstart, Address(x, 0));  // single element: load the first int of x, zero-extended
+  j(L_first_loop);
+
+  bind(L_first_loop_exit);
+}
+
+/**
+ * Multiply 128 bit by 128 bit. Unrolled inner loop.
+ * Uses t0 and t1 as scratch in addition to the temporaries passed in.
+ */
+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
+                                             Register carry, Register carry2,
+                                             Register idx, Register jdx,
+                                             Register yz_idx1, Register yz_idx2,
+                                             Register tmp, Register tmp3, Register tmp4,
+                                             Register tmp6, Register product_hi)
+{
+  //   jlong carry, x[], y[], z[];
+  //   int kdx = xstart+1;
+  //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
+  //     jlong carry2  = (jlong)(tmp3 >>> 64);
+  //     huge_128 tmp4 = (y[idx]   * product_hi) + z[kdx+idx] + carry2;
+  //     carry  = (jlong)(tmp4 >>> 64);
+  //     z[kdx+idx+1] = (jlong)tmp3;
+  //     z[kdx+idx] = (jlong)tmp4;
+  //   }
+  //   idx += 2;
+  //   if (idx > 0) {
+  //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
+  //     z[kdx+idx] = (jlong)yz_idx1;
+  //     carry  = (jlong)(yz_idx1 >>> 64);
+  //   }
+  //
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  srliw(jdx, idx, 2);  // jdx = idx / 4: number of iterations that each consume four 32-bit elements
+
+  bind(L_third_loop);
+
+  subw(jdx, jdx, 1);
+  bltz(jdx, L_third_loop_exit);
+  subw(idx, idx, 4);
+
+  shadd(t0, idx, y, t0, LogBytesPerInt);
+  ld(yz_idx2, Address(t0, 0));
+  ld(yz_idx1, Address(t0, wordSize));
+
+  shadd(tmp6, idx, z, t0, LogBytesPerInt);  // tmp6 = address of the matching z elements, reused for the stores below
+
+  ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
+  ror_imm(yz_idx2, yz_idx2, 32);
+
+  ld(t1, Address(tmp6, 0));
+  ld(t0, Address(tmp6, wordSize));
+
+  mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
+  mulhu(tmp4, product_hi, yz_idx1);
+
+  ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
+  ror_imm(t1, t1, 32, tmp);
+
+  mul(tmp, product_hi, yz_idx2); //  yz_idx2 * product_hi -> carry2:tmp
+  mulhu(carry2, product_hi, yz_idx2);
+
+  cad(tmp3, tmp3, carry, carry);  // low word += incoming carry; 'carry' becomes the carry-out
+  adc(tmp4, tmp4, zr, carry);
+  cad(tmp3, tmp3, t0, t0);        // low word += z element loaded at tmp6 + wordSize
+  cadc(tmp4, tmp4, tmp, t0);      // middle word += low half of the second product + carry
+  adc(carry, carry2, zr, t0);     // new carry = high half of the second product + carry
+  cad(tmp4, tmp4, t1, carry2);    // middle word += z element loaded at tmp6
+  adc(carry, carry, zr, carry2);
+
+  ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
+  ror_imm(tmp4, tmp4, 32);
+  sd(tmp4, Address(tmp6, 0));
+  sd(tmp3, Address(tmp6, wordSize));
+
+  j(L_third_loop);
+
+  bind(L_third_loop_exit);
+
+  andi(idx, idx, 0x3);  // 0..3 32-bit elements remain
+  beqz(idx, L_post_third_loop_done);
+
+  Label L_check_1;
+  subw(idx, idx, 2);
+  bltz(idx, L_check_1);  // fewer than two elements remain: skip the 64-bit step
+
+  shadd(t0, idx, y, t0, LogBytesPerInt);
+  ld(yz_idx1, Address(t0, 0));
+  ror_imm(yz_idx1, yz_idx1, 32);
+
+  mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
+  mulhu(tmp4, product_hi, yz_idx1);
+
+  shadd(t0, idx, z, t0, LogBytesPerInt);
+  ld(yz_idx2, Address(t0, 0));
+  ror_imm(yz_idx2, yz_idx2, 32, tmp);
+
+  add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);  // carry:tmp3 = tmp4:tmp3 + carry + yz_idx2
+
+  ror_imm(tmp3, tmp3, 32, tmp);
+  sd(tmp3, Address(t0, 0));  // t0 still holds the z address computed above
+
+  bind(L_check_1);
+
+  andi(idx, idx, 0x1);
+  subw(idx, idx, 1);
+  bltz(idx, L_post_third_loop_done);  // no odd element left
+  shadd(t0, idx, y, t0, LogBytesPerInt);
+  lwu(tmp4, Address(t0, 0));
+  mul(tmp3, tmp4, product_hi); //  tmp4 * product_hi -> carry2:tmp3
+  mulhu(carry2, tmp4, product_hi);
+
+  shadd(t0, idx, z, t0, LogBytesPerInt);
+  lwu(tmp4, Address(t0, 0));
+
+  add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);  // carry2:tmp3 += tmp4 + carry; t0 is clobbered as scratch
+
+  shadd(t0, idx, z, t0, LogBytesPerInt);  // recompute the z address because t0 was clobbered above
+  sw(tmp3, Address(t0, 0));
+
+  slli(t0, carry2, 32);
+  srli(carry, tmp3, 32);
+  orr(carry, carry, t0);  // carry = (carry2 << 32) | (tmp3 >>> 32)
+
+  bind(L_post_third_loop_done);
+}
+
+/**
+ * Code for BigInteger::multiplyToLen() intrinsic.
+ *
+ * x10: x
+ * x11: xlen
+ * x12: y
+ * x13: ylen
+ * x14: z
+ * x15: zlen
+ * x16: tmp1
+ * x17: tmp2
+ * x7:  tmp3
+ * x28: tmp4
+ * x29: tmp5
+ * x30: tmp6
+ * x31: tmp7
+ */
+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+                                     Register z, Register zlen,
+                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                                     Register tmp5, Register tmp6, Register product_hi)
+{
+  assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+
+  const Register idx = tmp1;
+  const Register kdx = tmp2;
+  const Register xstart = tmp3;
+
+  const Register y_idx = tmp4;
+  const Register carry = tmp5;
+  const Register product = xlen;
+  const Register x_xstart = zlen; // reuse register
+
+  mv(idx, ylen); // idx = ylen;
+  mv(kdx, zlen); // kdx = xlen+ylen;
+  mv(carry, zr); // carry = 0;
+
+  Label L_multiply_64_x_64_loop, L_done;
+
+  subw(xstart, xlen, 1);
+  bltz(xstart, L_done);
+
+  const Register jdx = tmp1;
+
+  if (AvoidUnalignedAccesses) {
+    // Check if x and y are both 8-byte aligned.
+    orr(t0, xlen, ylen);
+    andi(t0, t0, 0x1);
+    beqz(t0, L_multiply_64_x_64_loop);
+
+    multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+    shadd(t0, xstart, z, t0, LogBytesPerInt);
+    sw(carry, Address(t0, 0));
+
+    Label L_second_loop_unaligned;
+    bind(L_second_loop_unaligned);
+    mv(carry, zr);
+    mv(jdx, ylen);
+    subw(xstart, xstart, 1);
+    bltz(xstart, L_done);
+    sub(sp, sp, 2 * wordSize);
+    sd(z, Address(sp, 0));
+    sd(zr, Address(sp, wordSize));
+    shadd(t0, xstart, z, t0, LogBytesPerInt);
+    addi(z, t0, 4);
+    shadd(t0, xstart, x, t0, LogBytesPerInt);
+    lwu(product, Address(t0, 0));
+    Label L_third_loop, L_third_loop_exit;
+
+    blez(jdx, L_third_loop_exit);
+
+    bind(L_third_loop);
+    subw(jdx, jdx, 1);
+    shadd(t0, jdx, y, t0, LogBytesPerInt);
+    lwu(t0, Address(t0, 0));
+    mul(t1, t0, product);
+    add(t0, t1, carry);
+    shadd(tmp6, jdx, z, t1, LogBytesPerInt);
+    lwu(t1, Address(tmp6, 0));
+    add(t0, t0, t1);
+    sw(t0, Address(tmp6, 0));
+    srli(carry, t0, 32);
+    bgtz(jdx, L_third_loop);
+
+    bind(L_third_loop_exit);
+    ld(z, Address(sp, 0));
+    addi(sp, sp, 2 * wordSize);
+    shadd(t0, xstart, z, t0, LogBytesPerInt);
+    sw(carry, Address(t0, 0));
+
+    j(L_second_loop_unaligned);
+  }
+
+  bind(L_multiply_64_x_64_loop);
+  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+  Label L_second_loop_aligned;
+  beqz(kdx, L_second_loop_aligned);
+
+  Label L_carry;
+  subw(kdx, kdx, 1);
+  beqz(kdx, L_carry);
+
+  shadd(t0, kdx, z, t0, LogBytesPerInt);
+  sw(carry, Address(t0, 0));
+  srli(carry, carry, 32);
+  subw(kdx, kdx, 1);
+
+  bind(L_carry);
+  shadd(t0, kdx, z, t0, LogBytesPerInt);
+  sw(carry, Address(t0, 0));
+
+  // Second and third (nested) loops.
+  //
+  // for (int i = xstart-1; i >= 0; i--) { // Second loop
+  //   carry = 0;
+  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+  //                    (z[k] & LONG_MASK) + carry;
+  //     z[k] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[i] = (int)carry;
+  // }
+  //
+  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
+
+  bind(L_second_loop_aligned);
+  mv(carry, zr); // carry = 0;
+  mv(jdx, ylen); // j = ystart+1
+
+  subw(xstart, xstart, 1); // i = xstart-1;
+  bltz(xstart, L_done);
+
+  sub(sp, sp, 4 * wordSize);
+  sd(z, Address(sp, 0));
+
+  Label L_last_x;
+  shadd(t0, xstart, z, t0, LogBytesPerInt);
+  addi(z, t0, 4);
+  subw(xstart, xstart, 1); // i = xstart-1;
+  bltz(xstart, L_last_x);
+
+  shadd(t0, xstart, x, t0, LogBytesPerInt);
+  ld(product_hi, Address(t0, 0));
+  ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
+
+  Label L_third_loop_prologue;
+  bind(L_third_loop_prologue);
+
+  sd(ylen, Address(sp, wordSize));
+  sd(x, Address(sp, 2 * wordSize));
+  sd(xstart, Address(sp, 3 * wordSize));
+  multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
+                          tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
+  ld(z, Address(sp, 0));
+  ld(ylen, Address(sp, wordSize));
+  ld(x, Address(sp, 2 * wordSize));
+  ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
+  addi(sp, sp, 4 * wordSize);
+
+  addiw(tmp3, xlen, 1);
+  shadd(t0, tmp3, z, t0, LogBytesPerInt);
+  sw(carry, Address(t0, 0));
+
+  subw(tmp3, tmp3, 1);
+  bltz(tmp3, L_done);
+
+  srli(carry, carry, 32);
+  shadd(t0, tmp3, z, t0, LogBytesPerInt);
+  sw(carry, Address(t0, 0));
+  j(L_second_loop_aligned);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+  lwu(product_hi, Address(x, 0));
+  j(L_third_loop_prologue);
+
+  bind(L_done);
+}
+#endif
+
+// Compute, in bits, the length of the run of all-zero elements at the low end
+// of Rs, i.e. the bit offset of the lowest non-zero element. An element is one
+// byte wide in the LL case and two bytes wide otherwise, so the result is a
+// multiple of 8 or 16 respectively.
+// Rd receives the result; tmp1 is clobbered, and tmp2 as well when Zbb is not used.
+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
+{
+  const int elem_bits = isLL ? 8 : 16;
+
+  if (!UseZbb) {
+    // Generic path: consume one element per iteration from a copy of Rs.
+    assert_different_registers(Rd, Rs, tmp1, tmp2);
+    const int elem_mask = (1 << elem_bits) - 1;
+    Label next_elem;
+    mv(Rd, -elem_bits);
+    mv(tmp2, Rs);
+
+    bind(next_elem);
+    addi(Rd, Rd, elem_bits);
+    andi(tmp1, tmp2, elem_mask);
+    srli(tmp2, tmp2, elem_bits);
+    beqz(tmp1, next_elem);
+    return;
+  }
+
+  // Zbb path: count trailing zero bits, then round down to an element boundary.
+  assert_different_registers(Rd, Rs, tmp1);
+  ctz(Rd, Rs);
+  andi(tmp1, Rd, elem_bits - 1);
+  sub(Rd, Rd, tmp1);
+}
+
+// Widen the four low-order bytes of Rs into four 16-bit lanes of Rd, each byte
+// zero-extended, for example:
+// Rs: A7A6A5A4A3A2A1A0
+// Rd: 00A300A200A100A0
+// tmp1 and tmp2 are clobbered.
+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
+{
+  assert_different_registers(Rd, Rs, tmp1, tmp2);
+  mv(tmp1, 0xFF); // byte-select mask, starts at byte 0
+  mv(Rd, zr);
+  // Byte k sits at bit 8*k in Rs and has to land at bit 16*k in Rd, so it
+  // must be moved left by a further 8*k bits ("shift" below).
+  for (int shift = 0; shift < 32; shift += 8)
+  {
+    andr(tmp2, Rs, tmp1);
+    if (shift != 0) {
+      slli(tmp2, tmp2, shift);
+    }
+    orr(Rd, Rd, tmp2);
+    if (shift < 24) {
+      slli(tmp1, tmp1, 8); // advance the mask to the next source byte
+    }
+  }
+}
+
+// Widen the four high-order bytes of Rs into four 16-bit lanes of Rd, each byte
+// zero-extended, for example:
+// Rs: A7A6A5A4A3A2A1A0
+// Rd: 00A700A600A500A4
+// tmp1 and tmp2 are clobbered.
+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
+{
+  assert_different_registers(Rd, Rs, tmp1, tmp2);
+  mv(tmp1, 0xFF00000000); // byte-select mask, starts at byte 4
+  mv(Rd, zr);
+  // Each round merges the next source byte in place and then slides the whole
+  // accumulator right by one byte, so earlier bytes drift into their lanes.
+  const int nbytes = 4;
+  for (int round = 1; round <= nbytes; round++)
+  {
+    andr(tmp2, Rs, tmp1);
+    orr(Rd, Rd, tmp2);
+    srli(Rd, Rd, 8);
+    if (round < nbytes) {
+      slli(tmp1, tmp1, 8); // advance the mask to the next source byte
+    }
+  }
+}
+
+// The size of the blocks erased by the zero_blocks stub (zero_words() compares it against a count in HeapWords).  We must
+// handle anything smaller than this ourselves in zero_words().
+const int MacroAssembler::zero_words_block_size = 8;
+
+// zero_words() is used by C2 ClearArray patterns.  It is as small as
+// possible, handling small word counts locally and delegating
+// anything larger to the zero_blocks stub.  It is expanded many times
+// in compiled code, so it is important to keep it short.
+
+// ptr:   Address of a buffer to be zeroed.
+// cnt:   Count in HeapWords.
+//
+// ptr, cnt, and t0 are clobbered.
+//
+// Returns the pc following the emitted code, or NULL when the trampoline
+// call to the zero_blocks stub could not be emitted.
+address MacroAssembler::zero_words(Register ptr, Register cnt)
+{
+  assert(is_power_of_2(zero_words_block_size), "adjust this");
+  assert(ptr == x28 && cnt == x29, "mismatch in register usage");
+  assert_different_registers(cnt, t0);
+
+  BLOCK_COMMENT("zero_words {");
+  mv(t0, zero_words_block_size);
+  Label around;
+  // Fewer words than one block: skip the stub and zero everything inline.
+  bltu(cnt, t0, around);
+  {
+    RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
+    assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
+    if (StubRoutines::riscv::complete()) {
+      address tpc = trampoline_call(zero_blocks);
+      if (tpc == NULL) {
+        DEBUG_ONLY(reset_labels1(around));
+        postcond(pc() == badAddress);
+        return NULL;
+      }
+    } else {
+      // Stubs are still being generated: call the stub directly.
+      jal(zero_blocks);
+    }
+  }
+  bind(around);
+  // Zero the tail: for each set bit of cnt below the block size, emit that
+  // many stores (groups of block_size/2 ... 2 words here, the last word below).
+  // NOTE(review): after the stub call this relies on the stub having advanced
+  // ptr and left the low bits of cnt intact - confirm against zero_blocks.
+  for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
+    Label l;
+    andi(t0, cnt, i);
+    beqz(t0, l);
+    for (int j = 0; j < i; j++) {
+      sd(zr, Address(ptr, 0));
+      addi(ptr, ptr, 8);
+    }
+    bind(l);
+  }
+  {
+    // Final odd word; ptr is not advanced past it.
+    Label l;
+    andi(t0, cnt, 1);
+    beqz(t0, l);
+    sd(zr, Address(ptr, 0));
+    bind(l);
+  }
+  BLOCK_COMMENT("} zero_words");
+  postcond(pc() != badAddress);
+  return pc();
+}
+
+#define SmallArraySize (18 * BytesPerLong)
+
+// Zero a buffer whose length is known at code-generation time.
+// base:  Address of a buffer to be zeroed, 8 bytes aligned.
+// cnt:   Immediate count in HeapWords.
+// Buffers of up to SmallArraySize bytes are zeroed with straight-line stores;
+// larger ones use a loop of 8 stores per iteration, which clobbers t0 and t1.
+void MacroAssembler::zero_words(Register base, u_int64_t cnt)
+{
+  assert_different_registers(base, t0, t1);
+
+  BLOCK_COMMENT("zero_words {");
+
+  const bool fully_unrolled = cnt <= SmallArraySize / BytesPerLong;
+  if (fully_unrolled) {
+    // Short buffer: one store per word, no loop.
+    int word = 0;
+    while (word < (int)cnt) {
+      sd(zr, Address(base, word * wordSize));
+      word++;
+    }
+  } else {
+    const int stores_per_iter = 8; // Number of sd(zr, adr) instructions in the loop body
+    const Register words_left = t0;
+    const Register cursor = t1;
+
+    // Peel off the words that do not fill a whole loop iteration.
+    const int peeled = cnt % stores_per_iter;
+    for (int word = 0; word < peeled; word++) {
+      sd(zr, Address(base, word * wordSize));
+    }
+
+    // What is left is a non-zero multiple of stores_per_iter.
+    const u_int64_t looped = cnt - peeled;
+    Label zero_more;
+    mv(words_left, looped);
+    add(cursor, base, peeled * wordSize);
+    bind(zero_more);
+    sub(words_left, words_left, stores_per_iter);
+    for (int slot = 0; slot < stores_per_iter; slot++) {
+      sd(zr, Address(cursor, slot * wordSize));
+    }
+    add(cursor, cursor, stores_per_iter * wordSize);
+    bnez(words_left, zero_more);
+  }
+
+  BLOCK_COMMENT("} zero_words");
+}
+
+// Fill a buffer with a 64-bit value.
+// base:   Address of a buffer to be filled, 8 bytes aligned.
+// cnt:    Count in 8-byte unit.
+// value:  Value to be filled with.
+// base will point to the end of the buffer after filling.
+// cnt, t0 and t1 are clobbered.
+void MacroAssembler::fill_words(Register base, Register cnt, Register value)
+{
+//  Algorithm:
+//
+//    t0 = cnt & 7
+//    cnt -= t0
+//    p += t0
+//    switch (t0):
+//      switch start:
+//      do while cnt
+//        cnt -= 8
+//          p[-8] = value
+//        case 7:
+//          p[-7] = value
+//        case 6:
+//          p[-6] = value
+//          // ...
+//        case 1:
+//          p[-1] = value
+//        case 0:
+//          p += 8
+//      do-while end
+//    switch end
+
+  assert_different_registers(base, cnt, value, t0, t1);
+
+  Label fini, entry, loop;
+  const int unroll = 8; // Number of sd instructions we'll unroll
+
+  // Nothing to do for an empty buffer.
+  beqz(cnt, fini);
+
+  andi(t0, cnt, unroll - 1);
+  sub(cnt, cnt, t0);
+  // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
+  shadd(base, t0, base, t1, 3);
+  // Computed jump into the unrolled stores: start t0 instructions before
+  // "entry" so that exactly the last t0 stores run on the first pass.
+  // This relies on every sd below being encoded in 4 bytes.
+  la(t1, entry);
+  slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
+  sub(t1, t1, t0);
+  jr(t1);
+
+  bind(loop);
+  add(base, base, unroll * 8);
+  for (int i = -unroll; i < 0; i++) {
+    sd(value, Address(base, i * 8));
+  }
+  bind(entry);
+  // cnt is a multiple of unroll here; it goes negative once all full
+  // groups have been stored.
+  sub(cnt, cnt, unroll);
+  bgez(cnt, loop);
+
+  bind(fini);
+}
+
+#define FCVT_SAFE(FLOATCVT, FLOATEQ) /* fp->int convert; a NaN source gives 0 */                 \
+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {           \
+  Label L_Okay;                                                                                  \
+  fscsr(zr);                      /* zero fcsr before converting */                              \
+  FLOATCVT(dst, src);                                                                            \
+  frcsr(tmp);                     /* read fcsr back into tmp */                                  \
+  andi(tmp, tmp, 0x1E);           /* keep fflags NV|DZ|OF|UF, ignore NX (bit 0) */               \
+  beqz(tmp, L_Okay);              /* none set: keep dst as converted */                          \
+  FLOATEQ(tmp, src, src);         /* tmp = 0 only if src is NaN */                               \
+  bnez(tmp, L_Okay);              /* not NaN: keep converted dst */                              \
+  mv(dst, zr);                    /* NaN: result is 0 */                                         \
+  bind(L_Okay);                                                                                  \
+}
+
+FCVT_SAFE(fcvt_w_s, feq_s)
+FCVT_SAFE(fcvt_l_s, feq_s)
+FCVT_SAFE(fcvt_w_d, feq_d)
+FCVT_SAFE(fcvt_l_d, feq_d)
+
+#undef FCVT_SAFE
+
+#define FCMP(FLOATTYPE, FLOATSIG) /* three-way compare of Rs1 and Rs2 into result */    \
+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
+                                         FloatRegister Rs2, int unordered_result) {     \
+  Label Ldone;                                                                          \
+  if (unordered_result < 0) {                                                           \
+    /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
+    /* installs 1 if Rs1 > Rs2 else 0 (0 when unordered) */                             \
+    flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
+    /* Rs1 > Rs2, install 1 */                                                          \
+    bgtz(result, Ldone);                                                                \
+    feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
+    addi(result, result, -1);                                                           \
+    /* Rs1 = Rs2, install 0 */                                                          \
+    /* NaN or Rs1 < Rs2, install -1 */                                                  \
+    bind(Ldone);                                                                        \
+  } else {                                                                              \
+    /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
+    /* installs 1 if Rs1 < Rs2 else 0 (0 when unordered) */                             \
+    flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
+    /* Rs1 < Rs2: result is 1 here, negated to -1 at Ldone */                           \
+    bgtz(result, Ldone);                                                                \
+    feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
+    addi(result, result, -1);                                                           \
+    /* Rs1 = Rs2: result is 0, unchanged by the neg */                                  \
+    /* NaN or Rs1 > Rs2: result is -1, negated to 1 */                                  \
+    bind(Ldone);                                                                        \
+    neg(result, result);                                                                \
+  }                                                                                     \
+}
+
+FCMP(float, s);
+FCMP(double, d);
+
+#undef FCMP
+
+// Zero words starting at addr; len is in bytes
+// Destroys len, tmp, t0 and t1; addr itself is only read
+// len must be a nonzero multiple of wordSize
+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
+  assert_different_registers(addr, len, tmp, t0, t1);
+
+#ifdef ASSERT
+  {
+    Label L;
+    andi(t0, len, BytesPerWord - 1);
+    beqz(t0, L);
+    stop("len is not a multiple of BytesPerWord");
+    bind(L);
+  }
+#endif // ASSERT
+
+#ifndef PRODUCT
+  block_comment("zero memory");
+#endif // PRODUCT
+
+  Label loop;
+  Label entry;
+
+  // Algorithm:
+  //
+  //  t0 = cnt & 7
+  //  cnt -= t0
+  //  p += t0
+  //  switch (t0) {
+  //    do {
+  //      cnt -= 8
+  //        p[-8] = 0
+  //      case 7:
+  //        p[-7] = 0
+  //      case 6:
+  //        p[-6] = 0
+  //        ...
+  //      case 1:
+  //        p[-1] = 0
+  //      case 0:
+  //        p += 8
+  //     } while (cnt)
+  //  }
+
+  const int unroll = 8;   // Number of sd(zr) instructions we'll unroll
+
+  srli(len, len, LogBytesPerWord);  // len now counts words (the "cnt" of the algorithm above)
+  andi(t0, len, unroll - 1);  // t0 = cnt % unroll
+  sub(len, len, t0);          // cnt -= t0
+  // tmp always points to the end of the region we're about to zero
+  shadd(tmp, t0, addr, t1, LogBytesPerWord);
+  la(t1, entry);
+  slli(t0, t0, 2);    // t0 stores * 4 bytes each: how far before "entry" to start
+  sub(t1, t1, t0);
+  jr(t1);             // computed jump so that exactly the last t0 stores run first
+  bind(loop);
+  sub(len, len, unroll);
+  for (int i = -unroll; i < 0; i++) {
+    Assembler::sd(zr, Address(tmp, i * wordSize));  // presumably the raw Assembler form keeps each store at 4 bytes, as the jump above needs - confirm
+  }
+  bind(entry);
+  add(tmp, tmp, unroll * wordSize);
+  bnez(len, loop);    // len is a multiple of unroll here; stop once it reaches zero
+}
+
+// Shift-and-add: Rd = (Rs1 << shamt) + Rs2.
+// With Zba and a shamt of 1, 2 or 3 a single shNadd instruction is emitted;
+// otherwise the shifted value goes through tmp (unused when shamt == 0).
+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
+  if (UseZba) {
+    switch (shamt) {
+      case 1:
+        sh1add(Rd, Rs1, Rs2);
+        return;
+      case 2:
+        sh2add(Rd, Rs1, Rs2);
+        return;
+      case 3:
+        sh3add(Rd, Rs1, Rs2);
+        return;
+      default:
+        break; // no single-instruction form, use the generic sequence
+    }
+  }
+
+  if (shamt == 0) {
+    add(Rd, Rs1, Rs2);
+    return;
+  }
+
+  slli(tmp, Rs1, shamt);
+  add(Rd, Rs2, tmp);
+}
+
+// Zero-extend the low "bits" bits of src into dst.
+// Uses zext.w (Zba, 32 bits), zext.h (Zbb, 16 bits) or zext_b (8 bits) when
+// applicable, and a shift-left / logical-shift-right pair otherwise.
+void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
+  const bool use_zext_w = UseZba && bits == 32;
+  const bool use_zext_h = UseZbb && bits == 16;
+
+  if (use_zext_w) {
+    zext_w(dst, src);
+  } else if (use_zext_h) {
+    zext_h(dst, src);
+  } else if (bits == 8) {
+    zext_b(dst, src);
+  } else {
+    // Push the field to the top of the register, then pull it back down
+    // with zeros shifted in.
+    const int shift = XLEN - bits;
+    slli(dst, src, shift);
+    srli(dst, dst, shift);
+  }
+}
+
+// Sign-extend the low "bits" bits of src into dst.
+// Uses sext.b / sext.h (Zbb, 8 / 16 bits) or sext_w (32 bits) when applicable,
+// and a shift-left / arithmetic-shift-right pair otherwise.
+void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
+  if (UseZbb && bits == 8) {
+    sext_b(dst, src);
+    return;
+  }
+  if (UseZbb && bits == 16) {
+    sext_h(dst, src);
+    return;
+  }
+  if (bits == 32) {
+    sext_w(dst, src);
+    return;
+  }
+
+  // Push the field to the top of the register, then pull it back down
+  // replicating its sign bit.
+  const int shift = XLEN - bits;
+  slli(dst, src, shift);
+  srai(dst, dst, shift);
+}
+
+// Signed three-way compare: dst = 1 if src1 > src2, -1 if src1 < src2, else 0.
+// dst may alias src1 or src2; in that case the aliased operand is first
+// copied to tmp, which must differ from dst and from the other operand.
+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
+{
+  // Comparing a register with itself is always "equal".
+  if (src1 == src2) {
+    mv(dst, zr);
+    return;
+  }
+
+  Register lhs = src1;
+  Register rhs = src2;
+  const bool dst_is_src1 = (dst == src1);
+  const bool dst_is_src2 = !dst_is_src1 && (dst == src2);
+  if (dst_is_src1) {
+    // The first slt would overwrite src1 before its second use; save it.
+    assert_different_registers(dst, src2, tmp);
+    mv(tmp, src1);
+    lhs = tmp;
+  }
+  if (dst_is_src2) {
+    // Same, for src2.
+    assert_different_registers(dst, src1, tmp);
+    mv(tmp, src2);
+    rhs = tmp;
+  }
+
+  Label finished;
+  // dst = 1 if lhs > rhs, else 0
+  slt(dst, rhs, lhs);
+  bnez(dst, finished);
+  // dst = 1 if lhs < rhs (negated to -1 below), else 0 for equality
+  slt(dst, lhs, rhs);
+  neg(dst, dst);
+  bind(finished);
+}
+
+#ifdef COMPILER2
+// short string
+// StringUTF16.indexOfChar
+// StringLatin1.indexOfChar
+//
+// Element-by-element search for ch in the cnt1 elements starting at str1
+// (bytes when isL, 16-bit chars otherwise). result receives the index of the
+// match, or -1. The scan proceeds in groups of 8, then 4, then single
+// elements. str1 is advanced as the scan proceeds; t0 and t1 are clobbered.
+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1,
+                                                  Register ch, Register result,
+                                                  bool isL)
+{
+  const Register ch1 = t0;
+  const Register index = t1;
+  const int elem_size = isL ? 1 : 2;
+
+  BLOCK_COMMENT("string_indexof_char_short {");
+
+  Label LOOP, LOOP1, LOOP4, LOOP8, NOMATCH;
+  // HIT_AT[k] is the target when element k of the current group matched.
+  // HIT_AT[0] doubles as the common exit that copies index into result.
+  Label HIT_AT[8];
+
+  mv(result, -1);
+  mv(index, zr);
+
+  // Pick the widest group that still fits in the remaining elements.
+  bind(LOOP);
+  addi(t0, index, 8);
+  ble(t0, cnt1, LOOP8);
+  addi(t0, index, 4);
+  ble(t0, cnt1, LOOP4);
+  j(LOOP1);
+
+  bind(LOOP8);
+  for (int k = 0; k < 8; k++) {
+    if (isL) {
+      lbu(ch1, Address(str1, k));
+    } else {
+      lhu(ch1, Address(str1, k * 2));
+    }
+    beq(ch, ch1, HIT_AT[k]);
+  }
+  addi(index, index, 8);
+  addi(str1, str1, 8 * elem_size);
+  blt(index, cnt1, LOOP);
+  j(NOMATCH);
+
+  bind(LOOP4);
+  for (int k = 0; k < 4; k++) {
+    if (isL) {
+      lbu(ch1, Address(str1, k));
+    } else {
+      lhu(ch1, Address(str1, k * 2));
+    }
+    beq(ch, ch1, HIT_AT[k]);
+  }
+  addi(index, index, 4);
+  addi(str1, str1, 4 * elem_size);
+  bge(index, cnt1, NOMATCH);
+
+  bind(LOOP1);
+  if (isL) {
+    lbu(ch1, Address(str1));
+  } else {
+    lhu(ch1, Address(str1));
+  }
+  beq(ch, ch1, HIT_AT[0]);
+  addi(index, index, 1);
+  addi(str1, str1, elem_size);
+  blt(index, cnt1, LOOP1);
+  j(NOMATCH);
+
+  // Match tails: add the position inside the group to index. The last tail
+  // falls straight through into the common exit instead of jumping to it.
+  for (int k = 1; k < 8; k++) {
+    bind(HIT_AT[k]);
+    addi(index, index, k);
+    if (k < 7) {
+      j(HIT_AT[0]);
+    }
+  }
+
+  bind(HIT_AT[0]);
+  mv(result, index);
+  bind(NOMATCH);
+  BLOCK_COMMENT("} string_indexof_char_short");
+}
+
+// StringUTF16.indexOfChar / StringLatin1.indexOfChar (isL selects Latin1 bytes, otherwise 16-bit chars)
+// result = index in str1[0, cnt1) at which ch was found, or -1; str1, cnt1, ch, t0, t1 and tmp1-tmp4 may be clobbered
+void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
+                                            Register ch, Register result,
+                                            Register tmp1, Register tmp2,
+                                            Register tmp3, Register tmp4,
+                                            bool isL)
+{
+  Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG;
+  Register ch1 = t0;
+  Register orig_cnt = t1;
+  Register mask1 = tmp3;
+  Register mask2 = tmp2;
+  Register match_mask = tmp1;
+  Register trailing_char = tmp4;      // shares tmp4 with unaligned_elems; their uses do not overlap
+  Register unaligned_elems = tmp4;
+
+  BLOCK_COMMENT("string_indexof_char {");
+  beqz(cnt1, NOMATCH);                // empty string: no match
+
+  addi(t0, cnt1, isL ? -32 : -16);
+  bgtz(t0, DO_LONG);                  // more than 32 (Latin1) / 16 (UTF-16) elements: scan 8 bytes at a time
+  string_indexof_char_short(str1, cnt1, ch, result, isL);
+  j(DONE);
+
+  bind(DO_LONG);
+  mv(orig_cnt, cnt1);
+  if (AvoidUnalignedAccesses) {
+    Label ALIGNED;
+    andi(unaligned_elems, str1, 0x7);
+    beqz(unaligned_elems, ALIGNED);
+    sub(unaligned_elems, unaligned_elems, 8);
+    neg(unaligned_elems, unaligned_elems);   // bytes up to the next 8-byte boundary
+    if (!isL) {
+      srli(unaligned_elems, unaligned_elems, 1);   // bytes -> 16-bit elements
+    }
+    // do unaligned part per element
+    string_indexof_char_short(str1, unaligned_elems, ch, result, isL);
+    bgez(result, DONE);
+    mv(orig_cnt, cnt1);               // the short scan uses t1 (orig_cnt) as its index, so set it again
+    sub(cnt1, cnt1, unaligned_elems);
+    bind(ALIGNED);
+  }
+
+  // duplicate ch
+  if (isL) {
+    slli(ch1, ch, 8);
+    orr(ch, ch1, ch);
+  }
+  slli(ch1, ch, 16);
+  orr(ch, ch1, ch);
+  slli(ch1, ch, 32);
+  orr(ch, ch1, ch);                   // ch now repeated across all 64 bits
+
+  if (!isL) {
+    slli(cnt1, cnt1, 1);              // from here on cnt1 counts bytes
+  }
+
+  uint64_t mask0101 = UCONST64(0x0101010101010101);
+  uint64_t mask0001 = UCONST64(0x0001000100010001);
+  mv(mask1, isL ? mask0101 : mask0001);
+  uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f);
+  uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff);
+  mv(mask2, isL ? mask7f7f : mask7fff);
+
+  bind(CH1_LOOP);
+  ld(ch1, Address(str1));             // 8 bytes of the string per iteration
+  addi(str1, str1, 8);
+  addi(cnt1, cnt1, -8);
+  compute_match_mask(ch1, ch, match_mask, mask1, mask2);  // presumably non-zero iff some element of ch1 equals ch - see compute_match_mask
+  bnez(match_mask, HIT);
+  bgtz(cnt1, CH1_LOOP);
+  j(NOMATCH);
+
+  bind(HIT);
+  ctzc_bit(trailing_char, match_mask, isL, ch1, result);  // result is only a scratch register here
+  srli(trailing_char, trailing_char, 3);                  // bits -> bytes
+  addi(cnt1, cnt1, 8);                // undo the decrement for the chunk that hit
+  ble(cnt1, trailing_char, NOMATCH);  // the hit lies at or beyond the end of the string
+  // match case
+  if (!isL) {
+    srli(cnt1, cnt1, 1);              // bytes -> 16-bit elements
+    srli(trailing_char, trailing_char, 1);
+  }
+
+  sub(result, orig_cnt, cnt1);        // index of the first element of the chunk that hit
+  add(result, result, trailing_char);
+  j(DONE);
+
+  bind(NOMATCH);
+  mv(result, -1);
+
+  bind(DONE);
+  BLOCK_COMMENT("} string_indexof_char");
+}
+
+typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); // member pointer to a load emitter; string_indexof assigns lbu or lhu to it
+
+// Search for needle in haystack and return index or -1
+// x10: result
+// x11: haystack
+// x12: haystack_len
+// x13: needle
+// x14: needle_len
+void MacroAssembler::string_indexof(Register haystack, Register needle,
+                                       Register haystack_len, Register needle_len,
+                                       Register tmp1, Register tmp2,
+                                       Register tmp3, Register tmp4,
+                                       Register tmp5, Register tmp6,
+                                       Register result, int ae)
+{
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
+  Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH;
+
+  Register ch1 = t0;
+  Register ch2 = t1;
+  Register nlen_tmp = tmp1; // needle len tmp
+  Register hlen_tmp = tmp2; // haystack len tmp
+  Register result_tmp = tmp4;
+
+  bool isLL = ae == StrIntrinsicNode::LL;
+
+  bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
+  bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU;
+  int needle_chr_shift = needle_isL ? 0 : 1;
+  int haystack_chr_shift = haystack_isL ? 0 : 1;
+  int needle_chr_size = needle_isL ? 1 : 2;
+  int haystack_chr_size = haystack_isL ? 1 : 2;
+  load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu :
+                                   (load_chr_insn)&MacroAssembler::lhu;
+  load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu :
+                                     (load_chr_insn)&MacroAssembler::lhu;
+
+  BLOCK_COMMENT("string_indexof {");
+
+  // Note, inline_string_indexOf() generates checks:
+  // if (pattern.count > src.count) return -1;
+  // if (pattern.count == 0) return 0;
+
+  // We have two strings, a source string in haystack, haystack_len and a pattern string
+  // in needle, needle_len. Find the first occurrence of pattern in source or return -1.
+
+  // For larger pattern and source we use a simplified Boyer Moore algorithm.
+  // With a small pattern and source we use linear scan.
+
+  // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm.
+  sub(result_tmp, haystack_len, needle_len);
+  // needle_len < 8, use linear scan
+  sub(t0, needle_len, 8);
+  bltz(t0, LINEARSEARCH);
+  // needle_len >= 256, use linear scan
+  sub(t0, needle_len, 256);
+  bgez(t0, LINEARSTUB);
+  // needle_len >= haystack_len/4, use linear scan
+  srli(t0, haystack_len, 2);
+  bge(needle_len, t0, LINEARSTUB);
+
+  // Boyer-Moore-Horspool introduction:
+  // The Boyer Moore algorithm is based on the description here:-
+  //
+  // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
+  //
+  // This describes an algorithm with 2 shift rules. The 'Bad Character' rule
+  // and the 'Good Suffix' rule.
+  //
+  // These rules are essentially heuristics for how far we can shift the
+  // pattern along the search string.
+  //
+  // The implementation here uses the 'Bad Character' rule only because of the
+  // complexity of initialisation for the 'Good Suffix' rule.
+  //
+  // This is also known as the Boyer-Moore-Horspool algorithm:
+  //
+  // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm
+  //
+  // #define ASIZE 256
+  //
+  //    int bm(unsigned char *pattern, int m, unsigned char *src, int n) {
+  //      int i, j;
+  //      unsigned c;
+  //      unsigned char bc[ASIZE];
+  //
+  //      /* Preprocessing */
+  //      for (i = 0; i < ASIZE; ++i)
+  //        bc[i] = m;
+  //      for (i = 0; i < m - 1; ) {
+  //        c = pattern[i];
+  //        ++i;
+  //        // c < 256 for Latin1 string, so, no need for branch
+  //        #ifdef PATTERN_STRING_IS_LATIN1
+  //        bc[c] = m - i;
+  //        #else
+  //        if (c < ASIZE) bc[c] = m - i;
+  //        #endif
+  //      }
+  //
+  //      /* Searching */
+  //      j = 0;
+  //      while (j <= n - m) {
+  //        c = src[i+j];
+  //        if (pattern[m-1] == c)
+  //          int k;
+  //          for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k);
+  //          if (k < 0) return j;
+  //          // c < 256 for Latin1 string, so, no need for branch
+  //          #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1
+  //          // LL case: (c< 256) always true. Remove branch
+  //          j += bc[pattern[j+m-1]];
+  //          #endif
+  //          #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF
+  //          // UU case: need if (c<ASIZE) check. Skip 1 character if not.
+  //          if (c < ASIZE)
+  //            j += bc[pattern[j+m-1]];
+  //          else
+  //            j += 1
+  //          #endif
+  //          #ifdef SOURCE_IS_UTF_AND_PATTERN_IS_LATIN1
+  //          // UL case: need if (c<ASIZE) check. Skip <pattern length> if not.
+  //          if (c < ASIZE)
+  //            j += bc[pattern[j+m-1]];
+  //          else
+  //            j += m
+  //          #endif
+  //      }
+  //      return -1;
+  //    }
+
+  // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result
+  Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH,
+          BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP;
+
+  Register haystack_end = haystack_len;
+  Register skipch = tmp2;
+
+  // pattern length is >=8, so, we can read at least 1 register for cases when
+  // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for
+  // UL case. We'll re-read last character in inner pre-loop code to have
+  // single outer pre-loop load
+  const int firstStep = isLL ? 7 : 3;
+
+  const int ASIZE = 256;
+  const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd)
+
+  sub(sp, sp, ASIZE); // reserve ASIZE bytes on the stack for the bad-character (bc) table
+
+  // init BC offset table with default value: needle_len
+  slli(t0, needle_len, 8);
+  orr(t0, t0, needle_len); // [63...16][needle_len][needle_len]
+  slli(tmp1, t0, 16);
+  orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len]
+  slli(tmp1, t0, 32);
+  orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len]
+
+  mv(ch1, sp);  // ch1 is t0
+  mv(tmp6, ASIZE / STORE_BYTES); // loop iterations
+
+  bind(BM_INIT_LOOP);
+  // for (i = 0; i < ASIZE; ++i)
+  //   bc[i] = m;
+  for (int i = 0; i < 4; i++) {
+    sd(tmp5, Address(ch1, i * wordSize)); // four 8-byte stores (32 table entries) per loop pass
+  }
+  add(ch1, ch1, 32);
+  sub(tmp6, tmp6, 4);
+  bgtz(tmp6, BM_INIT_LOOP);
+
+  sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern
+  Register orig_haystack = tmp5;
+  mv(orig_haystack, haystack);
+  // result_tmp = tmp4
+  shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift);
+  sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1
+  mv(tmp3, needle);
+
+  //  for (i = 0; i < m - 1; ) {
+  //    c = pattern[i];
+  //    ++i;
+  //    // c < 256 for Latin1 string, so, no need for branch
+  //    #ifdef PATTERN_STRING_IS_LATIN1
+  //    bc[c] = m - i;
+  //    #else
+  //    if (c < ASIZE) bc[c] = m - i;
+  //    #endif
+  //  }
+  bind(BCLOOP);
+  (this->*needle_load_1chr)(ch1, Address(tmp3), noreg);
+  add(tmp3, tmp3, needle_chr_size);
+  if (!needle_isL) {
+    // ae == StrIntrinsicNode::UU
+    mv(tmp6, ASIZE);
+    bgeu(ch1, tmp6, BCSKIP);
+  }
+  add(tmp4, sp, ch1);
+  sb(ch2, Address(tmp4)); // store skip offset to BC offset table
+
+  bind(BCSKIP);
+  sub(ch2, ch2, 1); // for next pattern element, skip distance -1
+  bgtz(ch2, BCLOOP);
+
+  // tmp6: pattern end, address after needle
+  shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift);
+  if (needle_isL == haystack_isL) {
+    // load last 8 bytes (8LL/4UU symbols)
+    ld(tmp6, Address(tmp6, -wordSize));
+  } else {
+    // UL: from UTF-16(source) search Latin1(pattern)
+    lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols)
+    // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d
+    // We'll have to wait until load completed, but it's still faster than per-character loads+checks
+    srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a
+    slli(ch2, tmp6, XLEN - 24);
+    srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
+    slli(ch1, tmp6, XLEN - 16);
+    srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
+    andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
+    slli(ch2, ch2, 16);
+    orr(ch2, ch2, ch1); // 0x00000b0c
+    slli(result, tmp3, 48); // use result as temp register
+    orr(tmp6, tmp6, result); // 0x0a00000d
+    slli(result, ch2, 16);
+    orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d
+  }
+
+  // i = m - 1;
+  // skipch = j + i;
+  // if (skipch == pattern[m - 1]
+  //   for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k);
+  // else
+  //   move j with bad char offset table
+  bind(BMLOOPSTR2);
+  // compare pattern to source string backward
+  shadd(result, nlen_tmp, haystack, result, haystack_chr_shift);
+  (this->*haystack_load_1chr)(skipch, Address(result), noreg);
+  sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8
+  if (needle_isL == haystack_isL) {
+    // re-init tmp3. It's for free because it's executed in parallel with
+    // load above. Alternative is to initialize it before loop, but it'll
+    // affect performance on in-order systems with 2 or more ld/st pipelines
+    srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1]
+  }
+  if (!isLL) { // UU/UL case
+    slli(ch2, nlen_tmp, 1); // offsets in bytes
+  }
+  bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
+  add(result, haystack, isLL ? nlen_tmp : ch2);
+  ld(ch2, Address(result)); // load 8 bytes from source string
+  mv(ch1, tmp6);
+  if (isLL) {
+    j(BMLOOPSTR1_AFTER_LOAD);
+  } else {
+    sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
+    j(BMLOOPSTR1_CMP);
+  }
+
+  bind(BMLOOPSTR1);
+  shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift);
+  (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
+  shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift);
+  (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+
+  bind(BMLOOPSTR1_AFTER_LOAD);
+  sub(nlen_tmp, nlen_tmp, 1);
+  bltz(nlen_tmp, BMLOOPSTR1_LASTCMP);
+
+  bind(BMLOOPSTR1_CMP);
+  beq(ch1, ch2, BMLOOPSTR1);
+
+  bind(BMSKIP);
+  if (!isLL) {
+    // if we've met UTF symbol while searching Latin1 pattern, then we can
+    // skip needle_len symbols
+    if (needle_isL != haystack_isL) {
+      mv(result_tmp, needle_len);
+    } else {
+      mv(result_tmp, 1);
+    }
+    mv(t0, ASIZE);
+    bgeu(skipch, t0, BMADV); // skipch has no table entry: advance by the default chosen just above
+  }
+  add(result_tmp, sp, skipch);
+  lbu(result_tmp, Address(result_tmp)); // load skip offset
+
+  bind(BMADV);
+  sub(nlen_tmp, needle_len, 1);
+  // move haystack after bad char skip offset
+  shadd(haystack, result_tmp, haystack, result, haystack_chr_shift);
+  ble(haystack, haystack_end, BMLOOPSTR2);
+  add(sp, sp, ASIZE); // release the bc table before leaving with "no match"
+  j(NOMATCH);
+
+  bind(BMLOOPSTR1_LASTCMP);
+  bne(ch1, ch2, BMSKIP);
+
+  bind(BMMATCH);
+  sub(result, haystack, orig_haystack); // byte distance from the start of the source string
+  if (!haystack_isL) {
+    srli(result, result, 1); // bytes -> 16-bit characters
+  }
+  add(sp, sp, ASIZE); // release the bc table
+  j(DONE);
+
+  bind(LINEARSTUB);
+  sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm
+  bltz(t0, LINEARSEARCH);
+  mv(result, zr);
+  RuntimeAddress stub = NULL;
+  if (isLL) {
+    stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll());
+    assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
+  } else if (needle_isL) {
+    stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul());
+    assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
+  } else {
+    stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu());
+    assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
+  }
+  trampoline_call(stub); // NOTE(review): the returned address is not checked here, unlike in zero_words() - confirm a failed trampoline is handled elsewhere
+  j(DONE);
+
+  bind(NOMATCH);
+  mv(result, -1);
+  j(DONE);
+
+  bind(LINEARSEARCH);
+  string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae);
+
+  bind(DONE);
+  BLOCK_COMMENT("} string_indexof");
+}
+
+// Linear-scan indexOf used for short needles (the string_indexof fallback).
+// result: x10 - index of the first match in characters, or -1 if none
+// src: x11 (haystack)
+// src_count: x12 (haystack_len)
+// pattern: x13 (needle)
+// pattern_count: x14 (needle_len), or the constant 1/2/3/4 in needle_con_cnt
+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle,
+                                                  Register haystack_len, Register needle_len,
+                                                  Register tmp1, Register tmp2,
+                                                  Register tmp3, Register tmp4,
+                                                  int needle_con_cnt, Register result, int ae)
+{
+  // Note:
+  // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant;
+  // -1 means the length is in needle_len. UU/LL: needle_con_cnt in [1, 4], UL: needle_con_cnt = 1
+  assert(needle_con_cnt <= 4, "Invalid needle constant count");
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
+  Register ch1 = t0;
+  Register ch2 = t1;
+  Register hlen_neg = haystack_len, nlen_neg = needle_len; // length registers are reused as negative byte offsets
+  Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4;
+
+  bool isLL = ae == StrIntrinsicNode::LL;
+
+  bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL;
+  bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; // LU is rejected by the assert above
+  int needle_chr_shift = needle_isL ? 0 : 1;
+  int haystack_chr_shift = haystack_isL ? 0 : 1;
+  int needle_chr_size = needle_isL ? 1 : 2;
+  int haystack_chr_size = haystack_isL ? 1 : 2;
+
+  load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu :
+                                   (load_chr_insn)&MacroAssembler::lhu;
+  load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu :
+                                     (load_chr_insn)&MacroAssembler::lhu;
+  load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu;
+  load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld;
+
+  Label DO1, DO2, DO3, MATCH, NOMATCH, DONE;
+
+  Register first = tmp3;
+
+  if (needle_con_cnt == -1) { // needle length only known at run time
+    Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT;
+
+    sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2);
+    bltz(t0, DOSHORT); // needle shorter than 4 (LL/UU) or 2 (UL) chars: use the DO1/DO2/DO3 code below
+
+    (this->*needle_load_1chr)(first, Address(needle), noreg);
+    slli(t0, needle_len, needle_chr_shift);
+    add(needle, needle, t0); // needle now points just past its last character
+    neg(nlen_neg, t0); // nlen_neg = -(needle length in bytes)
+    slli(t0, result_tmp, haystack_chr_shift); // NOTE(review): result_tmp (tmp4) is read before this function writes it - presumably preloaded by the caller with haystack_len - needle_len; confirm
+    add(haystack, haystack, t0);
+    neg(hlen_neg, t0);
+
+    bind(FIRST_LOOP); // scan the haystack for the first needle character
+    add(t0, haystack, hlen_neg);
+    (this->*haystack_load_1chr)(ch2, Address(t0), noreg);
+    beq(first, ch2, STR1_LOOP);
+
+    bind(STR2_NEXT); // advance one haystack character, give up once the offset becomes positive
+    add(hlen_neg, hlen_neg, haystack_chr_size);
+    blez(hlen_neg, FIRST_LOOP);
+    j(NOMATCH);
+
+    bind(STR1_LOOP); // first character matched: compare the remaining needle characters
+    add(nlen_tmp, nlen_neg, needle_chr_size);
+    add(hlen_tmp, hlen_neg, haystack_chr_size);
+    bgez(nlen_tmp, MATCH);
+
+    bind(STR1_NEXT);
+    add(ch1, needle, nlen_tmp);
+    (this->*needle_load_1chr)(ch1, Address(ch1), noreg);
+    add(ch2, haystack, hlen_tmp);
+    (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+    bne(ch1, ch2, STR2_NEXT);
+    add(nlen_tmp, nlen_tmp, needle_chr_size);
+    add(hlen_tmp, hlen_tmp, haystack_chr_size);
+    bltz(nlen_tmp, STR1_NEXT);
+    j(MATCH);
+
+    bind(DOSHORT);
+    if (needle_isL == haystack_isL) {
+      sub(t0, needle_len, 2);
+      bltz(t0, DO1); // needle_len < 2
+      bgtz(t0, DO3); // needle_len > 2; needle_len == 2 falls through to DO2 below
+    }
+  }
+
+  if (needle_con_cnt == 4) { // whole needle fits in one load_4chr load
+    Label CH1_LOOP;
+    (this->*load_4chr)(ch1, Address(needle), noreg);
+    sub(result_tmp, haystack_len, 4);
+    slli(tmp3, result_tmp, haystack_chr_shift); // tmp3 as temp: byte offset of the last start position
+    add(haystack, haystack, tmp3);
+    neg(hlen_neg, tmp3);
+
+    bind(CH1_LOOP);
+    add(ch2, haystack, hlen_neg);
+    (this->*load_4chr)(ch2, Address(ch2), noreg);
+    beq(ch1, ch2, MATCH);
+    add(hlen_neg, hlen_neg, haystack_chr_size);
+    blez(hlen_neg, CH1_LOOP);
+    j(NOMATCH);
+  }
+
+  if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) {
+    Label CH1_LOOP;
+    BLOCK_COMMENT("string_indexof DO2 {");
+    bind(DO2);
+    (this->*load_2chr)(ch1, Address(needle), noreg);
+    if (needle_con_cnt == 2) { // in the -1 case result_tmp is used as it arrived (not recomputed here)
+      sub(result_tmp, haystack_len, 2);
+    }
+    slli(tmp3, result_tmp, haystack_chr_shift);
+    add(haystack, haystack, tmp3);
+    neg(hlen_neg, tmp3);
+
+    bind(CH1_LOOP);
+    add(tmp3, haystack, hlen_neg);
+    (this->*load_2chr)(ch2, Address(tmp3), noreg);
+    beq(ch1, ch2, MATCH);
+    add(hlen_neg, hlen_neg, haystack_chr_size);
+    blez(hlen_neg, CH1_LOOP);
+    j(NOMATCH);
+    BLOCK_COMMENT("} string_indexof DO2");
+  }
+
+  if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) {
+    Label FIRST_LOOP, STR2_NEXT, STR1_LOOP;
+    BLOCK_COMMENT("string_indexof DO3 {");
+
+    bind(DO3);
+    (this->*load_2chr)(first, Address(needle), noreg); // first two needle characters
+    (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); // third needle character
+    if (needle_con_cnt == 3) {
+      sub(result_tmp, haystack_len, 3);
+    }
+    slli(hlen_tmp, result_tmp, haystack_chr_shift);
+    add(haystack, haystack, hlen_tmp);
+    neg(hlen_neg, hlen_tmp);
+
+    bind(FIRST_LOOP);
+    add(ch2, haystack, hlen_neg);
+    (this->*load_2chr)(ch2, Address(ch2), noreg);
+    beq(first, ch2, STR1_LOOP);
+
+    bind(STR2_NEXT);
+    add(hlen_neg, hlen_neg, haystack_chr_size);
+    blez(hlen_neg, FIRST_LOOP);
+    j(NOMATCH);
+
+    bind(STR1_LOOP); // first two characters matched: check the third
+    add(hlen_tmp, hlen_neg, 2 * haystack_chr_size);
+    add(ch2, haystack, hlen_tmp);
+    (this->*haystack_load_1chr)(ch2, Address(ch2), noreg);
+    bne(ch1, ch2, STR2_NEXT);
+    j(MATCH);
+    BLOCK_COMMENT("} string_indexof DO3");
+  }
+
+  if (needle_con_cnt == -1 || needle_con_cnt == 1) {
+    Label DO1_LOOP;
+
+    BLOCK_COMMENT("string_indexof DO1 {");
+    bind(DO1);
+    (this->*needle_load_1chr)(ch1, Address(needle), noreg);
+    sub(result_tmp, haystack_len, 1);
+    mv(tmp3, result_tmp);
+    if (haystack_chr_shift) {
+      slli(tmp3, result_tmp, haystack_chr_shift);
+    }
+    add(haystack, haystack, tmp3);
+    neg(hlen_neg, tmp3);
+
+    bind(DO1_LOOP);
+    add(tmp3, haystack, hlen_neg);
+    (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg);
+    beq(ch1, ch2, MATCH);
+    add(hlen_neg, hlen_neg, haystack_chr_size);
+    blez(hlen_neg, DO1_LOOP); // loop exit falls through to NOMATCH
+    BLOCK_COMMENT("} string_indexof DO1");
+  }
+
+  bind(NOMATCH);
+  mv(result, -1);
+  j(DONE);
+
+  bind(MATCH);
+  srai(t0, hlen_neg, haystack_chr_shift); // byte offset of the match (relative to the last start position) -> characters
+  add(result, result_tmp, t0); // result_tmp holds the index of the last start position
+
+  bind(DONE);
+}
+
+// Compare strings. Inline paths: result = difference of the first mismatching characters, else cnt1 - cnt2 (in characters).
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                       Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
+                                       Register tmp3, int ae)
+{
+  Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
+          DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
+          SHORT_LOOP_START, TAIL_CHECK, L;
+
+  const int STUB_THRESHOLD = 64 + 8; // minimum length (in characters) at or above which the stub is called
+  bool isLL = ae == StrIntrinsicNode::LL;
+  bool isLU = ae == StrIntrinsicNode::LU;
+  bool isUL = ae == StrIntrinsicNode::UL;
+
+  bool str1_isL = isLL || isLU;
+  bool str2_isL = isLL || isUL;
+
+  // for L strings, 1 byte for 1 character
+  // for U strings, 2 bytes for 1 character
+  int str1_chr_size = str1_isL ? 1 : 2;
+  int str2_chr_size = str2_isL ? 1 : 2;
+  int minCharsInWord = isLL ? wordSize : wordSize / 2;
+
+  load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
+  load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
+
+  BLOCK_COMMENT("string_compare {");
+
+  // Bizarrely, the counts are passed in bytes, regardless of whether they
+  // are L or U strings, however the result is always in characters.
+  if (!str1_isL) {
+    sraiw(cnt1, cnt1, 1);
+  }
+  if (!str2_isL) {
+    sraiw(cnt2, cnt2, 1);
+  }
+
+  // Compute the minimum of the string lengths and save the difference in result.
+  sub(result, cnt1, cnt2);
+  bgt(cnt1, cnt2, L);
+  mv(cnt2, cnt1); // cnt2 = min(cnt1, cnt2)
+  bind(L);
+
+  // A very short string
+  mv(t0, minCharsInWord);
+  ble(cnt2, t0, SHORT_STRING);
+
+  // Compare longwords
+  // load first parts of strings and finish initialization while loading
+  {
+    if (str1_isL == str2_isL) { // LL or UU
+      // load 8 bytes once to compare
+      ld(tmp1, Address(str1));
+      beq(str1, str2, DONE); // same address: result already holds the length difference
+      ld(tmp2, Address(str2));
+      mv(t0, STUB_THRESHOLD);
+      bge(cnt2, t0, STUB);
+      sub(cnt2, cnt2, minCharsInWord);
+      beqz(cnt2, TAIL_CHECK);
+      // convert cnt2 from characters to bytes
+      if (!str1_isL) {
+        slli(cnt2, cnt2, 1);
+      }
+      add(str2, str2, cnt2);
+      add(str1, str1, cnt2);
+      sub(cnt2, zr, cnt2); // cnt2 becomes a negative byte offset from the advanced pointers
+    } else if (isLU) { // LU case
+      lwu(tmp1, Address(str1));
+      ld(tmp2, Address(str2));
+      mv(t0, STUB_THRESHOLD);
+      bge(cnt2, t0, STUB);
+      addi(cnt2, cnt2, -4);
+      add(str1, str1, cnt2);
+      sub(cnt1, zr, cnt2); // cnt1 is reused as the negative byte offset into str1
+      slli(cnt2, cnt2, 1);
+      add(str2, str2, cnt2);
+      inflate_lo32(tmp3, tmp1);
+      mv(tmp1, tmp3);
+      sub(cnt2, zr, cnt2);
+      addi(cnt1, cnt1, 4);
+    } else { // UL case
+      ld(tmp1, Address(str1));
+      lwu(tmp2, Address(str2));
+      mv(t0, STUB_THRESHOLD);
+      bge(cnt2, t0, STUB);
+      addi(cnt2, cnt2, -4);
+      slli(t0, cnt2, 1);
+      sub(cnt1, zr, t0); // cnt1 is reused as the negative byte offset into str1
+      add(str1, str1, t0);
+      add(str2, str2, cnt2);
+      inflate_lo32(tmp3, tmp2);
+      mv(tmp2, tmp3);
+      sub(cnt2, zr, cnt2);
+      addi(cnt1, cnt1, 8);
+    }
+    addi(cnt2, cnt2, isUL ? 4 : 8);
+    bgez(cnt2, TAIL);
+    xorr(tmp3, tmp1, tmp2);
+    bnez(tmp3, DIFFERENCE);
+
+    // main loop
+    bind(NEXT_WORD);
+    if (str1_isL == str2_isL) { // LL or UU
+      add(t0, str1, cnt2);
+      ld(tmp1, Address(t0));
+      add(t0, str2, cnt2);
+      ld(tmp2, Address(t0));
+      addi(cnt2, cnt2, 8);
+    } else if (isLU) { // LU case
+      add(t0, str1, cnt1);
+      lwu(tmp1, Address(t0));
+      add(t0, str2, cnt2);
+      ld(tmp2, Address(t0));
+      addi(cnt1, cnt1, 4);
+      inflate_lo32(tmp3, tmp1);
+      mv(tmp1, tmp3);
+      addi(cnt2, cnt2, 8);
+    } else { // UL case
+      add(t0, str2, cnt2);
+      lwu(tmp2, Address(t0));
+      add(t0, str1, cnt1);
+      ld(tmp1, Address(t0));
+      inflate_lo32(tmp3, tmp2);
+      mv(tmp2, tmp3);
+      addi(cnt1, cnt1, 8);
+      addi(cnt2, cnt2, 4);
+    }
+    bgez(cnt2, TAIL);
+
+    xorr(tmp3, tmp1, tmp2);
+    beqz(tmp3, NEXT_WORD);
+    j(DIFFERENCE);
+    bind(TAIL);
+    xorr(tmp3, tmp1, tmp2);
+    bnez(tmp3, DIFFERENCE);
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+    if (str1_isL == str2_isL) { // LL or UU
+      ld(tmp1, Address(str1));
+      ld(tmp2, Address(str2));
+    } else if (isLU) { // LU case
+      lwu(tmp1, Address(str1));
+      ld(tmp2, Address(str2));
+      inflate_lo32(tmp3, tmp1);
+      mv(tmp1, tmp3);
+    } else { // UL case
+      lwu(tmp2, Address(str2));
+      ld(tmp1, Address(str1));
+      inflate_lo32(tmp3, tmp2);
+      mv(tmp2, tmp3);
+    }
+    bind(TAIL_CHECK);
+    xorr(tmp3, tmp1, tmp2);
+    beqz(tmp3, DONE); // all compared words equal: result keeps the length difference
+
+    // Find the first different characters in the longwords and
+    // compute their difference.
+    bind(DIFFERENCE);
+    ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb
+    srl(tmp1, tmp1, result);
+    srl(tmp2, tmp2, result);
+    if (isLL) {
+      andi(tmp1, tmp1, 0xFF);
+      andi(tmp2, tmp2, 0xFF);
+    } else {
+      andi(tmp1, tmp1, 0xFFFF);
+      andi(tmp2, tmp2, 0xFFFF);
+    }
+    sub(result, tmp1, tmp2);
+    j(DONE);
+  }
+
+  bind(STUB); // long strings: delegate to the per-encoding compare_long_string stub
+  RuntimeAddress stub = NULL;
+  switch (ae) {
+    case StrIntrinsicNode::LL:
+      stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL());
+      break;
+    case StrIntrinsicNode::UU:
+      stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU());
+      break;
+    case StrIntrinsicNode::LU:
+      stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU());
+      break;
+    case StrIntrinsicNode::UL:
+      stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL());
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  assert(stub.target() != NULL, "compare_long_string stub has not been generated");
+  trampoline_call(stub);
+  j(DONE);
+
+  bind(SHORT_STRING);
+  // Is the minimum length zero?
+  beqz(cnt2, DONE);
+  // arrange code to do most branches while loading and loading next characters
+  // while comparing previous
+  (this->*str1_load_chr)(tmp1, Address(str1), t0);
+  addi(str1, str1, str1_chr_size);
+  addi(cnt2, cnt2, -1);
+  beqz(cnt2, SHORT_LAST_INIT);
+  (this->*str2_load_chr)(cnt1, Address(str2), t0); // cnt1 is reused here to hold a str2 character
+  addi(str2, str2, str2_chr_size);
+  j(SHORT_LOOP_START);
+  bind(SHORT_LOOP);
+  addi(cnt2, cnt2, -1);
+  beqz(cnt2, SHORT_LAST);
+  bind(SHORT_LOOP_START);
+  (this->*str1_load_chr)(tmp2, Address(str1), t0);
+  addi(str1, str1, str1_chr_size);
+  (this->*str2_load_chr)(t0, Address(str2), t0);
+  addi(str2, str2, str2_chr_size);
+  bne(tmp1, cnt1, SHORT_LOOP_TAIL); // compare the previously loaded pair (tmp1 / cnt1)
+  addi(cnt2, cnt2, -1);
+  beqz(cnt2, SHORT_LAST2);
+  (this->*str1_load_chr)(tmp1, Address(str1), t0);
+  addi(str1, str1, str1_chr_size);
+  (this->*str2_load_chr)(cnt1, Address(str2), t0);
+  addi(str2, str2, str2_chr_size);
+  beq(tmp2, t0, SHORT_LOOP); // compare the other pair (tmp2 / t0)
+  sub(result, tmp2, t0);
+  j(DONE);
+  bind(SHORT_LOOP_TAIL);
+  sub(result, tmp1, cnt1);
+  j(DONE);
+  bind(SHORT_LAST2);
+  beq(tmp2, t0, DONE);
+  sub(result, tmp2, t0);
+
+  j(DONE);
+  bind(SHORT_LAST_INIT);
+  (this->*str2_load_chr)(cnt1, Address(str2), t0);
+  addi(str2, str2, str2_chr_size);
+  bind(SHORT_LAST);
+  beq(tmp1, cnt1, DONE);
+  sub(result, tmp1, cnt1);
+
+  bind(DONE);
+
+  BLOCK_COMMENT("} string_compare");
+}
+
+void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
+                                      Register tmp4, Register tmp5, Register tmp6, Register result,
+                                      Register cnt1, int elem_size) {
+  Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR;
+  Register tmp1 = t0;
+  Register tmp2 = t1;
+  Register cnt2 = tmp2;  // cnt2 only used in array length compare
+  Register elem_per_word = tmp6;
+  int log_elem_size = exact_log2(elem_size);
+  int length_offset = arrayOopDesc::length_offset_in_bytes();
+  int base_offset   = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+
+  assert(elem_size == 1 || elem_size == 2, "must be char or byte");
+  assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6);
+  mv(elem_per_word, wordSize / elem_size);
+  // Sets result to true iff a1 and a2 are the same array, or both are non-null with equal length and contents.
+  BLOCK_COMMENT("arrays_equals {");
+
+  // if (a1 == a2), return true
+  beq(a1, a2, SAME);
+
+  mv(result, false);
+  beqz(a1, DONE); // exactly one of the arrays is null: not equal
+  beqz(a2, DONE);
+  lwu(cnt1, Address(a1, length_offset));
+  lwu(cnt2, Address(a2, length_offset));
+  bne(cnt2, cnt1, DONE); // different lengths: not equal
+  beqz(cnt1, SAME); // two empty arrays are equal
+
+  slli(tmp5, cnt1, 3 + log_elem_size); // tmp5 = array length in bits
+  sub(tmp5, zr, tmp5); // negated: used below as the sll shift amount for the final partial word
+  add(a1, a1, base_offset);
+  add(a2, a2, base_offset);
+  ld(tmp3, Address(a1, 0));
+  ld(tmp4, Address(a2, 0));
+  ble(cnt1, elem_per_word, SHORT); // short or same
+
+  // Main 16 byte comparison loop with 2 exits
+  bind(NEXT_DWORD); {
+    ld(tmp1, Address(a1, wordSize));
+    ld(tmp2, Address(a2, wordSize));
+    sub(cnt1, cnt1, 2 * wordSize / elem_size);
+    blez(cnt1, TAIL);
+    bne(tmp3, tmp4, DONE);
+    ld(tmp3, Address(a1, 2 * wordSize));
+    ld(tmp4, Address(a2, 2 * wordSize));
+    add(a1, a1, 2 * wordSize);
+    add(a2, a2, 2 * wordSize);
+    ble(cnt1, elem_per_word, TAIL2);
+  } beq(tmp1, tmp2, NEXT_DWORD);
+  j(DONE);
+
+  bind(TAIL); // at most two words left: tmp3/tmp4 is compared in full, tmp1/tmp2 after the shift
+  xorr(tmp4, tmp3, tmp4);
+  xorr(tmp2, tmp1, tmp2);
+  sll(tmp2, tmp2, tmp5);
+  orr(tmp5, tmp4, tmp2);
+  j(IS_TMP5_ZR);
+
+  bind(TAIL2);
+  bne(tmp1, tmp2, DONE);
+
+  bind(SHORT); // at most one word left: compare tmp3/tmp4 after the shift
+  xorr(tmp4, tmp3, tmp4);
+  sll(tmp5, tmp4, tmp5);
+
+  bind(IS_TMP5_ZR);
+  bnez(tmp5, DONE); // any remaining difference bit means not equal (result is still false)
+
+  bind(SAME);
+  mv(result, true);
+  // That's it.
+  bind(DONE);
+
+  BLOCK_COMMENT("} arrays_equals"); // closing marker matches the "arrays_equals {" opener
+}
+
+// Compare Strings
+
+// For Strings we're passed the address of the first characters in a1
+// and a2 and the length in bytes in cnt1.
+// elem_size is the element size in bytes: either 1 or 2.
+// There are two implementations.  For strings >= 8 bytes, all
+// comparisons (including the final one, which may overlap) are
+// performed 8 bytes at a time.  For strings < 8 bytes, we compare a
+// word, then a halfword, and then a byte.
+
+void MacroAssembler::string_equals(Register a1, Register a2,
+                                      Register result, Register cnt1, int elem_size)
+{
+  Label SAME, DONE, SHORT, NEXT_WORD;
+  Register tmp1 = t0;
+  Register tmp2 = t1;
+
+  assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte");
+  assert_different_registers(a1, a2, result, cnt1, t0, t1);
+
+  BLOCK_COMMENT("string_equals {");
+
+  mv(result, false); // every early exit to DONE reports "not equal"
+
+  // Check for short strings, i.e. smaller than wordSize.
+  sub(cnt1, cnt1, wordSize);
+  bltz(cnt1, SHORT);
+
+  // Main 8 byte comparison loop.
+  bind(NEXT_WORD); {
+    ld(tmp1, Address(a1, 0));
+    add(a1, a1, wordSize);
+    ld(tmp2, Address(a2, 0));
+    add(a2, a2, wordSize);
+    sub(cnt1, cnt1, wordSize);
+    bne(tmp1, tmp2, DONE);
+  } bgtz(cnt1, NEXT_WORD);
+
+  // Last longword.  cnt1 (in bytes) is now in [-8, 0], so a1 + cnt1 and
+  // a2 + cnt1 address the final 8 bytes, possibly overlapping bytes that
+  // were already compared.  -8 (i.e. -4 chars) only happens when the length
+  // was exactly 8 bytes; the same longword is then compared twice, which is
+  // still faster than another conditional branch.
+  add(tmp1, a1, cnt1);
+  ld(tmp1, Address(tmp1, 0));
+  add(tmp2, a2, cnt1);
+  ld(tmp2, Address(tmp2, 0));
+  bne(tmp1, tmp2, DONE);
+  j(SAME);
+
+  bind(SHORT);
+  Label TAIL03, TAIL01;
+
+  // 0-7 bytes left. cnt1 is (length - 8) here; its low three bits equal those of the length.
+  andi(t0, cnt1, 4);
+  beqz(t0, TAIL03);
+  {
+    lwu(tmp1, Address(a1, 0));
+    add(a1, a1, 4);
+    lwu(tmp2, Address(a2, 0));
+    add(a2, a2, 4);
+    bne(tmp1, tmp2, DONE);
+  }
+
+  bind(TAIL03);
+  // 0-3 bytes left.
+  andi(t0, cnt1, 2);
+  beqz(t0, TAIL01);
+  {
+    lhu(tmp1, Address(a1, 0));
+    add(a1, a1, 2);
+    lhu(tmp2, Address(a2, 0));
+    add(a2, a2, 2);
+    bne(tmp1, tmp2, DONE);
+  }
+
+  bind(TAIL01);
+  if (elem_size == 1) { // Only needed when comparing 1-byte elements
+    // 0-1 bytes left.
+    andi(t0, cnt1, 1);
+    beqz(t0, SAME);
+    {
+      lbu(tmp1, a1, 0);
+      lbu(tmp2, a2, 0);
+      bne(tmp1, tmp2, DONE);
+    }
+  }
+
+  // Arrays are equal.
+  bind(SAME);
+  mv(result, true);
+
+  // That's it.
+  bind(DONE);
+  BLOCK_COMMENT("} string_equals");
+}
+
+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far);
+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label,
+                                                              bool is_far, bool is_unordered);
+
+static conditional_branch_insn conditional_branches[] =
+{
+  /* SHORT branches (the table is indexed by cmpFlag, see cmp_branch) */
+  (conditional_branch_insn)&Assembler::beq,
+  (conditional_branch_insn)&Assembler::bgt,
+  NULL, // BoolTest::overflow
+  (conditional_branch_insn)&Assembler::blt,
+  (conditional_branch_insn)&Assembler::bne,
+  (conditional_branch_insn)&Assembler::ble,
+  NULL, // BoolTest::no_overflow
+  (conditional_branch_insn)&Assembler::bge,
+
+  /* UNSIGNED branches (same slot order as the group above) */
+  (conditional_branch_insn)&Assembler::beq,
+  (conditional_branch_insn)&Assembler::bgtu,
+  NULL, // overflow slot, no branch instruction
+  (conditional_branch_insn)&Assembler::bltu,
+  (conditional_branch_insn)&Assembler::bne,
+  (conditional_branch_insn)&Assembler::bleu,
+  NULL, // no_overflow slot, no branch instruction
+  (conditional_branch_insn)&Assembler::bgeu
+};
+
+static float_conditional_branch_insn float_conditional_branches[] =
+{
+  /* FLOAT SHORT branches (the table is indexed by cmpFlag, see float_cmp_branch) */
+  (float_conditional_branch_insn)&MacroAssembler::float_beq,
+  (float_conditional_branch_insn)&MacroAssembler::float_bgt,
+  NULL,  // BoolTest::overflow
+  (float_conditional_branch_insn)&MacroAssembler::float_blt,
+  (float_conditional_branch_insn)&MacroAssembler::float_bne,
+  (float_conditional_branch_insn)&MacroAssembler::float_ble,
+  NULL, // BoolTest::no_overflow
+  (float_conditional_branch_insn)&MacroAssembler::float_bge,
+
+  /* DOUBLE SHORT branches (same slot order as the group above) */
+  (float_conditional_branch_insn)&MacroAssembler::double_beq,
+  (float_conditional_branch_insn)&MacroAssembler::double_bgt,
+  NULL, // overflow slot, no branch helper
+  (float_conditional_branch_insn)&MacroAssembler::double_blt,
+  (float_conditional_branch_insn)&MacroAssembler::double_bne,
+  (float_conditional_branch_insn)&MacroAssembler::double_ble,
+  NULL, // no_overflow slot, no branch helper
+  (float_conditional_branch_insn)&MacroAssembler::double_bge
+};
+
+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) {
+  assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])) &&
+         conditional_branches[cmpFlag] != NULL, "invalid conditional branch index"); // NULL slots have no branch insn
+  (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); // dispatch through the cmpFlag-indexed table
+}
+
+// This function should only be used by C2. Flip the unordered when unordered-greater, C2 would use
+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode().
+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) {
+  assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])) &&
+         float_conditional_branches[cmpFlag] != NULL, "invalid float conditional branch index"); // NULL slots have no helper
+  int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); // condition with the double-precision bit masked off
+  (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far,
+                                               !(booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)));
+}
+
+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
+  // Branch on op1 tested against zero. eq and le share the "is zero" branch,
+  // ne and gt share the "is non-zero" branch; any other cmpFlag is a caller error.
+  const bool branch_if_zero    = (cmpFlag == BoolTest::eq) || (cmpFlag == BoolTest::le);
+  const bool branch_if_nonzero = (cmpFlag == BoolTest::ne) || (cmpFlag == BoolTest::gt);
+
+  if (branch_if_zero) {
+    beqz(op1, L, is_far);
+  } else if (branch_if_nonzero) {
+    bnez(op1, L, is_far);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) {
+  // Branch to L when op1 is zero (eq) or non-zero (ne).
+  if (cmpFlag == BoolTest::eq) {
+    beqz(op1, L, is_far);
+  } else if (cmpFlag == BoolTest::ne) {
+    bnez(op1, L, is_far);
+  } else {
+    // Only eq and ne are accepted here.
+    ShouldNotReachHere();
+  }
+  // Nothing else is emitted.
+}
+
+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) {
+  Label skip_move;  // bound right after the move
+  cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, skip_move);  // cmpFlag with bit neg_cond_bits toggled (presumably the negated condition)
+  mv(dst, src);     // executed only when the branch above is not taken
+  bind(skip_move);
+}
+
+// Floating-point min/max of src1 and src2 into dst; dst is set to NaN if any input is NaN.
+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2,
+                                  bool is_double, bool is_min) {
+  assert_different_registers(dst, src1, src2);
+  Label no_nan;
+
+  // Write zero to the FP flags first, so the frflags read below only reflects this sequence.
+  fsflags(zr);
+  if (is_double) {
+    if (is_min) { fmin_d(dst, src1, src2); } else { fmax_d(dst, src1, src2); }
+    flt_d(zr, src1, src2);  // Checking NaNs: the result goes to zr, only the flags matter
+  } else {
+    if (is_min) { fmin_s(dst, src1, src2); } else { fmax_s(dst, src1, src2); }
+    flt_s(zr, src1, src2);  // Checking NaNs: the result goes to zr, only the flags matter
+  }
+
+  // No flag raised: dst already holds the min/max.
+  frflags(t0);
+  beqz(t0, no_nan);
+
+  // In case of NaNs
+  if (is_double) {
+    fadd_d(dst, src1, src2);
+  } else {
+    fadd_s(dst, src1, src2);
+  }
+  bind(no_nan);
+}
+
+#endif // COMPILER2
+
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
new file mode 100644
index 0000000000..eca18b51f3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -0,0 +1,984 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
+
+#include "asm/assembler.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "nativeInst_riscv.hpp"
+
+// MacroAssembler extends Assembler by frequently used macros.
+//
+// Instructions for which a 'better' code sequence exists depending
+// on arguments should also go in here.
+
+class MacroAssembler: public Assembler {
+
+ public:
+  MacroAssembler(CodeBuffer* code) : Assembler(code) {
+  }
+  virtual ~MacroAssembler() {}
+
+  void safepoint_poll(Label& slow_path);
+  void safepoint_poll_acquire(Label& slow_path);
+
+  // Biased locking support
+  // lock_reg and obj_reg must be loaded up with the appropriate values.
+  // swap_reg is killed.
+  // tmp_reg must be supplied and must not be rscratch1 or rscratch2
+  // Optional slow case is for implementations (interpreter and C1) which branch to
+  // slow case directly. Leaves condition codes set for C2's Fast_Lock node.
+  // Returns offset of first potentially-faulting instruction for null
+  // check info (currently consumed only by C1). If
+  // swap_reg_contains_mark is true then returns -1 as it is assumed
+  // the calling code has already passed any potential faults.
+  int biased_locking_enter(Register lock_reg, Register obj_reg,
+                           Register swap_reg, Register tmp_reg,
+                           bool swap_reg_contains_mark,
+                           Label& done, Label* slow_case = NULL,
+                           BiasedLockingCounters* counters = NULL,
+                           Register flag = noreg);
+  void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg);
+
+  // Helper functions for statistics gathering.
+  // Unconditional atomic increment.
+  void atomic_incw(Register counter_addr, Register tmp);
+  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
+    la(tmp1, counter_addr);
+    atomic_incw(tmp1, tmp2);
+  }
+
+  // Alignment
+  void align(int modulus, int extra_offset = 0);
+  static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
+    assert(is_aligned(pc, alignment), "bad alignment");
+  }
+
+  // Stack frame creation/removal
+  // Note that SP must be updated to the right place before saving/restoring RA and FP
+  // because signal based thread suspend/resume could happen asynchronously.
+  void enter() {
+    addi(sp, sp, - 2 * wordSize);   // reserve two words first (see the note on SP ordering above)
+    sd(ra, Address(sp, wordSize));  // RA is saved in the upper slot
+    sd(fp, Address(sp));            // caller's FP is saved in the lower slot
+    addi(fp, sp, 2 * wordSize);     // new FP = SP value on entry, just above the saved pair
+  }
+
+  void leave() {
+    addi(sp, fp, - 2 * wordSize);   // point SP at the RA/FP pair stored two words below FP
+    ld(fp, Address(sp));            // restore the saved FP from the lower slot
+    ld(ra, Address(sp, wordSize));  // restore the saved RA from the upper slot
+    addi(sp, sp, 2 * wordSize);     // release the two-word save area
+  }
+
+
+  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
+  // The pointer will be loaded into the thread register.
+  void get_thread(Register thread);
+
+  // Support for VM calls
+  //
+  // It is imperative that all calls into the VM are handled via the call_VM macros.
+  // They make sure that the stack linkage is setup correctly. call_VM's correspond
+  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
+
+  void call_VM(Register oop_result,
+               address entry_point,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  // Overloadings with last_Java_sp
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               int number_of_arguments = 0,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2,
+               bool check_exceptions = true);
+  void call_VM(Register oop_result,
+               Register last_java_sp,
+               address entry_point,
+               Register arg_1, Register arg_2, Register arg_3,
+               bool check_exceptions = true);
+
+  void get_vm_result(Register oop_result, Register java_thread);
+  void get_vm_result_2(Register metadata_result, Register java_thread);
+
+  // These always tightly bind to MacroAssembler::call_VM_leaf_base
+  // bypassing the virtual implementation
+  void call_VM_leaf(address entry_point,
+                    int number_of_arguments = 0);
+  void call_VM_leaf(address entry_point,
+                    Register arg_0);
+  void call_VM_leaf(address entry_point,
+                    Register arg_0, Register arg_1);
+  void call_VM_leaf(address entry_point,
+                    Register arg_0, Register arg_1, Register arg_2);
+
+  // These always tightly bind to MacroAssembler::call_VM_base
+  // bypassing the virtual implementation
+  void super_call_VM_leaf(address entry_point, Register arg_0);
+  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
+  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
+  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
+
+  // last Java Frame (fills frame anchor)
+  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
+  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
+  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
+
+  // thread in the default location (xthread)
+  void reset_last_Java_frame(bool clear_fp);
+
+  virtual void call_VM_leaf_base(
+    address entry_point,                // the entry point
+    int     number_of_arguments,        // the number of arguments to pop after the call
+    Label*  retaddr = NULL
+  );
+
+  virtual void call_VM_leaf_base(
+    address entry_point,                // the entry point
+    int     number_of_arguments,        // the number of arguments to pop after the call
+    Label&  retaddr) {
+    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
+  }
+
+  virtual void call_VM_base(           // returns the register containing the thread upon return
+    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
+    Register java_thread,              // the thread if computed before     ; use noreg otherwise
+    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
+    address  entry_point,              // the entry point
+    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
+    bool     check_exceptions          // whether to check for pending exceptions after return
+  );
+
+  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
+
+  virtual void check_and_handle_earlyret(Register java_thread);
+  virtual void check_and_handle_popframe(Register java_thread);
+
+  void resolve_oop_handle(Register result, Register tmp = x15);
+  void resolve_jobject(Register value, Register thread, Register tmp);
+
+  void movoop(Register dst, jobject obj, bool immediate = false);
+  void mov_metadata(Register dst, Metadata* obj);
+  void bang_stack_size(Register size, Register tmp);
+  void set_narrow_oop(Register dst, jobject obj);
+  void set_narrow_klass(Register dst, Klass* k);
+
+  void load_mirror(Register dst, Register method, Register tmp = x15);
+  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
+                      Address src, Register tmp1, Register thread_tmp);
+  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
+                       Register src, Register tmp1, Register thread_tmp);
+  void load_klass(Register dst, Register src);
+  void store_klass(Register dst, Register src);
+  void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);
+
+  void encode_klass_not_null(Register r);
+  void decode_klass_not_null(Register r);
+  void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+  void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
+  void decode_heap_oop_not_null(Register r);
+  void decode_heap_oop_not_null(Register dst, Register src);
+  void decode_heap_oop(Register d, Register s);
+  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
+  void encode_heap_oop(Register d, Register s);
+  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }  // in-place variant: r is both source and destination
+  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
+                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
+                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
+  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
+                      Register thread_tmp = noreg, DecoratorSet decorators = 0);
+
+  void store_klass_gap(Register dst, Register src);
+
+  // currently unimplemented
+  // Used for storing NULL. All other oop constants should be
+  // stored using routines that take a jobject.
+  void store_heap_oop_null(Address dst);
+
+  void load_prototype_header(Register dst, Register src);
+
+  // This dummy is to prevent a call to store_heap_oop from
+  // converting a zero (like NULL) into a Register by giving
+  // the compiler two choices it can't resolve
+
+  void store_heap_oop(Address dst, void* dummy);
+
+  // Support for NULL-checks
+  //
+  // Generates code that causes a NULL OS exception if the content of reg is NULL.
+  // If the accessed location is M[reg + offset] and the offset is known, provide the
+  // offset. No explicit code generation is needed if the offset is within a certain
+  // range (0 <= offset <= page_size).
+
+  virtual void null_check(Register reg, int offset = -1);
+  static bool needs_explicit_null_check(intptr_t offset);
+  static bool uses_implicit_null_check(void* address);
+
+  // idiv variant which deals with MINLONG as dividend and -1 as divisor
+  int corrected_idivl(Register result, Register rs1, Register rs2,
+                      bool want_remainder);
+  int corrected_idivq(Register result, Register rs1, Register rs2,
+                      bool want_remainder);
+
+  // interface method calling
+  void lookup_interface_method(Register recv_klass,
+                               Register intf_klass,
+                               RegisterOrConstant itable_index,
+                               Register method_result,
+                               Register scan_tmp,
+                               Label& no_such_interface,
+                               bool return_method = true);
+
+  // virtual method calling
+  // n.b. x86 allows RegisterOrConstant for vtable_index
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
+  // Form an address from base + offset in Rd. Rd may or may not
+  // actually be used: you must use the Address that is returned. It
+  // is up to you to ensure that the shift provided matches the size
+  // of your data.
+  Address form_address(Register Rd, Register base, long byte_offset);
+
+  // allocation
+  void tlab_allocate(
+    Register obj,                   // result: pointer to object after successful allocation
+    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,     // object size in bytes if   known at compile time
+    Register tmp1,                  // temp register
+    Register tmp2,                  // temp register
+    Label&   slow_case,             // continuation point if fast allocation fails
+    bool is_far = false
+  );
+
+  void eden_allocate(
+    Register obj,                   // result: pointer to object after successful allocation
+    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
+    int      con_size_in_bytes,     // object size in bytes if   known at compile time
+    Register tmp,                   // temp register
+    Label&   slow_case,             // continuation point if fast allocation fails
+    bool is_far = false
+  );
+
+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except tmp_reg
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register tmp_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                                     Register super_check_offset = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register tmp1_reg,
+                                     Register tmp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register tmp_reg,
+                           Label& L_success);
+
+  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
+
+  // only if +VerifyOops
+  void verify_oop(Register reg, const char* s = "broken oop");
+  void verify_oop_addr(Address addr, const char* s = "broken oop addr");
+
+  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}  // no-op stub
+  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}   // no-op stub
+
+#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
+#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
+
+  // A more convenient access to fence for our purposes
+  // We use four bits to indicate the read and write bits in the predecessors and successors,
+  // and extend i for r, o for w if UseConservativeFence is enabled.
+  enum Membar_mask_bits {
+    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
+    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
+    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
+    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
+    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
+  };
+
+  void membar(uint32_t order_constraint);
+
+  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
+    predecessor = (order_constraint >> 2) & 0x3;  // bits 3:2 of the mask
+    successor = order_constraint & 0x3;           // bits 1:0 of the mask
+
+    // extend rw -> iorw:
+    // 01(w) -> 0101(ow)
+    // 10(r) -> 1010(ir)
+    // 11(rw)-> 1111(iorw)
+    if (UseConservativeFence) {
+      predecessor |= predecessor << 2;  // copy the two rw bits into the upper (io) pair
+      successor |= successor << 2;      // same widening for the successor set
+    }
+  }
+
+  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
+    return ((predecessor & 0x3) << 2) | (successor & 0x3);
+  }
+
+  // prints msg, dumps registers and stops execution
+  void stop(const char* msg);
+
+  static void debug64(char* msg, int64_t pc, int64_t regs[]);
+
+  void unimplemented(const char* what = "");
+
+  void should_not_reach_here() { stop("should not reach here"); }
+
+  static address target_addr_for_insn(address insn_addr);
+
+  // Required platform-specific helpers for Label::patch_instructions.
+  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
+  static int pd_patch_instruction_size(address branch, address target);
+  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
+    pd_patch_instruction_size(branch, target);
+  }
+  static address pd_call_destination(address branch) {
+    return target_addr_for_insn(branch);
+  }
+
+  static int patch_oop(address insn_addr, address o);
+  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
+  void emit_static_call_stub();
+
+  // The following 4 methods return the offset of the appropriate move instruction
+
+  // Support for fast byte/short loading with zero extension (depending on particular CPU)
+  int load_unsigned_byte(Register dst, Address src);
+  int load_unsigned_short(Register dst, Address src);
+
+  // Support for fast byte/short loading with sign extension (depending on particular CPU)
+  int load_signed_byte(Register dst, Address src);
+  int load_signed_short(Register dst, Address src);
+
+  // Load and store values by size and signed-ness
+  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
+  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
+
+ public:
+  // Standard pseudoinstruction
+  void nop();
+  void mv(Register Rd, Register Rs);
+  void notr(Register Rd, Register Rs);
+  void neg(Register Rd, Register Rs);
+  void negw(Register Rd, Register Rs);
+  void sext_w(Register Rd, Register Rs);
+  void zext_b(Register Rd, Register Rs);
+  void seqz(Register Rd, Register Rs);          // set if = zero
+  void snez(Register Rd, Register Rs);          // set if != zero
+  void sltz(Register Rd, Register Rs);          // set if < zero
+  void sgtz(Register Rd, Register Rs);          // set if > zero
+
+  // Float pseudoinstruction
+  void fmv_s(FloatRegister Rd, FloatRegister Rs);
+  void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
+  void fneg_s(FloatRegister Rd, FloatRegister Rs);
+
+  // Double pseudoinstruction
+  void fmv_d(FloatRegister Rd, FloatRegister Rs);
+  void fabs_d(FloatRegister Rd, FloatRegister Rs);
+  void fneg_d(FloatRegister Rd, FloatRegister Rs);
+
+  // Pseudoinstruction for control and status register
+  void rdinstret(Register Rd);                  // read instruction-retired counter
+  void rdcycle(Register Rd);                    // read cycle counter
+  void rdtime(Register Rd);                     // read time
+  void csrr(Register Rd, unsigned csr);         // read csr
+  void csrw(unsigned csr, Register Rs);         // write csr
+  void csrs(unsigned csr, Register Rs);         // set bits in csr
+  void csrc(unsigned csr, Register Rs);         // clear bits in csr
+  void csrwi(unsigned csr, unsigned imm);
+  void csrsi(unsigned csr, unsigned imm);
+  void csrci(unsigned csr, unsigned imm);
+  void frcsr(Register Rd);                      // read floating-point csr
+  void fscsr(Register Rd, Register Rs);         // swap floating-point csr
+  void fscsr(Register Rs);                      // write floating-point csr
+  void frrm(Register Rd);                       // read floating-point rounding mode
+  void fsrm(Register Rd, Register Rs);          // swap floating-point rounding mode
+  void fsrm(Register Rs);                       // write floating-point rounding mode
+  void fsrmi(Register Rd, unsigned imm);
+  void fsrmi(unsigned imm);
+  void frflags(Register Rd);                    // read floating-point exception flags
+  void fsflags(Register Rd, Register Rs);       // swap floating-point exception flags
+  void fsflags(Register Rs);                    // write floating-point exception flags
+  void fsflagsi(Register Rd, unsigned imm);
+  void fsflagsi(unsigned imm);
+
+  void beqz(Register Rs, const address &dest);
+  void bnez(Register Rs, const address &dest);
+  void blez(Register Rs, const address &dest);
+  void bgez(Register Rs, const address &dest);
+  void bltz(Register Rs, const address &dest);
+  void bgtz(Register Rs, const address &dest);
+  void la(Register Rd, Label &label);
+  void la(Register Rd, const address &dest);
+  void la(Register Rd, const Address &adr);
+  //label
+  void beqz(Register Rs, Label &l, bool is_far = false);
+  void bnez(Register Rs, Label &l, bool is_far = false);
+  void blez(Register Rs, Label &l, bool is_far = false);
+  void bgez(Register Rs, Label &l, bool is_far = false);
+  void bltz(Register Rs, Label &l, bool is_far = false);
+  void bgtz(Register Rs, Label &l, bool is_far = false);
+  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
+
+private:
+  int push_reg(unsigned int bitset, Register stack);
+  int pop_reg(unsigned int bitset, Register stack);
+  int push_fp(unsigned int bitset, Register stack);
+  int pop_fp(unsigned int bitset, Register stack);
+
+public:
+  void push_reg(Register Rs);
+  void pop_reg(Register Rd);
+  void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
+  void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) pop_reg(regs.bits(), stack); }
+
+  // Push and pop everything that might be clobbered by a native
+  // runtime call except t0 and t1. (They are always
+  // temporary registers, so we don't have to protect them.)
+  // Additional registers can be excluded in a passed RegSet.
+  void push_call_clobbered_registers_except(RegSet exclude);
+  void pop_call_clobbered_registers_except(RegSet exclude);
+
+  void push_call_clobbered_registers() {
+    push_call_clobbered_registers_except(RegSet());
+  }
+  void pop_call_clobbered_registers() {
+    pop_call_clobbered_registers_except(RegSet());
+  }
+
+  void pusha();
+  void popa();
+  void push_CPU_state();
+  void pop_CPU_state();
+
+  // if heap base register is used - reinit it with the correct value
+  void reinit_heapbase();
+
+  void bind(Label& L) {
+    Assembler::bind(L);
+    // fences across basic blocks should not be merged
+    code()->clear_last_insn();
+  }
+
+  // mv
+  void mv(Register Rd, address addr)          { li(Rd, (int64_t)addr); }
+
+  inline void mv(Register Rd, int imm64)                { li(Rd, (int64_t)imm64); }
+  inline void mv(Register Rd, long imm64)               { li(Rd, (int64_t)imm64); }
+  inline void mv(Register Rd, long long imm64)          { li(Rd, (int64_t)imm64); }
+  inline void mv(Register Rd, unsigned int imm64)       { li(Rd, (int64_t)imm64); }
+  inline void mv(Register Rd, unsigned long imm64)      { li(Rd, (int64_t)imm64); }
+  inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); }
+
+  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }
+
+  void mv(Register Rd, Address dest);
+  void mv(Register Rd, RegisterOrConstant src);
+
+  // logic
+  void andrw(Register Rd, Register Rs1, Register Rs2);
+  void orrw(Register Rd, Register Rs1, Register Rs2);
+  void xorrw(Register Rd, Register Rs1, Register Rs2);
+
+  // revb
+  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
+  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
+  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
+  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
+  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in upper 16 bits (48:63) and move to lower
+  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each halfword
+  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each word
+  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
+
+  void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
+  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
+  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
+
+  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
+  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
+  void cmpxchg(Register addr, Register expected,
+               Register new_val,
+               enum operand_size size,
+               Assembler::Aqrl acquire, Assembler::Aqrl release,
+               Register result, bool result_as_bool = false);
+  void cmpxchg_weak(Register addr, Register expected,
+                    Register new_val,
+                    enum operand_size size,
+                    Assembler::Aqrl acquire, Assembler::Aqrl release,
+                    Register result);
+  void cmpxchg_narrow_value_helper(Register addr, Register expected,
+                                   Register new_val,
+                                   enum operand_size size,
+                                   Register tmp1, Register tmp2, Register tmp3);
+  void cmpxchg_narrow_value(Register addr, Register expected,
+                            Register new_val,
+                            enum operand_size size,
+                            Assembler::Aqrl acquire, Assembler::Aqrl release,
+                            Register result, bool result_as_bool,
+                            Register tmp1, Register tmp2, Register tmp3);
+  void weak_cmpxchg_narrow_value(Register addr, Register expected,
+                                 Register new_val,
+                                 enum operand_size size,
+                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
+                                 Register result,
+                                 Register tmp1, Register tmp2, Register tmp3);
+
+  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
+
+  void atomic_xchg(Register prev, Register newv, Register addr);
+  void atomic_xchgw(Register prev, Register newv, Register addr);
+  void atomic_xchgal(Register prev, Register newv, Register addr);
+  void atomic_xchgalw(Register prev, Register newv, Register addr);
+  void atomic_xchgwu(Register prev, Register newv, Register addr);
+  void atomic_xchgalwu(Register prev, Register newv, Register addr);
+
+  static bool far_branches() {
+    return ReservedCodeCacheSize > branch_range;
+  }
+
+  // Jumps that can reach anywhere in the code cache.
+  // Trashes tmp.
+  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
+  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
+
+  static int far_branch_size() {
+    // Two instructions (auipc + jalr, see far_call() & far_jump()) when far
+    // branches are needed, otherwise a single 4-byte instruction.
+    const int insn_bytes = 4;
+    const int insn_count = far_branches() ? 2 : 1;
+    return insn_count * insn_bytes;
+  }
+
+  void load_byte_map_base(Register reg);
+
+  void bang_stack_with_offset(int offset) {
+    // stack grows down, caller passes positive offset
+    assert(offset > 0, "must bang with negative offset");
+    sub(t0, sp, offset);
+    sd(zr, Address(t0));
+  }
+
+  void la_patchable(Register reg1, const Address &dest, int32_t &offset);
+
+  virtual void _call_Unimplemented(address call_site) {
+    mv(t1, call_site);
+  }
+
+  #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
+
+  // Frame creation and destruction shared between JITs.
+  void build_frame(int framesize);
+  void remove_frame(int framesize);
+
+  void reserved_stack_check();
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset);
+
+  void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype);
+  void read_polling_page(Register r, address page, relocInfo::relocType rtype);
+  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
+
+  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
+  address ic_call(address entry, jint method_index = 0);
+
+  // Support for memory inc/dec
+  // n.b. increment/decrement calls with an Address destination will
+  // need to use a scratch register to load the value to be
+  // incremented. increment/decrement calls which add or subtract a
+  // constant value other than sign-extended 12-bit immediate will need
+  // to use a 2nd scratch register to hold the constant. so, an address
+  // increment/decrement may trash both t0 and t1.
+
+  void increment(const Address dst, int64_t value = 1);
+  void incrementw(const Address dst, int32_t value = 1);
+
+  void decrement(const Address dst, int64_t value = 1);
+  void decrementw(const Address dst, int32_t value = 1);
+
+  void cmpptr(Register src1, Address src2, Label& equal);
+
+  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
+                     Register result, Register char_tmp, Register tmp,
+                     bool haystack_isL);
+  void compute_match_mask(Register src, Register pattern, Register match_mask,
+                          Register mask1, Register mask2);
+
+#ifdef COMPILER2
+  void mul_add(Register out, Register in, Register offset,
+               Register len, Register k, Register tmp);
+  void cad(Register dst, Register src1, Register src2, Register carry);
+  void cadc(Register dst, Register src1, Register src2, Register carry);
+  void adc(Register dst, Register src1, Register src2, Register carry);
+  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
+                       Register src1, Register src2, Register carry);
+  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
+                             Register y, Register y_idx, Register z,
+                             Register carry, Register product,
+                             Register idx, Register kdx);
+  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                             Register y, Register y_idx, Register z,
+                             Register carry, Register product,
+                             Register idx, Register kdx);
+  void multiply_128_x_128_loop(Register y, Register z,
+                               Register carry, Register carry2,
+                               Register idx, Register jdx,
+                               Register yz_idx1, Register yz_idx2,
+                               Register tmp, Register tmp3, Register tmp4,
+                               Register tmp6, Register product_hi);
+  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+                       Register z, Register zlen,
+                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                       Register tmp5, Register tmp6, Register product_hi);
+#endif
+
+  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
+  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
+
+  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
+
+  void zero_words(Register base, u_int64_t cnt);
+  address zero_words(Register ptr, Register cnt);
+  void fill_words(Register base, Register cnt, Register value);
+  void zero_memory(Register addr, Register len, Register tmp);
+
+  // shift left by shamt and add
+  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
+
+  // The *_safe float conversions below handle some exceptional inputs.
+  // E.g. converting NaN, +Inf or -Inf to int, float or double
+  // will trigger an exception, so these situations must be dealt with
+  // explicitly to get correct results.
+  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
+  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
+  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
+  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
+
+  // vector load/store unit-stride instructions
+  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
+    // Pick the unit-stride vector load that matches the element width;
+    // e8 and any width not listed here use the byte form.
+    if (sew == Assembler::e64) {
+      vle64_v(vd, base, vm);
+      return;
+    }
+    if (sew == Assembler::e32) {
+      vle32_v(vd, base, vm);
+      return;
+    }
+    if (sew == Assembler::e16) {
+      vle16_v(vd, base, vm);
+      return;
+    }
+    vle8_v(vd, base, vm);
+  }
+
+  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
+    // Pick the unit-stride vector store that matches the element width;
+    // e8 and any width not listed here use the byte form.
+    if (sew == Assembler::e64) {
+      vse64_v(store_data, base, vm);
+      return;
+    }
+    if (sew == Assembler::e32) {
+      vse32_v(store_data, base, vm);
+      return;
+    }
+    if (sew == Assembler::e16) {
+      vse16_v(store_data, base, vm);
+      return;
+    }
+    vse8_v(store_data, base, vm);
+  }
+
+  static const int zero_words_block_size;
+
+  void cast_primitive_type(BasicType type, Register Rt) {
+    // Adjusts the value held in Rt, in place, for the given basic type.
+    // long, void, float and double need no adjustment; any basic type
+    // not listed below ends up in ShouldNotReachHere().
+    switch (type) {
+      case T_BOOLEAN:
+        sltu(Rt, zr, Rt);        return;  // Rt = (Rt != 0) ? 1 : 0
+      case T_CHAR:
+        zero_extend(Rt, Rt, 16); return;
+      case T_BYTE:
+        sign_extend(Rt, Rt, 8);  return;
+      case T_SHORT:
+        sign_extend(Rt, Rt, 16); return;
+      case T_INT:
+        addw(Rt, Rt, zr);        return;  // 32-bit add of zero
+      case T_LONG:    // fall through
+      case T_VOID:    // fall through
+      case T_FLOAT:   // fall through
+      case T_DOUBLE:
+        return;       // nothing to do
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+  // float cmp with unordered_result
+  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
+  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
+
+  // Zero/Sign-extend
+  void zero_extend(Register dst, Register src, int bits);
+  void sign_extend(Register dst, Register src, int bits);
+
+  // compare src1 and src2 and get -1/0/1 in dst.
+  // if [src1 > src2], dst = 1;
+  // if [src1 == src2], dst = 0;
+  // if [src1 < src2], dst = -1;
+  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
+
+  // vext
+  void vmnot_m(VectorRegister vd, VectorRegister vs);
+  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
+  void vfneg_v(VectorRegister vd, VectorRegister vs);
+
+private:
+
+#ifdef ASSERT
+  // Macro short-hand support to clean-up after a failed call to trampoline
+  // call generation (see trampoline_call() below), when a set of Labels must
+  // be reset (before returning).
+#define reset_labels1(L1) L1.reset()
+#define reset_labels2(L1, L2) L1.reset(); L2.reset()
+#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3)
+#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5)
+#endif
+  void repne_scan(Register addr, Register value, Register count, Register tmp);
+
+  // Return true if an address is within the 48-bit RISCV64 address space.
+  bool is_valid_riscv64_address(address addr) {
+    // 48-bit address space: every bit above bit 47 (bits 63..48) must be zero.
+    return ((uintptr_t)addr >> 48) == 0;
+  }
+
+  void ld_constant(Register dest, const Address &const_addr) {
+    if (NearCpool) {  // single direct load from const_addr
+      ld(dest, const_addr);
+      return;
+    }
+    int32_t lo_offset = 0;  // filled in by la_patchable(), then used as the load displacement
+    la_patchable(dest, InternalAddress(const_addr.target()), lo_offset);
+    ld(dest, Address(dest, lo_offset));
+  }
+
+  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
+  Address add_memory_helper(const Address dst);
+
+  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
+  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);
+
+public:
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2, Register result,
+                      Register tmp1, Register tmp2, Register tmp3,
+                      int ae);
+
+  void string_indexof_char_short(Register str1, Register cnt1,
+                                 Register ch, Register result,
+                                 bool isL);
+
+  void string_indexof_char(Register str1, Register cnt1,
+                           Register ch, Register result,
+                           Register tmp1, Register tmp2,
+                           Register tmp3, Register tmp4,
+                           bool isL);
+
+  void string_indexof(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      Register tmp1, Register tmp2,
+                      Register tmp3, Register tmp4,
+                      Register tmp5, Register tmp6,
+                      Register result, int ae);
+
+  void string_indexof_linearscan(Register haystack, Register needle,
+                                 Register haystack_len, Register needle_len,
+                                 Register tmp1, Register tmp2,
+                                 Register tmp3, Register tmp4,
+                                 int needle_con_cnt, Register result, int ae);
+
+  void arrays_equals(Register r1, Register r2,
+                     Register tmp3, Register tmp4,
+                     Register tmp5, Register tmp6,
+                     Register result, Register cnt1,
+                     int elem_size);
+
+  void string_equals(Register r1, Register r2,
+                     Register result, Register cnt1,
+                     int elem_size);
+
+  // refer to conditional_branches and float_conditional_branches
+  static const int bool_test_bits = 3;
+  static const int neg_cond_bits = 2;
+  static const int unsigned_branch_mask = 1 << bool_test_bits;
+  static const int double_branch_mask = 1 << bool_test_bits;
+
+  // cmp
+  void cmp_branch(int cmpFlag,
+                  Register op1, Register op2,
+                  Label& label, bool is_far = false);
+
+  void float_cmp_branch(int cmpFlag,
+                        FloatRegister op1, FloatRegister op2,
+                        Label& label, bool is_far = false);
+
+  void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op,
+                                    Label& L, bool is_far = false);
+
+  void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op,
+                               Label& L, bool is_far = false);
+
+  void enc_cmove(int cmpFlag,
+                 Register op1, Register op2,
+                 Register dst, Register src);
+
+  void spill(Register r, bool is64, int offset) {
+    const Address slot(sp, offset);  // sp-relative stack slot
+    if (is64) { sd(r, slot); } else { sw(r, slot); }  // 64-bit vs 32-bit store
+  }
+
+  void spill(FloatRegister f, bool is64, int offset) {
+    const Address slot(sp, offset);  // sp-relative stack slot
+    if (is64) { fsd(f, slot); } else { fsw(f, slot); }  // double vs single store
+  }
+
+  void spill(VectorRegister v, int offset) {
+    add(t0, sp, offset);  // t0 = sp + offset, the slot address (clobbers t0)
+    vs1r_v(v, t0);
+  }
+
+  void unspill(Register r, bool is64, int offset) {
+    const Address slot(sp, offset);  // sp-relative stack slot
+    if (is64) { ld(r, slot); } else { lw(r, slot); }  // 64-bit vs 32-bit (lw) load
+  }
+
+  void unspillu(Register r, bool is64, int offset) {
+    const Address slot(sp, offset);  // sp-relative stack slot
+    if (is64) { ld(r, slot); } else { lwu(r, slot); }  // 64-bit vs 32-bit unsigned (lwu) load
+  }
+
+  void unspill(FloatRegister f, bool is64, int offset) {
+    const Address slot(sp, offset);  // sp-relative stack slot
+    if (is64) { fld(f, slot); } else { flw(f, slot); }  // double vs single load
+  }
+
+  void unspill(VectorRegister v, int offset) {
+    add(t0, sp, offset);  // t0 = sp + offset, the slot address (clobbers t0)
+    vl1r_v(v, t0);
+  }
+
+  void minmax_FD(FloatRegister dst,
+                 FloatRegister src1, FloatRegister src2,
+                 bool is_double, bool is_min);
+
+};
+
+#ifdef ASSERT
+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+/**
+ * class SkipIfEqual:
+ *
+ * Instantiating this class will result in assembly code being output that will
+ * jump around any code emitted between the creation of the instance and its
+ * automatic destruction at the end of a scope block, depending on the value of
+ * the flag passed to the constructor, which will be checked at run-time.
+ */
+class SkipIfEqual {
+ private:
+  MacroAssembler* _masm;  // presumably the assembler handed to the constructor -- confirm in the .cpp
+  Label _label;           // presumably the skip target bound by the destructor -- confirm in the .cpp
+
+ public:
+   SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
+   ~SkipIfEqual();
+};
+
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
new file mode 100644
index 0000000000..ef968ccd96
--- /dev/null
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
+#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
+
+// Still empty.
+
+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
new file mode 100644
index 0000000000..3c4e8847ce
--- /dev/null
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "classfile/javaClasses.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/flags/flagSetting.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {
+  const Address klass_field(klass_reg, java_lang_Class::klass_offset_in_bytes());
+  if (VerifyMethodHandles) {
+    verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), "MH argument is a Class");
+  }
+  __ ld(klass_reg, klass_field);  // klass_reg is overwritten with the word at its java_lang_Class klass offset
+}
+
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, "%s should be nonzero", xname);  // xname is the stringified expression supplied by NONZERO()
+  return x;  // value is passed through so NONZERO() can wrap an expression in place
+}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
+
+#ifdef ASSERT
+void MethodHandles::verify_klass(MacroAssembler* _masm,
+                                 Register obj, SystemDictionary::WKID klass_id,
+                                 const char* error_message) {  // emits a check that stops with error_message on mismatch
+  InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id);
+  Klass* klass = SystemDictionary::well_known_klass(klass_id);
+  Register temp = t1;
+  Register temp2 = t0; // used by MacroAssembler::cmpptr
+  Label L_ok, L_bad;
+  BLOCK_COMMENT("verify_klass {");
+  __ verify_oop(obj);
+  __ beqz(obj, L_bad);  // zero obj fails the check; nothing has been pushed yet on this path
+  __ push_reg(RegSet::of(temp, temp2), sp);  // save both scratch registers around the comparison
+  __ load_klass(temp, obj);
+  __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);  // branch to L_ok if obj's klass equals *klass_addr
+  intptr_t super_check_offset = klass->super_check_offset();
+  __ ld(temp, Address(temp, super_check_offset));
+  __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok);  // or if the word at super_check_offset equals it
+  __ pop_reg(RegSet::of(temp, temp2), sp);  // failure path: undo the push before stopping
+  __ bind(L_bad);
+  __ stop(error_message);
+  __ BIND(L_ok);
+  __ pop_reg(RegSet::of(temp, temp2), sp);  // success path: restore the scratch registers
+  BLOCK_COMMENT("} verify_klass");
+}
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {}  // no-op: emits no verification code
+
+#endif //ASSERT
+
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                            bool for_compiler_entry) {
+  assert(method == xmethod, "interpreter calling convention");
+  Label L_no_such_method;
+  __ beqz(xmethod, L_no_such_method);  // zero Method*: jump to the AbstractMethodError stub below
+  __ verify_method_ptr(method);
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+
+    __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
+    __ beqz(t0, run_compiled_code);
+    __ ld(t0, Address(method, Method::interpreter_entry_offset()));  // interp_only_mode set: use the interpreter entry
+    __ jr(t0);
+    __ BIND(run_compiled_code);
+  }
+
+  const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
+                                                     Method::from_interpreted_offset();
+  __ ld(t0,Address(method, entry_offset));
+  __ jr(t0);  // jump (not call) to the loaded entry; t0 is clobbered, the 'temp' parameter is not used here
+  __ bind(L_no_such_method);
+  __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry()));
+}
+
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2,
+                                        bool for_compiler_entry) {
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2);
+  assert(recv != noreg, "required register");
+  assert(method_temp == xmethod, "required register for loading method");
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2);
+  __ verify_oop(method_temp);
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2);
+  __ verify_oop(method_temp);
+  __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2);
+  __ verify_oop(method_temp);
+  __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg);
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // make sure recv is already on stack
+    __ ld(temp2, Address(method_temp, Method::const_offset()));
+    __ load_sized_value(temp2,
+                        Address(temp2, ConstMethod::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    Label L;
+    __ ld(t0, __ argument_address(temp2, -1));
+    __ beq(recv, t0, L);
+    __ ld(x10, __ argument_address(temp2, -1));  // mismatch: reload the stacked value into x10 before trapping
+    __ ebreak();
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry);  // method_temp now holds the loaded vmtarget
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
+// Code generation
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    __ ebreak();           // empty stubs make SG sick
+    return NULL;           // no entry point is returned for these two intrinsics
+  }
+
+  // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // xmethod: Method*
+  // x13: argument locator (parameter slot count, added to sp)
+  // x11: used as temp to hold mh or receiver
+  // x10, x29: garbage temps, blown away
+  Register argp   = x13;   // argument list ptr, live on error paths
+  Register mh     = x11;   // MH receiver; dies quickly and is recycled
+
+  // here's where control starts out:
+  __ align(CodeEntryAlignment);
+  address entry_point = __ pc();
+
+  if (VerifyMethodHandles) {
+    assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
+
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes()));
+    __ mv(t1, (int) iid);
+    __ beq(t0, t1, L);  // the method's stored intrinsic id matches iid: verification passes
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad Method*::intrinsic_id");
+    }
+    __ ebreak();
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
+  }
+
+  // First task:  Find out how big the argument list is.
+  Address x13_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ ld(argp, Address(xmethod, Method::const_offset()));
+    __ load_sized_value(argp,
+                        Address(argp, ConstMethod::size_of_parameters_offset()),
+                        sizeof(u2), /*is_signed*/ false);
+    x13_first_arg_addr = __ argument_address(argp, -1);
+  } else {
+    DEBUG_ONLY(argp = noreg);
+  }
+
+  if (!is_signature_polymorphic_static(iid)) {
+    __ ld(mh, x13_first_arg_addr);
+    DEBUG_ONLY(argp = noreg);
+  }
+
+  // x13_first_arg_addr is live!
+
+  trace_method_handle_interpreter_entry(_masm, iid);
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry);
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ ld(recv = x12, x13_first_arg_addr);
+    }
+    DEBUG_ONLY(argp = noreg);
+    Register xmember = xmethod;  // MemberName ptr; incoming method ptr is dead now
+    __ pop_reg(xmember);             // extract last argument
+    generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry);
+  }
+
+  return entry_point;  // the aligned pc captured above, before any verification code
+}
+
+
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+  Register temp1 = x7;
+  Register temp2 = x28;
+  Register temp3 = x29;  // x30 is live by this point: it contains the sender SP
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment");
+    assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+    assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7);
+  }
+
+  assert_different_registers(temp1, temp2, temp3, receiver_reg);
+  assert_different_registers(temp1, temp2, temp3, member_reg);
+
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.vmentry.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry);
+  } else {
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName),
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()));
+    Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes()));
+
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      __ verify_oop(receiver_reg);
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_klass_ptr(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(temp1_recv_klass, receiver_reg);
+        __ verify_klass_ptr(temp1_recv_klass);
+      }
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(temp2_defc, member_clazz, temp3);
+        load_klass_from_Class(_masm, temp2_defc);
+        __ verify_klass_ptr(temp2_defc);
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok);
+        // If we get here, the type check failed!
+        __ ebreak();
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
+    }
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the trailing argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+    //  x30 - interpreter linkage (if interpreted)
+    //  x11 ... x10 - compiler arguments (if compiled)
+
+    Label L_incompatible_class_change_error;
+    switch (iid) {
+      case vmIntrinsics::_linkToSpecial:
+        if (VerifyMethodHandles) {
+          verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+        }
+        __ load_heap_oop(xmethod, member_vmtarget);
+        __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
+        break;
+
+      case vmIntrinsics::_linkToStatic:
+        if (VerifyMethodHandles) {
+          verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+        }
+        __ load_heap_oop(xmethod, member_vmtarget);
+        __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg);
+        break;
+
+      case vmIntrinsics::_linkToVirtual:
+      {
+        // same as TemplateTable::invokevirtual,
+        // minus the CP setup and profiling:
+
+        if (VerifyMethodHandles) {
+          verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+        }
+
+        // pick out the vtable index from the MemberName, and then we can discard it:
+        Register temp2_index = temp2;
+        __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg);
+
+        if (VerifyMethodHandles) {
+          Label L_index_ok;
+          __ bgez(temp2_index, L_index_ok);  // a negative vtable index traps below
+          __ ebreak();
+          __ BIND(L_index_ok);
+        }
+
+        // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+        // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+        // get target Method* & entry point
+        __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod);
+        break;
+      }
+
+      case vmIntrinsics::_linkToInterface:
+      {
+        // same as TemplateTable::invokeinterface
+        // (minus the CP setup and profiling, with different argument motion)
+        if (VerifyMethodHandles) {
+          verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+        }
+
+        Register temp3_intf = temp3;
+        __ load_heap_oop(temp3_intf, member_clazz);
+        load_klass_from_Class(_masm, temp3_intf);
+        __ verify_klass_ptr(temp3_intf);
+
+        Register rindex = xmethod;
+        __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg);
+        if (VerifyMethodHandles) {
+          Label L;
+          __ bgez(rindex, L);  // a negative index traps below
+          __ ebreak();
+          __ bind(L);
+        }
+
+        // given intf, index, and recv klass, dispatch to the implementation method
+        __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                   // note: next two args must be the same:
+                                   rindex, xmethod,
+                                   temp2,
+                                   L_incompatible_class_change_error);
+        break;
+      }
+
+      default:
+        fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
+        break;
+    }
+
+    // live at this point:  xmethod, x30 (if interpreted)
+
+    // After figuring out which concrete method to call, jump into it.
+    // Note that this works in the interpreter with no data motion.
+    // But the compiled version will require that r2_recv be shifted out.
+    __ verify_method_ptr(xmethod);
+    jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry);
+    if (iid == vmIntrinsics::_linkToInterface) {
+      __ bind(L_incompatible_class_change_error);  // label handed to lookup_interface_method above
+      __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry()));
+    }
+  }
+
+}
+
+#ifndef PRODUCT
+void trace_method_handle_stub(const char* adaptername,
+                              oopDesc* mh,
+                              intptr_t* saved_regs,
+                              intptr_t* entry_sp) {  }  // empty body: no tracing is performed here
+
+// The stub wraps the arguments in a struct on the stack to avoid
+// dealing with the different calling conventions for passing 6
+// arguments.
+struct MethodHandleStubArguments {
+  const char* adaptername;
+  oopDesc* mh;
+  intptr_t* saved_regs;
+  intptr_t* entry_sp;
+};
+void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) {  }  // empty body: args is ignored
+
+void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {  }  // empty body: emits no code
+#endif //PRODUCT
diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
new file mode 100644
index 0000000000..65493eba76
--- /dev/null
+++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Platform-specific definitions for method handles.
+// These definitions are inlined into class MethodHandles.
+
+// Adapters
+enum /* platform_dependent_constants */ {
+  adapter_code_size = 32000 DEBUG_ONLY(+ 120000)
+};
+
+public:
+
+  static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg);
+
+  static void verify_klass(MacroAssembler* _masm,
+                           Register obj, SystemDictionary::WKID klass_id,
+                           const char* error_message = "wrong klass") NOT_DEBUG_RETURN;
+
+  static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) {
+    verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
+                 "reference is a MH");
+  }
+
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
+  // Similar to InterpreterMacroAssembler::jump_from_interpreted.
+  // Takes care of special dispatch from single stepping too.
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2,
+                                  bool for_compiler_entry);
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
new file mode 100644
index 0000000000..ecce8eb021
--- /dev/null
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/compiledIC.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.hpp"
+#include "runtime/orderAccess.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/ostream.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+Register NativeInstruction::extract_rs1(address instr) {
+  assert_cond(instr != NULL);
+  return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); // rs1 field: bit range 19..15 of the first 32-bit word
+}
+
+Register NativeInstruction::extract_rs2(address instr) {
+  assert_cond(instr != NULL);
+  return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); // rs2 field: bit range 24..20 of the first 32-bit word
+}
+
+Register NativeInstruction::extract_rd(address instr) {
+  assert_cond(instr != NULL);
+  return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); // rd field: bit range 11..7 of the first 32-bit word
+}
+
+uint32_t NativeInstruction::extract_opcode(address instr) {
+  assert_cond(instr != NULL);
+  return Assembler::extract(((unsigned*)instr)[0], 6, 0); // opcode field: bit range 6..0 of the first 32-bit word
+}
+
+uint32_t NativeInstruction::extract_funct3(address instr) {
+  assert_cond(instr != NULL);
+  return Assembler::extract(((unsigned*)instr)[0], 14, 12); // funct3 field: bit range 14..12 of the first 32-bit word
+}
+
+bool NativeInstruction::is_pc_relative_at(address instr) {
+  // auipc + jalr
+  // auipc + addi
+  // auipc + load
+  // auipc + float_load
+  return (is_auipc_at(instr)) &&
+         (is_addi_at(instr + instruction_size) ||
+          is_jalr_at(instr + instruction_size) ||
+          is_load_at(instr + instruction_size) ||
+          is_float_load_at(instr + instruction_size)) &&
+         check_pc_relative_data_dependency(instr); // second instruction must read the register auipc wrote
+}
+
+// i.e. ld(Rd, Label): an auipc immediately followed by an ld
+bool NativeInstruction::is_load_pc_relative_at(address instr) {
+  return is_auipc_at(instr) && // auipc
+         is_ld_at(instr + instruction_size) && // ld
+         check_load_pc_relative_data_dependency(instr); // ld must use auipc's rd as both base and destination
+}
+
+bool NativeInstruction::is_movptr_at(address instr) {
+  return is_lui_at(instr) && // lui
+         is_addi_at(instr + instruction_size) && // addi
+         is_slli_shift_at(instr + instruction_size * 2, 11) && // slli Rd, Rs, 11
+         is_addi_at(instr + instruction_size * 3) && // addi
+         is_slli_shift_at(instr + instruction_size * 4, 6) && // slli Rd, Rs, 6
+         (is_addi_at(instr + instruction_size * 5) ||
+          is_jalr_at(instr + instruction_size * 5) ||
+          is_load_at(instr + instruction_size * 5)) && // addi/jalr/load
+         check_movptr_data_dependency(instr); // the six instructions must chain through one register
+}
+
+bool NativeInstruction::is_li32_at(address instr) {
+  return is_lui_at(instr) && // lui
+         is_addiw_at(instr + instruction_size) && // addiw
+         check_li32_data_dependency(instr);
+}
+
+void NativeCall::verify() {
+  assert(NativeCall::is_call_at((address)this), "unexpected code at call site");
+}
+
+address NativeCall::destination() const {
+  address addr = (address)this;
+  assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal.");
+  address destination = MacroAssembler::target_addr_for_insn(instruction_address());
+
+  // Do we use a trampoline stub for this call?
+  CodeBlob* cb = CodeCache::find_blob_unsafe(addr);   // Else we get assertion if nmethod is zombie.
+  assert(cb && cb->is_nmethod(), "sanity");
+  nmethod *nm = (nmethod *)cb;
+  if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
+    // Yes we do, so get the destination from the trampoline stub.
+    const address trampoline_stub_addr = destination;
+    destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination();
+  }
+
+  return destination;
+}
+
+// Similar to replace_mt_safe, but just changes the destination. The
+// important thing is that free-running threads are able to execute this
+// call instruction at all times.
+//
+// Used in the runtime linkage of calls; see class CompiledIC.
+//
+// Add parameter assert_lock to switch off assertion
+// during code generation, where no patching lock is needed.
+void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
+  assert(!assert_lock ||
+         (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()),
+         "concurrent code patching");
+
+  ResourceMark rm;
+  address addr_call = addr_at(0);
+  assert(NativeCall::is_call_at(addr_call), "unexpected code at call site");
+
+  // Patch the constant in the call's trampoline stub.
+  address trampoline_stub_addr = get_trampoline();
+  if (trampoline_stub_addr != NULL) {
+    assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines");
+    nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest);
+  }
+
+  // Patch the call.
+  if (Assembler::reachable_from_branch_at(addr_call, dest)) {
+    set_destination(dest);
+  } else {
+    assert (trampoline_stub_addr != NULL, "we need a trampoline");
+    set_destination(trampoline_stub_addr);
+  }
+
+  ICache::invalidate_range(addr_call, instruction_size);
+}
+
+address NativeCall::get_trampoline() {
+  address call_addr = addr_at(0);
+
+  CodeBlob *code = CodeCache::find_blob(call_addr);
+  assert(code != NULL, "Could not find the containing code blob");
+
+  address jal_destination = MacroAssembler::pd_call_destination(call_addr);
+  if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) {
+    return jal_destination;
+  }
+
+  if (code != NULL && code->is_nmethod()) {
+    return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
+  }
+
+  return NULL;
+}
+
+// Inserts a native call instruction at a given pc
+void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
+
+//-------------------------------------------------------------------
+
+void NativeMovConstReg::verify() {
+  if (!(nativeInstruction_at(instruction_address())->is_movptr() ||
+        is_auipc_at(instruction_address()))) {
+    fatal("should be MOVPTR or AUIPC");
+  }
+}
+
+intptr_t NativeMovConstReg::data() const {
+  address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+  if (maybe_cpool_ref(instruction_address())) { // true when the sequence starts with auipc
+    return *(intptr_t*)addr; // the decoded target holds the constant: load it from there
+  } else {
+    return (intptr_t)addr; // the decoded target is itself the constant
+  }
+}
+
+void NativeMovConstReg::set_data(intptr_t x) {
+  if (maybe_cpool_ref(instruction_address())) {
+    address addr = MacroAssembler::target_addr_for_insn(instruction_address());
+    *(intptr_t*)addr = x; // auipc form: overwrite the slot the sequence loads from
+  } else {
+    // Store x into the instruction stream.
+    MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x);
+    ICache::invalidate_range(instruction_address(), movptr_instruction_size);
+  }
+
+  // Find and replace the oop/metadata corresponding to this
+  // instruction in oops section.
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL; // find_blob may return NULL; do not dereference it
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_instruction_address());
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop* oop_addr = iter.oop_reloc()->oop_addr();
+        *oop_addr = cast_to_oop(x);
+        break; // only the first matching relocation is updated
+      } else if (iter.type() == relocInfo::metadata_type) {
+        Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr();
+        *metadata_addr = (Metadata*)x;
+        break; // only the first matching relocation is updated
+      }
+    }
+  }
+}
+
+void NativeMovConstReg::print() {
+  tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
+                p2i(instruction_address()), data());
+}
+
+//-------------------------------------------------------------------
+
+int NativeMovRegMem::offset() const  {
+  Unimplemented();
+  return 0;
+}
+
+void NativeMovRegMem::set_offset(int x) { Unimplemented(); }
+
+void NativeMovRegMem::verify() {
+  Unimplemented();
+}
+
+//--------------------------------------------------------------------------------
+
+void NativeJump::verify() { }
+
+
+void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) {
+  // Patching to not_entrant can happen while activations of the method are
+  // in use. The patching in that instance must happen only when certain
+  // alignment restrictions are true. These guarantees check those
+  // conditions.
+
+  // Must be 4 bytes aligned
+  MacroAssembler::assert_alignment(verified_entry);
+}
+
+
+address NativeJump::jump_destination() const {
+  address dest = MacroAssembler::target_addr_for_insn(instruction_address());
+
+  // We use jump to self as the unresolved address which the inline
+  // cache code (and relocs) know about
+
+  // return -1 if jump to self
+  dest = (dest == (address) this) ? (address) -1 : dest;
+  return dest;
+}
+
+void NativeJump::set_jump_destination(address dest) {
+  // We use jump to self as the unresolved address which the inline
+  // cache code (and relocs) know about
+  if (dest == (address) -1)
+    dest = instruction_address();
+
+  MacroAssembler::pd_patch_instruction(instruction_address(), dest);
+  ICache::invalidate_range(instruction_address(), instruction_size);
+}
+
+//-------------------------------------------------------------------
+
+address NativeGeneralJump::jump_destination() const {
+  NativeMovConstReg* move = nativeMovConstReg_at(instruction_address());
+  address dest = (address) move->data();
+
+  // We use jump to self as the unresolved address which the inline
+  // cache code (and relocs) know about
+
+  // return -1 if jump to self
+  dest = (dest == (address) this) ? (address) -1 : dest;
+  return dest;
+}
+
+//-------------------------------------------------------------------
+
+bool NativeInstruction::is_safepoint_poll() {
+  return is_lwu_to_zr(address(this));
+}
+
+bool NativeInstruction::is_lwu_to_zr(address instr) {
+  assert_cond(instr != NULL);
+  return (extract_opcode(instr) == 0b0000011 &&
+          extract_funct3(instr) == 0b110 &&
+          extract_rd(instr) == zr);         // zr
+}
+
+// An all-ones encoding is permanently reserved as an illegal instruction; the check below compares the full 32-bit word.
+bool NativeInstruction::is_sigill_zombie_not_entrant() {
+  // jvmci
+  return uint_at(0) == 0xffffffff;
+}
+
+void NativeIllegalInstruction::insert(address code_pos) {
+  assert_cond(code_pos != NULL);
+  *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction
+}
+
+bool NativeInstruction::is_stop() {
+  return uint_at(0) == 0xffffffff; // an illegal instruction
+}
+
+//-------------------------------------------------------------------
+
+// MT-safe inserting of a jump over a jump or a nop (used by
+// nmethod::make_not_entrant_or_zombie)
+
+void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
+
+  assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch");
+
+  assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() ||
+         nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(),
+         "riscv cannot replace non-jump with jump");
+
+  check_verified_entry_alignment(entry, verified_entry);
+
+  // Patch this nmethod atomically.
+  if (Assembler::reachable_from_branch_at(verified_entry, dest)) {
+    ptrdiff_t offset = dest - verified_entry;
+    guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction."); // 1M
+
+    uint32_t insn = 0; // build the jal in a local word, then publish it with one 32-bit store
+    address pInsn = (address)&insn;
+    Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); // offset bit 20
+    Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); // offset bits 10..1
+    Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); // offset bit 11
+    Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); // offset bits 19..12
+    Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump
+    Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset)
+    *(unsigned int*)verified_entry = insn;
+  } else {
+    // We use an illegal instruction for marking a method as
+    // not_entrant or zombie.
+    NativeIllegalInstruction::insert(verified_entry);
+  }
+
+  ICache::invalidate_range(verified_entry, instruction_size);
+}
+
+void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
+  CodeBuffer cb(code_pos, instruction_size); // NOTE(review): six instructions are emitted below; confirm instruction_size covers them
+  MacroAssembler a(&cb);
+
+  int32_t offset = 0;
+  a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli
+  a.jalr(x0, t0, offset); // jalr
+
+  ICache::invalidate_range(code_pos, instruction_size);
+}
+
+// MT-safe patching of a long jump instruction.
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
+  ShouldNotCallThis();
+}
+
+
+address NativeCallTrampolineStub::destination(nmethod *nm) const {
+  return ptr_at(data_offset);
+}
+
+void NativeCallTrampolineStub::set_destination(address new_destination) {
+  set_ptr_at(data_offset, new_destination);
+  OrderAccess::release();
+}
+
+uint32_t NativeMembar::get_kind() {
+  uint32_t insn = uint_at(0);
+
+  uint32_t predecessor = Assembler::extract(insn, 27, 24); // predecessor set: bit range 27..24
+  uint32_t successor = Assembler::extract(insn, 23, 20); // successor set: bit range 23..20
+
+  return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor);
+}
+
+void NativeMembar::set_kind(uint32_t order_kind) {
+  uint32_t predecessor = 0;
+  uint32_t successor = 0;
+
+  MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor);
+
+  uint32_t insn = uint_at(0); // start from the current encoding; only the two fields below are rewritten
+  address pInsn = (address) &insn;
+  Assembler::patch(pInsn, 27, 24, predecessor); // predecessor set: bit range 27..24
+  Assembler::patch(pInsn, 23, 20, successor); // successor set: bit range 23..20
+
+  address membar = addr_at(0);
+  *(unsigned int*) membar = insn; // NOTE(review): no ICache invalidation here; confirm callers flush if needed
+}
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
new file mode 100644
index 0000000000..183ab85fc9
--- /dev/null
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp
@@ -0,0 +1,520 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP
+#define CPU_RISCV_NATIVEINST_RISCV_HPP
+
+#include "asm/assembler.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/os.hpp"
+
+// We have interfaces for the following instructions:
+// - NativeInstruction
+// - - NativeCall
+// - - NativeMovConstReg
+// - - NativeMovRegMem
+// - - NativeJump
+// - - NativeGeneralJump
+// - - NativeIllegalInstruction
+// - - NativeCallTrampolineStub
+// - - NativeMembar
+
+// The base class for different kinds of native instruction abstractions.
+// Provides the primitive operations to manipulate code relative to this.
+
+class NativeCall;
+
+class NativeInstruction {
+  friend class Relocation;
+  friend bool is_NativeCallTrampolineStub_at(address);
+ public:
+  enum {
+    instruction_size = 4,
+    compressed_instruction_size = 2,
+  };
+
+  juint encoding() const {
+    return uint_at(0);
+  }
+
+  bool is_jal()                             const { return is_jal_at(addr_at(0));         }
+  bool is_movptr()                          const { return is_movptr_at(addr_at(0));      }
+  bool is_call()                            const { return is_call_at(addr_at(0));        }
+  bool is_jump()                            const { return is_jump_at(addr_at(0));        }
+
+  static bool is_jal_at(address instr)        { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; }
+  static bool is_jalr_at(address instr)       { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
+  static bool is_branch_at(address instr)     { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; }
+  static bool is_ld_at(address instr)         { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
+  static bool is_load_at(address instr)       { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; }
+  static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; }
+  static bool is_auipc_at(address instr)      { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; }
+  static bool is_jump_at(address instr)       { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
+  static bool is_addi_at(address instr)       { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
+  static bool is_addiw_at(address instr)      { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
+  static bool is_lui_at(address instr)        { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; }
+  static bool is_slli_shift_at(address instr, uint32_t shift) {
+    assert_cond(instr != NULL);
+    return (extract_opcode(instr) == 0b0010011 && // opcode field
+            extract_funct3(instr) == 0b001 &&     // funct3 field, select the type of operation
+            Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift);    // shamt field
+  }
+
+  static Register extract_rs1(address instr);
+  static Register extract_rs2(address instr);
+  static Register extract_rd(address instr);
+  static uint32_t extract_opcode(address instr);
+  static uint32_t extract_funct3(address instr);
+
+  // the instruction sequence of movptr is as below:
+  //     lui
+  //     addi
+  //     slli
+  //     addi
+  //     slli
+  //     addi/jalr/load
+  static bool check_movptr_data_dependency(address instr) {
+    address lui = instr;
+    address addi1 = lui + instruction_size;
+    address slli1 = addi1 + instruction_size;
+    address addi2 = slli1 + instruction_size;
+    address slli2 = addi2 + instruction_size;
+    address last_instr = slli2 + instruction_size;
+    return extract_rs1(addi1) == extract_rd(lui) &&
+           extract_rs1(addi1) == extract_rd(addi1) &&
+           extract_rs1(slli1) == extract_rd(addi1) &&
+           extract_rs1(slli1) == extract_rd(slli1) &&
+           extract_rs1(addi2) == extract_rd(slli1) &&
+           extract_rs1(addi2) == extract_rd(addi2) &&
+           extract_rs1(slli2) == extract_rd(addi2) &&
+           extract_rs1(slli2) == extract_rd(slli2) &&
+           extract_rs1(last_instr) == extract_rd(slli2);
+  }
+
+  // the instruction sequence of li32 is as below:
+  //     lui
+  //     addiw
+  static bool check_li32_data_dependency(address instr) {
+    address lui = instr;
+    address addiw = lui + instruction_size;
+
+    return extract_rs1(addiw) == extract_rd(lui) &&
+           extract_rs1(addiw) == extract_rd(addiw);
+  }
+
+  // the instruction sequence of pc-relative is as below:
+  //     auipc
+  //     jalr/addi/load/float_load
+  static bool check_pc_relative_data_dependency(address instr) {
+    address auipc = instr;
+    address last_instr = auipc + instruction_size;
+
+    return extract_rs1(last_instr) == extract_rd(auipc);
+  }
+
+  // the instruction sequence of load_label is as below:
+  //     auipc
+  //     load
+  static bool check_load_pc_relative_data_dependency(address instr) {
+    address auipc = instr;
+    address load = auipc + instruction_size;
+
+    return extract_rd(load) == extract_rd(auipc) &&
+           extract_rs1(load) == extract_rd(load);
+  }
+
+  static bool is_movptr_at(address instr);
+  static bool is_li32_at(address instr);
+  static bool is_pc_relative_at(address branch);
+  static bool is_load_pc_relative_at(address branch);
+
+  static bool is_call_at(address instr) {
+    if (is_jal_at(instr) || is_jalr_at(instr)) {
+      return true;
+    }
+    return false;
+  }
+  static bool is_lwu_to_zr(address instr);
+
+  inline bool is_nop();
+  inline bool is_jump_or_nop();
+  bool is_safepoint_poll();
+  bool is_sigill_zombie_not_entrant();
+  bool is_stop();
+
+ protected:
+  address addr_at(int offset) const    { return address(this) + offset; }
+
+  jint int_at(int offset) const        { return *(jint*) addr_at(offset); }
+  juint uint_at(int offset) const      { return *(juint*) addr_at(offset); }
+
+  address ptr_at(int offset) const     { return *(address*) addr_at(offset); }
+
+  oop  oop_at (int offset) const       { return *(oop*) addr_at(offset); }
+
+
+  void set_int_at(int offset, jint  i)        { *(jint*)addr_at(offset) = i; }
+  void set_uint_at(int offset, juint i)       { *(juint*)addr_at(offset) = i; } // takes juint, matching uint_at()
+  void set_ptr_at (int offset, address  ptr)  { *(address*) addr_at(offset) = ptr; }
+  void set_oop_at (int offset, oop  o)        { *(oop*) addr_at(offset) = o; }
+
+ public:
+
+  inline friend NativeInstruction* nativeInstruction_at(address addr);
+
+  static bool maybe_cpool_ref(address instr) {
+    return is_auipc_at(instr);
+  }
+
+  bool is_membar() {
+    return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0;
+  }
+};
+
+inline NativeInstruction* nativeInstruction_at(address addr) {
+  return (NativeInstruction*)addr;
+}
+
+// The natural type of a RISC-V instruction is uint32_t, so accept a uint32_t* as well as an address
+inline NativeInstruction* nativeInstruction_at(uint32_t *addr) {
+  return (NativeInstruction*)addr;
+}
+
+inline NativeCall* nativeCall_at(address addr);
+// The NativeCall is an abstraction for accessing/manipulating native
+// call instructions (used to manipulate inline caches, primitive &
+// DSO calls, etc.).
+
+class NativeCall: public NativeInstruction {
+ public:
+  enum RISCV_specific_constants {
+    instruction_size            =    4,
+    instruction_offset          =    0,
+    displacement_offset         =    0,
+    return_address_offset       =    4
+  };
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(return_address_offset); }
+  address return_address() const            { return addr_at(return_address_offset); }
+  address destination() const;
+
+  void set_destination(address dest) {
+    assert(is_jal(), "Should be jal instruction!");
+    intptr_t offset = (intptr_t)(dest - instruction_address());
+    assert((offset & 0x1) == 0, "bad alignment");
+    assert(is_imm_in_range(offset, 20, 1), "encoding constraint");
+    unsigned int insn = 0b1101111; // jal
+    address pInsn = (address)(&insn);
+    Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); // offset bit 20
+    Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); // offset bits 10..1
+    Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); // offset bit 11
+    Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); // offset bits 19..12
+    Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra
+    set_int_at(displacement_offset, insn); // the whole instruction is rewritten with a single 32-bit store
+  }
+
+  void verify_alignment() {} // do nothing on riscv
+  void verify();
+  void print();
+
+  // Creation
+  inline friend NativeCall* nativeCall_at(address addr);
+  inline friend NativeCall* nativeCall_before(address return_address);
+
+  static bool is_call_before(address return_address) {
+    return is_call_at(return_address - NativeCall::return_address_offset);
+  }
+
+  // Insertion and MT-safe replacement of a call instruction.
+  static void insert(address code_pos, address entry);
+
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+
+  // Similar to replace_mt_safe, but just changes the destination.  The
+  // important thing is that free-running threads are able to execute
+  // this call instruction at all times.  If the call is an immediate jal
+  // instruction we can simply rely on atomicity of 32-bit writes to
+  // make sure other threads will see no intermediate states.
+
+  // We cannot rely on locks here, since the free-running threads must run at
+  // full speed.
+  //
+  // Used in the runtime linkage of calls; see class CompiledIC.
+  // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.)
+
+  // The parameter assert_lock disables the assertion during code generation.
+  void set_destination_mt_safe(address dest, bool assert_lock = true);
+
+  address get_trampoline();
+};
+
+inline NativeCall* nativeCall_at(address addr) {
+  assert_cond(addr != NULL);
+  NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset);
+  DEBUG_ONLY(call->verify());
+  return call;
+}
+
+inline NativeCall* nativeCall_before(address return_address) {
+  assert_cond(return_address != NULL);
+  NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset);
+  DEBUG_ONLY(call->verify());
+  return call;
+}
+
+// An interface for accessing/manipulating native mov reg, imm instructions.
+// (used to manipulate inlined 64-bit data calls, etc.)
+class NativeMovConstReg: public NativeInstruction {
+ public:
+  enum RISCV_specific_constants {
+    movptr_instruction_size             =    6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi.  See movptr().
+    movptr_with_offset_instruction_size =    5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset().
+    load_pc_relative_instruction_size   =    2 * NativeInstruction::instruction_size, // auipc, ld
+    instruction_offset                  =    0,
+    displacement_offset                 =    0
+  };
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  {
+    // if the instruction at 5 * instruction_size is addi,
+    // it means a lui + addi + slli + addi + slli + addi instruction sequence,
+    // and the next instruction address should be addr_at(6 * instruction_size).
+    // However, when the instruction at 5 * instruction_size isn't addi,
+    // the next instruction address should be addr_at(5 * instruction_size)
+    if (nativeInstruction_at(instruction_address())->is_movptr()) {
+      if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) {
+        // Assume: lui, addi, slli, addi, slli, addi
+        return addr_at(movptr_instruction_size);
+      } else {
+        // Assume: lui, addi, slli, addi, slli
+        return addr_at(movptr_with_offset_instruction_size);
+      }
+    } else if (is_load_pc_relative_at(instruction_address())) {
+      // Assume: auipc, ld
+      return addr_at(load_pc_relative_instruction_size);
+    }
+    guarantee(false, "Unknown instruction in NativeMovConstReg");
+    return NULL;
+  }
+
+  intptr_t data() const;
+  void set_data(intptr_t x);
+
+  void flush() {
+    if (!maybe_cpool_ref(instruction_address())) {
+      ICache::invalidate_range(instruction_address(), movptr_instruction_size);
+    }
+  }
+
+  void verify();
+  void print();
+
+  // Creation
+  inline friend NativeMovConstReg* nativeMovConstReg_at(address addr);
+  inline friend NativeMovConstReg* nativeMovConstReg_before(address addr);
+};
+
+inline NativeMovConstReg* nativeMovConstReg_at(address addr) {
+  assert_cond(addr != NULL);  // a NULL code address is a caller bug
+  NativeMovConstReg* const mov = reinterpret_cast<NativeMovConstReg*>(addr - NativeMovConstReg::instruction_offset);
+  DEBUG_ONLY(mov->verify());  // debug-build self check of the wrapped code
+  return mov;
+}
+
+inline NativeMovConstReg* nativeMovConstReg_before(address addr) {
+  assert_cond(addr != NULL);  // a NULL code address is a caller bug
+  NativeMovConstReg* const prev = reinterpret_cast<NativeMovConstReg*>(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset);
+  DEBUG_ONLY(prev->verify());  // debug-build self check of the wrapped code
+  return prev;  // NOTE(review): backs up by instruction_size only; confirm against the sequence length
+}
+
+// RISCV should not use C1 runtime patching; NativeMovRegMem is still
+// provided, as a single-instruction view, to keep some compilers happy.
+class NativeMovRegMem: public NativeInstruction {
+ public:
+  enum RISCV_specific_constants {
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    instruction_size            =    NativeInstruction::instruction_size,
+    next_instruction_offset     =    NativeInstruction::instruction_size
+  };
+
+  // Position and extent of the instruction.
+  int instruction_start() const           { return instruction_offset; }
+  address instruction_address() const     { return addr_at(instruction_offset); }
+  int num_bytes_to_end_of_patch() const   { return instruction_size + instruction_offset; }
+
+  // Offset accessors; only declared here.
+  int offset() const;
+  void set_offset(int x);
+
+  // Reads the current offset, adds add_offset to it and stores
+  // the sum back through set_offset().
+  void add_offset_in_bytes(int add_offset) { set_offset(offset() + add_offset); }
+
+  // Checking and printing helpers; only declared here.
+  void verify();
+  void print();
+
+ private:
+  inline friend NativeMovRegMem* nativeMovRegMem_at(address addr);
+};
+
+inline NativeMovRegMem* nativeMovRegMem_at(address addr) {
+  NativeMovRegMem* const mov = reinterpret_cast<NativeMovRegMem*>(addr - NativeMovRegMem::instruction_offset);
+  DEBUG_ONLY(mov->verify());  // debug-build self check of the wrapped code
+  return mov;  // NOTE(review): unlike nativeMovConstReg_at, addr is not NULL-checked here
+}
+
+class NativeJump: public NativeInstruction {  // view of a jump that occupies a single instruction
+ public:
+  enum RISCV_specific_constants {
+    instruction_size            =    NativeInstruction::instruction_size,  // exactly one instruction
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    next_instruction_offset     =    NativeInstruction::instruction_size
+  };
+
+  address instruction_address() const       { return addr_at(instruction_offset); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+  address jump_destination() const;           // only declared here
+  void set_jump_destination(address dest);    // only declared here
+
+  // Creation
+  inline friend NativeJump* nativeJump_at(address address);
+
+  void verify();
+
+  // Insertion of native jump instruction
+  static void insert(address code_pos, address entry);
+  // MT-safe insertion of native jump at verified method entry
+  static void check_verified_entry_alignment(address entry, address verified_entry);
+  static void patch_verified_entry(address entry, address verified_entry, address dest);
+};
+
+inline NativeJump* nativeJump_at(address addr) {
+  NativeJump* const jmp = reinterpret_cast<NativeJump*>(addr - NativeJump::instruction_offset);
+  DEBUG_ONLY(jmp->verify());  // debug-build self check of the wrapped code
+  return jmp;  // NOTE(review): unlike nativeGeneralJump_at, addr is not NULL-checked here
+}
+
+class NativeGeneralJump: public NativeJump {  // long-form jump: a six-instruction sequence
+public:
+  enum RISCV_specific_constants {
+    instruction_size            =    6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr
+    instruction_offset          =    0,
+    data_offset                 =    0,
+    next_instruction_offset     =    6 * NativeInstruction::instruction_size  // lui, addi, slli, addi, slli, jalr
+  };
+
+  address jump_destination() const;  // redeclared for the six-instruction form; only declared here
+
+  static void insert_unconditional(address code_pos, address entry);
+  static void replace_mt_safe(address instr_addr, address code_buffer);
+};
+
+inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
+  assert_cond(addr != NULL);  // a NULL code address is a caller bug
+  NativeGeneralJump* jump = (NativeGeneralJump*)(addr);  // addr is the start of the jump sequence
+  DEBUG_ONLY(jump->verify());  // DEBUG_ONLY, as in the other native*_at() factories of this file
+  return jump;
+}
+
+class NativeIllegalInstruction: public NativeInstruction {
+ public:
+  // Insert an illegal opcode at a specific address
+  static void insert(address code_pos);
+};
+
+inline bool NativeInstruction::is_nop() {
+  // 0x00000013 is the encoding of "addi x0, x0, 0", the canonical RISC-V nop.
+  return *(uint32_t*)addr_at(0) == 0x13;
+}
+
+inline bool NativeInstruction::is_jump_or_nop() {
+  return is_nop() ? true : is_jump();  // nop is tested first; is_jump() only runs when it is not a nop
+}
+
+// Call trampoline stubs: three instructions (auipc, ld, jr) followed by a word holding the target address.
+class NativeCallTrampolineStub : public NativeInstruction {
+ public:
+
+  enum RISCV_specific_constants {
+    // Refer to function emit_trampoline_stub.
+    instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address
+    data_offset      = 3 * NativeInstruction::instruction_size,            // auipc + ld + jr; the target address word follows
+  };
+
+  address destination(nmethod *nm = NULL) const;   // only declared here
+  void set_destination(address new_destination);   // only declared here
+  ptrdiff_t destination_offset() const;            // only declared here
+};
+
+inline bool is_NativeCallTrampolineStub_at(address addr) {
+  // A call trampoline stub must be exactly:
+  //      auipc t0, <hi>        -+ the pair implements "ld t0, L"
+  //      ld    t0, <lo>(t0)    -+
+  //      jr    t0
+  // L:   <target address>
+  //
+  // Three things are checked, in this order:
+  //   1) opcodes:   auipc, ld, jalr
+  //   2) registers: auipc[11:7], ld[11:7], ld[19:15] and jr[19:15] are all t0 (x5)
+  //   3) immediate: ld[31:20] equals NativeCallTrampolineStub::data_offset
+  assert_cond(addr != NULL);
+  const address auipc_pc = addr;
+  const address ld_pc    = addr + NativeInstruction::instruction_size;
+  const address jalr_pc  = addr + 2 * NativeInstruction::instruction_size;
+  return NativeInstruction::is_auipc_at(auipc_pc) &&
+         NativeInstruction::is_ld_at(ld_pc) &&
+         NativeInstruction::is_jalr_at(jalr_pc) &&
+         (NativeInstruction::extract_rd(auipc_pc)  == x5) &&
+         (NativeInstruction::extract_rd(ld_pc)     == x5) &&
+         (NativeInstruction::extract_rs1(ld_pc)    == x5) &&
+         (NativeInstruction::extract_rs1(jalr_pc)  == x5) &&
+         (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset);
+}
+
+inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) {
+  assert_cond(addr != NULL);  // a NULL code address is a caller bug
+  assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found");  // addr must start a stub
+  return reinterpret_cast<NativeCallTrampolineStub*>(addr);
+}
+
+class NativeMembar : public NativeInstruction {  // view of a membar (memory barrier) instruction
+public:
+  uint32_t get_kind();                 // only declared here
+  void set_kind(uint32_t order_kind);  // only declared here
+};
+
+inline NativeMembar *NativeMembar_at(address addr) {
+  assert_cond(addr != NULL);  // a NULL code address is a caller bug
+  assert(nativeInstruction_at(addr)->is_membar(), "no membar found");  // addr must hold a membar
+  return reinterpret_cast<NativeMembar*>(addr);
+}
+
+#endif // CPU_RISCV_NATIVEINST_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
new file mode 100644
index 0000000000..fef8ca9b64
--- /dev/null
+++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP
+#define CPU_RISCV_REGISTERMAP_RISCV_HPP
+
+// machine-dependent implementation for register maps
+  friend class frame;
+
+ private:
+  // This is the hook for finding a register in a "well-known" location,
+  // such as a register block of a predetermined format.
+  address pd_location(VMReg reg) const { return NULL; }  // always NULL here: no such location is reported
+
+  // No PD (platform-dependent) state to clear or copy, so these hooks are intentionally empty:
+  void pd_clear() {}
+  void pd_initialize() {}
+  void pd_initialize_from(const RegisterMap* map) {}
+
+#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
new file mode 100644
index 0000000000..583f67573c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/register.hpp"
+#include "interp_masm_riscv.hpp"
+#include "register_riscv.hpp"
+
+REGISTER_DEFINITION(Register, noreg);  // the invalid register (declared with encoding -1)
+
+REGISTER_DEFINITION(Register, x0);  // x0 - x31: one definition per integer register declared in register_riscv.hpp
+REGISTER_DEFINITION(Register, x1);
+REGISTER_DEFINITION(Register, x2);
+REGISTER_DEFINITION(Register, x3);
+REGISTER_DEFINITION(Register, x4);
+REGISTER_DEFINITION(Register, x5);
+REGISTER_DEFINITION(Register, x6);
+REGISTER_DEFINITION(Register, x7);
+REGISTER_DEFINITION(Register, x8);
+REGISTER_DEFINITION(Register, x9);
+REGISTER_DEFINITION(Register, x10);
+REGISTER_DEFINITION(Register, x11);
+REGISTER_DEFINITION(Register, x12);
+REGISTER_DEFINITION(Register, x13);
+REGISTER_DEFINITION(Register, x14);
+REGISTER_DEFINITION(Register, x15);
+REGISTER_DEFINITION(Register, x16);
+REGISTER_DEFINITION(Register, x17);
+REGISTER_DEFINITION(Register, x18);
+REGISTER_DEFINITION(Register, x19);
+REGISTER_DEFINITION(Register, x20);
+REGISTER_DEFINITION(Register, x21);
+REGISTER_DEFINITION(Register, x22);
+REGISTER_DEFINITION(Register, x23);
+REGISTER_DEFINITION(Register, x24);
+REGISTER_DEFINITION(Register, x25);
+REGISTER_DEFINITION(Register, x26);
+REGISTER_DEFINITION(Register, x27);
+REGISTER_DEFINITION(Register, x28);
+REGISTER_DEFINITION(Register, x29);
+REGISTER_DEFINITION(Register, x30);
+REGISTER_DEFINITION(Register, x31);
+
+REGISTER_DEFINITION(FloatRegister, fnoreg);  // the invalid float register (declared with encoding -1)
+
+REGISTER_DEFINITION(FloatRegister, f0);  // f0 - f31: one definition per float register declared in register_riscv.hpp
+REGISTER_DEFINITION(FloatRegister, f1);
+REGISTER_DEFINITION(FloatRegister, f2);
+REGISTER_DEFINITION(FloatRegister, f3);
+REGISTER_DEFINITION(FloatRegister, f4);
+REGISTER_DEFINITION(FloatRegister, f5);
+REGISTER_DEFINITION(FloatRegister, f6);
+REGISTER_DEFINITION(FloatRegister, f7);
+REGISTER_DEFINITION(FloatRegister, f8);
+REGISTER_DEFINITION(FloatRegister, f9);
+REGISTER_DEFINITION(FloatRegister, f10);
+REGISTER_DEFINITION(FloatRegister, f11);
+REGISTER_DEFINITION(FloatRegister, f12);
+REGISTER_DEFINITION(FloatRegister, f13);
+REGISTER_DEFINITION(FloatRegister, f14);
+REGISTER_DEFINITION(FloatRegister, f15);
+REGISTER_DEFINITION(FloatRegister, f16);
+REGISTER_DEFINITION(FloatRegister, f17);
+REGISTER_DEFINITION(FloatRegister, f18);
+REGISTER_DEFINITION(FloatRegister, f19);
+REGISTER_DEFINITION(FloatRegister, f20);
+REGISTER_DEFINITION(FloatRegister, f21);
+REGISTER_DEFINITION(FloatRegister, f22);
+REGISTER_DEFINITION(FloatRegister, f23);
+REGISTER_DEFINITION(FloatRegister, f24);
+REGISTER_DEFINITION(FloatRegister, f25);
+REGISTER_DEFINITION(FloatRegister, f26);
+REGISTER_DEFINITION(FloatRegister, f27);
+REGISTER_DEFINITION(FloatRegister, f28);
+REGISTER_DEFINITION(FloatRegister, f29);
+REGISTER_DEFINITION(FloatRegister, f30);
+REGISTER_DEFINITION(FloatRegister, f31);
+
+REGISTER_DEFINITION(VectorRegister, vnoreg);  // the invalid vector register (declared with encoding -1)
+
+REGISTER_DEFINITION(VectorRegister, v0);  // v0 - v31: one definition per vector register declared in register_riscv.hpp
+REGISTER_DEFINITION(VectorRegister, v1);
+REGISTER_DEFINITION(VectorRegister, v2);
+REGISTER_DEFINITION(VectorRegister, v3);
+REGISTER_DEFINITION(VectorRegister, v4);
+REGISTER_DEFINITION(VectorRegister, v5);
+REGISTER_DEFINITION(VectorRegister, v6);
+REGISTER_DEFINITION(VectorRegister, v7);
+REGISTER_DEFINITION(VectorRegister, v8);
+REGISTER_DEFINITION(VectorRegister, v9);
+REGISTER_DEFINITION(VectorRegister, v10);
+REGISTER_DEFINITION(VectorRegister, v11);
+REGISTER_DEFINITION(VectorRegister, v12);
+REGISTER_DEFINITION(VectorRegister, v13);
+REGISTER_DEFINITION(VectorRegister, v14);
+REGISTER_DEFINITION(VectorRegister, v15);
+REGISTER_DEFINITION(VectorRegister, v16);
+REGISTER_DEFINITION(VectorRegister, v17);
+REGISTER_DEFINITION(VectorRegister, v18);
+REGISTER_DEFINITION(VectorRegister, v19);
+REGISTER_DEFINITION(VectorRegister, v20);
+REGISTER_DEFINITION(VectorRegister, v21);
+REGISTER_DEFINITION(VectorRegister, v22);
+REGISTER_DEFINITION(VectorRegister, v23);
+REGISTER_DEFINITION(VectorRegister, v24);
+REGISTER_DEFINITION(VectorRegister, v25);
+REGISTER_DEFINITION(VectorRegister, v26);
+REGISTER_DEFINITION(VectorRegister, v27);
+REGISTER_DEFINITION(VectorRegister, v28);
+REGISTER_DEFINITION(VectorRegister, v29);
+REGISTER_DEFINITION(VectorRegister, v30);
+REGISTER_DEFINITION(VectorRegister, v31);
+
+REGISTER_DEFINITION(Register, c_rarg0);  // NOTE(review): c_rarg*/c_farg* presumably are the C-ABI integer/float argument registers - confirm
+REGISTER_DEFINITION(Register, c_rarg1);
+REGISTER_DEFINITION(Register, c_rarg2);
+REGISTER_DEFINITION(Register, c_rarg3);
+REGISTER_DEFINITION(Register, c_rarg4);
+REGISTER_DEFINITION(Register, c_rarg5);
+REGISTER_DEFINITION(Register, c_rarg6);
+REGISTER_DEFINITION(Register, c_rarg7);
+
+REGISTER_DEFINITION(FloatRegister, c_farg0);
+REGISTER_DEFINITION(FloatRegister, c_farg1);
+REGISTER_DEFINITION(FloatRegister, c_farg2);
+REGISTER_DEFINITION(FloatRegister, c_farg3);
+REGISTER_DEFINITION(FloatRegister, c_farg4);
+REGISTER_DEFINITION(FloatRegister, c_farg5);
+REGISTER_DEFINITION(FloatRegister, c_farg6);
+REGISTER_DEFINITION(FloatRegister, c_farg7);
+
+REGISTER_DEFINITION(Register, j_rarg0);  // NOTE(review): j_rarg*/j_farg* presumably are the Java calling-convention counterparts - confirm
+REGISTER_DEFINITION(Register, j_rarg1);
+REGISTER_DEFINITION(Register, j_rarg2);
+REGISTER_DEFINITION(Register, j_rarg3);
+REGISTER_DEFINITION(Register, j_rarg4);
+REGISTER_DEFINITION(Register, j_rarg5);
+REGISTER_DEFINITION(Register, j_rarg6);
+REGISTER_DEFINITION(Register, j_rarg7);
+
+REGISTER_DEFINITION(FloatRegister, j_farg0);
+REGISTER_DEFINITION(FloatRegister, j_farg1);
+REGISTER_DEFINITION(FloatRegister, j_farg2);
+REGISTER_DEFINITION(FloatRegister, j_farg3);
+REGISTER_DEFINITION(FloatRegister, j_farg4);
+REGISTER_DEFINITION(FloatRegister, j_farg5);
+REGISTER_DEFINITION(FloatRegister, j_farg6);
+REGISTER_DEFINITION(FloatRegister, j_farg7);
+
+REGISTER_DEFINITION(Register, zr);  // named registers; RegisterImpl::name() shows which encoding each name is printed for
+REGISTER_DEFINITION(Register, gp);
+REGISTER_DEFINITION(Register, tp);
+REGISTER_DEFINITION(Register, xmethod);
+REGISTER_DEFINITION(Register, ra);
+REGISTER_DEFINITION(Register, sp);
+REGISTER_DEFINITION(Register, fp);
+REGISTER_DEFINITION(Register, xheapbase);
+REGISTER_DEFINITION(Register, xcpool);
+REGISTER_DEFINITION(Register, xmonitors);
+REGISTER_DEFINITION(Register, xlocals);
+REGISTER_DEFINITION(Register, xthread);
+REGISTER_DEFINITION(Register, xbcp);
+REGISTER_DEFINITION(Register, xdispatch);
+REGISTER_DEFINITION(Register, esp);
+
+REGISTER_DEFINITION(Register, t0);  // t0 - t2 are the names RegisterImpl::name() prints for encodings 5 - 7
+REGISTER_DEFINITION(Register, t1);
+REGISTER_DEFINITION(Register, t2);
diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp
new file mode 100644
index 0000000000..ef60cb3bb0
--- /dev/null
+++ b/src/hotspot/cpu/riscv/register_riscv.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "register_riscv.hpp"
+
+// Slots taken by the integer registers: each register counts max_slots_per_register slots.
+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
+
+// The integer register slots plus the float register slots.
+const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
+    FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+
+const char* RegisterImpl::name() const {
+  static const char *const reg_names[number_of_registers] = {
+    "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2",                                // x0  - x7
+    "fp", "x9", "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5",  // x8  - x15
+    "c_rarg6", "c_rarg7", "x18", "x19", "esp", "xdispatch", "xbcp", "xthread",     // x16 - x23
+    "xlocals", "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod"  // x24 - x31
+  };
+  return !is_valid() ? "noreg" : reg_names[encoding()];  // table is indexed by encoding
+}
+
+const char* FloatRegisterImpl::name() const {
+  // Printable names indexed by encoding; anything invalid (e.g. fnoreg) prints as "noreg".
+  static const char *const freg_names[number_of_registers] = {
+    "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
+    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
+  };
+  if (!is_valid()) { return "noreg"; }
+  return freg_names[encoding()];
+}
+
+const char* VectorRegisterImpl::name() const {
+  // Printable names indexed by encoding; anything invalid (e.g. vnoreg) prints as "noreg".
+  static const char *const vreg_names[number_of_registers] = {
+    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+  };
+  if (!is_valid()) { return "noreg"; }
+  return vreg_names[encoding()];
+}
diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp
new file mode 100644
index 0000000000..f64a06eb89
--- /dev/null
+++ b/src/hotspot/cpu/riscv/register_riscv.hpp
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_REGISTER_RISCV_HPP
+#define CPU_RISCV_REGISTER_RISCV_HPP
+
+#include "asm/register.hpp"
+
+#define CSR_FFLAGS   0x001        // Floating-Point Accrued Exceptions.
+#define CSR_FRM      0x002        // Floating-Point Dynamic Rounding Mode.
+#define CSR_FCSR     0x003        // Floating-Point Control and Status Register (frm + fflags).
+#define CSR_VSTART   0x008        // Vector start position
+#define CSR_VXSAT    0x009        // Fixed-Point Saturate Flag
+#define CSR_VXRM     0x00A        // Fixed-Point Rounding Mode
+#define CSR_VCSR     0x00F        // Vector control and status register
+#define CSR_VL       0xC20        // Vector length
+#define CSR_VTYPE    0xC21        // Vector data type register
+#define CSR_VLENB    0xC22        // VLEN/8 (vector register length in bytes)
+#define CSR_CYCLE    0xc00        // Cycle counter for RDCYCLE instruction.
+#define CSR_TIME     0xc01        // Timer for RDTIME instruction.
+#define CSR_INSTERT  0xc02        // Instructions-retired counter for RDINSTRET instruction. NOTE(review): name looks like a misspelling of INSTRET; kept as is for existing users.
+
+class VMRegImpl;
+typedef VMRegImpl* VMReg;
+
+// Register is shorthand for a pointer to RegisterImpl; the pointer value carries the encoding.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+inline Register as_Register(int encoding) {
+  return reinterpret_cast<Register>(static_cast<intptr_t>(encoding));
+}
+
+class RegisterImpl: public AbstractRegisterImpl {  // a Register is a RegisterImpl* whose value is the encoding
+ public:
+  enum {
+    number_of_registers      = 32,
+    max_slots_per_register   = 2,
+
+    // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable
+    // for compressed instructions. See Table 17.2 in spec.
+    compressed_register_base = 8,
+    compressed_register_top  = 15,
+  };
+
+  // derived registers, offsets, and addresses
+  const Register successor() const { return as_Register(encoding() + 1); }
+
+  // construction
+  inline friend Register as_Register(int encoding);
+
+  VMReg as_VMReg() const;
+
+  // accessors
+  int encoding() const            { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+  int encoding_nocheck() const    { return (intptr_t)this; }
+  bool is_valid() const           { return (unsigned)encoding_nocheck() < number_of_registers; }
+  const char* name() const;
+
+  // for rvc: encodings relative to compressed_register_base (x8)
+  int compressed_encoding() const {
+    assert(is_compressed_valid(), "invalid compressed register");
+    return encoding() - compressed_register_base;
+  }
+
+  int compressed_encoding_nocheck() const { return encoding_nocheck() - compressed_register_base; }
+
+  bool is_compressed_valid() const {
+    return encoding_nocheck() >= compressed_register_base &&
+           encoding_nocheck() <= compressed_register_top;
+  }
+
+  // Return the bit which represents this register.  This is intended
+  // to be ORed into a bitmask: for usage see class RegSet below.
+  // The 1 is widened before shifting: as a plain int, 1 << 31 (x31) hits the sign
+  // bit and the conversion to uint64_t would then set the upper 32 bits as well.
+  uint64_t bit(bool should_set = true) const { return should_set ? (uint64_t)1 << encoding() : 0; }
+};
+
+// The integer registers of the RISCV architecture: noreg (encoding -1, never valid) and x0 - x31
+
+CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(Register, x0,    (0));
+CONSTANT_REGISTER_DECLARATION(Register, x1,    (1));
+CONSTANT_REGISTER_DECLARATION(Register, x2,    (2));
+CONSTANT_REGISTER_DECLARATION(Register, x3,    (3));
+CONSTANT_REGISTER_DECLARATION(Register, x4,    (4));
+CONSTANT_REGISTER_DECLARATION(Register, x5,    (5));
+CONSTANT_REGISTER_DECLARATION(Register, x6,    (6));
+CONSTANT_REGISTER_DECLARATION(Register, x7,    (7));
+CONSTANT_REGISTER_DECLARATION(Register, x8,    (8));
+CONSTANT_REGISTER_DECLARATION(Register, x9,    (9));
+CONSTANT_REGISTER_DECLARATION(Register, x10,  (10));
+CONSTANT_REGISTER_DECLARATION(Register, x11,  (11));
+CONSTANT_REGISTER_DECLARATION(Register, x12,  (12));
+CONSTANT_REGISTER_DECLARATION(Register, x13,  (13));
+CONSTANT_REGISTER_DECLARATION(Register, x14,  (14));
+CONSTANT_REGISTER_DECLARATION(Register, x15,  (15));
+CONSTANT_REGISTER_DECLARATION(Register, x16,  (16));
+CONSTANT_REGISTER_DECLARATION(Register, x17,  (17));
+CONSTANT_REGISTER_DECLARATION(Register, x18,  (18));
+CONSTANT_REGISTER_DECLARATION(Register, x19,  (19));
+CONSTANT_REGISTER_DECLARATION(Register, x20,  (20));
+CONSTANT_REGISTER_DECLARATION(Register, x21,  (21));
+CONSTANT_REGISTER_DECLARATION(Register, x22,  (22));
+CONSTANT_REGISTER_DECLARATION(Register, x23,  (23));
+CONSTANT_REGISTER_DECLARATION(Register, x24,  (24));
+CONSTANT_REGISTER_DECLARATION(Register, x25,  (25));
+CONSTANT_REGISTER_DECLARATION(Register, x26,  (26));
+CONSTANT_REGISTER_DECLARATION(Register, x27,  (27));
+CONSTANT_REGISTER_DECLARATION(Register, x28,  (28));
+CONSTANT_REGISTER_DECLARATION(Register, x29,  (29));
+CONSTANT_REGISTER_DECLARATION(Register, x30,  (30));
+CONSTANT_REGISTER_DECLARATION(Register, x31,  (31));
+
+// FloatRegister is shorthand for a pointer to FloatRegisterImpl; the pointer value carries the encoding.
+class FloatRegisterImpl;
+typedef FloatRegisterImpl* FloatRegister;
+
+inline FloatRegister as_FloatRegister(int encoding) {
+  return reinterpret_cast<FloatRegister>(static_cast<intptr_t>(encoding));
+}
+
+// The implementation of floating point registers for the architecture
+class FloatRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers     = 32,
+    max_slots_per_register  = 2,
+
+    // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec.
+    compressed_register_base = 8,
+    compressed_register_top  = 15,
+  };
+
+  // construction
+  inline friend FloatRegister as_FloatRegister(int encoding);
+
+  VMReg as_VMReg() const;
+
+  // derived registers, offsets, and addresses
+  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
+
+  // accessors
+  int encoding() const            { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+  int encoding_nocheck() const    { return (intptr_t)this; }
+  // is_valid() returns bool, matching RegisterImpl and VectorRegisterImpl; the unsigned
+  // compare also rejects negative encodings such as fnoreg (-1).
+  bool is_valid() const           { return (unsigned)encoding_nocheck() < number_of_registers; }
+  const char* name() const;
+
+  // for rvc: encodings relative to compressed_register_base (f8)
+  int compressed_encoding() const {
+    assert(is_compressed_valid(), "invalid compressed register");
+    return encoding() - compressed_register_base;
+  }
+
+  int compressed_encoding_nocheck() const { return encoding_nocheck() - compressed_register_base; }
+
+  bool is_compressed_valid() const {
+    return encoding_nocheck() >= compressed_register_base &&
+           encoding_nocheck() <= compressed_register_top;
+  }
+};
+
+// The float registers of the RISCV architecture: fnoreg (encoding -1, never valid) and f0 - f31
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f0     , ( 0));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f1     , ( 1));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f2     , ( 2));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f3     , ( 3));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f4     , ( 4));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f5     , ( 5));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f6     , ( 6));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f7     , ( 7));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f8     , ( 8));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f9     , ( 9));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f10    , (10));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11    , (11));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f12    , (12));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f13    , (13));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f14    , (14));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f15    , (15));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f16    , (16));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f17    , (17));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f18    , (18));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f19    , (19));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f20    , (20));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f21    , (21));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f22    , (22));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f23    , (23));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f24    , (24));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f25    , (25));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f26    , (26));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f27    , (27));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f28    , (28));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f29    , (29));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f30    , (30));
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f31    , (31));
+
+// VectorRegister is shorthand for a pointer to VectorRegisterImpl; the pointer value carries the encoding.
+class VectorRegisterImpl;
+typedef VectorRegisterImpl* VectorRegister;
+
+inline VectorRegister as_VectorRegister(int encoding) {
+  return reinterpret_cast<VectorRegister>(static_cast<intptr_t>(encoding));
+}
+
+// The implementation of vector registers for RVV
+class VectorRegisterImpl: public AbstractRegisterImpl {
+ public:
+  enum {
+    number_of_registers    = 32,  // v0 - v31
+    max_slots_per_register = 4    // slots counted for each vector register
+  };
+
+  // construction
+  inline friend VectorRegister as_VectorRegister(int encoding);
+
+  VMReg as_VMReg() const;
+
+  // derived registers, offsets, and addresses
+  VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }  // result is not range-checked (v31 yields encoding 32)
+
+  // accessors
+  int encoding() const            { assert(is_valid(), "invalid register"); return encoding_nocheck(); }
+  int encoding_nocheck() const    { return (intptr_t)this; }  // the pointer value itself is the encoding
+  bool is_valid() const           { return (unsigned)encoding_nocheck() < number_of_registers; }  // unsigned compare also rejects vnoreg (-1)
+  const char* name() const;
+
+};
+
+// The vector registers of RVV: vnoreg (encoding -1, never valid) and v0 - v31
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1));
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v0     , ( 0));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v1     , ( 1));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v2     , ( 2));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v3     , ( 3));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v4     , ( 4));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v5     , ( 5));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v6     , ( 6));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v7     , ( 7));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v8     , ( 8));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v9     , ( 9));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v10    , (10));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v11    , (11));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v12    , (12));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v13    , (13));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v14    , (14));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v15    , (15));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v16    , (16));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v17    , (17));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v18    , (18));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v19    , (19));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v20    , (20));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v21    , (21));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v22    , (22));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v23    , (23));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v24    , (24));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v25    , (25));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v26    , (26));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v27    , (27));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v28    , (28));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v29    , (29));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v30    , (30));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, v31    , (31));
+
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+  // A big enough number for C2: all the registers plus flags
+  // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+  // There is no requirement that any ordering here matches any ordering c2 gives
+  // its optoregs.
+  // NOTE(review): only integer and float slots are summed; VectorRegisterImpl is not included - confirm intended.
+    number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+                           FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers)
+  };
+
+  // added to make it compile; the values are defined in register_riscv.cpp
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+// A set of integer registers, stored as a 32-bit mask with one bit per register encoding.
+class RegSet {
+  uint32_t _bitset;
+
+  // Wraps an already-computed mask; private, so outside code builds sets from registers.
+  RegSet(uint32_t bitset) : _bitset(bitset) { }
+
+public:
+  // The empty set.
+  RegSet() : _bitset(0) { }
+
+  // The set holding just r1.
+  RegSet(Register r1) : _bitset(r1->bit()) { }
+
+  // Set union.
+  RegSet operator+(const RegSet aSet) const { return RegSet(_bitset | aSet._bitset); }
+
+  // Set difference.
+  RegSet operator-(const RegSet aSet) const { return RegSet(_bitset & ~aSet._bitset); }
+
+  // In-place union.
+  RegSet &operator+=(const RegSet aSet) {
+    _bitset |= aSet._bitset;
+    return *this;
+  }
+
+  // In-place difference.
+  RegSet &operator-=(const RegSet aSet) {
+    _bitset &= ~aSet._bitset;
+    return *this;
+  }
+
+  // Convenience builders for sets of one to four registers.
+  static RegSet of(Register r1) { return RegSet(r1); }
+
+  static RegSet of(Register r1, Register r2) { return RegSet(r1) + RegSet(r2); }
+
+  static RegSet of(Register r1, Register r2, Register r3) {
+    return RegSet(r1) + RegSet(r2) + RegSet(r3);
+  }
+
+  static RegSet of(Register r1, Register r2, Register r3, Register r4) {
+    return RegSet(r1) + RegSet(r2) + RegSet(r3) + RegSet(r4);
+  }
+
+  // The set of all registers whose encodings lie in [start, end].
+  static RegSet range(Register start, Register end) {
+    const int lo = start->encoding();
+    const int hi_gap = 31 - end->encoding();
+    uint32_t mask = ~0;
+    mask <<= lo;       // clear the bits below start ...
+    mask <<= hi_gap;   // ... push the bits above end off the top ...
+    mask >>= hi_gap;   // ... and slide the survivors back into place
+    return RegSet(mask);
+  }
+
+  // The raw mask.
+  uint32_t bits() const { return _bitset; }
+
+private:
+  // Register with the lowest encoding in the set, or noreg when the set is empty.
+  Register first() {
+    uint32_t lowest = _bitset & -_bitset;
+    return lowest == 0 ? noreg : as_Register(exact_log2(lowest));
+  }
+};
+
+#endif // CPU_RISCV_REGISTER_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
new file mode 100644
index 0000000000..047ea2276c
--- /dev/null
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "code/relocInfo.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/safepoint.hpp"
+
+// Patches the code at addr() to refer to x through the MacroAssembler patch
+// helpers, then invalidates the instruction cache over the patched bytes.
+// Does nothing when verify_only is set; the parameter o is not used.
+void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
+  if (verify_only) return;
+
+  int patched_bytes;
+  if (type() != relocInfo::oop_type) {
+    patched_bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
+  } else {
+    oop_Relocation* oop_reloc = (oop_Relocation*)this;
+    if (!NativeInstruction::is_load_pc_relative_at(addr())) {
+      // Not a pc-relative load: hand x itself to patch_oop.
+      patched_bytes = MacroAssembler::patch_oop(addr(), x);
+    } else {
+      // pc-relative load: aim it at this relocation's oop slot rather than
+      // at x, and check that the slot really holds x.
+      address oop_slot = (address)code()->oop_addr_at(oop_reloc->oop_index());
+      patched_bytes = MacroAssembler::pd_patch_instruction_size(addr(), oop_slot);
+      assert(*(address*)oop_slot == x, "error in oop relocation");
+    }
+  }
+
+  ICache::invalidate_range(addr(), patched_bytes);
+}
+
+// Returns the destination of the call at addr(): the trampoline's destination
+// if the call has one, else the target decoded at orig_addr (or at addr()).
+address Relocation::pd_call_destination(address orig_addr) {
+  assert(is_call(), "should be an address instruction here");
+  if (NativeCall::is_call_at(addr())) {
+    address stub = nativeCall_at(addr())->get_trampoline();
+    if (stub != NULL) {
+      return nativeCallTrampolineStub_at(stub)->destination();
+    }
+  }
+  if (orig_addr == NULL) {
+    return MacroAssembler::pd_call_destination(addr());
+  }
+  // Target extracted from the instructions at the original location.
+  address decoded = MacroAssembler::pd_call_destination(orig_addr);
+  // A branch-to-self (seen during code generation when the code buffer
+  // expands) stays a branch-to-self at the new address; it is relocated to
+  // the trampoline above once code generation is complete.
+  return (decoded == orig_addr) ? addr() : decoded;
+}
+
+// Redirects the call at addr() to x. A call that has a trampoline goes
+// through set_destination_mt_safe; anything else is patched directly.
+void Relocation::pd_set_call_destination(address x) {
+  assert(is_call(), "should be an address instruction here");
+  if (NativeCall::is_call_at(addr()) &&
+      nativeCall_at(addr())->get_trampoline() != NULL) {
+    nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false);
+    return;
+  }
+  MacroAssembler::pd_patch_instruction_size(addr(), x);
+  address patched_target = pd_call_destination(addr());  // read back to verify
+  assert(patched_target == x, "fail in reloc");
+}
+
+address* Relocation::pd_address_in_code() {  // target address of the pc-relative load at addr()
+  assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!");
+  return (address*)(MacroAssembler::target_addr_for_insn(addr()));  // reinterpreted as address*
+}
+
+address Relocation::pd_get_address_from_code() {  // address decoded from the code at addr()
+  return MacroAssembler::pd_call_destination(addr());  // same decoder pd_call_destination() falls back on
+}
+
+void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
+  if (!NativeInstruction::maybe_cpool_ref(addr())) return;  // nothing to re-patch
+  address prior_addr = old_addr_for(addr(), src, dest);     // where this code sat before the move
+  address prior_target = MacroAssembler::target_addr_for_insn(prior_addr);
+  MacroAssembler::pd_patch_instruction_size(addr(), prior_target);  // keep the pre-move target
+}
+
+void metadata_Relocation::pd_fix_value(address x) {  // no-op in this port: x is ignored
+}
diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
new file mode 100644
index 0000000000..840ed935d8
--- /dev/null
+++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP
+#define CPU_RISCV_RELOCINFO_RISCV_HPP
+
+  // Machine-dependent parts of class relocInfo (RISC-V); presumably included in the class body.
+ private:
+  enum {
+    // Relocations are byte-aligned.
+    offset_unit        =  1,
+    // Must be at least 1 for relocInfo::narrow_oop_in_const.
+    format_width       =  1
+  };
+
+ public:
+
+  // Always false: this platform has no oops in the code that are not also
+  // listed in the oop section.
+  static bool mustIterateImmediateOopsInCode() { return false; }
+
+#endif // CPU_RISCV_RELOCINFO_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
new file mode 100644
index 0000000000..d54ae97200
--- /dev/null
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -0,0 +1,10273 @@
+//
+// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// RISCV Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// We must define the 64 bit int registers in two 32 bit halves, the
+// real lower register and a virtual upper half register. upper halves
+// are used by the register allocator but are not actually supplied as
+// operands to memory ops.
+//
+// follow the C1 compiler in making registers
+//
+//   x7, x9-x17, x27-x31 volatile (caller save)
+//   x0-x4, x8, x23 system (no save, no allocate)
+//   x5-x6 non-allocatable (so we can use them as temporary regs)
+
+//
+// as regards Java usage. we don't use any callee save registers
+// because this makes it difficult to de-optimise a frame (see comment
+// in x86 implementation of Deoptimization::unwind_callee_save_values)
+//
+
+// General Registers
+
+reg_def R0      ( NS,  NS,  Op_RegI, 0,  x0->as_VMReg()         ); // zr
+reg_def R0_H    ( NS,  NS,  Op_RegI, 0,  x0->as_VMReg()->next() );
+reg_def R1      ( NS,  SOC, Op_RegI, 1,  x1->as_VMReg()         ); // ra
+reg_def R1_H    ( NS,  SOC, Op_RegI, 1,  x1->as_VMReg()->next() );
+reg_def R2      ( NS,  NS,  Op_RegI, 2,  x2->as_VMReg()         ); // sp
+reg_def R2_H    ( NS,  NS,  Op_RegI, 2,  x2->as_VMReg()->next() );
+reg_def R3      ( NS,  NS,  Op_RegI, 3,  x3->as_VMReg()         ); // gp
+reg_def R3_H    ( NS,  NS,  Op_RegI, 3,  x3->as_VMReg()->next() );
+reg_def R4      ( NS,  NS,  Op_RegI, 4,  x4->as_VMReg()         ); // tp
+reg_def R4_H    ( NS,  NS,  Op_RegI, 4,  x4->as_VMReg()->next() );
+reg_def R7      ( SOC, SOC, Op_RegI, 7,  x7->as_VMReg()         );
+reg_def R7_H    ( SOC, SOC, Op_RegI, 7,  x7->as_VMReg()->next() );
+reg_def R8      ( NS,  SOE, Op_RegI, 8,  x8->as_VMReg()         ); // fp
+reg_def R8_H    ( NS,  SOE, Op_RegI, 8,  x8->as_VMReg()->next() );
+reg_def R9      ( SOC, SOE, Op_RegI, 9,  x9->as_VMReg()         );
+reg_def R9_H    ( SOC, SOE, Op_RegI, 9,  x9->as_VMReg()->next() );
+reg_def R10     ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()        );
+reg_def R10_H   ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next());
+reg_def R11     ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()        );
+reg_def R11_H   ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next());
+reg_def R12     ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()        );
+reg_def R12_H   ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next());
+reg_def R13     ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()        );
+reg_def R13_H   ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next());
+reg_def R14     ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()        );
+reg_def R14_H   ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next());
+reg_def R15     ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()        );
+reg_def R15_H   ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next());
+reg_def R16     ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()        );
+reg_def R16_H   ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next());
+reg_def R17     ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()        );
+reg_def R17_H   ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next());
+reg_def R18     ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()        );
+reg_def R18_H   ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next());
+reg_def R19     ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()        );
+reg_def R19_H   ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next());
+reg_def R20     ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()        ); // caller esp
+reg_def R20_H   ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next());
+reg_def R21     ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()        );
+reg_def R21_H   ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next());
+reg_def R22     ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()        );
+reg_def R22_H   ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next());
+reg_def R23     ( NS,  SOE, Op_RegI, 23, x23->as_VMReg()        ); // java thread
+reg_def R23_H   ( NS,  SOE, Op_RegI, 23, x23->as_VMReg()->next());
+reg_def R24     ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()        );
+reg_def R24_H   ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next());
+reg_def R25     ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()        );
+reg_def R25_H   ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next());
+reg_def R26     ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()        );
+reg_def R26_H   ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next());
+reg_def R27     ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()        ); // heapbase
+reg_def R27_H   ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next());
+reg_def R28     ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()        );
+reg_def R28_H   ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next());
+reg_def R29     ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()        );
+reg_def R29_H   ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next());
+reg_def R30     ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()        );
+reg_def R30_H   ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next());
+reg_def R31     ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()        );
+reg_def R31_H   ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next());
+
+// ----------------------------
+// Float/Double Registers
+// ----------------------------
+
+// Double Registers
+
+// The rules of ADL require that double registers be defined in pairs.
+// Each pair must be two 32-bit values, but not necessarily a pair of
+// single float registers. In each pair, ADLC-assigned register numbers
+// must be adjacent, with the lower number even. Finally, when the
+// CPU stores such a register pair to memory, the word associated with
+// the lower ADLC-assigned number must be stored to the lower address.
+
+// RISCV has 32 floating-point registers. Each can store a single
+// or double precision floating-point value.
+
+// For Java use, float registers f0-f31 are always save-on-call (whereas
+// the platform ABI treats f8-f9 and f18-f27 as callee save). Other
+// float registers are SOC as per the platform spec.
+
+reg_def F0    ( SOC, SOC, Op_RegF,  0,  f0->as_VMReg()          );
+reg_def F0_H  ( SOC, SOC, Op_RegF,  0,  f0->as_VMReg()->next()  );
+reg_def F1    ( SOC, SOC, Op_RegF,  1,  f1->as_VMReg()          );
+reg_def F1_H  ( SOC, SOC, Op_RegF,  1,  f1->as_VMReg()->next()  );
+reg_def F2    ( SOC, SOC, Op_RegF,  2,  f2->as_VMReg()          );
+reg_def F2_H  ( SOC, SOC, Op_RegF,  2,  f2->as_VMReg()->next()  );
+reg_def F3    ( SOC, SOC, Op_RegF,  3,  f3->as_VMReg()          );
+reg_def F3_H  ( SOC, SOC, Op_RegF,  3,  f3->as_VMReg()->next()  );
+reg_def F4    ( SOC, SOC, Op_RegF,  4,  f4->as_VMReg()          );
+reg_def F4_H  ( SOC, SOC, Op_RegF,  4,  f4->as_VMReg()->next()  );
+reg_def F5    ( SOC, SOC, Op_RegF,  5,  f5->as_VMReg()          );
+reg_def F5_H  ( SOC, SOC, Op_RegF,  5,  f5->as_VMReg()->next()  );
+reg_def F6    ( SOC, SOC, Op_RegF,  6,  f6->as_VMReg()          );
+reg_def F6_H  ( SOC, SOC, Op_RegF,  6,  f6->as_VMReg()->next()  );
+reg_def F7    ( SOC, SOC, Op_RegF,  7,  f7->as_VMReg()          );
+reg_def F7_H  ( SOC, SOC, Op_RegF,  7,  f7->as_VMReg()->next()  );
+reg_def F8    ( SOC, SOE, Op_RegF,  8,  f8->as_VMReg()          );
+reg_def F8_H  ( SOC, SOE, Op_RegF,  8,  f8->as_VMReg()->next()  );
+reg_def F9    ( SOC, SOE, Op_RegF,  9,  f9->as_VMReg()          );
+reg_def F9_H  ( SOC, SOE, Op_RegF,  9,  f9->as_VMReg()->next()  );
+reg_def F10   ( SOC, SOC, Op_RegF,  10, f10->as_VMReg()         );
+reg_def F10_H ( SOC, SOC, Op_RegF,  10, f10->as_VMReg()->next() );
+reg_def F11   ( SOC, SOC, Op_RegF,  11, f11->as_VMReg()         );
+reg_def F11_H ( SOC, SOC, Op_RegF,  11, f11->as_VMReg()->next() );
+reg_def F12   ( SOC, SOC, Op_RegF,  12, f12->as_VMReg()         );
+reg_def F12_H ( SOC, SOC, Op_RegF,  12, f12->as_VMReg()->next() );
+reg_def F13   ( SOC, SOC, Op_RegF,  13, f13->as_VMReg()         );
+reg_def F13_H ( SOC, SOC, Op_RegF,  13, f13->as_VMReg()->next() );
+reg_def F14   ( SOC, SOC, Op_RegF,  14, f14->as_VMReg()         );
+reg_def F14_H ( SOC, SOC, Op_RegF,  14, f14->as_VMReg()->next() );
+reg_def F15   ( SOC, SOC, Op_RegF,  15, f15->as_VMReg()         );
+reg_def F15_H ( SOC, SOC, Op_RegF,  15, f15->as_VMReg()->next() );
+reg_def F16   ( SOC, SOC, Op_RegF,  16, f16->as_VMReg()         );
+reg_def F16_H ( SOC, SOC, Op_RegF,  16, f16->as_VMReg()->next() );
+reg_def F17   ( SOC, SOC, Op_RegF,  17, f17->as_VMReg()         );
+reg_def F17_H ( SOC, SOC, Op_RegF,  17, f17->as_VMReg()->next() );
+reg_def F18   ( SOC, SOE, Op_RegF,  18, f18->as_VMReg()         );
+reg_def F18_H ( SOC, SOE, Op_RegF,  18, f18->as_VMReg()->next() );
+reg_def F19   ( SOC, SOE, Op_RegF,  19, f19->as_VMReg()         );
+reg_def F19_H ( SOC, SOE, Op_RegF,  19, f19->as_VMReg()->next() );
+reg_def F20   ( SOC, SOE, Op_RegF,  20, f20->as_VMReg()         );
+reg_def F20_H ( SOC, SOE, Op_RegF,  20, f20->as_VMReg()->next() );
+reg_def F21   ( SOC, SOE, Op_RegF,  21, f21->as_VMReg()         );
+reg_def F21_H ( SOC, SOE, Op_RegF,  21, f21->as_VMReg()->next() );
+reg_def F22   ( SOC, SOE, Op_RegF,  22, f22->as_VMReg()         );
+reg_def F22_H ( SOC, SOE, Op_RegF,  22, f22->as_VMReg()->next() );
+reg_def F23   ( SOC, SOE, Op_RegF,  23, f23->as_VMReg()         );
+reg_def F23_H ( SOC, SOE, Op_RegF,  23, f23->as_VMReg()->next() );
+reg_def F24   ( SOC, SOE, Op_RegF,  24, f24->as_VMReg()         );
+reg_def F24_H ( SOC, SOE, Op_RegF,  24, f24->as_VMReg()->next() );
+reg_def F25   ( SOC, SOE, Op_RegF,  25, f25->as_VMReg()         );
+reg_def F25_H ( SOC, SOE, Op_RegF,  25, f25->as_VMReg()->next() );
+reg_def F26   ( SOC, SOE, Op_RegF,  26, f26->as_VMReg()         );
+reg_def F26_H ( SOC, SOE, Op_RegF,  26, f26->as_VMReg()->next() );
+reg_def F27   ( SOC, SOE, Op_RegF,  27, f27->as_VMReg()         );
+reg_def F27_H ( SOC, SOE, Op_RegF,  27, f27->as_VMReg()->next() );
+reg_def F28   ( SOC, SOC, Op_RegF,  28, f28->as_VMReg()         );
+reg_def F28_H ( SOC, SOC, Op_RegF,  28, f28->as_VMReg()->next() );
+reg_def F29   ( SOC, SOC, Op_RegF,  29, f29->as_VMReg()         );
+reg_def F29_H ( SOC, SOC, Op_RegF,  29, f29->as_VMReg()->next() );
+reg_def F30   ( SOC, SOC, Op_RegF,  30, f30->as_VMReg()         );
+reg_def F30_H ( SOC, SOC, Op_RegF,  30, f30->as_VMReg()->next() );
+reg_def F31   ( SOC, SOC, Op_RegF,  31, f31->as_VMReg()         );
+reg_def F31_H ( SOC, SOC, Op_RegF,  31, f31->as_VMReg()->next() );
+
+// ----------------------------
+// Special Registers
+// ----------------------------
+
+// On riscv, the physical flag register is missing, so we use t1 instead,
+// to bridge the RegFlag semantics in share/opto
+
+reg_def RFLAGS   (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg()        );
+
+// Specify priority of register selection within phases of register
+// allocation.  Highest priority is first.  A useful heuristic is to
+// give registers a low priority when they are required by machine
+// instructions, like EAX and EDX on I486, and choose no-save registers
+// before save-on-call, & save-on-call before save-on-entry.  Registers
+// which participate in fixed calling sequences should come last.
+// Registers which are used as pairs must fall on an even boundary.
+
+alloc_class chunk0(
+    // volatiles
+    R7,  R7_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H,
+
+    // arg registers
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+
+    // non-volatiles
+    R9,  R9_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+
+    // non-allocatable registers
+    R23, R23_H, // java thread
+    R27, R27_H, // heapbase
+    R4,  R4_H,  // thread
+    R8,  R8_H,  // fp
+    R0,  R0_H,  // zero
+    R1,  R1_H,  // ra
+    R2,  R2_H,  // sp
+    R3,  R3_H,  // gp
+);
+
+alloc_class chunk1(
+
+    // no save
+    F0,  F0_H,
+    F1,  F1_H,
+    F2,  F2_H,
+    F3,  F3_H,
+    F4,  F4_H,
+    F5,  F5_H,
+    F6,  F6_H,
+    F7,  F7_H,
+    F28, F28_H,
+    F29, F29_H,
+    F30, F30_H,
+    F31, F31_H,
+
+    // arg registers
+    F10, F10_H,
+    F11, F11_H,
+    F12, F12_H,
+    F13, F13_H,
+    F14, F14_H,
+    F15, F15_H,
+    F16, F16_H,
+    F17, F17_H,
+
+    // non-volatiles
+    F8,  F8_H,
+    F9,  F9_H,
+    F18, F18_H,
+    F19, F19_H,
+    F20, F20_H,
+    F21, F21_H,
+    F22, F22_H,
+    F23, F23_H,
+    F24, F24_H,
+    F25, F25_H,
+    F26, F26_H,
+    F27, F27_H,
+);
+
+alloc_class chunk2(RFLAGS);
+
+//----------Architecture Description Register Classes--------------------------
+// Several register classes are automatically defined based upon information in
+// this architecture description.
+// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
+// 2) reg_class compiler_method_reg        ( /* as def'd in frame section */ )
+// 3) reg_class interpreter_method_reg     ( /* as def'd in frame section */ )
+// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+//
+
+// Class for all 32 bit general purpose registers
+reg_class all_reg32(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R7,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26,
+    R27,
+    R28,
+    R29,
+    R30,
+    R31
+);
+
+// Class for any 32 bit integer registers (excluding zr)
+reg_class any_reg32 %{
+  return _ANY_REG32_mask;
+%}
+
+// Singleton class for R10 int register
+reg_class int_r10_reg(R10);
+
+// Singleton class for R12 int register
+reg_class int_r12_reg(R12);
+
+// Singleton class for R13 int register
+reg_class int_r13_reg(R13);
+
+// Singleton class for R14 int register
+reg_class int_r14_reg(R14);
+
+// Class for all long integer registers
+reg_class all_reg(
+    R0,  R0_H,
+    R1,  R1_H,
+    R2,  R2_H,
+    R3,  R3_H,
+    R4,  R4_H,
+    R7,  R7_H,
+    R8,  R8_H,
+    R9,  R9_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+    R27, R27_H,
+    R28, R28_H,
+    R29, R29_H,
+    R30, R30_H,
+    R31, R31_H
+);
+
+// Class for all long integer registers (excluding zr)
+reg_class any_reg %{
+  return _ANY_REG_mask;
+%}
+
+// Class for non-allocatable 32 bit registers
+reg_class non_allocatable_reg32(
+    R0,                       // zr
+    R1,                       // ra
+    R2,                       // sp
+    R3,                       // gp
+    R4,                       // tp
+    R23                       // java thread
+);
+
+// Class for non-allocatable 64 bit registers
+reg_class non_allocatable_reg(
+    R0,  R0_H,                // zr
+    R1,  R1_H,                // ra
+    R2,  R2_H,                // sp
+    R3,  R3_H,                // gp
+    R4,  R4_H,                // tp
+    R23, R23_H                // java thread
+);
+
+reg_class no_special_reg32 %{
+  return _NO_SPECIAL_REG32_mask;
+%}
+
+reg_class no_special_reg %{
+  return _NO_SPECIAL_REG_mask;
+%}
+
+reg_class ptr_reg %{
+  return _PTR_REG_mask;
+%}
+
+reg_class no_special_ptr_reg %{
+  return _NO_SPECIAL_PTR_REG_mask;
+%}
+
+// Class for 64 bit register r10
+reg_class r10_reg(
+    R10, R10_H
+);
+
+// Class for 64 bit register r11
+reg_class r11_reg(
+    R11, R11_H
+);
+
+// Class for 64 bit register r12
+reg_class r12_reg(
+    R12, R12_H
+);
+
+// Class for 64 bit register r13
+reg_class r13_reg(
+    R13, R13_H
+);
+
+// Class for 64 bit register r14
+reg_class r14_reg(
+    R14, R14_H
+);
+
+// Class for 64 bit register r15
+reg_class r15_reg(
+    R15, R15_H
+);
+
+// Class for 64 bit register r16
+reg_class r16_reg(
+    R16, R16_H
+);
+
+// Class for method register
+reg_class method_reg(
+    R31, R31_H
+);
+
+// Class for heapbase register
+reg_class heapbase_reg(
+    R27, R27_H
+);
+
+// Class for java thread register
+reg_class java_thread_reg(
+    R23, R23_H
+);
+
+reg_class r28_reg(
+    R28, R28_H
+);
+
+reg_class r29_reg(
+    R29, R29_H
+);
+
+reg_class r30_reg(
+    R30, R30_H
+);
+
+// Class for zero register
+reg_class zr_reg(
+    R0, R0_H
+);
+
+// Class for thread register
+reg_class thread_reg(
+    R4, R4_H
+);
+
+// Class for frame pointer register
+reg_class fp_reg(
+    R8, R8_H
+);
+
+// Class for link register
+reg_class ra_reg(
+    R1, R1_H
+);
+
+// Class for long sp register
+reg_class sp_reg(
+    R2, R2_H
+);
+
+// Class for all float registers
+reg_class float_reg(
+    F0,
+    F1,
+    F2,
+    F3,
+    F4,
+    F5,
+    F6,
+    F7,
+    F8,
+    F9,
+    F10,
+    F11,
+    F12,
+    F13,
+    F14,
+    F15,
+    F16,
+    F17,
+    F18,
+    F19,
+    F20,
+    F21,
+    F22,
+    F23,
+    F24,
+    F25,
+    F26,
+    F27,
+    F28,
+    F29,
+    F30,
+    F31
+);
+
+// Double precision float registers have virtual `high halves' that
+// are needed by the allocator.
+// Class for all double registers
+reg_class double_reg(
+    F0,  F0_H,
+    F1,  F1_H,
+    F2,  F2_H,
+    F3,  F3_H,
+    F4,  F4_H,
+    F5,  F5_H,
+    F6,  F6_H,
+    F7,  F7_H,
+    F8,  F8_H,
+    F9,  F9_H,
+    F10, F10_H,
+    F11, F11_H,
+    F12, F12_H,
+    F13, F13_H,
+    F14, F14_H,
+    F15, F15_H,
+    F16, F16_H,
+    F17, F17_H,
+    F18, F18_H,
+    F19, F19_H,
+    F20, F20_H,
+    F21, F21_H,
+    F22, F22_H,
+    F23, F23_H,
+    F24, F24_H,
+    F25, F25_H,
+    F26, F26_H,
+    F27, F27_H,
+    F28, F28_H,
+    F29, F29_H,
+    F30, F30_H,
+    F31, F31_H
+);
+
+// Class for 64 bit register f0
+reg_class f0_reg(
+    F0, F0_H
+);
+
+// Class for 64 bit register f1
+reg_class f1_reg(
+    F1, F1_H
+);
+
+// Class for 64 bit register f2
+reg_class f2_reg(
+    F2, F2_H
+);
+
+// Class for 64 bit register f3
+reg_class f3_reg(
+    F3, F3_H
+);
+
+// class for condition codes
+reg_class reg_flags(RFLAGS);
+%}
+
+//----------DEFINITION BLOCK---------------------------------------------------
+// Define name --> value mappings to inform the ADLC of an integer valued name
+// Current support includes integer values in the range [0, 0x7FFFFFFF]
+// Format:
+//        int_def  <name>         ( <int_value>, <expression>);
+// Generated Code in ad_<arch>.hpp
+//        #define  <name>   (<expression>)
+//        // value == <int_value>
+// Generated code in ad_<arch>.cpp adlc_verification()
+//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
+//
+
+// we follow the ppc-aix port in using a simple cost model which ranks
+// register operations as cheap, memory ops as more expensive and
+// branches as most expensive. the first two have a low as well as a
+// normal cost. huge cost appears to be a way of saying don't do
+// something
+
+definitions %{
+  // The default cost (of a register move instruction).
+  int_def DEFAULT_COST         (  100,               100);
+  int_def ALU_COST             (  100,  1 * DEFAULT_COST);          // unknown, const, arith, shift, slt,
+                                                                    // multi, auipc, nop, logical, move
+  int_def LOAD_COST            (  300,  3 * DEFAULT_COST);          // load, fpload
+  int_def STORE_COST           (  100,  1 * DEFAULT_COST);          // store, fpstore
+  int_def XFER_COST            (  300,  3 * DEFAULT_COST);          // mfc, mtc, fcvt, fmove, fcmp
+  int_def BRANCH_COST          (  200,  2 * DEFAULT_COST);          // branch, jmp, call
+  int_def IMUL_COST            ( 1000, 10 * DEFAULT_COST);          // imul
+  int_def IDIVSI_COST          ( 3400, 34 * DEFAULT_COST);          // idivsi
+  int_def IDIVDI_COST          ( 6600, 66 * DEFAULT_COST);          // idivdi
+  int_def FMUL_SINGLE_COST     (  500,  5 * DEFAULT_COST);          // fadd, fmul, fmadd
+  int_def FMUL_DOUBLE_COST     (  700,  7 * DEFAULT_COST);          // fadd, fmul, fmadd
+  int_def FDIV_COST            ( 2000, 20 * DEFAULT_COST);          // fdiv
+  int_def FSQRT_COST           ( 2500, 25 * DEFAULT_COST);          // fsqrt
+  int_def VOLATILE_REF_COST    ( 1000, 10 * DEFAULT_COST);
+%}
+
+
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+#include "asm/macroAssembler.hpp"
+#include "gc/shared/cardTable.hpp"
+#include "gc/shared/cardTableBarrierSet.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "opto/addnode.hpp"
+#include "opto/convertnode.hpp"
+
+extern RegMask _ANY_REG32_mask;
+extern RegMask _ANY_REG_mask;
+extern RegMask _PTR_REG_mask;
+extern RegMask _NO_SPECIAL_REG32_mask;
+extern RegMask _NO_SPECIAL_REG_mask;
+extern RegMask _NO_SPECIAL_PTR_REG_mask;
+
+class CallStubImpl {
+  // Reports the size and relocation count of a call trampoline stub; both are 0 here.
+  //--------------------------------------------------------------
+  //---<  Used for optimization in Compile::shorten_branches  >---
+  //--------------------------------------------------------------
+  // NOTE(review): relocInfo_riscv.cpp does query call trampolines (get_trampoline) -- confirm 0 is intended.
+ public:
+  // Size of call trampoline stub.
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // Number of relocations needed by a call trampoline stub.
+  static uint reloc_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+};
+
+class HandlerImpl {
+  // Emitters and size queries for the exception and deopt handlers.
+ public:
+  // Emitters: only declared here; their definitions are outside this class body.
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return MacroAssembler::far_branch_size();
+  }
+
+  static uint size_deopt_handler() {
+    // count auipc + far branch: one instruction plus the far-branch size
+    return NativeInstruction::instruction_size + MacroAssembler::far_branch_size();
+  }
+};
+
+bool is_CAS(int opcode, bool maybe_volatile);
+
+// predicate controlling translation of CompareAndSwapX
+bool needs_acquiring_load_reserved(const Node *load);
+
+// predicate controlling translation of StoreCM
+bool unnecessary_storestore(const Node *storecm);
+
+// predicate controlling addressing modes
+bool size_fits_all_mem_uses(AddPNode* addp, int shift);
+%}
+
+source %{
+
+// Derived RegMask with conditionally allocatable registers
+
+RegMask _ANY_REG32_mask;
+RegMask _ANY_REG_mask;
+RegMask _PTR_REG_mask;
+RegMask _NO_SPECIAL_REG32_mask;
+RegMask _NO_SPECIAL_REG_mask;
+RegMask _NO_SPECIAL_PTR_REG_mask;
+
+void reg_mask_init() {
+  // Fills in the masks returned by the dynamic reg_class definitions,
+  // starting from _ALL_REG32_mask / _ALL_REG_mask.
+  // 32-bit masks: any_reg32 drops zr, no_special_reg32 drops the reserved set.
+  _ANY_REG32_mask = _ALL_REG32_mask;
+  _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg()));
+  _NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
+  _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
+
+  // 64-bit masks: any_reg and ptr_reg are all_reg without zr.
+  _ANY_REG_mask = _ALL_REG_mask;
+  _ANY_REG_mask.SUBTRACT(_ZR_REG_mask);
+  _PTR_REG_mask = _ALL_REG_mask;
+  _PTR_REG_mask.SUBTRACT(_ZR_REG_mask);
+
+  // no_special_reg and no_special_ptr_reg are all_reg without non_allocatable_reg.
+  _NO_SPECIAL_REG_mask = _ALL_REG_mask;
+  _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
+  _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
+  _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
+
+  // x27 is not allocatable when compressed oops is on
+  if (UseCompressedOops) {
+    _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg()));
+    _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
+    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask);
+  }
+  // x8 is not allocatable when PreserveFramePointer is on
+  if (PreserveFramePointer) {
+    _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg()));
+    _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask);
+    _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask);
+  }
+}
+
+// is_CAS(int opcode, bool maybe_volatile)
+//
+// Classify an ideal opcode:
+//  - strong CompareAndSwapX, GetAndSetX and GetAndAddX  -> true
+//  - CompareAndExchangeX and WeakCompareAndSwapX        -> maybe_volatile
+//  - anything else                                      -> false
+bool is_CAS(int opcode, bool maybe_volatile)
+{
+  bool recognized;
+  switch (opcode) {
+    // always handled
+    case Op_CompareAndSwapB:
+    case Op_CompareAndSwapS:
+    case Op_CompareAndSwapI:
+    case Op_CompareAndSwapL:
+    case Op_CompareAndSwapP:
+    case Op_CompareAndSwapN:
+#if INCLUDE_SHENANDOAHGC
+    case Op_ShenandoahCompareAndSwapP:
+    case Op_ShenandoahCompareAndSwapN:
+#endif
+    case Op_GetAndSetI:
+    case Op_GetAndSetL:
+    case Op_GetAndSetP:
+    case Op_GetAndSetN:
+    case Op_GetAndAddI:
+    case Op_GetAndAddL:
+      recognized = true;
+      break;
+    // handled only when the caller allows the maybe-volatile forms
+    case Op_CompareAndExchangeB:
+    case Op_CompareAndExchangeS:
+    case Op_CompareAndExchangeI:
+    case Op_CompareAndExchangeL:
+    case Op_CompareAndExchangeP:
+    case Op_CompareAndExchangeN:
+    case Op_WeakCompareAndSwapB:
+    case Op_WeakCompareAndSwapS:
+    case Op_WeakCompareAndSwapI:
+    case Op_WeakCompareAndSwapL:
+    case Op_WeakCompareAndSwapP:
+    case Op_WeakCompareAndSwapN:
+      recognized = maybe_volatile;
+      break;
+    default:
+      recognized = false;
+      break;
+  }
+  return recognized;
+}
+
+// predicate controlling translation of CAS
+//
+// returns true if CAS needs to use an acquiring load otherwise false
+bool needs_acquiring_load_reserved(const Node *n)
+{
+  assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap");
+
+  LoadStoreNode* ldst = n->as_LoadStore();
+
+  // Opcodes that is_CAS() accepts only as maybe-volatile: the answer
+  // depends on whether a trailing membar is attached.
+  if (n == NULL || !is_CAS(n->Opcode(), false)) {
+    return ldst != NULL && ldst->trailing_membar() != NULL;
+  }
+
+  // Opcodes that is_CAS() always accepts are expected to carry a
+  // trailing membar, so the answer is unconditionally true.
+  assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar");
+  return true;
+}
+
+// predicate controlling translation of StoreCM
+//
+// returns true if no StoreStore barrier is needed before the card
+// write, otherwise false
+
+bool unnecessary_storestore(const Node *storecm)
+{
+  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
+
+  // a StoreStore barrier is required between an object put and the
+  // associated card mark when we are using CMS without conditional
+  // card marking
+
+  if (UseConcMarkSweepGC && !UseCondCardMark) {
+    return false;
+  }
+
+  // a storestore is unnecessary in all other cases
+
+  return true;
+}
+
+#define __ _masm.
+
+// advance declarations for helper functions to convert register
+// indices to register objects
+
+// the ad file has to provide implementations of certain methods
+// expected by the generic code
+//
+// REQUIRED FUNCTIONALITY
+
+//=============================================================================
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+
+int MachCallStaticJavaNode::ret_addr_offset()
+{
+  // The call sequence is a single jal.
+  const int call_insts = 1;
+  return call_insts * NativeInstruction::instruction_size;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset()
+{
+  // movptr, jal: seven instructions in total
+  const int call_insts = 7;
+  return call_insts * NativeInstruction::instruction_size;
+}
+
+int MachCallRuntimeNode::ret_addr_offset() {
+  // Two call shapes are possible.
+  //
+  // Target is a blob in the code cache (generated stubs):
+  //   jal(addr)
+  // or with far branches
+  //   jal(trampoline_stub)
+  // i.e. a single instruction.
+  //
+  // Target is a real runtime callout: 11 instructions,
+  // see riscv_enc_java_to_runtime
+  //   la(t1, retaddr)                ->  auipc + addi
+  //   la(t0, RuntimeAddress(addr))   ->  lui + addi + slli + addi + slli + addi
+  //   addi(sp, sp, -2 * wordSize)    ->  addi
+  //   sd(t1, Address(sp, wordSize))  ->  sd
+  //   jalr(t0)                       ->  jalr
+  const bool target_in_code_cache = CodeCache::find_blob(_entry_point) != NULL;
+  const int call_insts = target_in_code_cache ? 1 : 11;
+  return call_insts * NativeInstruction::instruction_size;
+}
+
+//
+// Compute padding required for nodes which need alignment
+//
+
+// With RVC a call instruction may end up only 2-byte aligned.
+// The call must start on a 4-byte boundary so that it does not span
+// a cache line and can therefore be patched.
+int CallStaticJavaDirectNode::compute_padding(int current_offset) const
+{
+  // padding = distance from current_offset up to the next offset that
+  // satisfies the jal's alignment requirement
+  const int aligned_offset = align_up(current_offset, alignment_required());
+  return aligned_offset - current_offset;
+}
+
+// With RVC a call instruction may end up only 2-byte aligned.
+// The call must start on a 4-byte boundary so that it does not span
+// a cache line and can therefore be patched.
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+{
+  // The jal is preceded by the movptr from MacroAssembler::ic_call():
+  //   lui + addi + slli + addi + slli + addi
+  // movptr() is already 4-byte aligned with or without RVC, but its
+  // size is accounted for explicitly to guard against further changes.
+  const int movptr_size = 6 * NativeInstruction::instruction_size;
+  const int jal_offset = current_offset + movptr_size;
+  // padding needed so that the jal itself is 4-byte aligned
+  return align_up(jal_offset, alignment_required()) - jal_offset;
+}
+
+// Indicate if the safepoint node needs the polling page as an input
+
+// the shared code plants the oop data at the start of the generated
+// code for the safepoint node and that needs to be at the load
+// instruction itself. so we cannot plant a mov of the safepoint poll
+// address followed by a load. setting this to true means the mov is
+// scheduled as a prior instruction. that's better for scheduling
+// anyway.
+
+bool SafePointNode::needs_polling_address_input()
+{
+  return true;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Debug listing for the breakpoint node (non-PRODUCT builds only).
+void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  assert_cond(st != NULL);
+  st->print("BREAKPOINT");
+}
+#endif
+
+// Emit a single ebreak instruction.
+void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  // NOTE(review): presumably allows the ebreak to be emitted in its
+  // compressed form under RVC -- confirm against CompressibleRegion.
+  Assembler::CompressibleRegion cr(&_masm);
+  __ ebreak();
+}
+
+// Size is not hard-coded; defer to the generic MachNode::size().
+uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+  // Debug listing for a run of padding nops (non-PRODUCT builds only).
+  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
+    st->print("nop \t# %d bytes pad for loops and calls", _count);
+  }
+#endif
+
+  // Emit _count nop instructions.
+  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
+    MacroAssembler _masm(&cbuf);
+    // nops shall be 2-byte under RVC for alignment purposes.
+    Assembler::CompressibleRegion cr(&_masm);
+    for (int remaining = _count; remaining > 0; remaining--) {
+      __ nop();
+    }
+  }
+
+  // Total bytes occupied by the nops: each one is a compressed
+  // instruction when UseRVC is on and a full instruction otherwise.
+  uint MachNopNode::size(PhaseRegAlloc*) const {
+    const int nop_bytes = UseRVC ? NativeInstruction::compressed_instruction_size
+                                 : NativeInstruction::instruction_size;
+    return _count * nop_bytes;
+  }
+
+//=============================================================================
+// The constant base node produces no register: its output mask is empty.
+const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
+
+// Base offset applied to constant table accesses.
+int Compile::ConstantTable::calculate_table_base_offset() const {
+  return 0;  // absolute addressing, no offset
+}
+
+// No post-allocation expansion is requested, so postalloc_expand()
+// must never be reached.
+bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
+void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
+  ShouldNotReachHere();
+}
+
+// Emits nothing.
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  // Empty encoding
+}
+
+// Zero bytes, matching the empty encoding above.
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  return 0;
+}
+
+#ifndef PRODUCT
+// Debug listing (non-PRODUCT builds only).
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  assert_cond(st != NULL);
+  st->print("-- \t// MachConstantBaseNode (empty encoding)");
+}
+#endif
+
+#ifndef PRODUCT
+// Debug listing of the method prolog (non-PRODUCT builds only).
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  assert_cond(st != NULL && ra_ != NULL);
+  Compile* C = ra_->C;
+
+  // frame size in bytes: frame slot count scaled by BytesPerInt
+  const int frame_bytes = C->frame_slots() << LogBytesPerInt;
+
+  const bool bangs_stack = C->need_stack_bang(frame_bytes);
+  if (bangs_stack) {
+    st->print("# stack bang size=%d\n\t", frame_bytes);
+  }
+
+  // fp and ra are shown saved in the two words just below the
+  // incoming sp
+  const int fp_save_offset = - 2 * wordSize;
+  const int ra_save_offset = - wordSize;
+  st->print("sd  fp, [sp, #%d]\n\t", fp_save_offset);
+  st->print("sd  ra, [sp, #%d]\n\t", ra_save_offset);
+  if (PreserveFramePointer) {
+    st->print("sub  fp, sp, #%d\n\t", 2 * wordSize);
+  }
+  st->print("sub sp, sp, #%d\n\t", frame_bytes);
+}
+#endif
+
+// Emit the method prolog: patchable nop, optional stack bang, frame
+// construction, and constant table base setup.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  assert_cond(ra_ != NULL);
+  Compile* C = ra_->C;
+  // validate C before its first dereference below
+  assert_cond(C != NULL);
+  MacroAssembler _masm(&cbuf);
+
+  // n.b. frame size includes space for return pc and fp
+  const int framesize = C->frame_size_in_bytes();
+
+  // insert a nop at the start of the prolog so we can patch in a
+  // branch if we need to invalidate the method later
+  MacroAssembler::assert_alignment(__ pc());
+  __ nop();
+
+  int bangsize = C->bang_size_in_bytes();
+  if (C->need_stack_bang(bangsize)) {
+    __ generate_stack_overflow_check(bangsize);
+  }
+
+  __ build_frame(framesize);
+
+  // stack verification at calls is not implemented on this port
+  if (VerifyStackAtCalls) {
+    Unimplemented();
+  }
+
+  // everything emitted so far makes up the frame set-up code
+  C->set_frame_complete(cbuf.insts_size());
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+// Prolog size depends on the frame and bang sizes, so it is computed
+// by the generic MachNode::size() rather than hard-coded.
+uint MachPrologNode::size(PhaseRegAlloc* ra_) const
+{
+  assert_cond(ra_ != NULL);
+  return MachNode::size(ra_); // too many variables; just compute it
+                              // the hard way
+}
+
+// Number of relocatable values contained in the prolog: none.
+int MachPrologNode::reloc() const
+{
+  return 0;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Debug listing of the method epilog (non-PRODUCT builds only).
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  assert_cond(st != NULL && ra_ != NULL);
+  Compile* C = ra_->C;
+  assert_cond(C != NULL);
+  int framesize = C->frame_size_in_bytes();
+
+  st->print("# pop frame %d\n\t", framesize);
+
+  if (framesize == 0) {
+    st->print("ld  ra, [sp,#%d]\n\t", (2 * wordSize));
+    st->print("ld  fp, [sp,#%d]\n\t", (3 * wordSize));
+    st->print("add sp, sp, #%d\n\t", (2 * wordSize));
+  } else {
+    st->print("add  sp, sp, #%d\n\t", framesize);
+    // Mirror the layout printed by MachPrologNode::format: fp lives at
+    // -2 * wordSize and ra at -wordSize relative to the restored sp.
+    st->print("ld  fp, [sp,#%d]\n\t", - 2 * wordSize);
+    st->print("ld  ra, [sp,#%d]\n\t", - wordSize);
+  }
+
+  // the return poll is only shown when the node polls and a method
+  // is being compiled
+  if (do_polling() && C->is_method_compilation()) {
+    st->print("# touch polling page\n\t");
+    st->print("li  t0, #0x%lx\n\t", p2i(os::get_polling_page()));
+    st->print("ld  zr, [t0]");
+  }
+}
+#endif
+
+// Emit the method epilog: frame teardown, optional reserved-stack
+// check, and optional return poll.
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  assert_cond(ra_ != NULL);
+  Compile* C = ra_->C;
+  assert_cond(C != NULL);
+
+  MacroAssembler _masm(&cbuf);
+  const int frame_bytes = C->frame_size_in_bytes();
+  __ remove_frame(frame_bytes);
+
+  // only needed when reserved pages exist and this method uses them
+  if (StackReservedPages > 0) {
+    if (C->has_reserved_stack_access()) {
+      __ reserved_stack_check();
+    }
+  }
+
+  // return poll, for method compilations only
+  if (do_polling()) {
+    if (C->is_method_compilation()) {
+      __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type);
+    }
+  }
+}
+
+// Epilog size varies with the frame size and the optional checks, so
+// it is computed by the generic MachNode::size().
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  assert_cond(ra_ != NULL);
+  // Variable size. Determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // 1 for polling page.
+}
+// The epilog uses the generic MachNode pipeline class.
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// This method seems to be obsolete. It is declared in machnode.hpp
+// and defined in all *.ad files, but it is never called. Should we
+// get rid of it?
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  return 4;
+}
+
+//=============================================================================
+
+// Classify an allocator register name as rc_int, rc_float or
+// rc_stack (rc_bad for OptoReg::Bad).
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+  if (reg == OptoReg::Bad) {
+    return rc_bad;
+  }
+
+  // Integer registers come first: 30 registers * 2 halves
+  // (t0 and t1 are omitted).
+  const int int_slots =
+    RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2);
+  // Float registers follow: 32 registers * 2 halves.
+  const int float_slots =
+    FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
+  const int end_of_float_slots = int_slots + float_slots;
+
+  if (reg < int_slots) {
+    return rc_int;
+  }
+  if (reg < end_of_float_slots) {
+    return rc_float;
+  }
+
+  // Between float regs & stack is the flags regs.
+  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+  return rc_stack;
+}
+
+// Shared worker behind MachSpillCopyNode::format() and emit().
+//
+// Moves the value of in(1) into this node's location, where each
+// side is a general register, a float register or a stack slot.
+//   cbuf     if non-NULL, the move is assembled into this buffer
+//   ra_      register allocator supplying locations and stack offsets
+//   do_size  not used by this implementation
+//   st       if non-NULL, a textual description is printed to it
+// Always returns 0.
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
+  assert_cond(ra_ != NULL);
+  Compile* C = ra_->C;   // not referenced below
+
+  // Get registers to move.
+  OptoReg::Name src_hi = ra_->get_reg_second(in(1));
+  OptoReg::Name src_lo = ra_->get_reg_first(in(1));
+  OptoReg::Name dst_hi = ra_->get_reg_second(this);
+  OptoReg::Name dst_lo = ra_->get_reg_first(this);
+
+  enum RC src_hi_rc = rc_class(src_hi);
+  enum RC src_lo_rc = rc_class(src_lo);
+  enum RC dst_hi_rc = rc_class(dst_hi);
+  enum RC dst_lo_rc = rc_class(dst_lo);
+
+  assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
+
+  if (src_hi != OptoReg::Bad) {
+    assert((src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+           (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi,
+           "expected aligned-adjacent pairs");
+  }
+
+  if (src_lo == dst_lo && src_hi == dst_hi) {
+    return 0;            // Self copy, no move.
+  }
+
+  // a 64-bit move is one where both source and destination are
+  // aligned-adjacent slot pairs
+  bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+              (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
+  int src_offset = ra_->reg2offset(src_lo);
+  int dst_offset = ra_->reg2offset(dst_lo);
+
+  // Code emission: dispatch on the source class, then on the
+  // destination class.
+  if (cbuf != NULL) {
+    MacroAssembler _masm(cbuf);
+    Assembler::CompressibleRegion cr(&_masm);
+    switch (src_lo_rc) {
+      case rc_int:
+        if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
+          if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass
+            __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32);
+          } else {
+            __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]));
+          }
+        } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+          if (is64) {
+            __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                       as_Register(Matcher::_regEncode[src_lo]));
+          } else {
+            __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                       as_Register(Matcher::_regEncode[src_lo]));
+          }
+        } else {                    // gpr --> stack spill
+          assert(dst_lo_rc == rc_stack, "spill to bad register class");
+          __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
+        }
+        break;
+      case rc_float:
+        if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
+          if (is64) {
+            __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]),
+                       as_FloatRegister(Matcher::_regEncode[src_lo]));
+          } else {
+            __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]),
+                       as_FloatRegister(Matcher::_regEncode[src_lo]));
+          }
+        } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+          if (is64) {
+            __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+          } else {
+            __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+          }
+        } else {                    // fpr --> stack spill
+          assert(dst_lo_rc == rc_stack, "spill to bad register class");
+          __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+                   is64, dst_offset);
+        }
+        break;
+      case rc_stack:
+        if (dst_lo_rc == rc_int) {  // stack --> gpr load
+          if (this->ideal_reg() == Op_RegI) {
+            __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+          } else { // zero extended for narrow oop or klass
+            __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+          }
+        } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+          __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     is64, src_offset);
+        } else {                    // stack --> stack copy
+          assert(dst_lo_rc == rc_stack, "spill to bad register class");
+          // the copy goes through t0 as a temporary
+          if (this->ideal_reg() == Op_RegI) {
+            __ unspill(t0, is64, src_offset);
+          } else { // zero extended for narrow oop or klass
+            __ unspillu(t0, is64, src_offset);
+          }
+          __ spill(t0, is64, dst_offset);
+        }
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+  // Textual form: "spill <src> -> <dst>" followed by the move width.
+  if (st != NULL) {
+    st->print("spill ");
+    if (src_lo_rc == rc_stack) {
+      st->print("[sp, #%d] -> ", src_offset);
+    } else {
+      st->print("%s -> ", Matcher::regName[src_lo]);
+    }
+    if (dst_lo_rc == rc_stack) {
+      st->print("[sp, #%d]", dst_offset);
+    } else {
+      st->print("%s", Matcher::regName[dst_lo]);
+    }
+    st->print("\t# spill size = %d", is64 ? 64 : 32);
+  }
+
+  return 0;
+}
+
+#ifndef PRODUCT
+// Debug listing of a spill copy (non-PRODUCT builds only).
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  // With an allocator available, let implementation() describe the
+  // actual move.
+  if (ra_ != NULL) {
+    implementation(NULL, ra_, false, st);
+    return;
+  }
+  // Without one, fall back to the node indices.
+  st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
+}
+#endif
+
+// Assemble the spill copy into cbuf; no text output is requested.
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  implementation(&cbuf, ra_, false, NULL);
+}
+
+// Size is not hard-coded; defer to the generic MachNode::size().
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Debug listing of the box-lock address computation
+// (non-PRODUCT builds only).
+void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  assert_cond(ra_ != NULL && st != NULL);
+  // stack offset of the box and the register receiving its address
+  const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  const int dst_reg = ra_->get_reg_first(this);
+  st->print("add %s, sp, #%d\t# box lock", Matcher::regName[dst_reg], box_offset);
+}
+#endif
+
+// Materialize sp + offset of the lock box into the node's register.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  assert_cond(ra_ != NULL);
+  const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  const int dst_enc    = ra_->get_encode(this);
+
+  // Offset fits a 12-bit immediate: a single addi is enough.
+  if (is_imm_in_range(box_offset, 12, 0)) {
+    __ addi(as_Register(dst_enc), sp, box_offset);
+    return;
+  }
+
+  // Anything that does not fit 32 bits is unexpected.
+  if (!is_imm_in_range(box_offset, 32, 0)) {
+    ShouldNotReachHere();
+    return;
+  }
+
+  // Otherwise load the offset into t0 and add it to sp.
+  __ li32(t0, box_offset);
+  __ add(as_Register(dst_enc), sp, t0);
+}
+
+// Byte size of the code produced by BoxLockNode::emit().
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
+  const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+
+  // one addi when the offset fits 12 bits, else lui + addiw + add
+  const int inst_count = is_imm_in_range(box_offset, 12, 0) ? 1 : 3;
+  return inst_count * NativeInstruction::instruction_size;
+}
+
+//=============================================================================
+
+#ifndef PRODUCT
+// Debug listing of the unverified entry point (non-PRODUCT builds
+// only): load the receiver's klass, compare it with t1, and jump to
+// the inline-cache miss stub on mismatch.
+void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  assert_cond(st != NULL);
+  st->print_cr("# MachUEPNode");
+  if (UseCompressedClassPointers) {
+    st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+    if (Universe::narrow_klass_shift() != 0) {
+      st->print_cr("\tdecode_klass_not_null t0, t0");
+    }
+  } else {
+    // full-width klass pointer: it must not be labelled as compressed
+    st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# klass");
+  }
+  st->print_cr("\tbeq t0, t1, ic_hit");
+  st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check");
+  st->print_cr("\tic_hit:");
+}
+#endif
+
+// Emit the unverified entry point: an inline-cache klass check that
+// falls through on a hit and far-jumps to the IC miss stub otherwise.
+void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  // This is the unverified entry point.
+  MacroAssembler _masm(&cbuf);
+
+  Label skip;
+  // NOTE(review): presumably branches to skip when the klass of
+  // j_rarg0 matches t1, with t0 as scratch -- confirm in cmp_klass().
+  __ cmp_klass(j_rarg0, t1, t0, skip);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  __ bind(skip);
+
+  // These NOPs are critical so that verified entry point is properly
+  // 4 bytes aligned for patching by NativeJump::patch_verified_entry()
+  __ align(NativeInstruction::instruction_size);
+}
+
+// Size is not hard-coded; defer to the generic MachNode::size().
+uint MachUEPNode::size(PhaseRegAlloc* ra_) const
+{
+  assert_cond(ra_ != NULL);
+  return MachNode::size(ra_);
+}
+
+// REQUIRED EMIT CODE
+
+//=============================================================================
+
+// Emit exception handler code.
+//
+// Returns the code buffer offset at which the handler starts, or 0
+// after recording a compilation failure when no stub space could be
+// obtained.
+int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
+{
+  // la_patchable t0, #exception_blob_entry_point
+  // jr (offset)t0
+  // or
+  // j #exception_blob_entry_point
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base = __ start_a_stub(size_exception_handler());
+  if (base == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+  int offset = __ offset();
+  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
+  // the emitted code must fit in the space reserved above
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+}
+
+// Emit deopt handler code.
+//
+// Returns the code buffer offset at which the handler starts, or 0
+// after recording a compilation failure when no stub space could be
+// obtained.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
+{
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a handler.
+  MacroAssembler _masm(&cbuf);
+  address base = __ start_a_stub(size_deopt_handler());
+  if (base == NULL) {
+    ciEnv::current()->record_failure("CodeCache is full");
+    return 0;  // CodeBuffer::expand failed
+  }
+  int offset = __ offset();
+
+  // auipc with a zero immediate puts the address of this instruction
+  // into ra before jumping to the deopt blob's unpack entry
+  __ auipc(ra, 0);
+  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
+
+  // the emitted code must fit in the space reserved above
+  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
+  __ end_a_stub();
+  return offset;
+
+}
+// REQUIRED MATCHER CODE
+
+//=============================================================================
+
+// Report whether the ideal opcode can be matched on this CPU: a match
+// rule must exist and any feature flag guarding it must be enabled.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) {
+    return false;
+  }
+
+  switch (opcode) {
+    // population count is gated by UsePopCountInstruction
+    case Op_PopCountI:
+    case Op_PopCountL:
+      return UsePopCountInstruction;
+
+    // leading/trailing zero counts are gated by UseZbb
+    case Op_CountLeadingZerosI:
+    case Op_CountLeadingZerosL:
+    case Op_CountTrailingZerosI:
+    case Op_CountTrailingZerosL:
+      return UseZbb;
+
+    default:
+      return true; // Per default match rules are supported.
+  }
+}
+
+// Identify extra cases that we might want to provide match rules for vector nodes and
+// other intrinsics guarded with vector length (vlen).
+// No vector match rule is reported as supported here.
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  return false;
+}
+
+// Predicated vectors are reported as unavailable.
+const bool Matcher::has_predicated_vectors(void) {
+  return false;
+}
+
+// The float register pressure threshold is left at the default
+// supplied by the caller.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
+// Not implemented on this port: reaching it hits Unimplemented().
+int Matcher::regnum_to_fpu_offset(int regnum)
+{
+  Unimplemented();
+  return 0;
+}
+
+// Is this branch offset short enough that a short branch can be used?
+//
+// NOTE: If the platform does not provide any short branch variants, then
+//       this method should return false for offset 0.
+// |---label(L1)-----|
+// |-----------------|
+// |-----------------|----------eq: float-------------------
+// |-----------------| // far_cmpD_branch   |   cmpD_branch
+// |------- ---------|    feq;              |      feq;
+// |-far_cmpD_branch-|    beqz done;        |      bnez L;
+// |-----------------|    j L;              |
+// |-----------------|    bind(done);       |
+// |-----------------|--------------------------------------
+// |-----------------| // so shortBrSize = br_size - 4;
+// |-----------------| // so offs = offset - shortBrSize + 4;
+// |---label(L2)-----|
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to address of the branch.
+  // The short form is 4 bytes smaller than the far form, so the
+  // distance is re-based onto the short branch before the range check.
+  const int short_br_size = br_size - 4;
+  const int short_offset = offset - short_br_size + 4;
+  if (short_offset < -4096) {
+    return false;
+  }
+  return short_offset < 4096;
+}
+
+// Every 64-bit constant is treated as simple.
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
+  // Probably always true, even if a temp register is required.
+  return true;
+}
+
+// true just means we have fast l2f conversion
+const bool Matcher::convL2FSupported(void) {
+  return true;
+}
+
+// Vector width in bytes.
+// Zero for every element type.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  return 0;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+// Both limits follow from vector_width_in_bytes() above.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt) / type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  return max_vector_size(bt);
+}
+
+// Vector ideal reg.
+// Must not be reached: it hits ShouldNotReachHere().
+const uint Matcher::vector_ideal_reg(int len) {
+  ShouldNotReachHere();
+  return 0;
+}
+
+// Vector shifts are unsupported; calling this is a fatal error.
+const uint Matcher::vector_shift_count_ideal_reg(int size) {
+  fatal("vector shift is not supported");
+  return Node::NotAMachineReg;
+}
+
+// AES support not yet implemented
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// RISC-V supports misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return true;
+}
+
+// false => size gets scaled to BytesPerLong, ok.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Use conditional move (CMOVL)
+// Extra cost of a long conditional move.
+const int Matcher::long_cmove_cost() {
+  // long cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+// Extra cost of a float conditional move.
+const int Matcher::float_cmove_cost() {
+  // float cmoves are no more expensive than int cmoves
+  return 0;
+}
+
+// Does the CPU require late expand (see block.cpp for description of late expand)?
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+const bool Matcher::need_masked_shift_count = false;
+
+// This affects two different things:
+//  - how Decode nodes are matched
+//  - how ImplicitNullCheck opportunities are recognized
+// If true, the matcher will try to remove all Decodes and match them
+// (as operands) into nodes. NullChecks are not prepared to deal with
+// Decodes by final_graph_reshaping().
+// If false, final_graph_reshaping() forces the decode behind the Cmp
+// for a NullCheck. The matcher matches the Decode node into a register.
+// Implicit_null_check optimization moves the Decode along with the
+// memory operation back up before the NullCheck.
+//
+// Here the answer is true exactly when the narrow oop shift is zero.
+bool Matcher::narrow_oop_use_complex_address() {
+  return Universe::narrow_oop_shift() == 0;
+}
+
+// Complex addressing is currently never used for narrow klasses.
+bool Matcher::narrow_klass_use_complex_address() {
+// TODO
+// decide whether we need to set this to true
+  return false;
+}
+
+bool Matcher::const_oop_prefer_decode() {
+  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
+  // That mode is detected by a NULL narrow oop base.
+  return Universe::narrow_oop_base() == NULL;
+}
+
+bool Matcher::const_klass_prefer_decode() {
+  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
+  // That mode is detected by a NULL narrow klass base.
+  return Universe::narrow_klass_base() == NULL;
+}
+
+// Is it better to copy float constants, or load them directly from
+// memory?  Intel can load a float constant from a direct address,
+// requiring no extra registers.  Most RISCs will have to materialize
+// an address into a register first, so they would do better to copy
+// the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no
+// fixup is needed.  Else we split the double into 2 integer pieces
+// and move it piece-by-piece.  Only happens when passing doubles into
+// C code as the Java calling convention forces doubles to be aligned.
+const bool Matcher::misaligned_doubles_ok = true;
+
+// Not implemented on this port: reaching it hits Unimplemented().
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
+  Unimplemented();
+}
+
+// Advertise here if the CPU requires explicit rounding operations to
+// implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+
+// Are floats converted to double when stored to stack during
+// deoptimization?
+bool Matcher::float_in_double() { return false; }
+
+// Do ints take an entire long register or just half?
+// The relevant question is how the int is callee-saved:
+// the whole long is written but de-opt'ing will have to extract
+// the relevant 32 bits.
+const bool Matcher::int_in_long = true;
+
+// Return whether or not this register is ever used as an argument.
+// This function is used on startup to build the trampoline stubs in
+// generateOptoStub.  Registers not mentioned will be killed by the VM
+// call in the trampoline, and arguments in those registers not be
+// available to the callee.
+bool Matcher::can_be_java_arg(int reg)
+{
+  switch (reg) {
+    // integer registers R10..R17, both halves
+    case R10_num: case R10_H_num:
+    case R11_num: case R11_H_num:
+    case R12_num: case R12_H_num:
+    case R13_num: case R13_H_num:
+    case R14_num: case R14_H_num:
+    case R15_num: case R15_H_num:
+    case R16_num: case R16_H_num:
+    case R17_num: case R17_H_num:
+    // float registers F10..F17, both halves
+    case F10_num: case F10_H_num:
+    case F11_num: case F11_H_num:
+    case F12_num: case F12_H_num:
+    case F13_num: case F13_H_num:
+    case F14_num: case F14_H_num:
+    case F15_num: case F15_H_num:
+    case F16_num: case F16_H_num:
+    case F17_num: case F17_H_num:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// A register is a spillable argument exactly when it can carry a
+// Java argument.
+bool Matcher::is_spillable_arg(int reg)
+{
+  return can_be_java_arg(reg);
+}
+
+// Assembly is never used for long division by a constant.
+bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
+  return false;
+}
+
+// The four divmod projection masks below must never be requested:
+// each one hits ShouldNotReachHere().
+
+// Register for DIVI projection of divmodI.
+RegMask Matcher::divI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODI projection of divmodI.
+RegMask Matcher::modI_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for DIVL projection of divmodL.
+RegMask Matcher::divL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// Register for MODL projection of divmodL.
+RegMask Matcher::modL_proj_mask() {
+  ShouldNotReachHere();
+  return RegMask();
+}
+
+// The SP save mask for method handle invokes is the frame pointer
+// register mask.
+const RegMask Matcher::method_handle_invoke_SP_save_mask() {
+  return FP_REG_mask();
+}
+
+// predicate controlling addressing modes
+//
+// returns true if every memory user of addp accesses exactly
+// (1 << shift) bytes, otherwise false
+bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
+  assert_cond(addp != NULL);
+  for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
+    Node* use = addp->fast_out(i);
+    // only memory users constrain the access size
+    if (use == NULL || !use->is_Mem()) {
+      continue;
+    }
+    const int opsize = use->as_Mem()->memory_size();
+    assert(opsize > 0, "unexpected memory operand size");
+    if (opsize != (1 << shift)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+const bool Matcher::convi2l_type_required = false;
+
+// Should the Matcher clone shifts on addressing modes, expecting them
+// to be subsumed into complex addressing expressions or compute them
+// into registers?
+bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
+  return clone_base_plus_offset_address(m, mstack, address_visited);
+}
+
+// Empty body: no reshaping of addp is performed here.
+void Compile::reshape_address(AddPNode* addp) {
+}
+
+%}
+
+
+
+//----------ENCODING BLOCK-----------------------------------------------------
+// This block specifies the encoding classes used by the compiler to
+// output byte streams.  Encoding classes are parameterized macros
+// used by Machine Instruction Nodes in order to generate the bit
+// encoding of the instruction.  Operands specify their base encoding
+// interface with the interface keyword.  Four interfaces are
+// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
+// COND_INTER.  REG_INTER causes an operand to generate a function
+// which returns its register number when queried.  CONST_INTER causes
+// an operand to generate a function which returns the value of the
+// constant when queried.  MEMORY_INTER causes an operand to generate
+// four functions which return the Base Register, the Index Register,
+// the Scale Value, and the Offset Value of the operand when queried.
+// COND_INTER causes an operand to generate six functions which return
+// the encoding code (ie - encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional
+// instruction.
+//
+// Instructions specify two basic values for encoding.  Again, a
+// function is available to check if the constant displacement is an
+// oop. They use the ins_encode keyword to specify their encoding
+// classes (which must be a sequence of enc_class names, and their
+// parameters, specified in the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode.  Only the opcode sections which a particular
+// instruction needs for encoding need to be specified.
+encode %{
+  // BEGIN Non-volatile memory access
+
+  // Move the integer/long constant $src, widened to int64_t, into $dst.
+  enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{
+    MacroAssembler _masm(&cbuf);
+    // NOTE(review): presumably allows compressed encodings while cr is in
+    // scope -- confirm against Assembler::CompressibleRegion.
+    Assembler::CompressibleRegion cr(&_masm);
+    int64_t con = (int64_t)$src$$constant;
+    Register dst_reg = as_Register($dst$$reg);
+    __ mv(dst_reg, con);
+  %}
+
+  // Load the pointer constant $src into $dst, choosing the emission routine
+  // from the constant's relocation type.  The values NULL and 1 are not
+  // expected here and trap via ShouldNotReachHere().
+  enc_class riscv_enc_mov_p(iRegP dst, immP src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL || con == (address)1) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      if (rtype == relocInfo::oop_type) {
+        // Oop constant.
+        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
+      } else if (rtype == relocInfo::metadata_type) {
+        // Metadata constant.
+        __ mov_metadata(dst_reg, (Metadata*)con);
+      } else {
+        // No relocation: plain immediate move.
+        assert(rtype == relocInfo::none, "unexpected reloc type");
+        __ mv(dst_reg, $src$$constant);
+      }
+    }
+  %}
+
+  enc_class riscv_enc_mov_p1(iRegP dst) %{
+    MacroAssembler _masm(&cbuf);
+    Assembler::CompressibleRegion cr(&_masm);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mv(dst_reg, 1);
+  %}
+
+  // Load the polling page address $src into $dst using a poll_type
+  // relocation.  In debug builds the address is checked to have its low
+  // 12 bits clear.
+  enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{
+    MacroAssembler _masm(&cbuf);
+    int32_t offset = 0;
+    address page = (address)$src$$constant;
+    unsigned long align = (unsigned long)page & 0xfff;
+    assert(align == 0, "polling page must be page aligned");
+    Register dst_reg = as_Register($dst$$reg);
+    // NOTE(review): offset is presumably filled in by la_patchable() (passed
+    // by reference) with the part of the address left for the addi -- confirm.
+    __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset);
+    __ addi(dst_reg, dst_reg, offset);
+  %}
+
+  enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{
+    MacroAssembler _masm(&cbuf);
+    __ load_byte_map_base($dst$$Register);
+  %}
+
+  // Load the narrow oop constant $src into $dst via set_narrow_oop().
+  // A NULL constant is not expected here and traps; the constant must carry
+  // an oop_type relocation.
+  enc_class riscv_enc_mov_n(iRegN dst, immN src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
+      __ set_narrow_oop(dst_reg, (jobject)con);
+    }
+  %}
+
+  enc_class riscv_enc_mov_zero(iRegNorP dst) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    __ mv(dst_reg, zr);
+  %}
+
+  // Load the narrow klass constant $src into $dst via set_narrow_klass().
+  // A NULL constant is not expected here and traps; the constant must carry
+  // a metadata_type relocation.
+  enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    address con = (address)$src$$constant;
+    if (con == NULL) {
+      ShouldNotReachHere();
+    } else {
+      relocInfo::relocType rtype = $src->constant_reloc();
+      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
+      __ set_narrow_klass(dst_reg, (Klass *)con);
+    }
+  %}
+
+  // Compare-and-exchange with size Assembler::int32 on the address held in
+  // the base register of $mem (only the base is used).  Ordering arguments:
+  // acquire = relaxed, release = rl.  $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // Compare-and-exchange with size Assembler::uint32 on the address held in
+  // the base register of $mem (only the base is used).  Ordering arguments:
+  // acquire = relaxed, release = rl.  $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // Compare-and-exchange with size Assembler::int64 on the address held in
+  // the base register of $mem (only the base is used).  Ordering arguments:
+  // acquire = relaxed, release = rl.  $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // Acquiring variant of riscv_enc_cmpxchgw: size Assembler::int32, ordering
+  // arguments acquire = aq, release = rl.  Only the base register of $mem
+  // is used; $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // Acquiring variant of riscv_enc_cmpxchgn: size Assembler::uint32, ordering
+  // arguments acquire = aq, release = rl.  Only the base register of $mem
+  // is used; $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // Acquiring variant of riscv_enc_cmpxchg: size Assembler::int64, ordering
+  // arguments acquire = aq, release = rl.  Only the base register of $mem
+  // is used; $res receives the outcome as a bool.
+  enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{
+    MacroAssembler _masm(&cbuf);
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+               /*result as bool*/ true);
+  %}
+
+  // compare and branch instruction encodings
+
+  enc_class riscv_enc_j(label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* L = $lbl$$label;
+    __ j(*L);
+  %}
+
+  // Branch on an unsigned lt/ge comparison of $op1 against zero.  $op1 is
+  // never read: ge emits an unconditional jump to the label, lt emits
+  // nothing (presumably because an unsigned value is always >= 0).  Any
+  // other condition code is Unimplemented().
+  enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{
+    MacroAssembler _masm(&cbuf);
+    Label* L = $lbl$$label;
+    switch ($cmp$$cmpcode) {
+      case(BoolTest::ge):
+        // Always taken.
+        __ j(*L);
+        break;
+      case(BoolTest::lt):
+        // Never taken: no code emitted.
+        break;
+      default:
+        Unimplemented();
+    }
+  %}
+
+  // call instruction encodings
+
+  // Slow-path subtype check of $sub against $super.  The slow path is given
+  // NULL as its success label and `miss` as its failure label, so success
+  // presumably falls through to the code after the call.
+  //
+  // With $primary set, success zeroes $result and the miss path writes
+  // nothing more.  Without $primary, cr_reg (t1) is set to 0 on success and
+  // to 1 on a miss.
+  enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{
+    Register sub_reg = as_Register($sub$$reg);
+    Register super_reg = as_Register($super$$reg);
+    Register temp_reg = as_Register($temp$$reg);
+    Register result_reg = as_Register($result$$reg);
+    Register cr_reg = t1;
+
+    Label miss;
+    Label done;
+    MacroAssembler _masm(&cbuf);
+    __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
+                                     NULL, &miss);
+    // Success path.
+    if ($primary) {
+      __ mv(result_reg, zr);
+    } else {
+      __ mv(cr_reg, zr);
+      __ j(done);
+    }
+
+    // Failure path.
+    __ bind(miss);
+    if (!$primary) {
+      __ mv(cr_reg, 1);
+    }
+
+    __ bind(done);
+  %}
+
+  // Emit a static (or optimized virtual) Java call to $meth through a
+  // trampoline call.  When _method is set, a to-interpreter stub is emitted
+  // as well.  If either emission returns NULL the compilation is failed with
+  // "CodeCache is full".
+  enc_class riscv_enc_java_static_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    address addr = (address)$meth$$method;
+    address call = NULL;
+    assert_cond(addr != NULL);
+    if (!_method) {
+      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
+      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+      if (call == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    } else {
+      // The relocation kind depends on whether this is an optimized virtual
+      // call or a plain static call.
+      int method_index = resolved_method_index(cbuf);
+      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
+                                                  : static_call_Relocation::spec(method_index);
+      call = __ trampoline_call(Address(addr, rspec), &cbuf);
+      if (call == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+
+      // Emit stub for static call
+      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
+      if (stub == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    }
+  %}
+
+  // Emit an inline-cache call to $meth via ic_call().  A NULL result fails
+  // the compilation with "CodeCache is full".
+  enc_class riscv_enc_java_dynamic_call(method meth) %{
+    MacroAssembler _masm(&cbuf);
+    int method_index = resolved_method_index(cbuf);
+    address call = __ ic_call((address)$meth$$method, method_index);
+    if (call == NULL) {
+      ciEnv::current()->record_failure("CodeCache is full");
+      return;
+    }
+  %}
+
+  // Code emitted after a call.  Emits nothing unless VerifyStackAtCalls is
+  // set.
+  enc_class riscv_enc_call_epilog() %{
+    MacroAssembler _masm(&cbuf);
+    if (VerifyStackAtCalls) {
+      // Check that stack depth is unchanged: find magic cookie on stack.
+      // The check itself is not implemented here; call_Unimplemented() is
+      // emitted in its place.
+      __ call_Unimplemented();
+    }
+  %}
+
+  // Call from compiled Java code to the runtime entry point $meth.
+  enc_class riscv_enc_java_to_runtime(method meth) %{
+    MacroAssembler _masm(&cbuf);
+
+    // Some calls to generated routines (arraycopy code) are scheduled
+    // by C2 as runtime calls.  If the target lies inside a code blob
+    // (CodeCache::find_blob() finds it) a trampoline call is emitted;
+    // otherwise the absolute address is loaded into t0 and called
+    // through jalr.
+    address entry = (address)$meth$$method;
+    CodeBlob *cb = CodeCache::find_blob(entry);
+    if (cb != NULL) {
+      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
+      if (call == NULL) {
+        ciEnv::current()->record_failure("CodeCache is full");
+        return;
+      }
+    } else {
+      Label retaddr;
+      __ la(t1, retaddr);
+      __ la(t0, RuntimeAddress(entry));
+      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
+      // Two words are reserved on the stack and the return address (t1) is
+      // stored in the upper one; the space is released after the call.
+      __ addi(sp, sp, -2 * wordSize);
+      __ sd(t1, Address(sp, wordSize));
+      __ jalr(t0);
+      __ bind(retaddr);
+      __ addi(sp, sp, 2 * wordSize);
+    }
+  %}
+
+  // using the cr register as the bool result: 0 for success; others failed.
+  //
+  // Fast-path monitor enter for $object with on-stack lock record $box.
+  // The result is left in flag (t1); t0 is used as a scratch register.
+  // $tmp1 holds the displaced header and $tmp2 is a general temporary.
+  enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register flag = t1;
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp1$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+
+    // Both t0 (scratch) and flag (result) are written while the operand
+    // registers are still live, so neither may alias any of them.
+    // riscv_enc_fast_unlock checks flag in the same way.
+    assert_different_registers(oop, box, tmp, disp_hdr, t0, flag);
+
+    // Load markWord from object into displaced_header.
+    __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ mv(flag, 1);
+      return;
+    }
+
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag);
+    }
+
+    // Check for existing monitor
+    if ((EmitSync & 0x02) == 0) {
+      __ andi(t0, disp_hdr, markOopDesc::monitor_value);
+      __ bnez(t0, object_has_monitor);
+    }
+
+    // Set tmp to be (markWord of object | UNLOCK_VALUE).
+    __ ori(tmp, disp_hdr, markOopDesc::unlocked_value);
+
+    // Initialize the box. (Must happen before we update the object mark!)
+    __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Compare object markWord with an unlocked value (tmp) and if
+    // equal exchange the stack address of our box with object markWord.
+    // On failure disp_hdr contains the possibly locked markWord.
+    __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq,
+               Assembler::rl, /*result*/disp_hdr);
+    __ mv(flag, zr);
+    __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas
+
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    // If the compare-and-exchange succeeded, then we found an unlocked
+    // object, will have now locked it will continue at label cont
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the
+    // markWord of object (disp_hdr) with the stack pointer.
+    __ sub(disp_hdr, disp_hdr, sp);
+    __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
+    // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont,
+    // hence we can store 0 as the displaced header in the box, which indicates that it is a
+    // recursive lock.
+    __ andr(tmp/*==0?*/, disp_hdr, tmp);
+    __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+    __ mv(flag, tmp); // we can use the value of tmp as the result here
+
+    if ((EmitSync & 0x02) == 0) {
+      __ j(cont);
+
+      // Handle existing monitor.
+      __ bind(object_has_monitor);
+      // The object's monitor m is unlocked iff m->owner == NULL,
+      // otherwise m->owner may contain a thread or a stack address.
+      //
+      // Try to CAS m->owner from NULL to current thread.
+      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value));
+      __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq,
+                 Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected)
+
+      // Store a non-null value into the box to avoid looking like a re-entrant
+      // lock. The fast-path monitor unlock code checks for
+      // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
+      // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
+      __ mv(tmp, (address)markOopDesc::unused_mark());
+      __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+    }
+
+    __ bind(cont);
+  %}
+
+  // using cr flag to indicate the fast_unlock result: 0 for success; others failed.
+  //
+  // Fast-path monitor exit for $object with on-stack lock record $box.
+  // The result is left in flag (t1); t0 is used as a scratch register.
+  // $tmp1 holds the displaced header and $tmp2 is a general temporary.
+  enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{
+    MacroAssembler _masm(&cbuf);
+    Register flag = t1;
+    Register oop = as_Register($object$$reg);
+    Register box = as_Register($box$$reg);
+    Register disp_hdr = as_Register($tmp1$$reg);
+    Register tmp = as_Register($tmp2$$reg);
+    Label cont;
+    Label object_has_monitor;
+
+    // t0 is overwritten by the monitor-bit test below while tmp is still
+    // needed on the monitor path, so t0 must not alias an operand register
+    // either.  riscv_enc_fast_lock checks t0 in the same way.
+    assert_different_registers(oop, box, tmp, disp_hdr, t0, flag);
+
+    // Always do locking in runtime.
+    if (EmitSync & 0x01) {
+      __ mv(flag, 1);
+      return;
+    }
+
+    if (UseBiasedLocking && !UseOptoBiasInlining) {
+      __ biased_locking_exit(oop, tmp, cont, flag);
+    }
+
+    // Find the lock address and load the displaced header from the stack.
+    __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // If the displaced header is 0, we have a recursive unlock.
+    __ mv(flag, disp_hdr);
+    __ beqz(disp_hdr, cont);
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+      __ andi(t0, tmp, markOopDesc::monitor_value);
+      __ bnez(t0, object_has_monitor);
+    }
+
+    // Check if it is still a light weight lock, this is true if we
+    // see the stack address of the basicLock in the markWord of the
+    // object.
+
+    __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed,
+               Assembler::rl, /*result*/tmp);
+    __ xorr(flag, box, tmp); // box == tmp if cas succeeds
+    __ j(cont);
+
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    // Handle existing monitor.
+    if ((EmitSync & 0x02) == 0) {
+      __ bind(object_has_monitor);
+      STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX);
+      __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor
+      __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+      __ xorr(flag, flag, xthread); // Will be 0 if we are the owner.
+      __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions
+      __ bnez(flag, cont);
+
+      __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+      __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+      __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0.
+      __ bnez(flag, cont);
+      // need a release store here
+      __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+      __ sd(zr, Address(tmp)); // set unowned
+    }
+
+    __ bind(cont);
+  %}
+
+  // arithmetic encodings
+
+  // $dst = $src1 / $src2 via corrected_idivl(); the final argument is false
+  // here and true in riscv_enc_modw (presumably "want remainder").
+  enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false);
+  %}
+
+  // $dst = $src1 / $src2 via corrected_idivq(); the final argument is false
+  // here and true in riscv_enc_mod (presumably "want remainder").
+  // NOTE(review): the operands are declared iRegI although this variant
+  // calls corrected_idivq -- confirm whether iRegL was intended.
+  enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false);
+  %}
+
+  // $dst = $src1 % $src2 via corrected_idivl(); the final argument is true
+  // here and false in riscv_enc_divw (presumably "want remainder").
+  enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true);
+  %}
+
+  // $dst = $src1 % $src2 via corrected_idivq(); the final argument is true
+  // here and false in riscv_enc_div (presumably "want remainder").
+  // NOTE(review): the operands are declared iRegI although this variant
+  // calls corrected_idivq -- confirm whether iRegL was intended.
+  enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
+    MacroAssembler _masm(&cbuf);
+    Register dst_reg = as_Register($dst$$reg);
+    Register src1_reg = as_Register($src1$$reg);
+    Register src2_reg = as_Register($src2$$reg);
+    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true);
+  %}
+
+  enc_class riscv_enc_tail_call(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Assembler::CompressibleRegion cr(&_masm);
+    Register target_reg = as_Register($jump_target$$reg);
+    __ jr(target_reg);
+  %}
+
+  enc_class riscv_enc_tail_jmp(iRegP jump_target) %{
+    MacroAssembler _masm(&cbuf);
+    Assembler::CompressibleRegion cr(&_masm);
+    Register target_reg = as_Register($jump_target$$reg);
+    // exception oop should be in x10
+    // ret addr has been popped into ra
+    // callee expects it in x13
+    __ mv(x13, ra);
+    __ jr(target_reg);
+  %}
+
+  enc_class riscv_enc_rethrow() %{
+    MacroAssembler _masm(&cbuf);
+    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
+  %}
+
+  enc_class riscv_enc_ret() %{
+    MacroAssembler _masm(&cbuf);
+    Assembler::CompressibleRegion cr(&_masm);
+    __ ret();
+  %}
+
+%}
+
+//----------FRAME--------------------------------------------------------------
+// Definition of frame structure and management information.
+//
+//  S T A C K   L A Y O U T    Allocators stack-slot number
+//                             |   (to get allocators register number
+//  G  Owned by    |        |  v    add OptoReg::stack0())
+//  r   CALLER     |        |
+//  o     |        +--------+      pad to even-align allocators stack-slot
+//  w     V        |  pad0  |        numbers; owned by CALLER
+//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
+//  h     ^        |   in   |  5
+//        |        |  args  |  4   Holes in incoming args owned by SELF
+//  |     |        |        |  3
+//  |     |        +--------+
+//  V     |        | old out|      Empty on Intel, window on Sparc
+//        |    old |preserve|      Must be even aligned.
+//        |     SP-+--------+----> Matcher::_old_SP, even aligned
+//        |        |   in   |  3   area for Intel ret address
+//     Owned by    |preserve|      Empty on Sparc.
+//       SELF      +--------+
+//        |        |  pad2  |  2   pad to align old SP
+//        |        +--------+  1
+//        |        | locks  |  0
+//        |        +--------+----> OptoReg::stack0(), even aligned
+//        |        |  pad1  | 11   pad to align new SP
+//        |        +--------+
+//        |        |        | 10
+//        |        | spills |  9   spills
+//        V        |        |  8   (pad0 slot for callee)
+//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
+//        ^        |  out   |  7
+//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
+//     Owned by    +--------+
+//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
+//        |    new |preserve|      Must be even-aligned.
+//        |     SP-+--------+----> Matcher::_new_SP, even aligned
+//        |        |        |
+//
+// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
+//         known from SELF's arguments and the Java calling convention.
+//         Region 6-7 is determined per call site.
+// Note 2: If the calling convention leaves holes in the incoming argument
+//         area, those holes are owned by SELF.  Holes in the outgoing area
+//         are owned by the CALLEE.  Holes should not be necessary in the
+//         incoming area, as the Java calling convention is completely under
+//         the control of the AD file.  Doubles can be sorted and packed to
+//         avoid holes.  Holes in the outgoing arguments may be necessary for
+//         varargs C calling conventions.
+// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
+//         even aligned with pad0 as needed.
+//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
+//           (the latter is true on Intel but is it false on RISCV?)
+//         region 6-11 is even aligned; it may be padded out more so that
+//         the region from SP to FP meets the minimum stack alignment.
+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
+//         alignment.  Region 11, pad1, may be dynamically extended so that
+//         SP meets the minimum alignment.
+
+frame %{
+  // What direction does stack grow in (assumed to be same for C & Java)
+  stack_direction(TOWARDS_LOW);
+
+  // These three registers define part of the calling convention
+  // between compiled code and the interpreter.
+
+  // Inline Cache Register or methodOop for I2C.
+  inline_cache_reg(R31);
+
+  // Method Oop Register when calling interpreter.
+  interpreter_method_oop_reg(R31);
+
+  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
+  cisc_spilling_operand_name(indOffset);
+
+  // Number of stack slots consumed by locking an object
+  // generate Compile::sync_stack_slots
+  // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2
+  sync_stack_slots(1 * VMRegImpl::slots_per_word);
+
+  // Compiled code's Frame Pointer
+  frame_pointer(R2);
+
+  // Interpreter stores its frame pointer in a register which is
+  // stored to the stack by I2CAdaptors.
+  // I2CAdaptors convert from interpreted java to compiled java.
+  interpreter_frame_pointer(R8);
+
+  // Stack alignment requirement
+  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
+
+  // Number of stack slots between incoming argument block and the start of
+  // a new frame.  The PROLOG must add this many slots to the stack.  The
+  // EPILOG must remove this many slots. RISC-V needs two slots for
+  // return address and fp.
+  in_preserve_stack_slots(2 * VMRegImpl::slots_per_word);
+
+  // Number of outgoing stack slots killed above the out_preserve_stack_slots
+  // for calls to C.  Supports the var-args backing area for register parms.
+  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt);
+
+  // The after-PROLOG location of the return address.  Location of
+  // return address specifies a type (REG or STACK) and a number
+  // representing the register number (i.e. - use a register name) or
+  // stack slot.
+  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
+  // Otherwise, it is above the locks and verification slot and alignment word
+  // TODO this may well be correct but need to check why that - 2 is there
+  // ppc port uses 0 but we definitely need to allow for fixed_slots
+  // which folds in the space used for monitors
+  return_addr(STACK - 2 +
+              align_up((Compile::current()->in_preserve_stack_slots() +
+                        Compile::current()->fixed_slots()),
+                       stack_alignment_in_slots()));
+
+  // Body of function which returns an integer array locating
+  // arguments either in registers or in stack slots.  Passed an array
+  // of ideal registers called "sig" and a "length" count.  Stack-slot
+  // offsets are based on outgoing arguments, i.e. a CALLER setting up
+  // arguments for a CALLEE.  Incoming stack arguments are
+  // automatically biased by the preserve_stack_slots field above.
+
+  calling_convention
+  %{
+    // No difference between ingoing/outgoing just pass false
+    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
+  %}
+
+  c_calling_convention
+  %{
+    // This is obviously always outgoing
+    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
+  %}
+
+  // Location of compiled Java return values.  Same as C for now.
+  // Returns the (hi, lo) register pair for the given ideal register kind,
+  // looked up in two tables indexed by ideal_reg.
+  return_value
+  %{
+    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
+           "only return normal values");
+
+    // Low half of the return register: R10 for the integer/pointer kinds,
+    // F10 for float and double.
+    static const int lo[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      R10_num,                           // Op_RegN
+      R10_num,                           // Op_RegI
+      R10_num,                           // Op_RegP
+      F10_num,                           // Op_RegF
+      F10_num,                           // Op_RegD
+      R10_num                            // Op_RegL
+    };
+
+    // High half of the return register; OptoReg::Bad for the kinds listed
+    // without a second half (RegN, RegI, RegF).
+    static const int hi[Op_RegL + 1] = { // enum name
+      0,                                 // Op_Node
+      0,                                 // Op_Set
+      OptoReg::Bad,                      // Op_RegN
+      OptoReg::Bad,                      // Op_RegI
+      R10_H_num,                         // Op_RegP
+      OptoReg::Bad,                      // Op_RegF
+      F10_H_num,                         // Op_RegD
+      R10_H_num                          // Op_RegL
+    };
+
+    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
+  %}
+%}
+
+//----------ATTRIBUTES---------------------------------------------------------
+//----------Operand Attributes-------------------------------------------------
+op_attrib op_cost(1);        // Required cost attribute
+
+//----------Instruction Attributes---------------------------------------------
+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
+ins_attrib ins_size(32);        // Required size attribute (in bits)
+ins_attrib ins_short_branch(0); // Required flag: is this instruction
+                                // a non-matching short branch variant
+                                // of some long branch?
+ins_attrib ins_alignment(4);    // Required alignment attribute (must
+                                // be a power of 2) specifies the
+                                // alignment that some part of the
+                                // instruction (not necessarily the
+                                // start) requires.  If > 1, a
+                                // compute_padding() function must be
+                                // provided for the instruction
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+//----------Simple Operands----------------------------------------------------
+
+// Integer operands 32 bit
+// 32 bit immediate
+operand immI()
+%{
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit zero
+operand immI0()
+%{
+  predicate(n->get_int() == 0);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit increment
+operand immI_1()
+%{
+  predicate(n->get_int() == 1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit unit decrement
+operand immI_M1()
+%{
+  predicate(n->get_int() == -1);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Integer Immediate:  6-bit int, greater than or equal to 32
+// (i.e. a value in the range [32, 63])
+operand uimmI6_ge32() %{
+  predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer less than or equal to 4 (no lower bound is checked, so
+// negative values match as well)
+operand immI_le_4()
+%{
+  predicate(n->get_int() <= 4);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer constant 16
+operand immI_16()
+%{
+  predicate(n->get_int() == 16);
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer constant 24
+operand immI_24()
+%{
+  predicate(n->get_int() == 24);
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer constant 31
+operand immI_31()
+%{
+  predicate(n->get_int() == 31);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer constant 63
+operand immI_63()
+%{
+  predicate(n->get_int() == 63);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for add immediate
+operand immIAdd()
+%{
+  predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int()));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 32 bit integer valid for sub immediate
+operand immISub()
+%{
+  predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int()));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 5 bit signed value.
+operand immI5()
+%{
+  predicate(n->get_int() <= 15 && n->get_int() >= -16);
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 5 bit signed value (simm5)
+operand immL5()
+%{
+  predicate(n->get_long() <= 15 && n->get_long() >= -16);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer operands 64 bit
+// 64 bit immediate: matches any long constant (no predicate)
+operand immL()
+%{
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit zero: matches the long constant 0
+operand immL0()
+%{
+  predicate(n->get_long() == 0);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer operands
+// Pointer Immediate: matches any pointer constant (no predicate)
+operand immP()
+%{
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// NULL Pointer Immediate: pointer constant whose value is 0
+operand immP0()
+%{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate One: pointer constant whose value is 1
+// this is used in object initialization (initial object header)
+operand immP_1()
+%{
+  predicate(n->get_ptr() == 1);
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Polling Page Pointer Immediate: pointer constant equal to os::get_polling_page()
+operand immPollPage()
+%{
+  predicate((address)n->get_ptr() == os::get_polling_page());
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Card Table Byte Map Base: pointer constant equal to the card table's byte_map_base
+operand immByteMapBase()
+%{
+  // Applies only when the active barrier set is a CardTableBarrierSet; compares against its card map base
+  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
+            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
+  match(ConP);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Int Immediate: low 16-bit mask (the int constant 0xFFFF)
+operand immI_16bits()
+%{
+  predicate(n->get_int() == 0xFFFF);
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immIpowerOf2() %{
+  predicate(is_power_of_2((juint)(n->get_int())));  // int constant, cast to juint before the power-of-2 test
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask (the long constant 0xFFFFFFFF)
+operand immL_32bits()
+%{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit unit decrement: matches the long constant -1
+operand immL_M1()
+%{
+  predicate(n->get_long() == -1);
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+
+// Long constant equal to the pc offset in the thread anchor (frame anchor offset + last_Java_pc offset)
+
+operand immL_pc_off()
+%{
+  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
+                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+  match(ConL);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for add immediate (per Assembler::operand_valid_for_add_immediate)
+operand immLAdd()
+%{
+  predicate(Assembler::operand_valid_for_add_immediate(n->get_long()));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// 64 bit integer valid for sub immediate, i.e. its negation is a valid add immediate
+operand immLSub()
+%{
+  predicate(n->get_long() != min_jlong && Assembler::operand_valid_for_add_immediate(-(n->get_long())));  // min_jlong cannot be negated without overflow
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow pointer operands
+// Narrow Pointer Immediate: matches any ConN (no predicate)
+operand immN()
+%{
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate: narrow constant whose value is 0
+operand immN0()
+%{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immNKlass()
+%{
+  match(ConNKlass);  // any ConNKlass constant; no predicate
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float and Double operands
+// Double Immediate: matches any double constant (no predicate)
+operand immD()
+%{
+  match(ConD);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +0.0d
+operand immD0()
+%{
+  predicate(jlong_cast(n->getd()) == 0);  // presumably a bit-pattern compare, so -0.0d would not match; confirm jlong_cast
+  match(ConD);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: matches any float constant (no predicate)
+operand immF()
+%{
+  match(ConF);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +0.0f.
+operand immF0()
+%{
+  predicate(jint_cast(n->getf()) == 0);  // presumably a bit-pattern compare, so -0.0f would not match; confirm jint_cast
+  match(ConF);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immIOffset()
+%{
+  predicate(is_imm_in_range(n->get_int(), 12, 0));  // int displacement for indOffI/indOffIN; presumably a 12-bit range check, confirm
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+operand immLOffset()
+%{
+  predicate(is_imm_in_range(n->get_long(), 12, 0));  // long displacement for indOffL/indOffLN; presumably a 12-bit range check, confirm
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Scale values: int constant in the range [1, 3]
+operand immIScale()
+%{
+  predicate(1 <= n->get_int() && (n->get_int() <= 3));
+  match(ConI);
+
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Integer 32 bit Register Operands: allocated from any_reg32; also accepts iRegINoSp
+operand iRegI()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 32 bit Register not Special: allocated from no_special_reg32
+operand iRegINoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R10 only: int operand allocated from int_r10_reg
+operand iRegI_R10()
+%{
+  constraint(ALLOC_IN_RC(int_r10_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R12 only: int operand allocated from int_r12_reg
+operand iRegI_R12()
+%{
+  constraint(ALLOC_IN_RC(int_r12_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R13 only: int operand allocated from int_r13_reg
+operand iRegI_R13()
+%{
+  constraint(ALLOC_IN_RC(int_r13_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R14 only: int operand allocated from int_r14_reg
+operand iRegI_R14()
+%{
+  constraint(ALLOC_IN_RC(int_r14_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register Operands: allocated from any_reg; also accepts iRegLNoSp
+operand iRegL()
+%{
+  constraint(ALLOC_IN_RC(any_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Integer 64 bit Register not Special: allocated from no_special_reg; also accepts iRegL_R10
+operand iRegLNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg));
+  match(RegL);
+  match(iRegL_R10);  // NOTE(review): unlike its sibling operands, no op_cost is declared here - confirm intended
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R28 only: allocated from r28_reg
+operand iRegL_R28()
+%{
+  constraint(ALLOC_IN_RC(r28_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R29 only: allocated from r29_reg
+operand iRegL_R29()
+%{
+  constraint(ALLOC_IN_RC(r29_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R30 only: allocated from r30_reg
+operand iRegL_R30()
+%{
+  constraint(ALLOC_IN_RC(r30_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register Operands
+// Pointer Register: allocated from ptr_reg; also accepts iRegPNoSp, iRegP_R10 and javaThread_RegP
+operand iRegP()
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  match(iRegP_R10);
+  match(javaThread_RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register not Special: allocated from no_special_ptr_reg
+operand iRegPNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_ptr_reg));
+  match(RegP);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegP_R10()
+%{
+  constraint(ALLOC_IN_RC(r10_reg));  // pointer operand restricted to the r10_reg class
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R11 only: allocated from r11_reg
+operand iRegP_R11()
+%{
+  constraint(ALLOC_IN_RC(r11_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegP_R12()
+%{
+  constraint(ALLOC_IN_RC(r12_reg));  // pointer operand restricted to the r12_reg class
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R13 only: allocated from r13_reg
+operand iRegP_R13()
+%{
+  constraint(ALLOC_IN_RC(r13_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegP_R14()
+%{
+  constraint(ALLOC_IN_RC(r14_reg));  // pointer operand restricted to the r14_reg class
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegP_R15()
+%{
+  constraint(ALLOC_IN_RC(r15_reg));  // pointer operand restricted to the r15_reg class
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegP_R16()
+%{
+  constraint(ALLOC_IN_RC(r16_reg));  // pointer operand restricted to the r16_reg class
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer 64 bit Register R28 only: allocated from r28_reg
+operand iRegP_R28()
+%{
+  constraint(ALLOC_IN_RC(r28_reg));
+  match(RegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow Pointer Register Operands
+// Narrow Pointer Register: allocated from any_reg32; also accepts iRegNNoSp
+operand iRegN()
+%{
+  constraint(ALLOC_IN_RC(any_reg32));
+  match(RegN);
+  match(iRegNNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow Pointer Register not Special: allocated from no_special_reg32
+operand iRegNNoSp()
+%{
+  constraint(ALLOC_IN_RC(no_special_reg32));
+  match(RegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// heap base register (heapbase_reg class, matched as RegI) -- used for encoding immN0
+operand iRegIHeapbase()
+%{
+  constraint(ALLOC_IN_RC(heapbase_reg));
+  match(RegI);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R10 only: allocated from r10_reg
+operand iRegL_R10()
+%{
+  constraint(ALLOC_IN_RC(r10_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float Register
+// Float register operands: RegF values allocated from float_reg
+operand fRegF()
+%{
+  constraint(ALLOC_IN_RC(float_reg));
+  match(RegF);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double Register
+// Double register operands: RegD values allocated from double_reg
+operand fRegD()
+%{
+  constraint(ALLOC_IN_RC(double_reg));
+  match(RegD);
+
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Java Thread Register: an iRegP restricted to the java_thread_reg class
+operand javaThread_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg
+  match(reg);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+//----------Memory Operands----------------------------------------------------
+// RISCV has only base_plus_offset and literal address mode, so no need to use
+// index and scale. Here set index as 0xffffffff and scale as 0x0.
+operand indirect(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(reg);
+  op_cost(0);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);         // address is the pointer register itself
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp(0x0);          // zero displacement
+  %}
+%}
+
+operand indOffI(iRegP reg, immIOffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);  // pointer register plus int constant offset
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp($off);         // displacement is the immIOffset constant
+  %}
+%}
+
+operand indOffL(iRegP reg, immLOffset off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);  // pointer register plus long constant offset
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp($off);         // displacement is the immLOffset constant
+  %}
+%}
+
+operand indirectN(iRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);  // the DecodeN is folded into the memory operand
+  op_cost(0);
+  format %{ "[$reg]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp(0x0);          // zero displacement
+  %}
+%}
+
+operand indOffIN(iRegN reg, immIOffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);  // decoded narrow pointer plus int constant offset
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp($off);         // displacement is the immIOffset constant
+  %}
+%}
+
+operand indOffLN(iRegN reg, immLOffset off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);  // only when the narrow oop shift is zero
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);  // decoded narrow pointer plus long constant offset
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp($off);         // displacement is the immLOffset constant
+  %}
+%}
+
+// RISCV opto stubs need to write to the pc slot in the thread anchor
+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP reg off);  // thread register plus the constant accepted by immL_pc_off
+  op_cost(0);
+  format %{ "[$reg, $off]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);  // no index
+    scale(0x0);         // no scale
+    disp($off);
+  %}
+%}
+
+
+//----------Special Memory Operands--------------------------------------------
+// Stack Slot Operand - This operand is used for loading and storing temporary
+//                      values on the stack where a match requires a value to
+//                      flow through memory.
+operand stackSlotI(sRegI reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegI);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x02);  // SP (register encoding 0x02)
+    index(0xffffffff);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegF);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x02);  // SP (register encoding 0x02)
+    index(0xffffffff);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegD);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x02);  // SP (register encoding 0x02)
+    index(0xffffffff);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg)
+%{
+  constraint(ALLOC_IN_RC(stack_slots));
+  // No match rule because this operand is only generated in matching
+  // match(RegL);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base(0x02);  // SP (register encoding 0x02)
+    index(0xffffffff);  // No Index
+    scale(0x0);  // No Scale
+    disp($reg);  // Stack Offset
+  %}
+%}
+
+// Special operand allowing long args to int ops to be truncated for free: matches (ConvL2I reg)
+
+operand iRegL2I(iRegL reg) %{
+
+  op_cost(0);
+
+  match(ConvL2I reg);
+
+  format %{ "l2i($reg)" %}
+
+  interface(REG_INTER)  // NOTE(review): no ';' here, unlike the other operands in this file - confirm before changing
+%}
+
+
+// Comparison Operands
+// NOTE: Label is a predefined operand which should not be redefined in
+//       the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op  - This is the operation of the comparison, and is limited to
+//                  the following set of codes:
+//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below, such as cmpOpU.
+
+
+// used for signed integral comparisons and fp comparisons; matches any Bool (no predicate)
+operand cmpOp()
+%{
+  match(Bool);
+
+  format %{ "" %}
+
+  // the values in interface derive from struct BoolTest::mask
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    greater(0x1, "gt");
+    overflow(0x2, "overflow");
+    less(0x3, "lt");
+    not_equal(0x4, "ne");
+    less_equal(0x5, "le");
+    no_overflow(0x6, "no_overflow");
+    greater_equal(0x7, "ge");
+  %}
+%}
+
+// used for unsigned integral comparisons; same codes as cmpOp with gtu/ltu/leu/geu names
+operand cmpOpU()
+%{
+  match(Bool);
+
+  format %{ "" %}
+  // the values in interface derive from struct BoolTest::mask
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    greater(0x1, "gtu");
+    overflow(0x2, "overflow");
+    less(0x3, "ltu");
+    not_equal(0x4, "ne");
+    less_equal(0x5, "leu");
+    no_overflow(0x6, "no_overflow");
+    greater_equal(0x7, "geu");
+  %}
+%}
+
+// used for certain integral comparisons which can be
+// converted to bxx instructions; the predicate admits only eq and ne tests
+operand cmpOpEqNe()
+%{
+  match(Bool);
+  op_cost(0);
+  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
+            n->as_Bool()->_test._test == BoolTest::eq);
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    greater(0x1, "gt");
+    overflow(0x2, "overflow");
+    less(0x3, "lt");
+    not_equal(0x4, "ne");
+    less_equal(0x5, "le");
+    no_overflow(0x6, "no_overflow");
+    greater_equal(0x7, "ge");
+  %}
+%}
+
+operand cmpOpULtGe()
+%{
+  match(Bool);
+  op_cost(0);
+  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
+            n->as_Bool()->_test._test == BoolTest::ge);  // only lt and ge tests are accepted
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    greater(0x1, "gtu");
+    overflow(0x2, "overflow");
+    less(0x3, "ltu");
+    not_equal(0x4, "ne");
+    less_equal(0x5, "leu");
+    no_overflow(0x6, "no_overflow");
+    greater_equal(0x7, "geu");
+  %}
+%}
+
+operand cmpOpUEqNeLeGt()
+%{
+  match(Bool);
+  op_cost(0);
+  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
+            n->as_Bool()->_test._test == BoolTest::eq ||
+            n->as_Bool()->_test._test == BoolTest::le ||
+            n->as_Bool()->_test._test == BoolTest::gt);  // only eq, ne, le and gt tests are accepted
+
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x0, "eq");
+    greater(0x1, "gtu");
+    overflow(0x2, "overflow");
+    less(0x3, "ltu");
+    not_equal(0x4, "ne");
+    less_equal(0x5, "leu");
+    no_overflow(0x6, "no_overflow");
+    greater_equal(0x7, "geu");
+  %}
+%}
+
+
+// Flags register, used as output of compare logic; RegFlags allocated from reg_flags
+operand rFlagsReg()
+%{
+  constraint(ALLOC_IN_RC(reg_flags));
+  match(RegFlags);
+
+  op_cost(0);
+  format %{ "RFLAGS" %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register: an iRegP restricted to the method_reg class; also accepts iRegPNoSp
+operand inline_cache_RegP(iRegP reg)
+%{
+  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
+  match(reg);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+
+// memory is used to define the read/write location for load/store
+// instruction defs. We can turn a memory op into an Address.
+
+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN);
+
+// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
+// operations. It allows the src to be either an iRegI or a (ConvL2I
+// iRegL). In the latter case the l2i normally planted for a ConvL2I
+// can be elided because the 32-bit instruction will just employ the
+// lower 32 bits anyway.
+//
+// n.b. this does not elide all L2I conversions. If the truncated
+// value is consumed by more than one operation then the ConvL2I
+// cannot be bundled into the consuming nodes so an l2i gets planted
+// (actually a mvw $dst $src) and the downstream instructions consume
+// the result of the l2i as an iRegI input. That's a shame since the
+// mvw is actually redundant but it's not too costly.
+
+opclass iRegIorL2I(iRegI, iRegL2I);
+opclass iRegIorL(iRegI, iRegL);  // int or long register
+opclass iRegNorP(iRegN, iRegP);  // narrow or full pointer register
+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP);  // any of the four integer/pointer register operands
+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp);  // the same four, "not Special" variants
+opclass immIorL(immI, immL);  // int or long immediate
+
+//----------PIPELINE-----------------------------------------------------------
+// Rules which define the behavior of the target architectures pipeline.
+
+// For specific pipelines, e.g. generic RISC-V, name the stages of that pipeline as aliases of S0..S3
+//pipe_desc(ID, EX, MEM, WR);
+#define ID   S0
+#define EX   S1
+#define MEM  S2
+#define WR   S3
+
+// Pipeline model: attributes, resources, stage description and pipeline classes
+pipeline %{
+
+attributes %{
+  // RISC-V instructions are of fixed length
+  fixed_size_instructions;           // Fixed size instructions (TODO: verify)
+  max_instructions_per_bundle = 2;   // 1 for generic RISC-V, 2 for SiFive Series 7
+  // RISC-V instructions come in 32-bit word units
+  instruction_unit_size = 4;         // An instruction is 4 bytes long
+  instruction_fetch_unit_size = 64;  // The processor fetches one line
+  instruction_fetch_units = 1;       // of 64 bytes
+
+  // List of nop instructions
+  nops( MachNop );
+%}
+
+// We don't use an actual pipeline model so don't care about resources
+// or description. we do use pipeline classes to introduce fixed
+// latencies
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine
+
+// Generic RISC-V pipeline
+// 1 decoder
+// 1 instruction decoded per cycle
+// 1 load/store op per cycle, 1 branch, 1 FPU
+// 1 mul, 1 div
+
+resources ( DECODE,
+            ALU,
+            MUL,
+            DIV,
+            BRANCH,
+            LDST,
+            FPU);
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline
+
+// Define the pipeline as a generic 6 stage pipeline (stages S0 through S5)
+pipe_desc(S0, S1, S2, S3, S4, S5);
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+  single_instruction;
+  src1   : S1(read);   // first float source, read in S1
+  src2   : S2(read);   // second float source, read in S2
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;         // decoder used in ID (S0)
+  FPU    : S5;         // FPU used in S5
+%}
+
+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+  src1   : S1(read);   // NOTE(review): no single_instruction here, unlike fp_dop_reg_reg_s - confirm intended
+  src2   : S2(read);   // second double source, read in S2
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;         // decoder used in ID (S0)
+  FPU    : S5;         // FPU used in S5
+%}
+
+pipe_class fp_uop_s(fRegF dst, fRegF src)
+%{
+  single_instruction;
+  src    : S1(read);   // float source, read in S1
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_uop_d(fRegD dst, fRegD src)
+%{
+  single_instruction;
+  src    : S1(read);   // double source, read in S1
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_d2f(fRegF dst, fRegD src)
+%{
+  single_instruction;
+  src    : S1(read);   // double source, read in S1
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_f2d(fRegD dst, fRegF src)
+%{
+  single_instruction;
+  src    : S1(read);   // float source, read in S1
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_f2i(iRegINoSp dst, fRegF src)
+%{
+  single_instruction;
+  src    : S1(read);   // float source, read in S1
+  dst    : S5(write);  // int result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_f2l(iRegLNoSp dst, fRegF src)
+%{
+  single_instruction;
+  src    : S1(read);   // float source, read in S1
+  dst    : S5(write);  // long result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_i2f(fRegF dst, iRegIorL2I src)
+%{
+  single_instruction;
+  src    : S1(read);   // int (or truncated long) source, read in S1
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_l2f(fRegF dst, iRegL src)
+%{
+  single_instruction;
+  src    : S1(read);   // long source, read in S1
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_d2i(iRegINoSp dst, fRegD src)
+%{
+  single_instruction;
+  src    : S1(read);   // double source, read in S1
+  dst    : S5(write);  // int result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_d2l(iRegLNoSp dst, fRegD src)
+%{
+  single_instruction;
+  src    : S1(read);   // double source, read in S1
+  dst    : S5(write);  // long result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_i2d(fRegD dst, iRegIorL2I src)
+%{
+  single_instruction;
+  src    : S1(read);   // int (or truncated long) source, read in S1
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_l2d(fRegD dst, iRegL src)
+%{
+  single_instruction;
+  src    : S1(read);   // long source, read in S1 (declared iRegL, matching fp_l2f)
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+  single_instruction;
+  src1   : S1(read);   // first float source, read in S1
+  src2   : S2(read);   // second float source, read in S2
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+  single_instruction;
+  src1   : S1(read);   // first double source, read in S1
+  src2   : S2(read);   // second double source, read in S2
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2)
+%{
+  single_instruction;
+  src1   : S1(read);   // float source, read in S1
+  src2   : S2(read);   // NOTE(review): a second source is declared although the name suggests one input - confirm
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2)
+%{
+  single_instruction;
+  src1   : S1(read);   // double source, read in S1
+  src2   : S2(read);   // NOTE(review): a second source is declared although the name suggests one input - confirm
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_load_constant_s(fRegF dst)
+%{
+  single_instruction;
+  dst    : S5(write);  // float result, written in S5; no register sources
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_load_constant_d(fRegD dst)
+%{
+  single_instruction;
+  dst    : S5(write);  // double result, written in S5; no register sources
+  DECODE : ID;
+  FPU    : S5;
+%}
+
+pipe_class fp_load_mem_s(fRegF dst, memory mem)
+%{
+  single_instruction;
+  mem    : S1(read);   // memory operand, read in S1
+  dst    : S5(write);  // float result, written in S5
+  DECODE : ID;
+  LDST   : MEM;        // load/store unit used in MEM (S2)
+%}
+
+pipe_class fp_load_mem_d(fRegD dst, memory mem)
+%{
+  single_instruction;
+  mem    : S1(read);   // memory operand, read in S1
+  dst    : S5(write);  // double result, written in S5
+  DECODE : ID;
+  LDST   : MEM;        // load/store unit used in MEM (S2)
+%}
+
+pipe_class fp_store_reg_s(fRegF src, memory mem)
+%{
+  single_instruction;
+  src    : S1(read);   // float value to store, read in S1
+  mem    : S5(write);  // memory operand, written in S5
+  DECODE : ID;
+  LDST   : MEM;        // load/store unit used in MEM (S2)
+%}
+
+pipe_class fp_store_reg_d(fRegD src, memory mem)
+%{
+  single_instruction;
+  src    : S1(read);   // double value to store, read in S1
+  mem    : S5(write);  // memory operand, written in S5
+  DECODE : ID;
+  LDST   : MEM;        // load/store unit used in MEM (S2)
+%}
+
+//------- Integer ALU operations --------------------------
+
+// Integer ALU reg-reg operation
+// Operands needed in ID, result generated in EX
+// E.g.  ADD   Rd, Rs1, Rs2
+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+// Integer ALU reg operation with constant shift: source read in ID, result written in EX
+// E.g. SLLI    Rd, Rs1, #shift
+pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX(write);
+  src1   : ID(read);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+// Integer ALU reg-reg operation with variable shift
+// both operands must be available in ID; result written in EX
+// E.g. SLL   Rd, Rs1, Rs2
+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : EX(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+// Integer ALU reg operation: single source read in ID, result written in EX
+// E.g. NEG   Rd, Rs2
+pipe_class ialu_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : EX(write);
+  src    : ID(read);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+// Integer ALU reg immediate operation: register source read in ID, result written in EX
+// E.g. ADDI   Rd, Rs1, #imm
+pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
+%{
+  single_instruction;
+  dst    : EX(write);
+  src1   : ID(read);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+// Integer ALU immediate operation (no source operands): result written in EX
+// E.g. LI    Rd, #imm
+pipe_class ialu_imm(iRegI dst)
+%{
+  single_instruction;
+  dst    : EX(write);
+  DECODE : ID;
+  ALU    : EX;
+%}
+
+//------- Multiply pipeline operations --------------------
+
+// Multiply reg-reg: sources read in ID, result written in WR on the MUL unit
+// E.g. MULW   Rd, Rs1, Rs2
+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  MUL    : WR;
+%}
+
+// E.g. MUL   Rd, Rs1, Rs2
+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(3); // Maximum latency for 64 bit mul
+  dst    : WR(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  MUL    : WR;
+%}
+
+//------- Divide pipeline operations --------------------
+
+// E.g. DIVW   Rd, Rs1, Rs2 (sources read in ID, result written in WR on the DIV unit)
+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(8); // Maximum latency for 32 bit divide
+  dst    : WR(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  DIV    : WR;
+%}
+
+// E.g. DIV   Rd, Rs1, Rs2 (sources read in ID, result written in WR on the DIV unit)
+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  fixed_latency(16); // Maximum latency for 64 bit divide
+  dst    : WR(write);
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  DIV    : WR;
+%}
+
+//------- Load pipeline operations ------------------------
+
+// Load - reg, mem: memory operand read in ID, result written in WR
+// E.g. LA    Rd, mem
+pipe_class iload_reg_mem(iRegI dst, memory mem)
+%{
+  single_instruction;
+  dst    : WR(write);
+  mem    : ID(read);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+// Load - reg, reg: source register read in ID, result written in WR
+// E.g. LD    Rd, Rs
+pipe_class iload_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : WR(write);
+  src    : ID(read);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+//------- Store pipeline operations -----------------------
+
+// Store - zr, mem: only the memory operand is read (in ID)
+// E.g. SD    zr, mem
+pipe_class istore_mem(memory mem)
+%{
+  single_instruction;
+  mem    : ID(read);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+// Store - reg, mem: memory operand read in ID, stored value read in EX
+// E.g. SD    Rs, mem
+pipe_class istore_reg_mem(iRegI src, memory mem)
+%{
+  single_instruction;
+  mem    : ID(read);
+  src    : EX(read);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+// Store - reg, reg: dst is read in ID (not written), src is read in EX
+// E.g. SD    Rs2, Rs1
+pipe_class istore_reg_reg(iRegI dst, iRegI src)
+%{
+  single_instruction;
+  dst    : ID(read);
+  src    : EX(read);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+//------- Control transfer pipeline operations ------------
+
+// Branch with no register operands: BRANCH unit used in EX
+pipe_class pipe_branch()
+%{
+  single_instruction;
+  DECODE : ID;
+  BRANCH : EX;
+%}
+
+// Branch with one register operand, read in ID
+pipe_class pipe_branch_reg(iRegI src)
+%{
+  single_instruction;
+  src    : ID(read);
+  DECODE : ID;
+  BRANCH : EX;
+%}
+
+// Compare & Branch: both sources read in ID, BRANCH unit used in EX
+// E.g. BEQ   Rs1, Rs2, L
+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2)
+%{
+  single_instruction;
+  src1   : ID(read);
+  src2   : ID(read);
+  DECODE : ID;
+  BRANCH : EX;
+%}
+
+// E.g. BEQZ Rs, L (single source read in ID, BRANCH unit used in EX)
+pipe_class pipe_cmpz_branch(iRegI src)
+%{
+  single_instruction;
+  src    : ID(read);
+  DECODE : ID;
+  BRANCH : EX;
+%}
+
+//------- Synchronisation operations ----------------------
+// Any operation requiring serialization; modelled with a fixed latency of 16
+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release
+pipe_class pipe_serial()
+%{
+  single_instruction;
+  force_serialization;
+  fixed_latency(16);
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+pipe_class pipe_slow()
+%{
+  instruction_count(10);  // counted as 10 instructions
+  multiple_bundles;
+  force_serialization;
+  fixed_latency(16);      // same fixed latency as pipe_serial
+  DECODE : ID;
+  LDST   : MEM;
+%}
+
+// Empty pipeline class: zero latency, no resources or operands
+pipe_class pipe_class_empty()
+%{
+  single_instruction;
+  fixed_latency(0);
+%}
+
+// Default pipeline class: fixed latency of 2, no resources or operands.
+pipe_class pipe_class_default()
+%{
+  single_instruction;
+  fixed_latency(2);
+%}
+
+// Pipeline class for compares: fixed latency of 16.
+pipe_class pipe_class_compare()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for memory operations: fixed latency of 16.
+pipe_class pipe_class_memory()
+%{
+  single_instruction;
+  fixed_latency(16);
+%}
+
+// Pipeline class for call: fixed latency of 100.
+pipe_class pipe_class_call()
+%{
+  single_instruction;
+  fixed_latency(100);
+%}
+
+// Define the class for the Nop node: MachNop uses the zero-latency pipe_class_empty.
+define %{
+   MachNop = pipe_class_empty;
+%}
+%}
+//----------INSTRUCTIONS-------------------------------------------------------
+//
+// match      -- States which machine-independent subtree may be replaced
+//               by this instruction.
+// ins_cost   -- The estimated cost of this instruction is used by instruction
+//               selection to identify a minimum cost tree of machine
+//               instructions that matches a tree of machine-independent
+//               instructions.
+// format     -- A string providing the disassembly for this instruction.
+//               The value of an instruction's operand may be inserted
+//               by referring to it with a '$' prefix.
+// opcode     -- Three instruction opcodes may be provided.  These are referred
+//               to within an encode class as $primary, $secondary, and $tertiary
+//               respectively.  The primary opcode is commonly used to
+//               indicate the type of machine instruction, while secondary
+//               and tertiary are often used for prefix options or addressing
+//               modes.
+// ins_encode -- A list of encode classes with parameters. The encode class
+//               name must have been defined in an 'enc_class' specification
+//               in the encode section of the architecture description.
+
+// ============================================================================
+// Memory (Load/Store) Instructions
+
+// Load Instructions
+
+// Load Byte (8 bit signed)
+instruct loadB(iRegINoSp dst, memory mem)
+%{
+  // Matches a plain LoadB and emits one lb from [base + disp] into $dst.
+  match(Set dst (LoadB mem));
+
+  format %{ "lb  $dst, $mem\t# byte, #@loadB" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lb($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit signed) into long
+instruct loadB2L(iRegLNoSp dst, memory mem)
+%{
+  // Folds ConvI2L(LoadB) into a single lb whose destination is a long register.
+  match(Set dst (ConvI2L (LoadB mem)));
+
+  format %{ "lb  $dst, $mem\t# byte, #@loadB2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lb($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned)
+instruct loadUB(iRegINoSp dst, memory mem)
+%{
+  // Matches LoadUB and emits one lbu from [base + disp] into $dst.
+  match(Set dst (LoadUB mem));
+
+  format %{ "lbu  $dst, $mem\t# byte, #@loadUB" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lbu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Byte (8 bit unsigned) into long
+instruct loadUB2L(iRegLNoSp dst, memory mem)
+%{
+  // Folds ConvI2L(LoadUB) into a single lbu whose destination is a long register.
+  match(Set dst (ConvI2L (LoadUB mem)));
+
+  format %{ "lbu  $dst, $mem\t# byte, #@loadUB2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lbu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed)
+instruct loadS(iRegINoSp dst, memory mem)
+%{
+  // Matches LoadS and emits one lh from [base + disp] into $dst.
+  match(Set dst (LoadS mem));
+
+  format %{ "lh  $dst, $mem\t# short, #@loadS" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lh($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short (16 bit signed) into long
+instruct loadS2L(iRegLNoSp dst, memory mem)
+%{
+  // Folds ConvI2L(LoadS) into a single lh whose destination is a long register.
+  match(Set dst (ConvI2L (LoadS mem)));
+
+  format %{ "lh  $dst, $mem\t# short, #@loadS2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lh($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Char (16 bit unsigned)
+instruct loadUS(iRegINoSp dst, memory mem)
+%{
+  // Matches LoadUS and emits one lhu from [base + disp] into $dst.
+  match(Set dst (LoadUS mem));
+
+  format %{ "lhu  $dst, $mem\t# short, #@loadUS" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lhu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Short/Char (16 bit unsigned) into long
+instruct loadUS2L(iRegLNoSp dst, memory mem)
+%{
+  // Folds ConvI2L(LoadUS) into a single lhu whose destination is a long register.
+  match(Set dst (ConvI2L (LoadUS mem)));
+
+  format %{ "lhu  $dst, $mem\t# short, #@loadUS2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lhu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed)
+instruct loadI(iRegINoSp dst, memory mem)
+%{
+  // Matches LoadI and emits one lw from [base + disp] into $dst.
+  match(Set dst (LoadI mem));
+
+  format %{ "lw  $dst, $mem\t# int, #@loadI" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lw($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit signed) into long
+instruct loadI2L(iRegLNoSp dst, memory mem)
+%{
+  // Folds ConvI2L(LoadI) into a single lw whose destination is a long register.
+  match(Set dst (ConvI2L (LoadI mem)));
+
+  format %{ "lw  $dst, $mem\t# int, #@loadI2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lw($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Integer (32 bit unsigned) into long
+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
+%{
+  // Folds AndL(ConvI2L(LoadI), mask) into a single lwu. $mask only takes part
+  // in matching (operand class immL_32bits); the encoding does not read it.
+  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
+
+  format %{ "lwu  $dst, $mem\t# int, #@loadUI2L" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lwu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Long (64 bit signed)
+instruct loadL(iRegLNoSp dst, memory mem)
+%{
+  // Matches LoadL and emits one ld from [base + disp] into $dst.
+  match(Set dst (LoadL mem));
+
+  ins_cost(LOAD_COST);
+  // The type tag in the disassembly text is "long" (it previously said "int"),
+  // matching the tag used by storeL.
+  format %{ "ld  $dst, $mem\t# long, #@loadL" %}
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp));
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Range
+instruct loadRange(iRegINoSp dst, memory mem)
+%{
+  // Matches LoadRange and emits one lwu from [base + disp] into $dst.
+  match(Set dst (LoadRange mem));
+
+  format %{ "lwu  $dst, $mem\t# range, #@loadRange" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lwu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Pointer
+instruct loadP(iRegPNoSp dst, memory mem)
+%{
+  // Matches LoadP and emits one ld from [base + disp] into the pointer register $dst.
+  match(Set dst (LoadP mem));
+
+  format %{ "ld  $dst, $mem\t# ptr, #@loadP" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ ld($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Compressed Pointer
+instruct loadN(iRegNNoSp dst, memory mem)
+%{
+  // Matches LoadN and emits one lwu from [base + disp] into the narrow register $dst.
+  match(Set dst (LoadN mem));
+
+  format %{ "lwu  $dst, $mem\t# loadN, compressed ptr, #@loadN" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lwu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Klass Pointer
+instruct loadKlass(iRegPNoSp dst, memory mem)
+%{
+  // Matches LoadKlass and emits one ld from [base + disp] into $dst.
+  match(Set dst (LoadKlass mem));
+
+  format %{ "ld  $dst, $mem\t# class, #@loadKlass" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ ld($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Narrow Klass Pointer
+instruct loadNKlass(iRegNNoSp dst, memory mem)
+%{
+  // Matches LoadNKlass and emits one lwu from [base + disp] into the narrow register $dst.
+  match(Set dst (LoadNKlass mem));
+
+  format %{ "lwu  $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ lwu($dst$$Register, src);
+  %}
+
+  ins_pipe(iload_reg_mem);
+%}
+
+// Load Float
+instruct loadF(fRegF dst, memory mem)
+%{
+  // Matches LoadF and emits one flw from [base + disp] into the float register $dst.
+  match(Set dst (LoadF mem));
+
+  format %{ "flw  $dst, $mem\t# float, #@loadF" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ flw(as_FloatRegister($dst$$reg), src);
+  %}
+
+  ins_pipe(fp_load_mem_s);
+%}
+
+// Load Double
+instruct loadD(fRegD dst, memory mem)
+%{
+  // Matches LoadD and emits one fld from [base + disp] into the double register $dst.
+  match(Set dst (LoadD mem));
+
+  format %{ "fld  $dst, $mem\t# double, #@loadD" %}
+  ins_cost(LOAD_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address src(as_Register($mem$$base), $mem$$disp);
+    __ fld(as_FloatRegister($dst$$reg), src);
+  %}
+
+  ins_pipe(fp_load_mem_d);
+%}
+
+// Load Int Constant
+// Materializes an int immediate (operand class immI) into $dst. Code
+// generation is delegated to the riscv_enc_li_imm encoding class, which is
+// defined outside this section; it is shared with loadConL.
+instruct loadConI(iRegINoSp dst, immI src)
+%{
+  match(Set dst src);
+
+  ins_cost(ALU_COST);
+  format %{ "li $dst, $src\t# int, #@loadConI" %}
+
+  ins_encode(riscv_enc_li_imm(dst, src));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Long Constant
+// Materializes a long immediate (operand class immL) into $dst via the same
+// riscv_enc_li_imm encoding class used by loadConI (defined outside this
+// section).
+instruct loadConL(iRegLNoSp dst, immL src)
+%{
+  match(Set dst src);
+
+  ins_cost(ALU_COST);
+  format %{ "li $dst, $src\t# long, #@loadConL" %}
+
+  ins_encode(riscv_enc_li_imm(dst, src));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant
+// Materializes a pointer constant (operand class immP) into $dst. Code
+// generation is delegated to riscv_enc_mov_p, defined outside this section.
+instruct loadConP(iRegPNoSp dst, immP con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $con\t# ptr, #@loadConP" %}
+
+  ins_encode(riscv_enc_mov_p(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Null Pointer Constant
+// Matches the immP0 operand class. Only $dst is handed to the
+// riscv_enc_mov_zero encoding class (defined outside this section); $con is
+// used for matching and for the format text only.
+instruct loadConP0(iRegPNoSp dst, immP0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $con\t# NULL ptr, #@loadConP0" %}
+
+  ins_encode(riscv_enc_mov_zero(dst));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Pointer Constant One
+// Matches the immP_1 operand class. Only $dst is handed to the
+// riscv_enc_mov_p1 encoding class (defined outside this section); $con is
+// used for matching and for the format text only.
+instruct loadConP1(iRegPNoSp dst, immP_1 con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $con\t# load ptr constant one, #@loadConP1" %}
+
+  ins_encode(riscv_enc_mov_p1(dst));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Poll Page Constant
+// Matches the immPollPage operand class and delegates to
+// riscv_enc_mov_poll_page (defined outside this section). Costed at
+// ALU_COST * 6, higher than the single ALU_COST used by the plain pointer
+// constant loads above.
+instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST * 6);
+  format %{ "movptr  $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %}
+
+  ins_encode(riscv_enc_mov_poll_page(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Byte Map Base Constant
+// Matches the immByteMapBase operand class. Only $dst is handed to the
+// riscv_enc_mov_byte_map_base encoding class (defined outside this section).
+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
+%{
+  match(Set dst con);
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $con\t# Byte Map Base, #@loadByteMapBase" %}
+
+  ins_encode(riscv_enc_mov_byte_map_base(dst));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Pointer Constant
+// Materializes a narrow pointer constant (operand class immN) into $dst via
+// riscv_enc_mov_n (defined outside this section). Costed at ALU_COST * 4.
+instruct loadConN(iRegNNoSp dst, immN con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST * 4);
+  format %{ "mv  $dst, $con\t# compressed ptr, #@loadConN" %}
+
+  ins_encode(riscv_enc_mov_n(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Null Pointer Constant
+// Matches the immN0 operand class and reuses riscv_enc_mov_zero, the same
+// encoding class as loadConP0; only $dst is passed to it.
+instruct loadConN0(iRegNNoSp dst, immN0 con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $con\t# compressed NULL ptr, #@loadConN0" %}
+
+  ins_encode(riscv_enc_mov_zero(dst));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Narrow Klass Constant
+// Materializes a narrow klass constant (operand class immNKlass) into $dst
+// via riscv_enc_mov_nk (defined outside this section). Costed at ALU_COST * 6.
+instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
+%{
+  match(Set dst con);
+
+  ins_cost(ALU_COST * 6);
+  format %{ "mv  $dst, $con\t# compressed klass ptr, #@loadConNKlass" %}
+
+  ins_encode(riscv_enc_mov_nk(dst, con));
+
+  ins_pipe(ialu_imm);
+%}
+
+// Load Float Constant
+// Loads a float constant with flw from the address produced by
+// $constantaddress($con), i.e. from the constant table named in the format
+// text. Costed as a memory load (LOAD_COST).
+instruct loadConF(fRegF dst, immF con) %{
+  match(Set dst con);
+
+  ins_cost(LOAD_COST);
+  format %{
+    "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF"
+  %}
+
+  ins_encode %{
+    __ flw(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(fp_load_constant_s);
+%}
+
+// Matches the immF0 operand class and avoids the constant table: $dst is
+// written by a single fmv_w_x from the zr register. Costed at XFER_COST
+// instead of LOAD_COST.
+instruct loadConF0(fRegF dst, immF0 con) %{
+  match(Set dst con);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %}
+
+  ins_encode %{
+    __ fmv_w_x(as_FloatRegister($dst$$reg), zr);
+  %}
+
+  ins_pipe(fp_load_constant_s);
+%}
+
+// Load Double Constant
+// Loads a double constant with fld from the address produced by
+// $constantaddress($con), i.e. from the constant table named in the format
+// text. Costed as a memory load (LOAD_COST).
+instruct loadConD(fRegD dst, immD con) %{
+  match(Set dst con);
+
+  ins_cost(LOAD_COST);
+  format %{
+    "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD"
+  %}
+
+  ins_encode %{
+    __ fld(as_FloatRegister($dst$$reg), $constantaddress($con));
+  %}
+
+  ins_pipe(fp_load_constant_d);
+%}
+
+// Matches the immD0 operand class and avoids the constant table: $dst is
+// written by a single fmv_d_x from the zr register. Costed at XFER_COST
+// instead of LOAD_COST.
+instruct loadConD0(fRegD dst, immD0 con) %{
+  match(Set dst con);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %}
+
+  ins_encode %{
+    __ fmv_d_x(as_FloatRegister($dst$$reg), zr);
+  %}
+
+  ins_pipe(fp_load_constant_d);
+%}
+
+// Store Instructions
+// Store CMS card-mark Immediate
+instruct storeimmCM0(immI0 zero, memory mem)
+%{
+  // Card-mark store of zero, selected only when unnecessary_storestore(n)
+  // holds; emits a bare sb of zr with no barrier.
+  match(Set mem (StoreCM mem zero));
+  predicate(unnecessary_storestore(n));
+
+  format %{ "storestore (elided)\n\t"
+            "sb zr, $mem\t# byte, #@storeimmCM0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address card(as_Register($mem$$base), $mem$$disp);
+    __ sb(zr, card);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store CMS card-mark Immediate with intervening StoreStore
+// needed when using CMS with no conditional card marking
+instruct storeimmCM0_ordered(immI0 zero, memory mem)
+%{
+  // Unpredicated card-mark store of zero: a membar followed by sb of zr.
+  // The barrier requested is LoadStore | StoreStore, which includes the
+  // StoreStore named in the format text.
+  match(Set mem (StoreCM mem zero));
+
+  format %{ "membar(StoreStore)\n\t"
+            "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %}
+  ins_cost(ALU_COST + STORE_COST);
+
+  ins_encode %{
+    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+    Address card(as_Register($mem$$base), $mem$$disp);
+    __ sb(zr, card);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Byte
+instruct storeB(iRegIorL2I src, memory mem)
+%{
+  // Matches StoreB and emits one sb of $src to [base + disp].
+  match(Set mem (StoreB mem src));
+
+  format %{ "sb  $src, $mem\t# byte, #@storeB" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sb($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmB0(immI0 zero, memory mem)
+%{
+  // Byte store of the immI0 constant: stores zr directly, no source register.
+  match(Set mem (StoreB mem zero));
+
+  format %{ "sb zr, $mem\t# byte, #@storeimmB0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sb(zr, target);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Char/Short
+instruct storeC(iRegIorL2I src, memory mem)
+%{
+  // Matches StoreC and emits one sh of $src to [base + disp].
+  match(Set mem (StoreC mem src));
+
+  format %{ "sh  $src, $mem\t# short, #@storeC" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sh($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmC0(immI0 zero, memory mem)
+%{
+  // Halfword store of the immI0 constant: stores zr directly, no source register.
+  match(Set mem (StoreC mem zero));
+
+  format %{ "sh  zr, $mem\t# short, #@storeimmC0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sh(zr, target);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Integer
+instruct storeI(iRegIorL2I src, memory mem)
+%{
+  // Matches StoreI and emits one sw of $src to [base + disp].
+  match(Set mem(StoreI mem src));
+
+  format %{ "sw  $src, $mem\t# int, #@storeI" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sw($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeimmI0(immI0 zero, memory mem)
+%{
+  // Word store of the immI0 constant: stores zr directly, no source register.
+  match(Set mem(StoreI mem zero));
+
+  format %{ "sw  zr, $mem\t# int, #@storeimmI0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sw(zr, target);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Long (64 bit signed)
+instruct storeL(iRegL src, memory mem)
+%{
+  // Matches StoreL and emits one sd of $src to [base + disp].
+  match(Set mem (StoreL mem src));
+
+  format %{ "sd  $src, $mem\t# long, #@storeL" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sd($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Long zero (64 bit), using the zero register as the source
+instruct storeimmL0(immL0 zero, memory mem)
+%{
+  // Doubleword store of the immL0 constant: stores zr directly, no source register.
+  match(Set mem (StoreL mem zero));
+
+  format %{ "sd  zr, $mem\t# long, #@storeimmL0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sd(zr, target);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Pointer
+instruct storeP(iRegP src, memory mem)
+%{
+  // Matches StoreP and emits one sd of the pointer register $src to [base + disp].
+  match(Set mem (StoreP mem src));
+
+  format %{ "sd  $src, $mem\t# ptr, #@storeP" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sd($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Null Pointer, using the zero register as the source
+instruct storeimmP0(immP0 zero, memory mem)
+%{
+  // Pointer store of the immP0 constant: stores zr directly, no source register.
+  match(Set mem (StoreP mem zero));
+
+  format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sd(zr, target);
+  %}
+
+  ins_pipe(istore_mem);
+%}
+
+// Store Compressed Pointer
+instruct storeN(iRegN src, memory mem)
+%{
+  // Matches StoreN and emits one sw of the narrow register $src to [base + disp].
+  match(Set mem (StoreN mem src));
+
+  format %{ "sw  $src, $mem\t# compressed ptr, #@storeN" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sw($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
+%{
+  // Narrow-null store that writes the $heapbase register instead of a
+  // constant. Only selected when both the narrow oop base and the narrow
+  // klass base are NULL (the format text notes rheapbase==0 in that case).
+  match(Set mem (StoreN mem zero));
+  predicate(Universe::narrow_oop_base() == NULL &&
+            Universe::narrow_klass_base() == NULL);
+
+  format %{ "sw  rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sw($heapbase$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// Store Float
+instruct storeF(fRegF src, memory mem)
+%{
+  // Matches StoreF and emits one fsw of the float register $src to [base + disp].
+  match(Set mem (StoreF mem src));
+
+  format %{ "fsw  $src, $mem\t# float, #@storeF" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ fsw(as_FloatRegister($src$$reg), target);
+  %}
+
+  ins_pipe(fp_store_reg_s);
+%}
+
+// Store Double
+instruct storeD(fRegD src, memory mem)
+%{
+  // Matches StoreD and emits one fsd of the double register $src to [base + disp].
+  match(Set mem (StoreD mem src));
+
+  format %{ "fsd  $src, $mem\t# double, #@storeD" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ fsd(as_FloatRegister($src$$reg), target);
+  %}
+
+  ins_pipe(fp_store_reg_d);
+%}
+
+// Store Compressed Klass Pointer
+instruct storeNKlass(iRegN src, memory mem)
+%{
+  // Matches StoreNKlass and emits one sw of the narrow register $src to [base + disp].
+  match(Set mem (StoreNKlass mem src));
+
+  format %{ "sw  $src, $mem\t# compressed klass ptr, #@storeNKlass" %}
+  ins_cost(STORE_COST);
+
+  ins_encode %{
+    // The CompressibleRegion guard is live for the whole encode body.
+    Assembler::CompressibleRegion cr(&_masm);
+    Address target(as_Register($mem$$base), $mem$$disp);
+    __ sw($src$$Register, target);
+  %}
+
+  ins_pipe(istore_reg_mem);
+%}
+
+// ============================================================================
+// Atomic operation instructions
+//
+// Intel and SPARC both implement Ideal Node LoadPLocked and
+// Store{PIL}Conditional instructions using a normal load for the
+// LoadPLocked and a CAS for the Store{PIL}Conditional.
+//
+// The ideal code appears only to use LoadPLocked/storePConditional as a
+// pair to lock object allocations from Eden space when not using
+// TLABs.
+//
+// There does not appear to be a Load{IL}Locked Ideal Node and the
+// Ideal code appears to use Store{IL}Conditional as an alias for CAS
+// and to use StoreIConditional only for 32-bit and StoreLConditional
+// only for 64-bit.
+//
+// We implement LoadPLocked and storePConditional instructions using,
+// respectively the RISCV hw load-reserve and store-conditional
+// instructions. Whereas we must implement each of
+// Store{IL}Conditional using a CAS which employs a pair of
+// instructions comprising a load-reserve followed by a
+// store-conditional.
+
+
+// Locked-load (load reserved) of the current heap-top
+// used when updating the eden heap top
+// implemented using lr_d on RISCV64
+// Matches LoadPLocked on an `indirect` memory operand. The effective address
+// is first formed in t0, then lr_d loads from it into $dst with the
+// Assembler::aq flag. Scheduled in the serializing pipe_serial class.
+instruct loadPLocked(iRegPNoSp dst, indirect mem)
+%{
+  match(Set dst (LoadPLocked mem));
+
+  ins_cost(ALU_COST * 2 + LOAD_COST);
+
+  format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %}
+
+  ins_encode %{
+    // t0 is used as the address register for lr_d.
+    __ la(t0, Address(as_Register($mem$$base), $mem$$disp));
+    __ lr_d($dst$$Register, t0, Assembler::aq);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// Conditional-store of the updated heap-top.
+// Used during allocation of the shared heap.
+// implemented using sc_d on RISCV64.
+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
+%{
+  // Store-conditional counterpart of loadPLocked. $oldval belongs to the
+  // matched ideal subtree but is not referenced by the encoding: only
+  // $newval is stored, and the sc_d result is written into $cr.
+  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
+
+  ins_cost(ALU_COST * 2 + STORE_COST);
+
+  // The operand list in the disassembly text is comma-separated; the comma
+  // previously sat after $heap_top_ptr instead of between the two operands.
+  format %{
+    "sc_d t1, $newval, $heap_top_ptr\t# ptr store conditional, #@storePConditional"
+  %}
+
+  ins_encode %{
+    // Form the effective address in t0, then issue sc_d with the rl flag.
+    __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp));
+    __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// storeLConditional is used by PhaseMacroExpand::expand_lock_node
+// when attempting to rebias a lock towards the current thread.  We
+// must use the acquire form of cmpxchg in order to guarantee acquire
+// semantics in this case.
+instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr)
+%{
+  // 64-bit conditional store implemented as a cmpxchg (aq + rl) whose result
+  // lands in $cr, followed by an xorr with $oldval so that $cr is 0 on a
+  // successful write (as stated in the format text).
+  match(Set cr (StoreLConditional mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST);
+
+  // The two pseudo-instructions are separated by "\n\t" so they print on
+  // separate lines, like every other two-line format in this file.
+  format %{
+    "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
+    "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
+    __ xorr($cr$$Register,$cr$$Register, $oldval$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// storeIConditional also has acquire semantics, for no better reason
+// than matching storeLConditional.
+instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr)
+%{
+  // 32-bit conditional store implemented as a cmpxchg (aq + rl) whose result
+  // lands in $cr, followed by an xorr with $oldval so that $cr is 0 on a
+  // successful write (as stated in the format text).
+  match(Set cr (StoreIConditional mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2);
+
+  // The two pseudo-instructions are separated by "\n\t" so they print on
+  // separate lines, like every other two-line format in this file.
+  format %{
+    "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval\n\t"
+    "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register);
+    __ xorr($cr$$Register,$cr$$Register, $oldval$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// standard CompareAndSwapX when we are using barriers
+// these have higher priority than the rules selected by a predicate
+// Byte-sized CompareAndSwap with no predicate. Calls cmpxchg_narrow_value
+// with size int8, relaxed acquire, rl release and result_as_bool = true, so
+// $res holds the boolean outcome. $oldval/$newval are pinned to the
+// iRegI_R12/iRegI_R13 operand classes and declared USE_KILL; tmp1..tmp3 are
+// scratch TEMPs and $cr is killed.
+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                         iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
+
+  effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                            Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// Short-sized CompareAndSwap with no predicate. Same shape as
+// compareAndSwapB but passes Assembler::int16 to cmpxchg_narrow_value and is
+// costed with one more ALU_COST. Acquire is relaxed, release is rl, and
+// $res receives the boolean outcome.
+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                         iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
+
+  effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                            Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register,
+                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// Int CompareAndSwap with no predicate. Code generation is delegated to the
+// riscv_enc_cmpxchgw encoding class (defined outside this section); per the
+// format text $res ends up as 1 or 0.
+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI"
+  %}
+
+  ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Long CompareAndSwap with no predicate. Code generation is delegated to the
+// riscv_enc_cmpxchg encoding class (defined outside this section), which is
+// also used by compareAndSwapP.
+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL"
+  %}
+
+  ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Pointer CompareAndSwap with no predicate. Shares the riscv_enc_cmpxchg
+// encoding class with compareAndSwapL (defined outside this section).
+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP"
+  %}
+
+  ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Narrow-oop CompareAndSwap with no predicate. Code generation is delegated
+// to the riscv_enc_cmpxchgn encoding class (defined outside this section).
+// Costed with ALU_COST * 8, two more than the int/long/ptr variants.
+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN"
+  %}
+
+  ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// alternative CompareAndSwapX when we are eliding barriers
+// Byte-sized CompareAndSwap selected only when
+// needs_acquiring_load_reserved(n) holds. Identical to compareAndSwapB except
+// that the acquire argument passed to cmpxchg_narrow_value is Assembler::aq
+// instead of Assembler::relaxed.
+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                            iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4);
+
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                            Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// Short-sized CompareAndSwap selected only when
+// needs_acquiring_load_reserved(n) holds. Identical to compareAndSwapS except
+// that the acquire argument passed to cmpxchg_narrow_value is Assembler::aq
+// instead of Assembler::relaxed.
+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                            iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4);
+
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                            Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register,
+                            true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// Int CompareAndSwap selected only when needs_acquiring_load_reserved(n)
+// holds. Delegates to the riscv_enc_cmpxchgw_acq encoding class (defined
+// outside this section); same cost as the unpredicated compareAndSwapI.
+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq"
+  %}
+
+  ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Long CompareAndSwap selected only when needs_acquiring_load_reserved(n)
+// holds. Delegates to the riscv_enc_cmpxchg_acq encoding class (defined
+// outside this section), shared with compareAndSwapPAcq.
+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq"
+  %}
+
+  ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Pointer CompareAndSwap selected only when needs_acquiring_load_reserved(n)
+// holds. Shares the riscv_enc_cmpxchg_acq encoding class with
+// compareAndSwapLAcq (defined outside this section).
+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq"
+  %}
+
+  ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Narrow-oop CompareAndSwap selected only when
+// needs_acquiring_load_reserved(n) holds. Delegates to the
+// riscv_enc_cmpxchgn_acq encoding class (defined outside this section).
+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4);
+
+  format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t"
+    "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq"
+  %}
+
+  ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval));
+
+  ins_pipe(pipe_slow);
+%}
+
+// Sundry CAS operations.  Note that release is always true,
+// regardless of the memory ordering of the CAS.  This is because we
+// need the volatile case to be sequentially consistent but there is
+// no trailing StoreLoad barrier emitted by C2.  Unfortunately we
+// can't check the type of memory ordering here, so we always emit a
+// sc_d(w) with rl bit set.
+// Byte-sized CompareAndExchange. Calls cmpxchg_narrow_value with size int8,
+// relaxed acquire, rl release and result_as_bool = false, so $res is not
+// reduced to a boolean. $oldval/$newval are USE_KILL, tmp1..tmp3 are TEMPs
+// and $cr is killed.
+// NOTE(review): the format text labels this "weak"; confirm that matches the
+// behavior of cmpxchg_narrow_value, which is defined outside this section.
+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
+
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                            /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+// Short-sized CompareAndExchange. Same shape as compareAndExchangeB but
+// passes Assembler::int16 and is costed with one more ALU_COST. Acquire is
+// relaxed, release is rl, and result_as_bool is false.
+// NOTE(review): the format text labels this "weak"; confirm that matches the
+// behavior of cmpxchg_narrow_value, which is defined outside this section.
+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
+
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                            /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
+  // Strong int compare-and-exchange via the non-weak cmpxchg helper (size int32).
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (int, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
+  // Strong long compare-and-exchange via the non-weak cmpxchg helper (size int64).
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (long, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+  // Strong narrow-oop compare-and-exchange via the non-weak cmpxchg helper (size uint32).
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+  // Strong pointer compare-and-exchange via the non-weak cmpxchg helper (size int64).
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeB: same strong helper, acquire is aq instead of relaxed.
+  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                            /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeS: same strong helper, acquire is aq instead of relaxed.
+  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                            /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+                            /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeI: strong cmpxchg (int32) with acquire = aq.
+  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeL: strong cmpxchg (int64) with acquire = aq.
+  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeN: strong cmpxchg (uint32) with acquire = aq.
+  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+  // NOTE(review): cost is ALU_COST here but ALU_COST * 3 in compareAndExchangeN - confirm intended.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of compareAndExchangeP: strong cmpxchg (int64) with acquire = aq.
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST);
+  // TEMP_DEF keeps res in a register distinct from the inputs.
+  effect(TEMP_DEF res);
+
+  format %{
+    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, strong) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq"
+  %}
+
+  ins_encode %{
+    __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+               /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
+  // Weak byte CAS through weak_cmpxchg_narrow_value; per the format text, res == 1 on success.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapB"
+  %}
+
+  ins_encode %{
+    __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                                 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                             iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
+  // Weak short CAS through weak_cmpxchg_narrow_value; per the format text, res == 1 on success.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapS"
+  %}
+
+  ins_encode %{
+    __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                                 /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register,
+                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
+  // Weak int CAS through cmpxchg_weak (size int32); per the format text, res == 1 on success.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapI"
+  %}
+  // acquire is relaxed, release is rl; this rule has no predicate and no effect() list.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+                    /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
+  // Weak long CAS through cmpxchg_weak (size int64); res is an int operand, 1 on success per the format text.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapL"
+  %}
+  // acquire is relaxed, release is rl; this rule has no predicate and no effect() list.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+                    /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+  // Weak narrow-oop CAS through cmpxchg_weak (size uint32); res is an int operand, 1 on success per the format text.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapN"
+  %}
+  // acquire is relaxed, release is rl; this rule has no predicate and no effect() list.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+                    /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  // Weak pointer CAS through cmpxchg_weak (size int64); res is an int operand, 1 on success per the format text.
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapP"
+  %}
+  // acquire is relaxed, release is rl; this rule has no predicate and no effect() list.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+                    /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapB: same helper, acquire is aq instead of relaxed.
+  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapBAcq"
+  %}
+
+  ins_encode %{
+    __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8,
+                                 /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval,
+                                iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapS: same helper, acquire is aq instead of relaxed.
+  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7);
+  // oldval/newval are consumed and clobbered (USE_KILL); tmp1-3 are scratch; cr is killed.
+  effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapSAcq"
+  %}
+
+  ins_encode %{
+    __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16,
+                                 /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register,
+                                 $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapI: cmpxchg_weak (int32) with acquire = aq.
+  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapIAcq"
+  %}
+  // Per the format text, res == 1 on success.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32,
+                    /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapL: cmpxchg_weak (int64) with acquire = aq.
+  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapLAcq"
+  %}
+  // Per the format text, res == 1 on success.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+                    /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapN: cmpxchg_weak (uint32) with acquire = aq.
+  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapNAcq"
+  %}
+  // Per the format text, res == 1 on success.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32,
+                    /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of weakCompareAndSwapP: cmpxchg_weak (int64) with acquire = aq.
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+
+  ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2);
+
+  format %{
+    "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t"
+    "# $res == 1 when success, #@weakCompareAndSwapPAcq"
+  %}
+  // Per the format text, res == 1 on success.
+  ins_encode %{
+    __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64,
+                    /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev)
+%{
+  match(Set prev (GetAndSetI mem newv));
+  // Int get-and-set: prev is the Set target and the first argument of atomic_xchgw.
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchgw  $prev, $newv, [$mem]\t#@get_and_setI" %}
+
+  ins_encode %{
+    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev)
+%{
+  match(Set prev (GetAndSetL mem newv));
+  // Long get-and-set: prev is the Set target and the first argument of atomic_xchg.
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchg  $prev, $newv, [$mem]\t#@get_and_setL" %}
+
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev)
+%{
+  match(Set prev (GetAndSetN mem newv));
+  // Narrow-oop get-and-set via atomic_xchgwu; note prev is declared iRegINoSp while newv is iRegN.
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %}
+
+  ins_encode %{
+    __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev)
+%{
+  match(Set prev (GetAndSetP mem newv));
+  // Pointer get-and-set: same atomic_xchg helper as get_and_setL.
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchg  $prev, $newv, [$mem]\t#@get_and_setP" %}
+
+  ins_encode %{
+    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_setI: gated by the predicate above, uses atomic_xchgalw.
+  match(Set prev (GetAndSetI mem newv));
+
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]\t#@get_and_setIAcq" %}
+
+  ins_encode %{
+    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_setL: gated by the predicate above, uses atomic_xchgal.
+  match(Set prev (GetAndSetL mem newv));
+
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]\t#@get_and_setLAcq" %}
+
+  ins_encode %{
+    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_setN: gated by the predicate above, uses atomic_xchgalwu.
+  match(Set prev (GetAndSetN mem newv));
+
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %}
+
+  ins_encode %{
+    __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_setP: gated by the predicate above, uses atomic_xchgal.
+  match(Set prev (GetAndSetP mem newv));
+
+  ins_cost(ALU_COST);
+
+  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]\t#@get_and_setPAcq" %}
+
+  ins_encode %{
+    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr)
+%{
+  match(Set newval (GetAndAddL mem incr));
+  // Long get-and-add with a register increment; newval is the destination passed to atomic_add.
+  ins_cost(ALU_COST);
+  // NOTE(review): despite its name, newval presumably receives the value read before the add - confirm.
+  format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %}
+
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used());
+  // Result-unused form of get_and_addL: noreg is passed as the destination of atomic_add.
+  match(Set dummy (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %}
+
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr)
+%{
+  match(Set newval (GetAndAddL mem incr));
+  // Long get-and-add with an immLAdd constant increment; the constant is passed to atomic_add directly.
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %}
+
+  ins_encode %{
+    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used());
+  // Result-unused form of get_and_addLi: noreg destination, constant increment.
+  match(Set dummy (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %}
+
+  ins_encode %{
+    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr)
+%{
+  match(Set newval (GetAndAddI mem incr));
+  // Int get-and-add with a register increment; newval is the destination passed to atomic_addw.
+  ins_cost(ALU_COST);
+  // NOTE(review): despite its name, newval presumably receives the value read before the add - confirm.
+  format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %}
+
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used());
+  // Result-unused form of get_and_addI: noreg is passed as the destination of atomic_addw.
+  match(Set dummy (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %}
+
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr)
+%{
+  match(Set newval (GetAndAddI mem incr));
+  // Int get-and-add with an immIAdd constant increment; the constant is passed to atomic_addw directly.
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %}
+
+  ins_encode %{
+    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used());
+  // Result-unused form of get_and_addIi: noreg destination, constant increment.
+  match(Set dummy (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %}
+
+  ins_encode %{
+    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_addL: gated by the predicate above, uses atomic_addal.
+  match(Set newval (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %}
+
+  ins_encode %{
+    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
+  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+  // Acquiring, result-unused long get-and-add: both predicate terms must hold; noreg destination.
+  match(Set dummy (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %}
+
+  ins_encode %{
+    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_addLi: atomic_addal with an immLAdd constant increment.
+  match(Set newval (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %}
+
+  ins_encode %{
+    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+  // Acquiring, result-unused long get-and-add with a constant increment; noreg destination.
+  match(Set dummy (GetAndAddL mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %}
+
+  ins_encode %{
+    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_addI: gated by the predicate above, uses atomic_addalw.
+  match(Set newval (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %}
+
+  ins_encode %{
+    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+  // Acquiring, result-unused int get-and-add: both predicate terms must hold; noreg destination.
+  match(Set dummy (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %}
+
+  ins_encode %{
+    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr)
+%{
+  predicate(needs_acquiring_load_reserved(n));
+  // Acquiring form of get_and_addIi: atomic_addalw with an immIAdd constant increment.
+  match(Set newval (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %}
+
+  ins_encode %{
+    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr)
+%{
+  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n));
+  // Acquiring, result-unused int get-and-add with a constant increment; noreg destination.
+  match(Set dummy (GetAndAddI mem incr));
+
+  ins_cost(ALU_COST);
+
+  format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %}
+
+  ins_encode %{
+    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Arithmetic Instructions
+//
+
+// Integer Addition
+
+// TODO
+// these currently employ operations which do not set CR and hence are
+// not flagged as killing CR but we would like to isolate the cases
+// where we want to set flags from those where we don't. need to work
+// out how to do that.
+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (AddI src1 src2));
+  // 32-bit register + register add, emitted as one addw.
+  ins_cost(ALU_COST);
+  format %{ "addw  $dst, $src1, $src2\t#@addI_reg_reg" %}
+
+  ins_encode %{
+    // cr is a scoped object: it stays alive while the addw is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ addw($dst$$Register, $src1$$Register,
+            $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{
+  match(Set dst (AddI src1 src2));
+  // 32-bit add of a register and an immIAdd constant, emitted as addiw.
+  ins_cost(ALU_COST);
+  format %{ "addiw  $dst, $src1, $src2\t#@addI_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // The constant is handed to addiw directly, as in addI_reg_imm_l2i.
+    __ addiw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{
+  match(Set dst (AddI (ConvL2I src1) src2));
+  // Matches the ConvL2I together with the add: addiw reads the long register directly.
+  ins_cost(ALU_COST);
+  format %{ "addiw  $dst, $src1, $src2\t#@addI_reg_imm_l2i" %}
+
+  ins_encode %{
+    // cr is a scoped object: it stays alive while the addiw is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ addiw($dst$$Register, $src1$$Register,
+             $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Pointer Addition
+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
+  match(Set dst (AddP src1 src2));
+  // Pointer + long register offset, emitted as one add.
+  ins_cost(ALU_COST);
+  format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %}
+
+  ins_encode %{
+    // cr is a scoped object: it stays alive while the add is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ add($dst$$Register, $src1$$Register,
+           $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// If we shift left by 32 bits or more, we need not convert I2L.
+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{
+  match(Set dst (LShiftL (ConvI2L src) scale));
+  ins_cost(ALU_COST);
+  format %{ "slli  $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %}
+  // The ConvI2L is matched away: slli is applied to the int register as-is.
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ slli($dst$$Register, $src$$Register, $scale$$constant & 63);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Pointer Immediate Addition
+// n.b. this needs to be more expensive than using an indirect memory
+// operand
+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{
+  match(Set dst (AddP src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "addi  $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %}
+
+  ins_encode %{
+    // src2 is an immLAdd constant, so this add is the
+    // immediate (addi) form shown in the format text.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ add($dst$$Register, $src1$$Register,
+           $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Addition
+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (AddL src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "add  $dst, $src1, $src2\t#@addL_reg_reg" %}
+
+  ins_encode %{
+    // 64-bit register + register add; cr stays alive while it is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ add($dst$$Register, $src1$$Register,
+           $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Immediate Addition. No constant pool entries required.
+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (AddL src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "addi  $dst, $src1, $src2\t#@addL_reg_imm" %}
+
+  ins_encode %{
+    // src2 is an immLAdd constant, so this add is the
+    // immediate (addi) form shown in the format text.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ add($dst$$Register, $src1$$Register,
+           $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Subtraction
+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (SubI src1 src2));
+  // 32-bit register - register subtract, emitted as one subw.
+  ins_cost(ALU_COST);
+  format %{ "subw  $dst, $src1, $src2\t#@subI_reg_reg" %}
+
+  ins_encode %{
+    // cr is a scoped object: it stays alive while the subw is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ subw($dst$$Register, $src1$$Register,
+            $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Subtraction
+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{
+  match(Set dst (SubI src1 src2));
+  // 32-bit subtract of an immISub constant from a register.
+  ins_cost(ALU_COST);
+  format %{ "addiw  $dst, $src1, -$src2\t#@subI_reg_imm" %}
+
+  ins_encode %{
+    // src2 is a constant, so this subw is the immediate form:
+    // an addiw of the negated value, as the format text shows.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ subw($dst$$Register, $src1$$Register,
+            $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Long Subtraction
+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (SubL src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "sub  $dst, $src1, $src2\t#@subL_reg_reg" %}
+
+  ins_encode %{
+    // 64-bit register - register subtract; cr stays alive while it is emitted.
+    Assembler::CompressibleRegion cr(&_masm);
+    __ sub($dst$$Register, $src1$$Register,
+           $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Immediate Subtraction. No constant pool entries required.
+// Matches SubL of a long register and an immLSub constant. The format string
+// shows the operation as an addi of the negated constant.
+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{
+  match(Set dst (SubL src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "addi  $dst, $src1, -$src2\t#@subL_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // src2 is an immediate, so this sub call ends up emitting addi
+    __ sub(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           $src2$$constant);
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Integer Negation (special case for sub)
+// Matches SubI whose left operand is the int constant zero (immI0) and emits
+// negw, which the format string shows as "subw $dst, x0, $src".
+
+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
+  match(Set dst (SubI zero src));
+  ins_cost(ALU_COST);
+  format %{ "subw  $dst, x0, $src\t# int, #@negI_reg" %}
+
+  ins_encode %{
+    // negw is expected to expand to the subw shown in the format string
+    __ negw(as_Register($dst$$reg),
+            as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long Negation
+// Matches SubL whose left operand is the long constant zero (immL0) and emits
+// neg, which the format string shows as "sub $dst, x0, $src".
+
+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{
+  match(Set dst (SubL zero src));
+  ins_cost(ALU_COST);
+  format %{ "sub  $dst, x0, $src\t# long, #@negL_reg" %}
+
+  ins_encode %{
+    // neg is expected to expand to the sub shown in the format string
+    __ neg(as_Register($dst$$reg),
+           as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Integer Multiply
+// Matches MulI of two int registers and emits one mulw.
+
+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (MulI src1 src2));
+  ins_cost(IMUL_COST);
+  format %{ "mulw  $dst, $src1, $src2\t#@mulI" %}
+
+  // This is a word (32-bit) multiply; see the note on mulw below.
+  ins_encode %{
+    // On riscv64, mulw sign-extends the 32-bit product into the upper
+    // 32 bits of the destination register.
+    __ mulw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(imul_reg_reg);
+%}
+
+// Long Multiply
+// Matches MulL of two long registers and emits one mul.
+
+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (MulL src1 src2));
+  ins_cost(IMUL_COST);
+  format %{ "mul  $dst, $src1, $src2\t#@mulL" %}
+
+  ins_encode %{
+    __ mul(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+// Long Multiply High
+// Matches MulHiL of two long registers and emits one mulh.
+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2)
+%{
+  match(Set dst (MulHiL src1 src2));
+  ins_cost(IMUL_COST);
+  format %{ "mulh  $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %}
+
+  ins_encode %{
+    __ mulh(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(lmul_reg_reg);
+%}
+
+// Integer Divide
+// Matches DivI of two int registers. Code generation is delegated to the
+// riscv_enc_divw encoding class, which is defined outside this section.
+
+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (DivI src1 src2));
+  ins_cost(IDIVSI_COST);
+  format %{ "divw  $dst, $src1, $src2\t#@divI"%}
+
+  ins_encode(riscv_enc_divw(dst, src1, src2));
+  ins_pipe(idiv_reg_reg);
+%}
+
+// Int sign-bit extraction.
+// Matches an arithmetic right shift by 31 followed by a logical right shift
+// by 31 (both shift counts are immI_31) and replaces the pair with a single
+// srliw by 31.
+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
+  match(Set dst (URShiftI (RShiftI src1 div1) div2));
+  ins_cost(ALU_COST);
+  format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %}
+
+  ins_encode %{
+    __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Long Divide
+// Matches DivL of two long registers. Code generation is delegated to the
+// riscv_enc_div encoding class, which is defined outside this section.
+
+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (DivL src1 src2));
+  ins_cost(IDIVDI_COST);
+  format %{ "div  $dst, $src1, $src2\t#@divL" %}
+
+  ins_encode(riscv_enc_div(dst, src1, src2));
+  ins_pipe(ldiv_reg_reg);
+%}
+
+// Long sign-bit extraction.
+// Matches an arithmetic right shift by 63 followed by a logical right shift
+// by 63 (both shift counts are immI_63) and replaces the pair with a single
+// srli by 63.
+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
+  match(Set dst (URShiftL (RShiftL src1 div1) div2));
+  ins_cost(ALU_COST);
+  format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63);
+  %}
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Integer Remainder
+// Matches ModI of two int registers. Code generation is delegated to the
+// riscv_enc_modw encoding class, which is defined outside this section.
+
+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (ModI src1 src2));
+  ins_cost(IDIVSI_COST);
+  format %{ "remw  $dst, $src1, $src2\t#@modI" %}
+
+  ins_encode(riscv_enc_modw(dst, src1, src2));
+  // The remainder is costed as an int divide (IDIVSI_COST), so it is modelled
+  // on the int divide pipeline class, as divI is, rather than as a
+  // single-cycle ALU operation.
+  ins_pipe(idiv_reg_reg);
+%}
+
+// Long Remainder
+// Matches ModL of two long registers. Code generation is delegated to the
+// riscv_enc_mod encoding class, which is defined outside this section.
+
+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (ModL src1 src2));
+  ins_cost(IDIVDI_COST);
+  format %{ "rem  $dst, $src1, $src2\t#@modL" %}
+
+  ins_encode(riscv_enc_mod(dst, src1, src2));
+  // The remainder is costed as a long divide (IDIVDI_COST), so it is modelled
+  // on the long divide pipeline class, as divL is, rather than as a
+  // single-cycle ALU operation.
+  ins_pipe(ldiv_reg_reg);
+%}
+
+// Integer Shifts
+
+// Shift Left Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+// Matches LShiftI with a register shift count and emits one sllw.
+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "sllw  $dst, $src1, $src2\t#@lShiftI_reg_reg" %}
+
+  ins_encode %{
+    __ sllw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+// Matches LShiftI with a constant shift count and emits one slliw with the
+// count masked to its low 5 bits.
+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (LShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "slliw  $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %}
+
+  ins_encode %{
+    // the shift amount of a word shift is encoded in the lower
+    // 5 bits of the I-immediate field, hence the 0x1f mask
+    __ slliw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             (unsigned) $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+// Matches URShiftI with a register shift count and emits one srlw.
+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "srlw  $dst, $src1, $src2\t#@urShiftI_reg_reg" %}
+
+  ins_encode %{
+    __ srlw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+// Matches URShiftI with a constant shift count and emits one srliw with the
+// count masked to its low 5 bits.
+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (URShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "srliw  $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %}
+
+  ins_encode %{
+    // the shift amount of a word shift is encoded in the lower
+    // 5 bits of the I-immediate field, hence the 0x1f mask
+    __ srliw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             (unsigned) $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+// In RV64I, only the low 5 bits of src2 are considered for the shift amount
+// Matches RShiftI with a register shift count and emits one sraw.
+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "sraw  $dst, $src1, $src2\t#@rShiftI_reg_reg" %}
+
+  ins_encode %{
+    // sraw sign-extends the 32-bit result into the high 32 bits of dst
+    __ sraw(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+// Matches RShiftI with a constant shift count and emits one sraiw with the
+// count masked to its low 5 bits.
+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
+  match(Set dst (RShiftI src1 src2));
+  ins_cost(ALU_COST);
+  format %{ "sraiw  $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %}
+
+  ins_encode %{
+    // sraiw sign-extends the 32-bit result into the high 32 bits of dst
+    __ sraiw(as_Register($dst$$reg),
+             as_Register($src1$$reg),
+             (unsigned) $src2$$constant & 0x1f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Long Shifts
+
+// Shift Left Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+// Matches LShiftL with an int register shift count and emits one sll.
+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "sll  $dst, $src1, $src2\t#@lShiftL_reg_reg" %}
+
+  ins_encode %{
+    __ sll(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Left Immediate
+// Matches LShiftL with a constant shift count and emits one slli with the
+// count masked to its low 6 bits.
+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (LShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "slli  $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ slli(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Logical Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+// Matches URShiftL with an int register shift count and emits one srl.
+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srl  $dst, $src1, $src2\t#@urShiftL_reg_reg" %}
+
+  ins_encode %{
+    __ srl(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Logical Immediate
+// Matches URShiftL with a constant shift count and emits one srli with the
+// count masked to its low 6 bits.
+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srli  $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srli(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// A special-case pattern for card table stores.
+// Matches a logical right shift of a pointer that has been cast to a long
+// (CastP2X), so the cast needs no instruction of its own: the pointer
+// register is shifted directly by srli.
+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srli  $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srli(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Shift Right Arithmetic Register
+// In RV64I, only the low 6 bits of src2 are considered for the shift amount
+// Matches RShiftL with an int register shift count and emits one sra.
+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "sra  $dst, $src1, $src2\t#@rShiftL_reg_reg" %}
+
+  ins_encode %{
+    __ sra(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg_vshift);
+%}
+
+// Shift Right Arithmetic Immediate
+// Matches RShiftL with a constant shift count and emits one srai with the
+// count masked to its low 6 bits.
+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
+  match(Set dst (RShiftL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "srai  $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    // the shift amount is encoded in the lower
+    // 6 bits of the I-immediate field for RV64I
+    __ srai(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (unsigned) $src2$$constant & 0x3f);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// Integer bitwise NOT.
+// Matches XorI of an int register with the constant -1 (immI_M1) and emits
+// xori with a literal -1 immediate.
+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{
+  match(Set dst (XorI src1 m1));
+  ins_cost(ALU_COST);
+  format %{ "xori  $dst, $src1, -1\t#@regI_not_reg" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long bitwise NOT.
+// Matches XorL of a long register with the constant -1 (immL_M1) and emits
+// xori with a literal -1 immediate.
+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{
+  match(Set dst (XorL src1 m1));
+  ins_cost(ALU_COST);
+  format %{ "xori  $dst, $src1, -1\t#@regL_not_reg" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+
+// ============================================================================
+// Floating Point Arithmetic Instructions
+
+// Float Addition
+// Matches AddF of two float registers and emits one fadd.s.
+// NOTE(review): costed with FMUL_SINGLE_COST; presumably add and multiply
+// share a cost constant on purpose. Confirm.
+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (AddF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fadd.s  $dst, $src1, $src2\t#@addF_reg_reg" %}
+
+  ins_encode %{
+    __ fadd_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Double Addition
+// Matches AddD of two double registers and emits one fadd.d.
+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (AddD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fadd.d  $dst, $src1, $src2\t#@addD_reg_reg" %}
+
+  ins_encode %{
+    __ fadd_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Float Subtraction
+// Matches SubF of two float registers and emits one fsub.s.
+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (SubF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fsub.s  $dst, $src1, $src2\t#@subF_reg_reg" %}
+
+  ins_encode %{
+    __ fsub_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Double Subtraction
+// Matches SubD of two double registers and emits one fsub.d.
+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (SubD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fsub.d  $dst, $src1, $src2\t#@subD_reg_reg" %}
+
+  ins_encode %{
+    __ fsub_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Float Multiplication
+// Matches MulF of two float registers and emits one fmul.s.
+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MulF src1 src2));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmul.s  $dst, $src1, $src2\t#@mulF_reg_reg" %}
+
+  ins_encode %{
+    __ fmul_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Double Multiplication
+// Matches MulD of two double registers and emits one fmul.d.
+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MulD src1 src2));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmul.d  $dst, $src1, $src2\t#@mulD_reg_reg" %}
+
+  ins_encode %{
+    __ fmul_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Float fused multiply-add: src1 * src2 + src3
+// Only selected when UseFMA is set. In the FmaF node the addend (src3) comes
+// first and the two factors are wrapped in a Binary node.
+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary src1 src2)));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmadd.s  $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %}
+
+  ins_encode %{
+    __ fmadd_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double fused multiply-add: src1 * src2 + src3
+// Only selected when UseFMA is set. In the FmaD node the addend (src3) comes
+// first and the two factors are wrapped in a Binary node.
+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary src1 src2)));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmadd.d  $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %}
+
+  ins_encode %{
+    __ fmadd_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float fused multiply-subtract: src1 * src2 - src3
+// Only selected when UseFMA is set. Matches an FmaF whose addend is negated
+// (NegF src3), so the negation is folded into fmsub.s.
+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fmsub.s  $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %}
+
+  ins_encode %{
+    __ fmsub_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double fused multiply-subtract: src1 * src2 - src3
+// Only selected when UseFMA is set. Matches an FmaD whose addend is negated
+// (NegD src3), so the negation is folded into fmsub.d.
+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fmsub.d  $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %}
+
+  ins_encode %{
+    __ fmsub_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src1$$reg),
+               as_FloatRegister($src2$$reg),
+               as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float negated multiply-subtract: -src1 * src2 + src3
+// Only selected when UseFMA is set. There are two match rules because the
+// negation may sit on either factor of the product.
+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
+  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fnmsub.s  $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %}
+
+  ins_encode %{
+    __ fnmsub_s(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double negated multiply-subtract: -src1 * src2 + src3
+// Only selected when UseFMA is set. There are two match rules because the
+// negation may sit on either factor of the product.
+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
+  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fnmsub.d  $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %}
+
+  ins_encode %{
+    __ fnmsub_d(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Float negated multiply-add: -src1 * src2 - src3
+// Only selected when UseFMA is set. The addend is negated (NegF src3) and the
+// product negation may sit on either factor, hence two match rules.
+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
+  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));
+
+  ins_cost(FMUL_SINGLE_COST);
+  format %{ "fnmadd.s  $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %}
+
+  ins_encode %{
+    __ fnmadd_s(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Double negated multiply-add: -src1 * src2 - src3
+// Only selected when UseFMA is set. The addend is negated (NegD src3) and the
+// product negation may sit on either factor, hence two match rules.
+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{
+  predicate(UseFMA);
+  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
+  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));
+
+  ins_cost(FMUL_DOUBLE_COST);
+  format %{ "fnmadd.d  $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %}
+
+  ins_encode %{
+    __ fnmadd_d(as_FloatRegister($dst$$reg),
+                as_FloatRegister($src1$$reg),
+                as_FloatRegister($src2$$reg),
+                as_FloatRegister($src3$$reg));
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Math.max(FF)F
+// Matches MaxF and delegates to the minmax_FD macro with is_double=false and
+// is_min=false. dst is declared TEMP_DEF.
+// NOTE(review): TEMP_DEF presumably keeps dst from sharing a register with
+// the inputs because the macro writes dst before it has finished reading
+// them. Confirm against minmax_FD.
+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MaxF src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "maxF $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 false /* is_double */, false /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.min(FF)F
+// Matches MinF and delegates to the minmax_FD macro with is_double=false and
+// is_min=true. dst is declared TEMP_DEF.
+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (MinF src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "minF $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 false /* is_double */, true /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.max(DD)D
+// Matches MaxD and delegates to the minmax_FD macro with is_double=true and
+// is_min=false. dst is declared TEMP_DEF.
+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MaxD src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "maxD $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 true /* is_double */, false /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Math.min(DD)D
+// Matches MinD and delegates to the minmax_FD macro with is_double=true and
+// is_min=true. dst is declared TEMP_DEF.
+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (MinD src1 src2));
+  effect(TEMP_DEF dst);
+
+  format %{ "minD $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ minmax_FD(as_FloatRegister($dst$$reg),
+                 as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg),
+                 true /* is_double */, true /* is_min */);
+  %}
+
+  ins_pipe(fp_dop_reg_reg_d);
+%}
+
+// Float Division
+// Matches DivF of two float registers and emits one fdiv.s.
+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{
+  match(Set dst (DivF src1  src2));
+
+  ins_cost(FDIV_COST);
+  format %{ "fdiv.s  $dst, $src1, $src2\t#@divF_reg_reg" %}
+
+  ins_encode %{
+    __ fdiv_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_div_s);
+%}
+
+// Double Division
+// Matches DivD of two double registers and emits one fdiv.d.
+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{
+  match(Set dst (DivD src1  src2));
+
+  ins_cost(FDIV_COST);
+  format %{ "fdiv.d  $dst, $src1, $src2\t#@divD_reg_reg" %}
+
+  ins_encode %{
+    __ fdiv_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+  %}
+
+  ins_pipe(fp_div_d);
+%}
+
+// Float Negation
+// Matches NegF and emits fneg_s, which the format string shows as
+// "fsgnjn.s $dst, $src, $src".
+instruct negF_reg_reg(fRegF dst, fRegF src) %{
+  match(Set dst (NegF src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjn.s  $dst, $src, $src\t#@negF_reg_reg" %}
+
+  ins_encode %{
+    __ fneg_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_s);
+%}
+
+// Double Negation
+// Matches NegD and emits fneg_d, which the format string shows as
+// "fsgnjn.d $dst, $src, $src".
+instruct negD_reg_reg(fRegD dst, fRegD src) %{
+  match(Set dst (NegD src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjn.d  $dst, $src, $src\t#@negD_reg_reg" %}
+
+  ins_encode %{
+    __ fneg_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_d);
+%}
+
+// Integer Absolute Value
+// Matches AbsI and expands to three instructions, using t0 as scratch:
+//   t0  = src >> 31 (arithmetic)
+//   dst = src + t0
+//   dst = dst ^ t0
+// NOTE(review): t0 is written without an effect() declaration; presumably it
+// is reserved as a scratch register and never allocated. Confirm.
+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (AbsI src));
+
+  ins_cost(ALU_COST * 3);
+  format %{
+    "sraiw  t0, $src, 0x1f\n\t"
+    "addw  $dst, $src, t0\n\t"
+    "xorr  $dst, $dst, t0\t#@absI_reg"
+  %}
+
+  ins_encode %{
+    __ sraiw(t0, as_Register($src$$reg), 0x1f);
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0);
+    __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Long Absolute Value
+// Matches AbsL and expands to three instructions, using t0 as scratch:
+//   t0  = src >> 63 (arithmetic)
+//   dst = src + t0
+//   dst = dst ^ t0
+instruct absL_reg(iRegLNoSp dst, iRegL src) %{
+  match(Set dst (AbsL src));
+
+  ins_cost(ALU_COST * 3);
+  format %{
+    "srai  t0, $src, 0x3f\n\t"
+    "add  $dst, $src, t0\n\t"
+    "xorr  $dst, $dst, t0\t#@absL_reg"
+  %}
+
+  ins_encode %{
+    __ srai(t0, as_Register($src$$reg), 0x3f);
+    __ add(as_Register($dst$$reg), as_Register($src$$reg), t0);
+    __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Float Absolute Value
+// Matches AbsF and emits fabs_s, which the format string shows as
+// "fsgnjx.s $dst, $src, $src".
+instruct absF_reg(fRegF dst, fRegF src) %{
+  match(Set dst (AbsF src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjx.s  $dst, $src, $src\t#@absF_reg" %}
+  ins_encode %{
+    __ fabs_s(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_s);
+%}
+
+// Double Absolute Value
+// Matches AbsD and emits fabs_d, which the format string shows as
+// "fsgnjx.d $dst, $src, $src".
+instruct absD_reg(fRegD dst, fRegD src) %{
+  match(Set dst (AbsD src));
+
+  ins_cost(XFER_COST);
+  format %{ "fsgnjx.d  $dst, $src, $src\t#@absD_reg" %}
+  ins_encode %{
+    __ fabs_d(as_FloatRegister($dst$$reg),
+              as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_uop_d);
+%}
+
+// Float Square Root
+// Matches SqrtF and emits one fsqrt.s.
+instruct sqrtF_reg(fRegF dst, fRegF src) %{
+  match(Set dst (SqrtF src));
+
+  ins_cost(FSQRT_COST);
+  format %{ "fsqrt.s  $dst, $src\t#@sqrtF_reg" %}
+  ins_encode %{
+    __ fsqrt_s(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_sqrt_s);
+%}
+
+// Double Square Root
+// Matches SqrtD and emits one fsqrt.d.
+instruct sqrtD_reg(fRegD dst, fRegD src) %{
+  match(Set dst (SqrtD src));
+
+  ins_cost(FSQRT_COST);
+  format %{ "fsqrt.d  $dst, $src\t#@sqrtD_reg" %}
+  ins_encode %{
+    __ fsqrt_d(as_FloatRegister($dst$$reg),
+               as_FloatRegister($src$$reg));
+  %}
+
+  ins_pipe(fp_sqrt_d);
+%}
+
+// Arithmetic Instructions End
+
+// ============================================================================
+// Logical Instructions
+
+// Register And
+// Matches AndI of two int registers and emits one andr.
+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andr  $dst, $src1, $src2\t#@andI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And
+// Matches AndI of an int register and an immIAdd constant and emits one andi;
+// the constant is cast to int32_t before being passed to the assembler.
+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (AndI src1 src2));
+
+  format %{ "andi  $dst, $src1, $src2\t#@andI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andi(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or
+// Matches OrI of two int registers and emits one orr.
+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t#@orI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or
+// Matches OrI of an int register and an immIAdd constant and emits one ori;
+// the constant is cast to int32_t before being passed to the assembler.
+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (OrI src1 src2));
+
+  format %{ "ori  $dst, $src1, $src2\t#@orI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ ori(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor
+// Matches XorI of two int registers and emits one xorr.
+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "xorr  $dst, $src1, $src2\t#@xorI_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ xorr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor
+// Matches XorI of an int register and an immIAdd constant and emits one xori;
+// the constant is cast to int32_t before being passed to the assembler.
+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{
+  match(Set dst (XorI src1 src2));
+
+  format %{ "xori  $dst, $src1, $src2\t#@xorI_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ xori(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register And Long
+// Matches AndL of two long registers and emits one andr.
+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "andr  $dst, $src1, $src2\t#@andL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate And Long
+// Matches AndL of a long register and an immLAdd constant and emits one andi;
+// the constant is narrowed to int32_t before being passed to the assembler.
+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (AndL src1 src2));
+
+  format %{ "andi  $dst, $src1, $src2\t#@andL_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ andi(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Or Long
+// Matches OrL of two long registers and emits one orr.
+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "orr  $dst, $src1, $src2\t#@orL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ orr(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Or Long
+// Matches OrL of a long register and an immLAdd constant and emits one ori;
+// the constant is narrowed to int32_t before being passed to the assembler.
+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (OrL src1 src2));
+
+  format %{ "ori  $dst, $src1, $src2\t#@orL_reg_imm" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ ori(as_Register($dst$$reg),
+           as_Register($src1$$reg),
+           (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// Register Xor Long
+// Matches XorL of two long registers and emits one xorr.
+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
+  match(Set dst (XorL src1 src2));
+
+  format %{ "xorr  $dst, $src1, $src2\t#@xorL_reg_reg" %}
+
+  ins_cost(ALU_COST);
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ xorr(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            as_Register($src2$$reg));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Immediate Xor Long
+// Matches XorL of a long register and an immLAdd constant and emits one xori;
+// the constant is narrowed to int32_t before being passed to the assembler.
+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{
+  match(Set dst (XorL src1 src2));
+
+  ins_cost(ALU_COST);
+  format %{ "xori  $dst, $src1, $src2\t#@xorL_reg_imm" %}
+
+  ins_encode %{
+    __ xori(as_Register($dst$$reg),
+            as_Register($src1$$reg),
+            (int32_t)($src2$$constant));
+  %}
+
+  ins_pipe(ialu_reg_imm);
+%}
+
+// ============================================================================
+// BSWAP Instructions
+
+// Int byte reversal.
+// Matches ReverseBytesI and delegates to the revb_w_w macro. The cost of
+// ALU_COST * 13 indicates a multi-instruction expansion. The flags operand cr
+// is declared TEMP.
+instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{
+  match(Set dst (ReverseBytesI src));
+  effect(TEMP cr);
+
+  ins_cost(ALU_COST * 13);
+  format %{ "revb_w_w  $dst, $src\t#@bytes_reverse_int" %}
+
+  ins_encode %{
+    __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Long byte reversal.
+// Matches ReverseBytesL and delegates to the revb macro. The cost of
+// ALU_COST * 29 indicates a multi-instruction expansion. The flags operand cr
+// is declared TEMP.
+instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{
+  match(Set dst (ReverseBytesL src));
+  effect(TEMP cr);
+
+  ins_cost(ALU_COST * 29);
+  format %{ "revb  $dst, $src\t#@bytes_reverse_long" %}
+
+  ins_encode %{
+    __ revb(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Unsigned short byte reversal.
+// Matches ReverseBytesUS and delegates to the revb_h_h_u macro.
+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesUS src));
+
+  ins_cost(ALU_COST * 5);
+  format %{ "revb_h_h_u  $dst, $src\t#@bytes_reverse_unsigned_short" %}
+
+  ins_encode %{
+    __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Signed short byte reversal.
+// Matches ReverseBytesS and delegates to the revb_h_h macro.
+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
+  match(Set dst (ReverseBytesS src));
+
+  ins_cost(ALU_COST * 5);
+  format %{ "revb_h_h  $dst, $src\t#@bytes_reverse_short" %}
+
+  ins_encode %{
+    __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// ============================================================================
+// MemBar Instruction
+
+// Matches LoadFence and emits a membar with the LoadLoad | LoadStore
+// ordering constraints.
+instruct load_fence() %{
+  match(LoadFence);
+  ins_cost(ALU_COST);
+
+  format %{ "#@load_fence" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+// Matches MemBarAcquire and emits a membar with the LoadLoad | LoadStore
+// ordering constraints, preceded by a block comment in the generated code.
+instruct membar_acquire() %{
+  match(MemBarAcquire);
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_acquire\n\t"
+            "fence ir iorw" %}
+
+  ins_encode %{
+    __ block_comment("membar_acquire");
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// Matches MemBarAcquireLock at zero cost. No fence is emitted; the encoding
+// only leaves a block comment noting that the barrier was elided.
+instruct membar_acquire_lock() %{
+  match(MemBarAcquireLock);
+  ins_cost(0);
+
+  format %{ "#@membar_acquire_lock (elided)" %}
+
+  ins_encode %{
+    __ block_comment("membar_acquire_lock (elided)");
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// Matches StoreFence and emits a membar with the LoadStore | StoreStore
+// ordering constraints.
+instruct store_fence() %{
+  match(StoreFence);
+  ins_cost(ALU_COST);
+
+  format %{ "#@store_fence" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release() %{
+  match(MemBarRelease); // rule for the MemBarRelease ideal node
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_release\n\t"
+            "fence iorw ow" %}
+
+  ins_encode %{
+    __ block_comment("membar_release"); // annotation for the disassembly listing
+    __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); // LoadStore + StoreStore barrier bits
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_storestore() %{
+  match(MemBarStoreStore); // rule for the MemBarStoreStore ideal node
+  ins_cost(ALU_COST);
+
+  format %{ "MEMBAR-store-store\t#@membar_storestore" %}
+
+  ins_encode %{
+    __ membar(MacroAssembler::StoreStore); // StoreStore bit only
+  %}
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_release_lock() %{
+  match(MemBarReleaseLock); // rule for the MemBarReleaseLock ideal node
+  ins_cost(0); // free: no barrier instruction is emitted
+
+  format %{ "#@membar_release_lock (elided)" %}
+
+  ins_encode %{
+    __ block_comment("membar_release_lock (elided)"); // only an annotation; no membar() call here
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+instruct membar_volatile() %{
+  match(MemBarVolatile); // rule for the MemBarVolatile ideal node
+  ins_cost(ALU_COST);
+
+  format %{ "#@membar_volatile\n\t"
+             "fence iorw iorw"%}
+
+  ins_encode %{
+    __ block_comment("membar_volatile"); // annotation for the disassembly listing
+    __ membar(MacroAssembler::StoreLoad); // StoreLoad bit only
+  %}
+
+  ins_pipe(pipe_serial);
+%}
+
+// ============================================================================
+// Cast Instructions (Java-level type cast)
+
+instruct castX2P(iRegPNoSp dst, iRegL src) %{
+  match(Set dst (CastX2P src)); // reinterpret a long register value as a pointer
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $src\t# long -> ptr, #@castX2P" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm); // scoped object; presumably permits compressed encodings here - confirm
+    if ($dst$$reg != $src$$reg) { // nothing to emit when both operands share a register
+      __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castP2X(iRegLNoSp dst, iRegP src) %{
+  match(Set dst (CastP2X src)); // reinterpret a pointer register value as a long
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $src\t# ptr -> long, #@castP2X" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm); // scoped object; presumably permits compressed encodings here - confirm
+    if ($dst$$reg != $src$$reg) { // nothing to emit when both operands share a register
+      __ mv(as_Register($dst$$reg), as_Register($src$$reg));
+    }
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct castPP(iRegPNoSp dst)
+%{
+  match(Set dst (CastPP dst)); // input and output are the same operand
+  ins_cost(0);
+
+  size(0); // occupies no bytes: the encoding below is empty
+  format %{ "# castPP of $dst, #@castPP" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct castII(iRegI dst)
+%{
+  match(Set dst (CastII dst)); // input and output are the same operand
+
+  size(0); // occupies no bytes: the encoding below is empty
+  format %{ "# castII of $dst, #@castII" %}
+  ins_encode(/* empty encoding */);
+  ins_cost(0);
+  ins_pipe(pipe_class_empty);
+%}
+
+instruct checkCastPP(iRegPNoSp dst)
+%{
+  match(Set dst (CheckCastPP dst)); // input and output are the same operand
+
+  size(0); // occupies no bytes: the encoding below is empty
+  ins_cost(0);
+  format %{ "# checkcastPP of $dst, #@checkCastPP" %}
+  ins_encode(/* empty encoding */);
+  ins_pipe(pipe_class_empty);
+%}
+
+// ============================================================================
+// Convert Instructions
+
+// int to bool
+instruct convI2Bool(iRegINoSp dst, iRegI src)
+%{
+  match(Set dst (Conv2B src)); // same ideal node as convP2Bool; this rule takes an int source
+
+  ins_cost(ALU_COST);
+  format %{ "snez  $dst, $src\t#@convI2Bool" %}
+
+  ins_encode %{
+    __ snez(as_Register($dst$$reg), as_Register($src$$reg)); // set-if-not-zero
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// pointer to bool
+instruct convP2Bool(iRegINoSp dst, iRegP src)
+%{
+  match(Set dst (Conv2B src)); // same ideal node as convI2Bool; this rule takes a pointer source
+
+  ins_cost(ALU_COST);
+  format %{ "snez  $dst, $src\t#@convP2Bool" %}
+
+  ins_encode %{
+    __ snez(as_Register($dst$$reg), as_Register($src$$reg)); // set-if-not-zero
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// int <-> long
+
+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
+%{
+  match(Set dst (ConvI2L src)); // int -> long
+
+  ins_cost(ALU_COST);
+  format %{ "addw  $dst, $src, zr\t#@convI2L_reg_reg" %}
+  ins_encode %{
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); // 32-bit add of zero (addw with zr)
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
+  match(Set dst (ConvL2I src)); // long -> int
+
+  ins_cost(ALU_COST);
+  format %{ "addw  $dst, $src, zr\t#@convL2I_reg" %}
+
+  ins_encode %{
+    __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); // same addw-with-zr encoding as convI2L_reg_reg
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// int to unsigned long (Zero-extend)
+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
+%{
+  match(Set dst (AndL (ConvI2L src) mask)); // folds the int->long conversion and the AndL with the mask operand into one rule
+
+  ins_cost(ALU_COST * 2);
+  format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); // mask operand itself is not used in the encoding
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// float <-> double
+
+instruct convF2D_reg(fRegD dst, fRegF src) %{
+  match(Set dst (ConvF2D src)); // float -> double
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.s  $dst, $src\t#@convF2D_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); // both operands are FP registers
+  %}
+
+  ins_pipe(fp_f2d);
+%}
+
+instruct convD2F_reg(fRegF dst, fRegD src) %{
+  match(Set dst (ConvD2F src)); // double -> float
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.d  $dst, $src\t#@convD2F_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); // both operands are FP registers
+  %}
+
+  ins_pipe(fp_d2f);
+%}
+
+// float <-> int
+
+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+  match(Set dst (ConvF2I src)); // float -> int
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.w.s  $dst, $src\t#@convF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); // "_safe" helper, not the raw fcvt shown in the format text
+  %}
+
+  ins_pipe(fp_f2i);
+%}
+
+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2F src)); // int -> float
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.w  $dst, $src\t#@convI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // integer source, FP destination
+  %}
+
+  ins_pipe(fp_i2f);
+%}
+
+// float <-> long
+
+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{
+  match(Set dst (ConvF2L src)); // float -> long
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.l.s  $dst, $src\t#@convF2L_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); // "_safe" helper, not the raw fcvt shown in the format text
+  %}
+
+  ins_pipe(fp_f2l);
+%}
+
+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{
+  match(Set dst (ConvL2F src)); // long -> float
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.s.l  $dst, $src\t#@convL2F_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // integer source, FP destination
+  %}
+
+  ins_pipe(fp_l2f);
+%}
+
+// double <-> int
+
+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{
+  match(Set dst (ConvD2I src)); // double -> int
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.w.d  $dst, $src\t#@convD2I_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); // "_safe" helper, not the raw fcvt shown in the format text
+  %}
+
+  ins_pipe(fp_d2i);
+%}
+
+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{
+  match(Set dst (ConvI2D src)); // int -> double
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.w  $dst, $src\t#@convI2D_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // integer source, FP destination
+  %}
+
+  ins_pipe(fp_i2d);
+%}
+
+// double <-> long
+
+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+  match(Set dst (ConvD2L src)); // double -> long
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.l.d  $dst, $src\t#@convD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); // "_safe" helper, not the raw fcvt shown in the format text
+  %}
+
+  ins_pipe(fp_d2l);
+%}
+
+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{
+  match(Set dst (ConvL2D src)); // long -> double
+
+  ins_cost(XFER_COST);
+  format %{ "fcvt.d.l  $dst, $src\t#@convL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // integer source, FP destination
+  %}
+
+  ins_pipe(fp_l2d);
+%}
+
+// Convert oop into int for vectors alignment masking
+instruct convP2I(iRegINoSp dst, iRegP src) %{
+  match(Set dst (ConvL2I (CastP2X src))); // folds the ptr->long cast and the long->int narrowing into one rule
+
+  ins_cost(ALU_COST * 2);
+  format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ zero_extend($dst$$Register, $src$$Register, 32); // NOTE(review): convL2I_reg uses addw here instead - confirm zero-extension is intended
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(iRegINoSp dst, iRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0); // rule applies only when the narrow-oop shift is zero
+  match(Set dst (ConvL2I (CastP2X (DecodeN src)))); // decode + cast + narrowing folded into one rule
+
+  ins_cost(ALU_COST);
+  format %{ "mv  $dst, $src\t# compressed ptr -> int, #@convN2I" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ mv($dst$$Register, $src$$Register); // the narrow value is copied unchanged
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Convert oop pointer into compressed form
+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{
+  match(Set dst (EncodeP src)); // pointer -> narrow oop
+  ins_cost(ALU_COST);
+  format %{ "encode_heap_oop  $dst, $src\t#@encodeHeapOop" %}
+  ins_encode %{
+    Register s = $src$$Register; // full-width pointer input
+    Register d = $dst$$Register; // narrow result
+    __ encode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
+            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); // exact complement of decodeHeapOop_not_null's predicate
+  match(Set dst (DecodeN src));
+
+  ins_cost(0);
+  format %{ "decode_heap_oop  $dst, $src\t#@decodeHeapOop" %}
+  ins_encode %{
+    Register s = $src$$Register; // narrow oop input
+    Register d = $dst$$Register; // decoded pointer result
+    __ decode_heap_oop(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{
+  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
+            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); // result type is NotNull or Constant
+  match(Set dst (DecodeN src));
+
+  ins_cost(0);
+  format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %}
+  ins_encode %{
+    Register s = $src$$Register; // narrow oop input
+    Register d = $dst$$Register; // decoded pointer result
+    __ decode_heap_oop_not_null(d, s);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Convert klass pointer into compressed form.
+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
+  match(Set dst (EncodePKlass src)); // klass pointer -> narrow klass
+
+  ins_cost(ALU_COST);
+  format %{ "encode_klass_not_null  $dst, $src\t#@encodeKlass_not_null" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    __ encode_klass_not_null(dst_reg, src_reg, t0); // t0 is passed as the helper's third (scratch) argument; no TEMP operand declared
+  %}
+
+   ins_pipe(ialu_reg);
+%}
+
+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{
+  match(Set dst (DecodeNKlass src)); // narrow klass -> klass pointer
+
+  effect(TEMP tmp); // tmp is an allocator-provided temporary, unlike the fixed t0 in encodeKlass_not_null
+
+  ins_cost(ALU_COST);
+  format %{ "decode_klass_not_null  $dst, $src\t#@decodeKlass_not_null" %}
+
+  ins_encode %{
+    Register src_reg = as_Register($src$$reg);
+    Register dst_reg = as_Register($dst$$reg);
+    Register tmp_reg = as_Register($tmp$$reg);
+    __ decode_klass_not_null(dst_reg, src_reg, tmp_reg);
+  %}
+
+   ins_pipe(ialu_reg);
+%}
+
+// stack <-> reg and reg <-> reg shuffles with no conversion
+
+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{
+
+  match(Set dst (MoveF2I src)); // float stack slot -> int register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "lw  $dst, $src\t#@MoveF2I_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); // slot is addressed as sp + displacement
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{
+
+  match(Set dst (MoveI2F src)); // int stack slot -> float register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "flw  $dst, $src\t#@MoveI2F_stack_reg" %}
+
+  ins_encode %{
+    __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); // slot is addressed as sp + displacement
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{
+
+  match(Set dst (MoveD2L src)); // double stack slot -> long register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "ld  $dst, $src\t#@MoveD2L_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); // slot is addressed as sp + displacement
+  %}
+
+  ins_pipe(iload_reg_reg);
+
+%}
+
+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{
+
+  match(Set dst (MoveL2D src)); // long stack slot -> double register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(LOAD_COST);
+
+  format %{ "fld  $dst, $src\t#@MoveL2D_stack_reg" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); // slot is addressed as sp + displacement
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{
+
+  match(Set dst (MoveF2I src)); // float register -> int stack slot, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "fsw  $src, $dst\t#@MoveF2I_reg_stack" %}
+
+  ins_encode %{
+    __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); // store: register operand first, slot second
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src)); // int register -> float stack slot, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "sw  $src, $dst\t#@MoveI2F_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); // store: register operand first, slot second
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{
+
+  match(Set dst (MoveD2L src)); // double register -> long stack slot, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "fsd  $src, $dst\t#@MoveD2L_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); // store: register operand first, slot second (format text now matches)
+  %}
+
+  ins_pipe(pipe_class_memory);
+
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src)); // long register -> double stack slot, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(STORE_COST);
+
+  format %{ "sd  $src, $dst\t#@MoveL2D_reg_stack" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm);
+    __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); // store: register operand first, slot second
+  %}
+
+  ins_pipe(istore_reg_reg);
+
+%}
+
+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{
+
+  match(Set dst (MoveF2I src)); // float register -> int register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.x.w  $dst, $src\t#@MoveF2I_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); // integer destination, FP source
+  %}
+
+  ins_pipe(fp_f2i);
+
+%}
+
+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{
+
+  match(Set dst (MoveI2F src)); // int register -> float register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.w.x  $dst, $src\t#@MoveI2F_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // FP destination, integer source
+  %}
+
+  ins_pipe(fp_i2f);
+
+%}
+
+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{
+
+  match(Set dst (MoveD2L src)); // double register -> long register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); // integer destination, FP source
+  %}
+
+  ins_pipe(fp_d2l);
+
+%}
+
+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
+
+  match(Set dst (MoveL2D src)); // long register -> double register, no conversion
+
+  effect(DEF dst, USE src);
+
+  ins_cost(XFER_COST);
+
+  format %{ "fmv.d.x  $dst, $src\t#@MoveL2D_reg_reg" %}
+
+  ins_encode %{
+    __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); // FP destination, integer source
+  %}
+
+  ins_pipe(fp_l2d);
+%}
+
+// ============================================================================
+// Compare instructions that write the comparison result into the destination register.
+
+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
+%{
+  match(Set dst (CmpF3 op1 op2)); // three-way float compare producing an int
+
+  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+  format %{ "flt.s  $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
+            "bgtz   $dst, done\n\t"
+            "feq.s  $dst, $op1, $op2\n\t"
+            "addi   $dst, $dst, -1\t#@cmpF3_reg_reg"
+  %}
+
+  ins_encode %{
+    // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+    __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
+                     as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); // whole sequence is delegated to the helper
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
+%{
+  match(Set dst (CmpD3 op1 op2)); // three-way double compare producing an int
+
+  ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
+  format %{ "flt.d  $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
+            "bgtz   $dst, done\n\t"
+            "feq.d  $dst, $op1, $op2\n\t"
+            "addi   $dst, $dst, -1\t#@cmpD3_reg_reg"
+  %}
+
+  ins_encode %{
+    // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
+    __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); // whole sequence is delegated to the helper
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2)
+%{
+  match(Set dst (CmpL3 op1 op2)); // three-way long compare producing an int
+
+  ins_cost(ALU_COST * 3 + BRANCH_COST);
+  format %{ "slt   $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t"
+            "bnez  $dst, done\n\t"
+            "slt  $dst, $op1, $op2\n\t"
+            "neg   $dst, $dst\t#@cmpL3_reg_reg"
+  %}
+  ins_encode %{
+    __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); // result is produced in t0, not in $dst as the format text shows
+    __ mv(as_Register($dst$$reg), t0); // then copied into the destination
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q)
+%{
+  match(Set dst (CmpLTMask p q)); // mask form of p < q
+
+  ins_cost(2 * ALU_COST);
+
+  format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t"
+            "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg"
+  %}
+
+  ins_encode %{
+    __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); // dst = (p < q) ? 1 : 0
+    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); // dst = 0 - dst, so 1 becomes -1 (all ones)
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero)
+%{
+  match(Set dst (CmpLTMask op zero)); // mask form of op < 0
+
+  ins_cost(ALU_COST);
+
+  format %{ "sraiw $dst, $op, 31\t#@cmpLTMask_reg_zero" %}
+
+  ins_encode %{
+    __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); // arithmetic shift by 31 replicates the 32-bit sign bit
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+
+// ============================================================================
+// Max and Min
+
+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+  match(Set dst (MinI src1 src2)); // signed int minimum
+
+  effect(DEF dst, USE src1, USE src2);
+
+  ins_cost(BRANCH_COST + ALU_COST * 2);
+  format %{
+    "ble $src1, $src2, Lsrc1\t#@minI_rReg\n\t"
+    "mv $dst, $src2\n\t"
+    "j Ldone\n\t"
+    "bind Lsrc1\n\t"
+    "mv $dst, $src1\n\t"
+    "bind Ldone\t#@minI_rReg"
+  %}
+
+  ins_encode %{
+    Label Lsrc1, Ldone;
+    __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); // src1 <= src2: src1 is the minimum
+    __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); // fall-through: src2 is smaller
+    __ j(Ldone);
+    __ bind(Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+    __ bind(Ldone);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2)
+%{
+  match(Set dst (MaxI src1 src2)); // signed int maximum
+
+  effect(DEF dst, USE src1, USE src2);
+
+  ins_cost(BRANCH_COST + ALU_COST * 2);
+  format %{
+    "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t"
+    "mv $dst, $src2\n\t"
+    "j Ldone\n\t"
+    "bind Lsrc1\n\t"
+    "mv $dst, $src1\n\t"
+    "bind Ldone\t#@maxI_rReg"
+  %}
+
+  ins_encode %{
+    Label Lsrc1, Ldone;
+    __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); // src1 >= src2: src1 is the maximum
+    __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); // fall-through: src2 is larger
+    __ j(Ldone);
+    __ bind(Lsrc1);
+    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
+    __ bind(Ldone);
+
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// ============================================================================
+// Branch Instructions
+// Direct Branch.
+instruct branch(label lbl)
+%{
+  match(Goto); // unconditional jump
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "j  $lbl\t#@branch" %}
+
+  ins_encode(riscv_enc_j(lbl)); // uses a named encoding class instead of an inline %{ %} block
+
+  ins_pipe(pipe_branch);
+%}
+
+// ============================================================================
+// Compare and Branch Instructions
+
+// Patterns for short (< 12KiB) variants
+
+// Compare flags and branch near instructions.
+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{
+  match(If cmp cr); // branches directly on the flags operand; no Cmp node in the rule
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp  $cr, zr, $lbl\t#@cmpFlag_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); // flags register is passed as an ordinary register
+  %}
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare signed int and branch near instructions
+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpI_branch'.
+  match(If cmp (CmpI op1 op2)); // compare and branch fused into one rule
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpI_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); // condition code passed unmodified (signed)
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpI_loop'.
+  match(CountedLoopEnd cmp (CmpI op1 op2)); // loop-end counterpart of cmpI_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpI_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); // condition code passed unmodified (signed)
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare unsigned int and branch near instructions
+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpU_branch'.
+  match(If cmp (CmpU op1 op2)); // unsigned int compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpU_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // unsigned_branch_mask is OR-ed into the condition code
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl)
+%{
+  // Same match rule as `far_cmpU_loop'.
+  match(CountedLoopEnd cmp (CmpU op1 op2)); // loop-end counterpart of cmpU_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpU_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // unsigned_branch_mask is OR-ed into the condition code
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare signed long and branch near instructions
+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpL_branch'.
+  match(If cmp (CmpL op1 op2)); // signed long compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); // condition code passed unmodified (signed)
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpL_loop'.
+  match(CountedLoopEnd cmp (CmpL op1 op2)); // loop-end counterpart of cmpL_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); // condition code passed unmodified (signed)
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare unsigned long and branch near instructions
+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpUL_branch'.
+  match(If cmp (CmpUL op1 op2)); // unsigned long compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpUL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // unsigned_branch_mask is OR-ed into the condition code
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl)
+%{
+  // Same match rule as `far_cmpUL_loop'.
+  match(CountedLoopEnd cmp (CmpUL op1 op2)); // loop-end counterpart of cmpUL_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpUL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // unsigned_branch_mask is OR-ed into the condition code
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare pointer and branch near instructions
+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  // Same match rule as `far_cmpP_branch'.
+  match(If cmp (CmpP op1 op2)); // pointer compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpP_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // pointers use the unsigned mask, like CmpU/CmpUL
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{
+  // Same match rule as `far_cmpP_loop'.
+  match(CountedLoopEnd cmp (CmpP op1 op2)); // loop-end counterpart of cmpP_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpP_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // pointers use the unsigned mask, like CmpU/CmpUL
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare narrow pointer and branch near instructions
+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  // Same match rule as `far_cmpN_branch'.
+  match(If cmp (CmpN op1 op2)); // narrow pointer compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpN_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // narrow pointers use the unsigned mask, like CmpP
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{
+  // Same match rule as `far_cmpN_loop'.
+  match(CountedLoopEnd cmp (CmpN op1 op2)); // loop-end counterpart of cmpN_branch
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, $op2, $lbl\t#@cmpN_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label)); // narrow pointers use the unsigned mask, like CmpP
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare float and branch near instructions
+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  // Same match rule as `far_cmpF_branch'.
+  match(If cmp (CmpF op1 op2)); // float compare and branch
+
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); // no mask: single-precision form
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{
+  // Same match rule as `far_cmpF_loop'.
+  match(CountedLoopEnd cmp (CmpF op1 op2)); // loop-end counterpart of cmpF_branch
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); // no mask: single-precision form
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare double and branch near instructions
+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  // Same match rule as `far_cmpD_branch'.
+  match(If cmp (CmpD op1 op2)); // double compare and branch
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label)); // same helper as the float rules, with double_branch_mask OR-ed in
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{
+  // Same match rule as `far_cmpD_loop'.
+  match(CountedLoopEnd cmp (CmpD op1 op2)); // loop-end counterpart of cmpD_branch
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST);
+  format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label)); // same helper as the float rules, with double_branch_mask OR-ed in
+  %}
+
+  ins_pipe(pipe_class_compare);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare signed int with zero and branch near instructions
+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpI_reg_imm0_branch'.
+  match(If cmp (CmpI op1 zero)); // signed int compare against constant zero
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); // zr stands in for the zero operand
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpI_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpI op1 zero)); // loop-end counterpart of cmpI_reg_imm0_branch
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); // zr stands in for the zero operand
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare unsigned int with zero and branch near instructions
+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'.
+  match(If cmp (CmpU op1 zero)); // unsigned int compare against constant zero
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); // dedicated helper; takes no second register
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpU op1 zero)); // loop-end counterpart of cmpUEqNeLeGt_reg_imm0_branch
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %}
+
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); // dedicated helper; takes no second register
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare signed long with zero and branch near instructions
+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpL_reg_imm0_branch'.
+  match(If cmp (CmpL op1 zero)); // signed long compare against constant zero
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); // zr stands in for the zero operand
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpL_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpL op1 zero)); // loop-end counterpart of cmpL_reg_imm0_branch
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); // zr stands in for the zero operand
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+// Compare unsigned long with zero and branch near instructions
+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{
+  // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'.
+  match(If cmp (CmpUL op1 zero)); // unsigned long compare against constant zero
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); // same helper as the unsigned int rule
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1); // marks this rule as the short-branch form
+%}
+
+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{  // Near CountedLoopEnd branch: unsigned long register compared against zero.
+  // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoder takes no second register
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+// Compare pointer with zero and branch near instructions
+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  // Near If branch on a pointer vs. zero; same match rule as `far_cmpP_imm0_branch'.
+  match(If cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  // Near CountedLoopEnd branch on a pointer vs. zero; same match rule as `far_cmpP_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpP op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+// Compare narrow pointer with zero and branch near instructions
+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  // Near If branch on a narrow pointer vs. zero; same match rule as `far_cmpN_imm0_branch'.
+  match(If cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpN_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  // Near CountedLoopEnd branch on a narrow pointer vs. zero; same match rule as `far_cmpN_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpN op1 zero));
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "b$cmp  $op1, zr, $lbl\t#@cmpN_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+// Compare narrow pointer with pointer zero and branch near instructions
+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  // Near If branch; same match rule as `far_cmpP_narrowOop_imm0_branch'.
+  match(If cmp (CmpP (DecodeN op1) zero));  // DecodeN is absorbed: the narrow register itself is tested
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  // Near CountedLoopEnd branch; same match rule as `far_cmpP_narrowOop_imm0_loop'.
+  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));  // DecodeN is absorbed: the narrow register itself is tested
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "b$cmp   $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label));
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+  ins_short_branch(1);  // flags this instruct as the short-branch variant
+%}
+
+// Patterns for far (20KiB) variants
+
+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{
+  match(If cmp cr);  // If fed directly by the flags register operand
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);  // cr is handed to the eq/ne-against-zero encoder in far mode
+%}
+
+// Compare signed int and branch far instructions
+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(If cmp (CmpI op1 op2));  // signed int register-register compare feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);
+
+  // The single format mnemonic [far_b$cmp] stands for two instructions
+  // in the macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done)
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpI_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));  // signed int compare closing a counted loop
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpI_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(If cmp (CmpU op1 op2));  // unsigned int compare feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpU op1 op2));  // unsigned int compare closing a counted loop
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(If cmp (CmpL op1 op2));  // signed long register-register compare feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpL op1 op2));  // signed long compare closing a counted loop
+  effect(USE lbl);  // NOTE(review): instruct name lacks the '_' used in its format tag (far_cmpL_loop) - confirm
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(If cmp (CmpUL op1 op2));  // unsigned long compare feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpUL_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{
+  match(CountedLoopEnd cmp (CmpUL op1 op2));  // unsigned long compare closing a counted loop
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpUL_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{  // Far If branch on a pointer-pointer compare.
+  match(If cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpP_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // pointers are encoded with unsigned_branch_mask set
+%}
+
+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl)
+%{  // Far CountedLoopEnd branch on a pointer-pointer compare.
+  match(CountedLoopEnd cmp (CmpP op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpP_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // pointers are encoded with unsigned_branch_mask set
+%}
+
+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{  // Far If branch on a compare of two narrow-pointer registers.
+  match(If cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpN_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                       as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // narrow pointers are encoded with unsigned_branch_mask set
+%}
+
+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl)
+%{  // Far CountedLoopEnd branch on a compare of two narrow-pointer registers.
+  match(CountedLoopEnd cmp (CmpN op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, $op2, $lbl\t#@far_cmpN_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg),
+                  as_Register($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmp_branch);  // narrow pointers are encoded with unsigned_branch_mask set
+%}
+
+// Float compare and branch instructions
+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{  // Far If branch on a compare of two single-precision float registers.
+  match(If cmp (CmpF op1 op2));
+
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);  // adds XFER_COST on top of the far-branch cost
+  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+                        *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl)
+%{  // Far CountedLoopEnd branch on a compare of two single-precision float registers.
+  match(CountedLoopEnd cmp (CmpF op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);  // adds XFER_COST on top of the far-branch cost
+  format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
+                        *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+// Double compare and branch instructions
+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{  // Far If branch on a compare of two double-precision float registers.
+  match(If cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);  // adds XFER_COST on top of the far-branch cost
+  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);  // shares float_cmp_branch with CmpF; double_branch_mask marks the double case
+%}
+
+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
+%{  // Far CountedLoopEnd branch on a compare of two double-precision float registers.
+  match(CountedLoopEnd cmp (CmpD op1 op2));
+  effect(USE lbl);
+
+  ins_cost(XFER_COST + BRANCH_COST * 2);  // adds XFER_COST on top of the far-branch cost
+  format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}
+
+  ins_encode %{
+    __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
+                        as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_class_compare);  // shares float_cmp_branch with CmpF; double_branch_mask marks the double case
+%}
+
+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{  // Far If branch: signed int register compared against zero.
+  match(If cmp (CmpI op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoding passes zr in its place
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl)
+%{  // Far CountedLoopEnd branch: signed int register compared against zero.
+  match(CountedLoopEnd cmp (CmpI op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoding passes zr in its place
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{  // Far If branch: unsigned int register compared against zero.
+  match(If cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoder takes no second register
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl)
+%{  // Far CountedLoopEnd branch: unsigned int register compared against zero.
+  match(CountedLoopEnd cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoder takes no second register
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %}
+
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compare instructs have no short instruct with the same match
+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{  // If branch on an unsigned int vs. zero for the lt/ge conditions.
+  match(If cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);  // single BRANCH_COST, unlike the doubled cost of the other far_ forms
+
+  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));  // enc class is defined outside this section
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl)
+%{  // CountedLoopEnd branch on an unsigned int vs. zero for the lt/ge conditions.
+  match(CountedLoopEnd cmp (CmpU op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);  // single BRANCH_COST, unlike the doubled cost of the other far_ forms
+
+  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));  // enc class is defined outside this section
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{  // Far If branch: signed long register compared against zero.
+  match(If cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoding passes zr in its place
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl)
+%{  // Far CountedLoopEnd branch: signed long register compared against zero.
+  match(CountedLoopEnd cmp (CmpL op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoding passes zr in its place
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{  // Far If branch: unsigned long register compared against zero.
+  match(If cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoder takes no second register
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl)
+%{  // Far CountedLoopEnd branch: unsigned long register compared against zero.
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);  // zero is matched only; the encoder takes no second register
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// Unsigned lt/ge compare instructs have no short instruct with the same match
+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{  // If branch on an unsigned long vs. zero for the lt/ge conditions.
+  match(If cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);  // single BRANCH_COST, unlike the doubled cost of the other far_ forms
+
+  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));  // same enc class as the int (CmpU) form
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl)
+%{  // CountedLoopEnd branch on an unsigned long vs. zero for the lt/ge conditions.
+  match(CountedLoopEnd cmp (CmpUL op1 zero));
+
+  effect(USE op1, USE lbl);
+
+  ins_cost(BRANCH_COST);  // single BRANCH_COST, unlike the doubled cost of the other far_ forms
+
+  format %{ "j  $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %}
+
+  ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl));  // same enc class as the int (CmpU) form
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  match(If cmp (CmpP op1 zero));  // pointer compared against zero, feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpP op1 zero));  // pointer compared against zero, closing a counted loop
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  match(If cmp (CmpN op1 zero));  // narrow pointer compared against zero, feeding an If
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpN op1 zero));  // narrow pointer compared against zero, closing a counted loop
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+
+  format %{ "far_b$cmp  $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  match(If cmp (CmpP (DecodeN op1) zero));  // DecodeN is absorbed: the narrow register itself is tested
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{
+  match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero));  // DecodeN is absorbed: the narrow register itself is tested
+  effect(USE lbl);
+
+  ins_cost(BRANCH_COST * 2);  // far form is costed at twice BRANCH_COST
+  format %{ "far_b$cmp   $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %}
+
+  ins_encode %{
+    __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true);
+  %}
+
+  ins_pipe(pipe_cmpz_branch);
+%}
+
+// ============================================================================
+// Conditional Move Instructions
+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // int conditional move keyed on a signed int compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // int conditional move keyed on an unsigned int compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // int conditional move keyed on a signed long compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{
+  match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // int conditional move keyed on an unsigned long compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // long conditional move keyed on a signed long compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // long conditional move keyed on an unsigned long compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // long conditional move keyed on a signed int compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{
+  match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src)));  // dst is both a CMove input and the result
+  ins_cost(ALU_COST + BRANCH_COST);  // long conditional move keyed on an unsigned int compare
+
+  format %{
+    "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t"
+  %}
+
+  ins_encode %{
+    __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask,
+                 as_Register($op1$$reg), as_Register($op2$$reg),
+                 as_Register($dst$$reg), as_Register($src$$reg));
+  %}
+
+  ins_pipe(pipe_class_compare);  // the condition code above is OR-ed with unsigned_branch_mask
+%}
+
+// ============================================================================
+// Procedure Call/Return Instructions
+
+// Call Java Static Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+//       compute_padding() functions will have to be adjusted.
+instruct CallStaticJavaDirect(method meth)
+%{  // Matches a CallStaticJava node; meth is the call target operand.
+  match(CallStaticJava);
+
+  effect(USE meth);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %}
+
+  ins_encode(riscv_enc_java_static_call(meth),  // call sequence, followed by the shared call epilog
+             riscv_enc_call_epilog);
+
+  ins_pipe(pipe_class_call);
+  ins_alignment(4);  // requests 4-byte alignment for this instruction
+%}
+
+// TO HERE
+
+// Call Java Dynamic Instruction
+// Note: If this code changes, the corresponding ret_addr_offset() and
+//       compute_padding() functions will have to be adjusted.
+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr)
+%{  // Matches a CallDynamicJava node; meth is the call target operand.
+  match(CallDynamicJava);
+
+  effect(USE meth, KILL cr);  // the flags register is declared clobbered across the call
+
+  ins_cost(BRANCH_COST + ALU_COST * 6);
+
+  format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %}
+
+  ins_encode(riscv_enc_java_dynamic_call(meth),  // call sequence, followed by the shared call epilog
+             riscv_enc_call_epilog);
+
+  ins_pipe(pipe_class_call);
+  ins_alignment(4);  // requests 4-byte alignment for this instruction
+%}
+
+// Call Runtime Instruction
+
+instruct CallRuntimeDirect(method meth, rFlagsReg cr)
+%{  // Matches a CallRuntime node; meth is the call target operand.
+  match(CallRuntime);
+
+  effect(USE meth, KILL cr);  // the flags register is declared clobbered across the call
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %}
+
+  ins_encode(riscv_enc_java_to_runtime(meth));  // same enc class as the CallLeaf variants
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Leaf Instruction
+
+instruct CallLeafDirect(method meth, rFlagsReg cr)
+%{  // Matches a CallLeaf node; meth is the call target operand.
+  match(CallLeaf);
+
+  effect(USE meth, KILL cr);  // the flags register is declared clobbered across the call
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %}
+
+  ins_encode(riscv_enc_java_to_runtime(meth));  // same enc class as CallRuntimeDirect
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Call Runtime Leaf (no FP) Instruction
+
+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr)
+%{  // Matches a CallLeafNoFP node; meth is the call target operand.
+  match(CallLeafNoFP);
+
+  effect(USE meth, KILL cr);  // the flags register is declared clobbered across the call
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %}
+
+  ins_encode(riscv_enc_java_to_runtime(meth));  // same enc class as CallRuntimeDirect
+
+  ins_pipe(pipe_class_call);
+%}
+
+// ============================================================================
+// Partial Subtype Check
+//
+// Search the superklass array for an instance of the superklass.  Set a
+// hidden internal cache on a hit (cache is checked with exposed code in
+// gen_subtype_check()).  Return zero for a hit.  The encoding
+// ALSO sets flags.
+
+instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr)
+%{  // Value-producing form: the PartialSubtypeCheck result is written to the fixed result register.
+  match(Set result (PartialSubtypeCheck sub super));
+  effect(KILL tmp, KILL cr);  // the scratch register and the flags register are both clobbered
+
+  ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
+  format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %}
+
+  ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));  // enc class shared with the VsZero form
+
+  opcode(0x1); // Force zero of result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp,
+                                   immP0 zero, rFlagsReg cr)
+%{  // Flag-producing form: matches the check compared against zero and defines cr instead of result.
+  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
+  effect(KILL tmp, KILL result);  // result is only a clobbered scratch register here
+
+  ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4);
+  format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %}
+
+  ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result));  // enc class shared with partialSubtypeCheck
+
+  opcode(0x0); // Don't zero result reg on hit
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                         iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{  // StrComp for the UU encoding; every operand is pinned to a fixed register class.
+  predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU);  // selects this instruct by string encoding
+  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
+  ins_encode %{
+    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                      StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_memory);  // inputs are USE_KILL: read, then clobbered by the stub code
+%}
+
+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                         iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{  // StrComp for the LL encoding; every operand is pinned to a fixed register class.
+  predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);  // selects this instruct by string encoding
+  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                      StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_memory);  // inputs are USE_KILL: read, then clobbered by the stub code
+%}
+
+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                          iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr)
+%{  // StrComp for the UL encoding; every operand is pinned to a fixed register class.
+  predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);  // selects this instruct by string encoding
+  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                      StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_memory);  // inputs are USE_KILL: read, then clobbered by the stub code
+%}
+
+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                          iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3,
+                          rFlagsReg cr)
+%{  // StrComp for the LU encoding; every operand is pinned to a fixed register class.
+  predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);  // selects this instruct by string encoding
+  match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2)));
+  effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+                      StrIntrinsicNode::LU);
+  %}
+  ins_pipe(pipe_class_memory);  // inputs are USE_KILL: read, then clobbered by the stub code
+%}
+
+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{  // StrIndexOf for the UU encoding, with both counts supplied in registers.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);  // selects this instruct by string encoding
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      $tmp5$$Register, $tmp6$$Register,
+                      $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_memory);  // six allocator-chosen temps (TEMP) plus the four clobbered inputs
+%}
+
+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{  // StrIndexOf for the LL encoding, with both counts supplied in registers.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);  // selects this instruct by string encoding
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      $tmp5$$Register, $tmp6$$Register,
+                      $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_memory);  // six allocator-chosen temps (TEMP) plus the four clobbered inputs
+%}
+
+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
+                          iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
+                          iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); // UL encoding only
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
+  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
+  // StrIndexOf with cnt2 in a register: all four inputs are USE_KILL, six temps and cr are claimed.
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $tmp3$$Register, $tmp4$$Register,
+                      $tmp5$$Register, $tmp6$$Register,
+                      $result$$Register, StrIntrinsicNode::UL); // result and encoding are passed last
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+                              immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); // UU encoding only
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  // StrIndexOf whose second count is an immediate of operand class immI_le_4; only four temps needed.
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
+
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant; // compile-time value of the second count
+    __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+                                 $cnt1$$Register, zr, // NOTE(review): zr presumably fills the unused cnt2 register slot - confirm
+                                 $tmp1$$Register, $tmp2$$Register,
+                                 $tmp3$$Register, $tmp4$$Register,
+                                 icnt2, $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+                              immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); // LL encoding only
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  // StrIndexOf whose second count is an immediate of operand class immI_le_4; only four temps needed.
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant; // compile-time value of the second count
+    __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+                                 $cnt1$$Register, zr, // NOTE(review): zr presumably fills the unused cnt2 register slot - confirm
+                                 $tmp1$$Register, $tmp2$$Register,
+                                 $tmp3$$Register, $tmp4$$Register,
+                                 icnt2, $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2,
+                              immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); // UL encoding only
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  // Constant-count StrIndexOf; here int_cnt2 is operand class immI_1, not the immI_le_4 of the UU/LL forms.
+  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant; // compile-time value of the second count
+    __ string_indexof_linearscan($str1$$Register, $str2$$Register,
+                                 $cnt1$$Register, zr, // NOTE(review): zr presumably fills the unused cnt2 register slot - confirm
+                                 $tmp1$$Register, $tmp2$$Register,
+                                 $tmp3$$Register, $tmp4$$Register,
+                                 icnt2, $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
+                              iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
+                              iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
+%{
+  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); // no predicate: matches every StrIndexOfChar
+  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+  // Single-character search: str1, cnt1 and ch are all clobbered; four temps and cr are claimed.
+  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+  ins_encode %{
+    // NOTE(review): the trailing flag is false but tagged "isU", while the format above calls this
+    // the UTF16 form - confirm the flag's polarity against the string_indexof_char declaration.
+    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
+                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
+                           $tmp3$$Register, $tmp4$$Register, false /* isU */);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+
+// Clearing of an array, count held in a register (operand classes iRegL_R29 / iRegP_R28).
+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy)
+%{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL cnt, USE_KILL base); // both inputs are consumed and clobbered
+
+  ins_cost(4 * DEFAULT_COST);
+  format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
+
+  ins_encode %{
+    address tpc = __ zero_words($base$$Register, $cnt$$Register); // note: base first, then count
+    if (tpc == NULL) { // a NULL return is reported as a compilation failure
+      ciEnv::current()->record_failure("CodeCache is full");
+      return; // stop encoding this node
+    }
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr)
+%{
+  predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL base, KILL cr);
+  // Constant-count ClearArray: taken only when n->in(2), compared unsigned, is below the limit above.
+  ins_cost(4 * DEFAULT_COST);
+  format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %}
+
+  ins_encode %{
+    __ zero_words($base$$Register, (uint64_t)$cnt$$constant); // count passed as an immediate value
+  %}
+
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+                        iRegI_R10 result, rFlagsReg cr)
+%{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); // LL encoding only
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+  // StrEquals for the LL encoding: both string pointers and the count are clobbered.
+  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
+  ins_encode %{
+    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+    __ string_equals($str1$$Register, $str2$$Register,
+                     $result$$Register, $cnt$$Register, 1); // 1 here vs 2 in string_equalsU: presumably bytes per element - confirm
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
+                        iRegI_R10 result, rFlagsReg cr)
+%{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); // UU encoding only
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+  // StrEquals for the UU encoding: both string pointers and the count are clobbered.
+  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
+  ins_encode %{
+    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
+    __ string_equals($str1$$Register, $str2$$Register,
+                     $result$$Register, $cnt$$Register, 2); // 2 here vs 1 in string_equalsL: presumably bytes per element - confirm
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
+                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+%{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); // LL encoding only
+  match(Set result (AryEq ary1 ary2));
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+  // AryEq for the LL encoding. The format names both arrays as operands ($ary1, $ary2).
+  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp5" %}
+  ins_encode %{
+    __ arrays_equals($ary1$$Register, $ary2$$Register,
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
+                     $result$$Register, $tmp5$$Register, 1); // 1 here vs 2 in array_equalsC: presumably bytes per element - confirm
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
+                       iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3,
+                       iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr)
+%{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); // UU encoding only
+  match(Set result (AryEq ary1 ary2));
+  effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr);
+  // AryEq for the UU encoding. The format names both arrays as operands ($ary1, $ary2).
+  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp5" %}
+  ins_encode %{
+    __ arrays_equals($ary1$$Register, $ary2$$Register,
+                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
+                     $result$$Register, $tmp5$$Register, 2); // 2 here vs 1 in array_equalsB: presumably bytes per element - confirm
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
+// ============================================================================
+// Safepoint Instructions
+
+instruct safePoint(iRegP poll)
+%{
+  match(SafePoint poll);
+
+  // Safepoint poll: reads the polling page addressed by $poll at offset 0,
+  // tagged with relocInfo::poll_type.
+  format %{ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" %}
+  ins_cost(2 * LOAD_COST);
+  ins_encode %{
+    __ read_polling_page($poll$$Register, 0, relocInfo::poll_type);
+  %}
+  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
+%}
+
+// ============================================================================
+// This name is KNOWN by the ADLC and cannot be changed.
+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for this instruct.
+// It matches ThreadLocal and emits nothing: size(0) with an empty encoding.
+instruct tlsLoadP(javaThread_RegP dst)
+%{
+  match(Set dst (ThreadLocal));
+
+  ins_cost(0); // free: no instruction is generated
+
+  format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %}
+
+  size(0); // zero bytes of code
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+// Inlined locking and unlocking.
+// Uses t1 as the 'flag' register to bridge the BoolNode producers and consumers.
+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastLock object box)); // the result is the flags operand cr
+  effect(TEMP tmp1, TEMP tmp2); // two scratch registers handed to the enc class
+
+  ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3);
+  format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2, #@cmpFastLock" %}
+
+  ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); // code comes from the enc class defined elsewhere
+
+  ins_pipe(pipe_serial);
+%}
+
+// Inlined unlocking; uses t1 as the 'flag' register to bridge the BoolNode producers and consumers.
+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+%{
+  match(Set cr (FastUnlock object box)); // the result is the flags operand cr
+  effect(TEMP tmp1, TEMP tmp2); // two scratch registers handed to the enc class
+
+  ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4);
+  format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %}
+
+  ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); // code comes from the enc class defined elsewhere
+
+  ins_pipe(pipe_serial);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
+%{
+  match(TailCall jump_target method_oop);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %}
+
+  ins_encode(riscv_enc_tail_call(jump_target)); // only jump_target is passed; method_oop appears in match/format alone
+
+  ins_pipe(pipe_class_call);
+%}
+
+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop)
+%{
+  match(TailJump jump_target ex_oop); // ex_oop uses operand class iRegP_R10
+
+  ins_cost(ALU_COST + BRANCH_COST);
+
+  format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %}
+
+  ins_encode(riscv_enc_tail_jmp(jump_target)); // only jump_target is passed; ex_oop appears in match/format alone
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is set up
+// just prior to jumping to this handler. No code emitted.
+instruct CreateException(iRegP_R10 ex_oop)
+%{
+  match(Set ex_oop (CreateEx)); // result operand is of class iRegP_R10
+
+  ins_cost(0); // free: no instruction is generated
+  format %{ " -- \t// exception oop; no code emitted, #@CreateException" %}
+
+  size(0); // zero bytes of code
+
+  ins_encode( /*empty*/ );
+
+  ins_pipe(pipe_class_empty);
+%}
+
+// Rethrow exception: The exception oop will come in the first
+// argument position. Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+  match(Rethrow); // no operands
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "j rethrow_stub\t#@RethrowException" %}
+
+  ins_encode(riscv_enc_rethrow()); // code comes from the enc class defined elsewhere
+
+  ins_pipe(pipe_class_call);
+%}
+
+// Return Instruction
+// The epilog node loads the return address into ra as part of the frame pop.
+instruct Ret()
+%{
+  match(Return); // no operands
+
+  ins_cost(BRANCH_COST);
+  format %{ "ret\t// return register, #@Ret" %}
+
+  ins_encode(riscv_enc_ret()); // code comes from the enc class defined elsewhere
+
+  ins_pipe(pipe_branch);
+%}
+
+// Die now: matches Halt and emits halt() only when is_reachable() is true.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+
+  ins_cost(BRANCH_COST);
+
+  format %{ "#@ShouldNotReachHere" %}
+
+  ins_encode %{
+    Assembler::CompressibleRegion cr(&_masm); // scoped to this encode block; presumably allows compressed encodings - confirm
+    if (is_reachable()) { // otherwise nothing at all is emitted
+      __ halt();
+    }
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch ( root_instr_name [preceding_instruction]* );
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+//  [, ...] );
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser.  An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == RAX_enc)
+// Only one replacement instruction
+//
+//----------SMARTSPILL RULES---------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad
new file mode 100644
index 0000000000..7dda004cd3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/riscv_b.ad
@@ -0,0 +1,466 @@
+//
+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// RISCV Bit-Manipulation Extension Architecture Description File
+
+// Pointer -> int (ConvL2I of CastP2X) via zext.w; used for vector alignment masking.
+instruct convP2I_b(iRegINoSp dst, iRegP src) %{
+  predicate(UseZba);
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  ins_cost(ALU_COST);
+  format %{ "zext.w  $dst, $src\t# ptr -> int @convP2I_b" %}
+
+  ins_encode %{
+    __ zext_w($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Byte to int: RShiftI(LShiftI(src, lshift), rshift), both shifts immI_24, becomes one sext.b.
+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{
+  predicate(UseZbb);
+  match(Set dst (RShiftI (LShiftI src lshift) rshift));
+
+  ins_cost(ALU_COST);
+  format %{ "sext.b  $dst, $src\t# b2i, #@convB2I_reg_reg_b" %}
+
+  ins_encode %{
+    __ sext_b($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Int to short: RShiftI(LShiftI(src, lshift), rshift), both shifts immI_16, becomes one sext.h.
+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{
+  predicate(UseZbb);
+  match(Set dst (RShiftI (LShiftI src lshift) rshift));
+
+  ins_cost(ALU_COST);
+  format %{ "sext.h  $dst, $src\t# i2s, #@convI2S_reg_reg_b" %}
+
+  ins_encode %{
+    __ sext_h($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Short to unsigned int: AndI with a mask of operand class immI_16bits becomes one zext.h.
+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{
+  predicate(UseZbb);
+  match(Set dst (AndI src mask));
+
+  ins_cost(ALU_COST);
+  format %{ "zext.h  $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %}
+
+  ins_encode %{
+    __ zext_h($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Int to unsigned long (zero extend): AndL(ConvI2L src) with an immL_32bits mask becomes zext.w.
+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{
+  predicate(UseZba);
+  match(Set dst (AndL (ConvI2L src) mask));
+
+  ins_cost(ALU_COST);
+  format %{ "zext.w  $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %}
+
+  ins_encode %{
+    __ zext_w($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg_shift);
+%}
+
+// BSWAP instructions. ReverseBytesI goes through the revb_w_w assembler helper.
+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UseZbb);
+  match(Set dst (ReverseBytesI src));
+
+  format %{ "revb_w_w  $dst, $src\t#@bytes_reverse_int_b" %}
+  ins_cost(ALU_COST * 2);
+
+  ins_encode %{
+    __ revb_w_w($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{
+  predicate(UseZbb);
+  match(Set dst (ReverseBytesL src));
+  // ReverseBytesL is a single rev8, hence the single ALU_COST.
+  format %{ "rev8  $dst, $src\t#@bytes_reverse_long_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ rev8($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UseZbb);
+  match(Set dst (ReverseBytesUS src));
+  // ReverseBytesUS goes through the revb_h_h_u assembler helper.
+  format %{ "revb_h_h_u  $dst, $src\t#@bytes_reverse_unsigned_short_b" %}
+  ins_cost(ALU_COST * 2);
+
+  ins_encode %{
+    __ revb_h_h_u($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UseZbb);
+  match(Set dst (ReverseBytesS src));
+  // ReverseBytesS goes through the revb_h_h assembler helper.
+  format %{ "revb_h_h  $dst, $src\t#@bytes_reverse_short_b" %}
+  ins_cost(ALU_COST * 2);
+
+  ins_encode %{
+    __ revb_h_h($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Shift Add Pointer: AddP(src1, LShiftL(src2, imm)) folded into one shadd.
+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{
+  predicate(UseZba);
+  match(Set dst (AddP src1 (LShiftL src2 imm)));
+
+  format %{ "shadd  $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Per the match rule $src2 is the shifted value and $src1 the addend.
+    // t0 is passed along too (presumably as scratch - confirm against shadd).
+    __ shadd($dst$$Register,
+             $src2$$Register, $src1$$Register,
+             t0, $imm$$constant);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{
+  predicate(UseZba);
+  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm)));
+
+  format %{ "shadd  $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Same shadd as shaddP_reg_reg_b; here the shifted value is an int
+    // register wrapped in ConvI2L by the match rule.
+    __ shadd($dst$$Register,
+             $src2$$Register, $src1$$Register,
+             t0, $imm$$constant);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Shift Add Long: AddL(src1, LShiftL(src2, imm)) folded into one shadd.
+instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{
+  predicate(UseZba);
+  match(Set dst (AddL src1 (LShiftL src2 imm)));
+
+  format %{ "shadd  $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Per the match rule $src2 is the shifted value and $src1 the addend.
+    // t0 is passed along too (presumably as scratch - confirm against shadd).
+    __ shadd($dst$$Register,
+             $src2$$Register, $src1$$Register,
+             t0, $imm$$constant);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{
+  predicate(UseZba);
+  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm)));
+
+  format %{ "shadd  $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Same shadd as shaddL_reg_reg_b; here the shifted value is an int
+    // register wrapped in ConvI2L by the match rule.
+    __ shadd($dst$$Register,
+             $src2$$Register, $src1$$Register,
+             t0, $imm$$constant);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Zeros Count instructions. CountLeadingZerosI maps to clzw.
+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UseZbb);
+  match(Set dst (CountLeadingZerosI src));
+
+  format %{ "clzw  $dst, $src\t#@countLeadingZerosI_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ clzw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{
+  predicate(UseZbb);
+  match(Set dst (CountLeadingZerosL src));
+  // CountLeadingZerosL maps to clz; the source is a long, the result an int register.
+  format %{ "clz  $dst, $src\t#@countLeadingZerosL_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ clz($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UseZbb);
+  match(Set dst (CountTrailingZerosI src));
+  // CountTrailingZerosI maps to ctzw.
+  format %{ "ctzw  $dst, $src\t#@countTrailingZerosI_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ ctzw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{
+  predicate(UseZbb);
+  match(Set dst (CountTrailingZerosL src));
+  // CountTrailingZerosL maps to ctz; the source is a long, the result an int register.
+  format %{ "ctz  $dst, $src\t#@countTrailingZerosL_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ ctz($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Population Count instructions, gated on UsePopCountInstruction. PopCountI maps to cpopw.
+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "cpopw  $dst, $src\t#@popCountI_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ cpopw($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int, so dst is an int register. PopCountL maps to cpop.
+instruct popCountL_b(iRegINoSp dst, iRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "cpop  $dst, $src\t#@popCountL_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ cpop($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Max and Min. MinI maps to a single min.
+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  predicate(UseZbb);
+  match(Set dst (MinI src1 src2));
+
+  format %{ "min  $dst, $src1, $src2\t#@minI_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ min($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{
+  predicate(UseZbb);
+  match(Set dst (MaxI src1 src2));
+  // MaxI maps to a single max.
+  format %{ "max  $dst, $src1, $src2\t#@maxI_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    __ max($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Abs: AbsI computed as max(src, -src), with the negated value held in t0.
+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{
+  predicate(UseZbb);
+  match(Set dst (AbsI src));
+
+  format %{
+    "negw  t0, $src\n\t"
+    "max  $dst, $src, t0\t#@absI_reg_b"
+  %}
+  ins_cost(ALU_COST * 2);
+
+  ins_encode %{
+    __ negw(t0, $src$$Register);
+    __ max($dst$$Register, $src$$Register, t0);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{
+  predicate(UseZbb);
+  match(Set dst (AbsL src));
+  // AbsL computed as max(src, -src), with the negated value held in t0.
+  format %{
+    "neg  t0, $src\n\t"
+    "max  $dst, $src, t0\t#@absL_reg_b"
+  %}
+  ins_cost(ALU_COST * 2);
+
+  ins_encode %{
+    __ neg(t0, $src$$Register);
+    __ max($dst$$Register, $src$$Register, t0);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// And Not: AndI(src1, XorI(src2, m1)) with m1 of class immI_M1 becomes one andn.
+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
+  predicate(UseZbb);
+  match(Set dst (AndI src1 (XorI src2 m1)));
+
+  format %{ "andn  $dst, $src1, $src2\t#@andnI_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Operand order follows the match rule: $src1 first, then the
+    // operand that was XORed with m1.
+    __ andn($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
+  predicate(UseZbb);
+  match(Set dst (AndL src1 (XorL src2 m1)));
+
+  format %{ "andn  $dst, $src1, $src2\t#@andnL_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Long form of and-not: AndL(src1, XorL(src2, m1)) with m1 of
+    // class immL_M1 becomes one andn.
+    __ andn($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// Or Not: OrI(src1, XorI(src2, m1)) with m1 of class immI_M1 becomes one orn.
+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{
+  predicate(UseZbb);
+  match(Set dst (OrI src1 (XorI src2 m1)));
+
+  format %{ "orn  $dst, $src1, $src2\t#@ornI_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Operand order follows the match rule: $src1 first, then the
+    // operand that was XORed with m1.
+    __ orn($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{
+  predicate(UseZbb);
+  match(Set dst (OrL src1 (XorL src2 m1)));
+
+  format %{ "orn  $dst, $src1, $src2\t#@ornL_reg_reg_b" %}
+  ins_cost(ALU_COST);
+
+  ins_encode %{
+    // Long form of or-not: OrL(src1, XorL(src2, m1)) with m1 of
+    // class immL_M1 becomes one orn.
+    // Operand order follows the match rule.
+    __ orn($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
+
+// AndI 0b0..010..0 + ConvI2B: Conv2B(AndI(src, mask)) becomes bexti at bit exact_log2(mask).
+instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{
+  predicate(UseZbs);
+  match(Set dst (Conv2B (AndI src mask)));
+
+  format %{ "bexti  $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %}
+  ins_cost(ALU_COST);
+  ins_encode %{
+    __ bexti(as_Register($dst$$reg), as_Register($src$$reg), exact_log2((juint)($mask$$constant)));
+  %}
+
+  ins_pipe(ialu_reg_reg);
+%}
\ No newline at end of file
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
new file mode 100644
index 0000000000..7b1112b388
--- /dev/null
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -0,0 +1,2661 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/debugInfoRec.hpp"
+#include "code/icBuffer.hpp"
+#include "code/vtableStubs.hpp"
+#include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "logging/log.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/align.hpp"
+#include "utilities/formatBuffer.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+#ifdef COMPILER2
+#include "adfiles/ad_riscv.hpp"
+#include "opto/runtime.hpp"
+#endif
+
+#define __ masm->  // shorthand: "__ foo()" calls foo() on the MacroAssembler* named masm in scope
+
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;  // stack alignment expressed in stack slots
+
+class SimpleRuntimeFrame {
+public:
+
+  // Most of the runtime stubs have this simple frame layout.
+  // This class exists to make the layout shared in one place.
+  // Offsets are for compiler stack slots, which are jints.
+  enum layout {
+    // The frame sender code expects that fp will be in the "natural" place and
+    // will override any oopMap setting for it. We must therefore force the layout
+    // so that it agrees with the frame sender code.
+    // We don't expect any arg reg save area, so riscv asserts that
+    // frame::arg_reg_save_area_bytes == 0.
+    fp_off = 0, fp_off2,      // slots 0 and 1: the two 32-bit halves of the saved fp
+    return_off, return_off2,  // slots 2 and 3: the two 32-bit halves of the return address
+    framesize                 // 4: total number of slots in this layout
+  };
+};
+
+class RegisterSaver {
+ public:
+  RegisterSaver() {}
+  ~RegisterSaver() {}
+  OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
+  void restore_live_registers(MacroAssembler* masm);
+
+  // Byte offsets into the register save area, counted from SP.
+  // Deoptimization uses them when it manages result register values itself.
+  // The area holds all 32 float registers, an alignment slot, and the integer
+  // registers except x1(ra), x2(sp), x3(gp) and x4(tp); fp and ra sit on top:
+  // |---f0---|<---SP
+  // |---f1---|
+  // |   ..   |
+  // |---f31--|
+  // |---reserved slot for stack alignment---|
+  // |---x5---|
+  // |   x6   |
+  // |---.. --|
+  // |---x31--|
+  // |---fp---|
+  // |---ra---|
+  int f0_offset_in_bytes(void) { return 0; }
+  int reserved_slot_offset_in_bytes(void) {
+    // The alignment slot comes right after the whole float register area.
+    const int float_area_slots = FloatRegisterImpl::number_of_registers *
+                                 FloatRegisterImpl::max_slots_per_register;
+    return f0_offset_in_bytes() + float_area_slots * BytesPerInt;
+  }
+
+  int reg_offset_in_bytes(Register r) {
+    assert (r->encoding() > 4, "ra, sp, gp and tp not saved");
+    // x1..x4 are absent from the area, so x5 lands one word past the reserved slot.
+    const int words_past_reserved = r->encoding() - 4;
+    return reserved_slot_offset_in_bytes() + words_past_reserved * wordSize;
+  }
+
+  int freg_offset_in_bytes(FloatRegister f) {
+    return wordSize * f->encoding() + f0_offset_in_bytes();
+  }
+
+  int ra_offset_in_bytes(void) {
+    // ra is the last word of the area (see the diagram above).
+    const int slots_below_ra = (RegisterImpl::number_of_registers - 3) *
+                               RegisterImpl::max_slots_per_register;
+    return reserved_slot_offset_in_bytes() + slots_below_ra * BytesPerInt;
+  }
+};
+
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+  // Emits the code that saves the live registers and returns the OopMap
+  // describing where each one was stored; *total_frame_words receives the
+  // frame size in words (the unit CodeBlob uses).
+  int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16);
+  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
+  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
+  // The caller will allocate additional_frame_words
+  int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
+  *total_frame_words = frame_size_in_bytes / wordSize;
+
+  // Save Integer and Float registers.
+  __ enter();
+  __ push_CPU_state();
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+  // Only the OopMap itself is needed: no OopMapSet is allocated here
+  // because nothing in this function would use it.
+  OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
+  assert_cond(oop_map != NULL);
+
+  // Float registers first, from slot 0. NOTE(review): additional_frame_slots is not added here, unlike the integer loop - confirm.
+  int sp_offset_in_slots = 0;
+  int step_in_slots = FloatRegisterImpl::max_slots_per_register;
+  for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
+    FloatRegister r = as_FloatRegister(i);
+    oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
+  }
+
+  step_in_slots = RegisterImpl::max_slots_per_register;
+  // skip the slot reserved for alignment, see MacroAssembler::push_reg;
+  // also skip x5 ~ x6 on the stack because they are caller-saved registers.
+  sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3;
+  // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack.
+  for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
+    Register r = as_Register(i);
+    if (r != xthread) {
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg());
+    }
+  }
+
+  return oop_map;
+}
+
+void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
+  __ pop_CPU_state();  // counterpart of push_CPU_state() in save_live_registers
+  __ leave();          // counterpart of enter() in save_live_registers
+}
+
+// Is vector's size (in bytes) bigger than a size saved by default? Here the answer is always no.
+bool SharedRuntime::is_wide_vector(int size) {
+  return false;  // size is not examined
+}
+
+size_t SharedRuntime::trampoline_size() {
+  return 6 * NativeInstruction::instruction_size;  // six instructions; presumably the movptr_with_offset + jalr emitted by generate_trampoline - TODO confirm
+}
+
+void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
+  int32_t offset = 0;  // presumably filled in by movptr_with_offset with the part of the address left for jalr - TODO confirm
+  __ movptr_with_offset(t0, destination, offset);  // materialize destination in t0
+  __ jalr(x0, t0, offset);                         // jump via t0; rd = x0, so no return address is kept
+}
+
+// The java_calling_convention describes stack locations as ideal slots on
+// a frame with no abi restrictions. Since we must observe abi restrictions
+// (like the placement of the register window) the slots must be biased by
+// the following value.
+static int reg2offset_in(VMReg r) {
+  // Incoming stack arguments get no extra bias here: the byte offset is the
+  // stack slot index scaled by the slot size.
+  return VMRegImpl::stack_slot_size * r->reg2stack();
+}
+
+static int reg2offset_out(VMReg r) {
+  return VMRegImpl::stack_slot_size * (SharedRuntime::out_preserve_stack_slots() + r->reg2stack());  // byte offset, biased by the preserved out slots
+}
+
+// ---------------------------------------------------------------------------
+// Read the array of BasicTypes from a signature, and compute where the
+// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
+// quantities.  Values less than VMRegImpl::stack0 are registers, those above
+// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
+// as framesizes are fixed.
+// VMRegImpl::stack0 refers to the first slot 0(sp),
+// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
+// (up to RegisterImpl::number_of_registers) are the 64-bit
+// integer registers.
+
+// Note: the INPUTS in sig_bt are in units of Java argument words,
+// which are 64-bit.  The OUTPUTS are in 32-bit units.
+
+// The Java calling convention is a "shifted" version of the C ABI.
+// By skipping the first C ABI register we can call non-static jni
+// methods with small numbers of arguments without having to shuffle
+// the arguments at all. Since we control the java ABI we ought to at
+// least get some advantage out of it.
+
+int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
+                                           VMRegPair *regs,
+                                           int total_args_passed,
+                                           int is_outgoing) {
+  // Tables of the Java argument registers, in the order they are handed out.
+  // is_outgoing is not consulted: the assignment is the same either way.
+  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
+    j_rarg0, j_rarg1, j_rarg2, j_rarg3,
+    j_rarg4, j_rarg5, j_rarg6, j_rarg7
+  };
+  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+  uint int_args = 0;  // integer argument registers used so far
+  uint fp_args = 0;   // float argument registers used so far
+  uint stk_args = 0;  // next free stack slot; grows by 2 per stack argument
+
+  for (int i = 0; i < total_args_passed; i++) {
+    // Classify the type first, then pick a location once for all classes.
+    bool is_fp = false;    // wants a float register
+    bool is_wide = false;  // recorded with set2() rather than set1()
+
+    switch (sig_bt[i]) {
+      case T_VOID:
+        // halves of T_LONG or T_DOUBLE
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        regs[i].set_bad();
+        continue;
+      case T_BOOLEAN: // fall through
+      case T_CHAR:    // fall through
+      case T_BYTE:    // fall through
+      case T_SHORT:   // fall through
+      case T_INT:
+        break;
+      case T_LONG:
+        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+        // fall through
+      case T_OBJECT:  // fall through
+      case T_ARRAY:   // fall through
+      case T_ADDRESS:
+        is_wide = true;
+        break;
+      case T_FLOAT:
+        is_fp = true;
+        break;
+      case T_DOUBLE:
+        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+        is_fp = true;
+        is_wide = true;
+        break;
+      default:
+        ShouldNotReachHere();
+        continue;
+    }
+
+    // Registers of the matching class are used until they run out; after
+    // that every argument gets a fresh pair of stack slots.
+    VMReg where;
+    if (!is_fp && int_args < Argument::n_int_register_parameters_j) {
+      where = INT_ArgReg[int_args++]->as_VMReg();
+    } else if (is_fp && fp_args < Argument::n_float_register_parameters_j) {
+      where = FP_ArgReg[fp_args++]->as_VMReg();
+    } else {
+      where = VMRegImpl::stack2reg(stk_args);
+      stk_args += 2;
+    }
+    // 64-bit values are recorded with set2(), everything else with set1().
+    if (is_wide) { regs[i].set2(where); } else { regs[i].set1(where); }
+  }
+
+  return align_up(stk_args, 2);
+}
+
+// Patch the caller's callsite with the entry to compiled code, if compiled code exists.
+static void patch_callers_callsite(MacroAssembler *masm) {
+  Label L;
+  __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));  // t0 = the method's code field
+  __ beqz(t0, L);                                                // zero: nothing to patch, skip the call
+
+  __ enter();
+  __ push_CPU_state();  // saved here, restored by pop_CPU_state() after the call
+
+  // VM needs caller's callsite
+  // VM needs target method
+  // This needs to be a long call since we will relocate this adapter to
+  // the codeBuffer and it may not reach
+
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+
+  __ mv(c_rarg0, xmethod);  // first C argument: the target method
+  __ mv(c_rarg1, ra);       // second C argument: ra, which identifies the caller's callsite
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset);
+  __ jalr(x1, t0, offset);  // call SharedRuntime::fixup_callers_callsite, linking through x1
+
+  __ pop_CPU_state();
+  // restore sp
+  __ leave();
+  __ bind(L);
+}
+
+static void gen_c2i_adapter(MacroAssembler *masm,
+                            int total_args_passed,
+                            int comp_args_on_stack,
+                            const BasicType *sig_bt,
+                            const VMRegPair *regs,
+                            Label& skip_fixup) {
+  // Before we get into the guts of the C2I adapter, see if we should be here
+  // at all.  We've come from compiled code and are attempting to jump to the
+  // interpreter, which means the caller made a static call to get here
+  // (vcalls always get a compiled target if there is one).  Check for a
+  // compiled target.  If there is one, we need to patch the caller's call.
+  patch_callers_callsite(masm);
+
+  __ bind(skip_fixup);
+
+  int words_pushed = 0;  // never modified below; it only feeds the ld_off computation
+
+  // Since all args are passed on the stack, total_args_passed *
+  // Interpreter::stackElementSize is the space we need.
+
+  int extraspace = total_args_passed * Interpreter::stackElementSize;
+
+  __ mv(x30, sp);  // x30 = sp as it was before the argument area is allocated
+
+  // stack is aligned, keep it that way
+  extraspace = align_up(extraspace, 2 * wordSize);
+
+  if (extraspace) {
+    __ sub(sp, sp, extraspace);
+  }
+
+  // Now write the args into the outgoing interpreter space
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // offset to start parameters
+    int st_off   = (total_args_passed - i - 1) * Interpreter::stackElementSize;
+    int next_off = st_off - Interpreter::stackElementSize;
+
+    // Say 4 args (st_off per the formula above, assuming 8-byte stack elements):
+    // i   st_off
+    // 0   24 T_LONG
+    // 1   16 T_VOID
+    // 2    8 T_OBJECT
+    // 3    0 T_BOOL
+    // (no slot is set aside for a return address by this formula)
+    //
+    // However, to make things extra confusing: because we can fit a Java long/double in
+    // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
+    // leaves one slot empty and only stores to a single slot. In this case the
+    // slot that is occupied is the T_VOID slot. See I said it was confusing.
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // memory to memory use t0
+      int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
+                    + extraspace
+                    + words_pushed * wordSize);
+      if (!r_2->is_valid()) {
+        __ lwu(t0, Address(sp, ld_off));  // single 32-bit slot, loaded zero-extended
+        __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+      } else {
+        __ ld(t0, Address(sp, ld_off), /*temp register*/esp);
+
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // ld_off == LSW, ld_off+wordSize == MSW
+          // st_off == MSW, next_off == LSW
+          __ sd(t0, Address(sp, next_off), /*temp register*/esp);
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mv(t0, 0xdeadffffdeadaaaaul);
+          __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+        } else {
+          __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+        }
+      }
+    } else if (r_1->is_Register()) {
+      Register r = r_1->as_Register();
+      if (!r_2->is_valid()) {
+        // must be only an int (or less); note that sd still writes the whole 64-bit register to the slot
+        __ sd(r, Address(sp, st_off));
+      } else {
+        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
+        // T_DOUBLE and T_LONG use two slots in the interpreter
+        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
+          // long/double in gpr
+#ifdef ASSERT
+          // Overwrite the unused slot with known junk
+          __ mv(t0, 0xdeadffffdeadaaabul);
+          __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+          __ sd(r, Address(sp, next_off));
+        } else {
+          __ sd(r, Address(sp, st_off));
+        }
+      }
+    } else {
+      assert(r_1->is_FloatRegister(), "");
+      if (!r_2->is_valid()) {
+        // only a float use just part of the slot
+        __ fsw(r_1->as_FloatRegister(), Address(sp, st_off));
+      } else {
+#ifdef ASSERT
+        // Overwrite the unused slot with known junk
+        __ mv(t0, 0xdeadffffdeadaaacul);
+        __ sd(t0, Address(sp, st_off), /*temp register*/esp);
+#endif /* ASSERT */
+        __ fsd(r_1->as_FloatRegister(), Address(sp, next_off));
+      }
+    }
+  }
+
+  __ mv(esp, sp); // Interp expects args on caller's expression stack
+
+  __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));  // t0 = the method's interpreter entry
+  __ jr(t0);
+}
+
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+                                    int total_args_passed,
+                                    int comp_args_on_stack,
+                                    const BasicType *sig_bt,
+                                    const VMRegPair *regs) {
+  // Cut-out for having no stack args.
+  int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
+  if (comp_args_on_stack != 0) {
+    __ sub(t0, sp, comp_words_on_stack * wordSize);  // make room for the compiled stack args
+    __ andi(sp, t0, -16);                            // and round sp down to a 16-byte boundary
+  }
+
+  // Will jump to the compiled code just as if compiled code was doing it.
+  // Pre-load the register-jump target early, to schedule it better.
+  __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset())));
+
+  // Now generate the shuffle code.
+  for (int i = 0; i < total_args_passed; i++) {
+    if (sig_bt[i] == T_VOID) {
+      assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
+      continue;
+    }
+
+    // Pick up 0, 1 or 2 words from SP+offset.
+
+    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+           "scrambled load targets?");
+    // Load in argument order going down.
+    int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
+    // Point to interpreter value (vs. tag)
+    int next_off = ld_off - Interpreter::stackElementSize;
+
+    VMReg r_1 = regs[i].first();
+    VMReg r_2 = regs[i].second();
+    if (!r_1->is_valid()) {
+      assert(!r_2->is_valid(), "");
+      continue;
+    }
+    if (r_1->is_stack()) {
+      // Convert stack slot to an SP offset (no extra word for a return address is added here)
+      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
+      if (!r_2->is_valid()) {
+        __ lw(t0, Address(esp, ld_off));  // 32-bit value, sign-extended by lw
+        __ sd(t0, Address(sp, st_off), /*temp register*/t2);
+      } else {
+        //
+        // We are using two optoregs. This can be either T_OBJECT,
+        // T_ADDRESS, T_LONG, or T_DOUBLE. The interpreter allocates
+        // two slots but only uses one for the T_LONG or T_DOUBLE case.
+        // So we must adjust where to pick up the data to match the
+        // interpreter.
+        //
+        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
+        // are accessed as negative so LSW is at LOW address
+
+        // ld_off is MSW so get LSW
+        const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
+                           next_off : ld_off;
+        __ ld(t0, Address(esp, offset));
+        // st_off is LSW (i.e. reg.first())
+        __ sd(t0, Address(sp, st_off), /*temp register*/t2);
+      }
+    } else if (r_1->is_Register()) {  // Register argument
+      Register r = r_1->as_Register();
+      if (r_2->is_valid()) {
+        //
+        // We are using two VMRegs. This can be either T_OBJECT,
+        // T_ADDRESS, T_LONG, or T_DOUBLE. The interpreter allocates
+        // two slots but only uses one for the T_LONG or T_DOUBLE case.
+        // So we must adjust where to pick up the data to match the
+        // interpreter.
+
+        const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
+                           next_off : ld_off;
+
+        // this can be a misaligned move
+        __ ld(r, Address(esp, offset));
+      } else {
+        // sign extend and use a full word?
+        __ lw(r, Address(esp, ld_off));
+      }
+    } else {
+      // neither stack nor integer register: r_1 is used as a float register
+      if (!r_2->is_valid()) {
+        __ flw(r_1->as_FloatRegister(), Address(esp, ld_off));
+      } else {
+        __ fld(r_1->as_FloatRegister(), Address(esp, next_off));
+      }
+    }
+  }
+
+  // 6243940 We might end up in handle_wrong_method if
+  // the callee is deoptimized as we race thru here. If that
+  // happens we don't want to take a safepoint because the
+  // caller frame will look interpreted and arguments are now
+  // "compiled" so it is much better to make this transition
+  // invisible to the stack walking code. Unfortunately if
+  // we try and find the callee by normal means a safepoint
+  // is possible. So we stash the desired callee in the thread
+  // and the vm will find it there should this case occur.
+
+  __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset()));
+
+  __ jr(t1);  // t1 was loaded from Method::from_compiled_offset() above
+}
+
+// ---------------------------------------------------------------
+AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
+                                                            int total_args_passed,
+                                                            int comp_args_on_stack,
+                                                            const BasicType *sig_bt,
+                                                            const VMRegPair *regs,
+                                                            AdapterFingerPrint* fingerprint) {
+  address i2c_entry = __ pc();  // entry 1 of 3: the i2c adapter starts here
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+
+  address c2i_unverified_entry = __ pc();  // entry 2 of 3: c2i with a receiver-klass check in front
+  Label skip_fixup;
+
+  Label ok;
+
+  const Register holder = t1;
+  const Register receiver = j_rarg0;
+  const Register tmp = t2;  // A call-clobbered register not used for arg passing
+
+  // -------------------------------------------------------------------------
+  // Generate a C2I adapter.  On entry we know xmethod holds the Method* during calls
+  // to the interpreter.  The args start out packed in the compiled layout.  They
+  // need to be unpacked into the interpreter layout.  This will almost always
+  // require some stack space.  We grow the current (compiled) stack, then repack
+  // the args.  We  finally end in a jump to the generic interpreter entry point.
+  // On exit from the interpreter, the interpreter will restore our SP (lest the
+  // compiled code, which relies solely on SP and not FP, get sick).
+
+  {
+    __ block_comment("c2i_unverified_entry {");
+    __ load_klass(t0, receiver);
+    __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
+    __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
+    __ beq(t0, tmp, ok);  // receiver klass equals the holder's klass: proceed
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+    __ bind(ok);
+    // Method might have been compiled since the call site was patched to
+    // interpreted; if that is the case treat it as a miss so we can get
+    // the call site corrected.
+    __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
+    __ beqz(t0, skip_fixup);  // code field is zero: enter the c2i adapter past its callsite patching
+    __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+    __ block_comment("} c2i_unverified_entry");
+  }
+
+  address c2i_entry = __ pc();  // entry 3 of 3: the c2i adapter proper
+
+  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+
+  __ flush();
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+}
+
+int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
+                                         VMRegPair *regs,
+                                         VMRegPair *regs2,
+                                         int total_args_passed) {
+  assert(regs2 == NULL, "not needed on riscv");
+
+  // Assigns a register or a stack location to every argument of a native
+  // call and records it in regs[].
+  // The result is the number of VMRegImpl stack slots to reserve for all
+  // the arguments, NOT counting out_preserve_stack_slots.
+
+  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
+    c_rarg0, c_rarg1, c_rarg2, c_rarg3,
+    c_rarg4, c_rarg5, c_rarg6, c_rarg7
+  };
+  static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
+    c_farg0, c_farg1, c_farg2, c_farg3,
+    c_farg4, c_farg5, c_farg6, c_farg7
+  };
+
+  uint int_args = 0;  // integer argument registers used so far
+  uint fp_args = 0;   // float argument registers used so far
+  uint stk_args = 0;  // next free stack slot; grows by 2 per stack argument
+
+  for (int i = 0; i < total_args_passed; i++) {
+    // Classify the type first, then pick a location once for all classes.
+    bool is_fp = false;    // prefers a float register
+    bool is_wide = false;  // recorded with set2() rather than set1()
+
+    switch (sig_bt[i]) {
+      case T_VOID: // Halves of longs and doubles
+        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+        regs[i].set_bad();
+        continue;
+      case T_BOOLEAN:  // fall through
+      case T_CHAR:     // fall through
+      case T_BYTE:     // fall through
+      case T_SHORT:    // fall through
+      case T_INT:
+        break;
+      case T_LONG:
+        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+        // fall through
+      case T_OBJECT:    // fall through
+      case T_ARRAY:     // fall through
+      case T_ADDRESS:   // fall through
+      case T_METADATA:
+        is_wide = true;
+        break;
+      case T_FLOAT:
+        is_fp = true;
+        break;
+      case T_DOUBLE:
+        assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+        is_fp = true;
+        is_wide = true;
+        break;
+      default:
+        ShouldNotReachHere();
+        continue;
+    }
+
+    // Float arguments try the float registers first and then compete with
+    // the integer arguments for the integer registers; whatever is left
+    // over gets a fresh pair of stack slots.
+    VMReg where;
+    if (is_fp && fp_args < Argument::n_float_register_parameters_c) {
+      where = FP_ArgReg[fp_args++]->as_VMReg();
+    } else if (int_args < Argument::n_int_register_parameters_c) {
+      where = INT_ArgReg[int_args++]->as_VMReg();
+    } else {
+      where = VMRegImpl::stack2reg(stk_args);
+      stk_args += 2;
+    }
+
+    // 64-bit values are recorded with set2(), everything else with set1().
+    if (is_wide) { regs[i].set2(where); } else { regs[i].set1(where); }
+  }
+
+  return stk_args;
+}
+
+// The C ABI specifies:
+// "integer scalars narrower than XLEN bits are widened according to the sign
+// of their type up to 32 bits, then sign-extended to XLEN bits."
+// Applies for both passed in register and stack.
+//
+// Java uses 32-bit stack slots; jint, jshort, jchar, jbyte uses one slot.
+// Native uses 64-bit stack slots for all integer scalar types.
+//
+// lw loads the Java stack slot, sign-extends and
+// sd store this widened integer into a 64 bit native stack slot.
+static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  // Moves a 32-bit Java argument to its native location; values that pass
+  // through lw or addw are sign-extended on the way.
+  const VMReg from = src.first();
+  const VMReg to = dst.first();
+  if (from->is_stack() && to->is_stack()) {
+    // stack to stack
+    __ lw(t0, Address(fp, reg2offset_in(from)));
+    __ sd(t0, Address(sp, reg2offset_out(to)));
+  } else if (from->is_stack()) {
+    // stack to reg
+    __ lw(to->as_Register(), Address(fp, reg2offset_in(from)));
+  } else if (to->is_stack()) {
+    // reg to stack
+    __ sd(from->as_Register(), Address(sp, reg2offset_out(to)));
+  } else if (to != from) {
+    // 32bits extend sign
+    __ addw(to->as_Register(), from->as_Register(), zr);
+  }
+}
+
+// An oop arg. Must pass a handle (the address of a stack slot holding the oop, or NULL), not the oop itself
+static void object_move(MacroAssembler* masm,
+                        OopMap* map,
+                        int oop_handle_offset,
+                        int framesize_in_slots,
+                        VMRegPair src,
+                        VMRegPair dst,
+                        bool is_receiver,
+                        int* receiver_offset) {
+  // must pass a handle. First figure out the location we use as a handle
+  Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
+
+  // See if oop is NULL if it is we need no handle
+
+  if (src.first()->is_stack()) {
+
+    // Oop is already on the stack as an argument
+    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
+    if (is_receiver) {
+      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
+    }
+
+    __ ld(t0, Address(fp, reg2offset_in(src.first())));       // t0 = the oop itself, only for the NULL test
+    __ la(rHandle, Address(fp, reg2offset_in(src.first())));  // rHandle = address of the slot holding it
+    // conditionally move a NULL
+    Label notZero1;
+    __ bnez(t0, notZero1);
+    __ mv(rHandle, zr);
+    __ bind(notZero1);
+  } else {
+
+    // Oop is in a register; we must store it to the space we reserve
+    // on the stack for oop_handles and pass a handle if oop is non-NULL
+
+    const Register rOop = src.first()->as_Register();
+    int oop_slot = -1;  // index of the Java argument register, i.e. n for j_rarg<n>
+    if (rOop == j_rarg0) {
+      oop_slot = 0;
+    } else if (rOop == j_rarg1) {
+      oop_slot = 1;
+    } else if (rOop == j_rarg2) {
+      oop_slot = 2;
+    } else if (rOop == j_rarg3) {
+      oop_slot = 3;
+    } else if (rOop == j_rarg4) {
+      oop_slot = 4;
+    } else if (rOop == j_rarg5) {
+      oop_slot = 5;
+    } else if (rOop == j_rarg6) {
+      oop_slot = 6;
+    } else {
+      assert(rOop == j_rarg7, "wrong register");
+      oop_slot = 7;
+    }
+
+    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;  // now a stack slot number in the handle area
+    int offset = oop_slot * VMRegImpl::stack_slot_size;                   // and its byte offset from sp
+
+    map->set_oop(VMRegImpl::stack2reg(oop_slot));
+    // Store oop in handle area, may be NULL
+    __ sd(rOop, Address(sp, offset));
+    if (is_receiver) {
+      *receiver_offset = offset;
+    }
+
+    // rOop may be the same as rHandle; then it must be tested before it is overwritten
+    if (rOop == rHandle) {
+      Label isZero;
+      __ beqz(rOop, isZero);
+      __ la(rHandle, Address(sp, offset));
+      __ bind(isZero);
+    } else {
+      Label notZero2;
+      __ la(rHandle, Address(sp, offset));
+      __ bnez(rOop, notZero2);
+      __ mv(rHandle, zr);
+      __ bind(notZero2);
+    }
+  }
+
+  // If arg is on the stack then place it otherwise it is already in correct reg.
+  if (dst.first()->is_stack()) {
+    __ sd(rHandle, Address(sp, reg2offset_out(dst.first())));
+  }
+}
+
+// Moves a float argument: stack to stack, stack to integer register, or float register to float register.
+static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(src.first()->is_stack() && dst.first()->is_stack() ||
+         src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
+  const VMReg from = src.first();
+  const VMReg to = dst.first();
+  if (from->is_stack() && to->is_stack()) {
+    __ lwu(t0, Address(fp, reg2offset_in(from)));
+    __ sw(t0, Address(sp, reg2offset_out(to)));
+  } else if (from->is_stack() && to->is_Register()) {
+    __ lwu(to->as_Register(), Address(fp, reg2offset_in(from)));
+  } else if (from->is_stack()) {
+    ShouldNotReachHere();
+  } else if (from == to) {
+    // register source already in place: nothing to emit
+  } else if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
+    __ fmv_s(to->as_FloatRegister(), from->as_FloatRegister());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Move a 64-bit long arg from src (read off fp when on the stack) to dst (written off sp when on the stack)
+static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      // stack to stack, staged through t0
+      __ ld(t0, Address(fp, reg2offset_in(src.first())));
+      __ sd(t0, Address(sp, reg2offset_out(dst.first())));
+    } else {
+      // stack to reg
+      __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
+    }
+  } else if (dst.first()->is_stack()) {
+    // reg to stack
+    __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
+  } else {
+    if (dst.first() != src.first()) {  // reg to reg; no move is emitted when the arg is already in place
+      __ mv(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+// Move a double arg from src to dst; a double read from the stack may land in an integer register
+static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
+  assert(src.first()->is_stack() && dst.first()->is_stack() ||
+         src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");  // reg to stack is not accepted
+  if (src.first()->is_stack()) {
+    if (dst.first()->is_stack()) {
+      __ ld(t0, Address(fp, reg2offset_in(src.first())));  // stack to stack: stage the 64-bit value in t0
+      __ sd(t0, Address(sp, reg2offset_out(dst.first())));
+    } else if (dst.first()-> is_Register()) {
+      __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));  // stack to integer register
+    } else {
+      ShouldNotReachHere();
+    }
+  } else if (src.first() != dst.first()) {  // register source; nothing is emitted when src and dst coincide
+    if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
+      __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());  // float register to float register
+    } else {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // Spill the native result to fp - 3 * wordSize: f10 for T_FLOAT/T_DOUBLE, x10 otherwise, nothing for T_VOID.
+  // The frame_slots arg is always ignored; the space just below the frame pointer is free to use by this time.
+  switch (ret_type) {
+    case T_FLOAT:
+      __ fsw(f10, Address(fp, -3 * wordSize));  // single-precision store
+      break;
+    case T_DOUBLE:
+      __ fsd(f10, Address(fp, -3 * wordSize));  // double-precision store
+      break;
+    case T_VOID:  break;
+    default: {
+      __ sd(x10, Address(fp, -3 * wordSize));  // every other type is saved as a full 64-bit x10
+    }
+  }
+}
+
+void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
+  // Reload the native result from fp - 3 * wordSize: f10 for T_FLOAT/T_DOUBLE, x10 otherwise, nothing for T_VOID.
+  // The frame_slots arg is always ignored; this reads the same slot that save_native_result writes.
+  switch (ret_type) {
+    case T_FLOAT:
+      __ flw(f10, Address(fp, -3 * wordSize));  // single-precision load
+      break;
+    case T_DOUBLE:
+      __ fld(f10, Address(fp, -3 * wordSize));  // double-precision load
+      break;
+    case T_VOID:  break;
+    default: {
+      __ ld(x10, Address(fp, -3 * wordSize));  // every other type is reloaded as a full 64-bit x10
+    }
+  }
+}
+
+static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {  // Save the register-resident args in [first_arg, arg_count) on the stack
+  RegSet x;  // integer argument registers, pushed as one set after the loop
+  for ( int i = first_arg ; i < arg_count ; i++ ) {
+    if (args[i].first()->is_Register()) {
+      x = x + args[i].first()->as_Register();
+    } else if (args[i].first()->is_FloatRegister()) {
+      __ addi(sp, sp, -2 * wordSize);  // each float argument register gets its own two-word slot
+      __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0));
+    }
+  }
+  __ push_reg(x, sp);  // pushed last, so restore_args pops this set first
+}
+
+static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {  // Reload the args saved by save_args, undoing its pushes in reverse
+  RegSet x;  // integer argument registers, rebuilt exactly as save_args collected them
+  for ( int i = first_arg ; i < arg_count ; i++ ) {
+    if (args[i].first()->is_Register()) {
+      x = x + args[i].first()->as_Register();
+    } else {
+      ;  // not an integer register: nothing to collect
+    }
+  }
+  __ pop_reg(x, sp);  // the integer set was pushed last by save_args, so it comes off first
+  for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {  // float slots were pushed in ascending arg order; pop them descending
+    if (args[i].first()->is_Register()) {
+      ;  // already restored by pop_reg above
+    } else if (args[i].first()->is_FloatRegister()) {
+      __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0));
+      __ add(sp, sp, 2 * wordSize);  // release this register's two-word slot
+    }
+  }
+}
+
+static void rt_call(MacroAssembler* masm, address dest) {  // Emit a call to the runtime entry point dest
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  if (cb) {  // find_blob returned a blob for dest: use far_call
+    __ far_call(RuntimeAddress(dest));
+  } else {  // no blob found for dest: materialize the address and call through t0
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(dest), offset);
+    __ jalr(x1, t0, offset);  // link in x1, target is t0 + offset
+  }
+}
+
+static void verify_oop_args(MacroAssembler* masm,
+                            const methodHandle& method,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {  // When VerifyOops is set, emit verify_oop for every T_OBJECT/T_ARRAY parameter
+  const Register temp_reg = x9;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < method->size_of_parameters(); i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));  // stack arg: load it into temp_reg before verifying
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 const methodHandle& method,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {  // Emit the dispatch code for a method handle intrinsic
+  verify_oop_args(masm, method, sig_bt, regs);
+  vmIntrinsics::ID iid = method->intrinsic_id();
+
+  // Work out which registers hold the receiver and the trailing MemberName, if any
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
+  if (ref_kind != 0) {
+    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
+    member_reg = x9;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (iid == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    fatal("unexpected intrinsic id %d", iid);
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
+    VMReg r = regs[member_arg_pos].first();
+    if (r->is_stack()) {
+      __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(method->size_of_parameters() > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      fatal("receiver always in a register");  // NOTE(review): presumably fatal() does not return, leaving the load below unreachable
+      receiver_reg = x12;  // known to be free at this point
+      __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, iid,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
+// ---------------------------------------------------------------------------
+// Generate a native wrapper for a given method.  The method takes arguments
+// in the Java compiled code convention, marshals them to the native
+// convention (handlizes oops, etc), transitions to native, makes the call,
+// returns to java state (possibly blocking), unhandlizes any result and
+// returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GCLocker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for
+// them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
+nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
+                                                const methodHandle& method,
+                                                int compile_id,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
+                                                BasicType ret_type,
+                                                address critical_entry) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+
+    // First instruction must be a nop as it may need to be patched on deoptimisation
+    MacroAssembler::assert_alignment(__ pc());
+    __ nop();
+    gen_special_dispatch(masm,
+                         method,
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
+  address native_func = method->native_function();
+  assert(native_func != NULL, "must have function");
+
+  // An OopMap for lock (and class if static)
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  intptr_t start = (intptr_t)__ pc();
+
+  // We have received a description of where all the java arg are located
+  // on entry to the wrapper. We need to convert these args to where
+  // the jni function will expect them. To figure out where they go
+  // we convert the java signature to a C signature by inserting
+  // the hidden arguments as arg[0] and possibly arg[1] (static method)
+
+  const int total_in_args = method->size_of_parameters();
+  int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
+
+  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
+  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
+  BasicType* in_elem_bt = NULL;
+
+  int argc = 0;
+  out_sig_bt[argc++] = T_ADDRESS;
+  if (method->is_static()) {
+    out_sig_bt[argc++] = T_OBJECT;
+  }
+
+  for (int i = 0; i < total_in_args ; i++) {
+    out_sig_bt[argc++] = in_sig_bt[i];
+  }
+
+  // Now figure out where the args must be stored and how much stack space
+  // they require.
+  int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
+
+  // Compute framesize for the wrapper.  We need to handlize all oops in
+  // incoming registers
+
+  // Calculate the total number of stack slots we will need.
+
+  // First count the abi requirement plus all of the outgoing args
+  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
+
+  // Now the space for the inbound oop handle area
+  int total_save_slots = 8 * VMRegImpl::slots_per_word;  // 8 arguments passed in registers
+
+  int oop_handle_offset = stack_slots;
+  stack_slots += total_save_slots;
+
+  // Now any space we need for handlizing a klass if static method
+
+  int klass_slot_offset = 0;
+  int klass_offset = -1;
+  int lock_slot_offset = 0;
+  bool is_static = false;
+
+  if (method->is_static()) {
+    klass_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
+    is_static = true;
+  }
+
+  // Plus a lock if needed
+
+  if (method->is_synchronized()) {
+    lock_slot_offset = stack_slots;
+    stack_slots += VMRegImpl::slots_per_word;
+  }
+
+  // Now a place (+2) to save return values or temp during shuffling
+  // + 4 for return address (which we own) and saved fp
+  stack_slots += 6;
+
+  // Ok The space we have allocated will look like:
+  //
+  //
+  // FP-> |                     |
+  //      | 2 slots (ra)        |
+  //      | 2 slots (fp)        |
+  //      |---------------------|
+  //      | 2 slots for moves   |
+  //      |---------------------|
+  //      | lock box (if sync)  |
+  //      |---------------------| <- lock_slot_offset
+  //      | klass (if static)   |
+  //      |---------------------| <- klass_slot_offset
+  //      | oopHandle area      |
+  //      |---------------------| <- oop_handle_offset (8 java arg registers)
+  //      | outbound memory     |
+  //      | based arguments     |
+  //      |                     |
+  //      |---------------------|
+  //      |                     |
+  // SP-> | out_preserved_slots |
+  //
+  //
+
+
+  // Now compute actual number of stack words we need rounding to make
+  // stack properly aligned.
+  stack_slots = align_up(stack_slots, StackAlignmentInSlots);
+
+  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
+
+  // First thing make an ic check to see if we should even be here
+
+  // We are free to use all registers as temps without saving them and
+  // restoring them except fp. fp is the only callee save register
+  // as far as the interpreter and the compiler(s) are concerned.
+
+
+  const Register ic_reg = t1;
+  const Register receiver = j_rarg0;
+
+  Label hit;
+  Label exception_pending;
+
+  assert_different_registers(ic_reg, receiver, t0);
+  __ verify_oop(receiver);
+  __ cmp_klass(receiver, ic_reg, t0, hit);
+
+  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+  // Verified entry point must be aligned
+  __ align(8);
+
+  __ bind(hit);
+
+  int vep_offset = ((intptr_t)__ pc()) - start;
+
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump.
+  MacroAssembler::assert_alignment(__ pc());
+  __ nop();
+
+  // Generate stack overflow check
+  __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size());
+
+  // Generate a new frame for the wrapper.
+  __ enter();
+  // -2 because return address is already present and so is saved fp
+  __ sub(sp, sp, stack_size - 2 * wordSize);
+
+  // Frame is now completed as far as size and linkage.
+  int frame_complete = ((intptr_t)__ pc()) - start;
+
+  // We use x18 as the oop handle for the receiver/klass
+  // It is callee save so it survives the call to native
+
+  const Register oop_handle_reg = x18;
+
+  //
+  // We immediately shuffle the arguments so that any vm call we have to
+  // make from here on out (sync slow path, jvmti, etc.) we will have
+  // captured the oops from our caller and have a valid oopMap for
+  // them.
+
+  // -----------------
+  // The Grand Shuffle
+
+  // The Java calling convention is either equal (linux) or denser (win64) than the
+  // c calling convention. However, because of the jni_env argument, the c calling
+  // convention always has at least one more (and two for static) arguments than Java.
+  // Therefore if we move the args from java -> c backwards then we will never have
+  // a register->register conflict and we don't have to build a dependency graph
+  // and figure out how to break any cycles.
+  //
+
+  // Record esp-based slot for receiver on stack for non-static methods
+  int receiver_offset = -1;
+
+  // This is a trick. We double the stack slots so we can claim
+  // the oops in the caller's frame. Since we are sure to have
+  // more args than the caller doubling is enough to make
+  // sure we can capture all the incoming oop args from the
+  // caller.
+  //
+  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+  assert_cond(map != NULL);
+
+  int float_args = 0;
+  int int_args = 0;
+
+#ifdef ASSERT
+  bool reg_destroyed[RegisterImpl::number_of_registers];
+  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
+  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
+    reg_destroyed[r] = false;
+  }
+  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
+    freg_destroyed[f] = false;
+  }
+
+#endif /* ASSERT */
+
+  // For JNI natives the incoming and outgoing registers are offset upwards.
+  GrowableArray<int> arg_order(2 * total_in_args);
+  VMRegPair tmp_vmreg;
+  tmp_vmreg.set2(x9->as_VMReg());
+
+  for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
+    arg_order.push(i);
+    arg_order.push(c_arg);
+  }
+
+  int temploc = -1;
+  for (int ai = 0; ai < arg_order.length(); ai += 2) {
+    int i = arg_order.at(ai);
+    int c_arg = arg_order.at(ai + 1);
+    __ block_comment(err_msg("mv %d -> %d", i, c_arg));
+    assert(c_arg != -1 && i != -1, "wrong order");
+#ifdef ASSERT
+    if (in_regs[i].first()->is_Register()) {
+      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
+    } else if (in_regs[i].first()->is_FloatRegister()) {
+      assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
+    }
+    if (out_regs[c_arg].first()->is_Register()) {
+      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
+    } else if (out_regs[c_arg].first()->is_FloatRegister()) {
+      freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
+    }
+#endif /* ASSERT */
+    switch (in_sig_bt[i]) {
+      case T_ARRAY:
+      case T_OBJECT:
+        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
+                    ((i == 0) && (!is_static)),
+                    &receiver_offset);
+        int_args++;
+        break;
+      case T_VOID:
+        break;
+
+      case T_FLOAT:
+        float_move(masm, in_regs[i], out_regs[c_arg]);
+        float_args++;
+        break;
+
+      case T_DOUBLE:
+        assert( i + 1 < total_in_args &&
+                in_sig_bt[i + 1] == T_VOID &&
+                out_sig_bt[c_arg + 1] == T_VOID, "bad arg list");
+        double_move(masm, in_regs[i], out_regs[c_arg]);
+        float_args++;
+        break;
+
+      case T_LONG :
+        long_move(masm, in_regs[i], out_regs[c_arg]);
+        int_args++;
+        break;
+
+      case T_ADDRESS:
+        assert(false, "found T_ADDRESS in java args");
+        break;
+
+      default:
+        move32_64(masm, in_regs[i], out_regs[c_arg]);
+        int_args++;
+    }
+  }
+
+  // point c_arg at the first arg that is already loaded in case we
+  // need to spill before we call out
+  int c_arg = total_c_args - total_in_args;
+
+  // Pre-load a static method's oop into c_rarg1.
+  if (method->is_static()) {
+
+    //  load oop into a register
+    __ movoop(c_rarg1,
+              JNIHandles::make_local(method->method_holder()->java_mirror()),
+              /*immediate*/true);
+
+    // Now handlize the static class mirror it's known not-null.
+    __ sd(c_rarg1, Address(sp, klass_offset));
+    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
+
+    // Now get the handle
+    __ la(c_rarg1, Address(sp, klass_offset));
+    // and protect the arg if we must spill
+    c_arg--;
+  }
+
+  // Change state to native (we save the return address in the thread, since it might not
+  // be pushed on the stack when we do a stack traversal).
+  // We use the same pc/oopMap repeatedly when we call out
+
+  Label native_return;
+  __ set_last_Java_frame(sp, noreg, native_return, t0);
+
+  Label dtrace_method_entry, dtrace_method_entry_done;
+  {
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
+    __ lbu(t0, Address(t0, offset));
+    __ addw(t0, t0, zr);
+    __ bnez(t0, dtrace_method_entry);
+    __ bind(dtrace_method_entry_done);
+  }
+
+  // RedefineClasses() tracing support for obsolete method entry
+  if (log_is_enabled(Trace, redefine, class, obsolete)) {
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
+      xthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+  }
+
+  // Lock a synchronized method
+
+  // Register definitions used by locking and unlocking
+
+  const Register swap_reg = x10;
+  const Register obj_reg  = x9;  // Will contain the oop
+  const Register lock_reg = x30;  // Address of compiler lock object (BasicLock)
+  const Register old_hdr  = x30;  // value of old header at unlock time
+  const Register tmp      = ra;
+
+  Label slow_path_lock;
+  Label lock_done;
+
+  if (method->is_synchronized()) {
+
+    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
+
+    // Get the handle (the 2nd argument)
+    __ mv(oop_handle_reg, c_rarg1);
+
+    // Get address of the box
+
+    __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+
+    // Load the oop from the handle
+    __ ld(obj_reg, Address(oop_handle_reg, 0));
+
+    if (UseBiasedLocking) {
+      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock);
+    }
+
+    // Load (object->mark() | 1) into swap_reg % x10
+    __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+    __ ori(swap_reg, t0, 1);
+
+    // Save (object->mark() | 1) into BasicLock's displaced header
+    __ sd(swap_reg, Address(lock_reg, mark_word_offset));
+
+    // src -> dest if dest == x10 else x10 <- dest
+    {
+      Label here;
+      __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL);
+    }
+
+    // Test if the oopMark is an obvious stack pointer, i.e.,
+    //  1) (mark & 3) == 0, and
+    //  2) sp <= mark < sp + os::pagesize()
+    // These 3 tests can be done by evaluating the following
+    // expression: ((mark - sp) & (3 - os::vm_page_size())),
+    // assuming both stack pointer and pagesize have their
+    // least significant 2 bits clear.
+    // NOTE: the oopMark is in swap_reg % x10 as the result of cmpxchg
+
+    __ sub(swap_reg, swap_reg, sp);
+    __ andi(swap_reg, swap_reg, 3 - os::vm_page_size());
+
+    // Save the test result, for recursive case, the result is zero
+    __ sd(swap_reg, Address(lock_reg, mark_word_offset));
+    __ bnez(swap_reg, slow_path_lock);
+
+    // Slow path will re-enter here
+    __ bind(lock_done);
+  }
+
+
+  // Finally just about ready to make the JNI call
+
+  // get JNIEnv* which is first argument to native
+  __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset())));
+
+  // Now set thread in native
+  __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+  __ mv(t0, _thread_in_native);
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  __ sw(t0, Address(t1));
+
+  rt_call(masm, native_func);
+
+  __ bind(native_return);
+
+  intptr_t return_pc = (intptr_t) __ pc();
+  oop_maps->add_gc_map(return_pc - start, map);
+
+  // Unpack native results.
+  if (ret_type != T_OBJECT && ret_type != T_ARRAY) {
+    __ cast_primitive_type(ret_type, x10);
+  }
+
+  Label safepoint_in_progress, safepoint_in_progress_done;
+  Label after_transition;
+
+  // Switch thread to "native transition" state before reading the synchronization state.
+  // This additional state is necessary because reading and testing the synchronization
+  // state is not atomic w.r.t. GC, as this scenario demonstrates:
+  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
+  //     VM thread changes sync state to synchronizing and suspends threads for GC.
+  //     Thread A is resumed to finish this native method, but doesn't block here since it
+  //     didn't see any synchronization is progress, and escapes.
+  __ mv(t0, _thread_in_native_trans);
+
+  __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+  // Force this write out before the read below
+  __ membar(MacroAssembler::AnyAny);
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  {
+    __ safepoint_poll_acquire(safepoint_in_progress);
+    __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset()));
+    __ bnez(t0, safepoint_in_progress);
+    __ bind(safepoint_in_progress_done);
+  }
+
+  // change thread state
+  __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+  __ mv(t0, _thread_in_Java);
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  __ sw(t0, Address(t1));
+  __ bind(after_transition);
+
+  Label reguard;
+  Label reguard_done;
+  __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));
+  __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled);
+  __ beq(t0, t1, reguard);
+  __ bind(reguard_done);
+
+  // native result if any is live
+
+  // Unlock
+  Label unlock_done;
+  Label slow_path_unlock;
+  if (method->is_synchronized()) {
+
+    // Get locked oop from the handle we passed to jni
+    __ ld(obj_reg, Address(oop_handle_reg, 0));
+
+    Label done;
+
+    if (UseBiasedLocking) {
+      __ biased_locking_exit(obj_reg, old_hdr, done);
+    }
+
+    // Simple recursive lock?
+    __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ beqz(t0, done);
+
+    // Must save x10 if it is live now because cmpxchg must use it
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+    // get address of the stack lock
+    __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    //  get old displaced header
+    __ ld(old_hdr, Address(x10, 0));
+
+    // Atomic swap old header if oop still contains the stack lock
+    Label succeed;
+    __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock);
+    __ bind(succeed);
+
+    // slow path re-enters here
+    __ bind(unlock_done);
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ bind(done);
+  }
+
+  Label dtrace_method_exit, dtrace_method_exit_done;
+  {
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
+    __ lbu(t0, Address(t0, offset));
+    __ bnez(t0, dtrace_method_exit);
+    __ bind(dtrace_method_exit_done);
+  }
+
+  __ reset_last_Java_frame(false);
+
+  // Unbox oop result, e.g. JNIHandles::resolve result.
+  if (is_reference_type(ret_type)) {
+    __ resolve_jobject(x10, xthread, t1);
+  }
+
+  if (CheckJNICalls) {
+    // clear_pending_jni_exception_check
+    __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
+  }
+
+  // reset handle block
+  __ ld(x12, Address(xthread, JavaThread::active_handles_offset()));
+  __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes()));
+
+  __ leave();
+
+  // Any exception pending?
+  __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+  __ bnez(t0, exception_pending);
+
+  // We're done
+  __ ret();
+
+  // Unexpected paths are out of line and go here
+
+  // forward the exception
+  __ bind(exception_pending);
+
+  // and forward the exception
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // Slow path locking & unlocking
+  if (method->is_synchronized()) {
+
+    __ block_comment("Slow path lock {");
+    __ bind(slow_path_lock);
+
+    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
+    // args are (oop obj, BasicLock* lock, JavaThread* thread)
+
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+
+    __ mv(c_rarg0, obj_reg);
+    __ mv(c_rarg1, lock_reg);
+    __ mv(c_rarg2, xthread);
+
+    // Not a leaf but we have last_Java_frame setup as we want
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+
+#ifdef ASSERT
+    { Label L;
+      __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+      __ beqz(t0, L);
+      __ stop("no pending exception allowed on exit from monitorenter");
+      __ bind(L);
+    }
+#endif
+    __ j(lock_done);
+
+    __ block_comment("} Slow path lock");
+
+    __ block_comment("Slow path unlock {");
+    __ bind(slow_path_unlock);
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ mv(c_rarg2, xthread);
+    __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ mv(c_rarg0, obj_reg);
+
+    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
+    // NOTE that obj_reg == x9 currently
+    __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+    __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+
+    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+      __ beqz(t0, L);
+      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
+      __ bind(L);
+    }
+#endif /* ASSERT */
+
+    __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+    __ j(unlock_done);
+
+    __ block_comment("} Slow path unlock");
+
+  } // synchronized
+
+  // SLOW PATH Reguard the stack if needed
+
+  __ bind(reguard);
+  save_native_result(masm, ret_type, stack_slots);
+  rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+  restore_native_result(masm, ret_type, stack_slots);
+  // and continue
+  __ j(reguard_done);
+
+  // SLOW PATH safepoint
+  {
+    __ block_comment("safepoint {");
+    __ bind(safepoint_in_progress);
+
+    // Don't use call_VM as it will see a possible pending exception and forward it
+    // and never return here preventing us from clearing _last_native_pc down below.
+    //
+    save_native_result(masm, ret_type, stack_slots);
+    __ mv(c_rarg0, xthread);
+#ifndef PRODUCT
+    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset);
+    __ jalr(x1, t0, offset);
+
+    // Restore any method result value
+    restore_native_result(masm, ret_type, stack_slots);
+
+    __ j(safepoint_in_progress_done);
+    __ block_comment("} safepoint");
+  }
+
+  // SLOW PATH dtrace support
+  {
+    __ block_comment("dtrace entry {");
+    __ bind(dtrace_method_entry);
+
+    // We have all of the arguments setup at this point. We must not clobber any
+    // argument registers here, so they are saved and restored around the call.
+
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+      xthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+    __ j(dtrace_method_entry_done);
+    __ block_comment("} dtrace entry");
+  }
+
+  {
+    __ block_comment("dtrace exit {");
+    __ bind(dtrace_method_exit);
+    save_native_result(masm, ret_type, stack_slots);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
+         xthread, c_rarg1);
+    restore_native_result(masm, ret_type, stack_slots);
+    __ j(dtrace_method_exit_done);
+    __ block_comment("} dtrace exit");
+  }
+
+  __ flush();
+
+  nmethod *nm = nmethod::new_native_nmethod(method,
+                                            compile_id,
+                                            masm->code(),
+                                            vep_offset,
+                                            frame_complete,
+                                            stack_slots / VMRegImpl::slots_per_word,
+                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
+                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
+                                            oop_maps);
+  assert(nm != NULL, "create native nmethod fail!");
+  return nm;
+}
+
+// Computes the size adjustment (in number of words) to apply to a c2i adapter
+// activation during deoptimization; the result is aligned up to 2 words.
+int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
+  assert(callee_locals >= callee_parameters,
+         "test and remove; got more parms than locals");
+  if (callee_parameters > callee_locals) {
+    return 0;                   // No adjustment for negative locals
+  }
+  // The extra locals are counted in stack words.
+  const int extra_words = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
+  return align_up(extra_words, 2);
+}
+
+//------------------------------generate_deopt_blob----------------------------
+void SharedRuntime::generate_deopt_blob() {
+  // Generates the deoptimization blob (stored in _deopt_blob). Allocate space for the code.
+  ResourceMark rm;
+  // Setup code generation tools
+  int pad = 0;
+  CodeBuffer buffer("deopt_blob", 2048 + pad, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  int frame_size_in_words = -1;
+  OopMap* map = NULL;
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(masm != NULL && oop_maps != NULL);
+  RegisterSaver reg_saver;
+
+  // -------------
+  // This code enters when returning to a de-optimized nmethod.  A return
+  // address has been pushed on the stack, and return values are in
+  // registers.
+  // If we are doing a normal deopt then we were called from the patched
+  // nmethod from the point we returned to the nmethod. So the return
+  // address on the stack is wrong by NativeCall::instruction_size
+  // We will adjust the value so it looks like we have the original return
+  // address on the stack (like when we eagerly deoptimized).
+  // In the case of an exception pending when deoptimizing, we enter
+  // with a return address on the stack that points after the call we patched
+  // into the exception handler. We have the following register state from,
+  // e.g., the forward exception stub (see stubGenerator_riscv.cpp).
+  //    x10: exception oop
+  //    x9: exception handler
+  //    x13: throwing pc
+  // So in this case we simply jam x13 into the useless return address and
+  // the stack looks just like we want.
+  //
+  // At this point we need to de-opt.  We save the argument return
+  // registers.  We call the first C routine, fetch_unroll_info().  This
+  // routine captures the return values and returns a structure which
+  // describes the current frame size and the sizes of all replacement frames.
+  // The current frame is compiled code and may contain many inlined
+  // functions, each with their own JVM state.  We pop the current frame, then
+  // push all the new frames.  Then we call the C routine unpack_frames() to
+  // populate these frames.  Finally unpack_frames() returns us the new target
+  // address.  Notice that callee-save registers are BLOWN here; they have
+  // already been captured in the vframeArray at the time the return PC was
+  // patched.
+  address start = __ pc();
+  Label cont;
+
+  // Prolog for non exception case!
+
+  // Save everything in sight.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Normal deoptimization.  Save exec mode for unpack_frames.
+  __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved
+  __ j(cont);
+
+  int reexecute_offset = __ pc() - start;
+
+  // Reexecute case
+  // the return address is the pc that describes which bci to re-execute at
+
+  // No need to update map as each call to save_live_registers will produce identical oopmap
+  (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
+  __ j(cont);
+
+  int exception_offset = __ pc() - start;
+
+  // Prolog for exception case
+
+  // all registers are dead at this entry point, except for x10, and
+  // x13 which contain the exception oop and exception pc
+  // respectively.  Set them in TLS and fall thru to the
+  // unpack_with_exception_in_tls entry point.
+
+  __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+
+  int exception_in_tls_offset = __ pc() - start;
+
+  // new implementation because exception oop is now passed in JavaThread
+
+  // Prolog for exception case
+  // All registers must be preserved because they might be used by LinearScan
+  // Exception oop and throwing PC are passed in JavaThread
+  // tos: stack at point of call to method that threw the exception (i.e. only
+  // args are on the stack, no return address)
+
+  // The return address pushed by save_live_registers will be patched
+  // later with the throwing pc. The correct value is not available
+  // now because loading it from memory would destroy registers.
+
+  // NB: The SP at this point must be the SP of the method that is
+  // being deoptimized.  Deoptimization assumes that the frame created
+  // here by save_live_registers is immediately below the method's SP.
+  // This is a somewhat fragile mechanism.
+
+  // Save everything in sight.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // Now it is safe to overwrite any register
+
+  // Deopt during an exception.  Save exec mode for unpack_frames.
+  __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved
+
+  // load throwing pc from JavaThread and patch it as the return address
+  // of the current frame. Then clear the field in JavaThread
+
+  __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(x13, Address(fp, frame::return_addr_offset * wordSize));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+#ifdef ASSERT
+  // verify that there is really an exception oop in JavaThread
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ verify_oop(x10);
+
+  // verify that there is no pending exception
+  Label no_pending_exception;
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ beqz(t0, no_pending_exception);
+  __ stop("must not have pending exception here");
+  __ bind(no_pending_exception);
+#endif
+
+  __ bind(cont);
+
+  // Call C code.  Need thread and this frame, but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.
+  //
+  // UnrollBlock* fetch_unroll_info(JavaThread* thread)
+
+  // fetch_unroll_info needs to call last_java_frame().
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+#ifdef ASSERT
+  {
+    Label L;
+    __ ld(t0, Address(xthread,
+                              JavaThread::last_Java_fp_offset()));
+    __ beqz(t0, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif // ASSERT
+  __ mv(c_rarg0, xthread);
+  __ mv(c_rarg1, xcpool);
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Need to have an oopmap that tells fetch_unroll_info where to
+  // find any register it might need.
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false);
+
+  // Load UnrollBlock* into x15
+  __ mv(x15, x10);
+
+  __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+  Label noException;
+  __ mv(t0, Deoptimization::Unpack_exception);
+  __ bne(xcpool, t0, noException); // Was exception pending?
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+
+  __ verify_oop(x10);
+
+  // Overwrite the result registers with the exception results.
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  __ bind(noException);
+
+  // Only register save data is on the stack.
+  // Now restore the result registers.  Everything else is either dead
+  // or captured in the vframeArray.
+
+  // Restore fp result register
+  __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  // Restore integer result register
+  __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Pop all of the register save area off the stack
+  __ add(sp, sp, frame_size_in_words * wordSize);
+
+  // All of the register save area has been popped off the stack. Only the
+  // return address remains.
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+  //
+  // Note: by leaving the return address of self-frame on the stack
+  // and using the size of frame 2 to adjust the stack
+  // when we are done the return to frame 3 will still be on the stack.
+
+  // Pop deoptimized frame
+  __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(x12, x12, 2 * wordSize);
+  __ add(sp, sp, x12);
+  __ ld(fp, Address(sp, 0));
+  __ ld(ra, Address(sp, wordSize));
+  __ addi(sp, sp, 2 * wordSize);
+  // RA should now be the return address to the caller (3)
+
+#ifdef ASSERT
+  // Compilers generate code that bang the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
+  __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
+  __ bang_stack_size(x9, x12);
+#endif
+  // Load address of array of frame pcs into x12
+  __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Load address of array of frame sizes into x14
+  __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
+
+  // Load counter into x13
+  __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
+
+  // Now adjust the caller's stack to make up for the extra locals
+  // but record the original sp so that we can save it in the skeletal interpreter
+  // frame and the stack walking of interpreter_sender will get the unextended sp
+  // value and not the "real" sp value.
+
+  const Register sender_sp = x16;
+
+  __ mv(sender_sp, sp);
+  __ lwu(x9, Address(x15,
+                     Deoptimization::UnrollBlock::
+                     caller_adjustment_offset_in_bytes()));
+  __ sub(sp, sp, x9);
+
+  // Push interpreter frames in a loop
+  __ mv(t0, 0xDEADDEAD);               // Make a recognizable pattern
+  __ mv(t1, t0);
+  Label loop;
+  __ bind(loop);
+  __ ld(x9, Address(x14, 0));          // Load frame size
+  __ addi(x14, x14, wordSize);
+  __ sub(x9, x9, 2 * wordSize);        // We'll push pc and fp by hand
+  __ ld(ra, Address(x12, 0));          // Load pc
+  __ addi(x12, x12, wordSize);
+  __ enter();                          // Save old & set new fp
+  __ sub(sp, sp, x9);                  // Prolog
+  // This value is corrected by layout_activation_impl
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  __ mv(sender_sp, sp);                // Pass sender_sp to next frame
+  __ addi(x13, x13, -1);               // Decrement counter
+  __ bnez(x13, loop);
+
+  // Re-push self-frame
+  __ ld(ra, Address(x12));
+  __ enter();
+
+  // Allocate a full sized register save area.  We subtract 2 because
+  // enter() just pushed 2 words
+  __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
+
+  // Restore frame locals after moving the frame
+  __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // restore return values to their stack-slots with the new SP.
+  //
+  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
+
+  // Use fp because the frames look interpreted now
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+  __ mv(c_rarg0, xthread);
+  __ mv(c_rarg1, xcpool); // second arg: exec_mode
+  offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+  __ jalr(x1, t0, offset);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start,
+                       new OopMap(frame_size_in_words, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true);
+
+  // Collect return values
+  __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
+  __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
+
+  // Pop self-frame.
+  __ leave();                           // Epilog
+
+  // Jump to interpreter
+  __ ret();
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
+  assert(_deopt_blob != NULL, "create deoptimization blob fail!");
+  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+}
+
+uint SharedRuntime::out_preserve_stack_slots() {
+  return 0;  // always reports zero preserved stack slots
+}
+
+#ifdef COMPILER2
+//------------------------------generate_uncommon_trap_blob--------------------
+void SharedRuntime::generate_uncommon_trap_blob() {
+  // Generates the uncommon trap blob (stored in _uncommon_trap_blob). Allocate space for the code.
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+  address start = __ pc();
+
+  // Push self-frame.  We get here with a return address in RA
+  // and sp should be 16 byte aligned
+  // push fp and retaddr by hand
+  __ addi(sp, sp, -2 * wordSize);
+  __ sd(ra, Address(sp, wordSize));
+  __ sd(fp, Address(sp, 0));
+  // we don't expect an arg reg save area
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // compiler left unloaded_class_index in j_rarg0 move to where the
+  // runtime expects it.
+  __ addiw(c_rarg1, j_rarg0, 0);
+
+  // we need to set the past SP to the stack pointer of the stub frame
+  // and the pc to the address where this runtime call will return
+  // (although actually any pc in this code blob will do).
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // capture callee-saved registers as well as return values.
+  //
+  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
+  //
+  // n.b. 3 gp args, 0 fp args, integral return type
+
+  __ mv(c_rarg0, xthread);
+  __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  int32_t offset = 0;
+  __ la_patchable(t0,
+        RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                        Deoptimization::uncommon_trap)), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site
+  OopMapSet* oop_maps = new OopMapSet();
+  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
+  assert_cond(oop_maps != NULL && map != NULL);
+
+  // location of fp is known implicitly by the frame sender code
+
+  oop_maps->add_gc_map(__ pc() - start, map);
+
+  __ reset_last_Java_frame(false);
+
+  // move UnrollBlock* into x14
+  __ mv(x14, x10);
+
+#ifdef ASSERT
+  { Label L;  // the returned UnrollBlock must carry the unpack kind we passed in
+    __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+    __ mvw(t1, Deoptimization::Unpack_uncommon_trap);
+    __ beq(t0, t1, L);
+    __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
+  // Pop all the frames we must move/replace.
+  //
+  // Frame picture (youngest to oldest)
+  // 1: self-frame (no frame link)
+  // 2: deopting frame  (no frame link)
+  // 3: caller of deopting frame (could be compiled/interpreted).
+
+  __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
+
+  // Pop deoptimized frame (int)
+  __ lwu(x12, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      size_of_deoptimized_frame_offset_in_bytes()));
+  __ sub(x12, x12, 2 * wordSize);
+  __ add(sp, sp, x12);
+  __ ld(fp, sp, 0);
+  __ ld(ra, sp, wordSize);
+  __ addi(sp, sp, 2 * wordSize);
+  // RA should now be the return address to the caller (3) frame
+
+#ifdef ASSERT
+  // Compilers generate code that bang the stack by as much as the
+  // interpreter would need. So this stack banging should never
+  // trigger a fault. Verify that it does not on non product builds.
+  __ lwu(x11, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      total_frame_sizes_offset_in_bytes()));
+  __ bang_stack_size(x11, x12);
+#endif
+
+  // Load address of array of frame pcs into x12 (address*)
+  __ ld(x12, Address(x14,
+                     Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
+
+  // Load address of array of frame sizes into x15 (intptr_t*)
+  __ ld(x15, Address(x14,
+                     Deoptimization::UnrollBlock::
+                     frame_sizes_offset_in_bytes()));
+
+  // Counter
+  __ lwu(x13, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      number_of_frames_offset_in_bytes())); // (int)
+
+  // Now adjust the caller's stack to make up for the extra locals but
+  // record the original sp so that we can save it in the skeletal
+  // interpreter frame and the stack walking of interpreter_sender
+  // will get the unextended sp value and not the "real" sp value.
+
+  const Register sender_sp = t1; // temporary register
+
+  __ lwu(x11, Address(x14,
+                      Deoptimization::UnrollBlock::
+                      caller_adjustment_offset_in_bytes())); // (int)
+  __ mv(sender_sp, sp);
+  __ sub(sp, sp, x11);
+
+  // Push interpreter frames in a loop
+  Label loop;
+  __ bind(loop);
+  __ ld(x11, Address(x15, 0));       // Load frame size
+  __ sub(x11, x11, 2 * wordSize);    // We'll push pc and fp by hand
+  __ ld(ra, Address(x12, 0));        // Save return address
+  __ enter();                        // and old fp & set new fp
+  __ sub(sp, sp, x11);               // Prolog
+  __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
+  // This value is corrected by layout_activation_impl
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ mv(sender_sp, sp);              // Pass sender_sp to next frame
+  __ add(x15, x15, wordSize);        // Bump array pointer (sizes)
+  __ add(x12, x12, wordSize);        // Bump array pointer (pcs)
+  __ subw(x13, x13, 1);              // Decrement counter
+  __ bgtz(x13, loop);
+  __ ld(ra, Address(x12, 0));        // save final return address
+  // Re-push self-frame
+  __ enter();                        // & old fp & set new fp
+
+  // Use fp because the frames look interpreted now
+  // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
+  // Don't need the precise return PC here, just precise enough to point into this code blob.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+  // Call C code.  Need thread but NOT official VM entry
+  // crud.  We cannot block on this call, no GC can happen.  Call should
+  // restore return values to their stack-slots with the new SP.
+  //
+  // BasicType unpack_frames(JavaThread* thread, int exec_mode)
+  //
+
+  // n.b. 2 gp args, 0 fp args, integral return type
+
+  // sp should already be aligned
+  __ mv(c_rarg0, xthread);
+  __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
+  __ jalr(x1, t0, offset);
+
+  // Set an oopmap for the call site
+  // Use the same PC we used for the last java frame
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  // Clear fp AND pc
+  __ reset_last_Java_frame(true);
+
+  // Pop self-frame.
+  __ leave();                 // Epilog
+
+  // Jump to interpreter
+  __ ret();
+
+  // Make sure all code is generated
+  masm->flush();
+
+  _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
+                                                  SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
+
+//------------------------------generate_handler_blob------
+//
+// Generate a special Compile2Runtime blob that saves all registers,
+// and setup oopmap.
+// call_ptr is the runtime entry that is called; poll_type selects the return-poll handling.
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
+  ResourceMark rm;
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  OopMap* map = NULL;
+
+  // Allocate space for the code.  Setup code generation tools.
+  CodeBuffer buffer("handler_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  address start   = __ pc();
+  address call_pc = NULL;
+  int frame_size_in_words = -1;
+  bool cause_return = (poll_type == POLL_AT_RETURN);
+  RegisterSaver reg_saver;
+
+  // Save Integer and Float registers.
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  // The following is basically a call_VM.  However, we need the precise
+  // address of the call in order to generate an oopmap. Hence, we do all the
+  // work ourselves.
+
+  Label retaddr;
+  __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+  // The return address must always be correct so that frame constructor never
+  // sees an invalid pc.
+
+  if (!cause_return) {
+    // overwrite the return address pushed by save_live_registers
+    // Additionally, x18 is a callee-saved register so we can look at
+    // it later to determine if someone changed the return address for
+    // us!
+    __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset()));
+    __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+  }
+
+  // Do the call
+  __ mv(c_rarg0, xthread);
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(call_ptr), offset);
+  __ jalr(x1, t0, offset);
+  __ bind(retaddr);
+
+  // Set an oopmap for the call site.  This oopmap will map all
+  // oop-registers and debug-info registers as callee-saved.  This
+  // will allow deoptimization at this safepoint to find all possible
+  // debug-info recordings, as well as let GC find all oops.
+
+  oop_maps->add_gc_map( __ pc() - start, map);
+
+  Label noException;
+
+  __ reset_last_Java_frame(false);
+
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ beqz(t0, noException);
+
+  // Exception pending
+
+  reg_saver.restore_live_registers(masm);
+
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // No exception case
+  __ bind(noException);
+
+  Label no_adjust, bail;
+  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
+    // If our stashed return pc was modified by the runtime we avoid touching it
+    __ ld(t0, Address(fp, frame::return_addr_offset * wordSize));
+    __ bne(x18, t0, no_adjust);
+
+#ifdef ASSERT
+    // Verify the correct encoding of the poll we're about to skip (each mask covers its whole field).
+    // See NativeInstruction::is_lwu_to_zr()
+    __ lwu(t0, Address(x18));
+    __ andi(t1, t0, 0b1111111);
+    __ mv(t2, 0b0000011);
+    __ bne(t1, t2, bail); // opcode, bits 0-6: 0b0000011
+    __ srli(t1, t0, 7);
+    __ andi(t1, t1, 0b11111);
+    __ bnez(t1, bail);    // rd, bits 7-11: 0b00000
+    __ srli(t1, t0, 12);
+    __ andi(t1, t1, 0b111);
+    __ mv(t2, 0b110);
+    __ bne(t1, t2, bail); // funct3, bits 12-14: 0b110
+#endif
+    // Adjust return pc forward to step over the safepoint poll instruction
+    __ add(x18, x18, NativeInstruction::instruction_size);
+    __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
+  }
+
+  __ bind(no_adjust);
+  // Normal exit, restore registers and exit.
+
+  reg_saver.restore_live_registers(masm);
+  __ ret();
+
+#ifdef ASSERT
+  __ bind(bail);
+  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Fill-out other meta info
+  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
+}
+
+//
+// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
+//
+// Generate a stub that calls into vm to find out the proper destination
+// of a java call. All the argument registers are live at this point
+// but since this is generic code we don't know what they are and the caller
+// must do any gc of the args.
+// destination is the VM entry that is called; name labels the code buffer and the stub.
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+
+  // allocate space for the code
+  ResourceMark rm;
+
+  CodeBuffer buffer(name, 1000, 512);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  int frame_size_in_words = -1;
+  RegisterSaver reg_saver;
+
+  OopMapSet *oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+  OopMap* map = NULL;
+
+  int start = __ offset();
+
+  map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
+
+  int frame_complete = __ offset();
+
+  {
+    Label retaddr;
+    __ set_last_Java_frame(sp, noreg, retaddr, t0);
+
+    __ mv(c_rarg0, xthread);
+    int32_t offset = 0;
+    __ la_patchable(t0, RuntimeAddress(destination), offset);
+    __ jalr(x1, t0, offset);
+    __ bind(retaddr);
+  }
+
+  // Set an oopmap for the call site.
+  // We need this not only for callee-saved registers, but also for volatile
+  // registers that the compiler might be keeping live across a safepoint.
+
+  oop_maps->add_gc_map( __ offset() - start, map);
+
+  // x10 contains the address we are going to jump to assuming no exception got installed
+
+  // clear last_Java_sp
+  __ reset_last_Java_frame(false);
+  // check for pending exceptions
+  Label pending;
+  __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+  __ bnez(t0, pending);
+
+  // get the returned Method*
+  __ get_vm_result_2(xmethod, xthread);
+  __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod)));
+
+  // x10 is where we want to jump, overwrite t0 which is saved and temporary
+  __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0)));
+  reg_saver.restore_live_registers(masm);
+
+  // We are back to the original state on entry and ready to go.
+
+  __ jr(t0);
+
+  // Pending exception after the safepoint
+
+  __ bind(pending);
+
+  reg_saver.restore_live_registers(masm);
+
+  // exception pending => remove activation and forward to exception handler
+
+  __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
+
+  __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // return the blob
+  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
+}
+
+#ifdef COMPILER2
+//------------------------------generate_exception_blob---------------------------
+// creates exception blob at the end
+// Using exception blob, this code is jumped from a compiled method.
+// (see emit_exception_handler in riscv.ad file)
+//
+// Given an exception pc at a call we call into the runtime for the
+// handler in this method. This handler might merely restore state
+// (i.e. callee save registers) unwind the frame and jump to the
+// exception handler for the nmethod if there is no Java level handler
+// for the nmethod.
+//
+// This code is entered with a jmp.
+//
+// Arguments:
+//   x10: exception oop
+//   x13: exception pc
+//
+// Results:
+//   x10: exception oop
+//   x13: exception pc in caller
+//   destination: exception handler of caller
+//
+// Note: the exception pc MUST be at a call (precise debug information)
+//       Registers x10, x13, x12, x14, x15, t0 are not callee saved.
+//
+
+void OptoRuntime::generate_exception_blob() {
+  assert(!OptoRuntime::is_callee_saved_register(R13_num), "");
+  assert(!OptoRuntime::is_callee_saved_register(R10_num), "");
+  assert(!OptoRuntime::is_callee_saved_register(R12_num), "");
+
+  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
+
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("exception_blob", 2048, 1024);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+  assert_cond(masm != NULL);
+
+  // TODO check various assumptions made here
+  //
+  // make sure we do so before running this
+
+  address start = __ pc();
+
+  // push fp and retaddr by hand
+  // Exception pc is 'return address' for stack walker
+  __ addi(sp, sp, -2 * wordSize);
+  __ sd(ra, Address(sp, wordSize));
+  __ sd(fp, Address(sp));
+  // there are no callee save registers and we don't expect an
+  // arg reg save area
+#ifndef PRODUCT
+  assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+  // Store exception in Thread object. We cannot pass any arguments to the
+  // handle_exception call, since we do not want to make any assumption
+  // about the size of the frame where the exception happened in.
+  __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
+
+  // This call does all the hard work.  It checks if an exception handler
+  // exists in the method.
+  // If so, it returns the handler address.
+  // If not, it prepares for stack-unwinding, restoring the callee-save
+  // registers of the frame being removed.
+  //
+  // address OptoRuntime::handle_exception_C(JavaThread* thread)
+  //
+  // n.b. 1 gp arg, 0 fp args, integral return type
+
+  // the stack should always be aligned
+  address the_pc = __ pc();
+  __ set_last_Java_frame(sp, noreg, the_pc, t0);
+  __ mv(c_rarg0, xthread);
+  int32_t offset = 0;
+  __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset);
+  __ jalr(x1, t0, offset);
+
+
+  // handle_exception_C is a special VM call which does not require an explicit
+  // instruction sync afterwards.
+
+  // Set an oopmap for the call site.  This oopmap will only be used if we
+  // are unwinding the stack.  Hence, all locations will be dead.
+  // Callee-saved registers will be the same as the frame above (i.e.,
+  // handle_exception_stub), since they were restored when we got the
+  // exception.
+
+  OopMapSet* oop_maps = new OopMapSet();
+  assert_cond(oop_maps != NULL);
+
+  oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
+
+  __ reset_last_Java_frame(false);
+
+  // Restore callee-saved registers
+
+  // fp is an implicitly saved callee saved register (i.e. the calling
+  // convention will save restore it in prolog/epilog) Other than that
+  // there are no callee save registers now that adapter frames are gone.
+  // and we don't expect an arg reg save area
+  __ ld(fp, Address(sp));
+  __ ld(x13, Address(sp, wordSize));
+  __ addi(sp, sp , 2 * wordSize);
+
+  // x10: exception handler
+
+  // We have a handler in x10 (could be deopt blob).
+  __ mv(t0, x10);
+
+  // Get the exception oop
+  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
+  // Get the exception pc in case we are deoptimized
+  __ ld(x14, Address(xthread, JavaThread::exception_pc_offset()));
+#ifdef ASSERT
+  __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset()));
+  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
+#endif
+  // Clear the exception oop so GC no longer processes it as a root.
+  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
+
+  // x10: exception oop
+  // t0:  exception handler
+  // x13: return address reloaded from the popped self-frame
+  // x14: exception pc (from JavaThread)
+  // Jump to handler
+  __ jr(t0);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Set exception blob
+  _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
+}
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
new file mode 100644
index 0000000000..272dd9aeb3
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -0,0 +1,3743 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/oopMap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/universe.hpp"
+#include "nativeInst_riscv.hpp"
+#include "oops/instanceOop.hpp"
+#include "oops/method.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "utilities/align.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+#if INCLUDE_ZGC
+#include "gc/z/zThreadLocalData.hpp"
+#endif
+
+// Declaration and definition of StubGenerator (no .hpp file).
+// For a more detailed description of the stub routine structure
+// see the comment in stubRoutines.hpp
+
+#undef __
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+// Stub Code definitions
+
+class StubGenerator: public StubCodeGenerator {
+ private:
+
+#ifdef PRODUCT
+#define inc_counter_np(counter) ((void)0)
+#else
+  void inc_counter_np_(int& counter) {             // bump a 32-bit counter; plain load/add/store, clobbers t0/t1
+    __ la(t1, ExternalAddress((address)&counter)); // t1 = address of the counter
+    __ lwu(t0, Address(t1, 0));                    // t0 = current 32-bit value
+    __ addiw(t0, t0, 1);                           // 32-bit increment
+    __ sw(t0, Address(t1, 0));                     // write the new value back
+  }
+#define inc_counter_np(counter) \
+  BLOCK_COMMENT("inc_counter " #counter); \
+  inc_counter_np_(counter);
+#endif
+
+  // Call stubs are used to call Java from C
+  //
+  // Arguments:
+  //    c_rarg0:   call wrapper address                   address
+  //    c_rarg1:   result                                 address
+  //    c_rarg2:   result type                            BasicType
+  //    c_rarg3:   method                                 Method*
+  //    c_rarg4:   (interpreter) entry point              address
+  //    c_rarg5:   parameters                             intptr_t*
+  //    c_rarg6:   parameter size (in words)              int
+  //    c_rarg7:   thread                                 Thread*
+  //
+  // There is no return from the stub itself as any Java result
+  // is written to result
+  //
+  // we save x1 (ra) as the return PC at the base of the frame and
+  // link x8 (fp) below it as the frame pointer installing sp (x2)
+  // into fp.
+  //
+  // we save x10-x17, which accounts for all the c arguments.
+  //
+  // TODO: strictly do we need to save them all? they are treated as
+  // volatile by C so could we omit saving the ones we are going to
+  // place in global registers (thread? method?) or those we only use
+  // during setup of the Java call?
+  //
+  // we don't need to save x5 (t0), which is a caller-saved scratch
+  // register and is used freely as a temporary by the code below.
+  //
+  // we don't need to save x6-x7 and x28-x31 which both C and Java treat as
+  // volatile
+  //
+  // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary
+  // registers and C expects to be callee-save
+  //
+  // so the stub frame looks like this when we enter Java code
+  //
+  //     [ return_from_Java     ] <--- sp
+  //     [ argument word n      ]
+  //      ...
+  // -34 [ argument word 1      ]
+  // -33 [ saved f27            ] <--- sp_after_call
+  // -32 [ saved f26            ]
+  // -31 [ saved f25            ]
+  // -30 [ saved f24            ]
+  // -29 [ saved f23            ]
+  // -28 [ saved f22            ]
+  // -27 [ saved f21            ]
+  // -26 [ saved f20            ]
+  // -25 [ saved f19            ]
+  // -24 [ saved f18            ]
+  // -23 [ saved f9             ]
+  // -22 [ saved f8             ]
+  // -21 [ saved x27            ]
+  // -20 [ saved x26            ]
+  // -19 [ saved x25            ]
+  // -18 [ saved x24            ]
+  // -17 [ saved x23            ]
+  // -16 [ saved x22            ]
+  // -15 [ saved x21            ]
+  // -14 [ saved x20            ]
+  // -13 [ saved x19            ]
+  // -12 [ saved x18            ]
+  // -11 [ saved x9             ]
+  // -10 [ call wrapper   (x10) ]
+  //  -9 [ result         (x11) ]
+  //  -8 [ result type    (x12) ]
+  //  -7 [ method         (x13) ]
+  //  -6 [ entry point    (x14) ]
+  //  -5 [ parameters     (x15) ]
+  //  -4 [ parameter size (x16) ]
+  //  -3 [ thread         (x17) ]
+  //  -2 [ saved fp       (x8)  ]
+  //  -1 [ saved ra       (x1)  ]
+  //   0 [                      ] <--- fp == saved sp (x2)
+
+  // Call stub stack layout word offsets from fp
+  enum call_stub_layout {
+    sp_after_call_off  = -33,  // lowest save slot; generate_call_stub sets sp here
+
+    f27_off            = -33,  // f18-f27, f8-f9: FP registers C expects to be callee-saved
+    f26_off            = -32,
+    f25_off            = -31,
+    f24_off            = -30,
+    f23_off            = -29,
+    f22_off            = -28,
+    f21_off            = -27,
+    f20_off            = -26,
+    f19_off            = -25,
+    f18_off            = -24,
+    f9_off             = -23,
+    f8_off             = -22,
+
+    x27_off            = -21,  // x18-x27, x9: integer registers C expects to be callee-saved
+    x26_off            = -20,
+    x25_off            = -19,
+    x24_off            = -18,
+    x23_off            = -17,
+    x22_off            = -16,
+    x21_off            = -15,
+    x20_off            = -14,
+    x19_off            = -13,
+    x18_off            = -12,
+    x9_off             = -11,
+
+    call_wrapper_off   = -10,  // c_rarg0 (x10); the incoming C arguments follow in order
+    result_off         = -9,   // c_rarg1 (x11)
+    result_type_off    = -8,   // c_rarg2 (x12)
+    method_off         = -7,   // c_rarg3 (x13)
+    entry_point_off    = -6,   // c_rarg4 (x14)
+    parameters_off     = -5,   // c_rarg5 (x15)
+    parameter_size_off = -4,   // c_rarg6 (x16)
+    thread_off         = -3,   // c_rarg7 (x17)
+    fp_f               = -2,   // saved fp (x8)
+    retaddr_off        = -1,   // saved ra (x1)
+  };
+
+  address generate_call_stub(address& return_address) {  // emits the C -> Java call stub; returns its entry, sets return_address
+    assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
+           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
+           "adjust this code");
+
+    StubCodeMark mark(this, "StubRoutines", "call_stub");
+    address start = __ pc();
+
+    const Address sp_after_call (fp, sp_after_call_off  * wordSize);
+
+    const Address call_wrapper  (fp, call_wrapper_off   * wordSize);
+    const Address result        (fp, result_off         * wordSize);
+    const Address result_type   (fp, result_type_off    * wordSize);
+    const Address method        (fp, method_off         * wordSize);
+    const Address entry_point   (fp, entry_point_off    * wordSize);
+    const Address parameters    (fp, parameters_off     * wordSize);
+    const Address parameter_size(fp, parameter_size_off * wordSize);
+
+    const Address thread        (fp, thread_off         * wordSize);
+
+    const Address f27_save      (fp, f27_off            * wordSize);
+    const Address f26_save      (fp, f26_off            * wordSize);
+    const Address f25_save      (fp, f25_off            * wordSize);
+    const Address f24_save      (fp, f24_off            * wordSize);
+    const Address f23_save      (fp, f23_off            * wordSize);
+    const Address f22_save      (fp, f22_off            * wordSize);
+    const Address f21_save      (fp, f21_off            * wordSize);
+    const Address f20_save      (fp, f20_off            * wordSize);
+    const Address f19_save      (fp, f19_off            * wordSize);
+    const Address f18_save      (fp, f18_off            * wordSize);
+    const Address f9_save       (fp, f9_off             * wordSize);
+    const Address f8_save       (fp, f8_off             * wordSize);
+
+    const Address x27_save      (fp, x27_off            * wordSize);
+    const Address x26_save      (fp, x26_off            * wordSize);
+    const Address x25_save      (fp, x25_off            * wordSize);
+    const Address x24_save      (fp, x24_off            * wordSize);
+    const Address x23_save      (fp, x23_off            * wordSize);
+    const Address x22_save      (fp, x22_off            * wordSize);
+    const Address x21_save      (fp, x21_off            * wordSize);
+    const Address x20_save      (fp, x20_off            * wordSize);
+    const Address x19_save      (fp, x19_off            * wordSize);
+    const Address x18_save      (fp, x18_off            * wordSize);
+
+    const Address x9_save       (fp, x9_off             * wordSize);
+
+    // stub code
+
+    address riscv_entry = __ pc();
+
+    // set up frame and move sp to end of save area
+    __ enter();
+    __ addi(sp, fp, sp_after_call_off * wordSize);
+
+    // save register parameters and Java temporary/global registers
+    // n.b. we save thread even though it gets installed in
+    // xthread because we want to sanity check tp later
+    __ sd(c_rarg7, thread);
+    __ sw(c_rarg6, parameter_size);  // int: only the low 32 bits of the slot are written
+    __ sd(c_rarg5, parameters);
+    __ sd(c_rarg4, entry_point);
+    __ sd(c_rarg3, method);
+    __ sd(c_rarg2, result_type);
+    __ sd(c_rarg1, result);
+    __ sd(c_rarg0, call_wrapper);
+
+    __ sd(x9, x9_save);
+
+    __ sd(x18, x18_save);
+    __ sd(x19, x19_save);
+    __ sd(x20, x20_save);
+    __ sd(x21, x21_save);
+    __ sd(x22, x22_save);
+    __ sd(x23, x23_save);
+    __ sd(x24, x24_save);
+    __ sd(x25, x25_save);
+    __ sd(x26, x26_save);
+    __ sd(x27, x27_save);
+
+    __ fsd(f8,  f8_save);
+    __ fsd(f9,  f9_save);
+    __ fsd(f18, f18_save);
+    __ fsd(f19, f19_save);
+    __ fsd(f20, f20_save);
+    __ fsd(f21, f21_save);
+    __ fsd(f22, f22_save);
+    __ fsd(f23, f23_save);
+    __ fsd(f24, f24_save);
+    __ fsd(f25, f25_save);
+    __ fsd(f26, f26_save);
+    __ fsd(f27, f27_save);
+
+    // install Java thread in global register now we have saved
+    // whatever value it held
+    __ mv(xthread, c_rarg7);
+
+    // And method
+    __ mv(xmethod, c_rarg3);
+
+    // set up the heapbase register
+    __ reinit_heapbase();
+
+#ifdef ASSERT
+    // make sure we have no pending exceptions
+    {
+      Label L;
+      __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
+      __ beqz(t0, L);
+      __ stop("StubRoutines::call_stub: entered with pending exception");
+      __ BIND(L);
+    }
+#endif
+    // reserve stack space for the parameters (if any)
+    __ mv(esp, sp);
+    __ slli(t0, c_rarg6, LogBytesPerWord);  // t0 = parameter size in bytes
+    __ sub(t0, sp, t0); // Move SP out of the way
+    __ andi(sp, t0, -2 * wordSize);         // round sp down to a 2-word boundary
+
+    BLOCK_COMMENT("pass parameters if any");
+    Label parameters_done;
+    // parameter count is still in c_rarg6
+    // and parameter pointer identifying param 1 is in c_rarg5
+    __ beqz(c_rarg6, parameters_done);
+
+    address loop = __ pc();
+    __ ld(t0, c_rarg5, 0);
+    __ addi(c_rarg5, c_rarg5, wordSize);
+    __ addi(c_rarg6, c_rarg6, -1);
+    __ push_reg(t0);
+    __ bgtz(c_rarg6, loop);
+
+    __ BIND(parameters_done);
+
+    // call Java entry -- passing Method*, and current sp
+    //      xmethod: Method*
+    //      x30: sender sp
+    BLOCK_COMMENT("call Java function");
+    __ mv(x30, sp);
+    __ jalr(c_rarg4);
+
+    // save current address for use by exception handling code
+
+    return_address = __ pc();
+
+    // store result depending on type (everything that is not
+    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
+    // n.b. this assumes Java returns an integral result in x10
+    // and a floating result in j_farg0
+    __ ld(j_rarg2, result);
+    Label is_long, is_float, is_double, exit;
+    __ ld(j_rarg1, result_type);
+    __ mv(t0, (u1)T_OBJECT);
+    __ beq(j_rarg1, t0, is_long);  // T_OBJECT shares the 64-bit store used for T_LONG
+    __ mv(t0, (u1)T_LONG);
+    __ beq(j_rarg1, t0, is_long);
+    __ mv(t0, (u1)T_FLOAT);
+    __ beq(j_rarg1, t0, is_float);
+    __ mv(t0, (u1)T_DOUBLE);
+    __ beq(j_rarg1, t0, is_double);
+
+    // handle T_INT case
+    __ sw(x10, Address(j_rarg2));
+
+    __ BIND(exit);
+
+    // pop parameters
+    __ addi(esp, fp, sp_after_call_off * wordSize);
+
+#ifdef ASSERT
+    // verify that threads correspond
+    {
+      Label L, S;
+      __ ld(t0, thread);
+      __ bne(xthread, t0, S);
+      __ get_thread(t0);
+      __ beq(xthread, t0, L);
+      __ BIND(S);
+      __ stop("StubRoutines::call_stub: threads must correspond");
+      __ BIND(L);
+    }
+#endif
+
+    // restore callee-save registers
+    __ fld(f27, f27_save);
+    __ fld(f26, f26_save);
+    __ fld(f25, f25_save);
+    __ fld(f24, f24_save);
+    __ fld(f23, f23_save);
+    __ fld(f22, f22_save);
+    __ fld(f21, f21_save);
+    __ fld(f20, f20_save);
+    __ fld(f19, f19_save);
+    __ fld(f18, f18_save);
+    __ fld(f9,  f9_save);
+    __ fld(f8,  f8_save);
+
+    __ ld(x27, x27_save);
+    __ ld(x26, x26_save);
+    __ ld(x25, x25_save);
+    __ ld(x24, x24_save);
+    __ ld(x23, x23_save);
+    __ ld(x22, x22_save);
+    __ ld(x21, x21_save);
+    __ ld(x20, x20_save);
+    __ ld(x19, x19_save);
+    __ ld(x18, x18_save);
+
+    __ ld(x9, x9_save);
+
+    __ ld(c_rarg0, call_wrapper);
+    __ ld(c_rarg1, result);
+    __ ld(c_rarg2, result_type);
+    __ ld(c_rarg3, method);
+    __ ld(c_rarg4, entry_point);
+    __ ld(c_rarg5, parameters);
+    __ lw(c_rarg6, parameter_size);  // saved with sw above: reload 32 bits, not the unwritten upper half
+    __ ld(c_rarg7, thread);
+
+    // leave frame and return to caller
+    __ leave();
+    __ ret();
+
+    // handle return types different from T_INT
+
+    __ BIND(is_long);
+    __ sd(x10, Address(j_rarg2, 0));
+    __ j(exit);
+
+    __ BIND(is_float);
+    __ fsw(j_farg0, Address(j_rarg2, 0), t0);
+    __ j(exit);
+
+    __ BIND(is_double);
+    __ fsd(j_farg0, Address(j_rarg2, 0), t0);
+    __ j(exit);
+
+    return start;
+  }
+
+  // Return point for a Java call if there's an exception thrown in
+  // Java code.  The exception is caught and transformed into a
+  // pending exception stored in JavaThread that can be tested from
+  // within the VM.
+  //
+  // Note: Usually the parameters are removed by the callee. In case
+  // of an exception crossing an activation frame boundary, that is
+  // not the case if the callee is compiled code => need to setup the
+  // sp.
+  //
+  // x10: exception oop
+
+  address generate_catch_exception() {  // emits the catch_exception stub and returns its entry address
+    StubCodeMark mark(this, "StubRoutines", "catch_exception");
+    address start = __ pc();
+
+    // same as in generate_call_stub():
+    const Address thread(fp, thread_off * wordSize);
+
+#ifdef ASSERT
+    // verify that threads correspond
+    {
+      Label L, S;
+      __ ld(t0, thread);
+      __ bne(xthread, t0, S);
+      __ get_thread(t0);
+      __ beq(xthread, t0, L);
+      __ bind(S);
+      __ stop("StubRoutines::catch_exception: threads must correspond");
+      __ bind(L);
+    }
+#endif
+
+    // set pending exception
+    __ verify_oop(x10);
+
+    __ sd(x10, Address(xthread, Thread::pending_exception_offset()));
+    __ mv(t0, (address)__FILE__);                                  // record this source file ...
+    __ sd(t0, Address(xthread, Thread::exception_file_offset()));
+    __ mv(t0, (int)__LINE__);                                      // ... and line with the exception
+    __ sw(t0, Address(xthread, Thread::exception_line_offset()));
+
+    // complete return to VM
+    assert(StubRoutines::_call_stub_return_address != NULL,
+           "_call_stub_return_address must have been generated before");
+    __ j(StubRoutines::_call_stub_return_address);  // presumably the return_address set by generate_call_stub -- confirm
+
+    return start;
+  }
+
+  // Continuation point for runtime calls returning with a pending
+  // exception.  The pending exception check happened in the runtime
+  // or native call stub.  The pending exception in Thread is
+  // converted into a Java-level exception.
+  //
+  // Contract with Java-level exception handlers:
+  // x10: exception
+  // x13: throwing pc
+  //
+  // NOTE: At entry of this stub, exception-pc must be in RA !!
+
+  // NOTE: this is always used as a jump target within generated code
+  // so it just needs to be generated code with no prolog
+
+  address generate_forward_exception() {  // emits the forward-exception stub and returns its entry address
+    StubCodeMark mark(this, "StubRoutines", "forward exception");
+    address start = __ pc();
+
+    // Upon entry, RA points to the return address returning into
+    // Java (interpreted or compiled) code; i.e., the return address
+    // becomes the throwing pc.
+    //
+    // Arguments pushed before the runtime call are still on the stack
+    // but the exception handler will reset the stack pointer ->
+    // ignore them.  A potential result in registers can be ignored as
+    // well.
+
+#ifdef ASSERT
+    // make sure this code is only executed if there is a pending exception
+    {
+      Label L;
+      __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+      __ bnez(t0, L);
+      __ stop("StubRoutines::forward exception: no pending exception (1)");
+      __ bind(L);
+    }
+#endif
+
+    // compute exception handler into x9
+
+    // call the VM to find the handler address associated with the
+    // caller address. pass thread in x10 and caller pc (ret address)
+    // in x11. n.b. the caller pc is in ra, unlike x86 where it is on
+    // the stack.
+    __ mv(c_rarg1, ra);
+    // ra will be trashed by the VM call so we move it to x9
+    // (callee-saved) because we also need to pass it to the handler
+    // returned by this call.
+    __ mv(x9, ra);
+    BLOCK_COMMENT("call exception_handler_for_return_address");
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+                         SharedRuntime::exception_handler_for_return_address),
+                    xthread, c_rarg1);
+    // we should not really care that ra is no longer the callee
+    // address. we saved the value the handler needs in x9 so we can
+    // just copy it to x13. however, the C2 handler will push its own
+    // frame and then calls into the VM and the VM code asserts that
+    // the PC for the frame above the handler belongs to a compiled
+    // Java method. So, we restore ra here to satisfy that assert.
+    __ mv(ra, x9);
+    // setup x10 & x13 & clear pending exception
+    __ mv(x13, x9);   // x13 = throwing pc (the original ra)
+    __ mv(x9, x10);   // x9 = handler address left in x10 by the VM call
+    __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
+    __ sd(zr, Address(xthread, Thread::pending_exception_offset()));
+
+#ifdef ASSERT
+    // make sure exception is set
+    {
+      Label L;
+      __ bnez(x10, L);
+      __ stop("StubRoutines::forward exception: no pending exception (2)");
+      __ bind(L);
+    }
+#endif
+
+    // continue at exception handler
+    // x10: exception
+    // x13: throwing pc
+    // x9: exception handler
+    __ verify_oop(x10);
+    __ jr(x9);
+
+    return start;
+  }
+
+  // Non-destructive plausibility checks for oops
+  //
+  // Arguments:
+  //    x10: oop to verify
+  //    t0: error message
+  //
+  // Stack after saving c_rarg3:
+  //    [tos + 0]: saved c_rarg3
+  //    [tos + 1]: saved c_rarg2
+  //    [tos + 2]: saved ra
+  //    [tos + 3]: saved t1
+  //    [tos + 4]: saved x10
+  //    [tos + 5]: saved t0
+  address generate_verify_oop() {  // emits the verify_oop stub and returns its entry address
+
+    StubCodeMark mark(this, "StubRoutines", "verify_oop");
+    address start = __ pc();
+
+    Label exit, error;
+
+    __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3
+
+    __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
+    __ ld(c_rarg3, Address(c_rarg2));  // NOTE(review): 64-bit load/store; confirm the counter is 8 bytes wide
+    __ add(c_rarg3, c_rarg3, 1);       // count this verification
+    __ sd(c_rarg3, Address(c_rarg2));
+
+    // object is in x10
+    // make sure object is 'reasonable'
+    __ beqz(x10, exit); // if obj is NULL it is OK
+
+    // Check if the oop is in the right area of memory
+    __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask());
+    __ andr(c_rarg2, x10, c_rarg3);
+    __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits());
+
+    // (oop & mask) must equal the expected bits, otherwise report an error.
+    __ bne(c_rarg2, c_rarg3, error);
+
+    // make sure klass is 'reasonable', which is not zero.
+    __ load_klass(x10, x10);  // get klass
+    __ beqz(x10, error);      // if klass is NULL it is broken
+
+    // return if everything seems ok
+    __ bind(exit);
+
+    __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp);  // pop c_rarg2 and c_rarg3
+    __ ret();
+
+    // handle errors
+    __ bind(error);
+    __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3
+
+    __ pusha();
+    // debug64(char* msg, int64_t pc, int64_t regs[])
+    __ mv(c_rarg0, t0);             // pass address of error message
+    __ mv(c_rarg1, ra);             // pass return address
+    __ mv(c_rarg2, sp);             // pass address of regs on stack
+#ifndef PRODUCT
+    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
+#endif
+    BLOCK_COMMENT("call MacroAssembler::debug");
+    int32_t offset = 0;
+    __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset);
+    __ jalr(x1, t0, offset);
+    __ ebreak();                    // trap if debug64 ever returns
+
+    return start;
+  }
+
+  // The inner part of zero_words().
+  //
+  // Inputs:
+  // x28: the HeapWord-aligned base address of an array to zero.
+  // x29: the count in HeapWords, x29 > 0.
+  //
+  // Returns x28 and x29, adjusted for the caller to clear.
+  // x28: the base address of the tail of words left to clear.
+  // x29: the number of words in the tail.
+  //      x29 < MacroAssembler::zero_words_block_size.
+
+  address generate_zero_blocks() {  // emits the zero_blocks stub and returns its entry address
+    Label done;
+
+    const Register base = x28, cnt = x29;
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "zero_blocks");
+    address start = __ pc();
+
+    {
+      // Clear whole blocks of zero_words_block_size words while any remain.
+      Label loop;
+      __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);  // bias cnt: negative means no full block left
+      __ bltz(cnt, done);
+      __ bind(loop);
+      for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) {  // unrolled at code-generation time
+        __ sd(zr, Address(base, 0));
+        __ add(base, base, 8);
+      }
+      __ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
+      __ bgez(cnt, loop);
+      __ bind(done);
+      __ add(cnt, cnt, MacroAssembler::zero_words_block_size);  // undo the bias: cnt = words left for the caller
+    }
+
+    __ ret();
+
+    return start;
+  }
+
+  typedef enum {
+    copy_forwards = 1,    // used as a multiplier: per-word step of +wordSize
+    copy_backwards = -1   // used as a multiplier: per-word step of -wordSize
+  } copy_direction;
+
+  // Bulk copy of blocks of 8 words.
+  //
+  // count is a count of words.
+  //
+  // Precondition: count >= 8
+  //
+  // Postconditions:
+  //
+  // The least significant bit of count contains the remaining count
+  // of words to copy.  The rest of count is trash.
+  //
+  // s and d are adjusted to point to the remaining words to copy
+  //
+  void generate_copy_longs(Label &start, Register s, Register d, Register count,
+                           copy_direction direction) {
+    int unit = wordSize * direction;  // signed byte step per word
+    int bias = wordSize;              // forward copies pre-decrement s and d by one word
+
+    const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16,
+      tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29;
+
+    // t0 is the scratch register for the tail tests below, so it must be
+    // distinct from every temporary and from the caller-supplied registers.
+    assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3,
+      tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7);
+    assert_different_registers(s, d, count, t0);
+
+    Label again, drain;
+    const char* stub_name = NULL;
+    if (direction == copy_forwards) {
+      stub_name = "forward_copy_longs";
+    } else {
+      stub_name = "backward_copy_longs";
+    }
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    __ align(CodeEntryAlignment);
+    __ bind(start);
+
+    if (direction == copy_forwards) {
+      __ sub(s, s, bias);  // with the bias, offsets 1..8 * unit address words 0..7
+      __ sub(d, d, bias);
+    }
+
+#ifdef ASSERT
+    // Make sure we are never given < 8 words
+    {
+      Label L;
+
+      __ mv(t0, 8);
+      __ bge(count, t0, L);
+      __ stop("genrate_copy_longs called with < 8 words");
+      __ bind(L);
+    }
+#endif
+
+    __ ld(tmp_reg0, Address(s, 1 * unit));  // preload the first 8 words
+    __ ld(tmp_reg1, Address(s, 2 * unit));
+    __ ld(tmp_reg2, Address(s, 3 * unit));
+    __ ld(tmp_reg3, Address(s, 4 * unit));
+    __ ld(tmp_reg4, Address(s, 5 * unit));
+    __ ld(tmp_reg5, Address(s, 6 * unit));
+    __ ld(tmp_reg6, Address(s, 7 * unit));
+    __ ld(tmp_reg7, Address(s, 8 * unit));
+    __ addi(s, s, 8 * unit);
+
+    __ sub(count, count, 16);  // 8 already loaded; is there a further full block of 8?
+    __ bltz(count, drain);
+
+    __ bind(again);
+
+    __ sd(tmp_reg0, Address(d, 1 * unit));  // store the block loaded on the previous pass
+    __ sd(tmp_reg1, Address(d, 2 * unit));
+    __ sd(tmp_reg2, Address(d, 3 * unit));
+    __ sd(tmp_reg3, Address(d, 4 * unit));
+    __ sd(tmp_reg4, Address(d, 5 * unit));
+    __ sd(tmp_reg5, Address(d, 6 * unit));
+    __ sd(tmp_reg6, Address(d, 7 * unit));
+    __ sd(tmp_reg7, Address(d, 8 * unit));
+
+    __ ld(tmp_reg0, Address(s, 1 * unit));  // and load the next one
+    __ ld(tmp_reg1, Address(s, 2 * unit));
+    __ ld(tmp_reg2, Address(s, 3 * unit));
+    __ ld(tmp_reg3, Address(s, 4 * unit));
+    __ ld(tmp_reg4, Address(s, 5 * unit));
+    __ ld(tmp_reg5, Address(s, 6 * unit));
+    __ ld(tmp_reg6, Address(s, 7 * unit));
+    __ ld(tmp_reg7, Address(s, 8 * unit));
+
+    __ addi(s, s, 8 * unit);
+    __ addi(d, d, 8 * unit);
+
+    __ sub(count, count, 8);
+    __ bgez(count, again);
+
+    // Drain: store the 8 words still held in the temporaries
+    __ bind(drain);
+
+    __ sd(tmp_reg0, Address(d, 1 * unit));
+    __ sd(tmp_reg1, Address(d, 2 * unit));
+    __ sd(tmp_reg2, Address(d, 3 * unit));
+    __ sd(tmp_reg3, Address(d, 4 * unit));
+    __ sd(tmp_reg4, Address(d, 5 * unit));
+    __ sd(tmp_reg5, Address(d, 6 * unit));
+    __ sd(tmp_reg6, Address(d, 7 * unit));
+    __ sd(tmp_reg7, Address(d, 8 * unit));
+    __ addi(d, d, 8 * unit);
+
+    {
+      Label L1, L2;
+      __ andi(t0, count, 4);  // bit 2 of count set: copy 4 more words
+      __ beqz(t0, L1);
+
+      __ ld(tmp_reg0, Address(s, 1 * unit));
+      __ ld(tmp_reg1, Address(s, 2 * unit));
+      __ ld(tmp_reg2, Address(s, 3 * unit));
+      __ ld(tmp_reg3, Address(s, 4 * unit));
+      __ addi(s, s, 4 * unit);
+
+      __ sd(tmp_reg0, Address(d, 1 * unit));
+      __ sd(tmp_reg1, Address(d, 2 * unit));
+      __ sd(tmp_reg2, Address(d, 3 * unit));
+      __ sd(tmp_reg3, Address(d, 4 * unit));
+      __ addi(d, d, 4 * unit);
+
+      __ bind(L1);
+
+      if (direction == copy_forwards) {
+        __ addi(s, s, bias);  // undo the entry bias
+        __ addi(d, d, bias);
+      }
+
+      __ andi(t0, count, 2);  // bit 1 of count set: copy 2 more words
+      __ beqz(t0, L2);
+      if (direction == copy_backwards) {
+        __ addi(s, s, 2 * unit);
+        __ ld(tmp_reg0, Address(s));
+        __ ld(tmp_reg1, Address(s, wordSize));
+        __ addi(d, d, 2 * unit);
+        __ sd(tmp_reg0, Address(d));
+        __ sd(tmp_reg1, Address(d, wordSize));
+      } else {
+        __ ld(tmp_reg0, Address(s));
+        __ ld(tmp_reg1, Address(s, wordSize));
+        __ addi(s, s, 2 * unit);
+        __ sd(tmp_reg0, Address(d));
+        __ sd(tmp_reg1, Address(d, wordSize));
+        __ addi(d, d, 2 * unit);
+      }
+      __ bind(L2);
+    }
+
+    __ ret();
+  }
+
+  Label copy_f, copy_b;
+
+  // All-singing all-dancing memory copy.
+  //
+  // Copy count units of memory from s to d.  The size of a unit is
+  // step, which can be positive or negative depending on the direction
+  // of copy.  If is_aligned is false, we align the source address.
+  //
+  /*
+   * if (is_aligned) {
+   *   goto copy_8_bytes;
+   * }
+   * bool is_backwards = step < 0;
+   * int granularity = uabs(step);
+   * count = count * granularity;   // count is now in bytes
+   *
+   * if (is_backwards) {
+   *   s += count;
+   *   d += count;
+   * }
+   *
+   * count limit maybe greater than 16, for better performance
+   * if (count < 16) {
+   *   goto copy_small;
+   * }
+   *
+   * if ((dst % 8) == (src % 8)) {
+   *   aligned;
+   *   goto copy8;
+   * }
+   *
+   * copy_small:
+   *   load element one by one;
+   * done;
+   */
+
+  typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp);
+
+  void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) {  // vector (RVV) element copy
+    bool is_backward = step < 0;
+    int granularity = uabs(step);  // element size in bytes
+
+    const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17;
+    assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2);
+    Assembler::SEW sew = Assembler::elembytes_to_sew(granularity);
+    Label loop_forward, loop_backward, done;
+
+    __ mv(dst, d);
+    __ mv(src, s);
+    __ mv(cnt, count);
+
+    __ bind(loop_forward);
+    __ vsetvli(vl, cnt, sew, Assembler::m8);  // vl = number of elements handled this pass
+    if (is_backward) {
+      __ bne(vl, cnt, loop_backward);         // more than one pass left: copy from the tail first
+    }
+
+    __ vlex_v(v0, src, sew);
+    __ sub(cnt, cnt, vl);
+    __ slli(vl, vl, (int)sew);                // elements -> bytes
+    __ add(src, src, vl);
+
+    __ vsex_v(v0, dst, sew);
+    __ add(dst, dst, vl);
+    __ bnez(cnt, loop_forward);
+
+    if (is_backward) {
+      __ j(done);
+
+      __ bind(loop_backward);
+      __ sub(tmp, cnt, vl);                   // index of the first of the last vl elements
+      __ slli(tmp, tmp, sew);                 // ... as a byte offset
+      __ add(tmp1, s, tmp);
+      __ vlex_v(v0, tmp1, sew);
+      __ add(tmp2, d, tmp);
+      __ vsex_v(v0, tmp2, sew);
+      __ sub(cnt, cnt, vl);                   // those elements are done
+      __ bnez(cnt, loop_forward);
+      __ bind(done);
+    }
+  }
+
+  void copy_memory(bool is_aligned, Register s, Register d,
+                   Register count, Register tmp, int step) {
+    if (UseRVV) {
+      return copy_memory_v(s, d, count, tmp, step);  // vector path ignores is_aligned
+    }
+
+    bool is_backwards = step < 0;
+    int granularity = uabs(step);  // element size in bytes
+
+    const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17;
+
+    Label same_aligned;
+    Label copy8, copy_small, done;
+
+    copy_insn ld_arr = NULL, st_arr = NULL;  // element-sized load/store chosen by granularity
+    switch (granularity) {
+      case 1 :
+        ld_arr = (copy_insn)&MacroAssembler::lbu;
+        st_arr = (copy_insn)&MacroAssembler::sb;
+        break;
+      case 2 :
+        ld_arr = (copy_insn)&MacroAssembler::lhu;
+        st_arr = (copy_insn)&MacroAssembler::sh;
+        break;
+      case 4 :
+        ld_arr = (copy_insn)&MacroAssembler::lwu;
+        st_arr = (copy_insn)&MacroAssembler::sw;
+        break;
+      case 8 :
+        ld_arr = (copy_insn)&MacroAssembler::ld;
+        st_arr = (copy_insn)&MacroAssembler::sd;
+        break;
+      default :
+        ShouldNotReachHere();
+    }
+
+    __ beqz(count, done);                          // nothing to copy
+    __ slli(cnt, count, exact_log2(granularity));  // cnt = byte count
+    if (is_backwards) {
+      __ add(src, s, cnt);                         // start one past the end and pre-decrement
+      __ add(dst, d, cnt);
+    } else {
+      __ mv(src, s);
+      __ mv(dst, d);
+    }
+
+    if (is_aligned) {
+      __ addi(tmp, cnt, -8);
+      __ bgez(tmp, copy8);   // at least 8 bytes: go straight to the 8-byte loop
+      __ j(copy_small);      // the code from here up to copy8 is then never reached
+    }
+
+    __ mv(tmp, 16);
+    __ blt(cnt, tmp, copy_small);  // short copies go element by element
+
+    __ xorr(tmp, src, dst);
+    __ andi(tmp, tmp, 0b111);
+    __ bnez(tmp, copy_small);      // src and dst differ in their low 3 bits: no 8-byte loop
+
+    __ bind(same_aligned);         // copy single elements until src is 8-byte aligned
+    __ andi(tmp, src, 0b111);
+    __ beqz(tmp, copy8);
+    if (is_backwards) {
+      __ addi(src, src, step);
+      __ addi(dst, dst, step);
+    }
+    (_masm->*ld_arr)(tmp3, Address(src), t0);
+    (_masm->*st_arr)(tmp3, Address(dst), t0);
+    if (!is_backwards) {
+      __ addi(src, src, step);
+      __ addi(dst, dst, step);
+    }
+    __ addi(cnt, cnt, -granularity);
+    __ beqz(cnt, done);
+    __ j(same_aligned);
+
+    __ bind(copy8);                // 8 bytes per iteration
+    if (is_backwards) {
+      __ addi(src, src, -wordSize);
+      __ addi(dst, dst, -wordSize);
+    }
+    __ ld(tmp3, Address(src));
+    __ sd(tmp3, Address(dst));
+    if (!is_backwards) {
+      __ addi(src, src, wordSize);
+      __ addi(dst, dst, wordSize);
+    }
+    __ addi(cnt, cnt, -wordSize);
+    __ addi(tmp4, cnt, -8);
+    __ bgez(tmp4, copy8); // cnt >= 8, do next loop
+
+    __ beqz(cnt, done);
+
+    __ bind(copy_small);           // one element per iteration until cnt is used up
+    if (is_backwards) {
+      __ addi(src, src, step);
+      __ addi(dst, dst, step);
+    }
+    (_masm->*ld_arr)(tmp3, Address(src), t0);
+    (_masm->*st_arr)(tmp3, Address(dst), t0);
+    if (!is_backwards) {
+      __ addi(src, src, step);
+      __ addi(dst, dst, step);
+    }
+    __ addi(cnt, cnt, -granularity);
+    __ bgtz(cnt, copy_small);
+
+    __ bind(done);
+  }
+
+  // Walk the 'count' oops stored at address 'a' and verify each one.
+  // 'size' is the in-memory oop size in bytes: wordSize selects a full-width
+  // load, any other value is treated as a 32-bit narrow oop that is decoded.
+  // Preserves a and count; clobbers t0, t1 and temp.
+  void verify_oop_array(size_t size, Register a, Register count, Register temp) {
+    const bool full_width_oops = (size == (size_t)wordSize);
+    Label L_next, L_exit;
+
+    // t1 = byte offset of the current oop, t0 = total byte length to scan.
+    __ mv(t1, zr);
+    __ slli(t0, count, exact_log2(size));
+
+    __ bind(L_next);
+    __ bgeu(t1, t0, L_exit);
+    __ add(temp, a, t1);
+    if (!full_width_oops) {
+      __ lwu(temp, Address(temp, 0));
+      __ decode_heap_oop(temp); // calls verify_oop
+    } else {
+      __ ld(temp, Address(temp, 0));
+      __ verify_oop(temp);
+    }
+    __ add(t1, t1, size);
+    __ j(L_next);
+
+    __ bind(L_exit);
+  }
+
+  // Arguments:
+  //   size    - element size in bytes
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             (forwarded to copy_memory() and to the GC barrier decorators)
+  //   is_oop  - true => oop array, so generate store check code
+  //   entry   - if non-NULL, receives the entry point located right after enter()
+  //   name    - stub name string
+  //   dest_uninitialized - true => IS_DEST_UNINITIALIZED is added to the decorators
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   *entry (when non-NULL) is set to the no-overlap entry point
+  //   used by the matching conjoint copy stub.
+  //
+  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
+                                 const char* name, bool dest_uninitialized = false) {
+    // Fixed argument registers of the stub.
+    const Register s = c_rarg0;
+    const Register d = c_rarg1;
+    const Register count = c_rarg2;
+    const RegSet live_args = RegSet::of(s, d, count);
+    // Registers needed again after copy_memory() when copying oops.
+    const RegSet oop_epilogue_args = RegSet::of(d, count);
+
+    // Decorators handed to the GC barrier prologue/epilogue.
+    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
+    if (aligned) {
+      decorators |= ARRAYCOPY_ALIGNED;
+    }
+    if (dest_uninitialized) {
+      decorators |= IS_DEST_UNINITIALIZED;
+    }
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    const address stub_start = __ pc();
+    __ enter();
+
+    // Publish the secondary entry point (located after the frame setup).
+    if (entry != NULL) {
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+    BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, live_args);
+
+    if (!is_oop) {
+      copy_memory(aligned, s, d, count, t0, size);
+    } else {
+      // save regs before copy_memory, restore them for the epilogue
+      __ push_reg(oop_epilogue_args, sp);
+      copy_memory(aligned, s, d, count, t0, size);
+      __ pop_reg(oop_epilogue_args, sp);
+      if (VerifyOops) {
+        verify_oop_array(size, d, count, t2);
+      }
+    }
+
+    bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
+
+    __ leave();
+    __ mv(x10, zr); // return 0
+    __ ret();
+    return stub_start;
+  }
+
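+  // Arguments:
+  //   size    - element size in bytes
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             (forwarded to copy_memory() and to the GC barrier decorators)
+  //   is_oop  - true => oop array, so generate store check code
+  //   nooverlap_target - forward-copy entry point taken when (d - s) is
+  //             unsigned-greater-or-equal to the byte length of the copy
+  //   entry   - if non-NULL, receives the entry point located right after enter()
+  //   name    - stub name string
+  //   dest_uninitialized - true => IS_DEST_UNINITIALIZED is added to the decorators
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.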
+  //
+  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+                                 address* entry, const char* name,
+                                 bool dest_uninitialized = false) {
+    // Fixed argument registers of the stub.
+    const Register s = c_rarg0;
+    const Register d = c_rarg1;
+    const Register count = c_rarg2;
+    const RegSet live_args = RegSet::of(s, d, count);
+    // Registers needed again after copy_memory() when copying oops.
+    const RegSet oop_epilogue_args = RegSet::of(d, count);
+
+    // Decorators handed to the GC barrier prologue/epilogue.
+    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
+    if (aligned) {
+      decorators |= ARRAYCOPY_ALIGNED;
+    }
+    if (dest_uninitialized) {
+      decorators |= IS_DEST_UNINITIALIZED;
+    }
+
+    StubCodeMark mark(this, "StubRoutines", name);
+    const address stub_start = __ pc();
+    __ enter();
+
+    // Publish the secondary entry point (located after the frame setup).
+    if (entry != NULL) {
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+    // use fwd copy when (d-s) above_equal (count*size)
+    __ sub(t0, d, s);
+    __ slli(t1, count, exact_log2(size));
+    __ bgeu(t0, t1, nooverlap_target);
+
+    BarrierSetAssembler* const bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, live_args);
+
+    // A negative step asks copy_memory() for a backwards copy.
+    if (!is_oop) {
+      copy_memory(aligned, s, d, count, t0, -size);
+    } else {
+      // save regs before copy_memory, restore them for the epilogue
+      __ push_reg(oop_epilogue_args, sp);
+      copy_memory(aligned, s, d, count, t0, -size);
+      __ pop_reg(oop_epilogue_args, sp);
+      if (VerifyOops) {
+        verify_oop_array(size, d, count, t2);
+      }
+    }
+
+    bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet());
+
+    __ leave();
+    __ mv(x10, zr); // return 0
+    __ ret();
+    return stub_start;
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_byte_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_byte_copy().
+  //
+  address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) {
+    // jbyte elements are plain data: delegate with is_oop == false.
+    return generate_disjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+  // we let the hardware handle it.  The one to eight bytes within words,
+  // dwords or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
+                                      address* entry, const char* name) {
+    // jbyte elements are plain data: delegate with is_oop == false.
+    return generate_conjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_short_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_short_copy().
+  //
+  address generate_disjoint_short_copy(bool aligned,
+                                       address* entry, const char* name) {
+    // jshort elements are plain data: delegate with is_oop == false.
+    return generate_disjoint_copy(sizeof (jshort), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
+  // let the hardware handle it.  The two or four words within dwords
+  // or qwords that span cache line boundaries will still be loaded
+  // and stored atomically.
+  //
+  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
+                                       address* entry, const char* name) {
+    // jshort elements are plain data: delegate with is_oop == false.
+    return generate_conjoint_copy(sizeof (jshort), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  // Side Effects:
+  //   disjoint_int_copy_entry is set to the no-overlap entry point
+  //   used by generate_conjoint_int_oop_copy().
+  //
+  address generate_disjoint_int_copy(bool aligned, address* entry,
+                                     const char* name, bool dest_uninitialized = false) {
+    // jint elements are plain data: delegate with is_oop == false.
+    // dest_uninitialized is accepted but not forwarded.
+    return generate_disjoint_copy(sizeof (jint), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as ssize_t, can be zero
+  //
+  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+  // the hardware handle it.  The two dwords within qwords that span
+  // cache line boundaries will still be loaded and stored atomically.
+  //
+  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+                                     address* entry, const char* name,
+                                     bool dest_uninitialized = false) {
+    // jint elements are plain data: delegate with is_oop == false.
+    // dest_uninitialized is accepted but not forwarded.
+    return generate_conjoint_copy(sizeof (jint), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_long_copy(bool aligned, address* entry,
+                                      const char* name, bool dest_uninitialized = false) {
+    // jlong elements are plain data: delegate with is_oop == false.
+    // dest_uninitialized is accepted but not forwarded.
+    return generate_disjoint_copy(sizeof (jlong), aligned, /* is_oop */ false, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_long_copy(bool aligned,
+                                      address nooverlap_target, address* entry,
+                                      const char* name, bool dest_uninitialized = false) {
+    // jlong elements are plain data: delegate with is_oop == false.
+    // dest_uninitialized is accepted but not forwarded.
+    return generate_conjoint_copy(sizeof (jlong), aligned, /* is_oop */ false,
+                                  nooverlap_target, entry, name);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  // Side Effects:
+  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
+  //
+  address generate_disjoint_oop_copy(bool aligned, address* entry,
+                                     const char* name, bool dest_uninitialized) {
+    // Element size follows the oop encoding: jint-sized with compressed oops,
+    // jlong-sized otherwise.
+    size_t oop_size = sizeof (jlong);
+    if (UseCompressedOops) {
+      oop_size = sizeof (jint);
+    }
+    return generate_disjoint_copy(oop_size, aligned, /* is_oop */ true, entry, name,
+                                  dest_uninitialized);
+  }
+
+  // Arguments:
+  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+  //             ignored
+  //   name    - stub name string
+  //
+  // Inputs:
+  //   c_rarg0   - source array address
+  //   c_rarg1   - destination array address
+  //   c_rarg2   - element count, treated as size_t, can be zero
+  //
+  address generate_conjoint_oop_copy(bool aligned,
+                                     address nooverlap_target, address* entry,
+                                     const char* name, bool dest_uninitialized) {
+    // Element size follows the oop encoding: jint-sized with compressed oops,
+    // jlong-sized otherwise.
+    size_t oop_size = sizeof (jlong);
+    if (UseCompressedOops) {
+      oop_size = sizeof (jint);
+    }
+    return generate_conjoint_copy(oop_size, aligned, /* is_oop */ true,
+                                  nooverlap_target, entry, name, dest_uninitialized);
+  }
+
+  // Helper for generating a dynamic type check.
+  // Smashes t0, t1.
+  // Emits a subtype check of sub_klass against super_klass.
+  //   sub_klass          - klass being tested
+  //   super_check_offset - passed to the fast path as the super check offset
+  //   super_klass        - klass to test against
+  //   L_success          - branch target when the check succeeds
+  // On failure control falls through to the code emitted after this helper.
+  void generate_type_check(Register sub_klass,
+                           Register super_check_offset,
+                           Register super_klass,
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    // Local failure label, bound at the end of this helper.
+    Label L_miss;
+
+    // Fast path may branch to L_success or L_miss; otherwise it falls into the slow path.
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset);
+    // Slow path is given only a success target (its failure label is NULL).
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
+
+    // Fall through on failure!
+    __ BIND(L_miss);
+  }
+
+  //
+  //  Generate checkcasting array copy stub
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - element count, treated as ssize_t, can be zero
+  //    c_rarg3   - size_t ckoff (super_check_offset)
+  //    c_rarg4   - oop ckval (super_klass)
+  //
+  //  Output:
+  //    x10 ==  0  -  success
+  //    x10 == -1^K - failure, where K is partial transfer count
+  //
+  address generate_checkcast_copy(const char* name, address* entry,
+                                  bool dest_uninitialized = false) {
+    // name               - stub name string
+    // entry              - if non-NULL, receives the entry point located right after enter()
+    // dest_uninitialized - true => IS_DEST_UNINITIALIZED is added to the barrier decorators
+    // Returns the start address of the generated stub.
+    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
+
+    // Input registers (after setup_arg_regs)
+    const Register from        = c_rarg0;   // source array address
+    const Register to          = c_rarg1;   // destination array address
+    const Register count       = c_rarg2;   // element count
+    const Register ckoff       = c_rarg3;   // super_check_offset
+    const Register ckval       = c_rarg4;   // super_klass
+
+    // Register sets handed to the barrier prologue / epilogue respectively.
+    RegSet wb_pre_saved_regs   = RegSet::range(c_rarg0, c_rarg4);
+    RegSet wb_post_saved_regs  = RegSet::of(count);
+
+    // Registers used as temps (x7, x9, x18 and x19 are pushed below and
+    // popped again before returning)
+    const Register count_save  = x19;       // original element count
+    const Register start_to    = x18;       // destination array start address
+    const Register copied_oop  = x7;        // actual oop copied
+    const Register r9_klass    = x9;        // oop._klass
+
+    //---------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the two arrays are subtypes of Object[] but the
+    // destination array type is not equal to or a supertype
+    // of the source type.  Each element must be separately
+    // checked.
+
+    assert_different_registers(from, to, count, ckoff, ckval, start_to,
+                               copied_oop, r9_klass, count_save);
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // Caller of this entry point must set up the argument registers.
+    if (entry != NULL) {
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+    // Empty array:  Nothing to do
+    // (taken before the temps are pushed, so L_done skips the matching pop)
+    __ beqz(count, L_done);
+
+    __ push_reg(RegSet::of(x7, x9, x18, x19), sp);
+
+#ifdef ASSERT
+    BLOCK_COMMENT("assert consistent ckoff/ckval");
+    // The ckoff and ckval must be mutually consistent,
+    // even though caller generates both.
+    { Label L;
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      // start_to is free to use as a scratch register at this point.
+      __ lwu(start_to, Address(ckval, sco_offset));
+      __ beq(ckoff, start_to, L);
+      __ stop("super_check_offset inconsistent");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
+    bool is_oop = true;
+    if (dest_uninitialized) {
+      decorators |= IS_DEST_UNINITIALIZED;
+    }
+
+    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+    bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
+
+    // save the original count
+    __ mv(count_save, count);
+
+    // Copy from low to high addresses
+    __ mv(start_to, to);              // Save destination array start address
+    __ j(L_load_element);
+
+    // ======== begin loop ========
+    // (Loop is rotated; its entry is L_load_element.)
+    // Loop control:
+    //   for count to 0 do
+    //     copied_oop = load_heap_oop(from++)
+    //     ... generate_type_check ...
+    //     store_heap_oop(to++, copied_oop)
+    //   end
+
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_store_element);
+    __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW);  // store the oop
+    __ add(to, to, UseCompressedOops ? 4 : 8);
+    __ sub(count, count, 1);
+    // All elements stored: count is 0, which also becomes the success return value.
+    __ beqz(count, L_do_card_marks);
+
+    // ======== loop entry is here ========
+    __ BIND(L_load_element);
+    __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
+    __ add(from, from, UseCompressedOops ? 4 : 8);
+    // A null element needs no type check; store it directly.
+    __ beqz(copied_oop, L_store_element);
+
+    __ load_klass(r9_klass, copied_oop);// query the object klass
+    generate_type_check(r9_klass, ckoff, ckval, L_store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register count = remaining oops, count_save = total oops.
+    // Emit GC store barriers for the oops we have copied and report
+    // their number to the caller.
+
+    __ sub(count, count_save, count);     // K = partially copied oop count
+    __ xori(count, count, -1);                   // report (-1^K) to caller
+    // NOTE(review): count now holds ~K, which is zero only for K == -1, so this
+    // branch looks unreachable; a test for K == 0 may have been intended -- confirm.
+    __ beqz(count, L_done_pop);
+
+    __ BIND(L_do_card_marks);
+    bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs);
+
+    __ bind(L_done_pop);
+    __ pop_reg(RegSet::of(x7, x9, x18, x19), sp);
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+
+    __ bind(L_done);
+    // Result: 0 on success (including the empty-array case), -1^K on failure.
+    __ mv(x10, count);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
+  // Perform range checks on the proposed arraycopy.
+  // Kills temp, but nothing else.
+  // Also, clean the sign bits of src_pos and dst_pos.
+  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
+                              Register src_pos, // source position (c_rarg1)
+                              Register dst,     // destination array oop (c_rarg2)
+                              Register dst_pos, // destination position (c_rarg3)
+                              Register length,
+                              Register temp,
+                              Label& L_failed) {
+    BLOCK_COMMENT("arraycopy_range_checks:");
+
+    // t0 holds the array length while temp holds the end position.
+    assert_different_registers(t0, temp);
+
+    const int array_length_offset = arrayOopDesc::length_offset_in_bytes();
+
+    // Source bound: FAIL when src_pos + length (32-bit add, compared unsigned)
+    // exceeds arrayOop(src)->length().
+    __ lwu(t0, Address(src, array_length_offset));
+    __ addw(temp, length, src_pos);
+    __ bgtu(temp, t0, L_failed);
+
+    // Destination bound: FAIL when dst_pos + length exceeds
+    // arrayOop(dst)->length().
+    __ lwu(t0, Address(dst, array_length_offset));
+    __ addw(temp, length, dst_pos);
+    __ bgtu(temp, t0, L_failed);
+
+    // Both positions passed: clear the upper 32 bits of 'src_pos' and
+    // 'dst_pos' so they can be used as 64-bit offsets.
+    __ zero_extend(src_pos, src_pos, 32);
+    __ zero_extend(dst_pos, dst_pos, 32);
+
+    BLOCK_COMMENT("arraycopy_range_checks done");
+  }
+
+  //
+  //  Generate 'unsafe' array copy stub
+  //  Though just as safe as the other stubs, it takes an unscaled
+  //  size_t argument instead of an element count.
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - byte count, treated as ssize_t, can be zero
+  //
+  // Examines the alignment of the operands and dispatches
+  // to a long, int, short, or byte copy loop.
+  //
+  address generate_unsafe_copy(const char* name,
+                               address byte_copy_entry,
+                               address short_copy_entry,
+                               address int_copy_entry,
+                               address long_copy_entry) {
+    // name             - stub name string
+    // *_copy_entry     - entry points of the byte/short/int/long copy stubs
+    //                    this stub tail-jumps to; all must be non-NULL
+    // Returns the start address of the generated dispatcher.
+    assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
+                int_copy_entry != NULL && long_copy_entry != NULL);
+    Label L_long_aligned, L_int_aligned, L_short_aligned;
+    const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+    // No matching leave() is emitted here: control leaves via the jumps below.
+    // (Presumably the target entry points sit after the target stub's own
+    // enter() -- verify against the callers that pass them in.)
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // bump this on entry, not on exit:
+    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
+
+    // t0 = s | d | count: a low bit is clear only if it is clear in the source
+    // address, the destination address and the byte count alike.
+    __ orr(t0, s, d);
+    __ orr(t0, t0, count);
+
+    // Pick the widest element size that divides all three values.
+    __ andi(t0, t0, BytesPerLong - 1);
+    __ beqz(t0, L_long_aligned);
+    __ andi(t0, t0, BytesPerInt - 1);
+    __ beqz(t0, L_int_aligned);
+    __ andi(t0, t0, 1);
+    __ beqz(t0, L_short_aligned);
+    // Odd address or size: copy byte by byte, count is already a byte count.
+    __ j(RuntimeAddress(byte_copy_entry));
+
+    __ BIND(L_short_aligned);
+    __ srli(count, count, LogBytesPerShort);  // size => short_count
+    __ j(RuntimeAddress(short_copy_entry));
+    __ BIND(L_int_aligned);
+    __ srli(count, count, LogBytesPerInt);    // size => int_count
+    __ j(RuntimeAddress(int_copy_entry));
+    __ BIND(L_long_aligned);
+    __ srli(count, count, LogBytesPerLong);   // size => long_count
+    __ j(RuntimeAddress(long_copy_entry));
+
+    return start;
+  }
+
+  //
+  //  Generate generic array copy stubs
+  //
+  //  Input:
+  //    c_rarg0    -  src oop
+  //    c_rarg1    -  src_pos (32-bits)
+  //    c_rarg2    -  dst oop
+  //    c_rarg3    -  dst_pos (32-bits)
+  //    c_rarg4    -  element count (32-bits)
+  //
+  //  Output:
+  //    x10 ==  0  -  success
+  //    x10 == -1^K - failure, where K is partial transfer count
+  //
+  address generate_generic_copy(const char* name,
+                                address byte_copy_entry, address short_copy_entry,
+                                address int_copy_entry, address oop_copy_entry,
+                                address long_copy_entry, address checkcast_copy_entry) {
+    assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL &&
+                int_copy_entry != NULL && oop_copy_entry != NULL &&
+                long_copy_entry != NULL && checkcast_copy_entry != NULL);
+    Label L_failed, L_failed_0, L_objArray;
+    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
+
+    // Input registers
+    const Register src        = c_rarg0;  // source array oop
+    const Register src_pos    = c_rarg1;  // source position
+    const Register dst        = c_rarg2;  // destination array oop
+    const Register dst_pos    = c_rarg3;  // destination position
+    const Register length     = c_rarg4;
+
+    // Registers used as temps
+    const Register dst_klass = c_rarg5;
+
+    __ align(CodeEntryAlignment);
+
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // bump this on entry, not on exit:
+    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
+
+    //-----------------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the following conditions are met:
+    //
+    // (1) src and dst must not be null.
+    // (2) src_pos must not be negative.
+    // (3) dst_pos must not be negative.
+    // (4) length  must not be negative.
+    // (5) src klass and dst klass should be the same and not NULL.
+    // (6) src and dst should be arrays.
+    // (7) src_pos + length must not exceed length of src.
+    // (8) dst_pos + length must not exceed length of dst.
+    //
+
+    // if [src == NULL] then return -1
+    __ beqz(src, L_failed);
+
+    // if [src_pos < 0] then return -1
+    // i.e. sign bit set
+    __ andi(t0, src_pos, 1UL << 31);
+    __ bnez(t0, L_failed);
+
+    // if [dst == NULL] then return -1
+    __ beqz(dst, L_failed);
+
+    // if [dst_pos < 0] then return -1
+    // i.e. sign bit set
+    __ andi(t0, dst_pos, 1UL << 31);
+    __ bnez(t0, L_failed);
+
+    // registers used as temp
+    const Register scratch_length    = x28; // elements count to copy
+    const Register scratch_src_klass = x29; // array klass
+    const Register lh                = x30; // layout helper
+
+    // if [length < 0] then return -1
+    __ addw(scratch_length, length, zr);    // length (elements count, 32-bits value)
+    // i.e. sign bit set
+    __ andi(t0, scratch_length, 1UL << 31);
+    __ bnez(t0, L_failed);
+
+    __ load_klass(scratch_src_klass, src);
+#ifdef ASSERT
+    {
+      BLOCK_COMMENT("assert klasses not null {");
+      Label L1, L2;
+      __ bnez(scratch_src_klass, L2);   // it is broken if klass is NULL
+      __ bind(L1);
+      __ stop("broken null klass");
+      __ bind(L2);
+      __ load_klass(t0, dst);
+      __ beqz(t0, L1);     // this would be broken also
+      BLOCK_COMMENT("} assert klasses not null done");
+    }
+#endif
+
+    // Load layout helper (32-bits)
+    //
+    //  |array_tag|     | header_size | element_type |     |log2_element_size|
+    // 32        30    24            16              8     2                 0
+    //
+    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+    //
+
+    const int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+    // Handle objArrays completely differently...
+    const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+    __ lw(lh, Address(scratch_src_klass, lh_offset));
+    __ mvw(t0, objArray_lh);
+    __ beq(lh, t0, L_objArray);
+
+    // if [src->klass() != dst->klass()] then return -1
+    __ load_klass(t1, dst);
+    __ bne(t1, scratch_src_klass, L_failed);
+
+    // if [src->is_Array() != NULL] then return -1
+    // i.e. (lh >= 0)
+    __ andi(t0, lh, 1UL << 31);
+    __ beqz(t0, L_failed);
+
+    // At this point, it is known to be a typeArray (array_tag 0x3).
+#ifdef ASSERT
+    {
+      BLOCK_COMMENT("assert primitive array {");
+      Label L;
+      __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
+      __ bge(lh, t1, L);
+      __ stop("must be a primitive array");
+      __ bind(L);
+      BLOCK_COMMENT("} assert primitive array done");
+    }
+#endif
+
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                           t1, L_failed);
+
+    // TypeArrayKlass
+    //
+    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize)
+    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize)
+    //
+
+    const Register t0_offset = t0;    // array offset
+    const Register x22_elsize = lh;   // element size
+
+    // Get array_header_in_bytes()
+    int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1);
+    int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width;
+    __ slli(t0_offset, lh, XLEN - lh_header_size_msb);          // left shift to remove 24 ~ 32;
+    __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset
+
+    __ add(src, src, t0_offset);           // src array offset
+    __ add(dst, dst, t0_offset);           // dst array offset
+    BLOCK_COMMENT("choose copy loop based on element size");
+
+    // next registers should be set before the jump to corresponding stub
+    const Register from     = c_rarg0;  // source array address
+    const Register to       = c_rarg1;  // destination array address
+    const Register count    = c_rarg2;  // elements count
+
+    // 'from', 'to', 'count' registers should be set in such order
+    // since they are the same as 'src', 'src_pos', 'dst'.
+
+    assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
+
+    // The possible values of elsize are 0-3, i.e. exact_log2(element
+    // size in bytes).  We do a simple bitwise binary search.
+  __ BIND(L_copy_bytes);
+    __ andi(t0, x22_elsize, 2);
+    __ bnez(t0, L_copy_ints);
+    __ andi(t0, x22_elsize, 1);
+    __ bnez(t0, L_copy_shorts);
+    __ add(from, src, src_pos); // src_addr
+    __ add(to, dst, dst_pos); // dst_addr
+    __ addw(count, scratch_length, zr); // length
+    __ j(RuntimeAddress(byte_copy_entry));
+
+  __ BIND(L_copy_shorts);
+    __ shadd(from, src_pos, src, t0, 1); // src_addr
+    __ shadd(to, dst_pos, dst, t0, 1); // dst_addr
+    __ addw(count, scratch_length, zr); // length
+    __ j(RuntimeAddress(short_copy_entry));
+
+  __ BIND(L_copy_ints);
+    __ andi(t0, x22_elsize, 1);
+    __ bnez(t0, L_copy_longs);
+    __ shadd(from, src_pos, src, t0, 2); // src_addr
+    __ shadd(to, dst_pos, dst, t0, 2); // dst_addr
+    __ addw(count, scratch_length, zr); // length
+    __ j(RuntimeAddress(int_copy_entry));
+
+  __ BIND(L_copy_longs);
+#ifdef ASSERT
+    {
+      BLOCK_COMMENT("assert long copy {");
+      Label L;
+      __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize
+      __ addw(lh, lh, zr);
+      __ mvw(t0, LogBytesPerLong);
+      __ beq(x22_elsize, t0, L);
+      __ stop("must be long copy, but elsize is wrong");
+      __ bind(L);
+      BLOCK_COMMENT("} assert long copy done");
+    }
+#endif
+    __ shadd(from, src_pos, src, t0, 3); // src_addr
+    __ shadd(to, dst_pos, dst, t0, 3); // dst_addr
+    __ addw(count, scratch_length, zr); // length
+    __ j(RuntimeAddress(long_copy_entry));
+
+    // ObjArrayKlass
+  __ BIND(L_objArray);
+    // live at this point:  scratch_src_klass, scratch_length, src[_pos], dst[_pos]
+
+    Label L_plain_copy, L_checkcast_copy;
+    // test array classes for subtyping
+    __ load_klass(t2, dst);
+    __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality
+
+    // Identically typed arrays can be copied without element-wise checks.
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                           t1, L_failed);
+
+    __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
+    __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+    __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
+    __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+    __ addw(count, scratch_length, zr); // length
+  __ BIND(L_plain_copy);
+    __ j(RuntimeAddress(oop_copy_entry));
+
+  __ BIND(L_checkcast_copy);
+    // live at this point:  scratch_src_klass, scratch_length, t2 (dst_klass)
+    {
+      // Before looking at dst.length, make sure dst is also an objArray.
+      __ lwu(t0, Address(t2, lh_offset));
+      __ mvw(t1, objArray_lh);
+      __ bne(t0, t1, L_failed);
+
+      // It is safe to examine both src.length and dst.length.
+      arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                             t2, L_failed);
+
+      __ load_klass(dst_klass, dst); // reload
+
+      // Marshal the base address arguments now, freeing registers.
+      __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop);
+      __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+      __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop);
+      __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+      __ addw(count, length, zr);           // length (reloaded)
+      const Register sco_temp = c_rarg3;      // this register is free now
+      assert_different_registers(from, to, count, sco_temp,
+                                 dst_klass, scratch_src_klass);
+
+      // Generate the type check.
+      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ lwu(sco_temp, Address(dst_klass, sco_offset));
+
+      // Smashes t0, t1
+      generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy);
+
+      // Fetch destination element klass from the ObjArrayKlass header.
+      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+      __ ld(dst_klass, Address(dst_klass, ek_offset));
+      __ lwu(sco_temp, Address(dst_klass, sco_offset));
+
+      // the checkcast_copy loop needs two extra arguments:
+      assert(c_rarg3 == sco_temp, "#3 already in place");
+      // Set up arguments for checkcast_copy_entry.
+      __ mv(c_rarg4, dst_klass);  // dst.klass.element_klass
+      __ j(RuntimeAddress(checkcast_copy_entry));
+    }
+
+  __ BIND(L_failed);
+    __ mv(x10, -1);
+    __ leave();   // required for proper stackwalking of RuntimeStub frame
+    __ ret();
+
+    return start;
+  }
+
+  //
+  // Generate stub for array fill. If "aligned" is true, the
+  // "to" address is assumed to be heapword aligned.
+  //
+  // Arguments for generated stub:
+  //   to:    c_rarg0
+  //   value: c_rarg1
+  //   count: c_rarg2 treated as signed
+  //
+  address generate_fill(BasicType t, bool aligned, const char* name) { // emits one fill stub and returns its entry pc
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    BLOCK_COMMENT("Entry:");
+
+    const Register to        = c_rarg0;  // destination array address
+    const Register value     = c_rarg1;  // value
+    const Register count     = c_rarg2;  // elements count
+
+    const Register bz_base   = x28;      // base for block_zero routine; NOTE(review): never referenced in this function
+    const Register cnt_words = x29;      // temp register: 8-byte word count handed to fill_words
+    const Register tmp_reg   = t1;
+
+    __ enter();
+
+    Label L_fill_elements, L_exit1;
+
+    int shift = -1; // log2 of the element size in bytes, set per type below
+    switch (t) {
+      case T_BYTE:
+        shift = 0;
+
+        // Zero extend value, then replicate it across the low 32 bits
+        // 8 bit -> 16 bit
+        __ andi(value, value, 0xff);
+        __ mv(tmp_reg, value);
+        __ slli(tmp_reg, tmp_reg, 8);
+        __ orr(value, value, tmp_reg);
+
+        // 16 bit -> 32 bit
+        __ mv(tmp_reg, value);
+        __ slli(tmp_reg, tmp_reg, 16);
+        __ orr(value, value, tmp_reg);
+
+        __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element
+        __ bltu(count, tmp_reg, L_fill_elements);
+        break;
+      case T_SHORT:
+        shift = 1;
+        // Zero extend value, then replicate it across the low 32 bits
+        // 16 bit -> 32 bit
+        __ andi(value, value, 0xffff);
+        __ mv(tmp_reg, value);
+        __ slli(tmp_reg, tmp_reg, 16);
+        __ orr(value, value, tmp_reg);
+
+        // Short arrays (< 8 bytes) fill by element
+        __ mv(tmp_reg, 8 >> shift);
+        __ bltu(count, tmp_reg, L_fill_elements);
+        break;
+      case T_INT:
+        shift = 2;
+
+        // Short arrays (< 8 bytes) fill by element
+        __ mv(tmp_reg, 8 >> shift);
+        __ bltu(count, tmp_reg, L_fill_elements);
+        break;
+      default: ShouldNotReachHere();
+    }
+
+    // Align destination address at 8 bytes address boundary.
+    Label L_skip_align1, L_skip_align2, L_skip_align4;
+    if (!aligned) {
+      switch (t) {
+        case T_BYTE:
+          // One byte misalignment happens only for byte arrays.
+          __ andi(t0, to, 1);
+          __ beqz(t0, L_skip_align1);
+          __ sb(value, Address(to, 0));
+          __ addi(to, to, 1);
+          __ addiw(count, count, -1);
+          __ bind(L_skip_align1);
+          // Fallthrough
+        case T_SHORT:
+          // Two bytes misalignment happens only for byte and short (char) arrays.
+          __ andi(t0, to, 2);
+          __ beqz(t0, L_skip_align2);
+          __ sh(value, Address(to, 0));
+          __ addi(to, to, 2);
+          __ addiw(count, count, -(2 >> shift)); // 2 bytes expressed in elements
+          __ bind(L_skip_align2);
+          // Fallthrough
+        case T_INT:
+          // Align to 8 bytes, we know we are 4 byte aligned to start.
+          __ andi(t0, to, 4);
+          __ beqz(t0, L_skip_align4);
+          __ sw(value, Address(to, 0));
+          __ addi(to, to, 4);
+          __ addiw(count, count, -(4 >> shift)); // 4 bytes expressed in elements
+          __ bind(L_skip_align4);
+          break;
+        default: ShouldNotReachHere();
+      }
+    }
+
+    //
+    //  Fill large chunks
+    //
+    __ srliw(cnt_words, count, 3 - shift); // number of words
+
+    // 32 bit -> 64 bit
+    __ andi(value, value, 0xffffffff);
+    __ mv(tmp_reg, value);
+    __ slli(tmp_reg, tmp_reg, 32);
+    __ orr(value, value, tmp_reg);
+
+    __ slli(tmp_reg, cnt_words, 3 - shift); // elements covered by the word fill
+    __ subw(count, count, tmp_reg);         // elements left over for the tail
+    {
+      __ fill_words(to, cnt_words, value);
+    }
+
+    // Remaining count is less than 8 bytes. Fill it by a single store.
+    // Note that the total length is no less than 8 bytes.
+    if (t == T_BYTE || t == T_SHORT) {
+      __ beqz(count, L_exit1);
+      __ shadd(to, count, to, tmp_reg, shift); // points to the end
+      __ sd(value, Address(to, -8)); // overwrite some elements
+      __ bind(L_exit1);
+      __ leave();
+      __ ret();
+    }
+
+    // Handle fills of less than 8 bytes.
+    Label L_fill_2, L_fill_4, L_exit2;
+    __ bind(L_fill_elements); // for T_INT the bulk-fill path above also falls through to here
+    switch (t) {
+      case T_BYTE:
+        __ andi(t0, count, 1);
+        __ beqz(t0, L_fill_2);
+        __ sb(value, Address(to, 0));
+        __ addi(to, to, 1);
+        __ bind(L_fill_2);
+        __ andi(t0, count, 2);
+        __ beqz(t0, L_fill_4);
+        __ sh(value, Address(to, 0));
+        __ addi(to, to, 2);
+        __ bind(L_fill_4);
+        __ andi(t0, count, 4);
+        __ beqz(t0, L_exit2);
+        __ sw(value, Address(to, 0));
+        break;
+      case T_SHORT:
+        __ andi(t0, count, 1);
+        __ beqz(t0, L_fill_4);
+        __ sh(value, Address(to, 0));
+        __ addi(to, to, 2);
+        __ bind(L_fill_4);
+        __ andi(t0, count, 2);
+        __ beqz(t0, L_exit2);
+        __ sw(value, Address(to, 0));
+        break;
+      case T_INT:
+        __ beqz(count, L_exit2);
+        __ sw(value, Address(to, 0));
+        break;
+      default: ShouldNotReachHere();
+    }
+    __ bind(L_exit2);
+    __ leave();
+    __ ret();
+    return start;
+  }
+
+  void generate_arraycopy_stubs() { // generates the copy and fill stubs and stores their addresses in StubRoutines
+    address entry                     = NULL; // out-param of each disjoint generator, passed on to the matching conjoint generator
+    address entry_jbyte_arraycopy     = NULL; // the entry_* values below are consumed by the unsafe/generic copy generators
+    address entry_jshort_arraycopy    = NULL;
+    address entry_jint_arraycopy      = NULL;
+    address entry_oop_arraycopy       = NULL;
+    address entry_jlong_arraycopy     = NULL;
+    address entry_checkcast_arraycopy = NULL;
+
+    generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards);
+    generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards);
+
+    StubRoutines::riscv::_zero_blocks = generate_zero_blocks();
+
+    //*** jbyte
+    // Always need aligned and unaligned versions
+    StubRoutines::_jbyte_disjoint_arraycopy          = generate_disjoint_byte_copy(false, &entry,
+                                                                                   "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy                   = generate_conjoint_byte_copy(false, entry,
+                                                                                   &entry_jbyte_arraycopy,
+                                                                                   "jbyte_arraycopy");
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, &entry,
+                                                                                   "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jbyte_arraycopy           = generate_conjoint_byte_copy(true, entry, NULL,
+                                                                                   "arrayof_jbyte_arraycopy");
+
+    //*** jshort
+    // Always need aligned and unaligned versions
+    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
+                                                                                    "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
+                                                                                    &entry_jshort_arraycopy,
+                                                                                    "jshort_arraycopy");
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
+                                                                                    "arrayof_jshort_disjoint_arraycopy");
+    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
+                                                                                    "arrayof_jshort_arraycopy");
+
+    //*** jint
+    // Aligned versions
+    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, &entry,
+                                                                                  "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy            = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+                                                                                  "arrayof_jint_arraycopy");
+    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+    // entry_jint_arraycopy always points to the unaligned version (the call below overwrites the value set above)
+    StubRoutines::_jint_disjoint_arraycopy           = generate_disjoint_int_copy(false, &entry,
+                                                                                  "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy                    = generate_conjoint_int_copy(false, entry,
+                                                                                  &entry_jint_arraycopy,
+                                                                                  "jint_arraycopy");
+
+    //*** jlong
+    // It is always aligned, so the plain jlong entries below reuse the arrayof_ stubs
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, &entry,
+                                                                                   "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy           = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+                                                                                   "arrayof_jlong_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy          = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+    StubRoutines::_jlong_arraycopy                   = StubRoutines::_arrayof_jlong_arraycopy;
+
+    //*** oops
+    {
+      // With compressed oops we need unaligned versions; notice that
+      // entry_oop_arraycopy is set by the first conjoint call only.
+      bool aligned = !UseCompressedOops;
+
+      StubRoutines::_arrayof_oop_disjoint_arraycopy
+        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
+                                     /*dest_uninitialized*/false);
+      StubRoutines::_arrayof_oop_arraycopy
+        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
+                                     /*dest_uninitialized*/false);
+      // Same alignment setting, generated with dest_uninitialized == true
+      StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
+        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
+                                     /*dest_uninitialized*/true);
+      StubRoutines::_arrayof_oop_arraycopy_uninit
+        = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
+                                     /*dest_uninitialized*/true);
+    }
+
+    StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy; // plain oop entries alias the arrayof_ stubs
+    StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
+    StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
+
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+
+
+    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
+                                                              entry_jbyte_arraycopy,
+                                                              entry_jshort_arraycopy,
+                                                              entry_jint_arraycopy,
+                                                              entry_jlong_arraycopy);
+
+    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
+                                                               entry_jbyte_arraycopy,
+                                                               entry_jshort_arraycopy,
+                                                               entry_jint_arraycopy,
+                                                               entry_oop_arraycopy,
+                                                               entry_jlong_arraycopy,
+                                                               entry_checkcast_arraycopy);
+
+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); // second argument is generate_fill's 'aligned' flag
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+  }
+
+  // Safefetch stubs.
+  void generate_safefetch(const char* name, int size, address* entry,
+                          address* fault_pc, address* continuation_pc) {
+    // safefetch signatures:
+    //   int      SafeFetch32(int*      adr, int      errValue)
+    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue)
+    //
+    // arguments:
+    //   c_rarg0 = adr
+    //   c_rarg1 = errValue
+    //
+    // result:
+    //   x10      = *adr or errValue
+    assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL);
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // Entry point of the stub.
+    *entry = __ pc();
+
+    // Load *adr into c_rarg1, may fault. fault_pc records the address of the load.
+    *fault_pc = __ pc();
+    switch (size) { // size is the access width in bytes; only 4 and 8 are supported
+      case 4:
+        // int32_t
+        __ lw(c_rarg1, Address(c_rarg0, 0));
+        break;
+      case 8:
+        // int64_t
+        __ ld(c_rarg1, Address(c_rarg0, 0));
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // return errValue or *adr (presumably a faulting load is resumed here with c_rarg1 untouched; confirm against the signal handler)
+    *continuation_pc = __ pc();
+    __ mv(x10, c_rarg1);
+    __ ret();
+  }
+
+  // code for comparing 16 bytes of strings with same encoding
+  void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { // tmp1/tmp2 must already hold the current 8 bytes of str1/str2
+    const Register str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; // cnt1 is reused as a data register
+    __ ld(tmp5, Address(str1));   // next 8 bytes of str1
+    __ addi(str1, str1, 8);
+    __ xorr(tmp4, tmp1, tmp2);    // compare the pair that was loaded before this code runs
+    __ ld(cnt1, Address(str2));   // next 8 bytes of str2
+    __ addi(str2, str2, 8);
+    __ bnez(tmp4, DIFF1);         // mismatch is in tmp1/tmp2
+    __ ld(tmp1, Address(str1));   // preload the following pair for later code to check
+    __ addi(str1, str1, 8);
+    __ xorr(tmp4, tmp5, cnt1);    // compare the pair loaded above
+    __ ld(tmp2, Address(str2));
+    __ addi(str2, str2, 8);
+    __ bnez(tmp4, DIFF2);         // mismatch is in tmp5/cnt1
+  }
+
+  // code for comparing 8 characters of strings with Latin1 and Utf16 encoding
+  void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
+                              Label &DIFF2) { // curU (x7) must already hold the next 8 bytes of the UTF-16 string
+    const Register strU = x12, curU = x7, strL = x29, tmp = x30;
+    __ ld(tmpL, Address(strL)); // 8 bytes of the Latin1 string
+    __ addi(strL, strL, 8);
+    __ ld(tmpU, Address(strU)); // 8 bytes of the UTF-16 string, compared in the second half below
+    __ addi(strU, strU, 8);
+    __ inflate_lo32(tmp, tmpL); // presumably widens the low 32 bits of tmpL to UTF-16; see MacroAssembler::inflate_lo32
+    __ mv(t0, tmp);             // keep the inflated value in t0 for the DIFF handlers
+    __ xorr(tmp, curU, t0);
+    __ bnez(tmp, DIFF2);        // mismatch between curU and t0
+
+    __ ld(curU, Address(strU)); // preload the UTF-16 word for the next comparison
+    __ addi(strU, strU, 8);
+    __ inflate_hi32(tmp, tmpL); // presumably widens the high 32 bits of tmpL; see MacroAssembler::inflate_hi32
+    __ mv(t0, tmp);
+    __ xorr(tmp, tmpU, t0);
+    __ bnez(tmp, DIFF1);        // mismatch between tmpU and t0
+  }
+
+  // x10  = result
+  // x11  = str1
+  // x12  = cnt1
+  // x13  = str2
+  // x14  = cnt2
+  // x28  = tmp1
+  // x29  = tmp2
+  // x30  = tmp3
+  address generate_compare_long_string_different_encoding(bool isLU) { // isLU: str1 is Latin1 and str2 is UTF-16, otherwise the reverse
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", isLU ? "compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL");
+    address entry = __ pc();
+    Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF1, DIFF2,
+          DONE, CALCULATE_DIFFERENCE;
+    const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
+                   tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
+    RegSet spilled_regs = RegSet::of(tmp4, tmp5); // saved below and restored at DONE
+
+    // cnt2 == amount of characters left to compare
+    // Check already loaded first 4 symbols
+    __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2);
+    __ mv(isLU ? tmp1 : tmp2, tmp3);
+    __ addi(str1, str1, isLU ? wordSize / 2 : wordSize);
+    __ addi(str2, str2, isLU ? wordSize : wordSize / 2);
+    __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
+    __ push_reg(spilled_regs, sp);
+
+    if (isLU) {
+      __ add(str1, str1, cnt2);
+      __ shadd(str2, cnt2, str2, t0, 1);
+    } else {
+      __ shadd(str1, cnt2, str1, t0, 1);
+      __ add(str2, str2, cnt2);
+    }
+    __ xorr(tmp3, tmp1, tmp2);
+    __ mv(tmp5, tmp2); // CALCULATE_DIFFERENCE reads its operands from tmp1 and tmp5
+    __ bnez(tmp3, CALCULATE_DIFFERENCE);
+
+    Register strU = isLU ? str2 : str1,
+             strL = isLU ? str1 : str2,
+             tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison
+             tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison
+
+    __ sub(tmp2, strL, cnt2); // strL pointer to load from
+    __ slli(t0, cnt2, 1);
+    __ sub(cnt1, strU, t0); // strU pointer to load from
+
+    __ ld(tmp4, Address(cnt1)); // preload the first UTF-16 word for compare_string_8_x_LU
+    __ addi(cnt1, cnt1, 8);
+    __ beqz(cnt2, LOAD_LAST); // no characters left except last load
+    __ sub(cnt2, cnt2, 16);
+    __ bltz(cnt2, TAIL);
+    __ bind(SMALL_LOOP); // smaller loop
+      __ sub(cnt2, cnt2, 16);
+      compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+      compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+      __ bgez(cnt2, SMALL_LOOP);
+      __ addi(t0, cnt2, 16);
+      __ beqz(t0, LOAD_LAST);
+    __ bind(TAIL); // 1..15 characters left until last load (last 4 characters)
+      // Address of 8 bytes before last 4 characters in UTF-16 string
+      __ shadd(cnt1, cnt2, cnt1, t0, 1);
+      // Address of 16 bytes before last 4 characters in Latin1 string
+      __ add(tmp2, tmp2, cnt2);
+      __ ld(tmp4, Address(cnt1, -8));
+      // last 16 characters before last load
+      compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+      compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2);
+      __ j(LOAD_LAST);
+    __ bind(DIFF2);
+      __ mv(tmpU, tmp4); // the mismatching UTF-16 word is still in tmp4 (curU)
+    __ bind(DIFF1);
+      __ mv(tmpL, t0); // the inflated Latin1 word was left in t0 by compare_string_8_x_LU
+      __ j(CALCULATE_DIFFERENCE);
+    __ bind(LOAD_LAST);
+      // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU.
+      // No need to load it again
+      __ mv(tmpU, tmp4);
+      __ ld(tmpL, Address(strL));
+      __ inflate_lo32(tmp3, tmpL);
+      __ mv(tmpL, tmp3);
+      __ xorr(tmp3, tmpU, tmpL);
+      __ beqz(tmp3, DONE); // no mismatch: result is not written on this path
+
+      // Find the first different characters in the longwords and
+      // compute their difference.
+    __ bind(CALCULATE_DIFFERENCE); // expects the XOR of the two words in tmp3 and the words in tmp1/tmp5
+      __ ctzc_bit(tmp4, tmp3);
+      __ srl(tmp1, tmp1, tmp4);
+      __ srl(tmp5, tmp5, tmp4);
+      __ andi(tmp1, tmp1, 0xFFFF);
+      __ andi(tmp5, tmp5, 0xFFFF);
+      __ sub(result, tmp1, tmp5);
+    __ bind(DONE);
+      __ pop_reg(spilled_regs, sp);
+      __ ret();
+    return entry;
+  }
+
+  // x10  = result
+  // x11  = str1
+  // x12  = cnt1
+  // x13  = str2
+  // x14  = cnt2
+  // x28  = tmp1
+  // x29  = tmp2
+  // x30  = tmp3
+  // x7   = tmp4, x31 = tmp5
+  address generate_compare_long_string_same_encoding(bool isLL) { // isLL selects 1-byte characters, otherwise 2-byte characters
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", isLL ?
+                      "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU");
+    address entry = __ pc();
+    Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL,
+          LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF;
+    const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14,
+                   tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31;
+    RegSet spilled_regs = RegSet::of(tmp4, tmp5); // saved below and restored at LENGTH_DIFF
+
+    // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
+    // update cnt2 counter with already loaded 8 bytes
+    __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2));
+    // update pointers, because of previous read
+    __ add(str1, str1, wordSize);
+    __ add(str2, str2, wordSize);
+    // less than 16 bytes left?
+    __ sub(cnt2, cnt2, isLL ? 16 : 8);
+    __ push_reg(spilled_regs, sp);
+    __ bltz(cnt2, TAIL);
+    __ bind(SMALL_LOOP);
+      compare_string_16_bytes_same(DIFF, DIFF2);
+      __ sub(cnt2, cnt2, isLL ? 16 : 8);
+      __ bgez(cnt2, SMALL_LOOP);
+    __ bind(TAIL);
+      __ addi(cnt2, cnt2, isLL ? 16 : 8); // undo the last 16-byte decrement
+      __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // only the pair already in tmp1/tmp2 is left
+      __ sub(cnt2, cnt2, isLL ? 8 : 4);
+      __ blez(cnt2, CHECK_LAST);
+      __ xorr(tmp4, tmp1, tmp2);
+      __ bnez(tmp4, DIFF);
+      __ ld(tmp1, Address(str1));
+      __ addi(str1, str1, 8);
+      __ ld(tmp2, Address(str2));
+      __ addi(str2, str2, 8);
+      __ sub(cnt2, cnt2, isLL ? 8 : 4);
+    __ bind(CHECK_LAST);
+      if (!isLL) {
+        __ add(cnt2, cnt2, cnt2); // now in bytes
+      }
+      __ xorr(tmp4, tmp1, tmp2);
+      __ bnez(tmp4, DIFF);
+      __ add(str1, str1, cnt2); // cnt2 is <= 0 here, so the final loads step back to end exactly at the string end
+      __ ld(tmp5, Address(str1));
+      __ add(str2, str2, cnt2);
+      __ ld(cnt1, Address(str2));
+      __ xorr(tmp4, tmp5, cnt1);
+      __ beqz(tmp4, LENGTH_DIFF);
+      // Find the first different characters in the longwords and
+      // compute their difference.
+    __ bind(DIFF2); // mismatching words are in tmp5/cnt1, their XOR in tmp4
+      __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+      __ srl(tmp5, tmp5, tmp3);
+      __ srl(cnt1, cnt1, tmp3);
+      if (isLL) {
+        __ andi(tmp5, tmp5, 0xFF);
+        __ andi(cnt1, cnt1, 0xFF);
+      } else {
+        __ andi(tmp5, tmp5, 0xFFFF);
+        __ andi(cnt1, cnt1, 0xFFFF);
+      }
+      __ sub(result, tmp5, cnt1);
+      __ j(LENGTH_DIFF);
+    __ bind(DIFF); // mismatching words are in tmp1/tmp2, their XOR in tmp4
+      __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb
+      __ srl(tmp1, tmp1, tmp3);
+      __ srl(tmp2, tmp2, tmp3);
+      if (isLL) {
+        __ andi(tmp1, tmp1, 0xFF);
+        __ andi(tmp2, tmp2, 0xFF);
+      } else {
+        __ andi(tmp1, tmp1, 0xFFFF);
+        __ andi(tmp2, tmp2, 0xFFFF);
+      }
+      __ sub(result, tmp1, tmp2);
+      __ j(LENGTH_DIFF);
+    __ bind(LAST_CHECK_AND_LENGTH_DIFF);
+      __ xorr(tmp4, tmp1, tmp2);
+      __ bnez(tmp4, DIFF);
+    __ bind(LENGTH_DIFF); // common exit; result is only written on the DIFF/DIFF2 paths
+      __ pop_reg(spilled_regs, sp);
+      __ ret();
+    return entry;
+  }
+
+  void generate_compare_long_strings() { // emits the four long-string compare stubs and records their entry addresses
+    StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true);       // isLL = true
+    StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false);      // isLL = false
+    StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true);  // isLU = true
+    StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); // isLU = false
+  }
+
+  // x10 result
+  // x11 src
+  // x12 src count
+  // x13 pattern
+  // x14 pattern count
+  address generate_string_indexof_linear(bool needle_isL, bool haystack_isL)
+  {
+    const char* stubName = needle_isL
+           ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul")
+           : "indexof_linear_uu";
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stubName);
+    address entry = __ pc();
+
+    int needle_chr_size = needle_isL ? 1 : 2;
+    int haystack_chr_size = haystack_isL ? 1 : 2;
+    int needle_chr_shift = needle_isL ? 0 : 1;
+    int haystack_chr_shift = haystack_isL ? 0 : 1;
+    bool isL = needle_isL && haystack_isL;
+    // parameters
+    Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14;
+    // temporary registers
+    Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25;
+    // redefinitions
+    Register ch1 = x28, ch2 = x29;
+    RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29);
+
+    __ push_reg(spilled_regs, sp);
+
+    Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
+          L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
+          L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
+          L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
+          L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
+          L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
+
+    __ ld(ch1, Address(needle));
+    __ ld(ch2, Address(haystack));
+    // src.length - pattern.length
+    __ sub(haystack_len, haystack_len, needle_len);
+
+    // first is needle[0]
+    __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first);
+    uint64_t mask0101 = UCONST64(0x0101010101010101);
+    uint64_t mask0001 = UCONST64(0x0001000100010001);
+    __ mv(mask1, haystack_isL ? mask0101 : mask0001);
+    __ mul(first, first, mask1);
+    uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f);
+    uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff);
+    __ mv(mask2, haystack_isL ? mask7f7f : mask7fff);
+    if (needle_isL != haystack_isL) {
+      __ mv(tmp, ch1);
+    }
+    __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1);
+    __ blez(haystack_len, L_SMALL);
+
+    if (needle_isL != haystack_isL) {
+      __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
+    }
+    // xorr, sub, orr, notr, andr
+    // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i]
+    // eg:
+    // first:        aa aa aa aa aa aa aa aa
+    // ch2:          aa aa li nx jd ka aa aa
+    // match_mask:   80 80 00 00 00 00 80 80
+    __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
+
+    // search first char of needle, if success, goto L_HAS_ZERO;
+    __ bnez(match_mask, L_HAS_ZERO);
+    __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
+    __ add(result, result, wordSize / haystack_chr_size);
+    __ add(haystack, haystack, wordSize);
+    __ bltz(haystack_len, L_POST_LOOP);
+
+    __ bind(L_LOOP);
+    __ ld(ch2, Address(haystack));
+    __ compute_match_mask(ch2, first, match_mask, mask1, mask2);
+    __ bnez(match_mask, L_HAS_ZERO);
+
+    __ bind(L_LOOP_PROCEED);
+    __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size);
+    __ add(haystack, haystack, wordSize);
+    __ add(result, result, wordSize / haystack_chr_size);
+    __ bgez(haystack_len, L_LOOP);
+
+    __ bind(L_POST_LOOP);
+    __ mv(ch2, -wordSize / haystack_chr_size);
+    __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check
+    __ ld(ch2, Address(haystack));
+    __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
+    __ neg(haystack_len, haystack_len);
+    __ xorr(ch2, first, ch2);
+    __ sub(match_mask, ch2, mask1);
+    __ orr(ch2, ch2, mask2);
+    __ mv(trailing_zeros, -1); // all bits set
+    __ j(L_SMALL_PROCEED);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_SMALL);
+    __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift);
+    __ neg(haystack_len, haystack_len);
+    if (needle_isL != haystack_isL) {
+      __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros);
+    }
+    __ xorr(ch2, first, ch2);
+    __ sub(match_mask, ch2, mask1);
+    __ orr(ch2, ch2, mask2);
+    __ mv(trailing_zeros, -1); // all bits set
+
+    __ bind(L_SMALL_PROCEED);
+    __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits.
+    __ notr(ch2, ch2);
+    __ andr(match_mask, match_mask, ch2);
+    __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check
+    __ beqz(match_mask, NOMATCH);
+
+    __ bind(L_SMALL_HAS_ZERO_LOOP);
+    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros
+    __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+    __ mv(ch2, wordSize / haystack_chr_size);
+    __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2);
+    __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+    __ mv(trailing_zeros, wordSize / haystack_chr_size);
+    __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+
+    __ bind(L_SMALL_CMP_LOOP);
+    __ shadd(first, trailing_zeros, needle, first, needle_chr_shift);
+    __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
+    needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first));
+    haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
+    __ add(trailing_zeros, trailing_zeros, 1);
+    __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP);
+    __ beq(first, ch2, L_SMALL_CMP_LOOP);
+
+    __ bind(L_SMALL_CMP_LOOP_NOMATCH);
+    __ beqz(match_mask, NOMATCH);
+    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+    __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+    __ add(result, result, 1);
+    __ add(haystack, haystack, haystack_chr_size);
+    __ j(L_SMALL_HAS_ZERO_LOOP);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_SMALL_CMP_LOOP_LAST_CMP);
+    __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+    __ j(DONE);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_SMALL_CMP_LOOP_LAST_CMP2);
+    __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+    __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH);
+    __ j(DONE);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_HAS_ZERO);
+    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2);
+    __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+    __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2);
+    __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits)
+    __ sub(result, result, 1); // array index from 0, so result -= 1
+
+    __ bind(L_HAS_ZERO_LOOP);
+    __ mv(needle_len, wordSize / haystack_chr_size);
+    __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2);
+    __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2);
+    // load next 8 bytes from haystack, and increase result index
+    __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+    __ add(result, result, 1);
+    __ mv(trailing_zeros, wordSize / haystack_chr_size);
+    __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
+
+    // compare one char
+    __ bind(L_CMP_LOOP);
+    __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift);
+    needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len));
+    __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift);
+    haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2));
+    __ add(trailing_zeros, trailing_zeros, 1); // next char index
+    __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2);
+    __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP);
+    __ beq(needle_len, ch2, L_CMP_LOOP);
+
+    __ bind(L_CMP_LOOP_NOMATCH);
+    __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH);
+    __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index
+    __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15);
+    __ add(haystack, haystack, haystack_chr_size);
+    __ j(L_HAS_ZERO_LOOP);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_CMP_LOOP_LAST_CMP);
+    __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH);
+    __ j(DONE);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_CMP_LOOP_LAST_CMP2);
+    __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL);
+    __ add(result, result, 1);
+    __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH);
+    __ j(DONE);
+
+    __ align(OptoLoopAlignment);
+    __ bind(L_HAS_ZERO_LOOP_NOMATCH);
+    // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
+    // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
+    // so, result was increased at max by wordSize/str2_chr_size - 1, so,
+    // respective high bit wasn't changed. L_LOOP_PROCEED will increase
+    // result by analyzed characters value, so, we can just reset lower bits
+    // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
+    // 2) restore needle_len and haystack_len values from "compressed" haystack_len
+    // 3) advance haystack value to represent next haystack octet. result & 7/3 is
+    // index of last analyzed substring inside current octet. So, haystack in at
+    // respective start address. We need to advance it to next octet
+    __ andi(match_mask, result, wordSize / haystack_chr_size - 1);
+    __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2);
+    __ andi(result, result, haystack_isL ? -8 : -4);
+    __ slli(tmp, match_mask, haystack_chr_shift);
+    __ sub(haystack, haystack, tmp);
+    __ addw(haystack_len, haystack_len, zr);
+    __ j(L_LOOP_PROCEED);
+
+    __ align(OptoLoopAlignment);
+    __ bind(NOMATCH);
+    __ mv(result, -1);
+
+    __ bind(DONE);
+    __ pop_reg(spilled_regs, sp);
+    __ ret();
+    return entry;
+  }
+
+  void generate_string_indexof_stubs()
+  { // Emits the three linear indexOf stub variants and publishes their entry points.
+    StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);   // both strings Latin-1
+    StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); // neither string Latin-1
+    StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);  // mixed; presumably (needle_isL, haystack_isL) -- TODO confirm order
+  }
+
+#ifdef COMPILER2
+  address generate_mulAdd()
+  { // Emits the "mulAdd" stub: mul_add() wrapped in enter()/leave(); returns the stub entry pc.
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+    address entry = __ pc();
+
+    const Register out     = x10; // x10..x14 are not written here: they carry the caller's arguments
+    const Register in      = x11;
+    const Register offset  = x12;
+    const Register len     = x13;
+    const Register k       = x14;
+    const Register tmp     = x28; // scratch register handed to mul_add()
+
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+    __ mul_add(out, in, offset, len, k, tmp);
+    __ leave();
+    __ ret();
+
+    return entry;
+  }
+
+  /**
+   *  Emits the "multiplyToLen" stub: multiply_to_len() wrapped in a frame
+   *  (enter/leave).  Returns the entry address of the generated stub.
+   *  Input:
+   *    c_rarg0   - x address
+   *    c_rarg1   - x length
+   *    c_rarg2   - y address
+   *    c_rarg3   - y length
+   *    c_rarg4   - z address
+   *    c_rarg5   - z length
+   */
+  address generate_multiplyToLen()
+  {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+    address entry = __ pc();
+
+    const Register x     = x10; // x10..x15 map onto c_rarg0..c_rarg5 as listed above
+    const Register xlen  = x11;
+    const Register y     = x12;
+    const Register ylen  = x13;
+    const Register z     = x14;
+    const Register zlen  = x15;
+
+    const Register tmp1  = x16; // tmp1..tmp7: scratch registers handed to multiply_to_len()
+    const Register tmp2  = x17;
+    const Register tmp3  = x7;
+    const Register tmp4  = x28;
+    const Register tmp5  = x29;
+    const Register tmp6  = x30;
+    const Register tmp7  = x31;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret();
+
+    return entry;
+  }
+
+  address generate_squareToLen()
+  { // Emits the "squareToLen" stub: squares x by calling multiply_to_len() with y/ylen copied from x/xlen.
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "squareToLen");
+    address entry = __ pc();
+
+    const Register x     = x10; // incoming: x, xlen, z, zlen in x10..x13
+    const Register xlen  = x11;
+    const Register z     = x12;
+    const Register zlen  = x13;
+    const Register y     = x14; // == x
+    const Register ylen  = x15; // == xlen
+
+    const Register tmp1  = x16; // tmp1..tmp7: scratch registers handed to multiply_to_len()
+    const Register tmp2  = x17;
+    const Register tmp3  = x7;
+    const Register tmp4  = x28;
+    const Register tmp5  = x29;
+    const Register tmp6  = x30;
+    const Register tmp7  = x31;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+    __ mv(y, x);       // second operand is the first operand again
+    __ mv(ylen, xlen);
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+    __ leave();
+    __ ret();
+
+    return entry;
+  }
+#endif
+
+#ifdef COMPILER2
+  class MontgomeryMultiplyGenerator : public MacroAssembler {
+
+    Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
+      Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj;
+
+    RegSet _toSave;
+    bool _squaring;
+
+  public:
+    MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
+      : MacroAssembler(as->code()), _squaring(squaring) {
+
+      // Register allocation: consecutive registers from c_rarg0, plus fixed x6/x7/x28..x31.
+
+      Register reg = c_rarg0;
+      Pa_base = reg;       // Argument registers
+      if (squaring) {
+        Pb_base = Pa_base; // squaring: b is a, so no argument register is consumed for it
+      } else {
+        Pb_base = ++reg;
+      }
+      Pn_base = ++reg;
+      Rlen= ++reg;
+      inv = ++reg;
+      Pm_base = ++reg;
+
+                        // Working registers:
+      Ra =  ++reg;      // The current digit of a, b, n, and m.
+      Rb =  ++reg;
+      Rm =  ++reg;
+      Rn =  ++reg;
+
+      Pa =  ++reg;      // Pointers to the current/next digit of a, b, n, and m.
+      Pb =  ++reg;
+      Pm =  ++reg;
+      Pn =  ++reg;
+
+      tmp0 =  ++reg;    // Three registers which form a
+      tmp1 =  ++reg;    // triple-precision accumulator.
+      tmp2 =  ++reg;
+
+      Ri =  x6;         // Outer (Ri) and inner (Rj) loop indexes.
+      Rj =  x7;
+
+      Rhi_ab = x28;     // Product registers: low and high parts
+      Rlo_ab = x29;     // of a*b and m*n.
+      Rhi_mn = x30;
+      Rlo_mn = x31;
+
+      // x18 and up are callee-saved.
+      _toSave = RegSet::range(x18, reg) + Pm_base; // Pm_base too: the generators overwrite it and need the caller's value at the end
+    }
+
+  private:
+    void save_regs() {
+      push_reg(_toSave, sp); // push the register set computed in the constructor
+    }
+
+    void restore_regs() {
+      pop_reg(_toSave, sp); // counterpart of save_regs(): pop the same register set
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block) { // emits a 2x-unrolled loop running `block` count times (count >= 0)
+      Label loop, end, odd;
+      beqz(count, end);       // nothing to do for count == 0
+      andi(t0, count, 0x1);   // t0 is clobbered as the parity scratch
+      bnez(t0, odd);          // odd count: enter at the second copy of the body
+      align(16);
+      bind(loop);
+      (this->*block)();       // `block` is a member-function pointer that emits one body
+      bind(odd);
+      (this->*block)();
+      addi(count, count, -2); // count is consumed by the loop
+      bgtz(count, loop);
+      bind(end);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block, Register d, Register s, Register tmp) { // as above, for bodies taking (d, s, tmp)
+      Label loop, end, odd;
+      beqz(count, end);       // nothing to do for count == 0
+      andi(tmp, count, 0x1);  // tmp doubles as the parity scratch before the body uses it
+      bnez(tmp, odd);         // odd count: enter at the second copy of the body
+      align(16);
+      bind(loop);
+      (this->*block)(d, s, tmp);
+      bind(odd);
+      (this->*block)(d, s, tmp);
+      addi(count, count, -2); // count is consumed by the loop
+      bgtz(count, loop);
+      bind(end);
+    }
+
+    void pre1(RegisterOrConstant i) { // set up pointers and first digits for outer iteration i of the first loop
+      block_comment("pre1");
+      // Pa = Pa_base;
+      // Pb = Pb_base + i;
+      // Pm = Pm_base;
+      // Pn = Pn_base + i;
+      // Ra = *Pa;
+      // Rb = *Pb;
+      // Rm = *Pm;
+      // Rn = *Pn;
+      if (i.is_register()) {
+        slli(t0, i.as_register(), LogBytesPerWord); // t0 = i scaled to a byte offset
+      } else {
+        mv(t0, i.as_constant());
+        slli(t0, t0, LogBytesPerWord);
+      }
+
+      mv(Pa, Pa_base);
+      add(Pb, Pb_base, t0);
+      mv(Pm, Pm_base);
+      add(Pn, Pn_base, t0);
+
+      ld(Ra, Address(Pa));
+      ld(Rb, Address(Pb));
+      ld(Rm, Address(Pm));
+      ld(Rn, Address(Pn));
+
+      // Zero the m*n result, so the first acc() of the "pending" m*n adds nothing.
+      mv(Rhi_mn, zr);
+      mv(Rlo_mn, zr);
+    }
+
+    // The core multiply-accumulate step of a Montgomery
+    // multiplication.  The idea is to schedule operations as a
+    // pipeline so that instructions with long latencies (loads and
+    // multiplies) have time to complete before their results are
+    // used.  This most benefits in-order implementations of the
+    // architecture but out-of-order ones also benefit.
+    void step() {
+      block_comment("step");
+      // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      mulhu(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      addi(Pa, Pa, wordSize);
+      ld(Ra, Address(Pa));
+      addi(Pb, Pb, -wordSize);
+      ld(Rb, Address(Pb));
+      acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the
+                                            // previous iteration.
+      // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      mulhu(Rhi_mn, Rm, Rn);
+      mul(Rlo_mn, Rm, Rn);
+      addi(Pm, Pm, wordSize);
+      ld(Rm, Address(Pm));
+      addi(Pn, Pn, -wordSize);
+      ld(Rn, Address(Pn));
+      acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); // a*b is accumulated now; the new m*n stays pending
+    }
+
+    void post1() { // finish outer iteration i of the first loop: last a*b, compute and store m[i], shift the accumulator
+      block_comment("post1");
+
+      // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      mulhu(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2);  // The pending m*n
+      acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+
+      // *Pm = Rm = tmp0 * inv;
+      mul(Rm, tmp0, inv);
+      sd(Rm, Address(Pm));
+
+      // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+      // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+      mulhu(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+      // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
+      {
+        mul(Rlo_mn, Rm, Rn);
+        add(Rlo_mn, tmp0, Rlo_mn);
+        Label ok;
+        beqz(Rlo_mn, ok);
+        stop("broken Montgomery multiply");
+        bind(ok);
+      }
+#endif
+      // We have very carefully set things up so that
+      // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
+      // the lower half of Rm * Rn because we know the result already:
+      // it must be -tmp0.  tmp0 + (-tmp0) must generate a carry iff
+      // tmp0 != 0.  So, rather than do a mul and a cad we just set
+      // the carry flag iff tmp0 is nonzero.
+      //
+      // mul(Rlo_mn, Rm, Rn);
+      // cad(zr, tmp0, Rlo_mn);
+      addi(t0, tmp0, -1);
+      sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
+      cadc(tmp0, tmp1, Rhi_mn, t0);
+      adc(tmp1, tmp2, zr, t0);
+      mv(tmp2, zr);
+    }
+
+    void pre2(Register i, Register len) { // set up pointers and first digits for outer iteration i of the second loop
+      block_comment("pre2");
+      // Pa = Pa_base + i-len;
+      // Pb = Pb_base + len;
+      // Pm = Pm_base + i-len;
+      // Pn = Pn_base + len;
+
+      sub(Rj, i, len);
+      // Rj == i-len
+
+      // Ra as temp register
+      slli(Ra, Rj, LogBytesPerWord); // byte offset of digit i-len
+      add(Pa, Pa_base, Ra);
+      add(Pm, Pm_base, Ra);
+      slli(Ra, len, LogBytesPerWord); // byte offset of digit len
+      add(Pb, Pb_base, Ra);
+      add(Pn, Pn_base, Ra);
+
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      add(Pa, Pa, wordSize);
+      ld(Ra, Address(Pa));
+      add(Pb, Pb, -wordSize);
+      ld(Rb, Address(Pb));
+      add(Pm, Pm, wordSize);
+      ld(Rm, Address(Pm));
+      add(Pn, Pn, -wordSize);
+      ld(Rn, Address(Pn));
+
+      mv(Rhi_mn, zr); // no m*n product is pending yet
+      mv(Rlo_mn, zr);
+    }
+
+    void post2(Register i, Register len) { // finish outer iteration i of the second loop: store digit i-len, shift the accumulator
+      block_comment("post2");
+      sub(Rj, i, len);
+
+      cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part
+
+      // As soon as we know the least significant digit of our result,
+      // store it.
+      // Pm_base[i-len] = tmp0;
+      // Rj as temp register
+      slli(Rj, Rj, LogBytesPerWord);
+      add(Rj, Pm_base, Rj);
+      sd(tmp0, Address(Rj));
+
+      // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+      cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part
+      adc(tmp1, tmp2, zr, t0);
+      mv(tmp2, zr);
+    }
+
+    // A carry in tmp0 after Montgomery multiplication means that we
+    // should subtract multiples of n from our result in m.  We'll
+    // keep doing that until there is no carry.
+    void normalize(Register len) {
+      block_comment("normalize");
+      // while (tmp0)
+      //   tmp0 = sub(Pm_base, Pn_base, tmp0, len);
+      Label loop, post, again;
+      Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now
+      beqz(tmp0, post); {
+        bind(again); {
+          mv(i, zr);
+          mv(cnt, len);
+          slli(Rn, i, LogBytesPerWord);
+          add(Rm, Pm_base, Rn);
+          ld(Rm, Address(Rm));
+          add(Rn, Pn_base, Rn);
+          ld(Rn, Address(Rn));
+          mv(t0, 1); // set carry flag, i.e. no borrow
+          align(16);
+          bind(loop); {
+            notr(Rn, Rn);     // m[i] - n[i] is formed as m[i] + ~n[i] + carry
+            add(Rm, Rm, t0);  // NOTE(review): wraps to 0 when Rm is all ones and t0 == 1; the sltu below then reports no carry -- confirm this cannot occur
+            add(Rm, Rm, Rn);
+            sltu(t0, Rm, Rn); // carry out = (sum < ~n[i])
+            slli(Rn, i, LogBytesPerWord); // Rn as temp register
+            add(Rn, Pm_base, Rn);
+            sd(Rm, Address(Rn));
+            add(i, i, 1);
+            slli(Rn, i, LogBytesPerWord); // load the next digits of m and n (also done after the last digit)
+            add(Rm, Pm_base, Rn);
+            ld(Rm, Address(Rm));
+            add(Rn, Pn_base, Rn);
+            ld(Rn, Address(Rn));
+            sub(cnt, cnt, 1);
+          } bnez(cnt, loop);
+          addi(tmp0, tmp0, -1); // tmp0 = tmp0 - 1 + carry, i.e. tmp0 minus the final borrow
+          add(tmp0, tmp0, t0);
+        } bnez(tmp0, again);
+      } bind(post);
+    }
+
+    // Move memory at s to d, reversing words.
+    //    Increments d to end of copied memory
+    //    Destroys tmp1, tmp2
+    //    Preserves len
+    //    Leaves s pointing to the address which was in d at start
+    void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
+      assert(tmp1 < x28 && tmp2 < x28, "register corruption");
+
+      slli(tmp1, len, LogBytesPerWord);
+      add(s, s, tmp1);                  // s = one past the last source word; reverse1 walks it backwards
+      mv(tmp1, len);                    // tmp1 is the loop counter consumed by unroll_2
+      unroll_2(tmp1,  &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
+      slli(tmp1, len, LogBytesPerWord);
+      sub(s, d, tmp1);                  // s = d - len words = the value d had on entry
+    }
+    // Copies one word from *--s to *d++, swapping its halves: [63...0] -> [31...0][63...32]
+    void reverse1(Register d, Register s, Register tmp) {
+      addi(s, s, -wordSize);
+      ld(tmp, Address(s));
+      ror_imm(tmp, tmp, 32, t0); // rotate by 32 bits; t0 is passed as an extra register
+      sd(tmp, Address(d));
+      addi(d, d, wordSize);
+    }
+
+    void step_squaring() {
+      // An extra ACC: the a*b product left in Rhi_ab/Rlo_ab by step() is added a second time
+      step();
+      acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+    }
+
+    void last_squaring(Register i) { // for even i only: accumulate one more Ra*Rb product, once
+      Label dont;
+      // if ((i & 1) == 0) {
+      andi(t0, i, 0x1);
+      bnez(t0, dont); {  // odd i: skip the block below
+        // MACC(Ra, Rb, tmp0, tmp1, tmp2);
+        // Ra = *++Pa;
+        // Rb = *--Pb;
+        mulhu(Rhi_ab, Ra, Rb);
+        mul(Rlo_ab, Ra, Rb);
+        acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2);
+      } bind(dont);
+    }
+
+    void extra_step_squaring() { // the m*n half of step(): no a*b work is done here
+      acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2);  // The pending m*n
+
+      // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      mulhu(Rhi_mn, Rm, Rn);    // new product stays pending until the next acc(Rhi_mn, Rlo_mn, ...)
+      mul(Rlo_mn, Rm, Rn);
+      addi(Pm, Pm, wordSize);
+      ld(Rm, Address(Pm));
+      addi(Pn, Pn, -wordSize);
+      ld(Rn, Address(Pn));
+    }
+
+    void post1_squaring() { // like post1(), but without the final a*b multiply-accumulate
+      acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2);  // The pending m*n
+
+      // *Pm = Rm = tmp0 * inv;
+      mul(Rm, tmp0, inv);
+      sd(Rm, Address(Pm));
+
+      // MACC(Rm, Rn, tmp0, tmp1, tmp2);
+      // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0;
+      mulhu(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+      // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply");
+      {
+        mul(Rlo_mn, Rm, Rn);
+        add(Rlo_mn, tmp0, Rlo_mn);
+        Label ok;
+        beqz(Rlo_mn, ok); {
+          stop("broken Montgomery multiply");
+        } bind(ok);
+      }
+#endif
+      // We have very carefully set things up so that
+      // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate
+      // the lower half of Rm * Rn because we know the result already:
+      // it must be -tmp0.  tmp0 + (-tmp0) must generate a carry iff
+      // tmp0 != 0.  So, rather than do a mul and a cad we just set
+      // the carry flag iff tmp0 is nonzero.
+      //
+      // mul(Rlo_mn, Rm, Rn);
+      // cad(zr, tmp0, Rlo_mn);
+      addi(t0, tmp0, -1);
+      sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero
+      cadc(tmp0, tmp1, Rhi_mn, t0);
+      adc(tmp1, tmp2, zr, t0);
+      mv(tmp2, zr);
+    }
+
+    // Adds the two-word value Rhi:Rlo into the accumulator tmp2:tmp1:tmp0, using t0 as carry.
+    void acc(Register Rhi, Register Rlo,
+             Register tmp0, Register tmp1, Register tmp2) {
+      cad(tmp0, tmp0, Rlo, t0);  // low word
+      cadc(tmp1, tmp1, Rhi, t0); // middle word, with t0 passed along from the low word
+      adc(tmp2, tmp2, zr, t0);   // top word receives only the carry
+    }
+
+  public:
+    /**
+     * Fast Montgomery multiplication.  The derivation of the
+     * algorithm is in A Cryptographic Library for the Motorola
+     * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+     *
+     * Arguments:
+     *
+     * Inputs for multiplication:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements b
+     *   c_rarg2   - int array elements n (the modulus)
+     *   c_rarg3   - int length
+     *   c_rarg4   - inv (used below as a full 64-bit multiplier)
+     *   c_rarg5   - int array elements m (the result)
+     *
+     * Inputs for squaring:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements n (the modulus)
+     *   c_rarg2   - int length
+     *   c_rarg3   - int inv
+     *   c_rarg4   - int array elements m (the result)
+     *
+     * Returns the entry address of the generated stub.
+     */
+    address generate_multiply() {
+      Label argh, nothing;
+      bind(argh); // error path, emitted ahead of the entry point
+      stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+      align(CodeEntryAlignment);
+      address entry = pc();
+
+      beqz(Rlen, nothing); // zero length: return immediately, before any frame is set up
+
+      enter();
+
+      // Make room.
+      mv(Ra, 512);
+      bgt(Rlen, Ra, argh); // more than 512 ints would need more than 8192 bytes below
+      slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); // 4 * sizeof(jint) bytes of stack per int of length
+      sub(Ra, sp, Ra);
+      andi(sp, Ra, -2 * wordSize); // keep sp aligned to two words
+
+      srliw(Rlen, Rlen, 1);  // length in longwords = len/2
+
+      {
+        // Copy input args, reversing as we go.  We use Ra as a
+        // temporary variable.
+        reverse(Ra, Pa_base, Rlen, Ri, Rj);
+        if (!_squaring)
+          reverse(Ra, Pb_base, Rlen, Ri, Rj);
+        reverse(Ra, Pn_base, Rlen, Ri, Rj);
+      }
+
+      // Push all call-saved registers and also Pm_base which we'll need
+      // at the end.
+      save_regs();
+
+#ifndef PRODUCT
+      // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+      {
+        ld(Rn, Address(Pn_base));
+        mul(Rlo_mn, Rn, inv);
+        mv(t0, -1);
+        Label ok;
+        beq(Rlo_mn, t0, ok);
+        stop("broken inverse in Montgomery multiply");
+        bind(ok);
+      }
+#endif
+
+      mv(Pm_base, Ra); // the result is built in the stack area just past the reversed copies
+
+      mv(tmp0, zr); // clear the triple-precision accumulator
+      mv(tmp1, zr);
+      mv(tmp2, zr);
+
+      block_comment("for (int i = 0; i < len; i++) {");
+      mv(Ri, zr); {
+        Label loop, end;
+        bge(Ri, Rlen, end);
+
+        bind(loop);
+        pre1(Ri);
+
+        block_comment("  for (j = i; j; j--) {"); {
+          mv(Rj, Ri);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+        } block_comment("  } // j");
+
+        post1();
+        addw(Ri, Ri, 1);
+        blt(Ri, Rlen, loop);
+        bind(end);
+        block_comment("} // i");
+      }
+
+      block_comment("for (int i = len; i < 2*len; i++) {");
+      mv(Ri, Rlen); {
+        Label loop, end;
+        slli(t0, Rlen, 1); // t0 = 2*len, the loop bound
+        bge(Ri, t0, end);
+
+        bind(loop);
+        pre2(Ri, Rlen);
+
+        block_comment("  for (j = len*2-i-1; j; j--) {"); {
+          slliw(Rj, Rlen, 1);
+          subw(Rj, Rj, Ri);
+          subw(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+        } block_comment("  } // j");
+
+        post2(Ri, Rlen);
+        addw(Ri, Ri, 1);
+        slli(t0, Rlen, 1); // recompute the bound: t0 is used as scratch inside the loop body
+        blt(Ri, t0, loop);
+        bind(end);
+      }
+      block_comment("} // i");
+
+      normalize(Rlen);
+
+      mv(Ra, Pm_base);  // Save Pm_base in Ra
+      restore_regs();  // Restore caller's Pm_base
+
+      // Copy our result into caller's Pm_base
+      reverse(Pm_base, Ra, Rlen, Ri, Rj);
+
+      leave();
+      bind(nothing);
+      ret();
+
+      return entry;
+    }
+
+    /**
+     * Montgomery squaring: same scheme as generate_multiply, with b taken to be a.
+     * Arguments:
+     *
+     * Inputs:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements n (the modulus)
+     *   c_rarg2   - int length
+     *   c_rarg3   - int inv
+     *   c_rarg4   - int array elements m (the result)
+     *
+     * Returns the entry address of the generated stub.
+     */
+    address generate_square() {
+      Label argh;
+      bind(argh); // error path, emitted ahead of the entry point
+      stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+      align(CodeEntryAlignment);
+      address entry = pc();
+
+      enter();
+
+      // Make room.
+      mv(Ra, 512);
+      bgt(Rlen, Ra, argh); // more than 512 ints would need more than 8192 bytes below
+      slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); // 4 * sizeof(jint) bytes of stack per int of length
+      sub(Ra, sp, Ra);
+      andi(sp, Ra, -2 * wordSize); // keep sp aligned to two words
+
+      srliw(Rlen, Rlen, 1);  // length in longwords = len/2
+
+      {
+        // Copy input args, reversing as we go.  We use Ra as a
+        // temporary variable.
+        reverse(Ra, Pa_base, Rlen, Ri, Rj);
+        reverse(Ra, Pn_base, Rlen, Ri, Rj);
+      }
+
+      // Push all call-saved registers and also Pm_base which we'll need
+      // at the end.
+      save_regs();
+
+      mv(Pm_base, Ra); // the result is built in the stack area just past the reversed copies
+
+      mv(tmp0, zr); // clear the triple-precision accumulator
+      mv(tmp1, zr);
+      mv(tmp2, zr);
+
+      block_comment("for (int i = 0; i < len; i++) {");
+      mv(Ri, zr); {
+        Label loop, end;
+        bind(loop);
+        bge(Ri, Rlen, end);
+
+        pre1(Ri);
+
+        block_comment("for (j = (i+1)/2; j; j--) {"); {
+          addi(Rj, Ri, 1);
+          srliw(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+        } block_comment("  } // j");
+
+        last_squaring(Ri);
+
+        block_comment("  for (j = i/2; j; j--) {"); {
+          srliw(Rj, Ri, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+        } block_comment("  } // j");
+
+        post1_squaring();
+        addi(Ri, Ri, 1);
+        blt(Ri, Rlen, loop);
+
+        bind(end);
+        block_comment("} // i");
+      }
+
+      block_comment("for (int i = len; i < 2*len; i++) {");
+      mv(Ri, Rlen); {
+        Label loop, end;
+        bind(loop);
+        slli(t0, Rlen, 1); // t0 = 2*len, the loop bound
+        bge(Ri, t0, end);
+
+        pre2(Ri, Rlen);
+
+        block_comment("  for (j = (2*len-i-1)/2; j; j--) {"); {
+          slli(Rj, Rlen, 1);
+          sub(Rj, Rj, Ri);
+          sub(Rj, Rj, 1);
+          srliw(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+        } block_comment("  } // j");
+
+        last_squaring(Ri);
+
+        block_comment("  for (j = (2*len-i)/2; j; j--) {"); {
+          slli(Rj, Rlen, 1);
+          sub(Rj, Rj, Ri);
+          srliw(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+        } block_comment("  } // j");
+
+        post2(Ri, Rlen);
+        addi(Ri, Ri, 1);
+        slli(t0, Rlen, 1); // recompute the bound: t0 is used as scratch inside the loop body
+        blt(Ri, t0, loop);
+
+        bind(end);
+        block_comment("} // i");
+      }
+
+      normalize(Rlen);
+
+      mv(Ra, Pm_base);  // Save Pm_base in Ra
+      restore_regs();  // Restore caller's Pm_base
+
+      // Copy our result into caller's Pm_base
+      reverse(Pm_base, Ra, Rlen, Ri, Rj);
+
+      leave();
+      ret();
+
+      return entry;
+    }
+  };
+#endif // COMPILER2
+
+  // Continuation point for throwing of implicit exceptions that are
+  // not handled in the current activation. Fabricates an exception
+  // oop and initiates normal exception dispatching in this
+  // frame. Since we need to preserve callee-saved values (currently
+  // only for C2, but done for C1 as well) we need a callee-saved oop
+  // map and therefore have to make these stubs into RuntimeStubs
+  // rather than BufferBlobs.  If the compiler needs all registers to
+  // be preserved between the fault point and the exception handler
+  // then it must assume responsibility for that in
+  // AbstractCompiler::continuation_for_implicit_null_exception or
+  // continuation_for_implicit_division_by_zero_exception. All other
+  // implicit exceptions (e.g., NullPointerException or
+  // AbstractMethodError on entry) are either at call sites or
+  // otherwise assume that stack unwinding will be initiated, so
+  // caller saved registers were assumed volatile in the compiler.
+
+#undef __
+#define __ masm->
+
+  address generate_throw_exception(const char* name,
+                                   address runtime_entry,
+                                   Register arg1 = noreg,
+                                   Register arg2 = noreg) {
+    // Information about frame layout at time of blocking runtime call.
+    // Note that we only have to preserve callee-saved registers since
+    // the compilers are responsible for supplying a continuation point
+    // if they expect all registers to be preserved.
+    // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0
+    assert_cond(runtime_entry != NULL);
+    enum layout { // offsets counted in 4-byte slots (see the LogBytesPerInt shift below)
+      fp_off = 0,
+      fp_off2,
+      return_off,
+      return_off2,
+      framesize // inclusive of return address
+    };
+
+    const int insts_size = 512;
+    const int locs_size  = 64;
+
+    CodeBuffer code(name, insts_size, locs_size); // this stub gets its own buffer and assembler
+    OopMapSet* oop_maps  = new OopMapSet();
+    MacroAssembler* masm = new MacroAssembler(&code);
+    assert_cond(oop_maps != NULL && masm != NULL);
+
+    address start = __ pc();
+
+    // This is an inlined and slightly modified version of call_VM
+    // which has the ability to fetch the return PC out of
+    // thread-local storage and also sets up last_Java_sp slightly
+    // differently than the real call_VM
+
+    __ enter(); // Save FP and RA before call
+
+    assert(is_even(framesize / 2), "sp not 16-byte aligned");
+
+    // ra and fp are already in place
+    __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog
+
+    int frame_complete = __ pc() - start;
+
+    // Set up last_Java_sp and last_Java_fp
+    address the_pc = __ pc();
+    __ set_last_Java_frame(sp, fp, the_pc, t0);
+
+    // Call runtime
+    if (arg1 != noreg) {
+      assert(arg2 != c_rarg1, "clobbered"); // arg2 is read after c_rarg1 has been overwritten
+      __ mv(c_rarg1, arg1);
+    }
+    if (arg2 != noreg) {
+      __ mv(c_rarg2, arg2);
+    }
+    __ mv(c_rarg0, xthread); // the current thread is always the first argument
+    BLOCK_COMMENT("call runtime_entry");
+    int32_t offset = 0;
+    __ movptr_with_offset(t0, runtime_entry, offset);
+    __ jalr(x1, t0, offset);
+
+    // Generate oop map
+    OopMap* map = new OopMap(framesize, 0);
+    assert_cond(map != NULL);
+
+    oop_maps->add_gc_map(the_pc - start, map); // keyed by the pc recorded in set_last_Java_frame
+
+    __ reset_last_Java_frame(true);
+
+    __ leave();
+
+    // check for pending exceptions
+#ifdef ASSERT
+    Label L;
+    __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+    __ bnez(t0, L); // a pending exception is the expected outcome of runtime_entry
+    __ should_not_reach_here();
+    __ bind(L);
+#endif // ASSERT
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+
+    // codeBlob framesize is in words (not VMRegImpl::slot_size)
+    RuntimeStub* stub =
+      RuntimeStub::new_runtime_stub(name,
+                                    &code,
+                                    frame_complete,
+                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
+                                    oop_maps, false);
+    assert(stub != NULL, "create runtime stub fail!");
+    return stub->entry_point();
+  }
+
+  // Initialization: the stub set generated when the constructor is called with all == false.
+  void generate_initial() {
+    // Generate initial stubs and initializes the entry points
+
+    // entry points that exist in all platforms Note: This is code
+    // that could be shared among different platforms - however the
+    // benefit seems to be smaller than the disadvantage of having a
+    // much more complicated generator structure. See also comment in
+    // stubRoutines.hpp.
+
+    StubRoutines::_forward_exception_entry = generate_forward_exception();
+
+    StubRoutines::_call_stub_entry =
+      generate_call_stub(StubRoutines::_call_stub_return_address); // the return address is passed by reference/out-parameter -- TODO confirm
+
+    // is referenced by megamorphic call
+    StubRoutines::_catch_exception_entry = generate_catch_exception();
+
+    // Build this early so it's available for the interpreter.
+    StubRoutines::_throw_StackOverflowError_entry =
+      generate_throw_exception("StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::throw_StackOverflowError));
+    StubRoutines::_throw_delayed_StackOverflowError_entry =
+      generate_throw_exception("delayed StackOverflowError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::throw_delayed_StackOverflowError));
+    // Safefetch stubs: each call fills in an entry, a fault pc and a continuation pc.
+    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
+                                                       &StubRoutines::_safefetch32_fault_pc,
+                                                       &StubRoutines::_safefetch32_continuation_pc);
+    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
+                                                       &StubRoutines::_safefetchN_fault_pc,
+                                                       &StubRoutines::_safefetchN_continuation_pc);
+  }
+
+  void generate_all() { // the stub set generated when the constructor is called with all == true
+    // support for verify_oop (must happen after universe_init)
+    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
+    StubRoutines::_throw_AbstractMethodError_entry =
+      generate_throw_exception("AbstractMethodError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_AbstractMethodError));
+
+    StubRoutines::_throw_IncompatibleClassChangeError_entry =
+      generate_throw_exception("IncompatibleClassChangeError throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_IncompatibleClassChangeError));
+
+    StubRoutines::_throw_NullPointerException_at_call_entry =
+      generate_throw_exception("NullPointerException at call throw_exception",
+                               CAST_FROM_FN_PTR(address,
+                                                SharedRuntime::
+                                                throw_NullPointerException_at_call));
+    // arraycopy stubs used by compilers
+    generate_arraycopy_stubs();
+
+#ifdef COMPILER2
+    if (UseMulAddIntrinsic) { // each big-integer intrinsic stub is generated only if its flag is set
+      StubRoutines::_mulAdd = generate_mulAdd();
+    }
+
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+
+    if (UseSquareToLenIntrinsic) {
+      StubRoutines::_squareToLen = generate_squareToLen();
+    }
+
+    if (UseMontgomeryMultiplyIntrinsic) {
+      StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
+      MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); // a separate MacroAssembler over _masm's code()
+      StubRoutines::_montgomeryMultiply = g.generate_multiply();
+    }
+
+    if (UseMontgomerySquareIntrinsic) {
+      StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
+      MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
+      StubRoutines::_montgomerySquare = g.generate_square();
+    }
+#endif
+
+    generate_compare_long_strings();
+
+    generate_string_indexof_stubs();
+
+    StubRoutines::riscv::set_completed(); // last step, after every stub above has been generated
+  }
+
+ public:
+  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
+    if (!all) {            // all == false: only the stubs from generate_initial()
+      generate_initial();
+      return;
+    }
+    generate_all();        // all == true: only the stubs from generate_all()
+  }
+
+  ~StubGenerator() {} // empty body: this class adds no cleanup of its own
+}; // end class declaration
+
+void StubGenerator_generate(CodeBuffer* code, bool all) {
+  StubGenerator stub_generator(code, all); // all generation work happens inside the constructor
+}
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
new file mode 100644
index 0000000000..9202d9ec4b
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Implementation of the platform-specific part of StubRoutines - for
+// a description of how to extend it, see the stubRoutines.hpp file.
+
+address StubRoutines::riscv::_get_previous_sp_entry = NULL;  // every entry address below starts out NULL
+
+address StubRoutines::riscv::_f2i_fixup = NULL;
+address StubRoutines::riscv::_f2l_fixup = NULL;
+address StubRoutines::riscv::_d2i_fixup = NULL;
+address StubRoutines::riscv::_d2l_fixup = NULL;
+address StubRoutines::riscv::_float_sign_mask = NULL;
+address StubRoutines::riscv::_float_sign_flip = NULL;
+address StubRoutines::riscv::_double_sign_mask = NULL;
+address StubRoutines::riscv::_double_sign_flip = NULL;
+address StubRoutines::riscv::_zero_blocks = NULL;
+address StubRoutines::riscv::_compare_long_string_LL = NULL;
+address StubRoutines::riscv::_compare_long_string_UU = NULL;
+address StubRoutines::riscv::_compare_long_string_LU = NULL;
+address StubRoutines::riscv::_compare_long_string_UL = NULL;
+address StubRoutines::riscv::_string_indexof_linear_ll = NULL;
+address StubRoutines::riscv::_string_indexof_linear_uu = NULL;
+address StubRoutines::riscv::_string_indexof_linear_ul = NULL;
+address StubRoutines::riscv::_large_byte_array_inflate = NULL;
+
+bool StubRoutines::riscv::_completed = false;  // switched to true by StubRoutines::riscv::set_completed()
diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
new file mode 100644
index 0000000000..0c9445e18a
--- /dev/null
+++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP
+#define CPU_RISCV_STUBROUTINES_RISCV_HPP
+
+// This file holds the platform specific parts of the StubRoutines
+// definition. See stubRoutines.hpp for a description on how to
+// extend it.
+
+static bool returns_to_call_stub(address return_pc) {  // does return_pc equal the recorded call-stub return address?
+  return _call_stub_return_address == return_pc;
+}
+
+enum platform_dependent_constants {  // code-space budgets; NOTE(review): presumably byte counts consumed by the shared stubRoutines code - confirm
+  code_size1 = 19000,          // raise this if it proves too small (the assembler crashes when it runs out of room)
+  code_size2 = 28000           // raise this if it proves too small (the assembler crashes when it runs out of room)
+};
+
+class riscv {  // holder for RISC-V specific stub entry addresses, all static
+ friend class StubGenerator;  // lets StubGenerator write the private fields below
+
+ private:
+  static address _get_previous_sp_entry;
+
+  static address _f2i_fixup;
+  static address _f2l_fixup;
+  static address _d2i_fixup;
+  static address _d2l_fixup;
+
+  static address _float_sign_mask;
+  static address _float_sign_flip;
+  static address _double_sign_mask;
+  static address _double_sign_flip;
+
+  static address _zero_blocks;
+
+  static address _compare_long_string_LL;
+  static address _compare_long_string_LU;
+  static address _compare_long_string_UL;
+  static address _compare_long_string_UU;
+  static address _string_indexof_linear_ll;
+  static address _string_indexof_linear_uu;
+  static address _string_indexof_linear_ul;
+  static address _large_byte_array_inflate;
+
+  static bool _completed;  // read by complete(), set by set_completed()
+
+ public:  // read accessors: each one returns the same-named private field unchanged
+
+  static address get_previous_sp_entry() {
+    return _get_previous_sp_entry;
+  }
+
+  static address f2i_fixup() {
+    return _f2i_fixup;
+  }
+
+  static address f2l_fixup() {
+    return _f2l_fixup;
+  }
+
+  static address d2i_fixup() {
+    return _d2i_fixup;
+  }
+
+  static address d2l_fixup() {
+    return _d2l_fixup;
+  }
+
+  static address float_sign_mask() {
+    return _float_sign_mask;
+  }
+
+  static address float_sign_flip() {
+    return _float_sign_flip;
+  }
+
+  static address double_sign_mask() {
+    return _double_sign_mask;
+  }
+
+  static address double_sign_flip() {
+    return _double_sign_flip;
+  }
+
+  static address zero_blocks() {
+    return _zero_blocks;
+  }
+
+  static address compare_long_string_LL() {
+    return _compare_long_string_LL;
+  }
+
+  static address compare_long_string_LU() {
+    return _compare_long_string_LU;
+  }
+
+  static address compare_long_string_UL() {
+    return _compare_long_string_UL;
+  }
+
+  static address compare_long_string_UU() {
+    return _compare_long_string_UU;
+  }
+
+  static address string_indexof_linear_ul() {
+    return _string_indexof_linear_ul;
+  }
+
+  static address string_indexof_linear_ll() {
+    return _string_indexof_linear_ll;
+  }
+
+  static address string_indexof_linear_uu() {
+    return _string_indexof_linear_uu;
+  }
+
+  static address large_byte_array_inflate() {
+    return _large_byte_array_inflate;
+  }
+
+  static bool complete() {  // current value of the _completed flag
+    return _completed;
+  }
+
+  static void set_completed() {  // one-way switch: this class offers no way to clear the flag again
+    _completed = true;
+  }
+};
+
+#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
new file mode 100644
index 0000000000..34c85e8145
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
@@ -0,0 +1,1833 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/bytecodeTracer.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateInterpreterGenerator.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/jniHandles.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include <sys/types.h>
+
+#ifndef PRODUCT
+#include "oops/method.hpp"
+#endif // !PRODUCT
+
+// Size of interpreter code.  Increase it if it is too small: the
+// interpreter will then fail with a guarantee ("not enough space for
+// interpreter generation").
+// Run with +PrintInterpreter to get the VM to print out the size.
+// NOTE(review): presumably chosen to cover the maximum size, reached with JVMTI - confirm.
+int TemplateInterpreter::InterpreterCodeSize = 256 * 1024;
+
+#define __ _masm->
+
+//-----------------------------------------------------------------------------
+
+address TemplateInterpreterGenerator::generate_slow_signature_handler() {
+  address entry = __ pc();  // start of the emitted handler; this is the value returned
+
+  __ andi(esp, esp, -16);   // clear the low four bits of esp (16-byte alignment)
+  __ mv(c_rarg3, esp);
+  // xmethod
+  // xlocals
+  // c_rarg3: first stack arg - wordSize
+  // adjust sp
+
+  __ addi(sp, c_rarg3, -18 * wordSize);  // 18-word area below c_rarg3; read back as slots 0..17 further down
+  __ addi(sp, sp, -2 * wordSize);        // two more words; ra is kept in the lower one
+  __ sd(ra, Address(sp, 0));
+
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::slow_signature_handler),
+             xmethod, xlocals, c_rarg3);
+
+  // x10: result handler
+
+  // Stack layout:
+  // sp: return address           <- sp
+  //      1 garbage
+  //      8 integer args (if static first is unused)
+  //      1 float/double identifiers
+  //      8 double args
+  //        stack args              <- esp
+  //        garbage
+  //        expression stack bottom
+  //        bcp (NULL)
+  //        ...
+  // NOTE(review): the loads below use slot 9 for the identifiers and 10+i for FP args, one word more than this picture suggests - confirm.
+  // Restore ra
+  __ ld(ra, Address(sp, 0));
+  __ addi(sp, sp , 2 * wordSize);
+
+  // Do FP first so we can use c_rarg3 as temp
+  __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers: bit i set => FP arg i is loaded as a double
+
+  for (int i = 0; i < Argument::n_float_register_parameters_c; i++) {
+    const FloatRegister r = g_FPArgReg[i];
+    Label d, done;  // d: double-precision path; done: join point
+
+    __ andi(t0, c_rarg3, 1UL << i);  // isolate identifier bit i
+    __ bnez(t0, d);
+    __ flw(r, Address(sp, (10 + i) * wordSize));  // bit clear: single-precision load
+    __ j(done);
+    __ bind(d);
+    __ fld(r, Address(sp, (10 + i) * wordSize));  // bit set: double-precision load from the same slot
+    __ bind(done);
+  }
+
+  // c_rarg0 contains the result from the call of
+  // InterpreterRuntime::slow_signature_handler so we don't touch it
+  // here.  It will be loaded with the JNIEnv* later.
+  for (int i = 1; i < Argument::n_int_register_parameters_c; i++) {  // starts at 1: slot 0 / c_rarg0 is skipped
+    const Register rm = g_INTArgReg[i];
+    __ ld(rm, Address(sp, i * wordSize));
+  }
+
+  __ addi(sp, sp, 18 * wordSize);  // release the 18-word area reserved above
+  __ ret();
+
+  return entry;
+}
+
+// Various method entries
+address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
+  // Emits the entry for math intrinsic 'kind' and returns its address; returns
+  // NULL, with nothing emitted, when the caller should use a vanilla entry.
+  // On entry: xmethod = Method*, x30 = sender sp, esp = args.
+
+  if (!InlineIntrinsics) {
+    return NULL; // Generate a vanilla entry
+  }
+
+  // These don't need a safepoint check because they aren't virtually
+  // callable. We won't enter these intrinsics from compiled code.
+  // If in the future we added an intrinsic which was virtually callable
+  // we'd have to worry about how to safepoint so that this code is used.
+
+  // mathematical functions inlined by compiler
+  // (interpreter must provide identical implementation
+  // in order to avoid monotonicity bugs when switching
+  // from interpreter to compiler in the middle of some
+  // computation)
+  //
+  // stack:
+  //        [ arg ] <-- esp
+  //        [ arg ]
+  // retaddr in ra
+
+  address fn = NULL;            // runtime or stub routine to call, for the kinds that call out
+  address entry_point = NULL;   // stays NULL for kinds that emit nothing
+  Register continuation = ra;   // register jumped through at the end; switched to x9 when a call is emitted
+  switch (kind) {
+    case Interpreter::java_lang_math_abs:
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));  // argument: double at the top of the expression stack
+      __ fabs_d(f10, f10);        // computed in place in f10
+      __ mv(sp, x30); // Restore caller's SP
+      break;
+    case Interpreter::java_lang_math_sqrt:
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ fsqrt_d(f10, f10);
+      __ mv(sp, x30);  // restore caller's SP, as in the abs case
+      break;
+    case Interpreter::java_lang_math_sin :
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);  // keep the incoming ra in x9 across the call emitted below
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dsin() == NULL) {  // no stub available: use the SharedRuntime routine
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);  // call fn through t0
+      break;
+    case Interpreter::java_lang_math_cos :  // same shape as the sin case
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dcos() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_tan :  // same shape as the sin case
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dtan() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_log :  // same shape as the sin case
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dlog() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_log10 :  // same shape as the sin case
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dlog10() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_exp :  // same shape as the sin case
+      entry_point = __ pc();
+      __ fld(f10, Address(esp));
+      __ mv(sp, x30);
+      __ mv(x9, ra);
+      continuation = x9;  // The first callee-saved register
+      if (StubRoutines::dexp() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_pow :  // two double arguments
+      entry_point = __ pc();
+      __ mv(x9, ra);
+      continuation = x9;
+      __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize));  // first operand sits two stack elements above esp
+      __ fld(f11, Address(esp));                                     // second operand is at esp
+      __ mv(sp, x30);
+      if (StubRoutines::dpow() == NULL) {
+        fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      } else {
+        fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow());
+      }
+      __ mv(t0, fn);
+      __ jalr(t0);
+      break;
+    case Interpreter::java_lang_math_fmaD :
+      if (UseFMA) {  // without UseFMA nothing is emitted and entry_point stays NULL
+        entry_point = __ pc();
+        __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize));
+        __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize));
+        __ fld(f12, Address(esp));
+        __ fmadd_d(f10, f10, f11, f12);
+        __ mv(sp, x30); // Restore caller's SP
+      }
+      break;
+    case Interpreter::java_lang_math_fmaF :
+      if (UseFMA) {  // without UseFMA nothing is emitted and entry_point stays NULL
+        entry_point = __ pc();
+        __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize));  // float operands are one stack element apart
+        __ flw(f11, Address(esp, Interpreter::stackElementSize));
+        __ flw(f12, Address(esp));
+        __ fmadd_s(f10, f10, f11, f12);
+        __ mv(sp, x30); // Restore caller's SP
+      }
+      break;
+    default:  // any other kind: emit nothing, entry_point stays NULL
+      ;
+  }
+  if (entry_point != NULL) {  // only close the sequence when something was emitted
+    __ jr(continuation);
+  }
+
+  return entry_point;
+}
+
+// Abstract method entry.
+// Reached on an attempt to execute an abstract method: the emitted code asks the runtime to throw and is not expected to continue.
+address TemplateInterpreterGenerator::generate_abstract_entry(void) {
+  // xmethod: Method*
+  // x30: sender SP
+
+  address entry_point = __ pc();  // address of the emitted entry; returned to the caller
+
+  // abstract method entry
+
+  //  pop return address, reset last_sp to NULL
+  __ empty_expression_stack();
+  __ restore_bcp();      // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
+
+  // throw exception
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_AbstractMethodErrorWithMethod),
+                                     xmethod);
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  return entry_point;
+}
+
+address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {  // emits the handler and returns its entry address
+  address entry = __ pc();
+
+#ifdef ASSERT
+  {
+    Label L;
+    __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize));  // t0 = monitor block top stored in the frame
+    __ mv(t1, sp);
+    // maximal sp for current fp (stack grows negative)
+    // check if frame is complete
+    __ bge(t0, t1, L);  // fine when the stored value is not below sp
+    __ stop ("interpreter frame not set up");
+    __ bind(L);
+  }
+#endif // ASSERT
+  // Restore bcp under the assumption that the current frame is still
+  // interpreted
+  __ restore_bcp();
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // throw exception
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {  // emits the handler and returns its entry address
+  address entry = __ pc();
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+
+  // convention: expect aberrant index in register x11
+  __ zero_extend(c_rarg2, x11, 32);  // c_rarg2 = index from x11 (presumably its low 32 bits, zero-extended - confirm helper semantics)
+  // convention: expect array in register x13
+  __ mv(c_rarg1, x13);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::
+                              throw_ArrayIndexOutOfBoundsException),
+             c_rarg1, c_rarg2);  // arguments: array, then index
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_ClassCastException_handler() {  // emits the handler and returns its entry address
+  address entry = __ pc();
+
+  // object is at TOS
+  __ pop_reg(c_rarg1);  // taken off the stack before the stack is emptied below
+
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::
+                              throw_ClassCastException),
+             c_rarg1);  // the popped object is the only argument
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_exception_handler_common(
+  const char* name, const char* message, bool pass_oop) {  // 'name' goes in c_rarg1; c_rarg2 gets the TOS object (pass_oop) or 'message'
+  assert(!pass_oop || message == NULL, "either oop or message but not both");
+  address entry = __ pc();  // entry address of the emitted handler; returned to the caller
+  if (pass_oop) {
+    // object is at TOS
+    __ pop_reg(c_rarg2);
+  }
+  // expression stack must be empty before entering the VM if an
+  // exception happened
+  __ empty_expression_stack();
+  // setup parameters
+  __ la(c_rarg1, Address((address)name));
+  if (pass_oop) {
+    __ call_VM(x10, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::
+                                     create_klass_exception),
+               c_rarg1, c_rarg2);  // result requested in x10
+  } else {
+    // kind of lame ExternalAddress can't take NULL because
+    // external_word_Relocation will assert.
+    if (message != NULL) {
+      __ la(c_rarg2, Address((address)message));
+    } else {
+      __ mv(c_rarg2, NULL_WORD);  // no message: pass a null word instead of an address
+    }
+    __ call_VM(x10,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
+               c_rarg1, c_rarg2);  // result requested in x10
+  }
+  // throw exception
+  __ j(address(Interpreter::throw_exception_entry()));
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
+  address entry = __ pc();  // entry address of the emitted return path; returned to the caller
+
+  // Restore stack bottom in case i2c adjusted stack
+  __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // and NULL it as marker that esp is now tos until next java call
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(xmethod);
+
+  if (state == atos) {  // return-type profiling is emitted only for the atos state
+    Register obj = x10;
+    Register mdp = x11;
+    Register tmp = x12;
+    __ ld(mdp, Address(xmethod, Method::method_data_offset()));
+    __ profile_return_type(mdp, obj, tmp);
+  }
+
+  // Pop N words from the stack
+  __ get_cache_and_index_at_bcp(x11, x12, 1, index_size);
+  __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()));
+  __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask);  // x11 = parameter-size field of the flags word
+
+  __ shadd(esp, x11, esp, t0, 3);  // presumably esp += x11 << 3, i.e. N 8-byte words - confirm shadd semantics
+
+  // Restore machine SP
+  __ ld(t0, Address(xmethod, Method::const_offset()));
+  __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+  __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);  // max_stack plus monitor size plus 2, counted in words
+  __ ld(t1,
+        Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ slli(t0, t0, 3);    // words -> bytes
+  __ sub(t0, t1, t0);    // that many bytes below the frame's initial sp
+  __ andi(sp, t0, -16);  // clear the low four bits before installing as sp
+
+ __ check_and_handle_popframe(xthread);
+ __ check_and_handle_earlyret(xthread);
+
+  __ get_dispatch();
+  __ dispatch_next(state, step);
+
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state,
+                                                               int step,
+                                                               address continuation) {  // NULL continuation: dispatch the next bytecode instead
+  address entry = __ pc();  // entry address of the emitted deopt path; returned to the caller
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(xmethod);
+  __ get_dispatch();
+
+  // Calculate stack limit
+  __ ld(t0, Address(xmethod, Method::const_offset()));
+  __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+  __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2);  // max_stack plus monitor size plus 2, counted in words
+  __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ slli(t0, t0, 3);    // words -> bytes
+  __ sub(t0, t1, t0);    // that many bytes below the frame's initial sp
+  __ andi(sp, t0, -16);  // clear the low four bits before installing as sp
+
+  // Restore expression stack pointer
+  __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // NULL last_sp until next java call
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+  // handle exceptions
+  {
+    Label L;
+    __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+    __ beqz(t0, L);  // nothing pending: skip the throw
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  if (continuation == NULL) {  // decided while generating, not in the emitted code
+    __ dispatch_next(state, step);
+  } else {
+    __ jump_to_entry(continuation);
+  }
+  return entry;
+}
+
+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
+  const address handler_start = __ pc();  // handed back as the handler's entry address
+  if (type != T_OBJECT) {
+    // non-object result: let cast_primitive_type() adjust x10 for 'type'
+    __ cast_primitive_type(type, x10);
+  } else {
+    // object result: fetch it from the frame's oop temp slot ...
+    __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+    // ... and have it verified
+    __ verify_oop(x10);
+  }
+  __ ret();  // return from result handler
+  return handler_start;
+}
+
+address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state,
+                                                                address runtime_entry) {  // runtime_entry must be non-NULL (asserted below)
+  assert_cond(runtime_entry != NULL);
+  address entry = __ pc();  // entry address of the emitted sequence; returned to the caller
+  __ push(state);                     // push for the given tos state before entering the VM
+  __ call_VM(noreg, runtime_entry);
+  __ membar(MacroAssembler::AnyAny);  // barrier emitted right after the VM call returns
+  __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));  // continue through the normal table for vtos
+  return entry;
+}
+
+// Helpers for commoning out cases in the various type of method entries.
+//
+
+
+// increment invocation count & check for overflow
+//
+// Note: checking for negative value instead of overflow
+//       so we have a 'sticky' overflow test
+//
+// xmethod: method
+//
+void TemplateInterpreterGenerator::generate_counter_incr(
+        Label* overflow,                   // branch target once the counter check trips
+        Label* profile_method,             // may be NULL (see the native_call note below)
+        Label* profile_method_continue) {  // only used on the non-tiered profiling path
+  Label done;
+  // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not.
+  if (TieredCompilation) {
+    int increment = InvocationCounter::count_increment;
+    Label no_mdo;
+    if (ProfileInterpreter) {
+      // Are we profiling?
+      __ ld(x10, Address(xmethod, Method::method_data_offset()));
+      __ beqz(x10, no_mdo);  // no method data: fall back to MethodCounters
+      // Increment counter in the MDO
+      const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) +
+                                                in_bytes(InvocationCounter::counter_offset()));
+      const Address mask(x10, in_bytes(MethodData::invoke_mask_offset()));
+      __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow);
+      __ j(done);
+    }
+    __ bind(no_mdo);
+    // Increment counter in MethodCounters
+    const Address invocation_counter(t1,
+                  MethodCounters::invocation_counter_offset() +
+                  InvocationCounter::counter_offset());
+    __ get_method_counters(xmethod, t1, done);  // 'done' is handed in as the helper's bail-out label
+    const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset()));
+    __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow);
+    __ bind(done);
+  } else { // not TieredCompilation
+    const Address backedge_counter(t1,
+                  MethodCounters::backedge_counter_offset() +
+                  InvocationCounter::counter_offset());
+    const Address invocation_counter(t1,
+                  MethodCounters::invocation_counter_offset() +
+                  InvocationCounter::counter_offset());
+
+    __ get_method_counters(xmethod, t1, done);  // 'done' is handed in as the helper's bail-out label
+
+    if (ProfileInterpreter) { // %%% Merge this into MethodData*
+      __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset()));
+      __ addw(x11, x11, 1);
+      __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset()));
+    }
+    // Update standard invocation counters
+    __ lwu(x11, invocation_counter);
+    __ lwu(x10, backedge_counter);
+
+    __ addw(x11, x11, InvocationCounter::count_increment);
+    __ andi(x10, x10, InvocationCounter::count_mask_value);  // mask the backedge value before summing
+
+    __ sw(x11, invocation_counter);        // only the invocation counter is written back
+    __ addw(x10, x10, x11);                // add both counters
+
+    // profile_method is non-null only for interpreted method so
+    // profile_method != NULL == !native_call
+
+    if (ProfileInterpreter && profile_method != NULL) {
+      // Test to see if we should create a method data oop
+      __ ld(t1, Address(xmethod, Method::method_counters_offset()));
+      __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
+      __ blt(x10, t1, *profile_method_continue);  // NOTE(review): signed compare here, unsigned (bltu) for the invocation limit below - confirm intended
+
+      // if no method data exists, go to profile_method
+      __ test_method_data_pointer(t1, *profile_method);
+    }
+
+    {
+      __ ld(t1, Address(xmethod, Method::method_counters_offset()));
+      __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset())));
+      __ bltu(x10, t1, done);  // still below the invocation limit
+      __ j(*overflow);
+    }
+    __ bind(done);
+  }
+}
+
+void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {
+  const address overflow_fn = CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow);
+  __ mv(c_rarg1, zr);                       // the argument handed to the runtime is zero
+  __ call_VM(noreg, overflow_fn, c_rarg1);  // no result register requested
+  __ j(do_continue);                        // resume at the caller-supplied label
+}
+
+// See if we've got enough room on the stack for locals plus overhead
+// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError
+// without going through the signal handler, i.e., reserved and yellow zones
+// will not be made usable. The shadow zone must suffice to handle the
+// overflow.
+// The expression stack grows down incrementally, so the normal guard
+// page mechanism will work for that.
+//
+// NOTE: The additional locals are also always pushed (this wasn't
+// obvious in generate_method_entry), so the guard should work for them
+// too.
+//
+// Args:
+//      x13: number of additional locals this frame needs (what we must check)
+//      xmethod: Method*
+//
+// Kills:
+//      x10, t0 (and sp is reset from x30 on the overflow path)
+void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {
+
+  // monitor entry size: see picture of stack set (generate_method_entry).
+  // NOTE(review): this used to cite frame_amd64.hpp (x86 heritage); presumably frame_riscv.hpp here - confirm
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+  // total overhead size: entry_size + (saved fp through expr stack
+  // bottom).  be sure to change this if you add/subtract anything
+  // to/from the overhead area
+  const int overhead_size =
+    -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size;
+
+  const int page_size = os::vm_page_size();
+
+  Label after_frame_check;
+
+  // see if the frame is greater than one page in size. If so,
+  // then we need to verify there is enough stack space remaining
+  // for the additional locals.
+  __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize);  // threshold in stack elements
+  __ bleu(x13, t0, after_frame_check);  // at or below the threshold: skip the detailed check
+
+  // compute sp as if this were going to be the last frame on
+  // the stack before the red zone
+
+  // locals + overhead, in bytes
+  __ mv(x10, overhead_size);
+  __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize);  // presumably x10 += x13 << logStackElementSize; the old "2 slots per parameter" remark looked stale - confirm
+
+  const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset());
+  __ ld(t0, stack_limit);
+
+#ifdef ASSERT
+  Label limit_okay;
+  // Verify that thread stack limit is non-zero.
+  __ bnez(t0, limit_okay);
+  __ stop("stack overflow limit is zero");
+  __ bind(limit_okay);
+#endif
+
+  // Add stack limit to locals.
+  __ add(x10, x10, t0);
+
+  // Check against the current stack bottom.
+  __ bgtu(sp, x10, after_frame_check);  // enough room when sp is (unsigned) above the computed limit
+
+  // Remove the incoming args, peeling the machine SP back to where it
+  // was in the caller.  This is not strictly necessary, but unless we
+  // do so the stack frame may have a garbage FP; this ensures a
+  // correct call stack that we can always unwind.  The ANDI should be
+  // unnecessary because the sender SP in x30 is always aligned, but
+  // it doesn't hurt.
+  __ andi(sp, x30, -16);
+
+  // Note: the restored frame is not necessarily interpreted.
+  // Use the shared runtime version of the StackOverflowError.
+  assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated");
+  __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry()));
+
+  // all done with frame size check
+  __ bind(after_frame_check);
+}
+
+// Allocate monitor and lock method (asm interpreter)
+//
+// Args:
+//      xmethod: Method*
+//      xlocals: locals
+//
+// Kills:
+//      x10
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
+//      t0, t1 (temporary regs)
+void TemplateInterpreterGenerator::lock_method() {
+  // Emits code that pushes one monitor entry and locks the method's sync object
+  const Address access_flags(xmethod, Method::access_flags_offset());
+  const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+
+#ifdef ASSERT
+  __ lwu(x10, access_flags);
+  __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false);
+#endif // ASSERT
+
+  // get synchronization object into x10: receiver, or class mirror if static
+  {
+    Label done;
+    __ lwu(x10, access_flags);
+    __ andi(t0, x10, JVM_ACC_STATIC);
+    // get receiver (assume this is frequent case)
+    __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0)));
+    __ beqz(t0, done);
+    __ load_mirror(x10, xmethod);  // static method: use the mirror instead
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ bnez(x10, L);
+      __ stop("synchronization object is NULL");
+      __ bind(L);
+    }
+#endif // ASSERT
+
+    __ bind(done);
+  }
+
+  // add space for monitor & lock (both sp and esp move down by one entry)
+  __ add(sp, sp, - entry_size); // add space for a monitor entry
+  __ add(esp, esp, - entry_size);
+  __ mv(t0, esp);
+  __ sd(t0, monitor_block_top);  // set new monitor block top
+  // store object
+  __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes()));
+  __ mv(c_rarg1, esp); // address of the new monitor entry (not of the object)
+  __ lock_object(c_rarg1);
+}
+
+// Generate a fixed interpreter frame. This is identical setup for
+// interpreted methods and for native methods hence the shared code.
+//
+// Args:
+//      ra: return address
+//      xmethod: Method*
+//      xlocals: pointer to locals
+//      xcpool: cp cache
+//      stack_pointer: previous sp
+void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
+  // initialize fixed part of activation frame (14 words if native, else 12)
+  if (native_call) {
+    __ add(esp, sp, - 14 * wordSize);
+    __ mv(xbcp, zr);
+    __ add(sp, sp, - 14 * wordSize);
+    // add 2 zero-initialized slots for native calls
+    __ sd(zr, Address(sp, 13 * wordSize));
+    __ sd(zr, Address(sp, 12 * wordSize));
+  } else {
+    __ add(esp, sp, - 12 * wordSize);
+    __ ld(t0, Address(xmethod, Method::const_offset()));     // get ConstMethod
+    __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase
+    __ add(sp, sp, - 12 * wordSize);
+  }
+  __ sd(xbcp, Address(sp, wordSize));  // slot 1: bcp (zero for native calls)
+  __ sd(esp, Address(sp, 0));          // slot 0: esp
+
+  if (ProfileInterpreter) {  // t0 = MethodData data area, or 0 if no MethodData
+    Label method_data_continue;
+    __ ld(t0, Address(xmethod, Method::method_data_offset()));
+    __ beqz(t0, method_data_continue);
+    __ la(t0, Address(t0, in_bytes(MethodData::data_offset())));
+    __ bind(method_data_continue);
+  }
+
+  __ sd(xmethod, Address(sp, 7 * wordSize));                       // slot 7: Method*
+  __ sd(ProfileInterpreter ? t0 : zr, Address(sp, 6 * wordSize));  // slot 6: t0 from above, or zero
+
+  // Get mirror and store it in the frame as GC root for this Method*
+#if INCLUDE_SHENANDOAHGC
+  if (UseShenandoahGC) {
+    __ load_mirror(x28, xmethod);
+    __ sd(zr, Address(sp, 5 * wordSize));
+    __ sd(x28, Address(sp, 4 * wordSize));
+  } else
+#endif
+  {
+    __ load_mirror(t2, xmethod);
+    __ sd(zr, Address(sp, 5 * wordSize));  // slot 5: zeroed
+    __ sd(t2, Address(sp, 4 * wordSize));  // slot 4: mirror
+  }
+
+  __ ld(xcpool, Address(xmethod, Method::const_offset()));
+  __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset()));
+  __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes()));
+  __ sd(xcpool, Address(sp, 3 * wordSize));   // slot 3: constant pool cache
+  __ sd(xlocals, Address(sp, 2 * wordSize));  // slot 2: locals pointer
+
+  __ sd(ra, Address(sp, 11 * wordSize));
+  __ sd(fp, Address(sp, 10 * wordSize));
+  __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp
+
+  // set sender sp
+  // leave last_sp as null
+  __ sd(x30, Address(sp, 9 * wordSize));
+  __ sd(zr, Address(sp, 8 * wordSize));
+
+  // Move SP out of the way: sp = align16(sp - (max_stack + monitor size + 2) * 8)
+  if (!native_call) {
+    __ ld(t0, Address(xmethod, Method::const_offset()));
+    __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+    __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2);
+    __ slli(t0, t0, 3);
+    __ sub(t0, sp, t0);
+    __ andi(sp, t0, -16);
+  }
+}
+
+// End of helpers
+
+// Various method entries
+//------------------------------------------------------------------------------------------------------------------------
+//
+//
+
+// Method entry for java.lang.ref.Reference.get.
+address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_entry.
+  //
+  // xmethod: Method*
+  // x30: senderSP must preserve for slow path, set SP to it on fast path
+
+  // ra is live.  It must be saved around calls.
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  Label slow_path;
+  const Register local_0 = c_rarg0;
+  // Check if local 0 != NULL (local 0, the receiver, is read from the top of esp)
+  // If the receiver is null then it is OK to jump to the slow path.
+  __ ld(local_0, Address(esp, 0));
+  __ beqz(local_0, slow_path);
+
+  __ mv(x9, x30);   // Move senderSP to a callee-saved register
+
+  // Load the referent through the GC barrier set (ON_WEAK_OOP_REF) into local_0.
+  const Address field_address(local_0, referent_offset);
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0);
+
+  // areturn: restore sp from the saved (16-byte aligned) sender SP, then return
+  __ andi(sp, x9, -16);  // done with stack
+  __ ret();
+
+  // generate a vanilla interpreter entry as the slow path
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
+  // TODO: not implemented on this port yet; no entry code is generated.
+  return NULL;
+}
+
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  // TODO: not implemented on this port yet; 'kind' is ignored and no entry code is generated.
+  return NULL;
+}
+
+/**
+ * Method entry for intrinsic-candidate (non-native) methods:
+ *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native
+ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
+ */
+address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+  // TODO: not implemented on this port yet; 'kind' is ignored and no entry code is generated.
+  return NULL;
+}
+
+void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
+  // Emit one zero store per shadow-zone page below sp ("banging" the page).
+  // Non-native entries bang every page from 1 to n_shadow_pages, because an
+  // interpreter frame with more than a page of locals may not have touched
+  // them yet. Native entries bang only the last shadow page.
+  const int page_size = os::vm_page_size();
+  const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / page_size;
+  for (int page = native_call ? n_shadow_pages : 1; page <= n_shadow_pages; page++) {
+    __ sub(t0, sp, page * page_size);
+    __ sd(zr, Address(t0));
+  }
+}
+
+// Interpreter stub for calling a native method. (asm interpreter)
+// This sets up a somewhat different looking stack for calling the
+// native method than the typical interpreter frame setup.
+address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
+  // determine code generation flags
+  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // xmethod: Method*
+  // x30: sender sp
+
+  address entry_point = __ pc();
+
+  const Address constMethod       (xmethod, Method::const_offset());
+  const Address access_flags      (xmethod, Method::access_flags_offset());
+  const Address size_of_parameters(x12, ConstMethod::
+                                   size_of_parameters_offset());
+
+  // get parameter size (always needed)
+  __ ld(x12, constMethod);
+  __ load_unsigned_short(x12, size_of_parameters);
+
+  // Native calls don't need the stack size check since they have no
+  // expression stack and the arguments are already on the stack and
+  // we only add a handful of words to the stack.
+
+  // xmethod: Method*
+  // x12: size of parameters
+  // x30: sender sp
+
+  // for natives the size of locals is zero
+
+  // compute beginning of parameters (xlocals)
+  __ shadd(xlocals, x12, esp, xlocals, 3);
+  __ addi(xlocals, xlocals, -wordSize);
+
+  // Pull SP back to minimum size: this avoids holes in the stack
+  __ andi(sp, esp, -16);
+
+  // initialize fixed part of activation frame
+  generate_fixed_frame(true);
+
+  // make sure method is native & not abstract
+#ifdef ASSERT
+  __ lwu(x10, access_flags);
+  __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false);
+  __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
+#endif
+
+  // Since at this point in the method invocation the exception
+  // handler would try to exit the monitor of synchronized methods
+  // which hasn't been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. The remove_activation
+  // will check this flag.
+
+  const Address do_not_unlock_if_synchronized(xthread,
+                                              in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  __ mv(t1, true);
+  __ sb(t1, do_not_unlock_if_synchronized);
+
+  // increment invocation count & check for overflow
+  Label invocation_counter_overflow;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
+  }
+
+  Label continue_after_compile;  // label handed to generate_counter_overflow() at the end
+  __ bind(continue_after_compile);
+
+  bang_stack_shadow_pages(true);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ sb(zr, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER invocation_counter check and stack overflow check,
+  // so method is not locked if overflows.
+  if (synchronized) {
+    lock_method();
+  } else {
+    // no synchronization necessary
+#ifdef ASSERT
+    __ lwu(x10, access_flags);
+    __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
+#endif
+  }
+
+  // start execution
+#ifdef ASSERT
+  __ verify_frame_setup();
+#endif
+
+  // jvmti support
+  __ notify_method_entry();
+
+  // work registers
+  const Register t = x18;
+  const Register result_handler = x19;
+
+  // allocate space for parameters (x30 = esp - parameter bytes; sp = x30 aligned down to 16)
+  __ ld(t, Address(xmethod, Method::const_offset()));
+  __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset()));
+
+  __ slli(t, t, Interpreter::logStackElementSize);
+  __ sub(x30, esp, t);
+  __ andi(sp, x30, -16);
+  __ mv(esp, x30);
+
+  // get signature handler (ask the runtime to install one if none is set yet)
+  {
+    Label L;
+    __ ld(t, Address(xmethod, Method::signature_handler_offset()));
+    __ bnez(t, L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::prepare_native_call),
+               xmethod);
+    __ ld(t, Address(xmethod, Method::signature_handler_offset()));
+    __ bind(L);
+  }
+
+  // call signature handler
+  assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals,
+         "adjust this code");
+  assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp,
+         "adjust this code");
+  assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0,
+          "adjust this code");
+
+  // The generated handlers do not touch xmethod (the method).
+  // However, large signatures cannot be cached and are generated
+  // each time here.  The slow-path generator can do a GC on return,
+  // so we must reload it after the call.
+  __ jalr(t);
+  __ get_method(xmethod);        // slow path can do a GC, reload xmethod
+
+
+  // result handler is in x10
+  // set result handler
+  __ mv(result_handler, x10);
+  // pass mirror handle if static call
+  {
+    Label L;
+    __ lwu(t, Address(xmethod, Method::access_flags_offset()));
+    __ andi(t0, t, JVM_ACC_STATIC);
+    __ beqz(t0, L);
+    // get mirror
+    __ load_mirror(t, xmethod);
+    // copy mirror into activation frame
+    __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+    // pass handle to mirror
+    __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize);
+    __ bind(L);
+  }
+
+  // get native function entry point in x28
+  {
+    Label L;
+    __ ld(x28, Address(xmethod, Method::native_function_offset()));
+    address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
+    __ mv(t1, unsatisfied);
+    __ ld(t1, t1);  // load the word stored at that address for the comparison
+    __ bne(x28, t1, L);
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::prepare_native_call),
+               xmethod);
+    __ get_method(xmethod);
+    __ ld(x28, Address(xmethod, Method::native_function_offset()));
+    __ bind(L);
+  }
+
+  // pass JNIEnv
+  __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset()));
+
+  // It is enough that the pc() points into the right code
+  // segment. It does not have to be the correct return pc.
+  Label native_return;
+  __ set_last_Java_frame(esp, fp, native_return, x30);
+
+  // change thread state
+#ifdef ASSERT
+  {
+    Label L;
+    __ lwu(t, Address(xthread, JavaThread::thread_state_offset()));
+    __ addi(t0, zr, (u1)_thread_in_Java);
+    __ beq(t, t0, L);
+    __ stop("Wrong thread state in native stub");
+    __ bind(L);
+  }
+#endif
+
+  // Change state to native (barrier first, so earlier accesses precede the state store)
+  __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
+  __ mv(t0, _thread_in_native);
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+  __ sw(t0, Address(t1));
+
+  // Call the native method.
+  __ jalr(x28);
+  __ bind(native_return);
+  __ get_method(xmethod);
+  // result potentially in x10 or f10
+
+  // make room for the pushes we're about to do
+  __ sub(t0, esp, 4 * wordSize);
+  __ andi(sp, t0, -16);
+
+  // NOTE: The order of these pushes is known to frame::interpreter_frame_result
+  // in order to extract the result of a method call. If the order of these
+  // pushes change or anything else is added to the stack then the code in
+  // interpreter_frame_result must also change.
+  __ push(dtos);
+  __ push(ltos);
+
+  // change thread state
+  // Force all preceding writes to be observed prior to thread state change
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+
+  __ mv(t0, _thread_in_native_trans);
+  __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+  // Force this write out before the read below
+  __ membar(MacroAssembler::AnyAny);
+
+  // check for safepoint operation in progress and/or pending suspend requests
+  {
+    Label L, Continue;
+    __ safepoint_poll_acquire(L);
+    __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset()));
+    __ beqz(t1, Continue);
+    __ bind(L);
+
+    // Don't use call_VM as it will see a possible pending exception
+    // and forward it and never return here preventing us from
+    // clearing _last_native_pc down below. So we do a runtime call by
+    // hand.
+    //
+    __ mv(c_rarg0, xthread);
+    __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+    __ jalr(t1);
+    __ get_method(xmethod);
+    __ reinit_heapbase();
+    __ bind(Continue);
+  }
+
+  // change thread state
+  // Force all preceding writes to be observed prior to thread state change
+  __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+
+  __ mv(t0, _thread_in_Java);
+  __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
+
+  // reset_last_Java_frame
+  __ reset_last_Java_frame(true);
+
+  if (CheckJNICalls) {
+    // clear_pending_jni_exception_check
+    __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
+  }
+
+  // reset handle block
+  __ ld(t, Address(xthread, JavaThread::active_handles_offset()));
+  __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes()));
+
+  // If result is an oop unbox and store it in frame where gc will see it
+  // and result handler will pick it up
+
+  {
+    Label no_oop;
+    __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT)));
+    __ bne(t, result_handler, no_oop);
+    // Unbox oop result, e.g. JNIHandles::resolve result.
+    __ pop(ltos);
+    __ resolve_jobject(x10, xthread, t);
+    __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize));
+    // keep stack depth as expected by pushing oop which will eventually be discarded
+    __ push(ltos);
+    __ bind(no_oop);
+  }
+
+  {
+    Label no_reguard;  // taken unless the guard state is stack_guard_yellow_reserved_disabled
+    __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset())));
+    __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled);
+    __ bne(t0, t1, no_reguard);
+
+    __ pusha(); // only save smashed registers
+    __ mv(c_rarg0, xthread);
+    __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+    __ jalr(t1);
+    __ popa(); // only restore smashed registers
+    __ bind(no_reguard);
+  }
+
+  // The method register is junk from after the thread_in_native transition
+  // until here.  Also can't call_VM until the bcp has been
+  // restored.  Need bcp for throwing exception below so get it now.
+  __ get_method(xmethod);
+
+  // restore bcp to have legal interpreter frame, i.e., bci == 0 <=>
+  // xbcp == code_base()
+  __ ld(xbcp, Address(xmethod, Method::const_offset()));   // get ConstMethod*
+  __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));          // get codebase
+  // handle exceptions (exception handling will handle unlocking!)
+  {
+    Label L;
+    __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
+    __ beqz(t0, L);
+    // Note: At some point we may want to unify this with the code
+    // used in call_VM_base(); i.e., we should use the
+    // StubRoutines::forward_exception code. For now this doesn't work
+    // here because the sp is not correctly set at this point.
+    __ MacroAssembler::call_VM(noreg,
+                               CAST_FROM_FN_PTR(address,
+                               InterpreterRuntime::throw_pending_exception));
+    __ should_not_reach_here();
+    __ bind(L);
+  }
+
+  // do unlocking if necessary
+  {
+    Label L;
+    __ lwu(t, Address(xmethod, Method::access_flags_offset()));
+    __ andi(t0, t, JVM_ACC_SYNCHRONIZED);
+    __ beqz(t0, L);
+    // the code below should be shared with interpreter macro
+    // assembler implementation
+    {
+      Label unlock;
+      // BasicObjectLock will be first in list, since this is a
+      // synchronized method. However, need to check that the object
+      // has not been unlocked by an explicit monitorexit bytecode.
+
+      // monitor is expected in c_rarg1 for the slow unlock path
+      __ la(c_rarg1, Address(fp,   // address of first monitor
+                             (intptr_t)(frame::interpreter_frame_initial_sp_offset *
+                                        wordSize - sizeof(BasicObjectLock))));
+
+      __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+      __ bnez(t, unlock);
+
+      // Entry already unlocked, need to throw exception
+      __ MacroAssembler::call_VM(noreg,
+                                 CAST_FROM_FN_PTR(address,
+                                                  InterpreterRuntime::throw_illegal_monitor_state_exception));
+      __ should_not_reach_here();
+
+      __ bind(unlock);
+      __ unlock_object(c_rarg1);
+    }
+    __ bind(L);
+  }
+
+  // jvmti support
+  // Note: This must happen _after_ handling/throwing any exceptions since
+  //       the exception handler code notifies the runtime of method exits
+  //       too. If this happens before, method entry/exit notifications are
+  //       not properly paired (was bug - gri 11/22/99).
+  __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);
+
+  __ pop(ltos);
+  __ pop(dtos);
+
+  __ jalr(result_handler);
+
+  // remove activation
+  __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
+  // remove frame anchor
+  __ leave();
+
+  // restore sender sp
+  __ mv(sp, esp);
+
+  __ ret();
+
+  if (inc_counter) {
+    // Handle overflow of counter and compile method
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(continue_after_compile);
+  }
+
+  return entry_point;
+}
+
+//
+// Generic interpreted method entry to (asm) interpreter
+//
+address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
+
+  // determine code generation flags
+  const bool inc_counter  = UseCompiler || CountCompiledCalls || LogTouchedMethods;
+
+  // x30: sender sp (generate_fixed_frame stores it; t0 is only scratch here)
+  address entry_point = __ pc();
+
+  const Address constMethod(xmethod, Method::const_offset());
+  const Address access_flags(xmethod, Method::access_flags_offset());
+  const Address size_of_parameters(x13,
+                                   ConstMethod::size_of_parameters_offset());
+  const Address size_of_locals(x13, ConstMethod::size_of_locals_offset());
+
+  // get parameter size (always needed)
+  // need to load the const method first
+  __ ld(x13, constMethod);
+  __ load_unsigned_short(x12, size_of_parameters);
+
+  // x12: size of parameters
+
+  __ load_unsigned_short(x13, size_of_locals); // get size of locals in words
+  __ sub(x13, x13, x12); // x13 = no. of additional locals
+
+  // see if we've got enough room on the stack for locals plus overhead.
+  generate_stack_overflow_check();
+
+  // compute beginning of parameters (xlocals)
+  __ shadd(xlocals, x12, esp, t1, 3);
+  __ add(xlocals, xlocals, -wordSize);
+
+  // Make room for additional locals
+  __ slli(t1, x13, 3);   // additional locals in bytes
+  __ sub(t0, esp, t1);   // t0 = lowest address of the new locals area
+
+  // Padding between locals and fixed part of activation frame to ensure
+  // SP is always 16-byte aligned.
+  __ andi(sp, t0, -16);
+
+  // x13 - # of additional locals
+  // allocate space for locals
+  // explicitly initialize locals (zero each word, walking up from t0)
+  {
+    Label exit, loop;
+    __ blez(x13, exit); // do nothing if x13 <= 0
+    __ bind(loop);
+    __ sd(zr, Address(t0));
+    __ add(t0, t0, wordSize);
+    __ add(x13, x13, -1); // until everything initialized
+    __ bnez(x13, loop);
+    __ bind(exit);
+  }
+
+  // And the base dispatch table
+  __ get_dispatch();
+
+  // initialize fixed part of activation frame
+  generate_fixed_frame(false);
+
+  // make sure method is not native & not abstract
+#ifdef ASSERT
+  __ lwu(x10, access_flags);
+  __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native");
+  __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter");
+#endif
+
+  // Since at this point in the method invocation the exception
+  // handler would try to exit the monitor of synchronized methods
+  // which hasn't been entered yet, we set the thread local variable
+  // _do_not_unlock_if_synchronized to true. The remove_activation
+  // will check this flag.
+
+  const Address do_not_unlock_if_synchronized(xthread,
+                                              in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  __ mv(t1, true);
+  __ sb(t1, do_not_unlock_if_synchronized);
+
+  Label no_mdp;  // parameter profiling is skipped when the method has no MethodData
+  const Register mdp = x13;
+  __ ld(mdp, Address(xmethod, Method::method_data_offset()));
+  __ beqz(mdp, no_mdp);
+  __ add(mdp, mdp, in_bytes(MethodData::data_offset()));
+  __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers
+  __ bind(no_mdp);
+
+  // increment invocation count & check for overflow
+  Label invocation_counter_overflow;
+  Label profile_method;
+  Label profile_method_continue;
+  if (inc_counter) {
+    generate_counter_incr(&invocation_counter_overflow,
+                          &profile_method,
+                          &profile_method_continue);
+    if (ProfileInterpreter) {
+      __ bind(profile_method_continue);
+    }
+  }
+
+  Label continue_after_compile;  // label handed to generate_counter_overflow() at the end
+  __ bind(continue_after_compile);
+
+  bang_stack_shadow_pages(false);
+
+  // reset the _do_not_unlock_if_synchronized flag
+  __ sb(zr, do_not_unlock_if_synchronized);
+
+  // check for synchronized methods
+  // Must happen AFTER invocation_counter check and stack overflow check,
+  // so method is not locked if overflows.
+  if (synchronized) {
+    // Allocate monitor and lock method
+    lock_method();
+  } else {
+    // no synchronization necessary
+#ifdef ASSERT
+    __ lwu(x10, access_flags);
+    __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization");
+#endif
+  }
+
+  // start execution
+#ifdef ASSERT
+  __ verify_frame_setup();
+#endif
+
+  // jvmti support
+  __ notify_method_entry();
+
+  __ dispatch_next(vtos);
+
+  // invocation counter overflow (out-of-line code emitted after the dispatch)
+  if (inc_counter) {
+    if (ProfileInterpreter) {
+      // We have decided to profile this method in the interpreter
+      __ bind(profile_method);
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ set_method_data_pointer_for_bcp();
+      // don't think we need this (NOTE(review): targets x11, not xmethod - confirm intent)
+      __ get_method(x11);
+      __ j(profile_method_continue);
+    }
+    // Handle overflow of counter and compile method
+    __ bind(invocation_counter_overflow);
+    generate_counter_overflow(continue_after_compile);
+  }
+
+  return entry_point;
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateInterpreterGenerator::generate_throw_exception() {
+  // Entry point in previous activation (i.e., if the caller was
+  // interpreted)
+  Interpreter::_rethrow_exception_entry = __ pc();
+  // Restore sp to interpreter_frame_last_sp even though we are going
+  // to empty the expression stack for the exception processing.
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  // x10: exception
+  // x13: return address/pc that threw exception
+  __ restore_bcp();    // xbcp points to call/send
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ reinit_heapbase();  // restore xheapbase as heapbase.
+  __ get_dispatch();
+
+  // Entry point for exceptions thrown within interpreter code
+  Interpreter::_throw_exception_entry = __ pc();
+  // If we came here via a NullPointerException on the receiver of a
+  // method, xthread may be corrupt.
+  __ get_method(xmethod);
+  // expression stack is undefined here
+  // x10: exception
+  // xbcp: exception bcp
+  __ verify_oop(x10);
+  __ mv(c_rarg1, x10);
+
+  // expression stack must be empty before entering the VM in case of
+  // an exception
+  __ empty_expression_stack();
+  // find exception handler address and preserve exception oop
+  __ call_VM(x13,
+             CAST_FROM_FN_PTR(address,
+                          InterpreterRuntime::exception_handler_for_exception),
+             c_rarg1);
+
+  // Calculate stack limit
+  __ ld(t0, Address(xmethod, Method::const_offset()));
+  __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+  __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
+  __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ slli(t0, t0, 3);
+  __ sub(t0, t1, t0);
+  __ andi(sp, t0, -16);
+
+  // x10: exception handler entry point
+  // x13: preserved exception oop
+  // xbcp: bcp for exception handler
+  __ push_ptr(x13); // push exception which is now the only value on the stack
+  __ jr(x10); // jump to exception handler (may be _remove_activation_entry!)
+
+  // If the exception is not handled in the current frame the frame is
+  // removed and the exception is rethrown (i.e. exception
+  // continuation is _rethrow_exception).
+  //
+  // Note: At this point the bci is still the bxi for the instruction
+  // which caused the exception and the expression stack is
+  // empty. Thus, for any VM calls at this point, GC will find a legal
+  // oop map (with empty expression stack).
+
+  //
+  // JVMTI PopFrame support
+  //
+
+  Interpreter::_remove_activation_preserving_args_entry = __ pc();
+  __ empty_expression_stack();
+  // Set the popframe_processing bit in pending_popframe_condition
+  // indicating that we are currently handling popframe, so that
+  // call_VMs that may happen later do not trigger new popframe
+  // handling cycles.
+  __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset()));
+  __ ori(x13, x13, JavaThread::popframe_processing_bit);
+  __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset()));
+
+  {
+    // Check to see whether we are returning to a deoptimized frame.
+    // (The PopFrame call ensures that the caller of the popped frame is
+    // either interpreted or compiled and deoptimizes it if compiled.)
+    // In this case, we can't call dispatch_next() after the frame is
+    // popped, but instead must save the incoming arguments and restore
+    // them after deoptimization has occurred.
+    //
+    // Note that we don't compare the return PC against the
+    // deoptimization blob's unpack entry because of the presence of
+    // adapter frames in C2.
+    Label caller_not_deoptimized;
+    __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize));
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1);
+    __ bnez(x10, caller_not_deoptimized);
+
+    // Compute size of arguments for saving when returning to
+    // deoptimized caller
+    __ get_method(x10);
+    __ ld(x10, Address(x10, Method::const_offset()));
+    __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod::
+                                                      size_of_parameters_offset())));
+    __ slli(x10, x10, Interpreter::logStackElementSize);
+    __ restore_locals();
+    __ sub(xlocals, xlocals, x10);
+    __ add(xlocals, xlocals, wordSize);
+    // Save these arguments
+    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                           Deoptimization::
+                                           popframe_preserve_args),
+                          xthread, x10, xlocals);
+
+    __ remove_activation(vtos,
+                         /* throw_monitor_exception */ false,
+                         /* install_monitor_exception */ false,
+                         /* notify_jvmdi */ false);
+
+    // Inform deoptimization that it is responsible for restoring
+    // these arguments
+    __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit);
+    __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset()));
+
+    // Continue in deoptimization handler
+    __ ret();
+
+    __ bind(caller_not_deoptimized);
+  }
+
+  __ remove_activation(vtos,
+                       /* throw_monitor_exception */ false,
+                       /* install_monitor_exception */ false,
+                       /* notify_jvmdi */ false);
+
+  // Restore the last_sp and null it out
+  __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+  __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
+
+  __ restore_bcp();
+  __ restore_locals();
+  __ restore_constant_pool_cache();
+  __ get_method(xmethod);
+  __ get_dispatch();
+
+  // The method data pointer was incremented already during
+  // call profiling. We have to restore the mdp for the current bcp.
+  if (ProfileInterpreter) {
+    __ set_method_data_pointer_for_bcp();
+  }
+
+  // Clear the popframe condition flag
+  __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset()));
+  assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive");
+
+#if INCLUDE_JVMTI
+  {
+    Label L_done;
+
+    __ lbu(t0, Address(xbcp, 0));
+    __ mv(t1, Bytecodes::_invokestatic);
+    __ bne(t1, t0, L_done);
+
+    // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
+    // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
+
+    __ ld(c_rarg0, Address(xlocals, 0));
+    __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp);
+
+    __ beqz(x10, L_done);
+
+    __ sd(x10, Address(esp, 0));
+    __ bind(L_done);
+  }
+#endif // INCLUDE_JVMTI
+
+  // Restore machine SP
+  __ ld(t0, Address(xmethod, Method::const_offset()));
+  __ lhu(t0, Address(t0, ConstMethod::max_stack_offset()));
+  __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4);
+  __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize));
+  __ slliw(t0, t0, 3);
+  __ sub(t0, t1, t0);
+  __ andi(sp, t0, -16);
+
+  __ dispatch_next(vtos);
+  // end of PopFrame support
+
+  Interpreter::_remove_activation_entry = __ pc();
+
+  // preserve exception over this code sequence
+  __ pop_ptr(x10);
+  __ sd(x10, Address(xthread, JavaThread::vm_result_offset()));
+  // remove the activation (without doing throws on illegalMonitorExceptions)
+  __ remove_activation(vtos, false, true, false);
+  // restore exception
+  __ get_vm_result(x10, xthread);
+
+  // In between activations - previous activation type unknown yet
+  // compute continuation point - the continuation point expects the
+  // following registers set up:
+  //
+  // x10: exception
+  // ra: return address/pc that threw exception
+  // sp: expression stack of caller
+  // fp: fp of caller
+  // FIXME: There's no point saving ra here because VM calls don't trash it
+  __ sub(sp, sp, 2 * wordSize);
+  __ sd(x10, Address(sp, 0));                   // save exception
+  __ sd(ra, Address(sp, wordSize));             // save return address
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                         SharedRuntime::exception_handler_for_return_address),
+                        xthread, ra);
+  __ mv(x11, x10);                              // save exception handler
+  __ ld(x10, Address(sp, 0));                   // restore exception
+  __ ld(ra, Address(sp, wordSize));             // restore return address
+  __ add(sp, sp, 2 * wordSize);
+  // We might be returning to a deopt handler that expects x13 to
+  // contain the exception pc
+  __ mv(x13, ra);
+  // Note that an "issuing PC" is actually the next PC after the call
+  __ jr(x11);                                   // jump to exception
+                                                // handler of caller
+}
+
+//
+// JVMTI ForceEarlyReturn support
+//
+address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state)  {
+  address entry = __ pc();  // first instruction of this stub; this is the value returned to the caller
+
+  __ restore_bcp();
+  __ restore_locals();
+  __ empty_expression_stack();
+  __ load_earlyret_value(state);  // NOTE(review): by its name, fetches the forced return value for 'state' - confirm in interp_masm
+
+  __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset()));  // t0 = pointer loaded from the thread's jvmti_thread_state field
+  Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset());
+
+  // Clear the earlyret state (a zero store is valid because earlyret_inactive == 0, asserted below)
+  assert(JvmtiThreadState::earlyret_inactive == 0, "should be");
+  __ sd(zr, cond_addr);
+
+  __ remove_activation(state,
+                       false, /* throw_monitor_exception */
+                       false, /* install_monitor_exception */
+                       true); /* notify_jvmdi */
+  __ ret();  // emitted after the activation has been removed
+
+  return entry;
+}
+// end of ForceEarlyReturn support
+
+//-----------------------------------------------------------------------------
+// Helper for vtos entry point generation
+
+void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
+                                                         address& bep,  // all address& parameters are outputs assigned below
+                                                         address& cep,
+                                                         address& sep,
+                                                         address& aep,
+                                                         address& iep,
+                                                         address& lep,
+                                                         address& fep,
+                                                         address& dep,
+                                                         address& vep) {
+  assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template");
+  Label L;  // common continuation that every entry point reaches
+  aep = __ pc();  __ push_ptr();  __ j(L);  // entry that pushes a pointer, then jumps to L
+  fep = __ pc();  __ push_f();    __ j(L);  // entry that pushes a float
+  dep = __ pc();  __ push_d();    __ j(L);  // entry that pushes a double
+  lep = __ pc();  __ push_l();    __ j(L);  // entry that pushes a long
+  bep = cep = sep =                         // these three share the address assigned to iep
+  iep = __ pc();  __ push_i();              // entry that pushes an int, then falls through
+  vep = __ pc();                            // entry that pushes nothing
+  __ bind(L);
+  generate_and_dispatch(t);
+}
+
+//-----------------------------------------------------------------------------
+
+// Non-product code
+#ifndef PRODUCT
+address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
+  address entry = __ pc();  // start of the tracing stub for this TosState; returned to the caller
+
+  __ push_reg(ra);
+  __ push(state);
+  __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);  // save x5-x7, x10-x17 and x28-x31 around the VM call
+  __ mv(c_rarg2, x10);  // Pass itos
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3);  // NOTE(review): only c_rarg2 is set in this function; c_rarg1/c_rarg3 are passed as found - confirm intended
+  __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp);  // restore the same register set
+  __ pop(state);
+  __ pop_reg(ra);
+  __ ret();                                   // return from result handler
+
+  return entry;
+}
+
+void TemplateInterpreterGenerator::count_bytecode() {
+  __ push_reg(t0);   // t0 and x10 are used as scratch below, so they are saved first
+  __ push_reg(x10);
+  __ mv(x10, (address) &BytecodeCounter::_counter_value);  // x10 = address of the counter
+  __ mv(t0, 1);                                            // t0 = increment
+  __ amoadd_d(zr, x10, t0, Assembler::aqrl);  // atomic 64-bit add to the counter; zr as destination discards the AMO result
+  __ pop_reg(x10);
+  __ pop_reg(t0);
+}
+
+void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; }  // no-op: emits no code
+
+void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; }  // no-op: emits no code
+
+void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
+  // Call a little run-time stub to avoid blow-up for each bytecode.
+  // The run-time stub saves the right registers, depending on
+  // the tosca in-state for the given template.
+
+  assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated");
+  __ jal(Interpreter::trace_code(t->tos_in()));  // call the stub registered for the template's incoming TosState
+  __ reinit_heapbase();
+}
+
+void TemplateInterpreterGenerator::stop_interpreter_at() {
+  Label L;
+  __ push_reg(t0);
+  __ mv(t0, (address) &BytecodeCounter::_counter_value);
+  __ ld(t0, Address(t0));        // t0 = current value of the bytecode counter
+  __ mv(t1, StopInterpreterAt);
+  __ bne(t0, t1, L);             // skip the breakpoint unless the counter equals StopInterpreterAt
+  __ ebreak();
+  __ bind(L);
+  __ pop_reg(t0);                // NOTE(review): t1 is overwritten above but is not saved/restored here
+}
+
+#endif // !PRODUCT
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
new file mode 100644
index 0000000000..c22fd3bfcd
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp
@@ -0,0 +1,4000 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interp_masm.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "memory/universe.hpp"
+#include "oops/method.hpp"
+#include "oops/methodData.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+
+#define __ _masm->
+
+// Platform-dependent initialization
+
+void TemplateTable::pd_initialize() {
+  // Intentionally empty: no RISC-V specific initialization
+}
+
+// Address computation: local variables
+
+static inline Address iaddress(int n) {  // address of local variable slot n
+  return Address(xlocals, Interpreter::local_offset_in_bytes(n));  // xlocals-relative, offset supplied by the Interpreter
+}
+
+static inline Address laddress(int n) {  // address used to access a long stored at local n
+  return iaddress(n + 1);  // the access goes through slot n + 1, not slot n
+}
+
+static inline Address faddress(int n) {  // float local n: same addressing as an int local
+  return iaddress(n);
+}
+
+static inline Address daddress(int n) {  // double local n: same addressing as a long local
+  return laddress(n);
+}
+
+static inline Address aaddress(int n) {  // reference local n: same addressing as an int local
+  return iaddress(n);
+}
+
+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {  // local addressed by a run-time index in r
+  _masm->shadd(temp, r, xlocals, temp, 3);  // emits code; presumably temp = xlocals + (r << 3) - confirm shadd's operand order
+  return Address(temp, 0);                  // callers in this file pass r already negated (see locals_index)
+}
+
+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {  // long local addressed by a run-time index in r
+  _masm->shadd(temp, r, xlocals, temp, 3);  // emits code; presumably temp = xlocals + (r << 3) - confirm shadd's operand order
+  return Address(temp, Interpreter::local_offset_in_bytes(1));  // plus the offset of one further slot; stray second ';' removed
+}
+
+static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {  // float local: delegates to the int form
+  return iaddress(r, temp, _masm);
+}
+
+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {  // double local: delegates to the long form
+  return laddress(r, temp, _masm);
+}
+
+static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) {  // reference local: delegates to the int form
+  return iaddress(r, temp, _masm);
+}
+
+static inline Address at_rsp() {  // the word that esp itself points at
+  return Address(esp, 0);
+}
+
+// At top of Java expression stack, which may be different from esp().  It
+// is not different for category 1 objects.
+static inline Address at_tos   () {
+  return Address(esp,  Interpreter::expr_offset_in_bytes(0));
+}
+
+static inline Address at_tos_p1() {  // expression stack element 1 (element 0 is at_tos)
+  return Address(esp,  Interpreter::expr_offset_in_bytes(1));
+}
+
+static inline Address at_tos_p2() {  // expression stack element 2
+  return Address(esp,  Interpreter::expr_offset_in_bytes(2));
+}
+
+static inline Address at_tos_p3() {  // expression stack element 3
+  return Address(esp,  Interpreter::expr_offset_in_bytes(3));
+}
+
+static inline Address at_tos_p4() {  // expression stack element 4
+  return Address(esp,  Interpreter::expr_offset_in_bytes(4));
+}
+
+static inline Address at_tos_p5() {  // expression stack element 5
+  return Address(esp,  Interpreter::expr_offset_in_bytes(5));
+}
+
+// Miscellaneous helper routines
+// Store an oop (or NULL) at the Address described by dst.
+// If val == noreg this means store a NULL
+static void do_oop_store(InterpreterMacroAssembler* _masm,
+                         Address dst,
+                         Register val,
+                         DecoratorSet decorators) {
+  assert(val == noreg || val == x10, "parameter is just for looks");  // only noreg or x10 is accepted
+  __ store_heap_oop(dst, val, x29, x11, decorators);  // x29 and x11 are the extra register arguments (presumably temps - confirm)
+}
+
+static void do_oop_load(InterpreterMacroAssembler* _masm,  // emits a heap oop load from src into dst
+                        Address src,
+                        Register dst,
+                        DecoratorSet decorators) {
+  __ load_heap_oop(dst, src, x7, x11, decorators);  // x7 and x11 are the extra register arguments (presumably temps - confirm)
+}
+
+Address TemplateTable::at_bcp(int offset) {  // address of the byte at xbcp + offset
+  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");  // the current template must declare that it uses the bcp
+  return Address(xbcp, offset);
+}
+
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,  // emits code that overwrites the byte at bcp with bc
+                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+                                   int byte_no)
+{
+  if (!RewriteBytecodes)  { return; }  // nothing is emitted when rewriting is disabled
+  Label L_patch_done;
+
+  switch (bc) {
+    case Bytecodes::_fast_aputfield:  // fall through
+    case Bytecodes::_fast_bputfield:  // fall through
+    case Bytecodes::_fast_zputfield:  // fall through
+    case Bytecodes::_fast_cputfield:  // fall through
+    case Bytecodes::_fast_dputfield:  // fall through
+    case Bytecodes::_fast_fputfield:  // fall through
+    case Bytecodes::_fast_iputfield:  // fall through
+    case Bytecodes::_fast_lputfield:  // fall through
+    case Bytecodes::_fast_sputfield: {
+      // We skip bytecode quickening for putfield instructions when
+      // the put_code written to the constant pool cache is zero.
+      // This is required so that every execution of this instruction
+      // calls out to InterpreterRuntime::resolve_get_put to do
+      // additional, required work.
+      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);  // temp_reg is tested below as the put_code
+      __ mv(bc_reg, bc);
+      __ beqz(temp_reg, L_patch_done);  // zero put_code: leave the bytecode unpatched
+      break;
+    }
+    default:
+      assert(byte_no == -1, "sanity");
+      // the pair bytecodes have already done the load.
+      if (load_bc_into_bc_reg) {
+        __ mv(bc_reg, bc);
+      }
+  }
+
+  if (JvmtiExport::can_post_breakpoint()) {
+    Label L_fast_patch;
+    // if a breakpoint is present we can't rewrite the stream directly
+    __ load_unsigned_byte(temp_reg, at_bcp(0));
+    __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // zero iff the byte at bcp is _breakpoint
+    __ bnez(temp_reg, L_fast_patch);
+    // Let breakpoint table handling rewrite to quicker bytecode
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg);
+    __ j(L_patch_done);
+    __ bind(L_fast_patch);
+  }
+
+#ifdef ASSERT
+  Label L_okay;
+  __ load_unsigned_byte(temp_reg, at_bcp(0));
+  __ beq(temp_reg, bc_reg, L_okay);  // fine if the byte at bcp already equals the new bytecode
+  __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc));
+  __ beqz(temp_reg, L_okay);  // also fine if it equals java_code(bc)
+  __ stop("patching the wrong bytecode");
+  __ bind(L_okay);
+#endif
+
+  // patch bytecode
+  __ sb(bc_reg, at_bcp(0));
+  __ bind(L_patch_done);
+}
+
+// Individual instructions
+
+void TemplateTable::nop() {  // template that emits no instructions
+  transition(vtos, vtos);
+  // nothing to do
+}
+
+void TemplateTable::shouldnotreachhere() {  // template whose generated code only reports an error
+  transition(vtos, vtos);
+  __ stop("should not reach here bytecode");  // emits a stop carrying this message
+}
+
+void TemplateTable::aconst_null()
+{
+  transition(vtos, atos);
+  __ mv(x10, zr);  // x10 = 0, i.e. the null reference
+}
+
+void TemplateTable::iconst(int value)
+{
+  transition(vtos, itos);
+  __ mv(x10, value);  // x10 = the int constant chosen at generation time
+}
+
+void TemplateTable::lconst(int value)
+{
+  transition(vtos, ltos);
+  __ mv(x10, value);  // x10 = the long constant chosen at generation time
+}
+
+void TemplateTable::fconst(int value)
+{
+  transition(vtos, ftos);
+  static float fBuf[2] = {1.0, 2.0};  // in-memory constants read by the generated flw instructions
+  __ mv(t0, (intptr_t)fBuf);          // t0 = &fBuf[0]; NOTE(review): emitted even for value 0, where t0 is unused
+  switch (value) {
+    case 0:
+      __ fmv_w_x(f10, zr);  // f10 = all-zero bit pattern
+      break;
+    case 1:
+      __ flw(f10, t0, 0);  // f10 = fBuf[0]
+      break;
+    case 2:
+      __ flw(f10, t0, sizeof(float));  // f10 = fBuf[1]
+      break;
+    default:
+      ShouldNotReachHere();  // generation-time check: only 0, 1 and 2 are handled
+  }
+}
+
+void TemplateTable::dconst(int value)
+{
+  transition(vtos, dtos);
+  static double dBuf[2] = {1.0, 2.0};  // in-memory constants read by the generated fld instructions
+  __ mv(t0, (intptr_t)dBuf);           // t0 = &dBuf[0]; NOTE(review): emitted even for value 0, where t0 is unused
+  switch (value) {
+    case 0:
+      __ fmv_d_x(f10, zr);  // f10 = all-zero bit pattern
+      break;
+    case 1:
+      __ fld(f10, t0, 0);  // f10 = dBuf[0]
+      break;
+    case 2:
+      __ fld(f10, t0, sizeof(double));  // f10 = dBuf[1]
+      break;
+    default:
+      ShouldNotReachHere();  // generation-time check: only 0, 1 and 2 are handled
+  }
+}
+
+void TemplateTable::bipush()
+{
+  transition(vtos, itos);
+  __ load_signed_byte(x10, at_bcp(1));  // x10 = signed byte operand at bcp + 1
+}
+
+void TemplateTable::sipush()
+{
+  transition(vtos, itos);
+  __ load_unsigned_short(x10, at_bcp(1));  // raw 16-bit operand at bcp + 1
+  __ revb_w_w(x10, x10);                   // presumably byte-reverses the 32-bit word, moving the operand to the top half - confirm
+  __ sraiw(x10, x10, 16);                  // arithmetic shift right by 16 on the 32-bit word
+}
+
+void TemplateTable::ldc(bool wide)  // emits ldc (wide == false) or the 2-byte-index form (wide == true)
+{
+  transition(vtos, vtos);
+  Label call_ldc, notFloat, notClass, notInt, Done;
+
+  if (wide) {
+    __ get_unsigned_2_byte_index_at_bcp(x11, 1);
+  } else {
+    __ load_unsigned_byte(x11, at_bcp(1));
+  }
+  __ get_cpool_and_tags(x12, x10);  // used below as: x12 = constant pool base, x10 = tags array
+
+  const int base_offset = ConstantPool::header_size() * wordSize;
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+  // get type
+  __ addi(x13, x11, tags_offset);
+  __ add(x13, x10, x13);  // x13 = address of the tag byte for index x11
+  __ membar(MacroAssembler::AnyAny);
+  __ lbu(x13, Address(x13, 0));
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+
+  // unresolved class - get the resolved class
+  __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass);
+  __ beq(x13, t1, call_ldc);
+
+  // unresolved class in error state - call into runtime to throw the error
+  // from the first resolution attempt
+  __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError);
+  __ beq(x13, t1, call_ldc);
+
+  // resolved class - need to call vm to get java mirror of the class
+  __ mv(t1, (u1)JVM_CONSTANT_Class);
+  __ bne(x13, t1, notClass);
+
+  __ bind(call_ldc);
+  __ mv(c_rarg1, wide);  // pass the wide flag to the runtime
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1);
+  __ push_ptr(x10);
+  __ verify_oop(x10);
+  __ j(Done);
+
+  __ bind(notClass);
+  __ mv(t1, (u1)JVM_CONSTANT_Float);
+  __ bne(x13, t1, notFloat);
+
+  // ftos
+  __ shadd(x11, x11, x12, x11, 3);  // presumably x11 = x12 + (x11 << 3) - confirm shadd's operand order
+  __ flw(f10, Address(x11, base_offset));
+  __ push_f(f10);
+  __ j(Done);
+
+  __ bind(notFloat);
+
+  __ mv(t1, (u1)JVM_CONSTANT_Integer);
+  __ bne(x13, t1, notInt);
+
+  // itos
+  __ shadd(x11, x11, x12, x11, 3);  // same entry addressing as the float case
+  __ lw(x10, Address(x11, base_offset));
+  __ push_i(x10);
+  __ j(Done);
+
+  __ bind(notInt);
+  condy_helper(Done);  // every remaining tag is handed to condy_helper
+
+  __ bind(Done);
+}
+
+// Fast path for caching oop constants.
+void TemplateTable::fast_aldc(bool wide)
+{
+  transition(vtos, atos);
+
+  const Register result = x10;
+  const Register tmp = x11;
+  const Register rarg = x12;
+
+  const int index_size = wide ? sizeof(u2) : sizeof(u1);  // operand width in bytes: 2 when wide, otherwise 1
+
+  Label resolved;
+
+  // We are resolved if the resolved reference cache entry contains a
+  // non-null object (String, MethodType, etc.)
+  assert_different_registers(result, tmp);
+  __ get_cache_index_at_bcp(tmp, 1, index_size);
+  __ load_resolved_reference_at_index(result, tmp);
+  __ bnez(result, resolved);
+
+  const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+
+  // first time invocation - must resolve first
+  __ mv(rarg, (int)bytecode());  // pass the current bytecode to the runtime
+  __ call_VM(result, entry, rarg);
+
+  __ bind(resolved);
+
+  { // Check for the null sentinel.
+    // If we just called the VM, it already did the mapping for us,
+    // but it's harmless to retry.
+    Label notNull;
+
+    // Stash null_sentinel address to get its value later
+    int32_t offset = 0;
+    __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset);
+    __ ld(tmp, Address(rarg, offset));  // tmp = current value of the null sentinel
+    __ bne(result, tmp, notNull);
+    __ mv(result, zr);  // NULL object reference
+    __ bind(notNull);
+  }
+
+  if (VerifyOops) {
+    // Safe to call with 0 result
+    __ verify_oop(result);
+  }
+}
+
+void TemplateTable::ldc2_w()  // body re-indented to the 2-space style used by the rest of this file
+{
+  transition(vtos, vtos);
+  Label notDouble, notLong, Done;
+  __ get_unsigned_2_byte_index_at_bcp(x10, 1);  // x10 = 2-byte constant pool index
+
+  __ get_cpool_and_tags(x11, x12);  // used below as: x11 = constant pool base, x12 = tags array
+  const int base_offset = ConstantPool::header_size() * wordSize;
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+
+  // get type
+  __ add(x12, x12, x10);
+  __ load_unsigned_byte(x12, Address(x12, tags_offset));  // x12 = tag byte for index x10
+  __ mv(t1, JVM_CONSTANT_Double);
+  __ bne(x12, t1, notDouble);
+
+  // dtos
+  __ shadd(x12, x10, x11, x12, 3);  // presumably x12 = x11 + (x10 << 3) - confirm shadd's operand order
+  __ fld(f10, Address(x12, base_offset));
+  __ push_d(f10);
+  __ j(Done);
+
+  __ bind(notDouble);
+  __ mv(t1, (int)JVM_CONSTANT_Long);
+  __ bne(x12, t1, notLong);
+
+  // ltos
+  __ shadd(x10, x10, x11, x10, 3);  // same entry addressing as the double case
+  __ ld(x10, Address(x10, base_offset));
+  __ push_l(x10);
+  __ j(Done);
+
+  __ bind(notLong);
+  condy_helper(Done);  // every remaining tag is handed to condy_helper
+  __ bind(Done);
+}
+
+void TemplateTable::condy_helper(Label& Done)  // resolves via the runtime, pushes the primitive result, then jumps to Done
+{
+  const Register obj = x10;
+  const Register rarg = x11;
+  const Register flags = x12;
+  const Register off = x13;
+
+  const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
+
+  __ mv(rarg, (int) bytecode());
+  __ call_VM(obj, entry, rarg);
+
+  __ get_vm_result_2(flags, xthread);
+
+  // VMr = obj = base address to find primitive value to push
+  // VMr2 = flags = (tos, off) using format of CPCE::_flags
+  __ mv(off, flags);
+  __ mv(t0, ConstantPoolCacheEntry::field_index_mask);
+  __ andrw(off, off, t0);  // off = flags & field_index_mask
+
+  __ add(off, obj, off);
+  const Address field(off, 0); // off now holds obj + offset, so field addresses the value
+
+  __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
+  __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); // flags = the tos_state bit-field, right-justified
+
+  switch (bytecode()) {
+    case Bytecodes::_ldc:   // fall through
+    case Bytecodes::_ldc_w: {
+      // tos in (itos, ftos, stos, btos, ctos, ztos)
+      Label notInt, notFloat, notShort, notByte, notChar, notBool;
+      __ mv(t1, itos);
+      __ bne(flags, t1, notInt);
+      // itos
+      __ lw(x10, field);
+      __ push(itos);
+      __ j(Done);
+
+      __ bind(notInt);
+      __ mv(t1, ftos);
+      __ bne(flags, t1, notFloat);
+      // ftos
+      __ load_float(field);
+      __ push(ftos);
+      __ j(Done);
+
+      __ bind(notFloat);
+      __ mv(t1, stos);
+      __ bne(flags, t1, notShort);
+      // stos
+      __ load_signed_short(x10, field);
+      __ push(stos);
+      __ j(Done);
+
+      __ bind(notShort);
+      __ mv(t1, btos);
+      __ bne(flags, t1, notByte);
+      // btos
+      __ load_signed_byte(x10, field);
+      __ push(btos);
+      __ j(Done);
+
+      __ bind(notByte);
+      __ mv(t1, ctos);
+      __ bne(flags, t1, notChar);
+      // ctos
+      __ load_unsigned_short(x10, field);
+      __ push(ctos);
+      __ j(Done);
+
+      __ bind(notChar);
+      __ mv(t1, ztos);
+      __ bne(flags, t1, notBool);
+      // ztos
+      __ load_signed_byte(x10, field);
+      __ push(ztos);
+      __ j(Done);
+
+      __ bind(notBool);
+      break;
+    }
+
+    case Bytecodes::_ldc2_w: {
+      Label notLong, notDouble;
+      __ mv(t1, ltos);
+      __ bne(flags, t1, notLong);
+      // ltos
+      __ ld(x10, field);
+      __ push(ltos);
+      __ j(Done);
+
+      __ bind(notLong);
+      __ mv(t1, dtos);
+      __ bne(flags, t1, notDouble);
+      // dtos
+      __ load_double(field);
+      __ push(dtos);
+      __ j(Done);
+
+      __ bind(notDouble);
+      break;
+    }
+
+    default:
+      ShouldNotReachHere();
+  }
+
+  __ stop("bad ldc/condy");  // reached at run time only when no tos state above matched
+}
+
+void TemplateTable::locals_index(Register reg, int offset)
+{
+  __ lbu(reg, at_bcp(offset));  // reg = unsigned byte operand at bcp + offset
+  __ neg(reg, reg);             // negated; the Register-form address helpers scale it and add it to xlocals
+}
+
+void TemplateTable::iload() {  // iload using iload_internal's default RewriteControl
+  iload_internal();
+}
+
+void TemplateTable::nofast_iload() {  // iload variant that never emits the bytecode-rewriting sequence
+  iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
+  transition(vtos, itos);
+  if (RewriteFrequentPairs && rc == may_rewrite) {
+    Label rewrite, done;
+    const Register bc = x14;  // holds the replacement bytecode handed to patch_bytecode
+
+    // get next bytecode
+    __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
+
+    // if _iload, wait to rewrite to iload2.  We only want to rewrite the
+    // last two iloads in a pair.  Comparing against fast_iload means that
+    // the next bytecode is neither an iload nor a caload, and therefore
+    // an iload pair.
+    __ mv(t1, Bytecodes::_iload);
+    __ beq(x11, t1, done);
+
+    // if _fast_iload rewrite to _fast_iload2
+    __ mv(t1, Bytecodes::_fast_iload);
+    __ mv(bc, Bytecodes::_fast_iload2);
+    __ beq(x11, t1, rewrite);
+
+    // if _caload rewrite to _fast_icaload
+    __ mv(t1, Bytecodes::_caload);
+    __ mv(bc, Bytecodes::_fast_icaload);
+    __ beq(x11, t1, rewrite);
+
+    // else rewrite to _fast_iload
+    __ mv(bc, Bytecodes::_fast_iload);
+
+    // rewrite
+    // bc: new bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_iload, bc, x11, false);  // false: bc already holds the replacement, so it is not reloaded
+    __ bind(done);
+
+  }
+
+  // do iload, get the local value into tos
+  locals_index(x11);
+  __ lw(x10, iaddress(x11, x10, _masm));  // x10 serves as the address temp and then receives the loaded value
+}
+
+void TemplateTable::fast_iload2()
+{
+  transition(vtos, itos);
+  locals_index(x11);                      // index of the first load (default operand offset)
+  __ lw(x10, iaddress(x11, x10, _masm));
+  __ push(itos);                          // push the first value
+  locals_index(x11, 3);                   // index of the second load, read from bcp + 3
+  __ lw(x10, iaddress(x11, x10, _masm));  // second value stays in x10
+}
+
+void TemplateTable::fast_iload()
+{
+  transition(vtos, itos);
+  locals_index(x11);                      // x11 = negated local index
+  __ lw(x10, iaddress(x11, x10, _masm));  // x10 = 32-bit value of that local
+}
+
+void TemplateTable::lload()
+{
+  transition(vtos, ltos);
+  __ lbu(x11, at_bcp(1));               // x11 = local index operand
+  __ slli(x11, x11, LogBytesPerWord);   // scale the index to a byte offset
+  __ sub(x11, xlocals, x11);            // x11 = xlocals - scaled index
+  __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));  // 64-bit load at the offset of one further slot
+}
+
+void TemplateTable::fload()
+{
+  transition(vtos, ftos);
+  locals_index(x11);                      // x11 = negated local index
+  __ flw(f10, faddress(x11, t0, _masm));  // f10 = float value of that local; t0 is the address temp
+}
+
+void TemplateTable::dload()
+{
+  transition(vtos, dtos);
+  __ lbu(x11, at_bcp(1));               // x11 = local index operand
+  __ slli(x11, x11, LogBytesPerWord);   // scale the index to a byte offset
+  __ sub(x11, xlocals, x11);            // x11 = xlocals - scaled index
+  __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));  // 64-bit FP load at the offset of one further slot
+}
+
+void TemplateTable::aload()
+{
+  transition(vtos, atos);
+  locals_index(x11);                      // x11 = negated local index
+  __ ld(x10, iaddress(x11, x10, _masm));  // x10 = 64-bit value of that local
+
+}
+
+void TemplateTable::locals_index_wide(Register reg) {
+  __ lhu(reg, at_bcp(2));  // raw 16-bit index operand at bcp + 2
+  __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend
+  __ neg(reg, reg);        // negated, matching locals_index
+}
+
+void TemplateTable::wide_iload() {
+  transition(vtos, itos);
+  locals_index_wide(x11);                // x11 = negated 16-bit local index
+  __ lw(x10, iaddress(x11, t0, _masm));  // x10 = 32-bit value of that local
+}
+
+void TemplateTable::wide_lload()
+{
+  transition(vtos, ltos);
+  __ lhu(x11, at_bcp(2));               // raw 16-bit index operand at bcp + 2
+  __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
+  __ slli(x11, x11, LogBytesPerWord);   // scale the index to a byte offset
+  __ sub(x11, xlocals, x11);            // x11 = xlocals - scaled index
+  __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1)));  // 64-bit load at the offset of one further slot
+}
+
+void TemplateTable::wide_fload()
+{
+  transition(vtos, ftos);
+  locals_index_wide(x11);                 // x11 = negated 16-bit local index
+  __ flw(f10, faddress(x11, t0, _masm));  // f10 = float value of that local
+}
+
+void TemplateTable::wide_dload()
+{
+  transition(vtos, dtos);
+  __ lhu(x11, at_bcp(2));               // raw 16-bit index operand at bcp + 2
+  __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend
+  __ slli(x11, x11, LogBytesPerWord);   // scale the index to a byte offset
+  __ sub(x11, xlocals, x11);            // x11 = xlocals - scaled index
+  __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1)));  // 64-bit FP load at the offset of one further slot
+}
+
+void TemplateTable::wide_aload()
+{
+  transition(vtos, atos);
+  locals_index_wide(x11);                // x11 = negated 16-bit local index
+  __ ld(x10, aaddress(x11, t0, _masm));  // x10 = 64-bit value of that local
+}
+
+void TemplateTable::index_check(Register array, Register index)
+{
+  // destroys x11, t0
+  // check array
+  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
+  // load the array length (zero-extended) for the bounds check;
+  // the index itself is sign-extended by the addw further down
+  const Register length = t0;
+  __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes()));
+  if (index != x11) {
+    assert(x11 != array, "different registers");
+    __ mv(x11, index);  // NOTE(review): this copy precedes the addw below, so x11 is not sign-extended on this path
+  }
+  Label ok;
+  __ addw(index, index, zr);   // sign-extend the low 32 bits of index
+  __ bltu(index, length, ok);  // unsigned compare: a negative index is treated as a huge value and fails too
+  __ mv(x13, array);           // array is placed in x13 before jumping to the throw entry
+  __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ jr(t0);
+  __ bind(ok);
+}
+
+void TemplateTable::iaload()
+{
+  transition(itos, itos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2);  // header offset expressed in 4-byte elements (low bits dropped by >> 2)
+  __ shadd(x10, x11, x10, t0, 2);  // presumably x10 = x10 + (x11 << 2) - confirm shadd's operand order
+  __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+  __ addw(x10, x10, zr); // sign-extend the loaded 32-bit value
+}
+
+void TemplateTable::laload()
+{
+  transition(itos, ltos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3);  // header offset expressed in 8-byte elements
+  __ shadd(x10, x11, x10, t0, 3);  // presumably x10 = x10 + (x11 << 3) - confirm shadd's operand order
+  __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::faload()
+{
+  transition(itos, ftos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2);  // header offset expressed in 4-byte elements
+  __ shadd(x10, x11, x10, t0, 2);  // presumably x10 = x10 + (x11 << 2) - confirm shadd's operand order
+  __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::daload()
+{
+  transition(itos, dtos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3);  // header offset expressed in 8-byte elements
+  __ shadd(x10, x11, x10, t0, 3);  // presumably x10 = x10 + (x11 << 3) - confirm shadd's operand order
+  __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::aaload()
+{
+  transition(itos, atos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop);  // header offset expressed in heap-oop-sized elements
+  __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop);  // presumably x10 = x10 + (x11 << LogBytesPerHeapOop) - confirm
+  do_oop_load(_masm,      // the element is loaded through the oop-load helper
+              Address(x10),
+              x10,
+              IS_ARRAY);
+}
+
+void TemplateTable::baload()
+{
+  transition(itos, itos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0);  // header offset in 1-byte elements (shift by 0)
+  __ shadd(x10, x11, x10, t0, 0);  // presumably x10 = x10 + x11 - confirm shadd's operand order
+  __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+void TemplateTable::caload()
+{
+  transition(itos, itos);
+  __ mv(x11, x10);   // move the incoming index out of x10
+  __ pop_ptr(x10);   // pop the array reference
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1);  // header offset expressed in 2-byte elements
+  __ shadd(x10, x11, x10, t0, 1);  // presumably x10 = x10 + (x11 << 1) - confirm shadd's operand order
+  __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg);
+}
+
+// iload followed by caload frequent pair
+void TemplateTable::fast_icaload()
+{ // Fused template: the index comes from a local slot instead of the tos register; vtos -> itos.
+  transition(vtos, itos);
+  // load index out of locals
+  locals_index(x12);                     // x12 is set up as the local-variable index operand
+  __ lw(x11, iaddress(x12, x11, _masm)); // 32-bit load of that local into x11
+  __ pop_ptr(x10);                       // pop the array reference into x10
+
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11, kills t0
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11
+  __ shadd(x10, x11, x10, t0, 1); // presumably x10 = x10 + (x11 << 1) with t0 as scratch -- TODO confirm
+  __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); // load the element addressed by x10
+}
+
+void TemplateTable::saload()
+{ // Template for saload: short array element load; tos state itos -> itos.
+  transition(itos, itos);
+  __ mv(x11, x10);  // copy x10 into x11; x11 is used as the index below
+  __ pop_ptr(x10);  // pop the array reference into x10
+  // x10: array
+  // x11: index
+  index_check(x10, x11); // leaves index in x11, kills t0
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); // bias index by the base offset, in 2-byte units
+  __ shadd(x10, x11, x10, t0, 1); // presumably x10 = x10 + (x11 << 1) with t0 as scratch -- TODO confirm
+  __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); // load the element addressed by x10
+}
+
+void TemplateTable::iload(int n)
+{ // Template for loading int local slot n (fixed at generation time); vtos -> itos.
+  transition(vtos, itos);
+  __ lw(x10, iaddress(n)); // 32-bit load of local n into x10
+}
+
+void TemplateTable::lload(int n)
+{ // Template for loading long local slot n (fixed at generation time); vtos -> ltos.
+  transition(vtos, ltos);
+  __ ld(x10, laddress(n)); // 64-bit load of local n into x10
+}
+
+void TemplateTable::fload(int n)
+{ // Template for loading float local slot n (fixed at generation time); vtos -> ftos.
+  transition(vtos, ftos);
+  __ flw(f10, faddress(n)); // single-precision load of local n into f10
+}
+
+void TemplateTable::dload(int n)
+{ // Template for loading double local slot n (fixed at generation time); vtos -> dtos.
+  transition(vtos, dtos);
+  __ fld(f10, daddress(n)); // double-precision load of local n into f10
+}
+
+void TemplateTable::aload(int n)
+{ // Template for loading reference local slot n (fixed at generation time); vtos -> atos.
+  transition(vtos, atos);
+  __ ld(x10, iaddress(n)); // 64-bit load of local n into x10; the slot is addressed via iaddress()
+}
+
+void TemplateTable::aload_0() { // aload_0 entry point; uses aload_0_internal's default RewriteControl argument
+  aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() { // aload_0 variant that never rewrites the bytecode
+  aload_0_internal(may_not_rewrite); // may_not_rewrite disables the rewriting branch in aload_0_internal
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) { // rc == may_rewrite enables the bytecode rewriting below
+  // According to bytecode histograms, the pairs:
+  //
+  // _aload_0, _fast_igetfield
+  // _aload_0, _fast_agetfield
+  // _aload_0, _fast_fgetfield
+  //
+  // occur frequently. If RewriteFrequentPairs is set, the (slow)
+  // _aload_0 bytecode checks if the next bytecode is either
+  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+  // rewrites the current bytecode into a pair bytecode; otherwise it
+  // rewrites the current bytecode into _fast_aload_0 that doesn't do
+  // the pair check anymore.
+  //
+  // Note: If the next bytecode is _getfield, the rewrite must be
+  //       delayed, otherwise we may miss an opportunity for a pair.
+  //
+  // Also rewrite frequent pairs
+  //   aload_0, aload_1
+  //   aload_0, iload_1
+  // These bytecodes with a small amount of code are most profitable
+  // to rewrite
+  if (RewriteFrequentPairs && rc == may_rewrite) {
+    Label rewrite, done;
+    const Register bc = x14; // holds the replacement bytecode handed to patch_bytecode
+
+    // get next bytecode
+    __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
+
+    // if _getfield then wait with rewrite
+    __ mv(t1, Bytecodes::_getfield);
+    __ beq(x11, t1, done);
+
+    // if _fast_igetfield then rewrite to _fast_iaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ mv(t1, Bytecodes::_fast_igetfield);
+    __ mv(bc, Bytecodes::_fast_iaccess_0); // bc is set before the compare so it is valid when the branch is taken
+    __ beq(x11, t1, rewrite);
+
+    // if _fast_agetfield then rewrite to _fast_aaccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ mv(t1, Bytecodes::_fast_agetfield);
+    __ mv(bc, Bytecodes::_fast_aaccess_0);
+    __ beq(x11, t1, rewrite);
+
+    // if _fast_fgetfield then rewrite to _fast_faccess_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ mv(t1, Bytecodes::_fast_fgetfield);
+    __ mv(bc, Bytecodes::_fast_faccess_0);
+    __ beq(x11, t1, rewrite);
+
+    // else rewrite to _fast_aload_0
+    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
+    __ mv(bc, Bytecodes::_fast_aload_0);
+
+    // rewrite
+    // bc: new bytecode
+    __ bind(rewrite);
+    patch_bytecode(Bytecodes::_aload_0, bc, x11, false); // x11 is passed as the third argument (presumably a temp)
+
+    __ bind(done);
+  }
+
+  // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
+  aload(0);
+}
+
+void TemplateTable::istore()
+{ // Template for istore with the local index taken from the bytecode stream; itos -> vtos.
+  transition(itos, vtos);
+  locals_index(x11);                    // x11 is set up as the local-variable index operand
+  __ sw(x10, iaddress(x11, t0, _masm)); // 32-bit store of x10 into that local; t0 is passed as a temp
+}
+
+void TemplateTable::lstore()
+{ // Template for lstore with the local index taken from the bytecode stream; ltos -> vtos.
+  transition(ltos, vtos);
+  locals_index(x11);                    // x11 is set up as the local-variable index operand
+  __ sd(x10, laddress(x11, t0, _masm)); // 64-bit store of x10 into that local; t0 is passed as a temp
+}
+
+void TemplateTable::fstore() { // Template for fstore with the local index from the bytecode stream; ftos -> vtos.
+  transition(ftos, vtos);
+  locals_index(x11);                     // x11 is set up as the local-variable index operand
+  __ fsw(f10, iaddress(x11, t0, _masm)); // single-precision store of f10; note the slot is addressed via iaddress()
+}
+
+void TemplateTable::dstore() { // Template for dstore with the local index from the bytecode stream; dtos -> vtos.
+  transition(dtos, vtos);
+  locals_index(x11);                     // x11 is set up as the local-variable index operand
+  __ fsd(f10, daddress(x11, t0, _masm)); // double-precision store of f10 into that local
+}
+
+void TemplateTable::astore()
+{ // Template for astore with the local index from the bytecode stream; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_ptr(x10);                      // the value is popped explicitly because the entry state is vtos
+  locals_index(x11);                    // x11 is set up as the local-variable index operand
+  __ sd(x10, aaddress(x11, t0, _masm)); // 64-bit store of x10 into that local
+}
+
+void TemplateTable::wide_istore() { // Wide-index form of istore; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_i();                           // pop the int value (the store below reads it from x10)
+  locals_index_wide(x11);               // x11 is set up from the wide index operand
+  __ sw(x10, iaddress(x11, t0, _masm)); // 32-bit store of x10 into that local
+}
+
+void TemplateTable::wide_lstore() { // Wide-index form of lstore; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_l();                           // pop the long value (the store below reads it from x10)
+  locals_index_wide(x11);               // x11 is set up from the wide index operand
+  __ sd(x10, laddress(x11, t0, _masm)); // 64-bit store of x10 into that local
+}
+
+void TemplateTable::wide_fstore() { // Wide-index form of fstore; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_f();                            // pop the float value (the store below reads it from f10)
+  locals_index_wide(x11);                // x11 is set up from the wide index operand
+  __ fsw(f10, faddress(x11, t0, _masm)); // single-precision store of f10 into that local
+}
+
+void TemplateTable::wide_dstore() { // Wide-index form of dstore; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_d();                            // pop the double value (the store below reads it from f10)
+  locals_index_wide(x11);                // x11 is set up from the wide index operand
+  __ fsd(f10, daddress(x11, t0, _masm)); // double-precision store of f10 into that local
+}
+
+void TemplateTable::wide_astore() { // Wide-index form of astore; vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_ptr(x10);                      // pop the value into x10
+  locals_index_wide(x11);               // x11 is set up from the wide index operand
+  __ sd(x10, aaddress(x11, t0, _masm)); // 64-bit store of x10 into that local
+}
+
+void TemplateTable::iastore() { // Template for iastore: int array element store; itos -> vtos.
+  transition(itos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // x10: value
+  // x11: index
+  // x13: array
+  index_check(x13, x11); // prefer index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); // bias index by the base offset, in 4-byte units
+  __ shadd(t0, x11, x13, t0, 2); // presumably t0 = x13 + (x11 << 2) -- TODO confirm
+  __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); // store x10 at the element address
+}
+
+void TemplateTable::lastore() { // Template for lastore: long array element store; ltos -> vtos.
+  transition(ltos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // x10: value
+  // x11: index
+  // x13: array
+  index_check(x13, x11); // prefer index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); // bias index by the base offset, in 8-byte units
+  __ shadd(t0, x11, x13, t0, 3); // presumably t0 = x13 + (x11 << 3) -- TODO confirm
+  __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); // store x10 at the element address
+}
+
+void TemplateTable::fastore() { // Template for fastore: float array element store; ftos -> vtos.
+  transition(ftos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // f10: value
+  // x11:  index
+  // x13:  array
+  index_check(x13, x11); // prefer index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); // bias index by the base offset, in 4-byte units
+  __ shadd(t0, x11, x13, t0, 2); // presumably t0 = x13 + (x11 << 2) -- TODO confirm
+  __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); // no GPR value: it is in the ftos register
+}
+
+void TemplateTable::dastore() { // Template for dastore: double array element store; dtos -> vtos.
+  transition(dtos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // f10: value
+  // x11:  index
+  // x13:  array
+  index_check(x13, x11); // prefer index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); // bias index by the base offset, in 8-byte units
+  __ shadd(t0, x11, x13, t0, 3); // presumably t0 = x13 + (x11 << 3) -- TODO confirm
+  __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); // no GPR value: it is in the dtos register
+}
+
+void TemplateTable::aastore() { // Template for aastore: reference array store with a subtype check; vtos -> vtos.
+  Label is_null, ok_is_subtype, done;
+  transition(vtos, vtos);
+  // stack: ..., array, index, value
+  __ ld(x10, at_tos());    // value
+  __ ld(x12, at_tos_p1()); // index
+  __ ld(x13, at_tos_p2()); // array
+
+  index_check(x13, x12);     // kills x11
+  __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); // x14 = index biased by the base offset
+  __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); // presumably x14 = x13 + (x14 << LogBytesPerHeapOop) -- TODO confirm
+
+  Address element_address(x14, 0); // used by both do_oop_store calls below
+
+  // do array store check - check for NULL value first
+  __ beqz(x10, is_null);
+
+  // Move subklass into x11
+  __ load_klass(x11, x10);
+  // Move superklass into x10
+  __ load_klass(x10, x13);
+  __ ld(x10, Address(x10,
+                     ObjArrayKlass::element_klass_offset()));
+  // NOTE(review): the element address was already folded into x14 above; an older comment here described that step.
+
+  // Generate subtype check.  Blows x12, x15
+  // Superklass in x10.  Subklass in x11.
+  __ gen_subtype_check(x11, ok_is_subtype); // TODO(review): the original carried an unexplained "todo" marker here
+
+  // Come here on failure
+  // object is at TOS
+  __ j(Interpreter::_throw_ArrayStoreException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+
+  // Get the value we will store
+  __ ld(x10, at_tos()); // reloaded because x10 was overwritten with the superklass above
+  // Now store using the appropriate barrier
+  do_oop_store(_masm, element_address, x10, IS_ARRAY);
+  __ j(done);
+
+  // Have a NULL in x10, x13=array, x12=index.  Store NULL at ary[idx]
+  __ bind(is_null);
+  __ profile_null_seen(x12);
+
+  // Store a NULL
+  do_oop_store(_masm, element_address, noreg, IS_ARRAY); // noreg is passed as the value for the null store
+
+  // Pop stack arguments
+  __ bind(done);
+  __ add(esp, esp, 3 * Interpreter::stackElementSize); // drop array, index and value (they were only peeked, not popped)
+
+}
+
+void TemplateTable::bastore()
+{ // Template for bastore: byte/boolean array element store; itos -> vtos.
+  transition(itos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // x10: value
+  // x11: index
+  // x13: array
+  index_check(x13, x11); // prefer index in x11
+
+  // Need to check whether array is boolean or byte
+  // since both types share the bastore bytecode.
+  __ load_klass(x12, x13);
+  __ lwu(x12, Address(x12, Klass::layout_helper_offset())); // x12 = the klass' layout helper word
+  Label L_skip;
+  __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); // isolate the bit that tells boolean from byte arrays
+  __ beqz(t0, L_skip);   // bit clear: skip the masking
+  __ andi(x10, x10, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
+  __ bind(L_skip);
+
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); // ">> 0": elements are 1 byte
+
+  __ add(x11, x13, x11); // x11 = array + biased index; a plain add is used instead of shadd
+  __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); // store x10 at the element address
+}
+
+void TemplateTable::castore()
+{ // Template for castore: char array element store; itos -> vtos. Also reused by sastore().
+  transition(itos, vtos);
+  __ pop_i(x11);   // pop the index
+  __ pop_ptr(x13); // pop the array reference
+  // x10: value
+  // x11: index
+  // x13: array
+  index_check(x13, x11); // prefer index in x11
+  __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // bias index by the base offset, in 2-byte units
+  __ shadd(t0, x11, x13, t0, 1); // presumably t0 = x13 + (x11 << 1) -- TODO confirm
+  __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); // store x10 at the element address
+}
+
+void TemplateTable::sastore()
+{ // Template for sastore: delegates entirely to the castore template.
+  castore(); // emits the T_CHAR store sequence, including its transition(itos, vtos)
+}
+
+void TemplateTable::istore(int n)
+{ // Template for storing an int into local slot n (fixed at generation time); itos -> vtos.
+  transition(itos, vtos);
+  __ sd(x10, iaddress(n)); // NOTE(review): 64-bit sd here, whereas istore() uses a 32-bit sw -- confirm this is intended
+}
+
+void TemplateTable::lstore(int n)
+{ // Template for storing a long into local slot n (fixed at generation time); ltos -> vtos.
+  transition(ltos, vtos);
+  __ sd(x10, laddress(n)); // 64-bit store of x10 into local n
+}
+
+void TemplateTable::fstore(int n)
+{ // Template for storing a float into local slot n (fixed at generation time); ftos -> vtos.
+  transition(ftos, vtos);
+  __ fsw(f10, faddress(n)); // single-precision store of f10 into local n
+}
+
+void TemplateTable::dstore(int n)
+{ // Template for storing a double into local slot n (fixed at generation time); dtos -> vtos.
+  transition(dtos, vtos);
+  __ fsd(f10, daddress(n)); // double-precision store of f10 into local n
+}
+
+void TemplateTable::astore(int n)
+{ // Template for storing a reference into local slot n (fixed at generation time); vtos -> vtos.
+  transition(vtos, vtos);
+  __ pop_ptr(x10);         // the value is popped explicitly because the entry state is vtos
+  __ sd(x10, iaddress(n)); // 64-bit store of x10; the slot is addressed via iaddress()
+}
+
+void TemplateTable::pop()
+{ // Template for pop: discards one expression-stack element; vtos -> vtos.
+  transition(vtos, vtos);
+  __ addi(esp, esp, Interpreter::stackElementSize); // advance esp by one element
+}
+
+void TemplateTable::pop2()
+{ // Template for pop2: discards two expression-stack elements; vtos -> vtos.
+  transition(vtos, vtos);
+  __ addi(esp, esp, 2 * Interpreter::stackElementSize); // advance esp by two elements
+}
+
+void TemplateTable::dup()
+{ // Template for dup: duplicates the top stack element; vtos -> vtos.
+  transition(vtos, vtos);
+  __ ld(x10, Address(esp, 0)); // load a (the element at esp)
+  __ push_reg(x10);            // push a
+  // stack: ..., a, a
+}
+
+void TemplateTable::dup_x1()
+{ // Template for dup_x1: rewrites ..., a, b into ..., b, a, b; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ld(x10, at_tos());  // load b
+  __ ld(x12, at_tos_p1());  // load a
+  __ sd(x10, at_tos_p1());  // store b
+  __ sd(x12, at_tos());  // store a
+  __ push_reg(x10);                  // push b
+  // stack: ..., b, a, b
+}
+
+void TemplateTable::dup_x2()
+{ // Template for dup_x2: rewrites ..., a, b, c into ..., c, a, b, c; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b, c
+  __ ld(x10, at_tos());  // load c
+  __ ld(x12, at_tos_p2());  // load a
+  __ sd(x10, at_tos_p2());  // store c in a
+  __ push_reg(x10);      // push c
+  // stack: ..., c, b, c, c
+  __ ld(x10, at_tos_p2());  // load b (offsets are one deeper after the push)
+  __ sd(x12, at_tos_p2());  // store a in b
+  // stack: ..., c, a, c, c
+  __ sd(x10, at_tos_p1());  // store b in c
+  // stack: ..., c, a, b, c
+}
+
+void TemplateTable::dup2()
+{ // Template for dup2: rewrites ..., a, b into ..., a, b, a, b; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ld(x10, at_tos_p1());  // load a
+  __ push_reg(x10);                  // push a
+  __ ld(x10, at_tos_p1());  // load b (same offset: the push moved the top of stack)
+  __ push_reg(x10);                  // push b
+  // stack: ..., a, b, a, b
+}
+
+void TemplateTable::dup2_x1()
+{ // Template for dup2_x1: rewrites ..., a, b, c into ..., b, c, a, b, c; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b, c
+  __ ld(x12, at_tos());     // load c
+  __ ld(x10, at_tos_p1());  // load b
+  __ push_reg(x10);             // push b
+  __ push_reg(x12);             // push c
+  // stack: ..., a, b, c, b, c
+  __ sd(x12, at_tos_p3());  // store c in b
+  // stack: ..., a, c, c, b, c
+  __ ld(x12, at_tos_p4());  // load a
+  __ sd(x12, at_tos_p2());  // store a in 2nd c
+  // stack: ..., a, c, a, b, c
+  __ sd(x10, at_tos_p4());  // store b in a
+  // stack: ..., b, c, a, b, c
+}
+
+void TemplateTable::dup2_x2()
+{ // Template for dup2_x2: rewrites ..., a, b, c, d into ..., c, d, a, b, c, d; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b, c, d
+  __ ld(x12, at_tos());     // load d
+  __ ld(x10, at_tos_p1());  // load c
+  __ push_reg(x10);             // push c
+  __ push_reg(x12);             // push d
+  // stack: ..., a, b, c, d, c, d
+  __ ld(x10, at_tos_p4());  // load b
+  __ sd(x10, at_tos_p2());  // store b in d
+  __ sd(x12, at_tos_p4());  // store d in b
+  // stack: ..., a, d, c, b, c, d
+  __ ld(x12, at_tos_p5());  // load a
+  __ ld(x10, at_tos_p3());  // load c
+  __ sd(x12, at_tos_p3());  // store a in c
+  __ sd(x10, at_tos_p5());  // store c in a
+  // stack: ..., c, d, a, b, c, d
+}
+
+void TemplateTable::swap()
+{ // Template for swap: exchanges the two top stack elements in place; vtos -> vtos.
+  transition(vtos, vtos);
+  // stack: ..., a, b
+  __ ld(x12, at_tos_p1());  // load a
+  __ ld(x10, at_tos());     // load b
+  __ sd(x12, at_tos());     // store a in b
+  __ sd(x10, at_tos_p1());  // store b in a
+  // stack: ..., b, a
+}
+
+void TemplateTable::iop2(Operation op)
+{ // Template for binary int operations; op selects the word-sized instruction; itos -> itos.
+  transition(itos, itos);
+  // x10 <== x11 op x10
+  __ pop_i(x11); // first operand is popped; second operand is already in x10
+  switch (op) {
+    case add  : __ addw(x10, x11, x10);  break;
+    case sub  : __ subw(x10, x11, x10);  break;
+    case mul  : __ mulw(x10, x11, x10);  break;
+    case _and : __ andrw(x10, x11, x10); break;
+    case _or  : __ orrw(x10, x11, x10);  break;
+    case _xor : __ xorrw(x10, x11, x10); break;
+    case shl  : __ sllw(x10, x11, x10);  break; // x11 shifted by the count in x10
+    case shr  : __ sraw(x10, x11, x10);  break; // arithmetic right shift
+    case ushr : __ srlw(x10, x11, x10);  break; // logical right shift
+    default   : ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::lop2(Operation op)
+{ // Template for binary long operations (no shifts here; see lshl/lshr/lushr); ltos -> ltos.
+  transition(ltos, ltos);
+  // x10 <== x11 op x10
+  __ pop_l(x11); // first operand is popped; second operand is already in x10
+  switch (op) {
+    case add  : __ add(x10, x11, x10);  break;
+    case sub  : __ sub(x10, x11, x10);  break;
+    case mul  : __ mul(x10, x11, x10);  break;
+    case _and : __ andr(x10, x11, x10); break;
+    case _or  : __ orr(x10, x11, x10);  break;
+    case _xor : __ xorr(x10, x11, x10); break;
+    default   : ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::idiv()
+{ // Template for idiv: int division with an explicit zero-divisor check; itos -> itos.
+  transition(itos, itos);
+  // explicitly check for div0
+  Label no_div0;
+  __ bnez(x10, no_div0); // divisor (x10) non-zero: continue with the division
+  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t0);             // divisor is zero: jump to the ArithmeticException entry
+  __ bind(no_div0);
+  __ pop_i(x11);         // pop the dividend
+  // x10 <== x11 idiv x10
+  __ corrected_idivl(x10, x11, x10, /* want_remainder */ false);
+}
+
+void TemplateTable::irem()
+{ // Template for irem: int remainder with an explicit zero-divisor check; itos -> itos.
+  transition(itos, itos);
+  // explicitly check for div0
+  Label no_div0;
+  __ bnez(x10, no_div0); // divisor (x10) non-zero: continue with the remainder
+  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t0);             // divisor is zero: jump to the ArithmeticException entry
+  __ bind(no_div0);
+  __ pop_i(x11);         // pop the dividend
+  // x10 <== x11 irem x10
+  __ corrected_idivl(x10, x11, x10, /* want_remainder */ true);
+}
+
+void TemplateTable::lmul()
+{ // Template for lmul: 64-bit multiply; ltos -> ltos.
+  transition(ltos, ltos);
+  __ pop_l(x11);         // pop the first operand
+  __ mul(x10, x10, x11); // x10 = x10 * x11
+}
+
+void TemplateTable::ldiv()
+{ // Template for ldiv: long division with an explicit zero-divisor check; ltos -> ltos.
+  transition(ltos, ltos);
+  // explicitly check for div0
+  Label no_div0;
+  __ bnez(x10, no_div0); // divisor (x10) non-zero: continue with the division
+  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t0);             // divisor is zero: jump to the ArithmeticException entry
+  __ bind(no_div0);
+  __ pop_l(x11);         // pop the dividend
+  // x10 <== x11 ldiv x10
+  __ corrected_idivq(x10, x11, x10, /* want_remainder */ false);
+}
+
+void TemplateTable::lrem()
+{ // Template for lrem: long remainder with an explicit zero-divisor check; ltos -> ltos.
+  transition(ltos, ltos);
+  // explicitly check for div0
+  Label no_div0;
+  __ bnez(x10, no_div0); // divisor (x10) non-zero: continue with the remainder
+  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t0);             // divisor is zero: jump to the ArithmeticException entry
+  __ bind(no_div0);
+  __ pop_l(x11);         // pop the dividend
+  // x10 <== x11 lrem x10
+  __ corrected_idivq(x10, x11, x10, /* want_remainder */ true);
+}
+
+void TemplateTable::lshl()
+{ // Template for lshl: 64-bit shift left; the count arrives as itos, the result is ltos.
+  transition(itos, ltos);
+  // shift count is in x10
+  __ pop_l(x11);         // pop the long value to shift
+  __ sll(x10, x11, x10); // x10 = x11 << x10
+}
+
+void TemplateTable::lshr()
+{ // Template for lshr: 64-bit arithmetic shift right; count is itos, result is ltos.
+  transition(itos, ltos);
+  // shift count is in x10
+  __ pop_l(x11);         // pop the long value to shift
+  __ sra(x10, x11, x10); // x10 = x11 >> x10 (arithmetic)
+}
+
+void TemplateTable::lushr()
+{ // Template for lushr: 64-bit logical shift right; count is itos, result is ltos.
+  transition(itos, ltos);
+  // shift count is in x10
+  __ pop_l(x11);         // pop the long value to shift
+  __ srl(x10, x11, x10); // x10 = x11 >> x10 (logical)
+}
+
+void TemplateTable::fop2(Operation op)
+{ // Template for binary float operations; op selects the instruction; ftos -> ftos.
+  transition(ftos, ftos);
+  switch (op) {
+    case add:
+      __ pop_f(f11);            // first operand is popped; second operand is already in f10
+      __ fadd_s(f10, f11, f10); // f10 = f11 + f10
+      break;
+    case sub:
+      __ pop_f(f11);
+      __ fsub_s(f10, f11, f10); // f10 = f11 - f10
+      break;
+    case mul:
+      __ pop_f(f11);
+      __ fmul_s(f10, f11, f10); // f10 = f11 * f10
+      break;
+    case div:
+      __ pop_f(f11);
+      __ fdiv_s(f10, f11, f10); // f10 = f11 / f10
+      break;
+    case rem:
+      __ fmv_s(f11, f10); // second operand moves to f11 ...
+      __ pop_f(f10);      // ... and the first operand is popped into f10 before the runtime call
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); // remainder is delegated to SharedRuntime::frem
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::dop2(Operation op)
+{ // Template for binary double operations; op selects the instruction; dtos -> dtos.
+  transition(dtos, dtos);
+  switch (op) {
+    case add:
+      __ pop_d(f11);            // first operand is popped; second operand is already in f10
+      __ fadd_d(f10, f11, f10); // f10 = f11 + f10
+      break;
+    case sub:
+      __ pop_d(f11);
+      __ fsub_d(f10, f11, f10); // f10 = f11 - f10
+      break;
+    case mul:
+      __ pop_d(f11);
+      __ fmul_d(f10, f11, f10); // f10 = f11 * f10
+      break;
+    case div:
+      __ pop_d(f11);
+      __ fdiv_d(f10, f11, f10); // f10 = f11 / f10
+      break;
+    case rem:
+      __ fmv_d(f11, f10); // second operand moves to f11 ...
+      __ pop_d(f10);      // ... and the first operand is popped into f10 before the runtime call
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); // remainder is delegated to SharedRuntime::drem
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::ineg()
+{ // Template for ineg: negates the int in x10; itos -> itos.
+  transition(itos, itos);
+  __ negw(x10, x10); // word-sized negate
+}
+
+void TemplateTable::lneg()
+{ // Template for lneg: negates the long in x10; ltos -> ltos.
+  transition(ltos, ltos);
+  __ neg(x10, x10); // 64-bit negate
+}
+
+void TemplateTable::fneg()
+{ // Template for fneg: negates the float in f10; ftos -> ftos.
+  transition(ftos, ftos);
+  __ fneg_s(f10, f10); // single-precision negate
+}
+
+void TemplateTable::dneg()
+{ // Template for dneg: negates the double in f10; dtos -> dtos.
+  transition(dtos, dtos);
+  __ fneg_d(f10, f10); // double-precision negate
+}
+
+void TemplateTable::iinc()
+{ // Template for iinc: adds a signed byte constant to a local variable in place; vtos -> vtos.
+  transition(vtos, vtos);
+  __ load_signed_byte(x11, at_bcp(2)); // get constant
+  locals_index(x12);                   // x12 is set up as the local-variable index operand
+  __ ld(x10, iaddress(x12, x10, _masm)); // 64-bit load of the local; x10 doubles as the address temp
+  __ addw(x10, x10, x11);                // word-sized add of the constant
+  __ sd(x10, iaddress(x12, t0, _masm));  // 64-bit store back into the same local
+}
+
+void TemplateTable::wide_iinc()
+{ // Wide form of iinc: index and constant are both 16-bit operands read in one 32-bit load; vtos -> vtos.
+  transition(vtos, vtos);
+  __ lwu(x11, at_bcp(2)); // get constant and index
+  __ revb_h_w_u(x11, x11); // reverse bytes in half-word (32bit) and zero-extend
+  __ zero_extend(x12, x11, 16); // x12 = low 16 bits of x11; used as the local index below
+  __ neg(x12, x12);             // negate the index before it is handed to iaddress()
+  __ slli(x11, x11, 32);        // shift bits [31:16] to the top of the register ...
+  __ srai(x11, x11, 48);        // ... and arithmetic-shift back: x11 = sign-extended upper half (the constant)
+  __ ld(x10, iaddress(x12, t0, _masm)); // 64-bit load of the local
+  __ addw(x10, x10, x11);               // word-sized add of the constant
+  __ sd(x10, iaddress(x12, t0, _masm)); // 64-bit store back into the same local
+}
+
+void TemplateTable::convert()
+{ // Template shared by all primitive conversion bytecodes; dispatches on bytecode().
+  // Checking
+#ifdef ASSERT
+  { // debug builds only: derive the expected tos states from the bytecode and pass them to transition()
+    TosState tos_in  = ilgl;
+    TosState tos_out = ilgl;
+    switch (bytecode()) { // input tos state, by source type
+      case Bytecodes::_i2l: // fall through
+      case Bytecodes::_i2f: // fall through
+      case Bytecodes::_i2d: // fall through
+      case Bytecodes::_i2b: // fall through
+      case Bytecodes::_i2c: // fall through
+      case Bytecodes::_i2s: tos_in = itos; break;
+      case Bytecodes::_l2i: // fall through
+      case Bytecodes::_l2f: // fall through
+      case Bytecodes::_l2d: tos_in = ltos; break;
+      case Bytecodes::_f2i: // fall through
+      case Bytecodes::_f2l: // fall through
+      case Bytecodes::_f2d: tos_in = ftos; break;
+      case Bytecodes::_d2i: // fall through
+      case Bytecodes::_d2l: // fall through
+      case Bytecodes::_d2f: tos_in = dtos; break;
+      default             : ShouldNotReachHere();
+    }
+    switch (bytecode()) { // output tos state, by destination type
+      case Bytecodes::_l2i: // fall through
+      case Bytecodes::_f2i: // fall through
+      case Bytecodes::_d2i: // fall through
+      case Bytecodes::_i2b: // fall through
+      case Bytecodes::_i2c: // fall through
+      case Bytecodes::_i2s: tos_out = itos; break;
+      case Bytecodes::_i2l: // fall through
+      case Bytecodes::_f2l: // fall through
+      case Bytecodes::_d2l: tos_out = ltos; break;
+      case Bytecodes::_i2f: // fall through
+      case Bytecodes::_l2f: // fall through
+      case Bytecodes::_d2f: tos_out = ftos; break;
+      case Bytecodes::_i2d: // fall through
+      case Bytecodes::_l2d: // fall through
+      case Bytecodes::_f2d: tos_out = dtos; break;
+      default             : ShouldNotReachHere();
+    }
+    transition(tos_in, tos_out);
+  }
+#endif // ASSERT
+
+  // Conversion
+  switch (bytecode()) { // integer values live in x10, floating-point values in f10
+    case Bytecodes::_i2l:
+      __ sign_extend(x10, x10, 32); // sign-extend from 32 bits
+      break;
+    case Bytecodes::_i2f:
+      __ fcvt_s_w(f10, x10);
+      break;
+    case Bytecodes::_i2d:
+      __ fcvt_d_w(f10, x10);
+      break;
+    case Bytecodes::_i2b:
+      __ sign_extend(x10, x10, 8); // sign-extend from 8 bits
+      break;
+    case Bytecodes::_i2c:
+      __ zero_extend(x10, x10, 16); // zero-extend from 16 bits
+      break;
+    case Bytecodes::_i2s:
+      __ sign_extend(x10, x10, 16); // sign-extend from 16 bits
+      break;
+    case Bytecodes::_l2i:
+      __ addw(x10, x10, zr); // word-sized add of zero: keeps the low 32 bits
+      break;
+    case Bytecodes::_l2f:
+      __ fcvt_s_l(f10, x10);
+      break;
+    case Bytecodes::_l2d:
+      __ fcvt_d_l(f10, x10);
+      break;
+    case Bytecodes::_f2i:
+      __ fcvt_w_s_safe(x10, f10); // float->integer conversions use the *_safe helper variants
+      break;
+    case Bytecodes::_f2l:
+      __ fcvt_l_s_safe(x10, f10);
+      break;
+    case Bytecodes::_f2d:
+      __ fcvt_d_s(f10, f10);
+      break;
+    case Bytecodes::_d2i:
+      __ fcvt_w_d_safe(x10, f10);
+      break;
+    case Bytecodes::_d2l:
+      __ fcvt_l_d_safe(x10, f10);
+      break;
+    case Bytecodes::_d2f:
+      __ fcvt_s_d(f10, f10);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+void TemplateTable::lcmp()
+{ // Template for lcmp: compares two longs and leaves an int result; ltos -> itos.
+  transition(ltos, itos);
+  __ pop_l(x11);            // pop the first operand; the second is already in x10
+  __ cmp_l2i(t0, x11, x10); // comparison result goes to t0 so neither input is overwritten early
+  __ mv(x10, t0);           // move the result into x10
+}
+
+void TemplateTable::float_cmp(bool is_float, int unordered_result)
+{
+  // Shared body of the floating-point compare templates: is_float selects
+  // the single-precision path, otherwise the double-precision one is used.
+  //
+  // f11 (popped here) is the primary operand, f10 the secondary one, and
+  // x10 is handed to the compare helper together with unordered_result:
+  //   unordered_result < 0:
+  //     we want -1 for unordered or less than, 0 for equal and 1 for
+  //     greater than.
+  //   else:
+  //     we want -1 for less than, 0 for equal and 1 for unordered or
+  //     greater than.
+  //
+  // For instruction feq, flt and fle, the result is 0 if either operand is NaN
+  if (!is_float) {
+    // double-precision operands
+    __ pop_d(f11);
+    __ double_compare(x10, f11, f10, unordered_result);
+    return;
+  }
+  // single-precision operands
+  __ pop_f(f11);
+  __ float_compare(x10, f11, f10, unordered_result);
+}
+
+void TemplateTable::branch(bool is_jsr, bool is_wide)
+{ // Shared branch template: is_jsr selects the jsr path, is_wide a 32-bit instead of a 16-bit displacement.
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
+  __ profile_taken_branch(x10, x11);
+  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
+                             InvocationCounter::counter_offset();
+  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
+                              InvocationCounter::counter_offset();
+
+  // load branch displacement
+  if (!is_wide) {
+    __ lhu(x12, at_bcp(1));
+    __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend
+  } else {
+    __ lwu(x12, at_bcp(1));
+    __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend
+  }
+
+  // Handle all the JSR stuff here, then exit.
+  // It's much shorter and cleaner than intermingling with the non-JSR
+  // normal-branch stuff occurring below.
+
+  if (is_jsr) {
+    // compute return address as bci
+    __ ld(t1, Address(xmethod, Method::const_offset()));
+    __ add(t1, t1,
+           in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); // 5 or 3: subtracted so x11 ends up past this instruction
+    __ sub(x11, xbcp, t1);
+    __ push_i(x11);
+    // Adjust the bcp by the displacement in x12 (16-bit, or 32-bit when is_wide)
+    __ add(xbcp, xbcp, x12);
+    __ load_unsigned_byte(t0, Address(xbcp, 0));
+    // load the next target bytecode into t0, it is the argument of dispatch_only
+    __ dispatch_only(vtos, /*generate_poll*/true);
+    return;
+  }
+
+  // Normal (non-jsr) branch handling
+
+  // Adjust the bcp by the displacement in x12
+  __ add(xbcp, xbcp, x12);
+
+  assert(UseLoopCounter || !UseOnStackReplacement,
+         "on-stack-replacement requires loop counters");
+  Label backedge_counter_overflow;
+  Label profile_method;
+  Label dispatch;
+  if (UseLoopCounter) {
+    // increment backedge counter for backward branches
+    // x10: MDO
+    // x11: MDO bumped taken-count
+    // x12: target offset
+    __ bgtz(x12, dispatch); // count only if backward branch
+
+    // check if MethodCounters exists
+    Label has_counters;
+    __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+    __ bnez(t0, has_counters);
+    __ push_reg(x10); // x10, x11 and x12 are saved around the VM call ...
+    __ push_reg(x11);
+    __ push_reg(x12);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+            InterpreterRuntime::build_method_counters), xmethod);
+    __ pop_reg(x12);  // ... and restored in reverse order
+    __ pop_reg(x11);
+    __ pop_reg(x10);
+    __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+    __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory
+    __ bind(has_counters);
+
+    if (TieredCompilation) {
+      Label no_mdo;
+      int increment = InvocationCounter::count_increment;
+      if (ProfileInterpreter) {
+        // Are we profiling?
+        __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset())));
+        __ beqz(x11, no_mdo);
+        // Increment the MDO backedge counter
+        const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) +
+                                           in_bytes(InvocationCounter::counter_offset()));
+        const Address mask(x11, in_bytes(MethodData::backedge_mask_offset()));
+        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+                                   x10, t0, false,
+                                   UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
+        __ j(dispatch);
+      }
+      __ bind(no_mdo);
+      // Increment backedge counter in MethodCounters*
+      __ ld(t0, Address(xmethod, Method::method_counters_offset()));
+      const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset()));
+      __ increment_mask_and_jump(Address(t0, be_offset), increment, mask,
+                                 x10, t1, false,
+                                 UseOnStackReplacement ? &backedge_counter_overflow : &dispatch);
+    } else { // not TieredCompilation
+      // increment counter
+      __ ld(t1, Address(xmethod, Method::method_counters_offset()));
+      __ lwu(x10, Address(t1, be_offset));     // load backedge counter
+      __ addw(t0, x10, InvocationCounter::count_increment); // increment counter
+      __ sw(t0, Address(t1, be_offset));       // store counter
+
+      __ lwu(x10, Address(t1, inv_offset));    // load invocation counter
+      __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits
+      __ addw(x10, x10, t0);        // add both counters
+
+      if (ProfileInterpreter) {
+        // Test to see if we should create a method data oop
+        __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
+        __ blt(x10, t0, dispatch);
+
+        // if no method data exists, go to profile method
+        __ test_method_data_pointer(x10, profile_method);
+
+        if (UseOnStackReplacement) {
+          // check for overflow against x11 which is the MDO taken count
+          __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
+          __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower
+
+          // When ProfileInterpreter is on, the backedge_count comes
+          // from the MethodData*, which value does not get reset on
+          // the call to frequency_counter_overflow().  To avoid
+          // excessive calls to the overflow routine while the method is
+          // being compiled, add a second test to make sure the overflow
+          // function is called only once every overflow_frequency.
+          const int overflow_frequency = 1024;
+          __ andi(x11, x11, overflow_frequency - 1);
+          __ beqz(x11, backedge_counter_overflow);
+
+        }
+      } else {
+        if (UseOnStackReplacement) {
+          // check for overflow against x10, which is the sum of the
+          // counters
+          __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
+          __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual
+        }
+      }
+    }
+    __ bind(dispatch);
+  }
+  // Pre-load the next target bytecode into t0
+  __ load_unsigned_byte(t0, Address(xbcp, 0));
+
+  // continue with the bytecode @ target
+  // t0: target bytecode
+  // xbcp: target bcp
+  __ dispatch_only(vtos, /*generate_poll*/true);
+
+  if (UseLoopCounter) { // out-of-line paths, emitted after the dispatch above
+    if (ProfileInterpreter && !TieredCompilation) {
+      // Out-of-line code to allocate method data oop.
+      __ bind(profile_method);
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ load_unsigned_byte(x11, Address(xbcp, 0));  // NOTE(review): dispatch reloads the bytecode into t0, so this x11 load looks unused -- confirm
+      __ set_method_data_pointer_for_bcp();
+      __ j(dispatch);
+    }
+
+    if (UseOnStackReplacement) {
+      // invocation counter overflow
+      __ bind(backedge_counter_overflow);
+      __ neg(x12, x12);
+      __ add(x12, x12, xbcp);     // branch xbcp
+      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address,
+                                  InterpreterRuntime::frequency_counter_overflow),
+                 x12);
+      __ load_unsigned_byte(x11, Address(xbcp, 0));  // NOTE(review): as above, dispatch reloads into t0 -- confirm this load is needed
+
+      // x10: osr nmethod (osr ok) or NULL (osr not possible)
+      // x11: target bytecode
+      // x12: temporary
+      __ beqz(x10, dispatch);     // test result -- no osr if null
+      // nmethod may have been invalidated (VM may block upon call_VM return)
+      __ lbu(x12, Address(x10, nmethod::state_offset()));
+      if (nmethod::in_use != 0) { // the subtraction is only emitted when in_use is a non-zero constant
+        __ sub(x12, x12, nmethod::in_use);
+      }
+      __ bnez(x12, dispatch);     // state != in_use: fall back to normal dispatch
+
+      // We have the address of an on stack replacement routine in x10
+      // We need to prepare to execute the OSR method. First we must
+      // migrate the locals and monitors off of the stack.
+
+      __ mv(x9, x10);                             // save the nmethod
+
+      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+      // x10 is OSR buffer, move it to expected parameter location
+      __ mv(j_rarg0, x10);
+
+      // remove activation
+      // get sender esp
+      __ ld(esp,
+          Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize));
+      // remove frame anchor
+      __ leave();
+      // Ensure compiled code always sees stack at proper alignment
+      __ andi(sp, esp, -16);
+
+      // and begin the OSR nmethod
+      __ ld(t0, Address(x9, nmethod::osr_entry_point_offset()));
+      __ jr(t0);
+    }
+  }
+}
+
+void TemplateTable::if_0cmp(Condition cc)
+{ // Template for conditional branches that compare an int against zero; itos -> vtos.
+  transition(itos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+
+  __ addw(x10, x10, zr); // word-sized add of zero, applied to x10 before the compare
+  switch (cc) {          // each case emits the inverse test and skips the branch when it holds
+    case equal:
+      __ bnez(x10, not_taken);
+      break;
+    case not_equal:
+      __ beqz(x10, not_taken);
+      break;
+    case less:
+      __ bgez(x10, not_taken);
+      break;
+    case less_equal:
+      __ bgtz(x10, not_taken);
+      break;
+    case greater:
+      __ blez(x10, not_taken);
+      break;
+    case greater_equal:
+      __ bltz(x10, not_taken);
+      break;
+    default:
+      break; // any other condition emits no test, so the branch code below is always reached
+  }
+
+  branch(false, false); // taken path: not a jsr, not wide
+  __ bind(not_taken);
+  __ profile_not_taken_branch(x10);
+}
+
+void TemplateTable::if_icmp(Condition cc)  // Emits the template for a conditional branch comparing two ints: popped value (x11) vs. x10.
+{
+  transition(itos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  __ pop_i(x11);  // x11 = int popped from the expression stack (left-hand operand of the compares below)
+  __ addw(x10, x10, zr);  // word add of zero: leaves x10 as a sign-extended 32-bit value
+  switch (cc) {  // each case jumps to not_taken on the INVERSE of "x11 <cc> x10"
+    case equal:
+      __ bne(x11, x10, not_taken);
+      break;
+    case not_equal:
+      __ beq(x11, x10, not_taken);
+      break;
+    case less:
+      __ bge(x11, x10, not_taken);
+      break;
+    case less_equal:
+      __ bgt(x11, x10, not_taken);
+      break;
+    case greater:
+      __ ble(x11, x10, not_taken);
+      break;
+    case greater_equal:
+      __ blt(x11, x10, not_taken);
+      break;
+    default:
+      break;  // no test is emitted for other conditions, so the taken path below always runs
+  }
+
+  branch(false, false);  // taken path; NOTE(review): arguments presumably (is_jsr, is_wide) - confirm
+  __ bind(not_taken);
+  __ profile_not_taken_branch(x10);  // fall-through path; x10 presumably serves as scratch here - confirm
+}
+
+void TemplateTable::if_nullcmp(Condition cc)  // Emits the template for a conditional branch testing the reference in x10 against null.
+{
+  transition(atos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  if (cc == equal) {
+    __ bnez(x10, not_taken);  // branch-if-null: skip the branch when x10 is non-null
+  } else {  // every condition other than 'equal' is handled as not_equal
+    __ beqz(x10, not_taken);  // branch-if-non-null: skip the branch when x10 is null
+  }
+  branch(false, false);  // taken path; NOTE(review): arguments presumably (is_jsr, is_wide) - confirm
+  __ bind(not_taken);
+  __ profile_not_taken_branch(x10);  // fall-through path; x10 presumably serves as scratch here - confirm
+}
+
+void TemplateTable::if_acmp(Condition cc)  // Emits the template for a conditional branch comparing two references: popped value (x11) vs. x10.
+{
+  transition(atos, vtos);
+  // assume branch is more often taken than not (loops use backward branches)
+  Label not_taken;
+  __ pop_ptr(x11);  // x11 = reference popped from the expression stack
+
+  if (cc == equal) {
+    __ bne(x11, x10, not_taken);  // skip the branch when the references differ
+  } else if (cc == not_equal) {
+    __ beq(x11, x10, not_taken);  // skip the branch when the references are identical
+  }  // any other condition emits no compare, so the taken path below always runs
+  branch(false, false);  // taken path; NOTE(review): arguments presumably (is_jsr, is_wide) - confirm
+  __ bind(not_taken);
+  __ profile_not_taken_branch(x10);  // fall-through path; x10 presumably serves as scratch here - confirm
+}
+
+void TemplateTable::ret() {  // Emits the 'ret' template: reloads xbcp from a return bci stored in a local and dispatches.
+  transition(vtos, vtos);
+  // We might be moving to a safepoint.  The thread which calls
+  // Interpreter::notice_safepoints() will effectively flush its cache
+  // when it makes a system call, but we need to do something to
+  // ensure that we see the changed dispatch table.
+  __ membar(MacroAssembler::LoadLoad);
+
+  locals_index(x11);  // presumably x11 = index of the local named by the bytecode operand - confirm
+  __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp
+  __ profile_ret(x11, x12);
+  __ ld(xbcp, Address(xmethod, Method::const_offset()));  // xbcp = field at Method::const_offset() (presumably the ConstMethod*)
+  __ add(xbcp, xbcp, x11);  // + return bci
+  __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));  // + offset of the bytecodes within that structure
+  __ dispatch_next(vtos, 0, /*generate_poll*/true);
+}
+
+void TemplateTable::wide_ret() {  // Wide-index variant of ret(); NOTE(review): unlike ret(), no LoadLoad membar is emitted here - confirm intentional
+  transition(vtos, vtos);
+  locals_index_wide(x11);  // presumably x11 = wide (16-bit) local index from the bytecode stream - confirm
+  __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp
+  __ profile_ret(x11, x12);
+  __ ld(xbcp, Address(xmethod, Method::const_offset()));  // xbcp = field at Method::const_offset() (presumably the ConstMethod*)
+  __ add(xbcp, xbcp, x11);  // + return bci
+  __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset()));  // + offset of the bytecodes within that structure
+  __ dispatch_next(vtos, 0, /*generate_poll*/true);
+}
+
+void TemplateTable::tableswitch() {  // Emits the 'tableswitch' template: range-checks the key in x10 and jumps via the offset table.
+  Label default_case, continue_execution;
+  transition(itos, vtos);
+  // align xbcp
+  __ la(x11, at_bcp(BytesPerInt));
+  __ andi(x11, x11, -BytesPerInt);  // x11 = BytesPerInt-aligned base of the operands (default, lo, hi, offsets...)
+  // load lo & hi
+  __ lwu(x12, Address(x11, BytesPerInt));
+  __ lwu(x13, Address(x11, 2 * BytesPerInt));
+  __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend
+  __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+  // check against lo & hi
+  __ blt(x10, x12, default_case);
+  __ bgt(x10, x13, default_case);
+  // lookup dispatch offset
+  __ subw(x10, x10, x12);  // x10 = key - lo (zero-based table index)
+  __ shadd(x13, x10, x11, t0, 2);  // presumably x13 = x11 + (x10 << 2), t0 as scratch - confirm
+  __ lwu(x13, Address(x13, 3 * BytesPerInt));  // raw (not yet byte-reversed) jump offset; table starts after default/lo/hi
+  __ profile_switch_case(x10, x11, x12);
+  // continue execution
+  __ bind(continue_execution);
+  __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+  __ add(xbcp, xbcp, x13);  // advance the bytecode pointer by the selected offset
+  __ load_unsigned_byte(t0, Address(xbcp));  // t0 = bytecode at the new xbcp, fetched ahead of dispatch_only
+  __ dispatch_only(vtos, /*generate_poll*/true);
+  // handle default
+  __ bind(default_case);
+  __ profile_switch_default(x10);
+  __ lwu(x13, Address(x11, 0));  // raw default offset is the first aligned word
+  __ j(continue_execution);
+}
+
+void TemplateTable::lookupswitch() {  // Emits only a stop(): per the message below, this bytecode is expected to have been rewritten.
+  transition(itos, itos);
+  __ stop("lookupswitch bytecode should have been rewritten");  // halts with this message if the template is ever executed
+}
+
+void TemplateTable::fast_linearswitch() {  // Emits a linear scan over the (match, offset) pairs, searching for the key in x10.
+  transition(itos, vtos);
+  Label loop_entry, loop, found, continue_execution;
+  // bswap x10 so we can avoid bswapping the table entries
+  __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend
+  // align xbcp
+  __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of
+                                    // this instruction (change offsets
+                                    // below)
+  __ andi(x9, x9, -BytesPerInt);  // x9 = BytesPerInt-aligned base of the operands
+  // set counter
+  __ lwu(x11, Address(x9, BytesPerInt));
+  __ revb_w(x11, x11);  // convert the counter to native byte order
+  __ j(loop_entry);
+  // table search
+  __ bind(loop);
+  __ shadd(t0, x11, x9, t0, 3);  // presumably t0 = x9 + (x11 << 3): 8 bytes per pair - confirm
+  __ lw(t0, Address(t0, 2 * BytesPerInt));  // raw match value of pair x11 (pairs start two words past the base)
+  __ beq(x10, t0, found);  // both sides are in stream byte order, see the bswap of x10 above
+  __ bind(loop_entry);
+  __ addi(x11, x11, -1);  // scan from the last pair down to pair 0
+  __ bgez(x11, loop);
+  // default case
+  __ profile_switch_default(x10);
+  __ lwu(x13, Address(x9, 0));  // raw default offset is the first aligned word
+  __ j(continue_execution);
+  // entry found -> get offset
+  __ bind(found);
+  __ shadd(t0, x11, x9, t0, 3);
+  __ lwu(x13, Address(t0, 3 * BytesPerInt));  // raw offset word that follows the matching value
+  __ profile_switch_case(x11, x10, x9);
+  // continue execution
+  __ bind(continue_execution);
+  __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend
+  __ add(xbcp, xbcp, x13);  // advance the bytecode pointer by the selected offset
+  __ lbu(t0, Address(xbcp, 0));  // t0 = bytecode at the new xbcp, fetched ahead of dispatch_only
+  __ dispatch_only(vtos, /*generate_poll*/true);
+}
+
+void TemplateTable::fast_binaryswitch() {  // Emits a binary search over the (match, offset) pairs for the key in x10.
+  transition(itos, vtos);
+  // Implementation using the following core algorithm:
+  //
+  // int binary_search(int key, LookupswitchPair* array, int n)
+  //   binary_search start:
+  //   #Binary search according to "Methodik des Programmierens" by
+  //   # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
+  //   int i = 0;
+  //   int j = n;
+  //   while (i + 1 < j) do
+  //     # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
+  //     # with      Q: for all i: 0 <= i < n: key < a[i]
+  //     # where a stands for the array and assuming that the (nonexistent)
+  //     # element a[n] is infinitely big.
+  //     int h = (i + j) >> 1
+  //     # i < h < j
+  //     if (key < array[h].fast_match())
+  //     then [j = h]
+  //     else [i = h]
+  //   end
+  //   # R: a[i] <= key < a[i+1] or Q
+  //   # (i.e., if key is within array, i is the correct index)
+  //   return i
+  // binary_search end
+
+
+  // Register allocation
+  const Register key   = x10; // already set (tosca)
+  const Register array = x11;
+  const Register i     = x12;
+  const Register j     = x13;
+  const Register h     = x14;
+  const Register temp  = x15;
+
+  // Find array start
+  __ la(array, at_bcp(3 * BytesPerInt));  // btw: should be able to
+                                          // get rid of this
+                                          // instruction (change
+                                          // offsets below)
+  __ andi(array, array, -BytesPerInt);
+
+  // Initialize i & j
+  __ mv(i, zr);                            // i = 0
+  __ lwu(j, Address(array, -BytesPerInt)); // j = length(array)
+
+  // Convert j into native byteordering
+  __ revb_w(j, j);
+
+  // And start
+  Label entry;
+  __ j(entry);
+
+  // binary search loop
+  {
+    Label loop;
+    __ bind(loop);
+    __ addw(h, i, j);                           // h = i + j
+    __ srliw(h, h, 1);                          // h = (i + j) >> 1
+    // if [key < array[h].fast_match()]
+    // then [j = h]
+    // else [i = h]
+    // Convert array[h].match to native byte-ordering before compare
+    __ shadd(temp, h, array, temp, 3);
+    __ ld(temp, Address(temp, 0));  // NOTE(review): 8-byte load although only a 32-bit word is byte-reversed next - confirm lwu would not suffice
+    __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
+
+    Label L_done, L_greater;
+    __ bge(key, temp, L_greater);
+    // if [key < array[h].fast_match()] then j = h
+    __ mv(j, h);
+    __ j(L_done);
+    __ bind(L_greater);
+    // if [key >= array[h].fast_match()] then i = h
+    __ mv(i, h);
+    __ bind(L_done);
+
+    // while [i + 1 < j]
+    __ bind(entry);
+    __ addiw(h, i, 1);         // i + 1
+    __ blt(h, j, loop);        // i + 1 < j
+  }
+
+  // end of binary search, result index is i (must check again!)
+  Label default_case;
+  // Convert array[i].match to native byte-ordering before compare
+  __ shadd(temp, i, array, temp, 3);
+  __ ld(temp, Address(temp, 0));  // NOTE(review): same 8-byte load pattern as inside the loop - confirm
+  __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend
+  __ bne(key, temp, default_case);
+
+  // entry found -> j = offset
+  __ shadd(temp, i, array, temp, 3);
+  __ lwu(j, Address(temp, BytesPerInt));  // raw offset word that follows the match word of pair i
+  __ profile_switch_case(i, key, array);
+  __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
+
+  __ add(temp, xbcp, j);
+  __ load_unsigned_byte(t0, Address(temp, 0));  // t0 = bytecode at the branch target, fetched ahead of dispatch_only
+
+  __ add(xbcp, xbcp, j);
+  __ la(xbcp, Address(xbcp, 0));  // NOTE(review): address of xbcp + 0 looks like a no-op - confirm
+  __ dispatch_only(vtos, /*generate_poll*/true);
+
+  // default case -> j = default offset
+  __ bind(default_case);
+  __ profile_switch_default(i);
+  __ lwu(j, Address(array, -2 * BytesPerInt));  // raw default offset sits two words before the pair array
+  __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend
+
+  __ add(temp, xbcp, j);
+  __ load_unsigned_byte(t0, Address(temp, 0));  // t0 = bytecode at the default target, fetched ahead of dispatch_only
+
+  __ add(xbcp, xbcp, j);
+  __ la(xbcp, Address(xbcp, 0));  // NOTE(review): address of xbcp + 0 looks like a no-op - confirm
+  __ dispatch_only(vtos, /*generate_poll*/true);
+}
+
+void TemplateTable::_return(TosState state)  // Emits the return template for the given tos state, including the finalizer-registering variant.
+{
+  transition(state, state);
+  assert(_desc->calls_vm(),
+         "inconsistent calls_vm information"); // call in remove_activation
+
+  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+    assert(state == vtos, "only valid state");
+
+    __ ld(c_rarg1, aaddress(0));  // c_rarg1 = local 0 (presumably the receiver) - confirm
+    __ load_klass(x13, c_rarg1);
+    __ lwu(x13, Address(x13, Klass::access_flags_offset()));  // x13 = access flags of that klass
+    Label skip_register_finalizer;
+    __ andi(t0, x13, JVM_ACC_HAS_FINALIZER);
+    __ beqz(t0, skip_register_finalizer);  // flag clear: no VM call needed
+
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1);
+
+    __ bind(skip_register_finalizer);
+  }
+
+  // Issue a StoreStore barrier after all stores but before return
+  // from any constructor for any class with a final field. We don't
+  // know if this is a finalizer, so we always do so.
+  if (_desc->bytecode() == Bytecodes::_return) {
+    __ membar(MacroAssembler::StoreStore);
+  }
+
+  // Narrow result if state is itos but result type is smaller.
+  // Need to narrow in the return bytecode rather than in generate_return_entry
+  // since compiled code callers expect the result to already be narrowed.
+  if (state == itos) {
+    __ narrow(x10);
+  }
+
+  __ remove_activation(state);  // tears down the interpreter activation (this is the VM-calling part asserted above)
+  __ ret();
+}
+
+
+// ----------------------------------------------------------------------------
+// Volatile variables demand their effects be made known to all CPU's
+// in order.  Store buffers on most chips allow reads & writes to
+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
+// without some kind of memory barrier (i.e., it's not sufficient that
+// the interpreter does not reorder volatile references, the hardware
+// also must not reorder them).
+//
+// According to the new Java Memory Model (JMM):
+// (1) All volatiles are serialized wrt each other.  ALSO reads &
+//     writes act as acquire & release, so:
+// (2) A read cannot let unrelated NON-volatile memory refs that
+//     happen after the read float up to before the read.  It's OK for
+//     non-volatile memory refs that happen before the volatile read to
+//     float down below it.
+// (3) Similarly, a volatile write cannot let unrelated NON-volatile
+//     memory refs that happen BEFORE the write float down to after the
+//     write.  It's OK for non-volatile memory refs that happen after the
+//     volatile write to float up before it.
+//
+// We only put in barriers around volatile refs (they are expensive),
+// not _between_ memory refs (that would require us to track the
+// flavor of the previous memory refs).  Requirements (2) and (3)
+// require some barriers before volatile stores and after volatile
+// loads.  These nearly cover requirement (1) but miss the
+// volatile-store-volatile-load case.  This final case is placed after
+// volatile-stores although it could just as well go before
+// volatile-loads.
+
+void TemplateTable::resolve_cache_and_index(int byte_no,  // Emits code that loads Rcache/index and calls the VM resolver if the entry is not yet resolved.
+                                            Register Rcache,
+                                            Register index,
+                                            size_t index_size) {
+  const Register temp = x9;
+  assert_different_registers(Rcache, index, temp);
+
+  Label resolved;
+
+  Bytecodes::Code code = bytecode();
+  switch (code) {  // the nofast variants are resolved as their base bytecodes
+    case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+    case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+    default: break;
+  }
+
+  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);  // temp presumably receives the bytecode recorded in the entry - confirm
+  __ mv(t0, (int) code);
+  __ beq(temp, t0, resolved);  // recorded bytecode matches: entry is already resolved
+
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ mv(temp, (int) code);  // argument for the VM call: the bytecode to resolve
+  __ call_VM(noreg, entry, temp);
+
+  // Update registers with resolved info
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
+  // n.b. unlike x86 Rcache is now rcpool plus the indexed offset
+  // so all clients of this method must be modified accordingly
+  __ bind(resolved);
+}
+
+// The Rcache and index registers must be set before call
+// n.b unlike x86 cache already includes the index offset
+void TemplateTable::load_field_cp_cache_entry(Register obj,  // Emits loads of the field offset and flags from the cache entry; for statics also the holder's mirror into obj.
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+  assert_different_registers(cache, index, flags, off);  // note: obj is not part of this check
+
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+  // Field offset
+  __ ld(off, Address(cache, in_bytes(cp_base_offset +
+                                     ConstantPoolCacheEntry::f2_offset())));
+  // Flags
+  __ lwu(flags, Address(cache, in_bytes(cp_base_offset +
+                                        ConstantPoolCacheEntry::flags_offset())));
+
+  // klass overwrite register
+  if (is_static) {
+    __ ld(obj, Address(cache, in_bytes(cp_base_offset +
+                                       ConstantPoolCacheEntry::f1_offset())));
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+    __ ld(obj, Address(obj, mirror_offset));  // obj = value at Klass::java_mirror_offset() of the f1 klass (a handle, resolved next)
+    __ resolve_oop_handle(obj);
+  }
+}
+
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,  // Emits resolution of the call site's cache entry and loads method, optional itable index and flags.
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal, /*unused*/
+                                               bool is_invokedynamic) {
+  // setup registers
+  const Register cache = t1;
+  const Register index = x14;
+  assert_different_registers(method, flags);
+  assert_different_registers(method, cache, index);
+  assert_different_registers(itable_index, flags);
+  assert_different_registers(itable_index, cache, index);
+  // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
+  const int method_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                     (is_invokevirtual ?
+                                      ConstantPoolCacheEntry::f2_offset() :
+                                      ConstantPoolCacheEntry::f1_offset()));
+  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset());
+  // access constant pool cache fields
+  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::f2_offset());
+
+  const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));  // invokedynamic uses a 4-byte index operand, all others 2 bytes
+  resolve_cache_and_index(byte_no, cache, index, index_size);
+  __ ld(method, Address(cache, method_offset));  // f2 for invokevirtual, f1 otherwise (see method_offset)
+
+  if (itable_index != noreg) {
+    __ ld(itable_index, Address(cache, index_offset));  // only loaded when the caller supplied a register; read from f2
+  }
+  __ lwu(flags, Address(cache, flags_offset));
+}
+
+// The registers cache and index are expected to be set before call.
+// Correct values of the cache and index registers are preserved.
+void TemplateTable::jvmti_post_field_access(Register cache, Register index,  // Emits the JVMTI field-access notification; has_tos is not read in this body.
+                                            bool is_static, bool has_tos) {
+  // do the JVMTI work here to avoid disturbing the register state below
+  // We use c_rarg registers here because we want to use the register used in
+  // the call to the VM
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we
+    // take the time to call into the VM.
+    Label L1;
+    assert_different_registers(cache, index, x10);
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset);
+    __ lwu(x10, Address(t0, offset));  // x10 = current field-access watch count
+
+    __ beqz(x10, L1);  // count is zero: skip the VM call
+
+    __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1);
+    __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset())));
+
+    if (is_static) {
+      __ mv(c_rarg1, zr); // NULL object reference
+    } else {
+      __ ld(c_rarg1, at_tos()); // get object pointer without popping it
+      __ verify_oop(c_rarg1);
+    }
+    // c_rarg1: object pointer or NULL
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on the stack
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address,  // NOTE(review): c_rarg3 was last written as the index above, so the "jvalue" note may be stale - confirm
+                                       InterpreterRuntime::post_field_access),
+                                       c_rarg1, c_rarg2, c_rarg3);
+    __ get_cache_and_index_at_bcp(cache, index, 1);  // reload cache/index after the VM call
+    __ bind(L1);
+  }
+}
+
+void TemplateTable::pop_and_check_object(Register r)  // Emits: pop a reference from the expression stack into r, null-check it, then verify it.
+{
+  __ pop_ptr(r);
+  __ null_check(r);  // for field access must check obj.
+  __ verify_oop(r);
+}
+
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)  // Emits the shared getfield/getstatic template: resolve, load by tos type, push, optionally rewrite.
+{
+  const Register cache     = x12;
+  const Register index     = x13;
+  const Register obj       = x14;
+  const Register off       = x9;
+  const Register flags     = x10;  // extracted tos state; x10 is also the destination of the integer/oop loads below
+  const Register raw_flags = x16;  // unmodified flags word, kept for the volatile test at the end
+  const Register bc        = x14; // uses same reg as obj, so don't mix them
+
+  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+  jvmti_post_field_access(cache, index, is_static, false);
+  load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
+
+  if (!is_static) {
+    // obj is on the stack
+    pop_and_check_object(obj);
+  }
+
+  __ add(off, obj, off);  // off = base object + field offset, i.e. the field address
+  const Address field(off);
+
+  Label Done, notByte, notBool, notInt, notShort, notChar,
+              notLong, notFloat, notObj, notDouble;
+
+  __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
+                                    ConstantPoolCacheEntry::tos_state_bits));
+  __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);  // shift pair isolates the tos_state bit field
+
+  assert(btos == 0, "change code, btos != 0");
+  __ bnez(flags, notByte);
+
+  // Don't rewrite getstatic, only getfield
+  if (is_static) {
+    rc = may_not_rewrite;
+  }
+
+  // btos
+  __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
+  __ push(btos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notByte);
+  __ sub(t0, flags, (u1)ztos);
+  __ bnez(t0, notBool);
+
+  // ztos (same code as btos)
+  __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg);
+  __ push(ztos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    // uses btos rewriting, no truncating to t/f bit is needed for getfield
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notBool);
+  __ sub(t0, flags, (u1)atos);
+  __ bnez(t0, notObj);
+  // atos
+  do_oop_load(_masm, field, x10, IN_HEAP);
+  __ push(atos);
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_agetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notObj);
+  __ sub(t0, flags, (u1)itos);
+  __ bnez(t0, notInt);
+  // itos
+  __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
+  __ addw(x10, x10, zr); // signed extended
+  __ push(itos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_igetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notInt);
+  __ sub(t0, flags, (u1)ctos);
+  __ bnez(t0, notChar);
+  // ctos
+  __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
+  __ push(ctos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notChar);
+  __ sub(t0, flags, (u1)stos);
+  __ bnez(t0, notShort);
+  // stos
+  __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
+  __ push(stos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notShort);
+  __ sub(t0, flags, (u1)ltos);
+  __ bnez(t0, notLong);
+  // ltos
+  __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
+  __ push(ltos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notLong);
+  __ sub(t0, flags, (u1)ftos);
+  __ bnez(t0, notFloat);
+  // ftos
+  __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
+  __ push(ftos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11);
+  }
+  __ j(Done);
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  __ sub(t0, flags, (u1)dtos);
+  __ bnez(t0, notDouble);
+#endif
+  // dtos
+  __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
+  __ push(dtos);
+  // Rewrite bytecode to be faster
+  if (rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11);
+  }
+#ifdef ASSERT
+  __ j(Done);
+
+  __ bind(notDouble);
+  __ stop("Bad state");
+#endif
+
+  __ bind(Done);
+
+  Label notVolatile;
+  __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+  __ beqz(t0, notVolatile);  // non-volatile field: no barrier after the load
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  __ bind(notVolatile);
+}
+
+void TemplateTable::getfield(int byte_no)  // Instance-field read; rc is left to the default declared outside this view.
+{
+  getfield_or_static(byte_no, false);
+}
+
+void TemplateTable::nofast_getfield(int byte_no) {  // Instance-field read that never rewrites itself to a fast bytecode.
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+void TemplateTable::getstatic(int byte_no)  // Static-field read; getfield_or_static disables rewriting when is_static is true.
+{
+  getfield_or_static(byte_no, true);
+}
+
+// The registers cache and index are expected to be set before call.
+// The function may destroy various registers, just not the cache and index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {  // Emits the JVMTI field-modification notification.
+  transition(vtos, vtos);
+
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  if (JvmtiExport::can_post_field_modification()) {
+    // Check to see if a field modification watch has been set before
+    // we take the time to call into the VM.
+    Label L1;
+    assert_different_registers(cache, index, x10);
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
+    __ lwu(x10, Address(t0, offset));  // x10 = current field-modification watch count
+    __ beqz(x10, L1);  // count is zero: skip the VM call
+
+    __ get_cache_and_index_at_bcp(c_rarg2, t0, 1);
+
+    if (is_static) {
+      // Life is simple. Null out the object pointer.
+      __ mv(c_rarg1, zr);
+    } else {
+      // Life is harder. The stack holds the value on top, followed by
+      // the object. We don't know the size of the value, though; it
+      // could be one or two words depending on its type. As a result,
+      // we must find the type to determine where the object is.
+      __ lwu(c_rarg3, Address(c_rarg2,
+                              in_bytes(cp_base_offset +
+                                       ConstantPoolCacheEntry::flags_offset())));
+      __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift);  // c_rarg3 = flags >> tos_state_shift, compared against tos states below
+      ConstantPoolCacheEntry::verify_tos_state_shift();
+      Label nope2, done, ok;  // note: 'done' is never bound or branched to in this body
+      __ ld(c_rarg1, at_tos_p1());   // initially assume a one word jvalue
+      __ sub(t0, c_rarg3, ltos);
+      __ beqz(t0, ok);
+      __ sub(t0, c_rarg3, dtos);
+      __ bnez(t0, nope2);  // neither ltos nor dtos: keep the one-word assumption
+      __ bind(ok);
+      __ ld(c_rarg1, at_tos_p2());  // ltos (two word jvalue);
+      __ bind(nope2);
+    }
+    // cache entry pointer
+    __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset));
+    // object (tos)
+    __ mv(c_rarg3, esp);
+    // c_rarg1: object pointer set up above (NULL if static)
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on  the stack
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_modification),
+                                c_rarg1, c_rarg2, c_rarg3);
+    __ get_cache_and_index_at_bcp(cache, index, 1);  // reload cache/index after the VM call
+    __ bind(L1);
+  }
+}
+
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {  // Emits the shared putfield/putstatic template: resolve, pop value, store by tos type, optionally rewrite.
+  transition(vtos, vtos);
+
+  const Register cache = x12;
+  const Register index = x13;
+  const Register obj   = x12;  // same register as cache: obj is written only after the cache entry has been read
+  const Register off   = x9;
+  const Register flags = x10;
+  const Register bc    = x14;
+
+  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
+  jvmti_post_field_mod(cache, index, is_static);
+  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+
+  Label Done;
+  __ mv(x15, flags);  // keep the raw flags in x15: x10 is reused below as the value register for the stores
+
+  {
+    Label notVolatile;
+    __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+    __ beqz(t0, notVolatile);  // non-volatile field: no barrier before the store
+    __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
+    __ bind(notVolatile);
+  }
+
+  Label notByte, notBool, notInt, notShort, notChar,
+        notLong, notFloat, notObj, notDouble;
+
+  __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift +
+                                ConstantPoolCacheEntry::tos_state_bits));
+  __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits);  // shift pair isolates the tos_state bit field
+
+  assert(btos == 0, "change code, btos != 0");
+  __ bnez(flags, notByte);
+
+  // Don't rewrite putstatic, only putfield
+  if (is_static) {
+    rc = may_not_rewrite;
+  }
+
+  // btos
+  {
+    __ pop(btos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0); // off register as temporary register.
+    __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notByte);
+  __ sub(t0, flags, (u1)ztos);
+  __ bnez(t0, notBool);
+
+  // ztos
+  {
+    __ pop(ztos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notBool);
+  __ sub(t0, flags, (u1)atos);
+  __ bnez(t0, notObj);
+
+  // atos
+  {
+    __ pop(atos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    // Store into the field
+    do_oop_store(_masm, field, x10, IN_HEAP);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notObj);
+  __ sub(t0, flags, (u1)itos);
+  __ bnez(t0, notInt);
+
+  // itos
+  {
+    __ pop(itos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notInt);
+  __ sub(t0, flags, (u1)ctos);
+  __ bnez(t0, notChar);
+
+  // ctos
+  {
+    __ pop(ctos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notChar);
+  __ sub(t0, flags, (u1)stos);
+  __ bnez(t0, notShort);
+
+  // stos
+  {
+    __ pop(stos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notShort);
+  __ sub(t0, flags, (u1)ltos);
+  __ bnez(t0, notLong);
+
+  // ltos
+  {
+    __ pop(ltos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notLong);
+  __ sub(t0, flags, (u1)ftos);
+  __ bnez(t0, notFloat);
+
+  // ftos
+  {
+    __ pop(ftos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no);
+    }
+    __ j(Done);
+  }
+
+  __ bind(notFloat);
+#ifdef ASSERT
+  __ sub(t0, flags, (u1)dtos);
+  __ bnez(t0, notDouble);
+#endif
+
+  // dtos
+  {
+    __ pop(dtos);
+    // field address
+    if (!is_static) {
+      pop_and_check_object(obj);
+    }
+    __ add(off, obj, off); // if static, obj from cache, else obj from stack.
+    const Address field(off, 0);
+    __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg);
+    if (rc == may_rewrite) {
+      patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no);
+    }
+  }
+
+#ifdef ASSERT
+  __ j(Done);
+
+  __ bind(notDouble);
+  __ stop("Bad state");
+#endif
+
+  __ bind(Done);
+
+  {
+    Label notVolatile;
+    __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift);  // re-test the volatile bit from the saved raw flags
+    __ beqz(t0, notVolatile);  // non-volatile field: no barrier after the store
+    __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
+    __ bind(notVolatile);
+  }
+}
+
+void TemplateTable::putfield(int byte_no)  // template for the putfield bytecode
+{
+  putfield_or_static(byte_no, false);  // false: presumably is_static (instance field); third argument left at its default
+}
+
+void TemplateTable::nofast_putfield(int byte_no) {  // putfield variant that explicitly passes may_not_rewrite
+  putfield_or_static(byte_no, false, may_not_rewrite);  // same first two arguments as putfield()
+}
+
+void TemplateTable::putstatic(int byte_no) {  // template for the putstatic bytecode
+  putfield_or_static(byte_no, true);  // true: presumably is_static (static field); third argument left at its default
+}
+
+void TemplateTable::jvmti_post_fast_field_mod()  // emits the JVMTI field-modification post used by fast_storefield
+{
+  if (JvmtiExport::can_post_field_modification()) {
+    // Check to see if a field modification watch has been set before
+    // we take the time to call into the VM.
+    Label L2;
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset);
+    __ lwu(c_rarg3, Address(t0, offset));  // c_rarg3 = field modification count
+    __ beqz(c_rarg3, L2);                  // count is zero: skip the VM call entirely
+    __ pop_ptr(x9);                  // copy the object pointer from tos
+    __ verify_oop(x9);
+    __ push_ptr(x9);                 // put the object pointer back on tos
+    // Save tos values before call_VM() clobbers them. Since we have
+    // to do it for every data type, we use the saved values as the
+    // jvalue object.
+    switch (bytecode()) {          // load values into the jvalue object
+      case Bytecodes::_fast_aputfield: __ push_ptr(x10); break;
+      case Bytecodes::_fast_bputfield: // fall through
+      case Bytecodes::_fast_zputfield: // fall through
+      case Bytecodes::_fast_sputfield: // fall through
+      case Bytecodes::_fast_cputfield: // fall through
+      case Bytecodes::_fast_iputfield: __ push_i(x10); break;
+      case Bytecodes::_fast_dputfield: __ push_d(); break;
+      case Bytecodes::_fast_fputfield: __ push_f(); break;
+      case Bytecodes::_fast_lputfield: __ push_l(x10); break;
+
+      default:
+        ShouldNotReachHere();  // only the fast putfield bytecodes listed above are expected
+    }
+    __ mv(c_rarg3, esp);             // points to jvalue on the stack
+    // access constant pool cache entry (x10 is free as a temp: its value was pushed above)
+    __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1);
+    __ verify_oop(x9);
+    // x9: object pointer copied above
+    // c_rarg2: cache entry pointer
+    // c_rarg3: jvalue object on the stack
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_modification),
+               x9, c_rarg2, c_rarg3);
+
+    switch (bytecode()) {             // restore tos values (mirror of the push switch above)
+      case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break;
+      case Bytecodes::_fast_bputfield: // fall through
+      case Bytecodes::_fast_zputfield: // fall through
+      case Bytecodes::_fast_sputfield: // fall through
+      case Bytecodes::_fast_cputfield: // fall through
+      case Bytecodes::_fast_iputfield: __ pop_i(x10); break;
+      case Bytecodes::_fast_dputfield: __ pop_d(); break;
+      case Bytecodes::_fast_fputfield: __ pop_f(); break;
+      case Bytecodes::_fast_lputfield: __ pop_l(x10); break;
+      default: break;  // other bytecodes already hit ShouldNotReachHere() in the first switch
+    }
+    __ bind(L2);
+  }
+}
+
+void TemplateTable::fast_storefield(TosState state)  // template for the _fast_*putfield bytecodes; store type chosen by bytecode()
+{
+  transition(state, vtos);
+
+  ByteSize base = ConstantPoolCache::base_offset();
+
+  jvmti_post_fast_field_mod();
+
+  // access constant pool cache
+  __ get_cache_and_index_at_bcp(x12, x11, 1);
+
+  // Must prevent reordering of the following cp cache loads with bytecode load
+  __ membar(MacroAssembler::LoadLoad);
+
+  // test for volatile with x13 (x13 = flags word; tested before and after the store)
+  __ lwu(x13, Address(x12, in_bytes(base +
+                                    ConstantPoolCacheEntry::flags_offset())));
+
+  // replace index with field offset from cache entry
+  __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
+
+  {
+    // volatile field: barrier emitted ahead of the store
+    Label notVolatile;
+    __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+    __ beqz(t0, notVolatile);
+    __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore);
+    __ bind(notVolatile);
+  }
+
+  // Get object from stack (x12 is reused: cache pointer is no longer needed)
+  pop_and_check_object(x12);
+
+  // field address: x11 = object + field offset
+  __ add(x11, x12, x11);
+  const Address field(x11, 0);
+
+  // access field; integer/oop values come from x10, float/double pass noreg as the source
+  switch (bytecode()) {
+    case Bytecodes::_fast_aputfield:
+      do_oop_store(_masm, field, x10, IN_HEAP);
+      break;
+    case Bytecodes::_fast_lputfield:
+      __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_iputfield:
+      __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_zputfield:
+      __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_bputfield:
+      __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_sputfield:
+      __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_cputfield:
+      __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg);
+      break;
+    case Bytecodes::_fast_fputfield:
+      __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
+      break;
+    case Bytecodes::_fast_dputfield:
+      __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  {
+    // volatile field: barrier emitted after the store
+    Label notVolatile;
+    __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+    __ beqz(t0, notVolatile);
+    __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore);
+    __ bind(notVolatile);
+  }
+}
+
+void TemplateTable::fast_accessfield(TosState state)  // template for the _fast_*getfield bytecodes; load type chosen by bytecode()
+{
+  transition(atos, state);
+  // Do the JVMTI work here to avoid disturbing the register state below
+  if (JvmtiExport::can_post_field_access()) {
+    // Check to see if a field access watch has been set before we
+    // take the time to call into the VM.
+    Label L1;
+    int32_t offset = 0;
+    __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset);
+    __ lwu(x12, Address(t0, offset));  // x12 = field access count
+    __ beqz(x12, L1);                  // count is zero: skip the VM call
+    // access constant pool cache entry
+    __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1);
+    __ verify_oop(x10);
+    __ push_ptr(x10);  // save object pointer before call_VM() clobbers it
+    __ mv(c_rarg1, x10);
+    // c_rarg1: object pointer copied above
+    // c_rarg2: cache entry pointer
+    __ call_VM(noreg,
+               CAST_FROM_FN_PTR(address,
+                                InterpreterRuntime::post_field_access),
+               c_rarg1, c_rarg2);
+    __ pop_ptr(x10); // restore object pointer
+    __ bind(L1);
+  }
+
+  // access constant pool cache
+  __ get_cache_and_index_at_bcp(x12, x11, 1);
+
+  // Must prevent reordering of the following cp cache loads with bytecode load
+  __ membar(MacroAssembler::LoadLoad);
+
+  __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+                                   ConstantPoolCacheEntry::f2_offset())));  // x11 = f2 (used below as the field offset)
+  __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+                                    ConstantPoolCacheEntry::flags_offset())));  // x13 = flags (volatile test below)
+
+  // x10: object
+  __ verify_oop(x10);
+  __ null_check(x10);
+  __ add(x11, x10, x11);  // x11 = object + field offset
+  const Address field(x11, 0);
+
+  // access field; integer/oop results land in x10, float/double pass noreg as the destination
+  switch (bytecode()) {
+    case Bytecodes::_fast_agetfield:
+      do_oop_load(_masm, field, x10, IN_HEAP);
+      __ verify_oop(x10);
+      break;
+    case Bytecodes::_fast_lgetfield:
+      __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg);
+      break;
+    case Bytecodes::_fast_igetfield:
+      __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg);
+      __ addw(x10, x10, zr); // sign-extend the 32-bit value
+      break;
+    case Bytecodes::_fast_bgetfield:
+      __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg);
+      break;
+    case Bytecodes::_fast_sgetfield:
+      __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg);
+      break;
+    case Bytecodes::_fast_cgetfield:
+      __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg);
+      break;
+    case Bytecodes::_fast_fgetfield:
+      __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
+      break;
+    case Bytecodes::_fast_dgetfield:
+      __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  {  // volatile field: barrier emitted after the load
+    Label notVolatile;
+    __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+    __ beqz(t0, notVolatile);
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+    __ bind(notVolatile);
+  }
+}
+
+void TemplateTable::fast_xaccess(TosState state)  // loads a field of the object in local 0; handles itos, atos and ftos only
+{
+  transition(vtos, state);
+
+  // get receiver (local variable 0)
+  __ ld(x10, aaddress(0));
+  // access constant pool cache (last argument is 2 here; the other fast field templates pass 1)
+  __ get_cache_and_index_at_bcp(x12, x13, 2);
+  __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+                                   ConstantPoolCacheEntry::f2_offset())));  // x11 = f2 (used below as the field offset)
+
+  // make sure exception is reported in correct bcp range (getfield is
+  // next instruction)
+  __ addi(xbcp, xbcp, 1);
+  __ null_check(x10);
+  switch (state) {
+    case itos:
+      __ add(x10, x10, x11);
+      __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg);
+      __ addw(x10, x10, zr); // sign-extend the 32-bit value
+      break;
+    case atos:
+      __ add(x10, x10, x11);
+      do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP);
+      __ verify_oop(x10);
+      break;
+    case ftos:
+      __ add(x10, x10, x11);
+      __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  {  // volatile field: flags are loaded only now, then a barrier is emitted after the load
+    Label notVolatile;
+    __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() +
+                                      ConstantPoolCacheEntry::flags_offset())));
+    __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift);
+    __ beqz(t0, notVolatile);
+    __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+    __ bind(notVolatile);
+  }
+
+  __ sub(xbcp, xbcp, 1);  // undo the bcp adjustment made before the null check
+}
+
+//-----------------------------------------------------------------------------
+// Calls
+
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method, // linked method (or i-klass)
+                                   Register index,  // itable index, MethodType, etc.
+                                   Register recv,   // if caller wants to see it
+                                   Register flags   // if caller wants to test it
+                                   ) {
+  // determine flags (all decided at code-generation time from the current bytecode)
+  const Bytecodes::Code code = bytecode();
+  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
+  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+  const bool load_receiver       = (recv  != noreg);
+  const bool save_flags          = (flags != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
+  assert(flags == noreg || flags == x13, "");
+  assert(recv  == noreg || recv  == x12, "");
+
+  // setup registers & access constant pool cache (x12/x13 serve as scratch when the caller passed noreg)
+  if (recv == noreg) {
+    recv = x12;
+  }
+  if (flags == noreg) {
+    flags = x13;
+  }
+  assert_different_registers(method, index, recv, flags);
+
+  // save 'interpreter return address'
+  __ save_bcp();
+
+  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+  // maybe push appendix to arguments (just before return address)
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift);
+    __ beqz(t0, L_no_push);
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push_reg(x9);   // x9 is borrowed as a temp and restored by the pop_reg below
+    __ mv(x9, index);
+    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
+    __ load_resolved_reference_at_index(index, x9);
+    __ pop_reg(x9);
+    __ push_reg(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (note: no return address pushed yet)
+  if (load_receiver) {
+    __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // recv = parameter-size field of flags
+    __ shadd(t0, recv, esp, t0, 3);  // presumably t0 = esp + (recv << 3) -- TODO confirm shadd operand order
+    __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1)));
+    __ verify_oop(recv);
+  }
+
+  // compute return type
+  __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits));
+  __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // t1 = tos_state field of flags, right-aligned
+
+  // load return address
+  {
+    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
+    __ mv(t0, table_addr);
+    __ shadd(t0, t1, t0, t1, 3);  // presumably t0 = table_addr + (t1 << 3): entry for this tos state
+    __ ld(ra, Address(t0, 0));
+  }
+}
+
+void TemplateTable::invokevirtual_helper(Register index,  // f2: vtable index, or Method* when vfinal (see below)
+                                         Register recv,
+                                         Register flags)
+{
+  // Uses temporary registers x10, x13 (x14 and xlocals are also handed to the profiling helpers)
+  assert_different_registers(index, recv, x10, x13);
+  // Test for an invoke of a final method
+  Label notFinal;
+  __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
+  __ beqz(t0, notFinal);
+
+  const Register method = index;  // method must be xmethod
+  assert(method == xmethod, "Method must be xmethod for interpreter calling convention");
+
+  // do the call - the index is actually the method to call
+  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
+
+  // It's final, need a null check here!
+  __ null_check(recv);
+
+  // profile this call
+  __ profile_final_call(x10);
+  __ profile_arguments_type(x10, method, x14, true);
+
+  __ jump_from_interpreted(method);
+
+  __ bind(notFinal);
+
+  // get receiver klass (x10 = klass of recv)
+  __ null_check(recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(x10, recv);
+
+  // profile this call
+  __ profile_virtual_call(x10, xlocals, x13);
+
+  // get target Method & entry point (index and method are the same register here)
+  __ lookup_virtual_method(x10, index, method);
+  __ profile_arguments_type(x13, method, x14, true);
+  __ jump_from_interpreted(method);
+}
+
+void TemplateTable::invokevirtual(int byte_no)  // template for the invokevirtual bytecode
+{
+  transition(vtos, vtos);
+  assert(byte_no == f2_byte, "use this argument");
+
+  prepare_invoke(byte_no, xmethod, noreg, x12, x13);
+
+  // xmethod: f2 (vtable index, or a Method* when the vfinal flag is set)
+  // x12: receiver
+  // x13: flags
+
+  invokevirtual_helper(xmethod, x12, x13);
+}
+
+void TemplateTable::invokespecial(int byte_no)  // template for the invokespecial bytecode
+{
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, xmethod, noreg,  // get f1 Method*
+                 x12);  // get receiver also for null check
+  __ verify_oop(x12);
+  __ null_check(x12);
+  // do the call
+  __ profile_call(x10);
+  __ profile_arguments_type(x10, xmethod, xbcp, false);  // xbcp passed as a register argument; prepare_invoke already did save_bcp()
+  __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::invokestatic(int byte_no)  // template for the invokestatic bytecode (no receiver is loaded)
+{
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this arugment");
+
+  prepare_invoke(byte_no, xmethod);  // get f1 Method*
+  // do the call
+  __ profile_call(x10);
+  __ profile_arguments_type(x10, xmethod, x14, false);
+  __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::fast_invokevfinal(int byte_no)  // not implemented here: byte_no is unused
+{
+  __ call_Unimplemented();  // the template consists solely of this call
+}
+
+void TemplateTable::invokeinterface(int byte_no) {  // template for the invokeinterface bytecode
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, x10, xmethod,  // get f1 Klass*, f2 Method*
+                 x12, x13);  // recv, flags
+
+  // x10: interface klass (from f1)
+  // xmethod: method (from f2)
+  // x12: receiver
+  // x13: flags
+
+  // First check for Object case, then private interface method,
+  // then regular interface method.
+
+  // Special case of invokeinterface called for virtual method of
+  // java.lang.Object. See cpCache.cpp for details
+  Label notObjectMethod;
+  __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift);
+  __ beqz(t0, notObjectMethod);
+
+  invokevirtual_helper(xmethod, x12, x13);  // both helper paths end in jump_from_interpreted
+  __ bind(notObjectMethod);
+
+  Label no_such_interface;
+
+  // Check for private method invocation - indicated by vfinal
+  Label notVFinal;
+  __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift);
+  __ beqz(t0, notVFinal);
+
+  // Get receiver klass into x13 - also a null check
+  __ null_check(x12, oopDesc::klass_offset_in_bytes());
+  __ load_klass(x13, x12);
+
+  Label subtype;
+  __ check_klass_subtype(x13, x10, x14, subtype);
+  // If we get here the typecheck failed
+  __ j(no_such_interface);
+  __ bind(subtype);
+
+  __ profile_final_call(x10);
+  __ profile_arguments_type(x10, xmethod, x14, true);
+  __ jump_from_interpreted(xmethod);
+
+  __ bind(notVFinal);
+
+  // Get receiver klass into x13 - also a null check
+  __ restore_locals();
+  __ null_check(x12, oopDesc::klass_offset_in_bytes());
+  __ load_klass(x13, x12);
+
+  Label no_such_method;
+
+  // Preserve method for the throw_AbstractMethodErrorVerbose.
+  __ mv(x28, xmethod);
+  // Receiver subtype check against REFC.
+  // Superklass in x10. Subklass in x13. Blows t1, x30
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             x13, x10, noreg,
+                             // outputs: scan temp. reg, scan temp. reg
+                             t1, x30,
+                             no_such_interface,
+                             /*return_method=*/false);
+
+  // profile this call
+  __ profile_virtual_call(x13, x30, x9);
+
+  // Get declaring interface class from method, and itable index
+  __ ld(x10, Address(xmethod, Method::const_offset()));
+  __ ld(x10, Address(x10, ConstMethod::constants_offset()));
+  __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes()));
+  __ lwu(xmethod, Address(xmethod, Method::itable_index_offset()));
+  __ subw(xmethod, xmethod, Method::itable_index_max);
+  __ negw(xmethod, xmethod);  // together with the subw: xmethod = itable_index_max - loaded value
+
+  // Preserve recvKlass for throw_AbstractMethodErrorVerbose
+  __ mv(xlocals, x13);  // the lookup below works on this copy, so x13 stays available to the error paths
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             xlocals, x10, xmethod,
+                             // outputs: method, scan temp. reg
+                             xmethod, x30,
+                             no_such_interface);
+
+  // xmethod: Method to call
+  // x12: receiver
+  // Check for abstract method error
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  //       interpreter entry point and a conditional jump to it in case of a null
+  //       method.
+  __ beqz(xmethod, no_such_method);
+
+  __ profile_arguments_type(x13, xmethod, x30, true);
+
+  // do the call
+  // x12: receiver
+  // xmethod: Method
+  __ jump_from_interpreted(xmethod);
+  __ should_not_reach_here();
+
+  // exception handling code follows ...
+  // note: must restore interpreter registers to canonical
+  //       state for exception handling to work correctly!
+
+  __ bind(no_such_method);
+  // throw exception
+  __ restore_bcp();    // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
+  // Pass arguments for generating a verbose error message (x13: receiver klass, x28: method saved above).
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28);
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  __ bind(no_such_interface);
+  // throw exception
+  __ restore_bcp();    // bcp must be correct for exception handler   (was destroyed)
+  __ restore_locals(); // make sure locals pointer is correct as well (was destroyed)
+  // Pass arguments for generating a verbose error message (x13: receiver klass, x10: interface klass).
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                     InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10);
+  // the call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+  return;
+}
+
+void TemplateTable::invokehandle(int byte_no) {  // template for the invokehandle bytecode
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, xmethod, x10, x12);  // method in xmethod, index in x10, receiver in x12
+  __ verify_method_ptr(x12);
+  __ verify_oop(x12);
+  __ null_check(x12);
+
+  // FIXME: profile the LambdaForm also
+
+  // x30 is safe to use here as a temp reg because it is about to
+  // be clobbered by jump_from_interpreted().
+  __ profile_final_call(x30);
+  __ profile_arguments_type(x30, xmethod, x14, true);
+
+  __ jump_from_interpreted(xmethod);
+}
+
+void TemplateTable::invokedynamic(int byte_no) {  // template for the invokedynamic bytecode (no receiver is loaded)
+  transition(vtos, vtos);
+  assert(byte_no == f1_byte, "use this argument");
+
+  prepare_invoke(byte_no, xmethod, x10);
+
+  // x10: CallSite object (from cpool->resolved_references[])
+  // xmethod: MH.linkToCallSite method (from f2)
+
+  // Note: x10_callsite is already pushed by prepare_invoke
+
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
+  // profile this call (xbcp passed as a register argument; prepare_invoke already did save_bcp())
+  __ profile_call(xbcp);
+  __ profile_arguments_type(x13, xmethod, x30, false);
+
+  __ verify_oop(x10);
+
+  __ jump_from_interpreted(xmethod);
+}
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+void TemplateTable::_new() {  // template for the new bytecode: inline fast-path allocation with a call_VM slow path
+  transition(vtos, atos);
+
+  __ get_unsigned_2_byte_index_at_bcp(x13, 1);  // x13 = constant pool index operand
+  Label slow_case;
+  Label done;
+  Label initialize_header;
+  Label initialize_object; // including clearing the fields
+
+  __ get_cpool_and_tags(x14, x10);  // x14 = constant pool, x10 = tags array
+  // Make sure the class we're about to instantiate has been resolved.
+  // This is done before loading InstanceKlass to be consistent with the order
+  // in which the Constant Pool is updated (see ConstantPool::klass_at_put)
+  const int tags_offset = Array<u1>::base_offset_in_bytes();
+  __ add(t0, x10, x13);
+  __ la(t0, Address(t0, tags_offset));
+  __ membar(MacroAssembler::AnyAny);
+  __ lbu(t0, t0);  // t0 = tag byte for this index
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  __ sub(t1, t0, (u1)JVM_CONSTANT_Class);
+  __ bnez(t1, slow_case);  // tag is not JVM_CONSTANT_Class: take the slow path
+
+  // get InstanceKlass
+  __ load_resolved_klass_at_offset(x14, x13, x14, t0);
+
+  // make sure klass is initialized & doesn't have finalizer
+  // make sure klass is fully initialized
+  __ lbu(t0, Address(x14, InstanceKlass::init_state_offset()));
+  __ sub(t1, t0, (u1)InstanceKlass::fully_initialized);
+  __ bnez(t1, slow_case);
+
+  // get instance_size in InstanceKlass (scaled to a count of bytes)
+  __ lwu(x13, Address(x14, Klass::layout_helper_offset()));
+  // test to see if it has a finalizer or is malformed in some way
+  __ andi(t0, x13, Klass::_lh_instance_slow_path_bit);
+  __ bnez(t0, slow_case);
+
+  // Allocate the instance:
+  //  If TLAB is enabled:
+  //    Try to allocate in the TLAB.
+  //    If fails, go to the slow path.
+  //  Else If inline contiguous allocations are enabled:
+  //    Try to allocate in eden.
+  //    If fails due to heap end, go to slow path
+  //
+  //  If TLAB is enabled OR inline contiguous is enabled:
+  //    Initialize the allocation.
+  //    Exit.
+  //  Go to slow path.
+  const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc();
+
+  if (UseTLAB) {
+    __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case);
+
+    if (ZeroTLAB) {
+      // the fields have been already cleared
+      __ j(initialize_header);
+    } else {
+      // initialize both the header and fields
+      __ j(initialize_object);
+    }
+  } else {
+    // Allocation in the shared Eden, if allowed.
+    //
+    // x13: instance size in bytes
+    if (allow_shared_alloc) {
+      __ eden_allocate(x10, x13, 0, x28, slow_case);
+    }
+  }
+
+  // If UseTLAB or allow_shared_alloc is true, the object was allocated above and
+  // still needs to be initialized. Otherwise, skip and go to the slow path.
+  if (UseTLAB || allow_shared_alloc) {
+    // The fields are cleared before the header is written. If there are no
+    // bytes beyond the header, go directly to the header initialization.
+    __ bind(initialize_object);
+    __ sub(x13, x13, sizeof(oopDesc));
+    __ beqz(x13, initialize_header);
+
+    // Initialize object fields
+    {
+      __ add(x12, x10, sizeof(oopDesc));  // x12 = first byte after the header
+      Label loop;
+      __ bind(loop);
+      __ sd(zr, Address(x12));  // zero BytesPerLong bytes per iteration until x13 reaches 0
+      __ add(x12, x12, BytesPerLong);
+      __ sub(x13, x13, BytesPerLong);
+      __ bnez(x13, loop);
+    }
+
+    // initialize object header only.
+    __ bind(initialize_header);
+    if (UseBiasedLocking) {
+      __ ld(t0, Address(x14, Klass::prototype_header_offset()));
+    } else {
+      __ mv(t0, (intptr_t)markOopDesc::prototype());
+    }
+    __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes()));
+    __ store_klass_gap(x10, zr);   // zero klass gap for compressed oops
+    __ store_klass(x10, x14);      // store klass last
+
+    {
+      SkipIfEqual skip(_masm, &DTraceAllocProbes, false);
+      // Trigger dtrace event for fastpath
+      __ push(atos); // save the return value
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10);
+      __ pop(atos); // restore the return value
+    }
+    __ j(done);
+  }
+
+  // slow case: let InterpreterRuntime::_new do the allocation; result in x10
+  __ bind(slow_case);
+  __ get_constant_pool(c_rarg1);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2);
+  __ verify_oop(x10);
+
+  // continue
+  __ bind(done);
+  // Must prevent reordering of stores for object initialization with stores that publish the new object.
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::newarray() {  // template for the newarray bytecode; always calls into the VM
+  transition(itos, atos);
+  __ load_unsigned_byte(c_rarg1, at_bcp(1));  // c_rarg1 = operand byte at bcp + 1
+  __ mv(c_rarg2, x10);                        // c_rarg2 = int value from tos (x10)
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
+          c_rarg1, c_rarg2);
+  // Must prevent reordering of stores for object initialization with stores that publish the new object.
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::anewarray() {  // template for the anewarray bytecode; always calls into the VM
+  transition(itos, atos);
+  __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1);  // c_rarg2 = two-byte index operand
+  __ get_constant_pool(c_rarg1);                    // c_rarg1 = constant pool
+  __ mv(c_rarg3, x10);                              // c_rarg3 = int value from tos (x10)
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
+          c_rarg1, c_rarg2, c_rarg3);
+  // Must prevent reordering of stores for object initialization with stores that publish the new object.
+  __ membar(MacroAssembler::StoreStore);
+}
+
+void TemplateTable::arraylength() {  // template for the arraylength bytecode: array ref in x10 -> length in x10
+  transition(atos, itos);
+  __ null_check(x10, arrayOopDesc::length_offset_in_bytes());
+  __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes()));  // 32-bit unsigned load of the length field
+}
+
+void TemplateTable::checkcast()  // template for the checkcast bytecode; object stays in x10 on success or null
+{
+  transition(atos, atos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ beqz(x10, is_null);  // null reference: skip the subtype check
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
+  __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
+  // See if bytecode has already been quicked
+  __ add(t0, x13, Array<u1>::base_offset_in_bytes());
+  __ add(x11, t0, x9);
+  __ membar(MacroAssembler::AnyAny);
+  __ lbu(x11, x11);  // x11 = tag byte for this index
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
+  __ beqz(t0, quicked);  // tag is JVM_CONSTANT_Class: klass can be loaded without a VM call
+
+  __ push(atos); // save receiver for result, and for GC
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  // vm_result_2 has metadata result
+  __ get_vm_result_2(x10, xthread);
+  __ pop_reg(x13); // restore receiver
+  __ j(resolved);
+
+  // Get superklass in x10 and subklass in x13
+  __ bind(quicked);
+  __ mv(x13, x10); // Save object in x13; x10 needed for subtype check
+  __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass
+
+  __ bind(resolved);
+  __ load_klass(x9, x13);  // x9 = klass of the object (index in x9 is no longer needed)
+
+  // Generate subtype check.  Blows x12, x15.  Object in x13.
+  // Superklass in x10.  Subklass in x9.
+  __ gen_subtype_check(x9, ok_is_subtype);
+
+  // Come here on failure
+  __ push_reg(x13);
+  // object is at TOS
+  __ j(Interpreter::_throw_ClassCastException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+  __ mv(x10, x13); // Restore object from x13 into x10
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ j(done);
+    __ bind(is_null);
+    __ profile_null_seen(x12);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+  __ bind(done);
+}
+
+void TemplateTable::instanceof() {  // template for the instanceof bytecode: object in x10 -> 0 or 1 in x10
+  transition(atos, itos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ beqz(x10, is_null);  // null reference: x10 is already the 0 result
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array
+  __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index
+  // See if bytecode has already been quicked
+  __ add(t0, x13, Array<u1>::base_offset_in_bytes());
+  __ add(x11, t0, x9);
+  __ membar(MacroAssembler::AnyAny);
+  __ lbu(x11, x11);  // x11 = tag byte for this index
+  __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  __ sub(t0, x11, (u1)JVM_CONSTANT_Class);
+  __ beqz(t0, quicked);  // tag is JVM_CONSTANT_Class: klass can be loaded without a VM call
+
+  __ push(atos); // save receiver for result, and for GC
+  call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  // vm_result_2 has metadata result
+  __ get_vm_result_2(x10, xthread);
+  __ pop_reg(x13); // restore receiver
+  __ verify_oop(x13);
+  __ load_klass(x13, x13);  // x13 = klass of the object
+  __ j(resolved);
+
+  // Get superklass in x10 and subklass in x13
+  __ bind(quicked);
+  __ load_klass(x13, x10);  // must read the object's klass before x10 is overwritten below
+  __ load_resolved_klass_at_offset(x12, x9, x10, t0);
+
+  __ bind(resolved);
+
+  // Generate subtype check.  Blows x12, x15
+  // Superklass in x10.  Subklass in x13.
+  __ gen_subtype_check(x13, ok_is_subtype);
+
+  // Come here on failure
+  __ mv(x10, zr);
+  __ j(done);
+  // Come here on success
+  __ bind(ok_is_subtype);
+  __ mv(x10, 1);
+
+  // Collect counts on whether this test sees NULLs a lot or not.
+  if (ProfileInterpreter) {
+    __ j(done);
+    __ bind(is_null);
+    __ profile_null_seen(x12);
+  } else {
+    __ bind(is_null);   // same as 'done'
+  }
+  __ bind(done);
+  // x10 = 0: obj == NULL or  obj is not an instanceof the specified klass
+  // x10 = 1: obj != NULL and obj is     an instanceof the specified klass
+}
+
+//-----------------------------------------------------------------------------
+// Breakpoints
+void TemplateTable::_breakpoint() {
+  // Note: We get here even if we are single stepping..
+  // jbug insists on setting breakpoints at every bytecode
+  // even if we are in single step mode.
+
+  transition(vtos, vtos);
+
+  // get the unpatched byte code
+  __ get_method(c_rarg1);
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::get_original_bytecode_at),
+             c_rarg1, xbcp);
+  __ mv(x9, x10);  // keep the value left in x10 by the call in x9 across the next call_VM
+
+  // post the breakpoint event
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
+             xmethod, xbcp);
+
+  // complete the execution of original bytecode
+  __ mv(t0, x9);  // presumably dispatch_only_normal expects the bytecode in t0 -- TODO confirm
+  __ dispatch_only_normal(vtos);
+}
+
+//-----------------------------------------------------------------------------
+// Exceptions
+
+void TemplateTable::athrow() {  // template for the athrow bytecode
+  transition(atos, vtos);
+  __ null_check(x10);  // x10 holds the reference from tos (atos)
+  __ j(Interpreter::throw_exception_entry());  // jump, not call: control leaves this template here
+}
+
+//-----------------------------------------------------------------------------
+// Synchronization
+//
+// Note: monitorenter & monitorexit are symmetric routines, which is reflected
+//       in the assembly code structure as well.
+//
+// Stack layout:
+//
+// [expressions  ] <--- esp               = expression stack top
+// ..
+// [expressions  ]
+// [monitor entry] <--- monitor block top = expression stack bot
+// ..
+// [monitor entry]
+// [frame data   ] <--- monitor block bot
+// ...
+// [saved fp     ] <--- fp
+void TemplateTable::monitorenter()  // monitorenter template: lock the object referenced by x10
+{  // uses a free entry of this frame's monitor block, or grows the block by one entry if none is free
+  transition(atos, vtos);
+
+   // check for NULL object
+   __ null_check(x10);
+
+   const Address monitor_block_top(
+         fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+   const Address monitor_block_bot(
+         fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; // step between monitor entries
+
+   Label allocated;
+
+   // initialize entry pointer
+   __ mv(c_rarg1, zr); // points to free slot or NULL
+
+   // find a free slot in the monitor block (result in c_rarg1)
+   {
+     Label entry, loop, exit, notUsed;
+     __ ld(c_rarg3, monitor_block_top); // points to current entry,
+                                        // starting with top-most entry
+     __ la(c_rarg2, monitor_block_bot); // points to word before bottom
+
+     __ j(entry);
+
+     __ bind(loop);
+     // check if current entry is used
+     // if not used then remember entry in c_rarg1
+     __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes()));
+     __ bnez(t0, notUsed);    // non-zero obj field: entry is in use, do not record it
+     __ mv(c_rarg1, c_rarg3); // a free entry found later overwrites one found earlier
+     __ bind(notUsed);
+     // check if current entry is for same object
+     // if same object then stop searching
+     __ beq(x10, t0, exit);
+     // otherwise advance to next entry
+     __ add(c_rarg3, c_rarg3, entry_size);
+     __ bind(entry);
+     // check if bottom reached
+     // if not at bottom then check this entry
+     __ bne(c_rarg3, c_rarg2, loop);
+     __ bind(exit);
+   }
+
+   __ bnez(c_rarg1, allocated); // check if a slot has been found and
+                             // if found, continue with that one
+
+   // allocate one if there's no free slot
+   {
+     Label entry, loop;
+     // 1. compute new pointers            // esp: old expression stack top
+     __ ld(c_rarg1, monitor_block_bot);    // c_rarg1: old expression stack bottom
+     __ sub(esp, esp, entry_size);         // move expression stack top
+     __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom
+     __ mv(c_rarg3, esp);                  // set start value for copy loop
+     __ sd(c_rarg1, monitor_block_bot);    // set new monitor block bottom
+     __ sub(sp, sp, entry_size);           // make room for the monitor
+
+     __ j(entry);
+     // 2. move expression stack contents
+     __ bind(loop);
+     __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack
+                                                   // word from old location
+     __ sd(c_rarg2, Address(c_rarg3, 0));          // and store it at new location
+     __ add(c_rarg3, c_rarg3, wordSize);           // advance to next word
+     __ bind(entry);
+     __ bne(c_rarg3, c_rarg1, loop);    // check if bottom reached; if not at bottom
+                                        // then copy next word
+   }
+
+   // call run-time routine
+   // c_rarg1: points to monitor entry
+   __ bind(allocated);
+
+   // Increment bcp to point to the next bytecode, so exception
+   // handling for async. exceptions works correctly.
+   // The object has already been popped from the stack, so the
+   // expression stack looks correct.
+   __ addi(xbcp, xbcp, 1);
+
+   // store object
+   __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+   __ lock_object(c_rarg1);
+
+   // check to make sure this monitor doesn't cause stack overflow after locking
+   __ save_bcp();  // in case of exception
+   __ generate_stack_overflow_check(0);
+
+   // The bcp has already been incremented. Just need to dispatch to
+   // next instruction.
+   __ dispatch_next(vtos);
+}
+
+void TemplateTable::monitorexit()  // monitorexit template: unlock the object referenced by x10
+{  // searches this frame's monitor block for the entry holding x10; throws if there is none
+  transition(atos, vtos);
+
+  // check for NULL object
+  __ null_check(x10);
+
+  const Address monitor_block_top(
+        fp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+  const Address monitor_block_bot(
+        fp, frame::interpreter_frame_initial_sp_offset * wordSize);
+  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; // step between monitor entries
+
+  Label found;
+
+  // find matching slot
+  {
+    Label entry, loop;
+    __ ld(c_rarg1, monitor_block_top); // points to current entry,
+                                        // starting with top-most entry
+    __ la(c_rarg2, monitor_block_bot); // points to word before bottom
+                                        // of monitor block
+    __ j(entry);
+
+    __ bind(loop);
+    // check if current entry is for same object
+    __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+    // if same object then stop searching
+    __ beq(x10, t0, found);
+    // otherwise advance to next entry
+    __ add(c_rarg1, c_rarg1, entry_size);
+    __ bind(entry);
+    // check if bottom reached
+    // if not at bottom then check this entry
+    __ bne(c_rarg1, c_rarg2, loop);
+  }
+
+  // error handling: no entry matched, so the unlocking was not block-structured
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+  __ should_not_reach_here();  // the call above is expected not to return here
+
+  // call run-time routine
+  __ bind(found);              // c_rarg1 points to the matching monitor entry
+  __ push_ptr(x10); // make sure object is on stack (contract with oopMaps)
+  __ unlock_object(c_rarg1);
+  __ pop_ptr(x10); // discard object
+}
+
+// Wide instructions
+void TemplateTable::wide()  // wide template: jump through the Interpreter::_wentry_point table
+{
+  __ load_unsigned_byte(x9, at_bcp(1));            // x9 = unsigned byte at bcp + 1, used as the table index
+  __ mv(t0, (address)Interpreter::_wentry_point);  // t0 = address of the _wentry_point table
+  __ shadd(t0, x9, t0, t1, 3);                     // presumably t0 += x9 << 3 (8-byte slots), t1 as temp - confirm in MacroAssembler::shadd
+  __ ld(t0, Address(t0));                          // t0 = the selected table entry
+  __ jr(t0);                                       // continue at that entry
+}
+
+// Multi arrays
+void TemplateTable::multianewarray() {  // multianewarray template: pass the dimension words to the runtime, then pop them
+  transition(vtos, atos);
+  __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions
+  // last dim is on top of stack; we want address of first one:
+  // first_addr = last_addr + (ndims - 1) * wordSize
+  __ shadd(c_rarg1, x10, esp, c_rarg1, 3);  // per the formula above: c_rarg1 = esp + (ndims << 3)
+  __ sub(c_rarg1, c_rarg1, wordSize);       // minus one word gives first_addr
+  call_VM(x10,                              // x10 is passed as the result register of the call
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray),
+          c_rarg1);
+  __ load_unsigned_byte(x11, at_bcp(3));    // re-read ndims; x10 was handed to call_VM as its result register
+  __ shadd(esp, x11, esp, t0, 3);           // drop the ndims dimension words: esp += ndims << 3
+}
diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
new file mode 100644
index 0000000000..fcc86108d2
--- /dev/null
+++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP
+#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP
+
+static void prepare_invoke(int byte_no,
+                           Register method,         // linked method (or i-klass)
+                           Register index = noreg,  // itable index, MethodType, etc.
+                           Register recv  = noreg,  // if caller wants to see it
+                           Register flags = noreg   // if caller wants to test it
+                           );  // index, recv and flags are optional: each defaults to noreg
+static void invokevirtual_helper(Register index, Register recv,
+                                 Register flags);  // unlike prepare_invoke, all three registers must be supplied
+
+// Helpers
+static void index_check(Register array, Register index);  // NOTE(review): presumably an array bounds check - confirm at the definition
+
+#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
new file mode 100644
index 0000000000..6c89133de0
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP
+#define CPU_RISCV_VMSTRUCTS_RISCV_HPP
+
+// CPU-specific fields, types and integer constants required by the
+// Serviceability Agent; referenced by vmStructs.cpp. Only VM_STRUCTS_CPU is
+// non-empty here: it exposes JavaFrameAnchor::_last_Java_fp (an intptr_t*).
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+  volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*)
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp
new file mode 100644
index 0000000000..6bdce51506
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/os.inline.hpp"
+#include "vm_version_ext_riscv.hpp"
+
+// VM_Version_Ext statics: zero-initialized here, populated lazily by initialize_cpu_information()
+int VM_Version_Ext::_no_of_threads = 0;
+int VM_Version_Ext::_no_of_cores = 0;
+int VM_Version_Ext::_no_of_sockets = 0;
+bool VM_Version_Ext::_initialized = false;  // becomes true once the other statics have been filled in
+char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0};  // short CPU name
+char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0};  // CPU name followed by the feature string
+
+void VM_Version_Ext::initialize_cpu_information(void) {
+  // One-time setup of the cached CPU counts and description strings;
+  // later calls find _initialized set and fall straight through.
+  if (!_initialized) {
+    const int processors = os::processor_count();
+    _no_of_cores   = processors;
+    _no_of_threads = processors;  // threads and sockets are not detected
+    _no_of_sockets = processors;  // separately: both mirror the core count
+    snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64");
+    snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string);
+    _initialized = true;
+  }
+}
+
+int VM_Version_Ext::number_of_threads(void) {  // returns the cached hardware thread count
+  initialize_cpu_information();  // does nothing after the first call (guarded by _initialized)
+  return _no_of_threads;         // filled in with the same value as the core count
+}
+
+int VM_Version_Ext::number_of_cores(void) {  // returns the cached core count
+  initialize_cpu_information();  // does nothing after the first call (guarded by _initialized)
+  return _no_of_cores;           // filled in from os::processor_count()
+}
+
+int VM_Version_Ext::number_of_sockets(void) {  // returns the cached socket count
+  initialize_cpu_information();  // does nothing after the first call (guarded by _initialized)
+  return _no_of_sockets;         // NOTE: filled in with the core count; sockets are not detected separately
+}
+
+const char* VM_Version_Ext::cpu_name(void) {
+  initialize_cpu_information();
+  // Hand back a fresh C-heap copy of _cpu_name, or NULL if the allocation fails.
+  char* const copy = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing);
+  if (copy != NULL) {
+    strncpy(copy, _cpu_name, CPU_TYPE_DESC_BUF_SIZE);
+  }
+  return copy;
+}
+
+const char* VM_Version_Ext::cpu_description(void) {
+  initialize_cpu_information();
+  // Hand back a fresh C-heap copy of _cpu_desc, or NULL if the allocation fails.
+  char* const copy = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing);
+  if (copy != NULL) {
+    strncpy(copy, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
+  }
+  return copy;
+}
diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
new file mode 100644
index 0000000000..711e4aeaf6
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
+#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
+
+#include "runtime/vm_version.hpp"
+#include "utilities/macros.hpp"
+
+class VM_Version_Ext : public VM_Version {  // adds cached CPU count, name and description queries to VM_Version
+ private:
+  static const size_t      CPU_TYPE_DESC_BUF_SIZE = 256;        // capacity of _cpu_name, in chars
+  static const size_t      CPU_DETAILED_DESC_BUF_SIZE = 4096;   // capacity of _cpu_desc, in chars
+
+  static int               _no_of_threads;
+  static int               _no_of_cores;
+  static int               _no_of_sockets;
+  static bool              _initialized;   // guards the one-time work in initialize_cpu_information()
+  static char              _cpu_name[CPU_TYPE_DESC_BUF_SIZE];
+  static char              _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE];
+
+ public:
+  static int number_of_threads(void);   // each query below first calls
+  static int number_of_cores(void);     // initialize_cpu_information(), then returns
+  static int number_of_sockets(void);   // the corresponding cached value
+
+  static const char* cpu_name(void);         // newly C-heap-allocated copy, or NULL if allocation fails
+  static const char* cpu_description(void);  // newly C-heap-allocated copy, or NULL if allocation fails
+  static void initialize_cpu_information(void);  // fills the statics above; later calls return immediately
+
+};
+
+#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
new file mode 100644
index 0000000000..0e8f526bd9
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/java.hpp"
+#include "runtime/os.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/formatBuffer.hpp"
+#include "utilities/macros.hpp"
+
+#include OS_HEADER_INLINE(os)
+
+const char* VM_Version::_uarch = "";  // when non-empty, prefixed to the feature string built in initialize()
+const char* VM_Version::_vm_mode = "";  // reported in the "Unsupported satp mode" startup error
+uint32_t VM_Version::_initial_vector_length = 0;  // set in initialize() only when UseRVV is on and the CPU reports V
+
+void VM_Version::initialize() {  // checks the CPU/OS configuration and adjusts VM flags to what this port supports
+  get_os_cpu_info();  // NOTE(review): presumably fills _features, _uarch and _vm_mode - confirm in the os_cpu code
+
+  // check if satp.mode is supported, currently supports up to SV48(RV64)
+  if (get_satp_mode() > VM_SV48) {
+    vm_exit_during_initialization(
+      err_msg("Unsupported satp mode: %s. Only satp modes up to sv48 are supported for now.",
+              _vm_mode));
+  }
+
+  if (FLAG_IS_DEFAULT(UseFMA)) {  // enable FMA unless the flag was set explicitly
+    FLAG_SET_DEFAULT(UseFMA, true);
+  }
+
+  if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
+  }
+
+  if (UseAES || UseAESIntrinsics) {
+    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {  // warn and reset only when the flag was set explicitly
+      warning("AES instructions are not available on this CPU");
+      FLAG_SET_DEFAULT(UseAES, false);
+    }
+    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {  // same rule as for UseAES
+      warning("AES intrinsics are not available on this CPU");
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    }
+  }
+
+  if (UseAESCTRIntrinsics) {  // this and the following crypto/CRC flags are always warned about and cleared
+    warning("AES/CTR intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+  }
+
+  if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
+  if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  }
+
+  if (UseCRC32Intrinsics) {
+    warning("CRC32 intrinsics are not available on this CPU.");
+    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+  }
+
+  if (UseCRC32CIntrinsics) {
+    warning("CRC32C intrinsics are not available on this CPU.");
+    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
+  }
+
+  if (UseRVV) {
+    if (!(_features & CPU_V)) {  // RVV requested but the V feature bit is not set
+      warning("RVV is not supported on this CPU");
+      FLAG_SET_DEFAULT(UseRVV, false);
+    } else {
+      // read vector length from vector CSR vlenb
+      _initial_vector_length = get_current_vector_length();
+    }
+  }
+
+  if (UseRVC && !(_features & CPU_C)) {  // RVC requested but the C feature bit is not set
+    warning("RVC is not supported on this CPU");
+    FLAG_SET_DEFAULT(UseRVC, false);
+  }
+
+  if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true);
+  }
+
+  if (UseZbb) {  // with Zbb the popcount instruction defaults to on; without Zbb it is forced off
+    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+      FLAG_SET_DEFAULT(UsePopCountInstruction, true);
+    }
+  } else {
+    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
+  }
+
+  char buf[512];  // NOTE(review): the strcat appends below are not bounds-checked against this size - confirm _uarch stays short
+  buf[0] = '\0';
+  if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch);  // optional "<uarch>," prefix
+  strcat(buf, "rv64");  // then "rv64" followed by the name of every feature whose bit is set
+#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name);
+  CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED)
+#undef ADD_FEATURE_IF_SUPPORTED
+
+  _features_string = os::strdup(buf);  // buf is a local, so the string is duplicated before being published
+
+#ifdef COMPILER2
+  c2_initialize();
+#endif // COMPILER2
+
+  UNSUPPORTED_OPTION(CriticalJNINatives);
+
+  FLAG_SET_DEFAULT(UseMembar, true);  // set unconditionally, whatever the previous value
+}
+
+#ifdef COMPILER2
+void VM_Version::c2_initialize() {  // C2-only flag adjustments; called from VM_Version::initialize()
+  if (UseCMoveUnconditionally) {  // forced off on this port
+    FLAG_SET_DEFAULT(UseCMoveUnconditionally, false);
+  }
+
+  if (ConditionalMoveLimit > 0) {  // any positive limit is reset to 0
+    FLAG_SET_DEFAULT(ConditionalMoveLimit, 0);
+  }
+
+  if (!UseRVV) {  // NOTE(review): UseRVV can still be cleared further down, after this reset has run - confirm that is intended
+    FLAG_SET_DEFAULT(SpecialEncodeISOArray, false);
+  }
+
+  if (!UseRVV && MaxVectorSize) {  // without RVV a non-zero MaxVectorSize is reset to 0
+    FLAG_SET_DEFAULT(MaxVectorSize, 0);
+  }
+
+  if (UseRVV) {
+    if (FLAG_IS_DEFAULT(MaxVectorSize)) {  // not set explicitly: use the length read during initialize()
+      MaxVectorSize = _initial_vector_length;
+    } else if (MaxVectorSize < 16) {  // tested before the power-of-two case, so values below 16 never reach it
+      warning("RVV does not support vector length less than 16 bytes. Disabling RVV.");
+      UseRVV = false;
+    } else if (is_power_of_2(MaxVectorSize)) {  // every accepted power of two is replaced by the hardware length
+      if (MaxVectorSize > _initial_vector_length) {  // warn only when more than the hardware length was requested
+        warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d",
+                _initial_vector_length, _initial_vector_length);
+      }
+      MaxVectorSize = _initial_vector_length;
+    } else {  // 16 or more but not a power of two: refuse to start
+      vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
+    }
+  }
+
+  // disable prefetch (only when AllocatePrefetchStyle was not set explicitly)
+  if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {  // the remaining intrinsics default to on unless set explicitly
+    FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+  }
+
+  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true);
+  }
+}
+#endif // COMPILER2
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
new file mode 100644
index 0000000000..875511f522
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP
+#define CPU_RISCV_VM_VERSION_RISCV_HPP
+
+#include "runtime/abstract_vm_version.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/globals_extension.hpp"
+#include "utilities/sizes.hpp"
+
+class VM_Version : public Abstract_VM_Version {  // RISC-V specific CPU feature flags and VM flag initialization
+#ifdef COMPILER2
+private:
+  static void c2_initialize();  // declared only in builds that define COMPILER2
+#endif // COMPILER2
+
+// VM modes (satp.mode) privileged ISA 1.10
+enum VM_MODE {
+  VM_MBARE = 0,
+  VM_SV39  = 8,
+  VM_SV48  = 9,   // highest mode accepted by VM_Version::initialize()
+  VM_SV57  = 10,  // this mode and VM_SV64 make initialize() exit the VM
+  VM_SV64  = 11
+};
+
+protected:
+  static const char* _uarch;    // optional prefix of the feature string
+  static const char* _vm_mode;  // satp mode name, used in the unsupported-mode error message
+  static uint32_t _initial_vector_length;  // vector length read once when RVV is usable
+  static void get_os_cpu_info();                // these three are only declared here;
+  static uint32_t get_current_vector_length();  // NOTE(review): presumably defined in OS-specific
+  static VM_MODE get_satp_mode();               // code - confirm
+
+public:
+  // Initialization
+  static void initialize();
+
+  enum Feature_Flag {  // one CPU_<id> = (1 << bit) per entry below; bit is the extension letter's index (a=0 ... v=21)
+#define CPU_FEATURE_FLAGS(decl)               \
+    decl(I,            "i",            8)     \
+    decl(M,            "m",           12)     \
+    decl(A,            "a",            0)     \
+    decl(F,            "f",            5)     \
+    decl(D,            "d",            3)     \
+    decl(C,            "c",            2)     \
+    decl(V,            "v",           21)
+
+#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit),
+    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
+#undef DECLARE_CPU_FEATURE_FLAG
+  };
+
+  static void initialize_cpu_information(void);
+};
+
+#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
new file mode 100644
index 0000000000..c4338715f9
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "code/vmreg.hpp"
+
+void VMRegImpl::set_regName() {  // fills regName[] with one printable name per VMReg slot
+  int slot = 0;
+  Register gpr = ::as_Register(0);
+  while (slot < ConcreteRegisterImpl::max_gpr) {
+    for (int k = 0; k < RegisterImpl::max_slots_per_register; ++k) {
+      regName[slot++] = gpr->name();
+    }
+    gpr = gpr->successor();
+  }
+  // float registers follow the general-purpose ones, up to slot max_fpr
+  FloatRegister fpr = ::as_FloatRegister(0);
+  while (slot < ConcreteRegisterImpl::max_fpr) {
+    for (int k = 0; k < FloatRegisterImpl::max_slots_per_register; ++k) {
+      regName[slot++] = fpr->name();
+    }
+    fpr = fpr->successor();
+  }
+  // every remaining slot gets a generic placeholder name
+  while (slot < ConcreteRegisterImpl::number_of_registers) {
+    regName[slot++] = "NON-GPR-FPR";
+  }
+}
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
new file mode 100644
index 0000000000..6f613a8f11
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VMREG_RISCV_HPP
+#define CPU_RISCV_VMREG_RISCV_HPP
+
+inline bool is_Register() {  // true when value() lies in the general-purpose slot range below max_gpr
+  return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr;  // the unsigned compare also rejects negative values
+}
+
+inline bool is_FloatRegister() {  // true when value() lies in the float slot range [max_gpr, max_fpr)
+  return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr;
+}
+
+inline Register as_Register() {  // converts a general-purpose slot number back to its Register
+  assert(is_Register(), "must be");
+  return ::as_Register(value() / RegisterImpl::max_slots_per_register);  // slot number -> register number
+}
+
+inline FloatRegister as_FloatRegister() {  // converts a float slot number back to its FloatRegister
+  assert(is_FloatRegister() && is_even(value()), "must be");  // only even slot values are accepted
+  return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /  // offset within the float range ...
+                            FloatRegisterImpl::max_slots_per_register);  // ... divided down to a register number
+}
+
+inline bool is_concrete() {  // reports whether value() is even
+  assert(is_reg(), "must be");
+  return is_even(value());  // NOTE(review): presumably even values are the first slot of a register - confirm
+}
+
+#endif // CPU_RISCV_VMREG_RISCV_HPP
diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
new file mode 100644
index 0000000000..06b70020b4
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
+#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
+
+inline VMReg RegisterImpl::as_VMReg() const {
+  // noreg maps to the Bad VMReg; every other register maps to its first slot,
+  // i.e. encoding() scaled by the number of slots per register.
+  return (this == noreg) ? VMRegImpl::Bad()
+                         : VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register);
+}
+
+inline VMReg FloatRegisterImpl::as_VMReg() const {
+  // First slot of this float register: slots start at max_gpr and advance by max_slots_per_register.
+  return VMRegImpl::as_VMReg(ConcreteRegisterImpl::max_gpr + (encoding() * FloatRegisterImpl::max_slots_per_register));
+}
+
+inline VMReg VectorRegisterImpl::as_VMReg() const {
+  // First slot of this vector register: slots start at max_fpr and advance by max_slots_per_register.
+  return VMRegImpl::as_VMReg(ConcreteRegisterImpl::max_fpr + (encoding() * VectorRegisterImpl::max_slots_per_register));
+}
+
+#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP
diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
new file mode 100644
index 0000000000..448bb09ba7
--- /dev/null
+++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "assembler_riscv.inline.hpp"
+#include "code/vtableStubs.hpp"
+#include "interp_masm_riscv.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/instanceKlass.hpp"
+#include "oops/klassVtable.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_riscv.inline.hpp"
+#ifdef COMPILER2
+#include "opto/runtime.hpp"
+#endif
+
+// machine-dependent part of VtableStubs: create VtableStub of correct size and
+// initialize its code
+
+#define __ masm->
+
+#ifndef PRODUCT
+extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
+#endif
+
+VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {  // Emits the dispatch stub for vtable slot vtable_index; NULL if allocation fails.
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(true);
+  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
+  // Can be NULL if there is no free space in the code cache.
+  if (s == NULL) {
+    return NULL;
+  }
+
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc = NULL;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+  assert_cond(masm != NULL);
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+    __ increment(Address(t2));
+  }
+#endif
+
+  // The receiver is expected in j_rarg0 (checked below); nothing is loaded from the stack.
+  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
+  // get receiver klass into t2; the pc of this load is recorded as npe_addr
+  address npe_addr = __ pc();
+  __ load_klass(t2, j_rarg0);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label L;
+    start_pc = __ pc();
+
+    // check offset vs vtable length
+    __ lwu(t0, Address(t2, Klass::vtable_length_offset()));
+    __ mvw(t1, vtable_index * vtableEntry::size());
+    __ bgt(t0, t1, L);
+    __ enter();
+    __ mv(x12, vtable_index);
+
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12);
+    const ptrdiff_t estimate = 256;
+    const ptrdiff_t codesize = __ pc() - start_pc;
+    slop_delta = estimate - codesize;  // call_VM varies in length, depending on data
+    slop_bytes += slop_delta;
+    assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize);
+
+    __ leave();
+    __ bind(L);
+  }
+#endif // !PRODUCT
+
+  start_pc = __ pc();
+  __ lookup_virtual_method(t2, vtable_index, xmethod);
+  // lookup_virtual_method emits a variable number of instructions:
+  //   4 instructions (maximum encountered in the normal case): li (lui + addiw) + add + ld
+  //   1 instruction (best case): a single ld; the size estimate used below is 16 bytes
+  slop_delta = 16 - (int)(__ pc() - start_pc);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
+
+#ifndef PRODUCT
+  if (DebugVtables) {
+    Label L;
+    __ beqz(xmethod, L);
+    __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+    __ bnez(t0, L);
+    __ stop("Vtable entry is NULL");
+    __ bind(L);
+  }
+#endif // !PRODUCT
+
+  // t2: receiver klass (loaded above)
+  // xmethod: Method*
+  // j_rarg0: receiver
+  address ame_addr = __ pc();
+  __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+  __ jr(t0);
+
+  masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
+
+  return s;
+}
+
+VtableStub* VtableStubs::create_itable_stub(int itable_index) {  // Emits the dispatch stub for itable slot itable_index; NULL if allocation fails.
+  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
+  const int stub_code_length = code_size_limit(false);
+  VtableStub* s = new(stub_code_length) VtableStub(false, itable_index);
+  // Can be NULL if there is no free space in the code cache.
+  if (s == NULL) {
+    return NULL;
+  }
+  // Count unused bytes in instruction sequences of variable size.
+  // We add them to the computed buffer size in order to avoid
+  // overflow in subsequently generated stubs.
+  address   start_pc = NULL;
+  int       slop_bytes = 0;
+  int       slop_delta = 0;
+
+  ResourceMark    rm;
+  CodeBuffer      cb(s->entry_point(), stub_code_length);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+  assert_cond(masm != NULL);
+
+#if (!defined(PRODUCT) && defined(COMPILER2))
+  if (CountCompiledCalls) {
+    __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
+    __ increment(Address(x18));
+  }
+#endif
+
+  // The receiver is expected in j_rarg0 (checked below); nothing is loaded from the stack.
+  assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");
+
+  // Entry arguments:
+  //  t1: CompiledICHolder
+  //  j_rarg0: Receiver
+
+  // This stub is called from compiled code which has no callee-saved registers,
+  // so all registers except arguments are free at this point.
+  const Register recv_klass_reg     = x18;
+  const Register holder_klass_reg   = x19; // declaring interface klass (DECC)
+  const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC)
+  const Register temp_reg           = x28;
+  const Register temp_reg2          = x29;
+  const Register icholder_reg       = t1;
+
+  Label L_no_such_interface;
+
+  __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
+  __ ld(holder_klass_reg,   Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
+
+  start_pc = __ pc();
+
+  // get receiver klass (also an implicit null-check)
+  address npe_addr = __ pc();
+  __ load_klass(recv_klass_reg, j_rarg0);
+
+  // Receiver subtype check against REFC.
+  __ lookup_interface_method(// inputs: rec. class, interface
+                             recv_klass_reg, resolved_klass_reg, noreg,
+                             // outputs:  scan temp. reg1, scan temp. reg2
+                             temp_reg2, temp_reg,
+                             L_no_such_interface,
+                             /*return_method=*/false);
+
+  const ptrdiff_t typecheckSize = __ pc() - start_pc;  // bytes emitted for the klass load and the REFC check
+  start_pc = __ pc();
+
+  // Get selected method from declaring class and itable index
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             recv_klass_reg, holder_klass_reg, itable_index,
+                             // outputs: method, scan temp. reg
+                             xmethod, temp_reg,
+                             L_no_such_interface);
+
+  const ptrdiff_t lookupSize = __ pc() - start_pc;  // bytes emitted for the method lookup
+
+  // Reduce "estimate" such that "padding" does not drop below 8.
+  const ptrdiff_t estimate = 256;
+  const ptrdiff_t codesize = typecheckSize + lookupSize;
+  slop_delta = (int)(estimate - codesize);
+  slop_bytes += slop_delta;
+  assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
+
+#ifdef ASSERT
+  if (DebugVtables) {
+    Label L2;
+    __ beqz(xmethod, L2);
+    __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+    __ bnez(t0, L2);
+    __ stop("compiler entrypoint is null");
+    __ bind(L2);
+  }
+#endif // ASSERT
+
+  // xmethod: Method*
+  // j_rarg0: receiver
+  address ame_addr = __ pc();
+  __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
+  __ jr(t0);
+
+  __ bind(L_no_such_interface);
+  // Handle IncompatibleClassChangeError in itable stubs.
+  // To get a more detailed error message we force resolving of the
+  // call site by jumping to the "handle wrong method" stub, and so
+  // let the interpreter runtime do all the
+  // dirty work.
+  assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order");
+  __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+
+  masm->flush();
+  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
+
+  return s;
+}
+
+int VtableStub::pd_code_alignment() {
+  // The RISC-V instruction cache line size is not an architected constant,
+  // so stubs are simply aligned on the machine word size.
+  return wordSize;
+}
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 2842a11f92..208a374eea 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
   strncpy(cpuinfo, "IA64", length);
 #elif defined(PPC)
   strncpy(cpuinfo, "PPC64", length);
+#elif defined(RISCV)
+  strncpy(cpuinfo, "RISCV64", length);
 #elif defined(S390)
   strncpy(cpuinfo, "S390", length);
 #elif defined(SPARC)
@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() {
     IA64_ONLY(256 * M)
     PPC_ONLY(4 * M)
     S390_ONLY(1 * M)
-    SPARC_ONLY(4 * M);
+    SPARC_ONLY(4 * M)
+    RISCV64_ONLY(2 * M);
 #endif // ZERO
 
   FILE *fp = fopen("/proc/meminfo", "r");
diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp
new file mode 100644
index 0000000000..f2610af6cd
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// nothing required here
diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
new file mode 100644
index 0000000000..4a1ebee8b0
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+
+#include "runtime/vm_version.hpp"
+
+// Implementation of class atomic
+
+// Note that memory_order_conservative requires a full barrier after atomic stores.
+// See https://patchwork.kernel.org/patch/3575821/
+
+#if defined(__clang_major__)  // any clang version
+#define FULL_COMPILER_ATOMIC_SUPPORT
+#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2))  // GCC 13.2 or newer
+#define FULL_COMPILER_ATOMIC_SUPPORT
+#endif
+// NOTE(review): READ_/WRITE_MEM_BARRIER end in ';' (FULL_MEM_BARRIER does not), so "X;" yields an extra empty statement.
+#define FULL_MEM_BARRIER  __sync_synchronize()
+#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
+#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
+
+template<size_t byte_size>
+struct Atomic::PlatformAdd
+  : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
+{
+  template<typename I, typename D>
+  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {  // atomically adds add_value to *dest; returns the new value
+#ifndef FULL_COMPILER_ATOMIC_SUPPORT
+    // Sub-word add_and_fetch is not provided for older compilers; if it is ever
+    // needed it must be hand-written here, because libatomic is not used.
+    STATIC_ASSERT(byte_size >= 4);
+#endif
+    // 'order' is not consulted: a release add followed by a full barrier is always emitted.
+    D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+    FULL_MEM_BARRIER;
+    return res;
+  }
+
+  template<typename I, typename D>
+  D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const {  // same add, but returns the value before the add
+    return add_and_fetch(add_value, dest, order) - add_value;  // recover the old value from the new one
+  }
+};
+
+#ifndef FULL_COMPILER_ATOMIC_SUPPORT
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest __attribute__((unused)),
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T));
+  // Byte-wide compare-and-exchange, built from an lr.w/sc.w loop on the aligned 32-bit word that contains *dest.
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+
+  uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3)));  // round dest down to 4 bytes
+  int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24
+
+  uint64_t mask = 0xfful << shift; // 0x00000000..FF..
+  uint64_t remask = ~mask;         // 0xFFFFFFFF..00..
+
+  uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift;  // widen to 64-bit 0x00000000..CC..
+  uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE..
+
+  uint64_t old_value;  // whole word as loaded by lr.w
+  uint64_t rc_temp;    // scratch: masked byte, then merged word, then sc.w status
+
+  __asm__ __volatile__ (
+    "1:  lr.w      %0, %2      \n\t" // load-reserve the aligned word
+    "    and       %1, %0, %5  \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX..
+    "    bne       %1, %3, 2f  \n\t" // compare 64-bit w_cv
+    "    and       %1, %0, %6  \n\t" // remove old byte
+    "    or        %1, %1, %4  \n\t" // add new byte
+    "    sc.w      %1, %1, %2  \n\t" // store new word
+    "    bnez      %1, 1b      \n\t" // retry while sc.w reports a non-zero status
+    "2:                        \n\t"
+    : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst)
+    : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask)
+    : "memory" );
+
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+
+  return (T)((old_value & mask) >> shift);  // the byte that was observed at dest, whether or not the store happened
+}
+#endif
+
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
+                                                     T volatile* dest,
+                                                     atomic_memory_order order) const {
+#ifndef FULL_COMPILER_ATOMIC_SUPPORT
+  // Sub-word xchg is not provided for older compilers; if it is ever needed
+  // it must be hand-written here, because libatomic is not used.
+  STATIC_ASSERT(byte_size >= 4);
+#endif
+  // Atomically stores exchange_value into *dest and returns the previous value; 'order' is not consulted.
+  STATIC_ASSERT(byte_size == sizeof(T));
+  T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
+  FULL_MEM_BARRIER;
+  return res;
+}
+
+// __attribute__((unused)) on dest is to get rid of spurious GCC warnings.
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
+                                                        T volatile* dest __attribute__((unused)),
+                                                        T compare_value,
+                                                        atomic_memory_order order) const {
+  // Generic compare-and-exchange via the compiler builtin; returns the value observed at *dest.
+#ifndef FULL_COMPILER_ATOMIC_SUPPORT
+  STATIC_ASSERT(byte_size >= 4);
+#endif
+
+  STATIC_ASSERT(byte_size == sizeof(T));
+  T value = compare_value;  // expected value; the builtin overwrites it with the current value on failure
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+  // The builtin itself is relaxed; ordering comes only from the surrounding full barriers.
+  __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
+                            __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+  return value;
+}
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest __attribute__((unused)),
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));  // 32-bit compare-and-exchange via a hand-written lr.w/sc.w loop
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+  T rv;     // value observed at *dest
+  int tmp;  // sign-extended compare value, then sc.w status
+  __asm volatile(
+    "1:\n\t"
+    " addiw     %[tmp], %[cv], 0\n\t" // make sure compare_value is sign-extended
+    " lr.w.aq   %[rv], (%[dest])\n\t" // load-reserve the current value
+    " bne       %[rv], %[tmp], 2f\n\t" // mismatch: leave without storing
+    " sc.w.rl   %[tmp], %[ev], (%[dest])\n\t" // try to store the new value
+    " bnez      %[tmp], 1b\n\t" // retry while sc.w reports a non-zero status
+    "2:\n\t"
+    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
+    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
+    : "memory");
+  if (order != memory_order_relaxed) {
+    FULL_MEM_BARRIER;
+  }
+  return rv;
+}
+
+#undef FULL_COMPILER_ATOMIC_SUPPORT
+#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp
new file mode 100644
index 0000000000..28868c7640
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2   Bytes::swap_u2(u2 x) {  // reverses the byte order of a 16-bit value
+  return bswap_16(x);  // from <byteswap.h>
+}
+
+inline u4   Bytes::swap_u4(u4 x) {  // reverses the byte order of a 32-bit value
+  return bswap_32(x);  // from <byteswap.h>
+}
+
+inline u8 Bytes::swap_u8(u8 x) {  // reverses the byte order of a 64-bit value
+  return bswap_64(x);  // from <byteswap.h>
+}
+
+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp
new file mode 100644
index 0000000000..bdf36d6b4c
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
+#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
+
+static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {  // copies count HeapWords; ranges may overlap
+  (void)memmove(to, from, count * HeapWordSize);  // memmove because source and destination may overlap
+}
+
+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {  // copies count HeapWords between non-overlapping ranges
+  switch (count) {  // up to 8 words are copied with unrolled assignments, highest index first
+    case 8:  to[7] = from[7];   // fall through
+    case 7:  to[6] = from[6];   // fall through
+    case 6:  to[5] = from[5];   // fall through
+    case 5:  to[4] = from[4];   // fall through
+    case 4:  to[3] = from[3];   // fall through
+    case 3:  to[2] = from[2];   // fall through
+    case 2:  to[1] = from[1];   // fall through
+    case 1:  to[0] = from[0];   // fall through
+    case 0:  break;
+    default:  // more than 8 words: leave it to memcpy
+      memcpy(to, from, count * HeapWordSize);
+      break;
+  }
+}
+
+static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {  // word-by-word copy between non-overlapping ranges
+  switch (count) {  // up to 8 words are copied with unrolled assignments, highest index first
+    case 8:  to[7] = from[7];   // fall through
+    case 7:  to[6] = from[6];   // fall through
+    case 6:  to[5] = from[5];   // fall through
+    case 5:  to[4] = from[4];   // fall through
+    case 4:  to[3] = from[3];   // fall through
+    case 3:  to[2] = from[2];   // fall through
+    case 2:  to[1] = from[1];   // fall through
+    case 1:  to[0] = from[0];   // fall through
+    case 0:  break;
+    default:  // more than 8 words: explicit word loop instead of memcpy
+      while (count-- > 0) {  // NOTE(review): presumably so every word is written by one store - confirm
+        *to++ = *from++;
+      }
+      break;
+  }
+}
+
+static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {  // no special aligned path: same as pd_conjoint_words
+  pd_conjoint_words(from, to, count);
+}
+
+static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {  // no special aligned path: same as pd_disjoint_words
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(const void* from, void* to, size_t count) {  // copies count bytes; ranges may overlap
+  (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {  // same implementation as pd_conjoint_bytes
+  pd_conjoint_bytes(from, to, count);
+}
+
+static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {  // delegates to the out-of-line jshort copy routine
+  _Copy_conjoint_jshorts_atomic(from, to, count);
+}
+
+static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {  // delegates to the out-of-line jint copy routine
+  _Copy_conjoint_jints_atomic(from, to, count);
+}
+
+static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {  // delegates to the out-of-line jlong copy routine
+  _Copy_conjoint_jlongs_atomic(from, to, count);
+}
+
+static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {  // copies count oops by reusing the jlong copy routine
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size.");  // the casts below rely on equal element size
+  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {  // delegates to the out-of-line byte array copy
+  _Copy_arrayof_conjoint_bytes(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {  // delegates to the out-of-line jshort array copy
+  _Copy_arrayof_conjoint_jshorts(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {  // delegates to the out-of-line jint array copy
+  _Copy_arrayof_conjoint_jints(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {  // delegates to the out-of-line jlong array copy
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {  // copies count oop elements by reusing the jlong array copy
+  assert(!UseCompressedOops, "oops must not be compressed when copied as jlongs");
+  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
+  _Copy_arrayof_conjoint_jlongs(from, to, count);
+}
+
+#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
new file mode 100644
index 0000000000..297414bfcd
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
+
+// Sets the default values for the platform-dependent flags used by the runtime system
+// on linux-riscv (see globals.hpp for the flag declarations).
+
+define_pd_global(bool,  DontYieldALot,            false);
+define_pd_global(intx,  ThreadStackSize,          2048); // 0 => use system default
+define_pd_global(intx,  VMThreadStackSize,        2048); // NOTE(review): presumably same unit as ThreadStackSize - confirm in globals.hpp
+
+define_pd_global(intx,  CompilerThreadStackSize,  2048); // NOTE(review): presumably same unit as ThreadStackSize - confirm in globals.hpp
+
+define_pd_global(uintx, JVMInvokeMethodSlack,     8192);
+
+// Used on 64-bit platforms for the UseCompressedOops base address
+define_pd_global(uintx, HeapBaseMinAddress,       2 * G);
+
+#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
new file mode 100644
index 0000000000..5b5d35553f
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
+
+// Included in orderAccess.hpp header file.
+
+#include "runtime/vm_version.hpp"
+
+// Implementation of class OrderAccess.
+
+// Barrier primitives come first; the four ordering operations delegate to them.
+inline void OrderAccess::acquire() { READ_MEM_BARRIER; }
+inline void OrderAccess::release() { WRITE_MEM_BARRIER; }
+inline void OrderAccess::fence()   { FULL_MEM_BARRIER; }
+inline void OrderAccess::loadload() {
+  acquire();
+}
+inline void OrderAccess::storestore() {
+  release();
+}
+inline void OrderAccess::loadstore() {
+  acquire();
+}
+inline void OrderAccess::storeload() {
+  fence();
+}
+
+
+template<size_t byte_size> struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE> {
+  template <typename T> T operator()(const volatile T* p) const {
+    T loaded; __atomic_load(const_cast<T*>(p), &loaded, __ATOMIC_ACQUIRE);  // acquire-load of *p
+    return loaded;
+  }
+};
+
+template<size_t byte_size> struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X> {
+  template <typename T> void operator()(T v, volatile T* p) const {
+    // Store v to *p with release ordering via the __atomic_store builtin.
+    __atomic_store(const_cast<T*>(p), &v, __ATOMIC_RELEASE);
+  }
+};
+
+template<size_t byte_size> struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE> {
+  template <typename T> void operator()(T v, volatile T* p) const {
+    release_store(p, v);    // release-store first ...
+    OrderAccess::fence();   // ... then a full fence
+  }
+};
+
+#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
new file mode 100644
index 0000000000..8b772892b4
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// no precompiled headers
+#include "asm/macroAssembler.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/codeCache.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nativeInst.hpp"
+#include "code/vtableStubs.hpp"
+#include "interpreter/interpreter.hpp"
+#include "jvm.h"
+#include "memory/allocation.inline.hpp"
+#include "os_share_linux.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/extendedPC.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/java.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/osThread.hpp"
+#include "runtime/safepointMechanism.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/thread.inline.hpp"
+#include "runtime/timer.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/events.hpp"
+#include "utilities/vmError.hpp"
+
+// put OS-includes here
+# include <dlfcn.h>
+# include <fpu_control.h>
+# include <errno.h>
+# include <pthread.h>
+# include <signal.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <sys/mman.h>
+# include <sys/resource.h>
+# include <sys/socket.h>
+# include <sys/stat.h>
+# include <sys/time.h>
+# include <sys/types.h>
+# include <sys/utsname.h>
+# include <sys/wait.h>
+# include <poll.h>
+# include <pwd.h>
+# include <ucontext.h>
+# include <unistd.h>
+
+#define REG_LR       1  // __gregs index of the return address register (x1/ra in reg_abi_names)
+#define REG_FP       8  // __gregs index of the frame pointer register (x8/s0 in reg_abi_names)
+
+NOINLINE address os::current_stack_pointer() {
+  return static_cast<address>(__builtin_frame_address(0));  // frame address of this non-inlined function
+}
+
+char* os::non_memory_address_word() {
+  // The result must never look like an address returned by reserve_memory.
+  return reinterpret_cast<char*>(-1);
+}
+
+address os::Linux::ucontext_get_pc(const ucontext_t * uc) {
+  return reinterpret_cast<address>(uc->uc_mcontext.__gregs[REG_PC]);  // pc slot of the saved registers
+}
+
+void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) {
+  uc->uc_mcontext.__gregs[REG_PC] = reinterpret_cast<intptr_t>(pc);  // overwrite the saved pc slot
+}
+
+intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) {
+  return reinterpret_cast<intptr_t*>(uc->uc_mcontext.__gregs[REG_SP]);  // sp slot of the saved registers
+}
+
+intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) {
+  return reinterpret_cast<intptr_t*>(uc->uc_mcontext.__gregs[REG_FP]);  // fp slot of the saved registers
+}
+
+// Used by Forte Analyzer AsyncGetCallTrace profiling, where the thread has
+// been interrupted by SIGPROF. Unlike os::Solaris::fetch_frame_from_ucontext(),
+// no attempt is made here to skip nested signal frames, so this simply
+// forwards to os::fetch_frame_from_context().
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread,
+  const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) {
+  // All three are required here, although the callee tolerates NULL out-pointers.
+  assert(thread != NULL, "just checking");
+  assert(ret_sp != NULL, "just checking");
+  assert(ret_fp != NULL, "just checking");
+
+  ExtendedPC epc = os::fetch_frame_from_context(uc, ret_sp, ret_fp);
+  return epc;
+}
+
+ExtendedPC os::fetch_frame_from_context(const void* ucVoid,
+                    intptr_t** ret_sp, intptr_t** ret_fp) {
+  // Pulls pc, sp and fp out of a ucontext. sp and fp are written through
+  // ret_sp / ret_fp only when those out-pointers are non-NULL.
+  const ucontext_t* ctx = (const ucontext_t*)ucVoid;
+
+  if (ctx == NULL) {
+    // No context: hand back NULL everywhere so the caller can detect it
+    // by checking the returned (empty) ExtendedPC.
+    if (ret_sp != NULL) {
+      *ret_sp = (intptr_t *)NULL;
+    }
+    if (ret_fp != NULL) {
+      *ret_fp = (intptr_t *)NULL;
+    }
+    return ExtendedPC(NULL);
+  }
+
+  if (ret_sp != NULL) {
+    *ret_sp = os::Linux::ucontext_get_sp(ctx);
+  }
+  if (ret_fp != NULL) {
+    *ret_fp = os::Linux::ucontext_get_fp(ctx);
+  }
+
+  return ExtendedPC(os::Linux::ucontext_get_pc(ctx));
+}
+
+frame os::fetch_frame_from_context(const void* ucVoid) {
+  // Materialize a frame from the sp, fp and pc recorded in the context.
+  intptr_t *sp = NULL, *fp = NULL;
+  const address pc = fetch_frame_from_context(ucVoid, &sp, &fp).pc();
+  return frame(sp, fp, pc);
+}
+
+bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
+  address pc = (address) os::Linux::ucontext_get_pc(uc);
+  if (!Interpreter::contains(pc)) {
+    // Compiled code needs more care than the interpreter case below.
+    assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
+    CodeBlob* blob = CodeCache::find_blob(pc);
+    if (blob == NULL || !blob->is_nmethod() || blob->is_frame_complete_at(pc)) {
+      // We cannot tell where the pc points to, so let the caller fall back
+      // to the default stack overflow handling.
+      return false;
+    }
+
+    // The stack banging in compiled code runs before RA has been saved in
+    // the frame: RA is still live, and SP and FP belong to the caller.
+    intptr_t* caller_fp = os::Linux::ucontext_get_fp(uc);
+    intptr_t* caller_sp = os::Linux::ucontext_get_sp(uc);
+    address caller_pc = (address)(uc->uc_mcontext.__gregs[REG_LR]
+                                  - NativeInstruction::instruction_size);
+    *fr = frame(caller_sp, caller_fp, caller_pc);
+    if (!fr->is_java_frame()) {
+      assert(fr->safe_for_sender(thread), "Safety check");
+      assert(!fr->is_first_frame(), "Safety check");
+      *fr = fr->java_sender();
+    }
+  } else {
+    // The interpreter bangs the stack after the fixed frame header has been
+    // generated, the compilers before. To keep interpreted and compiled
+    // frames semantically consistent, report the Java sender of the
+    // current frame.
+    *fr = os::fetch_frame_from_context(uc);
+    if (!fr->is_first_java_frame()) {
+      assert(fr->safe_for_sender(thread), "Safety check");
+      *fr = fr->java_sender();
+    }
+  }
+
+  assert(fr->is_java_frame(), "Safety check");
+  return true;
+}
+
+// gcc saves the frame pointer on the stack by default; -fomit-frame-pointer may turn that off.
+frame os::get_sender_for_C_frame(frame* fr) {
+  frame sender(fr->sender_sp(), fr->link(), fr->sender_pc());
+  return sender;
+}
+
+NOINLINE frame os::current_frame() {
+  // Returns the caller's frame, or an empty frame when the stack is not walkable.
+  intptr_t **frame_addr = (intptr_t **)__builtin_frame_address(0);
+  if (frame_addr == NULL) {
+    ShouldNotReachHere();
+    return frame();
+  }
+
+  frame myframe((intptr_t*)os::current_stack_pointer(),
+                frame_addr[frame::link_offset],
+                CAST_FROM_FN_PTR(address, os::current_frame));
+  if (os::is_first_C_frame(&myframe)) {
+    // stack is not walkable
+    return frame();
+  }
+  return os::get_sender_for_C_frame(&myframe);
+}
+
+// Signal handler entry point. Returns nonzero once the signal has been dealt with; zero only when it is unrecognized and abort_if_unrecognized is zero.
+extern "C" JNIEXPORT int
+JVM_handle_linux_signal(int sig,
+                        siginfo_t* info,
+                        void* ucVoid,
+                        int abort_if_unrecognized) {
+  ucontext_t* uc = (ucontext_t*) ucVoid;
+
+  Thread* t = Thread::current_or_null_safe();
+
+  // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
+  // (no destructors can be run)
+  os::ThreadCrashProtection::check_crash_protection(sig, t);
+
+  SignalHandlerMark shm(t);
+
+  // Note: it's not uncommon that JNI code uses signal/sigset to install
+  // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
+  // or have a SIGILL handler when detecting CPU type). When that happens,
+  // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
+  // avoid unnecessary crash when libjsig is not preloaded, try handle signals
+  // that do not require siginfo/ucontext first.
+
+  if (sig == SIGPIPE || sig == SIGXFSZ) {
+    // allow chained handler to go first
+    if (os::Linux::chained_handler(sig, info, ucVoid)) {
+      return true;
+    } else {
+      // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
+      return true;
+    }
+  }
+
+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
+  if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
+    if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
+      return 1;
+    }
+  }
+#endif
+
+  JavaThread* thread = NULL;
+  VMThread* vmthread = NULL;  // NOTE(review): assigned below but never read in this function
+  if (os::Linux::signal_handlers_are_installed) {
+    if (t != NULL ){
+      if(t->is_Java_thread()) {
+        thread = (JavaThread *) t;
+      }
+      else if(t->is_VM_thread()){
+        vmthread = (VMThread *)t;
+      }
+    }
+  }
+
+  // Handle SafeFetch faults
+  if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) {
+    address const pc = (address) os::Linux::ucontext_get_pc(uc);
+    if (pc && StubRoutines::is_safefetch_fault(pc)) {
+      os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
+      return 1;
+    }
+  }
+
+  // decide if this trap can be handled by a stub
+  address stub = NULL;
+
+  address pc          = NULL;
+
+  //%note os_trap_1
+  if (info != NULL && uc != NULL && thread != NULL) {
+    pc = (address) os::Linux::ucontext_get_pc(uc);
+
+    // Handle ALL stack overflow variations here
+    if (sig == SIGSEGV) {
+      address addr = (address) info->si_addr;
+
+      // check if fault address is within thread stack
+      if (thread->on_local_stack(addr)) {
+        // stack overflow
+        if (thread->in_stack_yellow_reserved_zone(addr)) {
+          if (thread->thread_state() == _thread_in_Java) {
+            if (thread->in_stack_reserved_zone(addr)) {
+              frame fr;
+              if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
+                assert(fr.is_java_frame(), "Must be a Java frame");
+                frame activation =
+                  SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
+                if (activation.sp() != NULL) {
+                  thread->disable_stack_reserved_zone();
+                  if (activation.is_interpreted_frame()) {
+                    thread->set_reserved_stack_activation((address)(
+                      activation.fp() + frame::interpreter_frame_initial_sp_offset));
+                  } else {
+                    thread->set_reserved_stack_activation((address)activation.unextended_sp());
+                  }
+                  return 1;
+                }
+              }
+            }
+            // Throw a stack overflow exception.  Guard pages will be reenabled
+            // while unwinding the stack.
+            thread->disable_stack_yellow_reserved_zone();
+            stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+          } else {
+            // Thread was in the vm or native code.  Return and try to finish.
+            thread->disable_stack_yellow_reserved_zone();
+            return 1;
+          }
+        } else if (thread->in_stack_red_zone(addr)) {
+          // Fatal red zone violation.  Disable the guard pages and fall through
+          // to handle_unexpected_exception way down below.
+          thread->disable_stack_red_zone();
+          tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+
+          // This is a likely cause, but hard to verify. Let's just print
+          // it as a hint.
+          tty->print_raw_cr("Please check if any of your loaded .so files has "
+                            "enabled executable stack (see man page execstack(8))");
+        } else {
+          // Accessing stack address below sp may cause SEGV if current
+          // thread has MAP_GROWSDOWN stack. This should only happen when
+          // current thread was created by user code with MAP_GROWSDOWN flag
+          // and then attached to VM. See notes in os_linux.cpp.
+          if (thread->osthread()->expanding_stack() == 0) {
+             thread->osthread()->set_expanding_stack();
+             if (os::Linux::manually_expand_stack(thread, addr)) {
+               thread->osthread()->clear_expanding_stack();
+               return 1;
+             }
+             thread->osthread()->clear_expanding_stack();
+          } else {
+             fatal("recursive segv. expanding stack.");
+          }
+        }
+      }
+    }
+
+    if (thread->thread_state() == _thread_in_Java) {
+      // Java thread running in Java code => find exception handler if any
+      // a fault inside compiled code, the interpreter, or a stub
+
+      // Handle signal from NativeJump::patch_verified_entry().
+      if ((sig == SIGILL || sig == SIGTRAP)
+          && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
+        if (TraceTraps) {
+          tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL");
+        }
+        stub = SharedRuntime::get_handle_wrong_method_stub();
+      } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) {
+        stub = SharedRuntime::get_poll_stub(pc);
+      } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) {
+        // BugId 4454115: A read from a MappedByteBuffer can fault
+        // here if the underlying file has been truncated.
+        // Do not crash the VM in such a case.
+        CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
+        CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
+        if (nm != NULL && nm->has_unsafe_access()) {
+          address next_pc = pc + NativeCall::instruction_size;
+          stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
+        }
+      } else if (sig == SIGFPE  &&
+                 (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) {
+        stub =
+          SharedRuntime::
+          continuation_for_implicit_exception(thread,
+                                              pc,
+                                              SharedRuntime::
+                                              IMPLICIT_DIVIDE_BY_ZERO);
+      } else if (sig == SIGSEGV &&
+               !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) {
+          // Determination of interpreter/vtable stub/compiled code null exception
+          stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
+      }
+    } else if (thread->thread_state() == _thread_in_vm &&
+               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+               thread->doing_unsafe_access()) {
+      address next_pc = pc + NativeCall::instruction_size;
+      stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
+    }
+
+    // jni_fast_Get<Primitive>Field can trap at certain pc's if a GC kicks in
+    // and the heap gets shrunk before the field access.
+    if ((sig == SIGSEGV) || (sig == SIGBUS)) {
+      address addr = JNI_FastGetField::find_slowcase_pc(pc);
+      if (addr != (address)-1) {
+        stub = addr;
+      }
+    }
+
+    // Check to see if we caught the safepoint code in the
+    // process of write protecting the memory serialization page.
+    // It write enables the page immediately after protecting it
+    // so we can just return to retry the write.
+    if ((sig == SIGSEGV) &&
+        os::is_memory_serialize_page(thread, (address) info->si_addr)) {
+      // Block current thread until the memory serialize page permission restored.
+      os::block_on_serialize_page_trap();
+      return true;
+    }
+  }
+
+  if (stub != NULL) {
+    // save all thread context in case we need to restore it
+    if (thread != NULL) thread->set_saved_exception_pc(pc);
+
+    os::Linux::ucontext_set_pc(uc, stub);
+    return true;
+  }
+
+  // signal-chaining
+  if (os::Linux::chained_handler(sig, info, ucVoid)) {
+     return true;
+  }
+
+  if (!abort_if_unrecognized) {
+    // the caller wants another chance at this signal, so report it as unhandled
+    return false;
+  }
+
+  if (pc == NULL && uc != NULL) {
+    pc = os::Linux::ucontext_get_pc(uc);
+  }
+
+  // unmask current signal
+  sigset_t newset;
+  sigemptyset(&newset);
+  sigaddset(&newset, sig);
+  sigprocmask(SIG_UNBLOCK, &newset, NULL);
+
+  VMError::report_and_die(t, sig, pc, info, ucVoid);
+
+  ShouldNotReachHere();
+  return true; // Mute compiler
+}
+
+void os::Linux::init_thread_fpu_state(void) {  // empty: nothing is set up here
+}
+
+int os::Linux::get_fpu_control_word(void) {  // always reports 0
+  return 0;
+}
+
+void os::Linux::set_fpu_control_word(int fpu_control) {  // empty: fpu_control is ignored
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// thread stack
+
+// Minimum usable stack sizes required to get to user code. Space for
+// HotSpot guard pages is added later.
+size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K;
+size_t os::Posix::_java_thread_min_stack_allowed = 72 * K;
+size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K;
+
+// Default stack size for thr_type: compiler threads need a larger stack (4M)
+// than every other thread type (1M).
+size_t os::Posix::default_stack_size(os::ThreadType thr_type) {
+  const bool is_compiler = (thr_type == os::compiler_thread);
+  return is_compiler ? 4 * M : 1 * M;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+// helper functions for fatal error handler
+
+static const char* reg_abi_names[] = {
+  // Indexed like uc_mcontext.__gregs in the printers below: slot 0 is the
+  // pc, slots 1..31 are x1..x31 with their ABI names.
+  "pc", "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)",
+  "x5(t0)", "x6(t1)", "x7(t2)", "x8(s0)", "x9(s1)",
+  "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)",
+  "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)",
+  "x26(s10)", "x27(s11)", "x28(t3)", "x29(t4)", "x30(t5)", "x31(t6)"
+};
+
+void os::print_context(outputStream *st, const void *context) {
+  if (context == NULL) return;
+
+  const ucontext_t *ctx = (const ucontext_t*)context;
+
+  // Each saved register: its ABI name followed by print_location's
+  // description of the value.
+  st->print_cr("Registers:");
+  for (int reg = 0; reg != 32; ++reg) {
+    st->print("%-*.*s=", 8, 8, reg_abi_names[reg]);
+    print_location(st, ctx->uc_mcontext.__gregs[reg]);
+  }
+  st->cr();
+
+  // Hex dump of the 64 words starting at the saved sp.
+  intptr_t *top = (intptr_t *)os::Linux::ucontext_get_sp(ctx);
+  st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(top));
+  print_hex_dump(st, (address)top, (address)(top + 64), sizeof(intptr_t));
+  st->cr();
+
+  // Inspecting memory near pc may be unsafe (pc can point to garbage if an
+  // nmethod entry point is corrupted), so this comes last.
+  print_instructions(st, os::Linux::ucontext_get_pc(ctx), sizeof(char));
+  st->cr();
+}
+
+void os::print_register_info(outputStream *st, const void *context) {
+  // Dumps name=value for each saved general register of the context.
+  if (context == NULL) {
+    return;
+  }
+
+  const ucontext_t *ctx = (const ucontext_t*)context;
+
+  st->print_cr("Register to memory mapping:");
+  st->cr();
+
+  // Only the 32 entries of the general-purpose register area are listed
+  // (slot 0 holds the pc); each one is printed as its ABI name followed
+  // by the raw value.
+  for (int reg = 0; reg != 32; ++reg) {
+    const uintptr_t value = (uintptr_t)ctx->uc_mcontext.__gregs[reg];
+    st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[reg], value);
+  }
+  st->cr();
+}
+
+void os::setup_fpu() {  // empty: no FPU setup is performed here
+}
+
+#ifndef PRODUCT
+void os::verify_stack_alignment() {  // non-PRODUCT only: the sp bits masked by StackAlignmentInBytes-1 must be zero
+  assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment");
+}
+#endif
+
+int os::extra_bang_size_in_bytes() {  // always 0 here
+  return 0;
+}
+
+extern "C" {
+  int SpinPause() {  // performs no pause; always reports 0
+    return 0;
+  }
+
+  // Overlap-safe element-wise copies. When the destination lies above the
+  // source, the copy walks backwards from one past the end using
+  // pre-decrement, so count == 0 is a clean no-op and no pointer before the
+  // start of either range is ever formed.
+  void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
+    if (from > to) {
+      const jshort *end = from + count;
+      while (from < end) {
+        *(to++) = *(from++);
+      }
+    } else if (from < to) {
+      const jshort *src = from + count;
+      jshort *dst = to + count;
+      while (src > from) {
+        *(--dst) = *(--src);
+      }
+    }
+  }
+
+  void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
+    if (from > to) {
+      const jint *end = from + count;
+      while (from < end) {
+        *(to++) = *(from++);
+      }
+    } else if (from < to) {
+      const jint *src = from + count;
+      jint *dst = to + count;
+      while (src > from) {
+        *(--dst) = *(--src);
+      }
+    }
+  }
+
+  void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
+    if (from > to) {
+      const jlong *end = from + count;
+      while (from < end) {
+        os::atomic_copy64(from++, to++);
+      }
+    } else if (from < to) {
+      const jlong *src = from + count;
+      jlong *dst = to + count;
+      while (src > from) {
+        os::atomic_copy64(--src, --dst);
+      }
+    }
+  }
+
+  // Array-of-element copies: plain memmove over count elements of the
+  // respective element size.
+  void _Copy_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
+    memmove(to, from, count);
+  }
+
+  void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
+    memmove(to, from, count * sizeof(jshort));
+  }
+
+  void _Copy_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
+    memmove(to, from, count * sizeof(jint));
+  }
+
+  void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
+    memmove(to, from, count * sizeof(jlong));
+  }
+};
diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
new file mode 100644
index 0000000000..f3e3a73bc5
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
+
+  static void setup_fpu();
+
+  // Used to register dynamic code cache area with the OS
+  // Note: Currently only used in 64 bit Windows implementations; always returns true here
+  static bool register_code_area(char *low, char *high) { return true; }
+
+  // Atomically copy 64 bits of data. NOTE(review): the casts drop volatile - confirm a single 64-bit access is still guaranteed
+  static void atomic_copy64(const volatile void *src, volatile void *dst) {
+    *(jlong *) dst = *(const jlong *) src;
+  }
+
+#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
new file mode 100644
index 0000000000..2bd48e09c3
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+
+inline void Prefetch::read (void *loc, intx interval) {  // empty: no prefetch is issued, both arguments are unused
+}
+
+inline void Prefetch::write(void *loc, intx interval) {  // empty: no prefetch is issued, both arguments are unused
+}
+
+#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp
new file mode 100644
index 0000000000..ffcd819487
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "logging/log.hpp"
+#include "riscv_flush_icache.hpp"
+#include "runtime/os.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/debug.hpp"
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define check_with_errno(check_type, cond, msg)                             \
+  do {                                                                      \
+    int err = errno;                                                        \
+    check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err),   \
+               os::errno_name(err));                                        \
+} while (false)
+// check_with_errno snapshots errno, then hands cond to check_type with msg extended by the errno text and name.
+#define assert_with_errno(cond, msg)    check_with_errno(assert, cond, msg)
+#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg)
+
+#ifndef NR_riscv_flush_icache
+#ifndef NR_arch_specific_syscall
+#define NR_arch_specific_syscall 244
+#endif
+#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15)
+#endif
+// NOTE(review): kernel headers usually spell these __NR_*; if so the fallback above is always taken - confirm
+#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL  // flags value; not referenced elsewhere in this file
+#define SYS_RISCV_FLUSH_ICACHE_ALL   0UL  // flags value passed by test() and flush()
+
+static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) {  // thin wrapper: returns syscall()'s result unchanged
+  return syscall(NR_riscv_flush_icache, start, end, flags);
+}
+
+// Probes the syscall on a small stack buffer; logs errno details and returns false if it fails.
+bool RiscvFlushIcache::test() {
+  ATTRIBUTE_ALIGNED(64) char memory[64];
+  long ret = sys_flush_icache((uintptr_t)&memory[0], (uintptr_t)&memory[sizeof(memory) - 1],
+                              SYS_RISCV_FLUSH_ICACHE_ALL);
+  if (ret == 0) {
+    return true;
+  }
+  int err = errno;  // snapshot before the logging calls below can overwrite it
+  log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)",
+                os::strerror(err), os::errno_name(err));
+  return false;
+}
+
+void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) {  // passes start/end on with the ALL flag; a nonzero result trips the guarantee
+  long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL);
+  guarantee_with_errno(ret == 0, "riscv_flush_icache failed");
+}
diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp
new file mode 100644
index 0000000000..f4e7263b39
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2023, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP
+#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP
+
+#include "memory/allocation.hpp"
+#include "runtime/vm_version.hpp"
+#include "utilities/growableArray.hpp"
+
+class RiscvFlushIcache: public AllStatic {  // Static-only wrapper around the Linux riscv_flush_icache syscall.
+ public:
+  static bool test();  // returns whether a trial invocation of the syscall succeeded
+  static void flush(uintptr_t start, uintptr_t end);  // invokes the syscall with the given start/end addresses
+};
+
+#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
new file mode 100644
index 0000000000..ccceed643e
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/metaspaceShared.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/thread.inline.hpp"
+
+frame JavaThread::pd_last_frame() {  // Builds a frame from the sp/fp/pc values recorded in _anchor.
+  assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+  return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
+}
+
+// For Forte Analyzer AsyncGetCallTrace profiling support - thread is
+// currently interrupted by SIGPROF
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr,
+  void* ucontext, bool isInJava) {
+  // Must be invoked on this thread itself (asserted below); the lookup is done by pd_get_top_frame().
+  assert(Thread::current() == this, "caller must be current thread");
+  return pd_get_top_frame(fr_addr, ucontext, isInJava);
+}
+
+bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) {
+  return pd_get_top_frame(fr_addr, ucontext, isInJava);  // plain forwarder; no current-thread assertion on this path
+}
+
+bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) {
+  // If we have a last_Java_frame, then we should use it even if
+  // isInJava == true.  It should be more reliable than ucontext info.
+  if (has_last_Java_frame() && frame_anchor()->walkable()) {
+    *fr_addr = pd_last_frame();
+    return true;
+  }
+
+  // At this point, we don't have a last_Java_frame, so
+  // we try to glean some information out of the ucontext
+  // if we were running Java code when SIGPROF came in.
+  if (isInJava) {
+    ucontext_t* uc = (ucontext_t*) ucontext;
+    // ret_sp and ret_fp are passed by address so the fetch call below can fill them in.
+    intptr_t* ret_fp = NULL;
+    intptr_t* ret_sp = NULL;
+    ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc,
+      &ret_sp, &ret_fp);
+    if (addr.pc() == NULL || ret_sp == NULL ) {
+      // ucontext wasn't useful
+      return false;
+    }
+
+    if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) {
+      // In the middle of a trampoline call. Bail out for safety.
+      // This happens rarely so shouldn't affect profiling.
+      return false;
+    }
+
+    frame ret_frame(ret_sp, ret_fp, addr.pc());
+    if (!ret_frame.safe_for_sender(this)) {
+#ifdef COMPILER2
+      frame ret_frame2(ret_sp, NULL, addr.pc());  // second attempt: same sp/pc but with a NULL fp
+      if (!ret_frame2.safe_for_sender(this)) {
+        // nothing else to try if the frame isn't good
+        return false;
+      }
+      ret_frame = ret_frame2;
+#else
+      // nothing else to try if the frame isn't good
+      return false;
+#endif /* COMPILER2 */
+    }
+    *fr_addr = ret_frame;  // only written once a frame has passed safe_for_sender()
+    return true;
+  }
+
+  // nothing else to try
+  return false;
+}
+
+void JavaThread::cache_global_variables() { }  // empty body: nothing is cached in this port
diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
new file mode 100644
index 0000000000..4b91fa855a
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
+
+ private:
+  void pd_initialize() {
+    _anchor.clear();  // the only platform-dependent initialization done here
+  }
+
+  frame pd_last_frame();
+
+ public:
+  // Mutators are highly dangerous....
+  intptr_t* last_Java_fp()                       { return _anchor.last_Java_fp(); }
+  void  set_last_Java_fp(intptr_t* fp)           { _anchor.set_last_Java_fp(fp);   }
+
+  void set_base_of_stack_pointer(intptr_t* base_sp) {  // no-op: base_sp is ignored
+  }
+
+  static ByteSize last_Java_fp_offset()          {
+    return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();  // offset of the saved fp relative to JavaThread
+  }
+
+  intptr_t* base_of_stack_pointer() {
+    return NULL;  // always NULL here
+  }
+  void record_base_of_stack_pointer() {  // no-op
+  }
+
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
+    bool isInJava);
+
+  bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava);
+private:
+  bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava);  // shared worker behind the two public variants above
+public:
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
+
+#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
new file mode 100644
index 0000000000..6cf7683a58
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+
+#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+                                                                                                                                     \
+  /******************************/                                                                                                   \
+  /* Threads (NOTE: incomplete) */                                                                                                   \
+  /******************************/                                                                                                   \
+  nonstatic_field(OSThread,                      _thread_id,                                      OSThread::thread_id_t)             \
+  nonstatic_field(OSThread,                      _pthread_id,                                     pthread_t)
+
+
+#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+                                                                          \
+  /**********************/                                                \
+  /* Posix Thread IDs   */                                                \
+  /**********************/                                                \
+                                                                          \
+  declare_integer_type(OSThread::thread_id_t)                             \
+  declare_unsigned_integer_type(pthread_t)
+
+#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
new file mode 100644
index 0000000000..8bcc949fed
--- /dev/null
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/register.hpp"
+#include "runtime/os.hpp"
+#include "runtime/os.inline.hpp"
+#include "runtime/vm_version.hpp"
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#ifndef HWCAP_ISA_I
+#define HWCAP_ISA_I  (1 << ('I' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_M
+#define HWCAP_ISA_M  (1 << ('M' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_A
+#define HWCAP_ISA_A  (1 << ('A' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_F
+#define HWCAP_ISA_F  (1 << ('F' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_D
+#define HWCAP_ISA_D  (1 << ('D' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_C
+#define HWCAP_ISA_C  (1 << ('C' - 'A'))
+#endif
+
+#ifndef HWCAP_ISA_V
+#define HWCAP_ISA_V  (1 << ('V' - 'A'))
+#endif
+
+#define read_csr(csr)                                           \
+({                                                              \
+        register unsigned long __v;                             \
+        __asm__ __volatile__ ("csrr %0, %1"                     \
+                              : "=r" (__v)                      \
+                              : "i" (csr)                       \
+                              : "memory");                      \
+        __v;                                                    \
+})
+
+uint32_t VM_Version::get_current_vector_length() {  // Returns the CSR_VLENB value read via read_csr, truncated to 32 bits.
+  assert(_features & CPU_V, "should not call this");
+  return (uint32_t)read_csr(CSR_VLENB);
+}
+
+VM_Version::VM_MODE VM_Version::get_satp_mode() {
+  // Translate the MMU mode string held in _vm_mode into its VM_MODE value.
+  static const struct { const char* name; VM_MODE mode; } known_modes[] = {
+    { "sv39", VM_SV39 }, { "sv48", VM_SV48 },
+    { "sv57", VM_SV57 }, { "sv64", VM_SV64 }
+  };
+  for (size_t i = 0; i < sizeof(known_modes) / sizeof(known_modes[0]); i++) {
+    if (strcmp(_vm_mode, known_modes[i].name) == 0) {
+      return known_modes[i].mode;
+    }
+  }
+  return VM_MBARE;  // any string other than the four names above
+}
+
+void VM_Version::get_os_cpu_info() {
+  // Fills _features from AT_HWCAP, then _vm_mode and _uarch from /proc/cpuinfo.
+  uint64_t auxv = getauxval(AT_HWCAP);
+  // The CPU_* constants must equal the HWCAP bits because auxv is masked straight into _features below.
+  STATIC_ASSERT(CPU_I == HWCAP_ISA_I);
+  STATIC_ASSERT(CPU_M == HWCAP_ISA_M);
+  STATIC_ASSERT(CPU_A == HWCAP_ISA_A);
+  STATIC_ASSERT(CPU_F == HWCAP_ISA_F);
+  STATIC_ASSERT(CPU_D == HWCAP_ISA_D);
+  STATIC_ASSERT(CPU_C == HWCAP_ISA_C);
+  STATIC_ASSERT(CPU_V == HWCAP_ISA_V);
+
+  // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs.
+  // Availability for those extensions could not be queried from HWCAP.
+  // TODO: Add proper detection for those extensions.
+  _features = auxv & (
+      HWCAP_ISA_I |
+      HWCAP_ISA_M |
+      HWCAP_ISA_A |
+      HWCAP_ISA_F |
+      HWCAP_ISA_D |
+      HWCAP_ISA_C |
+      HWCAP_ISA_V);
+
+  if (FILE *f = fopen("/proc/cpuinfo", "r")) {  // the whole scan is skipped if the file cannot be opened
+    char buf[512], *p;
+    while (fgets(buf, sizeof (buf), f) != NULL) {
+      if ((p = strchr(buf, ':')) != NULL) {  // only lines containing ':' are examined
+        if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) {
+          if (_vm_mode[0] != '\0') {  // a non-empty _vm_mode is kept; later "mmu" lines are ignored
+            continue;
+          }
+          char* vm_mode = os::strdup(p + 2);  // NOTE(review): result is not NULL-checked — confirm failure is impossible/acceptable
+          vm_mode[strcspn(vm_mode, "\n")] = '\0';  // cut the copy at the first newline, if any
+          _vm_mode = vm_mode;
+        } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) {
+          char* uarch = os::strdup(p + 2);  // NOTE(review): same unchecked os::strdup result as above
+          uarch[strcspn(uarch, "\n")] = '\0';
+          _uarch = uarch;
+          break;  // stop reading after the first "uarch" line
+        }
+      }
+    }
+    fclose(f);
+  }
+}
diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp
index e30d39f73d..c640c546b1 100644
--- a/src/hotspot/share/c1/c1_LIR.cpp
+++ b/src/hotspot/share/c1/c1_LIR.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -200,6 +200,9 @@ void LIR_Op2::verify() const {
 #ifdef ASSERT
   switch (code()) {
     case lir_cmove:
+#ifdef RISCV
+      assert(false, "lir_cmove is LIR_Op4 on RISCV");
+#endif
     case lir_xchg:
       break;
 
@@ -252,9 +255,13 @@ void LIR_Op2::verify() const {
 
 
 LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block)
+#ifdef RISCV
+  : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type)
+#else
   : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
   , _cond(cond)
   , _type(type)
+#endif
   , _label(block->label())
   , _block(block)
   , _ublock(NULL)
@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block
 }
 
 LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) :
+#ifdef RISCV
+  LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type)
+#else
   LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
   , _cond(cond)
   , _type(type)
+#endif
   , _label(stub->entry())
   , _block(NULL)
   , _ublock(NULL)
@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) :
 }
 
 LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock)
+#ifdef RISCV
+  : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type)
+#else
   : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL)
   , _cond(cond)
   , _type(type)
+#endif
   , _label(block->label())
   , _block(block)
   , _ublock(ublock)
@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) {
 }
 
 void LIR_OpBranch::negate_cond() {
-  switch (_cond) {
-    case lir_cond_equal:        _cond = lir_cond_notEqual;     break;
-    case lir_cond_notEqual:     _cond = lir_cond_equal;        break;
-    case lir_cond_less:         _cond = lir_cond_greaterEqual; break;
-    case lir_cond_lessEqual:    _cond = lir_cond_greater;      break;
-    case lir_cond_greaterEqual: _cond = lir_cond_less;         break;
-    case lir_cond_greater:      _cond = lir_cond_lessEqual;    break;
+  switch (cond()) {
+    case lir_cond_equal:        set_cond(lir_cond_notEqual);     break;
+    case lir_cond_notEqual:     set_cond(lir_cond_equal);        break;
+    case lir_cond_less:         set_cond(lir_cond_greaterEqual); break;
+    case lir_cond_lessEqual:    set_cond(lir_cond_greater);      break;
+    case lir_cond_greaterEqual: set_cond(lir_cond_less);         break;
+    case lir_cond_greater:      set_cond(lir_cond_lessEqual);    break;
     default: ShouldNotReachHere();
   }
 }
@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
       assert(op->as_OpBranch() != NULL, "must be");
       LIR_OpBranch* opBranch = (LIR_OpBranch*)op;
 
+#ifdef RISCV
+      assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() &&
+             opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() &&
+             opBranch->_tmp5->is_illegal(), "not used");
+
+      if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1);
+      if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2);
+#endif
+
       if (opBranch->_info != NULL)     do_info(opBranch->_info);
       assert(opBranch->_result->is_illegal(), "not used");
       if (opBranch->_stub != NULL)     opBranch->stub()->visit(this);
@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
     // to the result operand, otherwise the backend fails
     case lir_cmove:
     {
+#ifdef RISCV
+      assert(op->as_Op4() != NULL, "must be");
+      LIR_Op4* op4 = (LIR_Op4*)op;
+
+      assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() &&
+             op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used");
+      assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used");
+
+      do_input(op4->_opr1);
+      do_input(op4->_opr2);
+      if (op4->_opr3->is_valid()) do_input(op4->_opr3);
+      if (op4->_opr4->is_valid()) do_input(op4->_opr4);
+      do_temp(op4->_opr2);
+      do_output(op4->_result);
+#else
       assert(op->as_Op2() != NULL, "must be");
       LIR_Op2* op2 = (LIR_Op2*)op;
 
@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
       do_input(op2->_opr2);
       do_temp(op2->_opr2);
       do_output(op2->_result);
+#endif
 
       break;
     }
@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) {
   masm->emit_op3(this);
 }
 
+#ifdef RISCV
+void LIR_Op4::emit_code(LIR_Assembler* masm) {
+  masm->emit_op4(this);  // hands this op to the assembler's emit_op4
+}
+#endif
+
 void LIR_OpLock::emit_code(LIR_Assembler* masm) {
   masm->emit_lock(this);
   if (stub()) {
@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block)
   , _file(NULL)
   , _line(0)
 #endif
+#ifdef RISCV
+  , _cmp_opr1(LIR_OprFact::illegalOpr)
+  , _cmp_opr2(LIR_OprFact::illegalOpr)
+#endif
 { }
 
 
@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) {
 }
 #endif
 
+#ifdef RISCV
+void LIR_List::set_cmp_oprs(LIR_Op* op) {  // Records compare operands in _cmp_opr1/_cmp_opr2, or copies the recorded pair into op.
+  switch (op->code()) {
+    case lir_cmp:  // remember this compare's two input operands
+      _cmp_opr1 = op->as_Op2()->in_opr1();
+      _cmp_opr2 = op->as_Op2()->in_opr2();
+      break;
+    case lir_branch: // fall through
+    case lir_cond_float_branch:
+      assert(op->as_OpBranch()->cond() == lir_cond_always ||
+            (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr),
+            "conditional branches must have legal operands");
+      if (op->as_OpBranch()->cond() != lir_cond_always) {  // lir_cond_always branches keep their own operands
+        op->as_Op2()->set_in_opr1(_cmp_opr1);
+        op->as_Op2()->set_in_opr2(_cmp_opr2);
+      }
+      break;
+    case lir_cmove:  // the recorded pair goes into the cmove's third and fourth inputs
+      op->as_Op4()->set_in_opr3(_cmp_opr1);
+      op->as_Op4()->set_in_opr4(_cmp_opr2);
+      break;
+#if INCLUDE_ZGC
+    case lir_zloadbarrier_test:  // records fixed operands: the t1 register operand and int constant 0
+      _cmp_opr1 = FrameMap::as_opr(t1);
+      _cmp_opr2 = LIR_OprFact::intConst(0);
+      break;
+#endif
+    default:  // every other op leaves the recorded operands untouched
+      break;
+  }
+}
+#endif
 
 void LIR_List::append(LIR_InsertionBuffer* buffer) {
   assert(this == buffer->lir_list(), "wrong lir list");
@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) {
 // LIR_OpBranch
 void LIR_OpBranch::print_instr(outputStream* out) const {
   print_condition(out, cond());             out->print(" ");
+#ifdef RISCV
+  in_opr1()->print(out); out->print(" ");
+  in_opr2()->print(out); out->print(" ");
+#endif
   if (block() != NULL) {
     out->print("[B%d] ", block()->block_id());
   } else if (stub() != NULL) {
@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const {
 
 // LIR_Op2
 void LIR_Op2::print_instr(outputStream* out) const {
+#ifdef RISCV
+  if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) {
+#else
   if (code() == lir_cmove || code() == lir_cmp) {
+#endif
     print_condition(out, condition());         out->print(" ");
   }
   in_opr1()->print(out);    out->print(" ");
@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const {
   result_opr()->print(out);
 }
 
+#ifdef RISCV
+// LIR_Op4
+void LIR_Op4::print_instr(outputStream* out) const {  // Prints the condition, the four inputs, then the result, space-separated.
+  print_condition(out, condition()); out->print(" ");
+  in_opr1()->print(out);             out->print(" ");
+  in_opr2()->print(out);             out->print(" ");
+  in_opr3()->print(out);             out->print(" ");
+  in_opr4()->print(out);             out->print(" ");
+  result_opr()->print(out);  // no trailing space after the result operand
+}
+#endif
 
 void LIR_OpLock::print_instr(outputStream* out) const {
   hdr_opr()->print(out);   out->print(" ");
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index 3234ca018b..33943e369d 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -867,6 +867,9 @@ class    LIR_Op2;
 class    LIR_OpDelay;
 class    LIR_Op3;
 class      LIR_OpAllocArray;
+#ifdef RISCV
+class    LIR_Op4;
+#endif
 class    LIR_OpCall;
 class      LIR_OpJavaCall;
 class      LIR_OpRTCall;
@@ -916,8 +919,10 @@ enum LIR_Code {
       , lir_null_check
       , lir_return
       , lir_leal
+#ifndef RISCV
       , lir_branch
       , lir_cond_float_branch
+#endif
       , lir_move
       , lir_convert
       , lir_alloc_object
@@ -929,11 +934,17 @@ enum LIR_Code {
       , lir_unwind
   , end_op1
   , begin_op2
+#ifdef RISCV
+      , lir_branch
+      , lir_cond_float_branch
+#endif
       , lir_cmp
       , lir_cmp_l2i
       , lir_ucmp_fd2i
       , lir_cmp_fd2i
+#ifndef RISCV
       , lir_cmove
+#endif
       , lir_add
       , lir_sub
       , lir_mul
@@ -964,6 +975,11 @@ enum LIR_Code {
       , lir_fmad
       , lir_fmaf
   , end_op3
+#ifdef RISCV
+  , begin_op4
+      , lir_cmove
+  , end_op4
+#endif
   , begin_opJavaCall
       , lir_static_call
       , lir_optvirtual_call
@@ -1001,6 +1017,11 @@ enum LIR_Code {
   , begin_opAssert
     , lir_assert
   , end_opAssert
+#if defined(RISCV) && defined(INCLUDE_ZGC)
+  , begin_opZLoadBarrierTest
+    , lir_zloadbarrier_test
+  , end_opZLoadBarrierTest
+#endif
 };
 
 
@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj {
   virtual LIR_Op1* as_Op1() { return NULL; }
   virtual LIR_Op2* as_Op2() { return NULL; }
   virtual LIR_Op3* as_Op3() { return NULL; }
+#ifdef RISCV
+  virtual LIR_Op4* as_Op4() { return NULL; }
+#endif
   virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; }
   virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; }
   virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; }
@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall {
   virtual void verify() const;
 };
 
-
-class LIR_OpBranch: public LIR_Op {
- friend class LIR_OpVisitState;
-
- private:
-  LIR_Condition _cond;
-  BasicType     _type;
-  Label*        _label;
-  BlockBegin*   _block;  // if this is a branch to a block, this is the block
-  BlockBegin*   _ublock; // if this is a float-branch, this is the unorderd block
-  CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
-
- public:
-  LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)
-    : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
-    , _cond(cond)
-    , _type(type)
-    , _label(lbl)
-    , _block(NULL)
-    , _ublock(NULL)
-    , _stub(NULL) { }
-
-  LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block);
-  LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub);
-
-  // for unordered comparisons
-  LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock);
-
-  LIR_Condition cond()        const              { return _cond;        }
-  BasicType     type()        const              { return _type;        }
-  Label*        label()       const              { return _label;       }
-  BlockBegin*   block()       const              { return _block;       }
-  BlockBegin*   ublock()      const              { return _ublock;      }
-  CodeStub*     stub()        const              { return _stub;       }
-
-  void          change_block(BlockBegin* b);
-  void          change_ublock(BlockBegin* b);
-  void          negate_cond();
-
-  virtual void emit_code(LIR_Assembler* masm);
-  virtual LIR_OpBranch* as_OpBranch() { return this; }
-  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
-};
-
-
 class ConversionStub;
 
 class LIR_OpConvert: public LIR_Op1 {
@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op {
   void verify() const;
 
  public:
-  LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL)
+  LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL)
     : LIR_Op(code, LIR_OprFact::illegalOpr, info)
     , _opr1(opr1)
     , _opr2(opr2)
-    , _type(T_ILLEGAL)
-    , _condition(condition)
     , _fpu_stack_size(0)
+    , _type(type)
     , _tmp1(LIR_OprFact::illegalOpr)
     , _tmp2(LIR_OprFact::illegalOpr)
     , _tmp3(LIR_OprFact::illegalOpr)
     , _tmp4(LIR_OprFact::illegalOpr)
-    , _tmp5(LIR_OprFact::illegalOpr) {
-    assert(code == lir_cmp || code == lir_assert, "code check");
+    , _tmp5(LIR_OprFact::illegalOpr)
+    , _condition(condition) {
+    assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check");
   }
 
   LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type)
@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op {
     , _opr1(opr1)
     , _opr2(opr2)
     , _type(type)
-    , _condition(lir_cond_unknown)
     , _fpu_stack_size(0)
     , _tmp1(LIR_OprFact::illegalOpr)
     , _tmp2(LIR_OprFact::illegalOpr)
     , _tmp3(LIR_OprFact::illegalOpr)
     , _tmp4(LIR_OprFact::illegalOpr)
-    , _tmp5(LIR_OprFact::illegalOpr) {
-    assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+    , _tmp5(LIR_OprFact::illegalOpr)
+    , _condition(lir_cond_unknown) {
+    assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check");
   }
 
   LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr,
@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op {
     , _opr1(opr1)
     , _opr2(opr2)
     , _type(T_ILLEGAL)
-    , _condition(lir_cond_unknown)
     , _fpu_stack_size(0)
     , _tmp1(tmp1)
     , _tmp2(tmp2)
     , _tmp3(tmp3)
     , _tmp4(tmp4)
-    , _tmp5(tmp5) {
-    assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check");
+    , _tmp5(tmp5)
+    , _condition(lir_cond_unknown) {
+    assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check");
   }
 
   LIR_Opr in_opr1() const                        { return _opr1; }
@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op {
   LIR_Opr tmp4_opr() const                       { return _tmp4; }
   LIR_Opr tmp5_opr() const                       { return _tmp5; }
   LIR_Condition condition() const  {
+#ifdef RISCV  // RISC-V: readable on cmp, branch, float-branch and assert ops (lir_cmove is not accepted here)
+    assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition;
+#else  // other CPUs: readable on cmp, cmove and assert ops
     assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition;
+#endif // RISCV
   }
   void set_condition(LIR_Condition condition) {
+#ifdef RISCV  // RISC-V: writable on cmp, branch and float-branch ops only
+    assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition;
+#else  // other CPUs: writable on cmp and cmove ops only
     assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove");  _condition = condition;
+#endif // RISCV
   }
 
   void set_fpu_stack_size(int size)              { _fpu_stack_size = size; }
@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op {
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
 
+#ifdef RISCV  // RISC-V: LIR_OpBranch derives from LIR_Op2 and uses its condition and type instead of own fields
+class LIR_OpBranch: public LIR_Op2 {
+#else  // other CPUs: derives directly from LIR_Op and stores _cond/_type itself
+class LIR_OpBranch: public LIR_Op {
+#endif // RISCV
+ friend class LIR_OpVisitState;
+
+ private:
+#ifndef RISCV  // declared here only when the base class is plain LIR_Op
+  LIR_Condition _cond;
+  BasicType     _type;
+#endif // !RISCV
+  Label*        _label;
+  BlockBegin*   _block;  // if this is a branch to a block, this is the block
+  BlockBegin*   _ublock; // if this is a float-branch, this is the unordered block
+  CodeStub*     _stub;   // if this is a branch to a stub, this is the stub
+
+ public:
+  LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl)  // branch to a bare label: block, ublock and stub stay NULL
+#ifdef RISCV  // both LIR_Op2 operands are illegalOpr here; cond and type are handed to the base class
+    : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type)
+#else
+    : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL)
+    , _cond(cond)
+    , _type(type)
+#endif // RISCV
+    , _label(lbl)
+    , _block(NULL)
+    , _ublock(NULL)
+    , _stub(NULL) { }
+
+  LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block);
+  LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub);
+
+  // for unordered comparisons
+  LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock);
+
+#ifdef RISCV  // forwards to the LIR_Op2 accessors, so their code() asserts apply as well
+  LIR_Condition cond()        const              { return condition();  }
+  void set_cond(LIR_Condition cond)              { set_condition(cond); }
+#else
+  LIR_Condition cond()        const              { return _cond;        }
+  void set_cond(LIR_Condition cond)              { _cond = cond;        }
+#endif // RISCV
+  BasicType     type()        const              { return _type;        }
+  Label*        label()       const              { return _label;       }
+  BlockBegin*   block()       const              { return _block;       }
+  BlockBegin*   ublock()      const              { return _ublock;      }
+  CodeStub*     stub()        const              { return _stub;        }
+
+  void          change_block(BlockBegin* b);
+  void          change_ublock(BlockBegin* b);
+  void          negate_cond();
+
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_OpBranch* as_OpBranch() { return this; }
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+
 class LIR_OpAllocArray : public LIR_Op {
  friend class LIR_OpVisitState;
 
@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op {
   virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
 };
 
+#ifdef RISCV  // LIR_Op4 is declared for RISC-V builds only
+class LIR_Op4: public LIR_Op {  // LIR operation with four input operands, a result and a condition
+  friend class LIR_OpVisitState;
+ protected:
+  LIR_Opr   _opr1;
+  LIR_Opr   _opr2;
+  LIR_Opr   _opr3;  // LIR_List::cmove passes its cmp_opr1 argument here
+  LIR_Opr   _opr4;  // LIR_List::cmove passes its cmp_opr2 argument here
+  BasicType _type;
+  LIR_Opr   _tmp1;  // _tmp1.._tmp5 are all initialised to illegalOpr by the constructor below
+  LIR_Opr   _tmp2;
+  LIR_Opr   _tmp3;
+  LIR_Opr   _tmp4;
+  LIR_Opr   _tmp5;
+  LIR_Condition _condition;
+
+ public:
+  LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4,
+          LIR_Opr result, BasicType type)
+    : LIR_Op(code, result, NULL)
+    , _opr1(opr1)
+    , _opr2(opr2)
+    , _opr3(opr3)
+    , _opr4(opr4)
+    , _type(type)
+    , _tmp1(LIR_OprFact::illegalOpr)
+    , _tmp2(LIR_OprFact::illegalOpr)
+    , _tmp3(LIR_OprFact::illegalOpr)
+    , _tmp4(LIR_OprFact::illegalOpr)
+    , _tmp5(LIR_OprFact::illegalOpr)
+    , _condition(condition) {
+    assert(code == lir_cmove, "code check");  // lir_cmove is the only code this constructor accepts
+    assert(type != T_ILLEGAL, "cmove should have type");
+  }
+
+  LIR_Opr in_opr1() const                        { return _opr1; }
+  LIR_Opr in_opr2() const                        { return _opr2; }
+  LIR_Opr in_opr3() const                        { return _opr3; }
+  LIR_Opr in_opr4() const                        { return _opr4; }
+  BasicType type()  const                        { return _type; }
+  LIR_Opr tmp1_opr() const                       { return _tmp1; }
+  LIR_Opr tmp2_opr() const                       { return _tmp2; }
+  LIR_Opr tmp3_opr() const                       { return _tmp3; }
+  LIR_Opr tmp4_opr() const                       { return _tmp4; }
+  LIR_Opr tmp5_opr() const                       { return _tmp5; }
+
+  LIR_Condition condition() const                { return _condition; }  // unlike LIR_Op2::condition(), no code() assert
+  void set_condition(LIR_Condition condition)    { _condition = condition; }
+
+  void set_in_opr1(LIR_Opr opr)                  { _opr1 = opr; }
+  void set_in_opr2(LIR_Opr opr)                  { _opr2 = opr; }
+  void set_in_opr3(LIR_Opr opr)                  { _opr3 = opr; }
+  void set_in_opr4(LIR_Opr opr)                  { _opr4 = opr; }
+  virtual void emit_code(LIR_Assembler* masm);
+  virtual LIR_Op4* as_Op4() { return this; }
+
+  virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+};
+#endif // RISCV
 
 //--------------------------------
 class LabelObj: public CompilationResourceObj {
@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj {
   const char *  _file;
   int           _line;
 #endif
+#ifdef RISCV
+  LIR_Opr       _cmp_opr1;
+  LIR_Opr       _cmp_opr2;
+#endif
 
  public:
   void append(LIR_Op* op) {
@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj {
     }
 #endif // PRODUCT
 
+#ifdef RISCV
+    set_cmp_oprs(op);
+    // On riscv a lir_cmp is only passed to set_cmp_oprs(); it is never appended to the list.
+    if (op->code() == lir_cmp) return;
+#endif
+
     _operations.append(op);
 
 #ifdef ASSERT
@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj {
   void set_file_and_line(const char * file, int line);
 #endif
 
+#ifdef RISCV
+  void set_cmp_oprs(LIR_Op* op);
+#endif
+
   //---------- accessors ---------------
   LIR_OpList* instructions_list()                { return &_operations; }
   int         length() const                     { return _operations.length(); }
@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj {
   void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info);
   void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info);
 
+#ifdef RISCV  // RISC-V: cmove is a LIR_Op4 that additionally takes two comparison operands
+  void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type,
+             LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) {
+    append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type));  // cmp_opr1/cmp_opr2 become in_opr3/in_opr4
+  }
+#else  // other CPUs: cmove is appended as a LIR_Op2
   void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) {
     append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type));
   }
+#endif // RISCV
 
   void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value,
                 LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr);
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp
index 160483d5f7..68aec26c1e 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp
@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
       comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
       break;
 
+#ifndef RISCV
     case lir_cmove:
       cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type());
       break;
+#endif
 
     case lir_shl:
     case lir_shr:
@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
   }
 }
 
+#ifdef RISCV  // emit_op4 is compiled for RISC-V builds only
+void LIR_Assembler::emit_op4(LIR_Op4* op) {  // dispatches a LIR_Op4 by its code
+  switch(op->code()) {
+    case lir_cmove:  // in_opr3()/in_opr4() are passed as cmove's cmp_opr1/cmp_opr2 arguments
+      cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4());
+      break;
+
+    default:  // any code other than lir_cmove is reported as unimplemented
+      Unimplemented();
+      break;
+  }
+}
+#endif // RISCV
 
 void LIR_Assembler::build_frame() {
   _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp
index 44a5bcbe54..baeb4aa442 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp
@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj {
   void emit_op1(LIR_Op1* op);
   void emit_op2(LIR_Op2* op);
   void emit_op3(LIR_Op3* op);
+#ifdef RISCV
+  void emit_op4(LIR_Op4* op);
+#endif
   void emit_opBranch(LIR_OpBranch* op);
   void emit_opLabel(LIR_OpLabel* op);
   void emit_arraycopy(LIR_OpArrayCopy* op);
@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj {
   void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info);
   void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info);  // info set for null exceptions
   void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op);
+#ifdef RISCV
+  void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type,
+             LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr);
+#else
   void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type);
-
+#endif
   void call(        LIR_OpJavaCall* op, relocInfo::relocType rtype);
   void ic_call(     LIR_OpJavaCall* op);
   void vtable_call( LIR_OpJavaCall* op);
diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp
index acc969ac9c..512b63c744 100644
--- a/src/hotspot/share/c1/c1_LinearScan.cpp
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) {
       break;
     }
     case lir_cmove: {
+#ifdef RISCV
+      assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4");
+      LIR_Op4* cmove = (LIR_Op4*)op;
+#else
       assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2");
       LIR_Op2* cmove = (LIR_Op2*)op;
+#endif
 
       LIR_Opr move_from = cmove->in_opr1();
       LIR_Opr move_to = cmove->result_opr();
@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() {
     }
   }
 
+#ifndef RISCV
+  // Disable these optimizations on riscv temporarily, because they do not
+  // work when the comparison operands are bound to branches or cmoves.
   { TIME_LINEAR_SCAN(timer_optimize_lir);
 
     EdgeMoveOptimizer::optimize(ir()->code());
@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() {
     // check that cfg is still correct after optimizations
     ir()->verify();
   }
+#endif
 
   NOT_PRODUCT(print_lir(1, "Before Code Generation", false));
   NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final));
@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
               // There might be a cmove inserted for profiling which depends on the same
               // compare. If we change the condition of the respective compare, we have
               // to take care of this cmove as well.
+#ifdef RISCV
+              LIR_Op4* prev_cmove = NULL;
+#else
               LIR_Op2* prev_cmove = NULL;
+#endif
 
               for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) {
                 prev_op = instructions->at(j);
                 // check for the cmove
                 if (prev_op->code() == lir_cmove) {
+#ifdef RISCV
+                  assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4");
+                  prev_cmove = (LIR_Op4*)prev_op;
+#else
                   assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2");
                   prev_cmove = (LIR_Op2*)prev_op;
+#endif
                   assert(prev_branch->cond() == prev_cmove->condition(), "should be the same");
                 }
                 if (prev_op->code() == lir_cmp) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
index 4771a8b865..6d377fa005 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,7 +31,7 @@
 #include "utilities/defaultStream.hpp"
 
 void ShenandoahArguments::initialize() {
-#if !(defined AARCH64 || defined AMD64 || defined IA32)
+#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64)
   vm_exit_during_initialization("Shenandoah GC is not supported on this platform.");
 #endif
 
diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp
index 9f8ce74243..125cc169be 100644
--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp
+++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -100,7 +100,11 @@ private:
 
 public:
   LIR_OpZLoadBarrierTest(LIR_Opr opr) :
+#ifdef RISCV
+      LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL),
+#else
       LIR_Op(),
+#endif
       _opr(opr) {}
 
   virtual void visit(LIR_OpVisitState* state) {
diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
index e01a242a57..ff16de0e77 100644
--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
+++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) {
 inline bool JfrBigEndian::platform_supports_unaligned_reads(void) {
 #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390)
   return true;
-#elif defined(SPARC) || defined(ARM) || defined(AARCH64)
+#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV)
   return false;
 #else
   #warning "Unconfigured platform"
diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp
index a383297611..5e9228e705 100644
--- a/src/hotspot/share/runtime/abstract_vm_version.cpp
+++ b/src/hotspot/share/runtime/abstract_vm_version.cpp
@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() {
                  IA32_ONLY("x86")                \
                  IA64_ONLY("ia64")               \
                  S390_ONLY("s390")               \
-                 SPARC_ONLY("sparc")
+                 SPARC_ONLY("sparc")             \
+                 RISCV64_ONLY("riscv64")
 #endif // !ZERO
 #endif // !CPU
 
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
index 8ac6d63586..6bb38c40cc 100644
--- a/src/hotspot/share/runtime/thread.hpp
+++ b/src/hotspot/share/runtime/thread.hpp
@@ -1261,7 +1261,7 @@ class JavaThread: public Thread {
   address last_Java_pc(void)                     { return _anchor.last_Java_pc(); }
 
   // Safepoint support
-#if !(defined(PPC64) || defined(AARCH64))
+#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64))
   JavaThreadState thread_state() const           { return _thread_state; }
   void set_thread_state(JavaThreadState s)       {
     assert(current_or_null() == NULL || current_or_null() == this,
diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp
index dee8534f73..9af07aeb45 100644
--- a/src/hotspot/share/runtime/thread.inline.hpp
+++ b/src/hotspot/share/runtime/thread.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) {
   set_has_async_exception();
 }
 
-#if defined(PPC64) || defined (AARCH64)
+#if defined(PPC64) || defined (AARCH64) || defined(RISCV64)
 inline JavaThreadState JavaThread::thread_state() const    {
   return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state);
 }
diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp
index 6605ab367c..7f1bcff6b3 100644
--- a/src/hotspot/share/utilities/macros.hpp
+++ b/src/hotspot/share/utilities/macros.hpp
@@ -601,6 +601,32 @@
 
 #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x))
 
+#if defined(RISCV32) || defined(RISCV64)  // RISCV is defined whenever either RISC-V word size is selected
+#define RISCV
+#define RISCV_ONLY(code) code
+#define NOT_RISCV(code)
+#else  // neither RISCV32 nor RISCV64
+#undef RISCV  // drop any RISCV definition so that #ifdef RISCV is false on other CPUs
+#define RISCV_ONLY(code)
+#define NOT_RISCV(code) code
+#endif // RISCV32 || RISCV64
+
+#ifdef RISCV32  // X_ONLY(code) expands to code when X is defined, NOT_X(code) when it is not
+#define RISCV32_ONLY(code) code
+#define NOT_RISCV32(code)
+#else  // !RISCV32
+#define RISCV32_ONLY(code)
+#define NOT_RISCV32(code) code
+#endif // RISCV32
+
+#ifdef RISCV64  // same pattern as the RISCV32 pair above
+#define RISCV64_ONLY(code) code
+#define NOT_RISCV64(code)
+#else  // !RISCV64
+#define RISCV64_ONLY(code)
+#define NOT_RISCV64(code) code
+#endif // RISCV64
+
 #ifdef VM_LITTLE_ENDIAN
 #define LITTLE_ENDIAN_ONLY(code) code
 #define BIG_ENDIAN_ONLY(code)
diff --git a/src/hotspot/share/utilities/vmassert_reinstall.hpp b/src/hotspot/share/utilities/vmassert_reinstall.hpp
new file mode 100644
index 0000000000..32d31ac0c4
--- /dev/null
+++ b/src/hotspot/share/utilities/vmassert_reinstall.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Intentionally no #include guard.  May be included multiple times for effect.
+
+// See vmassert_uninstall.hpp for usage.
+
+// Remove possible stdlib assert macro (or any others, for that matter).
+#undef assert
+
+// Reinstall HotSpot's assert macro, if previously defined.
+#ifdef vmassert
+#define assert(p, ...) vmassert(p, __VA_ARGS__)
+#endif // vmassert
+
diff --git a/src/hotspot/share/utilities/vmassert_uninstall.hpp b/src/hotspot/share/utilities/vmassert_uninstall.hpp
new file mode 100644
index 0000000000..dd6d51633d
--- /dev/null
+++ b/src/hotspot/share/utilities/vmassert_uninstall.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+// Intentionally no #include guard.  May be included multiple times for effect.
+
+// The files vmassert_uninstall.hpp and vmassert_reinstall.hpp provide a
+// workaround for the name collision between HotSpot's assert macro and the
+// Standard Library's assert macro.  When including a 3rd-party header that
+// uses (and so includes) the standard assert macro, wrap that inclusion with
+// includes of these two files, e.g.
+//
+// #include "utilities/vmassert_uninstall.hpp"
+// #include <header including standard assert macro>
+// #include "utilities/vmassert_reinstall.hpp"
+//
+// This removes the HotSpot macro definition while pre-processing the
+// 3rd-party header, then reinstates the HotSpot macro (if previously defined)
+// for following code.
+
+// Remove HotSpot's assert macro, if present.
+#ifdef vmassert
+#undef assert
+#endif // vmassert
+
diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
index 0d834302c5..45a927fb5e 100644
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,6 +58,10 @@
 #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h"
 #endif
 
+#ifdef riscv64
+#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h"
+#endif
+
 static jfieldID p_ps_prochandle_ID = 0;
 static jfieldID threadList_ID = 0;
 static jfieldID loadObjectList_ID = 0;
@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
   return (err == PS_OK)? array : 0;
 }
 
-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64)
+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64)
 JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0
   (JNIEnv *env, jobject this_obj, jint lwp_id) {
 
@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
 #if defined(sparc) || defined(sparcv9)
 #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG
 #endif
+#ifdef riscv64
+#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG
+#endif
 #if defined(ppc64) || defined(ppc64le)
 #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG
 #endif
@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo
   }
 #endif /* aarch64 */
 
+#if defined(riscv64)  /* copy gregs (ABI register names) into regs[] at the RISCV64ThreadContext indices */
+#define REG_INDEX(reg)  sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg
+
+  regs[REG_INDEX(PC)]  = gregs.pc;
+  regs[REG_INDEX(LR)]  = gregs.ra;  /* ra is x1 in the RISC-V register numbering */
+  regs[REG_INDEX(SP)]  = gregs.sp;  /* sp is x2 */
+  regs[REG_INDEX(R3)]  = gregs.gp;  /* from here on Rn matches xn: x3 = gp, x4 = tp */
+  regs[REG_INDEX(R4)]  = gregs.tp;
+  regs[REG_INDEX(R5)]  = gregs.t0;  /* x5-x7 = t0-t2 */
+  regs[REG_INDEX(R6)]  = gregs.t1;
+  regs[REG_INDEX(R7)]  = gregs.t2;
+  regs[REG_INDEX(R8)]  = gregs.s0;  /* x8 = s0, which the RISC-V ABI also names fp */
+  regs[REG_INDEX(R9)]  = gregs.s1;
+  regs[REG_INDEX(R10)]  = gregs.a0;  /* x10-x17 = a0-a7 */
+  regs[REG_INDEX(R11)]  = gregs.a1;
+  regs[REG_INDEX(R12)]  = gregs.a2;
+  regs[REG_INDEX(R13)]  = gregs.a3;
+  regs[REG_INDEX(R14)]  = gregs.a4;
+  regs[REG_INDEX(R15)]  = gregs.a5;
+  regs[REG_INDEX(R16)]  = gregs.a6;
+  regs[REG_INDEX(R17)]  = gregs.a7;
+  regs[REG_INDEX(R18)]  = gregs.s2;  /* x18-x27 = s2-s11 */
+  regs[REG_INDEX(R19)]  = gregs.s3;
+  regs[REG_INDEX(R20)]  = gregs.s4;
+  regs[REG_INDEX(R21)]  = gregs.s5;
+  regs[REG_INDEX(R22)]  = gregs.s6;
+  regs[REG_INDEX(R23)]  = gregs.s7;
+  regs[REG_INDEX(R24)]  = gregs.s8;
+  regs[REG_INDEX(R25)]  = gregs.s9;
+  regs[REG_INDEX(R26)]  = gregs.s10;
+  regs[REG_INDEX(R27)]  = gregs.s11;
+  regs[REG_INDEX(R28)]  = gregs.t3;  /* x28-x31 = t3-t6 */
+  regs[REG_INDEX(R29)]  = gregs.t4;
+  regs[REG_INDEX(R30)]  = gregs.t5;
+  regs[REG_INDEX(R31)]  = gregs.t6;
+
+#endif /* riscv64 */
+
 #if defined(ppc64) || defined(ppc64le)
 #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg
 
diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
index 8318e8e021..ab092d4ee3 100644
--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,6 +43,8 @@
 #elif defined(arm)
 #include <asm/ptrace.h>
 #define user_regs_struct  pt_regs
+#elif defined(riscv64)
+#include <asm/ptrace.h>
 #endif
 
 // This C bool type must be int for compatibility with Linux calls and
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
index 0f5f0119c7..9bff9ee9b1 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java
@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription;
 import sun.jvm.hotspot.debugger.MachineDescriptionAMD64;
 import sun.jvm.hotspot.debugger.MachineDescriptionPPC64;
 import sun.jvm.hotspot.debugger.MachineDescriptionAArch64;
+import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64;
 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86;
 import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit;
 import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit;
@@ -598,6 +599,8 @@ public class HotSpotAgent {
             } else {
                     machDesc = new MachineDescriptionSPARC32Bit();
             }
+        } else if (cpu.equals("riscv64")) {
+            machDesc = new MachineDescriptionRISCV64();
         } else {
           try {
             machDesc = (MachineDescription)
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java
new file mode 100644
index 0000000000..a972516dee
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger;
+
+public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription {
+  public long getAddressSize() {
+    return 8;  // presumably bytes, i.e. 64-bit addresses, in line with isLP64() below
+  }
+
+  public boolean isLP64() {
+    return true;  // riscv64 is described as an LP64 platform
+  }
+
+  public boolean isBigEndian() {
+    return false;  // described as little-endian
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
index 5e5a6bb714..dc0bcb3da9 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2015, Red Hat Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*;
 import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.aarch64.*;
 import sun.jvm.hotspot.debugger.sparc.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
 import sun.jvm.hotspot.debugger.ppc64.*;
 import sun.jvm.hotspot.debugger.linux.x86.*;
 import sun.jvm.hotspot.debugger.linux.amd64.*;
 import sun.jvm.hotspot.debugger.linux.sparc.*;
 import sun.jvm.hotspot.debugger.linux.ppc64.*;
 import sun.jvm.hotspot.debugger.linux.aarch64.*;
+import sun.jvm.hotspot.debugger.linux.riscv64.*;
 import sun.jvm.hotspot.utilities.*;
 
 class LinuxCDebugger implements CDebugger {
@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger {
        Address pc  = context.getRegisterAsAddress(AARCH64ThreadContext.PC);
        if (pc == null) return null;
        return new LinuxAARCH64CFrame(dbg, fp, pc);
-     } else {
+    } else if (cpu.equals("riscv64")) {
+       RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext();
+       Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP);
+       if (fp == null) return null;
+       Address pc  = context.getRegisterAsAddress(RISCV64ThreadContext.PC);
+       if (pc == null) return null;
+       return new LinuxRISCV64CFrame(dbg, fp, pc);
+    } else {
        // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu
        ThreadContext context = (ThreadContext) thread.getContext();
        return context.getTopFrame(dbg);
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java
new file mode 100644
index 0000000000..f06da24bd0
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.linux.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.linux.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
+import sun.jvm.hotspot.debugger.cdbg.basic.*;
+
+public final class LinuxRISCV64CFrame extends BasicCFrame { // native (C) frame on Linux/riscv64, described by an fp and a pc
+   private static final int C_FRAME_LINK_OFFSET        = -2; // slot index relative to fp that sender() reads as the caller's fp
+   private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; // slot index relative to fp that sender() reads as the caller's pc
+
+   public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) {
+      super(dbg.getCDebugger());
+      this.fp = fp;
+      this.pc = pc;
+      this.dbg = dbg;
+   }
+
+   // Override the base class implementation to avoid ELF parsing.
+   public ClosestSymbol closestSymbolToPC() {
+      // Try native lookup in the debugger, keyed by the numeric value of pc.
+      return dbg.lookup(dbg.getAddressValue(pc()));
+   }
+
+   public Address pc() {
+      return pc;
+   }
+
+   public Address localVariableBase() {
+      return fp; // the frame pointer doubles as the local-variable base
+   }
+
+   public CFrame sender(ThreadProxy thread) { // returns the calling frame, or null when the walk cannot continue
+      RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext();
+      Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); // the thread's current sp register
+
+      if ((fp == null) || fp.lessThan(rsp)) { // no fp, or fp lower than the current sp
+        return null;
+      }
+
+      // Check alignment of fp: it must be a multiple of 2 * ADDRESS_SIZE (16 bytes).
+      if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) {
+        return null;
+      }
+
+      Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); // read at fp - 16
+      if (nextFP == null || nextFP.lessThanOrEqual(fp)) { // the caller's fp must be strictly higher than this frame's fp
+        return null;
+      }
+      Address nextPC  = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); // read at fp - 8
+      if (nextPC == null) {
+        return null;
+      }
+      return new LinuxRISCV64CFrame(dbg, nextFP, nextPC);
+   }
+
+   // package/class internals only
+   private static final int ADDRESS_SIZE = 8; // bytes per stack slot in the offset arithmetic above
+   private Address pc;
+   private Address sp; // NOTE(review): never assigned or read in this class
+   private Address fp;
+   private LinuxDebugger dbg;
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java
new file mode 100644
index 0000000000..fdb841ccf3
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.linux.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.linux.*;
+
+public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { // register context whose Address conversions go through a LinuxDebugger
+  private LinuxDebugger debugger; // used only to convert between Address objects and raw long values
+
+  public LinuxRISCV64ThreadContext(LinuxDebugger debugger) {
+    super();
+    this.debugger = debugger;
+  }
+
+  public void setRegisterAsAddress(int index, Address value) {
+    setRegister(index, debugger.getAddressValue(value)); // store the numeric value of the address in register slot 'index'
+  }
+
+  public Address getRegisterAsAddress(int index) {
+    return debugger.newAddress(getRegister(index)); // wrap the raw register value as an Address
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java
new file mode 100644
index 0000000000..96d5dee47c
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.proc.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class ProcRISCV64Thread implements ThreadProxy { // ThreadProxy for a riscv64 thread reached through a ProcDebugger
+    private ProcDebugger debugger;
+    private int         id; // thread identifier; used for register lookup, equals() and hashCode()
+
+    public ProcRISCV64Thread(ProcDebugger debugger, Address addr) {
+        this.debugger = debugger;
+
+        // FIXME: the size here should be configurable. However, making it
+        // so would produce a dependency on the "types" package from the
+        // debugger package, which is not desired.
+        this.id       = (int) addr.getCIntegerAt(0, 4, true); // id is the 4-byte C integer stored at addr
+    }
+
+    public ProcRISCV64Thread(ProcDebugger debugger, long id) {
+        this.debugger = debugger;
+        this.id = (int) id; // narrowed to int; the high bits of a larger id are dropped
+    }
+
+    public ThreadContext getContext() throws IllegalThreadStateException { // builds a fresh register snapshot on every call
+        ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger);
+        long[] regs = debugger.getThreadIntegerRegisterSet(id);
+        if (Assert.ASSERTS_ENABLED) {
+            Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch");
+        }
+        for (int i = 0; i < regs.length; i++) { // copy registers in the order the debugger returned them
+            context.setRegister(i, regs[i]);
+        }
+        return context;
+    }
+
+    public boolean canSetContext() throws DebuggerException { // always false: writing a context back is not supported
+        return false;
+    }
+
+    public void setContext(ThreadContext context)
+    throws IllegalThreadStateException, DebuggerException {
+        throw new DebuggerException("Unimplemented"); // consistent with canSetContext() returning false
+    }
+
+    public String toString() {
+        return "t@" + id;
+    }
+
+    public boolean equals(Object obj) { // two proxies are equal when they carry the same id
+        if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { // the null test is redundant: instanceof is false for null
+            return false;
+        }
+
+        return (((ProcRISCV64Thread) obj).id == id);
+    }
+
+    public int hashCode() {
+        return id; // matches equals(), which compares ids only
+    }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java
new file mode 100644
index 0000000000..f2aa845e66
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.proc.*;
+
+public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { // register context whose Address conversions go through a ProcDebugger
+    private ProcDebugger debugger; // used only to convert between Address objects and raw long values
+
+    public ProcRISCV64ThreadContext(ProcDebugger debugger) {
+        super();
+        this.debugger = debugger;
+    }
+
+    public void setRegisterAsAddress(int index, Address value) {
+        setRegister(index, debugger.getAddressValue(value)); // store the numeric value of the address in register slot 'index'
+    }
+
+    public Address getRegisterAsAddress(int index) {
+        return debugger.newAddress(getRegister(index)); // wrap the raw register value as an Address
+    }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java
new file mode 100644
index 0000000000..19f64b8ce2
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.proc.*;
+
+public class ProcRISCV64ThreadFactory implements ProcThreadFactory { // creates ProcRISCV64Thread proxies bound to one ProcDebugger
+    private ProcDebugger debugger; // handed to every thread proxy this factory creates
+
+    public ProcRISCV64ThreadFactory(ProcDebugger debugger) {
+        this.debugger = debugger;
+    }
+
+    public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { // the proxy reads its id from the given address
+        return new ProcRISCV64Thread(debugger, threadIdentifierAddr);
+    }
+
+    public ThreadProxy createThreadWrapper(long id) { // the proxy uses the given id directly
+        return new ProcRISCV64Thread(debugger, id);
+    }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java
new file mode 100644
index 0000000000..aecbda5902
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.remote.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class RemoteRISCV64Thread extends RemoteThread  { // riscv64 thread proxy backed by a RemoteDebuggerClient
+  public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) {
+     super(debugger, addr);
+  }
+
+  public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) {
+     super(debugger, id);
+  }
+
+  public ThreadContext getContext() throws IllegalThreadStateException { // builds a fresh register snapshot on every call
+    RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger);
+    long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : // addr/id/debugger are not declared here; presumably inherited from RemoteThread
+                                  debugger.getThreadIntegerRegisterSet(id);    // fall back to the numeric id when no address is set
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match");
+    }
+    for (int i = 0; i < regs.length; i++) { // copy registers in the order the debugger returned them
+      context.setRegister(i, regs[i]);
+    }
+    return context;
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java
new file mode 100644
index 0000000000..1d3da6be5a
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.debugger.remote.*;
+
+public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { // register context whose Address conversions go through a RemoteDebuggerClient
+  private RemoteDebuggerClient debugger; // used only to convert between Address objects and raw long values
+
+  public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) {
+    super();
+    this.debugger = debugger;
+  }
+
+  public void setRegisterAsAddress(int index, Address value) {
+    setRegister(index, debugger.getAddressValue(value)); // store the numeric value of the address in register slot 'index'
+  }
+
+  public Address getRegisterAsAddress(int index) {
+    return debugger.newAddress(getRegister(index)); // wrap the raw register value as an Address
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java
new file mode 100644
index 0000000000..725b94e25a
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.remote.*;
+
+public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { // creates RemoteRISCV64Thread proxies bound to one RemoteDebuggerClient
+  private RemoteDebuggerClient debugger; // handed to every thread proxy this factory creates
+
+  public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) {
+    this.debugger = debugger;
+  }
+
+  public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { // proxy identified by an address
+    return new RemoteRISCV64Thread(debugger, threadIdentifierAddr);
+  }
+
+  public ThreadProxy createThreadWrapper(long id) { // proxy identified by a numeric id
+    return new RemoteRISCV64Thread(debugger, id);
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java
new file mode 100644
index 0000000000..fb60a70427
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.riscv64;
+
+import java.lang.annotation.Native;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
+
+/** Specifies the thread context on riscv64 platforms; only a sub-portion
+ * of the context is guaranteed to be present on all operating
+ * systems. */
+
+public abstract class RISCV64ThreadContext implements ThreadContext {
+    // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64.
+
+    //  /*
+    //   * Signal context structure - contains all info to do with the state
+    //   * before the signal handler was invoked.
+    //   */
+    // struct sigcontext {
+    //   struct user_regs_struct sc_regs;
+    //   union __riscv_fp_state sc_fpregs;
+    // };
+    //
+    // struct user_regs_struct {
+    //    unsigned long pc;
+    //    unsigned long ra;
+    //    unsigned long sp;
+    //    unsigned long gp;
+    //    unsigned long tp;
+    //    unsigned long t0;
+    //    unsigned long t1;
+    //    unsigned long t2;
+    //    unsigned long s0;
+    //    unsigned long s1;
+    //    unsigned long a0;
+    //    unsigned long a1;
+    //    unsigned long a2;
+    //    unsigned long a3;
+    //    unsigned long a4;
+    //    unsigned long a5;
+    //    unsigned long a6;
+    //    unsigned long a7;
+    //    unsigned long s2;
+    //    unsigned long s3;
+    //    unsigned long s4;
+    //    unsigned long s5;
+    //    unsigned long s6;
+    //    unsigned long s7;
+    //    unsigned long s8;
+    //    unsigned long s9;
+    //    unsigned long s10;
+    //    unsigned long s11;
+    //    unsigned long t3;
+    //    unsigned long t4;
+    //    unsigned long t5;
+    //    unsigned long t6;
+    // };
+
+    // NOTE: the indices for the various registers must be maintained as
+    // listed across various operating systems. However, only a small
+    // subset of the registers' values are guaranteed to be present (and
+    // must be present for the SA's stack walking to work). Rn below is the
+    // n-th slot of user_regs_struct as listed above, so R0 is the pc slot.
+
+    // One instance of the Native annotation is enough to trigger header generation
+    // for this file.
+    @Native
+    public static final int R0 = 0;
+    public static final int R1 = 1;
+    public static final int R2 = 2;
+    public static final int R3 = 3;
+    public static final int R4 = 4;
+    public static final int R5 = 5;
+    public static final int R6 = 6;
+    public static final int R7 = 7;
+    public static final int R8 = 8;
+    public static final int R9 = 9;
+    public static final int R10 = 10;
+    public static final int R11 = 11;
+    public static final int R12 = 12;
+    public static final int R13 = 13;
+    public static final int R14 = 14;
+    public static final int R15 = 15;
+    public static final int R16 = 16;
+    public static final int R17 = 17;
+    public static final int R18 = 18;
+    public static final int R19 = 19;
+    public static final int R20 = 20;
+    public static final int R21 = 21;
+    public static final int R22 = 22;
+    public static final int R23 = 23;
+    public static final int R24 = 24;
+    public static final int R25 = 25;
+    public static final int R26 = 26;
+    public static final int R27 = 27;
+    public static final int R28 = 28;
+    public static final int R29 = 29;
+    public static final int R30 = 30;
+    public static final int R31 = 31;
+
+    public static final int NPRGREG = 32; // number of register slots held by a context (R0..R31)
+
+    public static final int PC = R0; // slot of "pc" in user_regs_struct above
+    public static final int LR = R1; // slot of "ra" in user_regs_struct above
+    public static final int SP = R2; // slot of "sp" in user_regs_struct above
+    public static final int FP = R8; // slot of "s0" in user_regs_struct above
+
+    private long[] data; // raw register values, indexed by the constants above
+
+    public RISCV64ThreadContext() {
+        data = new long[NPRGREG];
+    }
+
+    public int getNumRegisters() {
+        return NPRGREG;
+    }
+
+    public String getRegisterName(int index) { // only LR, SP and PC get symbolic names
+        switch (index) {
+        case LR: return "lr";
+        case SP: return "sp";
+        case PC: return "pc";
+        default:
+            return "r" + index; // every other index, including FP, is reported as "r<index>"
+        }
+    }
+
+    public void setRegister(int index, long value) {
+        data[index] = value; // no range check beyond the array's own bounds check
+    }
+
+    public long getRegister(int index) {
+        return data[index]; // no range check beyond the array's own bounds check
+    }
+
+    public CFrame getTopFrame(Debugger dbg) {
+        return null; // this class never builds a top frame itself
+    }
+
+    /** This can't be implemented in this class since we would have to
+     * tie the implementation to, for example, the debugging system */
+    public abstract void setRegisterAsAddress(int index, Address value);
+
+    /** This can't be implemented in this class since we would have to
+     * tie the implementation to, for example, the debugging system */
+    public abstract Address getRegisterAsAddress(int index);
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
index 190062785a..89d676fe3b 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess;
+import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess;
@@ -99,6 +100,8 @@ public class Threads {
                 access = new LinuxPPC64JavaThreadPDAccess();
             } else if (cpu.equals("aarch64")) {
                 access = new LinuxAARCH64JavaThreadPDAccess();
+            } else if (cpu.equals("riscv64")) {
+                access = new LinuxRISCV64JavaThreadPDAccess();
             } else {
               try {
                 access = (JavaThreadPDAccess)
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java
new file mode 100644
index 0000000000..5c2b6e0e3e
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.linux_riscv64;
+
+import java.io.*;
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.runtime.riscv64.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { // Linux/riscv64 platform-dependent JavaThread access
+  private static AddressField  lastJavaFPField; // JavaFrameAnchor::_last_Java_fp
+  private static AddressField  osThreadField;   // JavaThread::_osthread
+
+  // Field from OSThread
+  private static CIntegerField osThreadThreadIDField; // OSThread::_thread_id
+
+  // Range handed to RISCV64CurrentFrameGuess.run() in getCurrentFrameGuess().
+  // NOTE(review): previously described as "currently unneeded" - confirm run() uses it.
+  private static final long GUESS_SCAN_RANGE = 128 * 1024;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() { // field lookups are deferred until the VM reports it is initialized
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) { // resolves the three static field handles above
+    Type type = db.lookupType("JavaThread");
+    osThreadField           = type.getAddressField("_osthread");
+
+    Type anchorType = db.lookupType("JavaFrameAnchor");
+    lastJavaFPField         = anchorType.getAddressField("_last_Java_fp");
+
+    Type osThreadType = db.lookupType("OSThread");
+    osThreadThreadIDField   = osThreadType.getCIntegerField("_thread_id");
+  }
+
+  public Address getLastJavaFP(Address addr) { // reads _last_Java_fp at addr plus the JavaThread anchor field's offset
+    return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset()));
+  }
+
+  public Address getLastJavaPC(Address addr) {
+    return null; // always null in this implementation
+  }
+
+  public Address getBaseOfStackPointer(Address addr) {
+    return null; // always null in this implementation
+  }
+
+  public Frame getLastFramePD(JavaThread thread, Address addr) { // 'addr' is not used here
+    Address fp = thread.getLastJavaFP();
+    if (fp == null) {
+      return null; // no information
+    }
+    return new RISCV64Frame(thread.getLastJavaSP(), fp);
+  }
+
+  public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
+    return new RISCV64RegisterMap(thread, updateMap);
+  }
+
+  public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { // returns null when the guesser's run() reports failure
+    ThreadProxy t = getThreadProxy(addr);
+    RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext();
+    RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread);
+    if (!guesser.run(GUESS_SCAN_RANGE)) {
+      return null;
+    }
+    if (guesser.getPC() == null) { // no pc guessed: build the frame from sp and fp only
+      return new RISCV64Frame(guesser.getSP(), guesser.getFP());
+    } else {
+      return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
+    }
+  }
+
+  public void printThreadIDOn(Address addr, PrintStream tty) {
+    tty.print(getThreadProxy(addr)); // prints the proxy's toString() form
+  }
+
+  public void printInfoOn(Address threadAddr, PrintStream tty) {
+    tty.print("Thread id: ");
+    printThreadIDOn(threadAddr, tty);
+  }
+
+  public Address getLastSP(Address addr) { // the sp register from the thread's current register context
+    ThreadProxy t = getThreadProxy(addr);
+    RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext();
+    return context.getRegisterAsAddress(RISCV64ThreadContext.SP);
+  }
+
+  public ThreadProxy getThreadProxy(Address addr) {
+    // Addr is the address of the JavaThread.
+    // Fetch the OSThread (for now and for simplicity, not making a
+    // separate "OSThread" class in this package)
+    Address osThreadAddr = osThreadField.getValue(addr); // NOTE(review): not null-checked before the dereference below
+    // Get the address of the _thread_id from the OSThread
+    Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset());
+
+    JVMDebugger debugger = VM.getVM().getDebugger();
+    return debugger.getThreadForIdentifierAddress(threadIdAddr);
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java
new file mode 100644
index 0000000000..34701c6922
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2019, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.riscv64.*;
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.interpreter.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.runtime.riscv64.*;
+
+/** <P> Should be able to be used on all riscv64 platforms we support
+    (Linux/riscv64) to implement JavaThread's "currentFrameGuess()"
+    functionality. Input is an RISCV64ThreadContext; output is SP, FP,
+    and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is
+    left to the caller, since we may need to subclass RISCV64Frame to
+    support signal handler frames on Unix platforms. </P>
+
+    <P> Algorithm is to walk up the stack within a given range (say,
+    512K at most) looking for a plausible PC and SP for a Java frame,
+    also considering those coming in from the context. If we find a PC
+    that belongs to the VM (i.e., in generated code like the
+    interpreter or CodeCache) then we try to find an associated FP.
+    We repeat this until we either find a complete frame or run out of
+    stack to look at. </P> */
+
+public class RISCV64CurrentFrameGuess {
+  // Register context of the thread being inspected
+  private RISCV64ThreadContext context;
+  // The JavaThread the context belongs to (source of the frame anchor)
+  private JavaThread       thread;
+  // Results of the last run(); all null when nothing was found
+  private Address          spFound;
+  private Address          fpFound;
+  private Address          pcFound;
+
+  private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG")
+                                       != null;
+
+  /** Creates a guesser for the given thread.
+      @param context register context of the thread
+      @param thread  the JavaThread whose topmost frame is wanted */
+  public RISCV64CurrentFrameGuess(RISCV64ThreadContext context,
+                              JavaThread thread) {
+    this.context = context;
+    this.thread  = thread;
+  }
+
+  /** Tries to determine SP, FP and PC of the topmost Java frame; the
+      results are available through getSP(), getFP() and getPC().
+      @param regionInBytesToSearch how far above the context SP the stack
+             may be scanned when searching for a plausible SP
+      @return false if not able to find a frame within a reasonable range */
+  public boolean run(long regionInBytesToSearch) {
+    Address sp  = context.getRegisterAsAddress(RISCV64ThreadContext.SP);
+    Address pc  = context.getRegisterAsAddress(RISCV64ThreadContext.PC);
+    Address fp  = context.getRegisterAsAddress(RISCV64ThreadContext.FP);
+    if (sp == null) {
+      // Bail out if no last java frame either
+      if (thread.getLastJavaSP() != null) {
+        setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null);
+        return true;
+      }
+      return false;
+    }
+    VM vm       = VM.getVM();
+
+    setValues(null, null, null); // Assume we're not going to find anything
+
+    if (vm.isJavaPCDbg(pc)) {
+      if (vm.isClientCompiler()) {
+        // If the topmost frame is a Java frame, we are (pretty much)
+        // guaranteed to have a viable FP. We should be more robust
+        // than this (we have the potential for losing entire threads'
+        // stack traces) but need to see how much work we really have
+        // to do here. Searching the stack for an (SP, FP) pair is
+        // hard since it's easy to misinterpret inter-frame stack
+        // pointers as base-of-frame pointers; we also don't know the
+        // sizes of C1 frames (not registered in the nmethod) so can't
+        // derive them from SP.
+
+        setValues(sp, fp, pc);
+        return true;
+      } else {
+        if (vm.getInterpreter().contains(pc)) {
+          if (DEBUG) {
+            System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " +
+                               sp + ", fp = " + fp + ", pc = " + pc);
+          }
+          setValues(sp, fp, pc);
+          return true;
+        }
+
+        // For the server compiler, FP is not guaranteed to be valid
+        // for compiled code. In addition, an earlier attempt at a
+        // non-searching algorithm (see below) failed because the
+        // stack pointer from the thread context was pointing
+        // (considerably) beyond the ostensible end of the stack, into
+        // garbage; walking from the topmost frame back caused a crash.
+        //
+        // This algorithm takes the current PC as a given and tries to
+        // find the correct corresponding SP by walking up the stack
+        // and repeatedly performing stackwalks (very inefficient).
+        //
+        // FIXME: there is something wrong with stackwalking across
+        // adapter frames...this is likely to be the root cause of the
+        // failure with the simpler algorithm below.
+
+        for (long offset = 0;
+             offset < regionInBytesToSearch;
+             offset += vm.getAddressSize()) {
+          try {
+            Address curSP = sp.addOffsetTo(offset);
+            Frame frame = new RISCV64Frame(curSP, null, pc);
+            RegisterMap map = thread.newRegisterMap(false);
+            while (frame != null) {
+              if (frame.isEntryFrame() && frame.entryFrameIsFirst()) {
+                // We were able to traverse all the way to the
+                // bottommost Java frame.
+                // This sp looks good. Keep it.
+                if (DEBUG) {
+                  System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc);
+                }
+                setValues(curSP, null, pc);
+                return true;
+              }
+              frame = frame.sender(map);
+            }
+          } catch (Exception e) {
+            if (DEBUG) {
+              System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset);
+            }
+            // Bad SP. Try another.
+          }
+        }
+
+        // Were not able to find a plausible SP to go with this PC.
+        // Bail out.
+        return false;
+      }
+    } else {
+      // If the current program counter was not known to us as a Java
+      // PC, we currently assume that we are in the run-time system
+      // and attempt to look to thread-local storage for saved SP and
+      // FP. Note that if these are null (because we were, in fact,
+      // in Java code, i.e., vtable stubs or similar, and the SA
+      // didn't have enough insight into the target VM to understand
+      // that) then we are going to lose the entire stack trace for
+      // the thread, which is sub-optimal. FIXME.
+
+      if (DEBUG) {
+        System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " +
+                           thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP());
+      }
+      if (thread.getLastJavaSP() == null) {
+        return false; // No known Java frames on stack
+      }
+
+      // The runtime has a nasty habit of not saving fp in the frame
+      // anchor, leaving us to grovel about in the stack to find a
+      // plausible address.  Fortunately, this only happens in
+      // compiled code; there we always have a valid PC, and we always
+      // push LR and FP onto the stack as a pair, with FP at the lower
+      // address.
+      pc = thread.getLastJavaPC();
+      fp = thread.getLastJavaFP();
+      sp = thread.getLastJavaSP();
+
+      if (fp == null) {
+        CodeCache cc = vm.getCodeCache();
+        if (cc.contains(pc)) {
+          // An address inside the code cache does not necessarily resolve
+          // to a blob; leave FP unset in that case rather than failing.
+          CodeBlob cb = cc.findBlob(pc);
+          if (cb != null) {
+            if (DEBUG) {
+              System.out.println("FP is null.  Found blob frame size " + cb.getFrameSize());
+            }
+            // See if we can derive a frame pointer from SP and PC
+            long linkOffset = cb.getFrameSize() - 2 * vm.getAddressSize();
+            if (linkOffset >= 0) {
+              fp = sp.addOffsetTo(linkOffset);
+            }
+          } else if (DEBUG) {
+            System.out.println("FP is null.  No blob found for pc " + pc);
+          }
+        }
+      }
+
+      // We found a PC in the frame anchor. Check that it's plausible, and
+      // if it is, use it.
+      if (vm.isJavaPCDbg(pc)) {
+        setValues(sp, fp, pc);
+      } else {
+        setValues(sp, fp, null);
+      }
+
+      return true;
+    }
+  }
+
+  /** SP chosen by the last run(), or null if none was found. */
+  public Address getSP() { return spFound; }
+  /** FP chosen by the last run(); may be null even when run() succeeded. */
+  public Address getFP() { return fpFound; }
+  /** May be null if getting values from thread-local storage; take
+      care to call the correct RISCV64Frame constructor to recover this if
+      necessary */
+  public Address getPC() { return pcFound; }
+
+  /** Records the outcome of a run() in one step. */
+  private void setValues(Address sp, Address fp, Address pc) {
+    spFound = sp;
+    fpFound = fp;
+    pcFound = pc;
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java
new file mode 100644
index 0000000000..e372bc5f7b
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java
@@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2019, Red Hat Inc.
+ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.riscv64;
+
+import java.util.*;
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.compiler.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.oops.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+/** Specialization of and implementation of abstract methods of the
+    Frame class for the riscv64 family of CPUs. */
+
+public class RISCV64Frame extends Frame {
+  private static final boolean DEBUG;
+  static {
+    // Accept the property spelled with the real (lower-case) package name,
+    // which is what RISCV64CurrentFrameGuess reads, as well as the legacy
+    // spelling with an upper-case "RISCV64" segment, so that one setting
+    // enables tracing in both classes.
+    DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") != null ||
+            System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null;
+  }
+
+  // The offsets below are slot indices handed to addressOfStackSlot() (or
+  // scaled by the address size), not byte offsets.
+
+  // Java frames
+  private static final int LINK_OFFSET                =  -2;
+  private static final int RETURN_ADDR_OFFSET         =  -1;
+  private static final int SENDER_SP_OFFSET           =   0;
+
+  // Interpreter frames
+  private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3;
+  private static final int INTERPRETER_FRAME_LAST_SP_OFFSET   = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1;
+  private static final int INTERPRETER_FRAME_METHOD_OFFSET    = INTERPRETER_FRAME_LAST_SP_OFFSET - 1;
+  private static       int INTERPRETER_FRAME_MDX_OFFSET;         // Non-core builds only
+  private static       int INTERPRETER_FRAME_PADDING_OFFSET;
+  private static       int INTERPRETER_FRAME_MIRROR_OFFSET;
+  private static       int INTERPRETER_FRAME_CACHE_OFFSET;
+  private static       int INTERPRETER_FRAME_LOCALS_OFFSET;
+  private static       int INTERPRETER_FRAME_BCX_OFFSET;
+  private static       int INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET;
+  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET;
+
+  // Entry frames
+  private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10;
+
+  // Native frames
+  private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET =  2;
+
+  // Register under which the saved frame pointer is recorded in register
+  // maps (presumably x8, i.e. s0/fp, in the RISC-V ABI -- TODO confirm)
+  private static final VMReg fp = new VMReg(8);
+
+  static {
+    // The derived interpreter offsets are computed by initialize() when the
+    // VM-initialized observer fires.
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  /** Computes the interpreter frame offsets that are not compile-time
+      constants; each slot lies one below the previous one. The db
+      argument is currently unused. */
+  private static synchronized void initialize(TypeDataBase db) {
+    INTERPRETER_FRAME_MDX_OFFSET                  = INTERPRETER_FRAME_METHOD_OFFSET - 1;
+    INTERPRETER_FRAME_PADDING_OFFSET              = INTERPRETER_FRAME_MDX_OFFSET - 1;
+    INTERPRETER_FRAME_MIRROR_OFFSET               = INTERPRETER_FRAME_PADDING_OFFSET - 1;
+    INTERPRETER_FRAME_CACHE_OFFSET                = INTERPRETER_FRAME_MIRROR_OFFSET - 1;
+    INTERPRETER_FRAME_LOCALS_OFFSET               = INTERPRETER_FRAME_CACHE_OFFSET - 1;
+    INTERPRETER_FRAME_BCX_OFFSET                  = INTERPRETER_FRAME_LOCALS_OFFSET - 1;
+    INTERPRETER_FRAME_INITIAL_SP_OFFSET           = INTERPRETER_FRAME_BCX_OFFSET - 1;
+    INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET    = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+    INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+  }
+
+
+  // an additional field beyond sp and pc:
+  Address raw_fp; // frame pointer
+  private Address raw_unextendedSP;
+
+  // Used by clone() only; leaves every field unset
+  private RISCV64Frame() {
+  }
+
+  /** If pc is the deopt handler entry of the nmethod containing it,
+      replaces pc with the original PC read from the frame at the
+      nmethod's origPCOffset and marks the frame as deoptimized. */
+  private void adjustForDeopt() {
+    if ( pc != null) {
+      // Look for a deopt pc and if it is deopted convert to original pc
+      CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
+      if (cb != null && cb.isJavaMethod()) {
+        NMethod nm = (NMethod) cb;
+        if (pc.equals(nm.deoptHandlerBegin())) {
+          if (Assert.ASSERTS_ENABLED) {
+            Assert.that(this.getUnextendedSP() != null, "null SP in Java frame");
+          }
+          // adjust pc if frame is deoptimized.
+          pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset());
+          deoptimized = true;
+        }
+      }
+    }
+  }
+
+  /** Creates a frame from explicit SP, FP and PC; the unextended SP
+      starts out equal to SP. */
+  public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_sp;
+    this.raw_fp = raw_fp;
+    this.pc = pc;
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("RISCV64Frame(sp, fp, pc): " + this);
+      dumpStack();
+    }
+  }
+
+  /** Creates a frame from SP and FP only; the PC is taken from the word
+      just below SP when that word is a PC known to Java, and is left
+      null otherwise. */
+  public RISCV64Frame(Address raw_sp, Address raw_fp) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_sp;
+    this.raw_fp = raw_fp;
+
+    // We cannot assume SP[-1] always contains a valid return PC (e.g. if
+    // the callee is a C/C++ compiled frame). If the PC is not known to
+    // Java then this.pc is null.
+    Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
+    if (VM.getVM().isJavaPCDbg(savedPC)) {
+      this.pc = savedPC;
+    }
+
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("RISCV64Frame(sp, fp): " + this);
+      dumpStack();
+    }
+  }
+
+  /** Creates a frame whose unextended SP is supplied separately from SP. */
+  public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_unextendedSp;
+    this.raw_fp = raw_fp;
+    this.pc = pc;
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this);
+      dumpStack();
+    }
+
+  }
+
+  /** Field-by-field copy; none of the constructor adjustments are re-run. */
+  public Object clone() {
+    RISCV64Frame frame = new RISCV64Frame();
+    frame.raw_sp = raw_sp;
+    frame.raw_unextendedSP = raw_unextendedSP;
+    frame.raw_fp = raw_fp;
+    frame.pc = pc;
+    frame.deoptimized = deoptimized;
+    return frame;
+  }
+
+  /** Two frames are equal when SP, unextended SP, FP and PC all match. */
+  public boolean equals(Object arg) {
+    if (arg == null) {
+      return false;
+    }
+
+    if (!(arg instanceof RISCV64Frame)) {
+      return false;
+    }
+
+    RISCV64Frame other = (RISCV64Frame) arg;
+
+    return (AddressOps.equal(getSP(), other.getSP()) &&
+            AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) &&
+            AddressOps.equal(getFP(), other.getFP()) &&
+            AddressOps.equal(getPC(), other.getPC()));
+  }
+
+  /** Hash of SP alone (0 when SP is null). */
+  public int hashCode() {
+    if (raw_sp == null) {
+      return 0;
+    }
+
+    return raw_sp.hashCode();
+  }
+
+  public String toString() {
+    return "sp: " + (getSP() == null? "null" : getSP().toString()) +
+         ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) +
+         ", fp: " + (getFP() == null? "null" : getFP().toString()) +
+         ", pc: " + (pc == null? "null" : pc.toString());
+  }
+
+  // accessors for the instance variables
+  public Address getFP() { return raw_fp; }
+  public Address getSP() { return raw_sp; }
+  public Address getID() { return raw_sp; }
+
+  // FIXME: not implemented yet
+  public boolean isSignalHandlerFrameDbg() { return false; }
+  public int     getSignalNumberDbg()      { return 0;     }
+  public String  getSignalNameDbg()        { return null;  }
+
+  /** Sanity-checks FP and SP of an interpreted frame: both must be
+      non-null with the low two bits clear, FP must lie above SP, and
+      the distance between them must not exceed 4096 words. */
+  public boolean isInterpretedFrameValid() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(isInterpretedFrame(), "Not an interpreted frame");
+    }
+
+    // These are reasonable sanity checks
+    if (getFP() == null || getFP().andWithMask(0x3) != null) {
+      return false;
+    }
+
+    if (getSP() == null || getSP().andWithMask(0x3) != null) {
+      return false;
+    }
+
+    if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) {
+      return false;
+    }
+
+    // These are hacks to keep us out of trouble.
+    // The problem with these is that they mask other problems
+    if (getFP().lessThanOrEqual(getSP())) {
+      // this attempts to deal with unsigned comparison above
+      return false;
+    }
+
+    if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) {
+      // stack frames shouldn't be large.
+      return false;
+    }
+
+    return true;
+  }
+
+  /** Returns the caller of this frame, dispatching on the frame kind:
+      entry frame, interpreted frame, frame with a code blob, or (when no
+      blob is found for the PC) a native-compiled frame.
+      @param regMap must be a RISCV64RegisterMap
+      @param cb     the blob containing this frame's PC, or null to have
+                    it looked up */
+  public Frame sender(RegisterMap regMap, CodeBlob cb) {
+    RISCV64RegisterMap map = (RISCV64RegisterMap) regMap;
+
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+
+    // Default is we don't have to follow them. The sender_for_xxx will
+    // update it accordingly
+    map.setIncludeArgumentOops(false);
+
+    if (isEntryFrame())       return senderForEntryFrame(map);
+    if (isInterpretedFrame()) return senderForInterpreterFrame(map);
+
+    if(cb == null) {
+      cb = VM.getVM().getCodeCache().findBlob(getPC());
+    } else {
+      if (Assert.ASSERTS_ENABLED) {
+        Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
+      }
+    }
+
+    if (cb != null) {
+      return senderForCompiledFrame(map, cb);
+    }
+
+    // Must be native-compiled frame, i.e. the marshaling code for native
+    // methods that exists in the core system.
+    return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC());
+  }
+
+  /** Caller of an entry frame: built from the last Java SP/FP/PC saved
+      in the frame's JavaCallWrapper. Clears the register map. */
+  private Frame senderForEntryFrame(RISCV64RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForEntryFrame");
+    }
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+    // Java frame called from C; skip all C frames and return top C
+    // frame of that chunk as the sender
+    RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero");
+      Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack");
+    }
+    RISCV64Frame fr;
+    if (jcw.getLastJavaPC() != null) {
+      fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC());
+    } else {
+      fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP());
+    }
+    map.clear();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map.getIncludeArgumentOops(), "should be set by clear");
+    }
+    return fr;
+  }
+
+  //------------------------------------------------------------------------------
+  // frame::adjust_unextended_sp
+  private void adjustUnextendedSP() {
+    // If we are returning to a compiled MethodHandle call site, the
+    // saved_fp will in fact be a saved value of the unextended SP.  The
+    // simplest way to tell whether we are returning to such a call site
+    // is as follows:
+
+    CodeBlob cb = cb();
+    NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
+    if (senderNm != null) {
+      // If the sender PC is a deoptimization point, get the original
+      // PC.  For MethodHandle call site the unextended_sp is stored in
+      // saved_fp.
+      if (senderNm.isDeoptMhEntry(getPC())) {
+        raw_unextendedSP = getFP();
+      }
+      else if (senderNm.isDeoptEntry(getPC())) {
+        // Plain deopt entry: the unextended SP is left unchanged
+      }
+      else if (senderNm.isMethodHandleReturn(getPC())) {
+        raw_unextendedSP = getFP();
+      }
+    }
+  }
+
+  /** Caller of an interpreted frame: SP is the sender-SP slot address,
+      the unextended SP is the value stored in the interpreter's
+      sender-SP slot, FP is the saved link and PC the saved return
+      address. */
+  private Frame senderForInterpreterFrame(RISCV64RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForInterpreterFrame");
+    }
+    Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
+    Address sp = addressOfStackSlot(SENDER_SP_OFFSET);
+    // We do not need to update the callee-save register mapping because above
+    // us is either another interpreter frame or a converter-frame, but never
+    // directly a compiled frame.
+    // 11/24/04 SFG. With the removal of adapter frames this is no longer true.
+    // However c2 no longer uses callee save register for java calls so there
+    // are no callee register to find.
+
+    if (map.getUpdateMap())
+      updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET));
+
+    return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC());
+  }
+
+  /** Records in the map where the caller's frame pointer was saved. */
+  private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) {
+    map.setLocation(fp, savedFPAddr);
+  }
+
+  /** Caller of a frame that has a code blob: the sender SP is the
+      unextended SP plus the blob's frame size, with the return address
+      and saved FP in the two words just below it. */
+  private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) {
+    if (DEBUG) {
+      System.out.println("senderForCompiledFrame");
+    }
+
+    //
+    // NOTE: some of this code is (unfortunately) duplicated in RISCV64CurrentFrameGuess
+    //
+
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+
+    // frame owned by optimizing compiler
+    // NOTE(review): the check below admits a frame size of zero although
+    // its message says "non-zero" -- confirm which one is intended.
+    if (Assert.ASSERTS_ENABLED) {
+        Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size");
+    }
+    Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize());
+
+    // The return_address is always the word on the stack
+    Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize());
+
+    // This is the saved value of FP which may or may not really be an FP.
+    // It is only an FP if the sender is an interpreter frame.
+    Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize());
+
+    if (map.getUpdateMap()) {
+      // Tell GC to use argument oopmaps for some runtime stubs that need it.
+      // For C1, the runtime stub might not have oop maps, so set this flag
+      // outside of update_register_map.
+      map.setIncludeArgumentOops(cb.callerMustGCArguments());
+
+      if (cb.getOopMaps() != null) {
+        ImmutableOopMapSet.updateRegisterMap(this, cb, map, true);
+      }
+
+      // Since the prolog does the save and restore of FP there is no oopmap
+      // for it so we must fill in its location as if there was an oopmap entry
+      // since if our caller was compiled code there could be live jvm state in it.
+      updateMapWithSavedLink(map, savedFPAddr);
+    }
+
+    return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC);
+  }
+
+  protected boolean hasSenderPD() {
+    return true;
+  }
+
+  /** Distance from SP to the sender SP, in words. */
+  public long frameSize() {
+    return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize());
+  }
+
+  /** Returns the saved link (caller's FP) stored in this frame, or null
+      if reading it fails. */
+  public Address getLink() {
+    try {
+      if (DEBUG) {
+        System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET)
+                + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0));
+      }
+      return addressOfStackSlot(LINK_OFFSET).getAddressAt(0);
+    } catch (Exception e) {
+      // Best effort: an unreadable link slot is reported as "no link"
+      if (DEBUG)
+        System.out.println("Returning null");
+      return null;
+    }
+  }
+
+  public Address getUnextendedSP() { return raw_unextendedSP; }
+
+  // Return address:
+  public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); }
+  public Address getSenderPC()     { return getSenderPCAddr().getAddressAt(0);      }
+
+  // return address of param, zero origin index.
+  public Address getNativeParamAddr(int idx) {
+    return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx);
+  }
+
+  public Address getSenderSP()     { return addressOfStackSlot(SENDER_SP_OFFSET); }
+
+  public Address addressOfInterpreterFrameLocals() {
+    return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET);
+  }
+
+  private Address addressOfInterpreterFrameBCX() {
+    return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET);
+  }
+
+  /** Converts the bytecode pointer stored in the frame to a bytecode
+      index within the frame's method. */
+  public int getInterpreterFrameBCI() {
+    // FIXME: this is not atomic with respect to GC and is unsuitable
+    // for use in a non-debugging, or reflective, system. Need to
+    // figure out how to express this.
+    Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0);
+    Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0);
+    Method method = (Method)Metadata.instantiateWrapperFor(methodHandle);
+    return bcpToBci(bcp, method);
+  }
+
+  public Address addressOfInterpreterFrameMDX() {
+    return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET);
+  }
+
+  // expression stack
+  // (the max_stack arguments are used by the GC; see class FrameClosure)
+
+  /** The expression stack starts one word below the end of the monitor
+      block. */
+  public Address addressOfInterpreterFrameExpressionStack() {
+    Address monitorEnd = interpreterFrameMonitorEnd().address();
+    return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize());
+  }
+
+  public int getInterpreterFrameExpressionStackDirection() { return -1; }
+
+  // top of expression stack
+  public Address addressOfInterpreterFrameTOS() {
+    return getSP();
+  }
+
+  /** Expression stack from top down */
+  public Address addressOfInterpreterFrameTOSAt(int slot) {
+    return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize());
+  }
+
+  public Address getInterpreterFrameSenderSP() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(isInterpretedFrame(), "interpreted frame expected");
+    }
+    return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
+  }
+
+  // Monitors
+  public BasicObjectLock interpreterFrameMonitorBegin() {
+    return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET));
+  }
+
+  /** Unlike interpreterFrameMonitorBegin(), this dereferences the slot:
+      the slot holds a pointer to the end of the monitor block. */
+  public BasicObjectLock interpreterFrameMonitorEnd() {
+    Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0);
+    if (Assert.ASSERTS_ENABLED) {
+      // make sure the pointer points inside the frame
+      Assert.that(AddressOps.gt(getFP(), result), "result must <  than frame pointer");
+      Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer");
+    }
+    return new BasicObjectLock(result);
+  }
+
+  public int interpreterFrameMonitorSize() {
+    return BasicObjectLock.size();
+  }
+
+  // Method
+  public Address addressOfInterpreterFrameMethod() {
+    return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET);
+  }
+
+  // Constant pool cache
+  public Address addressOfInterpreterFrameCPCache() {
+    return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET);
+  }
+
+  // Entry frames
+  public JavaCallWrapper getEntryFrameCallWrapper() {
+    return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0));
+  }
+
+  protected Address addressOfSavedOopResult() {
+    // Word offset from SP: 2 when the client compiler is in use, 3 otherwise.
+    // NOTE(review): the comment that came with this code stated the opposite
+    // mapping ("2 for compiler2 and 3 for compiler1") -- confirm whether the
+    // code or that comment reflects the intended layout.
+    return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) *
+                               VM.getVM().getAddressSize());
+  }
+
+  protected Address addressOfSavedReceiver() {
+    return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
+  }
+
+  /** Debug aid: prints the four words below SP, a separator, and then
+      the words from SP up to and including SP + 20 words. */
+  private void dumpStack() {
+    for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
+         AddressOps.lt(addr, getSP());
+         addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
+      System.out.println(addr + ": " + addr.getAddressAt(0));
+    }
+    System.out.println("-----------------------");
+    for (Address addr = getSP();
+         AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize()));
+         addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
+      System.out.println(addr + ": " + addr.getAddressAt(0));
+    }
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java
new file mode 100644
index 0000000000..850758a7ed
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.riscv64;
+
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class RISCV64JavaCallWrapper extends JavaCallWrapper {
+  // Accessor for JavaFrameAnchor::_last_Java_fp; assigned by initialize()
+  private static AddressField lastJavaFPField;
+
+  static {
+    // Field lookup is deferred until the VM-initialized observer fires
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable source, Object arg) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  /** Resolves the _last_Java_fp field of the JavaFrameAnchor type. */
+  private static synchronized void initialize(TypeDataBase db) {
+    lastJavaFPField = db.lookupType("JavaFrameAnchor").getAddressField("_last_Java_fp");
+  }
+
+  /** Wraps the JavaCallWrapper located at addr. */
+  public RISCV64JavaCallWrapper(Address addr) {
+    super(addr);
+  }
+
+  /** Returns the last Java FP stored in this wrapper's frame anchor. */
+  public Address getLastJavaFP() {
+    Address anchorAddr = addr.addOffsetTo(anchorField.getOffset());
+    return lastJavaFPField.getValue(anchorAddr);
+  }
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java
new file mode 100644
index 0000000000..4aeb1c6f55
--- /dev/null
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.riscv64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+
+public class RISCV64RegisterMap extends RegisterMap {
+  // RegisterMap subclass with no state of its own: every *PD hook below is empty or returns null.
+  /** This is the only public constructor */
+  public RISCV64RegisterMap(JavaThread thread, boolean updateMap) {
+    super(thread, updateMap);
+  }
+
+  protected RISCV64RegisterMap(RegisterMap map) { // copy-style constructor, used by clone()
+    super(map);
+  }
+
+  public Object clone() { // builds the copy through the protected constructor, not Object.clone()
+    RISCV64RegisterMap retval = new RISCV64RegisterMap(this);
+    return retval;
+  }
+
+  // no PD state to clear or copy:
+  protected void clearPD() {}
+  protected void initializePD() {}
+  protected void initializeFromPD(RegisterMap map) {}
+  protected Address getLocationPD(VMReg reg) { return null; } // always null for every register
+}
diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
index 7d7a6107ca..6552ce255f 100644
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,7 @@ public class PlatformInfo {
 
   public static boolean knownCPU(String cpu) {
     final String[] KNOWN =
-        new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"};
+        new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"};
 
     for(String s : KNOWN) {
       if(s.equals(cpu))
diff --git a/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp b/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp
index 24f25b87af..7a3845e336 100644
--- a/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp
+++ b/test/hotspot/gtest/gc/shared/test_memset_with_concurrent_readers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,10 +24,13 @@
 #include "precompiled.hpp"
 #include "gc/shared/memset_with_concurrent_readers.hpp"
 #include "utilities/globalDefinitions.hpp"
-#include "unittest.hpp"
 
+#include "utilities/vmassert_uninstall.hpp"
 #include <string.h>
 #include <sstream>
+#include "utilities/vmassert_reinstall.hpp"
+
+#include "unittest.hpp"
 
 static unsigned line_byte(const char* line, size_t i) {
   return unsigned(line[i]) & 0xFF;
diff --git a/test/hotspot/gtest/jfr/test_networkUtilization.cpp b/test/hotspot/gtest/jfr/test_networkUtilization.cpp
index 19d6a6e2c2..42cd18356b 100644
--- a/test/hotspot/gtest/jfr/test_networkUtilization.cpp
+++ b/test/hotspot/gtest/jfr/test_networkUtilization.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,11 +42,13 @@
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/growableArray.hpp"
 
-#include "unittest.hpp"
-
+#include "utilities/vmassert_uninstall.hpp"
 #include <vector>
 #include <list>
 #include <map>
+#include "utilities/vmassert_reinstall.hpp"
+
+#include "unittest.hpp"
 
 namespace {
 
diff --git a/test/hotspot/gtest/unittest.hpp b/test/hotspot/gtest/unittest.hpp
index 0494a0e240..91edf6adba 100644
--- a/test/hotspot/gtest/unittest.hpp
+++ b/test/hotspot/gtest/unittest.hpp
@@ -28,19 +28,10 @@
 #include <stdio.h>
 
 #define GTEST_DONT_DEFINE_TEST 1
-#include "gtest/gtest.h"
 
-// gtest/gtest.h includes assert.h which will define the assert macro, but hotspot has its
-// own standards incompatible assert macro that takes two parameters.
-// The workaround is to undef assert and then re-define it. The re-definition
-// must unfortunately be copied since debug.hpp might already have been
-// included and a second include wouldn't work due to the header guards in debug.hpp.
-#ifdef assert
-  #undef assert
-  #ifdef vmassert
-    #define assert(p, ...) vmassert(p, __VA_ARGS__)
-  #endif
-#endif
+#include "utilities/vmassert_uninstall.hpp"
+#include "gtest/gtest.h"
+#include "utilities/vmassert_reinstall.hpp"
 
 #define CONCAT(a, b) a ## b
 
diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java
index 7805918c28..823b9f39db 100644
--- a/test/hotspot/jtreg/compiler/c2/TestBit.java
+++ b/test/hotspot/jtreg/compiler/c2/TestBit.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools;
  *
  * @run driver compiler.c2.TestBit
  *
- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le"
+ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64"
  * @requires vm.debug == true & vm.compiler2.enabled
  */
 public class TestBit {
@@ -54,7 +54,8 @@ public class TestBit {
         String expectedTestBitInstruction =
             "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" :
             "aarch64".equals(System.getProperty("os.arch")) ? "tb"   :
-            "amd64".equals(System.getProperty("os.arch"))   ? "test" : null;
+            "amd64".equals(System.getProperty("os.arch"))   ? "test" :
+            "riscv64".equals(System.getProperty("os.arch")) ? "andi" : null;
 
         if (expectedTestBitInstruction != null) {
             output.shouldContain(expectedTestBitInstruction);
diff --git a/test/hotspot/jtreg/compiler/calls/TestManyArgs.java b/test/hotspot/jtreg/compiler/calls/TestManyArgs.java
new file mode 100644
index 0000000000..fbd9c13d7c
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/calls/TestManyArgs.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test
+ * @summary Pass values on stack.
+ * @requires os.arch == "riscv64"
+ * @run main/native compiler.calls.TestManyArgs
+ */
+
+package compiler.calls;
+
+public class TestManyArgs { // passes twelve int/short/byte arguments through a Java bridge into native code
+    static {
+        System.loadLibrary("TestManyArgs"); // native side is libTestManyArgs.c
+    }
+
+    native static void scramblestack(); // native: fills a 96-byte local buffer with 0xff bytes
+
+    native static int checkargs(int arg0, short arg1, byte arg2,
+                                int arg3, short arg4, byte arg5,
+                                int arg6, short arg7, byte arg8,
+                                int arg9, short arg10, byte arg11); // native: 0 if every arg == 0xf, else 1
+
+    static int compiledbridge(int arg0, short arg1, byte arg2,
+                              int arg3, short arg4, byte arg5,
+                              int arg6, short arg7, byte arg8,
+                              int arg9, short arg10, byte arg11) { // forwards all arguments unchanged
+        return checkargs(arg0, arg1, arg2, arg3, arg4, arg5,
+                         arg6, arg7, arg8, arg9, arg10, arg11);
+    }
+
+    static public void main(String[] args) {
+        scramblestack(); // called once, before the loop
+        for (int i = 0; i < 20000; i++) { // presumably enough iterations to get compiledbridge JIT-compiled -- TODO confirm
+            int res = compiledbridge((int)0xf, (short)0xf, (byte)0xf,
+                                     (int)0xf, (short)0xf, (byte)0xf,
+                                     (int)0xf, (short)0xf, (byte)0xf,
+                                     (int)0xf, (short)0xf, (byte)0xf);
+            if (res != 0) { // non-zero: the native side saw an argument that was not 0xf
+                throw new RuntimeException("Test failed");
+            }
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c b/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c
new file mode 100644
index 0000000000..8836c79e43
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/calls/libTestManyArgs.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Rivos Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "jni.h"
+
+#ifdef riscv64
+/* The RV64 ABI passes all integers as 64-bit, in registers or on the stack.
+ * As the compiler may choose to load a smaller width than 64-bit if an
+ * argument is passed on the stack, this test might not find any bugs.
+ * Therefore we trick the compiler into doing 64-bit loads
+ * by saying these args are jlongs.
+ */
+JNIEXPORT jint JNICALL Java_compiler_calls_TestManyArgs_checkargs(JNIEnv* env, jclass jclazz,
+                                                                  jlong arg0, jlong arg1, jlong arg2,
+                                                                  jlong arg3, jlong arg4, jlong arg5,
+                                                                  jlong arg6, jlong arg7, jlong arg8,
+                                                                  jlong arg9, jlong arg10, jlong arg11)
+#else
+JNIEXPORT jint JNICALL Java_compiler_calls_TestManyArgs_checkargs(JNIEnv* env, jclass jclazz,
+                                                                  jint arg0, jshort arg1, jbyte arg2,
+                                                                  jint arg3, jshort arg4, jbyte arg5,
+                                                                  jint arg6, jshort arg7, jbyte arg8,
+                                                                  jint arg9, jshort arg10, jbyte arg11)
+#endif
+{
+    if (arg0 != 0xf) return 1; /* 1 = this argument did not arrive as 0xf */
+    if (arg1 != 0xf) return 1;
+    if (arg2 != 0xf) return 1;
+    if (arg3 != 0xf) return 1;
+    if (arg4 != 0xf) return 1;
+    if (arg5 != 0xf) return 1;
+    if (arg6 != 0xf) return 1;
+    if (arg7 != 0xf) return 1;
+    if (arg8 != 0xf) return 1;
+    if (arg9 != 0xf) return 1;
+    if (arg10 != 0xf) return 1;
+    if (arg11 != 0xf) return 1;
+    return 0; /* all twelve arguments equal 0xf */
+}
+
+JNIEXPORT
+void JNICALL Java_compiler_calls_TestManyArgs_scramblestack(JNIEnv* env, jclass jclazz)
+{   /* Overwrites a local 96-byte buffer with 0xff bytes, then returns. */
+    volatile char stack[12*8]; /* presumably 12 slots of 8 bytes, one per checkargs argument -- TODO confirm */
+    for (unsigned int i = 0; i < sizeof(stack); i++) {
+        stack[i] = (char)0xff; /* every byte becomes all-ones */
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java
new file mode 100644
index 0000000000..5a1b659bbe
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Test libm intrinsics
+ * @library /test/lib /
+ *
+ * @build sun.hotspot.WhiteBox
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
+ *                   -XX:-BackgroundCompilation -XX:-UseOnStackReplacement
+ *                   compiler.floatingpoint.TestLibmIntrinsics
+ */
+
+package compiler.floatingpoint;
+
+import compiler.whitebox.CompilerWhiteBoxTest;
+import sun.hotspot.WhiteBox;
+
+import java.lang.reflect.Method;
+
+public class TestLibmIntrinsics {
+    // Calls m() before any compile request, after a C1 request and after a C2 request; all results must be equal.
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+
+    private static final double pi = 3.1415926;
+
+    private static final double expected = 2.5355263553695413; // NOTE(review): never read in this class
+
+    static double m() { // nested chain of Math calls on the constant pi; the method under test
+        return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi)))))));
+    }
+
+    static public void main(String[] args) throws NoSuchMethodException {
+        Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m");
+
+        double interpreter_result = m(); // first call, made before any compilation is requested
+
+        // Compile with C1 if possible
+        WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE);
+
+        double c1_result = m();
+
+        WHITE_BOX.deoptimizeMethod(test_method); // drop the previous compilation before requesting the next level
+
+        // Compile it with C2 if possible
+        WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+
+        double c2_result = m();
+        // Exact double comparison: any pairwise difference fails the test.
+        if (interpreter_result != c1_result ||
+            interpreter_result != c2_result ||
+            c1_result != c2_result) {
+            System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result);
+            throw new RuntimeException("Test Failed");
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
index 558b4218f0..55374b116e 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
 import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
 
@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU {
                         SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
                 new GenericTestCaseForUnsupportedAArch64CPU(
                         SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
+                new GenericTestCaseForUnsupportedRISCV64CPU(
+                        SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
                 new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
                         SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION),
                 new GenericTestCaseForOtherCPU(
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
index 3ed72bf0a9..8fb82ee453 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
 import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
 
@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU {
                         SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
                 new GenericTestCaseForUnsupportedAArch64CPU(
                         SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
+                new GenericTestCaseForUnsupportedRISCV64CPU(
+                        SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
                 new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
                         SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION),
                 new GenericTestCaseForOtherCPU(
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
index c05cf309da..aca32137ed 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
 import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU;
 
@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU {
                         SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
                 new GenericTestCaseForUnsupportedAArch64CPU(
                         SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
+                new GenericTestCaseForUnsupportedRISCV64CPU(
+                        SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
                 new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU(
                         SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION),
                 new GenericTestCaseForOtherCPU(
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
index 58ce5366ba..8deac4f789 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU;
+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU;
 import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU;
 import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU;
 
@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU {
                         SHAOptionsBase.USE_SHA_OPTION),
                 new GenericTestCaseForUnsupportedAArch64CPU(
                         SHAOptionsBase.USE_SHA_OPTION),
+                new GenericTestCaseForUnsupportedRISCV64CPU(
+                        SHAOptionsBase.USE_SHA_OPTION),
                 new UseSHASpecificTestCaseForUnsupportedCPU(
                         SHAOptionsBase.USE_SHA_OPTION),
                 new GenericTestCaseForOtherCPU(
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
index faa9fdbae6..2663500204 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate;
 
 /**
  * Generic test case for SHA-related options targeted to any CPU except
- * AArch64, PPC, S390x, SPARC and X86.
+ * AArch64, RISCV64, PPC, S390x, SPARC and X86.
  */
 public class GenericTestCaseForOtherCPU extends
         SHAOptionsBase.TestCase {
     public GenericTestCaseForOtherCPU(String optionName) {
-        // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86.
+        // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86.
         super(optionName, new NotPredicate(
                               new OrPredicate(Platform::isAArch64,
+                              new OrPredicate(Platform::isRISCV64,
                               new OrPredicate(Platform::isS390x,
                               new OrPredicate(Platform::isSparc,
                               new OrPredicate(Platform::isPPC,
                               new OrPredicate(Platform::isX64,
-                                              Platform::isX86)))))));
+                                              Platform::isX86))))))));
     }
 
     @Override
     protected void verifyWarnings() throws Throwable {
         String shouldPassMessage = String.format("JVM should start with "
                 + "option '%s' without any warnings", optionName);
-        // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of
+        // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of
         //  SHA-related options will not cause any warnings.
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[] { ".*" + optionName + ".*" }, shouldPassMessage,
diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java
new file mode 100644
index 0000000000..8566d57c39
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.intrinsics.sha.cli.testcases;
+
+import compiler.intrinsics.sha.cli.SHAOptionsBase;
+import jdk.test.lib.process.ExitCode;
+import jdk.test.lib.Platform;
+import jdk.test.lib.cli.CommandLineOptionTest;
+import jdk.test.lib.cli.predicate.AndPredicate;
+import jdk.test.lib.cli.predicate.NotPredicate;
+
+/**
+ * Generic test case for SHA-related options targeted to RISCV64 CPUs
+ * which don't support instruction required by the tested option.
+ */
+public class GenericTestCaseForUnsupportedRISCV64CPU extends
+        SHAOptionsBase.TestCase {
+
+    private final boolean checkUseSHA; // when true, the UseSHA-related checks below are run as well
+
+    public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) {
+        this(optionName, true); // UseSHA-related checks are on unless the caller opts out
+    }
+
+    public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) {
+        super(optionName, new AndPredicate(Platform::isRISCV64,
+                new NotPredicate(SHAOptionsBase.getPredicateForOption(
+                        optionName))));
+        // Predicate: platform is RISCV64 AND the option's own predicate does not hold.
+        this.checkUseSHA = checkUseSHA;
+    }
+
+    @Override
+    protected void verifyWarnings() throws Throwable {
+        String shouldPassMessage = String.format("JVM startup should pass with "
+                + "option '-XX:-%s' without any warnings", optionName);
+        // Verify that option could be disabled without any warnings.
+        CommandLineOptionTest.verifySameJVMStartup(null, new String[] {
+                        SHAOptionsBase.getWarningForUnsupportedCPU(optionName)
+                }, shouldPassMessage, shouldPassMessage, ExitCode.OK,
+                SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
+                CommandLineOptionTest.prepareBooleanFlag(optionName, false));
+
+        if (checkUseSHA) {
+            shouldPassMessage = String.format("If JVM is started with '-XX:-"
+                    + "%s' '-XX:+%s', output should contain warning.",
+                    SHAOptionsBase.USE_SHA_OPTION, optionName);
+
+            // Verify that when the tested option is enabled, then
+            // a warning will occur in VM output if UseSHA is disabled.
+            if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) {
+                CommandLineOptionTest.verifySameJVMStartup(
+                        new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) },
+                        null,
+                        shouldPassMessage,
+                        shouldPassMessage,
+                        ExitCode.OK,
+                        SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
+                        CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false),
+                        CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+            }
+        }
+    }
+
+    @Override
+    protected void verifyOptionValues() throws Throwable {
+        // Verify that option is disabled by default.
+        CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
+                String.format("Option '%s' should be disabled by default",
+                        optionName),
+                SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS);
+
+        if (checkUseSHA) {
+            // Verify that option is disabled even if it was explicitly enabled
+            // using CLI options.
+            CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
+                    String.format("Option '%s' should be off on unsupported "
+                            + "RISCV64 CPU even if set to true directly", optionName),
+                    SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
+                    CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+
+            // Verify that option is disabled when +UseSHA was passed to JVM.
+            CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false",
+                    String.format("Option '%s' should be off on unsupported "
+                            + "RISCV64 CPU even if %s flag set to JVM",
+                            optionName, CommandLineOptionTest.prepareBooleanFlag(
+                                  SHAOptionsBase.USE_SHA_OPTION, true)),
+                    SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS,
+                    CommandLineOptionTest.prepareBooleanFlag(
+                            SHAOptionsBase.USE_SHA_OPTION, true));
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
index 2e3e2717a6..7be8af6d03 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
index 0e06a9e432..797927b42b 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
index c3cdbf3746..be8f7d586c 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
index d33bd411f1..d96d5e29c0 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions
  *      -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
index 992fa4b516..b09c873d05 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8138583
  * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test
- * @requires os.arch=="aarch64"
+ * @requires os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
index 3e79b3528b..fe40ed6f98 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8138583
  * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test
- * @requires os.arch=="aarch64"
+ * @requires os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
index 6603dd224e..5163191049 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8135028
  * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
index d9a0c98800..d999ae423c 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
index 722db95aed..65912a5c7f 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
index f58f21feb2..fffdc2f756 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
  * @test
  * @bug 8074981
  * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64"
  *
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1
diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
index acb86812d2..2c866f26f0 100644
--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java
@@ -24,7 +24,7 @@
 
 /* @test
  * @bug 8167409
- * @requires (os.arch != "aarch64") & (os.arch != "arm")
+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm")
  * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs
  */
 package compiler.runtime.criticalnatives.argumentcorruption;
diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
index eab36f9311..1da369fde2 100644
--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
+++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java
@@ -24,7 +24,7 @@
 
 /* @test
  * @bug 8167408
- * @requires (os.arch != "aarch64") & (os.arch != "arm")
+ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm")
  * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp
  */
 package compiler.runtime.criticalnatives.lookup;
diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
index 7774dabcb5..7afe3560f3 100644
--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
+++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -61,15 +61,17 @@ public class IntrinsicPredicates {
 
     public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE
             = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null),
+              new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null),
               new OrPredicate(new CPUSpecificPredicate("s390.*",    new String[] { "sha1" }, null),
               new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha1" }, null),
               // x86 variants
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "sha" },  null),
               new OrPredicate(new CPUSpecificPredicate("i386.*",    new String[] { "sha" },  null),
-                              new CPUSpecificPredicate("x86.*",     new String[] { "sha" },  null))))));
+                              new CPUSpecificPredicate("x86.*",     new String[] { "sha" },  null)))))));
 
     public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE
             = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256"       }, null),
+              new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256"       }, null),
               new OrPredicate(new CPUSpecificPredicate("s390.*",    new String[] { "sha256"       }, null),
               new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha256"       }, null),
               new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"          }, null),
@@ -79,10 +81,11 @@ public class IntrinsicPredicates {
               new OrPredicate(new CPUSpecificPredicate("i386.*",    new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("x86.*",     new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "avx2", "bmi2" }, null),
-                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))))));
+                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null)))))))))));
 
     public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE
             = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512"       }, null),
+              new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512"       }, null),
               new OrPredicate(new CPUSpecificPredicate("s390.*",    new String[] { "sha512"       }, null),
               new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha512"       }, null),
               new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"          }, null),
@@ -92,7 +95,7 @@ public class IntrinsicPredicates {
               new OrPredicate(new CPUSpecificPredicate("i386.*",    new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("x86.*",     new String[] { "sha"          }, null),
               new OrPredicate(new CPUSpecificPredicate("amd64.*",   new String[] { "avx2", "bmi2" }, null),
-                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null))))))))));
+                              new CPUSpecificPredicate("x86_64",    new String[] { "avx2", "bmi2" }, null)))))))))));
 
     public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE
             = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,
diff --git a/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java b/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java
index 5aea51a24f..da63e02555 100644
--- a/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java
+++ b/test/hotspot/jtreg/gc/stress/TestStressG1Humongous.java
@@ -24,14 +24,41 @@
 package gc.stress;
 
 /*
- * @test TestStressG1Humongous
+ * @test
  * @key gc stress
  * @summary Stress G1 by humongous allocations in situation near OOM
  * @requires vm.gc.G1
  * @requires !vm.flightRecorder
  * @library /test/lib
  * @modules java.base/jdk.internal.misc
- * @run driver/timeout=1300 gc.stress.TestStressG1Humongous
+ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 4 3 1.1 120
+ */
+
+/*
+ * @test
+ * @requires vm.gc.G1
+ * @requires !vm.flightRecorder
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 16 5 2.1 120
+ */
+
+/*
+ * @test
+ * @requires vm.gc.G1
+ * @requires !vm.flightRecorder
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * @run driver/timeout=180 gc.stress.TestStressG1Humongous 32 4 0.6 120
+ */
+
+/*
+ * @test
+ * @requires vm.gc.G1
+ * @requires !vm.flightRecorder
+ * @library /test/lib
+ * @modules java.base/jdk.internal.misc
+ * @run driver/timeout=900 gc.stress.TestStressG1Humongous 1 7 0.6 600
  */
 
 import java.util.ArrayList;
@@ -48,17 +75,19 @@ import jdk.test.lib.process.OutputAnalyzer;
 public class TestStressG1Humongous{
 
     public static void main(String[] args) throws Exception {
+        if (args.length != 4) {
+            throw new IllegalArgumentException("Test expects 4 arguments");
+        }
+
         // Limit heap size on 32-bit platforms
         int heapSize = Platform.is32bit() ? 512 : 1024;
-        // Heap size, region size, threads, humongous size, timeout
-        run(heapSize, 4, 3, 1.1, 120);
-        run(heapSize, 16, 5, 2.1, 120);
-        run(heapSize, 32, 4, 0.6, 120);
-        run(heapSize, 1, 7, 0.6, 600);
-    }
 
-    private static void run(int heapSize, int regionSize, int threads, double humongousSize, int timeout)
-            throws Exception {
+        // Region size, threads, humongous size, and timeout passed as @run arguments
+        int regionSize = Integer.parseInt(args[0]);
+        int threads = Integer.parseInt(args[1]);
+        double humongousSize = Double.parseDouble(args[2]);
+        int timeout = Integer.parseInt(args[3]);
+
         ArrayList<String> options = new ArrayList<>();
         Collections.addAll(options, Utils.getTestJavaOpts());
         Collections.addAll(options,
diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
index 57256aa5a3..d4d43b01ae 100644
--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
+++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace {
             // It's ok for ARM not to have symbols, because it does not support NMT detail
             // when targeting thumb2. It's also ok for Windows not to have symbols, because
             // they are only available if the symbols file is included with the build.
-            if (Platform.isWindows() || Platform.isARM()) {
+            if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) {
                 return; // we are done
             }
             output.reportDiagnosticSummary();
diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
index 127bb6abcd..eab19273ad 100644
--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
+++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -239,7 +239,7 @@ public class ReservedStackTest {
         return Platform.isAix() ||
             (Platform.isLinux() &&
              (Platform.isPPC() || Platform.isS390x() || Platform.isX64() ||
-              Platform.isX86())) ||
+              Platform.isX86() || Platform.isRISCV64())) ||
             Platform.isOSX() ||
             Platform.isSolaris();
     }
diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
index 126a43a900..feb4de5388 100644
--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
+++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java
@@ -45,7 +45,7 @@ import java.util.Set;
  */
 public class TestMutuallyExclusivePlatformPredicates {
     private static enum MethodGroup {
-        ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"),
+        ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"),
         BITNESS("is32bit", "is64bit"),
         OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"),
         VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"),
diff --git a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html b/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html
deleted file mode 100644
index 7049e82703..0000000000
--- a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.html
+++ /dev/null
@@ -1,43 +0,0 @@
-<html>
-<!--
-  Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
-  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-
-  This code is free software; you can redistribute it and/or modify it
-  under the terms of the GNU General Public License version 2 only, as
-  published by the Free Software Foundation.
-
-  This code is distributed in the hope that it will be useful, but WITHOUT
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-  version 2 for more details (a copy is included in the LICENSE file that
-  accompanied this code).
-
-  You should have received a copy of the GNU General Public License version
-  2 along with this work; if not, write to the Free Software Foundation,
-  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-  or visit www.oracle.com if you need additional information or have any
-  questions.
-  -->
-
-<!--  
-  @test
-  @bug 6243382 8006070
-  @summary Dragging of mouse outside of a List and Choice area don't work properly on XAWT
-  @author Dmitry.Cherepanov@SUN.COM area=awt.list
-  @run applet/manual=yesno MouseDraggedOutCauseScrollingTest.html
-  -->
-<head>
-<title> ManualYesNoTest </title>
-</head>
-<body>
-
-<h1>ManualYesNoTest<br>Bug ID: </h1>
-
-<p> See the dialog box (usually in upper left corner) for instructions</p>
-
-<APPLET CODE="MouseDraggedOutCauseScrollingTest.class" WIDTH=200 HEIGHT=200></APPLET>
-</body>
-</html>
diff --git a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.java b/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.java
index 8b509a1231..446b7a3a93 100644
--- a/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.java
+++ b/test/jdk/java/awt/List/MouseDraggedOutCauseScrollingTest/MouseDraggedOutCauseScrollingTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,29 +22,29 @@
  */
 
 /*
-  test
+  @test
   @bug 6243382 8006070
   @summary Dragging of mouse outside of a List and Choice area don't work properly on XAWT
-  @author Dmitry.Cherepanov@SUN.COM area=awt.list
-  @run applet/manual=yesno MouseDraggedOutCauseScrollingTest.html
+  @requires (os.family == "linux")
+  @library /java/awt/regtesthelpers
+  @run main/manual MouseDraggedOutCauseScrollingTest
 */
 
-import java.applet.Applet;
-import java.awt.*;
+import java.awt.Choice;
+import java.awt.Frame;
+import java.awt.GridLayout;
+import java.awt.List;
+import java.awt.Toolkit;
 
-public class MouseDraggedOutCauseScrollingTest extends Applet
-{
-    Choice choice;
-    List singleList;
-    List multipleList;
+public class MouseDraggedOutCauseScrollingTest {
 
-    public void init()
-    {
-        this.setLayout (new GridLayout (1, 3));
+    static Frame createUI() {
+        Frame frame = new Frame("MouseDraggedOutCausesScrollingTest");
+        frame.setLayout(new GridLayout(1, 3));
 
-        choice = new Choice();
-        singleList = new List(3, false);
-        multipleList = new List(3, true);
+        Choice choice = new Choice();
+        List singleList = new List(3, false);
+        List multipleList = new List(3, true);
 
         choice.add("Choice");
         for (int i = 1; i < 100; i++){
@@ -59,188 +59,66 @@ public class MouseDraggedOutCauseScrollingTest extends Applet
         for (int i = 1; i < 100; i++)
             multipleList.add(""+i);
 
-        this.add(choice);
-        this.add(singleList);
-        this.add(multipleList);
+        frame.add(choice);
+        frame.add(singleList);
+        frame.add(multipleList);
+        frame.setSize(400, 100);
+        return frame;
+    }
 
+    public static void main(String[] args) throws Exception {
         String toolkitName = Toolkit.getDefaultToolkit().getClass().getName();
+
         if (!toolkitName.equals("sun.awt.X11.XToolkit")) {
-            String[] instructions =
-            {
-                "This test is not applicable to the current platform. Press PASS"
-            };
-            Sysout.createDialogWithInstructions( instructions );
-        } else {
-            String[] instructions =
-            {
-                "0) Please note, that this is only Motif/XAWT test. At first, make the applet active",
-                "1.1) Click on the choice",
-                "1.2) Press the left button of the mouse and keep on any item of the choice, for example 5",
-                "1.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same",
-                "1.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling UP of the list and selected item changes on the upper. If not, the test failed",
-                "1.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling DOWN of the list and selected item changes on the lower. If not, the test failed",
-                "-----------------------------------",
-                "2.1) Click on the single list",
-                "2.2) Press the left button of the mouse and keep on any item of the list, for example 5",
-                "2.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same",
-                "2.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling UP of the list and selected item changes on the upper. If not, the test failed",
-                "2.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling DOWN of the list and selected item changes on the lower. If not, the test failed",
-                "-----------------------------------",
-                "3.1) Click on the multiple list",
-                "3.2) Press the left button of the mouse and keep on any item of the list, for example 5",
-                "3.3) Drag mouse out of the area of the unfurled list, at the same time hold the X coordinate of the mouse position about the same",
-                "3.4) To make sure, that when the Y coordinate of the mouse position higher of the upper bound of the list then scrolling of the list NO OCCURED and selected item NO CHANGES on the upper. If not, the test failed",
-                "3.5) To make sure, that when the Y coordinate of the mouse position under of the lower bound of the list then scrolling of the list NO OCCURED and selected item NO CHANGES on the lower. If not, the test failed",
-                "4) Test passed."
-            };
-            Sysout.createDialogWithInstructions( instructions );
+              System.out.println(INAPPLICABLE);
+              return;
         }
 
-    }//End  init()
-
-    public void start ()
-    {
-        setSize (400,100);
-        setVisible(true);
-        validate();
-
-    }// start()
-
-}// class ManualYesNoTest
-
-/****************************************************
- Standard Test Machinery
- DO NOT modify anything below -- it's a standard
-  chunk of code whose purpose is to make user
-  interaction uniform, and thereby make it simpler
-  to read and understand someone else's test.
- ****************************************************/
-
-/**
- This is part of the standard test machinery.
- It creates a dialog (with the instructions), and is the interface
-  for sending text messages to the user.
- To print the instructions, send an array of strings to Sysout.createDialog
-  WithInstructions method.  Put one line of instructions per array entry.
- To display a message for the tester to see, simply call Sysout.println
-  with the string to be displayed.
- This mimics System.out.println but works within the test harness as well
-  as standalone.
- */
-
-class Sysout
-{
-    private static TestDialog dialog;
-
-    public static void createDialogWithInstructions( String[] instructions )
-    {
-        dialog = new TestDialog( new Frame(), "Instructions" );
-        dialog.printInstructions( instructions );
-        dialog.setVisible(true);
-        println( "Any messages for the tester will display here." );
-    }
-
-    public static void createDialog( )
-    {
-        dialog = new TestDialog( new Frame(), "Instructions" );
-        String[] defInstr = { "Instructions will appear here. ", "" } ;
-        dialog.printInstructions( defInstr );
-        dialog.setVisible(true);
-        println( "Any messages for the tester will display here." );
-    }
-
-
-    public static void printInstructions( String[] instructions )
-    {
-        dialog.printInstructions( instructions );
-    }
-
-
-    public static void println( String messageIn )
-    {
-        dialog.displayMessage( messageIn );
-    }
-
-}// Sysout  class
-
-/**
-  This is part of the standard test machinery.  It provides a place for the
-   test instructions to be displayed, and a place for interactive messages
-   to the user to be displayed.
-  To have the test instructions displayed, see Sysout.
-  To have a message to the user be displayed, see Sysout.
-  Do not call anything in this dialog directly.
-  */
-class TestDialog extends Dialog
-{
-
-    TextArea instructionsText;
-    TextArea messageText;
-    int maxStringLength = 80;
-
-    //DO NOT call this directly, go through Sysout
-    public TestDialog( Frame frame, String name )
-    {
-        super( frame, name );
-        int scrollBoth = TextArea.SCROLLBARS_BOTH;
-        instructionsText = new TextArea( "", 15, maxStringLength, scrollBoth );
-        add( "North", instructionsText );
-
-        messageText = new TextArea( "", 5, maxStringLength, scrollBoth );
-        add("Center", messageText);
-
-        pack();
-
-        setVisible(true);
-    }// TestDialog()
-
-    //DO NOT call this directly, go through Sysout
-    public void printInstructions( String[] instructions )
-    {
-        //Clear out any current instructions
-        instructionsText.setText( "" );
-
-        //Go down array of instruction strings
-
-        String printStr, remainingStr;
-        for( int i=0; i < instructions.length; i++ )
-        {
-            //chop up each into pieces maxSringLength long
-            remainingStr = instructions[ i ];
-            while( remainingStr.length() > 0 )
-            {
-                //if longer than max then chop off first max chars to print
-                if( remainingStr.length() >= maxStringLength )
-                {
-                    //Try to chop on a word boundary
-                    int posOfSpace = remainingStr.
-                        lastIndexOf( ' ', maxStringLength - 1 );
-
-                    if( posOfSpace <= 0 ) posOfSpace = maxStringLength - 1;
-
-                    printStr = remainingStr.substring( 0, posOfSpace + 1 );
-                    remainingStr = remainingStr.substring( posOfSpace + 1 );
-                }
-                //else just print
-                else
-                {
-                    printStr = remainingStr;
-                    remainingStr = "";
-                }
-
-                instructionsText.append( printStr + "\n" );
-
-            }// while
-
-        }// for
-
-    }//printInstructions()
-
-    //DO NOT call this directly, go through Sysout
-    public void displayMessage( String messageIn )
-    {
-        messageText.append( messageIn + "\n" );
-        System.out.println(messageIn);
+        PassFailJFrame
+            .builder()
+            .instructions(INSTRUCTIONS)
+            .rows(40)
+            .columns(70)
+            .testUI(MouseDraggedOutCauseScrollingTest::createUI)
+            .build()
+            .awaitAndCheck();
     }
 
-}// TestDialog  class
+    static final String INAPPLICABLE = "The test is not applicable to the current platform. Test PASSES.";
+    static final String INSTRUCTIONS = "0) Please note, that this is an XAWT/Linux only test. First, make sure the test window is active.\n" +
+            "-----------------------------------\n" +
+            "1.1) Click on the Choice.\n" +
+            "1.2) Press and hold down the left button of the mouse to select (eg) item 5 in the choice.\n" +
+            "1.3) Drag the mouse vertically out of the area of the open list,\n" +
+            "     keeping the X coordinate of the mouse position about the same.\n" +
+            "1.4) Check that when the Y coordinate of the mouse position is higher than the upper bound of the list\n" +
+            "     then the list continues to scroll UP and the selected item changes at the top until you reach the topmost item.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "1.5) Check that when the Y coordinate of the mouse position is lower than the lower bound of the list\n" +
+            "     then the list continues to scroll DOWN and the selected item changes at the bottom until you reach the bottommost item.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "-----------------------------------\n" +
+            "2.1) Click on the Single List.\n" +
+            "2.2) Press and hold down the left button of the mouse to select (eg) item 5 in the list.\n" +
+            "2.3) Drag the mouse vertically out of the area of the open list,\n" +
+            "     keeping the X coordinate of the mouse position about the same.\n" +
+            "2.4) Check that when the Y coordinate of the mouse position is higher than the upper bound of the list\n" +
+            "     then the list continues to scroll UP and the selected item changes at the top until you reach the topmost item.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "2.5) Check that when the Y coordinate of the mouse position is lower than the lower bound of the list\n" +
+            "     then the list continues to scroll DOWN and the selected item changes at the bottom until you reach the bottommost item.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "-----------------------------------\n" +
+            "3.1) Click on the Multiple List.\n" +
+            "3.2) Press and hold down the left button of the mouse to select (eg) item 5 in the list.\n" +
+            "3.3) Drag the mouse vertically out of the area of the open list,\n" +
+            "     keeping the X coordinate of the mouse position about the same.\n" +
+            "3.4) Check that when the Y coordinate of the mouse is higher than the upper bound of the list\n" +
+            "     that scrolling of the list DOES NOT OCCUR and the selected item IS UNCHANGED at the top.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "3.5) Check that when the Y coordinate of the mouse is below the lower bound of the list\n" +
+            "     that scrolling of the list DOES NOT OCCUR and the selected item IS UNCHANGED at the bottom.\n" +
+            "     If not, the test failed. Press FAIL.\n" +
+            "-----------------------------------\n" +
+            "4) The test has now passed. Press PASS.";
+}
diff --git a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html
index a562b886ab..e69de29bb2 100644
--- a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html
+++ b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.html
@@ -1,44 +0,0 @@
-<!--
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
-DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License version 2 only, as
-published by the Free Software Foundation.
-
-This code is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-version 2 for more details (a copy is included in the LICENSE file that
-accompanied this code).
-
-You should have received a copy of the GNU General Public License version
-2 along with this work; if not, write to the Free Software Foundation,
-Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-or visit www.oracle.com if you need additional information or have any
-questions.
--->
-
-<html>
-<head>
-<title> PrintDialogsTest </title>
-</head>
-<body>
-<applet code="PrintDialogsTest.class" width=250 height=350></applet>
-
-Please select dialog modality type and parent; also select
-the print auxiliary dialog to be displayed (Page Setup or Print dialog).
-Then click "Start test" button.
-
-When the windows will appear check if modal blocking for Dialog works as expected.
-Then push "Open" button on the Dialog to show the auxiliary dialog and check
-if it blocks the rest of the application. Then close it and check correctness
-of modal blocking behavior for the Dialog again. To close all the test
-windows please push "Finish" button.
-
-To finish the overall test push "Pass" or "Fail" button depending on result.
-
-</body>
-</html>
diff --git a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java
index 989c48295b..8a07d284a9 100644
--- a/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java
+++ b/test/jdk/java/awt/Modal/PrintDialogsTest/PrintDialogsTest.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,21 +25,75 @@
 /*
  * @test
  * @bug 8055836 8057694 8055752
- * @summary Check if Print and Page Setup dialogs lock other windows;
+ * @summary Check if Print and Page Setup dialogs block other windows;
  *          check also correctness of modal behavior for other dialogs.
- *
- * @run applet/manual=yesno PrintDialogsTest.html
+ * @library /java/awt/regtesthelpers
+ * @run main/manual PrintDialogsTest
  */
 
 
-import java.applet.Applet;
-import java.awt.*;
+import java.awt.BorderLayout;
+import java.awt.Button;
+import java.awt.Checkbox;
+import java.awt.CheckboxGroup;
+import java.awt.Dialog;
+import java.awt.Frame;
+import java.awt.EventQueue;
+import java.awt.GridLayout;
+import java.awt.Label;
+import java.awt.Panel;
 
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
 
 
-public class PrintDialogsTest extends Applet implements ActionListener {
+public class PrintDialogsTest extends Panel implements ActionListener {
+
+    static final String INSTRUCTIONS =
+        "This test is free format, which means there is no enforced or guided sequence." + "\n" +
+
+        "Please select each of " + "\n" +
+        "(a) The dialog parent type." + "\n" +
+        "(b) The dialog modality type" + "\n" +
+        "(c) The print dialog type (Print dialog or Page Setup dialog)" + "\n" +
+
+        "Once the choices have been made click the \"Start test\" button." + "\n" +
+
+        "Three windows will appear" + "\n" +
+        "(1) A Frame or a Dialog - in the case you selected \"Dialog\" as the parent type" + "\n" +
+        "(2) a Window (ie an undecorated top-level)" + "\n" +
+        "(3) A dialog with two buttons \"Open\" and \"Finish\"" + "\n" +
+
+        "Now check as follows whether modal blocking works as expected." + "\n" +
+        "Windows (1) and (2) contain a button which you should be able to press" + "\n" +
+        "ONLY if you selected \"Non-modal\", or \"Modeless\" for modality type." + "\n" +
+        "In other cases window (3) will block input to (1) and (2)" + "\n" +
+
+        "Then push the \"Open\" button on the Dialog to show the printing dialog and check" + "\n" +
+        "if it blocks the rest of the application - ie all of windows (1), (2) and (3)" + "\n" +
+        "should ALWAYS be blocked when the print dialog is showing." + "\n" +
+        "Now cancel the printing dialog and check the correctness of modal blocking" + "\n" +
+        "behavior for the Dialog again." + "\n" +
+        "To close all the 3 test windows please push the \"Finish\" button." + "\n" +
+
+        "Repeat all the above for different combinations, which should include" + "\n" +
+        "using all of the Dialog parent choices and all of the Dialog Modality types." + "\n" +
+
+        "If any behave incorrectly, note the combination of choices and press Fail." + "\n" +
+
+        "If all behave correctly, press Pass.";
+
+    public static void main(String[] args) throws Exception {
+
+         PassFailJFrame.builder()
+             .instructions(INSTRUCTIONS)
+             .rows(35)
+             .columns(60)
+             .testUI(PrintDialogsTest::createUI)
+             .testTimeOut(10)
+             .build()
+             .awaitAndCheck();
+    }
 
     private Button btnTest;
     private Checkbox  cbPage, cbPrint,
@@ -48,6 +102,14 @@ public class PrintDialogsTest extends Applet implements ActionListener {
 
     private CheckboxGroup groupDialog, groupParent, groupModType;
 
+    private static Frame createUI() {
+        Frame frame = new Frame("Dialog Modality Testing");
+        PrintDialogsTest test = new PrintDialogsTest();
+        test.createGUI();
+        frame.add(test);
+        frame.pack();
+        return frame;
+    }
 
     public void actionPerformed(ActionEvent e) {
 
@@ -99,13 +161,13 @@ public class PrintDialogsTest extends Applet implements ActionListener {
 
         setLayout(new BorderLayout());
 
-        setSize(350, 200);
         Panel panel = new Panel();
-        panel.setLayout(new GridLayout(18, 1));
+        panel.setLayout(new GridLayout(21, 1));
 
         btnTest = new Button("Start test");
         btnTest.addActionListener(this);
         panel.add(btnTest);
+        panel.add(new Label(" ")); // spacing
 
 
         panel.add(new Label("Dialog parent:"));
@@ -123,6 +185,7 @@ public class PrintDialogsTest extends Applet implements ActionListener {
         panel.add(cbHiddFrm);
         panel.add(cbDlg);
         panel.add(cbFrm);
+        panel.add(new Label(" ")); // spacing
 
         panel.add(new Label("Dialog modality type:"));
         groupModType = new CheckboxGroup();
@@ -139,7 +202,7 @@ public class PrintDialogsTest extends Applet implements ActionListener {
         panel.add(cbDocModal);
         panel.add(cbTKModal);
         panel.add(cbModeless);
-        add(panel);
+        panel.add(new Label(" ")); // spacing
 
         panel.add(new Label("Print dialog type:"));
         groupDialog = new CheckboxGroup();
@@ -148,13 +211,6 @@ public class PrintDialogsTest extends Applet implements ActionListener {
         panel.add(cbPage);
         panel.add(cbPrint);
 
-        validate();
-        setVisible(true);
-    }
-
-    public void start() {
-        try {
-            EventQueue.invokeAndWait(this::createGUI);
-        } catch (Exception e) {}
+        add(panel);
     }
 }
diff --git a/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java b/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java
index a275eda517..2cb092e60b 100644
--- a/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java
+++ b/test/jdk/jdk/jfr/event/oldobject/TestListenerLeak.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,15 +74,17 @@ public class TestListenerLeak {
 
     public static void main(String[] args) throws Exception {
         WhiteBox.setWriteAllObjectSamples(true);
-
-        try (Recording r = new Recording()) {
-            r.enable(EventNames.OldObjectSample).withStackTrace().with("cutoff", "infinity");
-            r.start();
-            listenerLeak();
-            r.stop();
-            List<RecordedEvent> events = Events.fromRecording(r);
-            if (OldObjects.countMatchingEvents(events, Stuff[].class, null, null, -1, "listenerLeak") == 0) {
-                throw new Exception("Could not find leak with " + Stuff[].class);
+        while (true) {
+            try (Recording r = new Recording()) {
+                r.enable(EventNames.OldObjectSample).withStackTrace().with("cutoff", "infinity");
+                r.start();
+                listenerLeak();
+                r.stop();
+                List<RecordedEvent> events = Events.fromRecording(r);
+                if (OldObjects.countMatchingEvents(events, Stuff[].class, null, null, -1, "listenerLeak") != 0) {
+                    return; // Success
+                }
+                System.out.println("Could not find leak with " + Stuff[].class + ". Retrying.");
             }
         }
     }
diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
index 7990c49a1f..abeff80e5e 100644
--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
+++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,8 +54,8 @@ public class TestCPUInformation {
             Events.assertField(event, "hwThreads").atLeast(1);
             Events.assertField(event, "cores").atLeast(1);
             Events.assertField(event, "sockets").atLeast(1);
-            Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390");
-            Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390");
+            Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390");
+            Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390");
         }
     }
 }
diff --git a/test/jdk/sun/util/calendar/zi/Month.java b/test/jdk/sun/util/calendar/zi/Month.java
index cb60b8d441..bab909f763 100644
--- a/test/jdk/sun/util/calendar/zi/Month.java
+++ b/test/jdk/sun/util/calendar/zi/Month.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -21,11 +21,6 @@
  * questions.
  */
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 /**
  * Month enum handles month related manipulation.
  *
@@ -47,15 +42,6 @@ enum Month {
 
     private final String abbr;
 
-    private static final Map<String,Month> abbreviations
-                                = new HashMap<String,Month>(12);
-
-    static {
-        for (Month m : Month.values()) {
-            abbreviations.put(m.abbr, m);
-        }
-    }
-
     private Month(String abbr) {
         this.abbr = abbr;
     }
@@ -70,11 +56,29 @@ enum Month {
      * @return the Month value
      */
     static Month parse(String name) {
-        Month m = abbreviations.get(name);
-        if (m != null) {
-            return m;
-        }
-        return null;
+        int len = name.length();
+        if (len == 0) {
+            // The empty string is a prefix of every month name, so the
+            // prefix tests below would silently resolve it to JANUARY.
+            throw new IllegalArgumentException("Empty month name");
+        }
+
+        // Accept any case-insensitive prefix of a full month name; for an
+        // ambiguous prefix (e.g. "Ju") the first match in the order below wins.
+        if (name.regionMatches(true, 0, "January", 0, len)) return Month.JANUARY;
+        if (name.regionMatches(true, 0, "February", 0, len)) return Month.FEBRUARY;
+        if (name.regionMatches(true, 0, "March", 0, len)) return Month.MARCH;
+        if (name.regionMatches(true, 0, "April", 0, len)) return Month.APRIL;
+        if (name.regionMatches(true, 0, "May", 0, len)) return Month.MAY;
+        if (name.regionMatches(true, 0, "June", 0, len)) return Month.JUNE;
+        if (name.regionMatches(true, 0, "July", 0, len)) return Month.JULY;
+        if (name.regionMatches(true, 0, "August", 0, len)) return Month.AUGUST;
+        if (name.regionMatches(true, 0, "September", 0, len)) return Month.SEPTEMBER;
+        if (name.regionMatches(true, 0, "October", 0, len)) return Month.OCTOBER;
+        if (name.regionMatches(true, 0, "November", 0, len)) return Month.NOVEMBER;
+        if (name.regionMatches(true, 0, "December", 0, len)) return Month.DECEMBER;
+
+        throw new IllegalArgumentException("Unknown month: " + name);
     }
 
     /**
diff --git a/test/jdk/sun/util/calendar/zi/RuleDay.java b/test/jdk/sun/util/calendar/zi/RuleDay.java
index bc730944b4..9cd81c1e52 100644
--- a/test/jdk/sun/util/calendar/zi/RuleDay.java
+++ b/test/jdk/sun/util/calendar/zi/RuleDay.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -21,11 +21,6 @@
  * questions.
  */
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 /**
  * RuleDay class represents the value of the "ON" field.  The day of
  * week values start from 1 following the {@link java.util.Calendar}
@@ -34,13 +29,6 @@ import java.util.Map;
  * @since 1.4
  */
 class RuleDay {
-    private static final Map<String,DayOfWeek> abbreviations = new HashMap<String,DayOfWeek>(7);
-    static {
-        for (DayOfWeek day : DayOfWeek.values()) {
-            abbreviations.put(day.getAbbr(), day);
-        }
-    }
-
     private String dayName = null;
     private DayOfWeek dow;
     private boolean lastOne = false;
@@ -166,13 +154,30 @@ class RuleDay {
         return sign + toString(d);
     }
 
-    private static DayOfWeek getDOW(String abbr) {
-        return abbreviations.get(abbr);
+    private static DayOfWeek getDOW(String name) {
+        int len = name.length();
+        if (len == 0) {
+            // The empty string is a prefix of every day name, so the
+            // prefix tests below would silently resolve it to MONDAY.
+            throw new IllegalArgumentException("Empty day-of-week name");
+        }
+
+        // Accept any case-insensitive prefix of a full day name; for an
+        // ambiguous prefix (e.g. "T") the first match in the order below wins.
+        if (name.regionMatches(true, 0, "Monday", 0, len)) return DayOfWeek.MONDAY;
+        if (name.regionMatches(true, 0, "Tuesday", 0, len)) return DayOfWeek.TUESDAY;
+        if (name.regionMatches(true, 0, "Wednesday", 0, len)) return DayOfWeek.WEDNESDAY;
+        if (name.regionMatches(true, 0, "Thursday", 0, len)) return DayOfWeek.THURSDAY;
+        if (name.regionMatches(true, 0, "Friday", 0, len)) return DayOfWeek.FRIDAY;
+        if (name.regionMatches(true, 0, "Saturday", 0, len)) return DayOfWeek.SATURDAY;
+        if (name.regionMatches(true, 0, "Sunday", 0, len)) return DayOfWeek.SUNDAY;
+
+        throw new IllegalArgumentException("Unknown day-of-week: " + name);
     }
 
     /**
      * Converts the specified day of week value to the day-of-week
-     * name defined in {@link java.util.Calenda}.
+     * name defined in {@link java.util.Calendar}.
      * @param dow 1-based day of week value
      * @return the Calendar day of week name with "Calendar." prefix.
      * @throws IllegalArgumentException if the specified dow value is out of range.
diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java
index 6269373c2b..e1511772e7 100644
--- a/test/lib/jdk/test/lib/Platform.java
+++ b/test/lib/jdk/test/lib/Platform.java
@@ -205,6 +205,10 @@ public class Platform {
         return isArch("arm.*");
     }
 
+    public static boolean isRISCV64() {
+        return isArch("riscv64");
+    }
+
     public static boolean isPPC() {
         return isArch("ppc.*");
     }