3939 lines
158 KiB
Diff
3939 lines
158 KiB
Diff
From 02b097417275acaad294d71a852c2def2222be25 Mon Sep 17 00:00:00 2001
|
|
From: kuenking111 <wangkun49@huawei.com>
|
|
Date: Sat, 3 Sep 2022 14:17:50 +0000
|
|
Subject: [PATCH 1/6] 8143925-enhancing-CounterMode.crypt-for-AESCrypt
|
|
|
|
---
|
|
.../src/cpu/aarch64/vm/assembler_aarch64.hpp | 35 +-
|
|
.../cpu/aarch64/vm/macroAssembler_aarch64.hpp | 17 +
|
|
.../aarch64/vm/macroAssembler_aarch64_aes.cpp | 685 ++++++++++++++++++
|
|
.../cpu/aarch64/vm/stubGenerator_aarch64.cpp | 324 ++++++++-
|
|
.../cpu/aarch64/vm/stubRoutines_aarch64.hpp | 2 +-
|
|
.../src/cpu/aarch64/vm/vm_version_aarch64.cpp | 13 +-
|
|
hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp | 5 +
|
|
hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp | 5 +
|
|
hotspot/src/cpu/x86/vm/assembler_x86.cpp | 74 +-
|
|
hotspot/src/cpu/x86/vm/assembler_x86.hpp | 12 +
|
|
.../src/cpu/x86/vm/stubGenerator_x86_32.cpp | 344 +++++++++
|
|
.../src/cpu/x86/vm/stubGenerator_x86_64.cpp | 340 ++++++++-
|
|
hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp | 1 +
|
|
hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp | 5 +
|
|
.../src/cpu/x86/vm/stubRoutines_x86_32.hpp | 2 +-
|
|
.../src/cpu/x86/vm/stubRoutines_x86_64.hpp | 2 +-
|
|
hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 36 +
|
|
hotspot/src/share/vm/classfile/vmSymbols.hpp | 4 +
|
|
hotspot/src/share/vm/opto/escape.cpp | 1 +
|
|
hotspot/src/share/vm/opto/library_call.cpp | 174 +++++
|
|
hotspot/src/share/vm/opto/runtime.cpp | 29 +
|
|
hotspot/src/share/vm/opto/runtime.hpp | 1 +
|
|
hotspot/src/share/vm/runtime/globals.hpp | 3 +
|
|
hotspot/src/share/vm/runtime/stubRoutines.cpp | 1 +
|
|
hotspot/src/share/vm/runtime/stubRoutines.hpp | 2 +
|
|
hotspot/src/share/vm/runtime/vmStructs.cpp | 1 +
|
|
.../test/compiler/7184394/TestAESBase.java | 4 +-
|
|
.../test/compiler/7184394/TestAESMain.java | 7 +
|
|
.../com/sun/crypto/provider/CounterMode.java | 11 +-
|
|
.../classes/com/sun/crypto/provider/GCTR.java | 89 +--
|
|
.../com/sun/crypto/provider/GHASH.java | 20 +-
|
|
.../sun/security/ssl/SSLSocketImpl.java | 14 +-
|
|
.../security/ssl/SSLSocketInputRecord.java | 215 +++---
|
|
.../sun/security/ssl/SSLTransport.java | 4 +
|
|
.../bench/javax/crypto/full/AESGCMBench.java | 128 ++++
|
|
.../javax/crypto/full/AESGCMByteBuffer.java | 163 +++++
|
|
.../bench/javax/crypto/full/CryptoBase.java | 102 +++
|
|
.../bench/javax/crypto/small/AESGCMBench.java | 36 +
|
|
.../javax/crypto/small/AESGCMByteBuffer.java | 36 +
|
|
.../ssl/SSLSocketImpl/ClientTimeout.java | 3 +-
|
|
.../SSLSocketImpl/SSLExceptionForIOIssue.java | 4 +-
|
|
41 files changed, 2738 insertions(+), 216 deletions(-)
|
|
create mode 100644 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
|
|
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
|
|
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
|
|
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
|
|
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
|
|
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
|
|
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
|
index b0fa9b5fc..9202e61f8 100644
|
|
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
|
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
|
|
@@ -146,6 +146,21 @@ REGISTER_DECLARATION(Register, esp, r20);
|
|
|
|
#define assert_cond(ARG1) assert(ARG1, #ARG1)
|
|
|
|
+// In many places we've added C-style casts to silence compiler
|
|
+// warnings, for example when truncating a size_t to an int when we
|
|
+// know the size_t is a small struct. Such casts are risky because
|
|
+// they effectively disable useful compiler warnings. We can make our
|
|
+// lives safer with this function, which ensures that any cast is
|
|
+// reversible without loss of information. It doesn't check
|
|
+// everything: it isn't intended to make sure that pointer types are
|
|
+// compatible, for example.
|
|
+template <typename T2, typename T1>
|
|
+T2 checked_cast(T1 thing) {
|
|
+ T2 result = static_cast<T2>(thing);
|
|
+ assert(static_cast<T1>(result) == thing, "must be");
|
|
+ return result;
|
|
+}
|
|
+
|
|
namespace asm_util {
|
|
uint32_t encode_logical_immediate(bool is32, uint64_t imm);
|
|
};
|
|
@@ -193,7 +208,7 @@ public:
|
|
static inline uint32_t extract(uint32_t val, int msb, int lsb) {
|
|
int nbits = msb - lsb + 1;
|
|
assert_cond(msb >= lsb);
|
|
- uint32_t mask = (1U << nbits) - 1;
|
|
+ uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
|
|
uint32_t result = val >> lsb;
|
|
result &= mask;
|
|
return result;
|
|
@@ -208,7 +223,7 @@ public:
|
|
int nbits = msb - lsb + 1;
|
|
guarantee(val < (1U << nbits), "Field too big for insn");
|
|
assert_cond(msb >= lsb);
|
|
- unsigned mask = (1U << nbits) - 1;
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
|
|
val <<= lsb;
|
|
mask <<= lsb;
|
|
unsigned target = *(unsigned *)a;
|
|
@@ -222,7 +237,7 @@ public:
|
|
long chk = val >> (nbits - 1);
|
|
guarantee (chk == -1 || chk == 0, "Field too big for insn");
|
|
unsigned uval = val;
|
|
- unsigned mask = (1U << nbits) - 1;
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
|
|
uval &= mask;
|
|
uval <<= lsb;
|
|
mask <<= lsb;
|
|
@@ -234,9 +249,9 @@ public:
|
|
|
|
void f(unsigned val, int msb, int lsb) {
|
|
int nbits = msb - lsb + 1;
|
|
- guarantee(val < (1U << nbits), "Field too big for insn");
|
|
+ guarantee(val < (1ULL << nbits), "Field too big for insn");
|
|
assert_cond(msb >= lsb);
|
|
- unsigned mask = (1U << nbits) - 1;
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
|
|
val <<= lsb;
|
|
mask <<= lsb;
|
|
insn |= val;
|
|
@@ -255,7 +270,7 @@ public:
|
|
long chk = val >> (nbits - 1);
|
|
guarantee (chk == -1 || chk == 0, "Field too big for insn");
|
|
unsigned uval = val;
|
|
- unsigned mask = (1U << nbits) - 1;
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
|
|
uval &= mask;
|
|
f(uval, lsb + nbits - 1, lsb);
|
|
}
|
|
@@ -280,7 +295,7 @@ public:
|
|
|
|
unsigned get(int msb = 31, int lsb = 0) {
|
|
int nbits = msb - lsb + 1;
|
|
- unsigned mask = ((1U << nbits) - 1) << lsb;
|
|
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
|
|
assert_cond((bits & mask) == mask);
|
|
return (insn & mask) >> lsb;
|
|
}
|
|
@@ -1991,21 +2006,21 @@ public:
|
|
starti;
|
|
f(0,31), f((int)T & 1, 30);
|
|
f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
|
|
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
|
|
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
|
|
}
|
|
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
|
|
int imm, int op1, int op2) {
|
|
starti;
|
|
f(0,31), f((int)T & 1, 30);
|
|
f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
|
|
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
|
|
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
|
|
}
|
|
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
|
|
Register Xm, int op1, int op2) {
|
|
starti;
|
|
f(0,31), f((int)T & 1, 30);
|
|
f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
|
|
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
|
|
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
|
|
}
|
|
|
|
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
|
index 0ca694038..d334f1b69 100644
|
|
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
|
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
|
|
@@ -1240,6 +1240,23 @@ public:
|
|
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
|
|
Register zlen, Register tmp1, Register tmp2, Register tmp3,
|
|
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
|
|
+ void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
|
|
+ FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
|
|
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
|
|
+ void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
|
|
+ FloatRegister p, FloatRegister z, FloatRegister t1);
|
|
+ void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
|
|
+ Register data, Register blocks, int unrolls);
|
|
+ void ghash_modmul (FloatRegister result,
|
|
+ FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
|
|
+ FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
|
|
+ FloatRegister t1, FloatRegister t2, FloatRegister t3);
|
|
+
|
|
+ void aesenc_loadkeys(Register key, Register keylen);
|
|
+ void aesecb_encrypt(Register from, Register to, Register keylen,
|
|
+ FloatRegister data = v0, int unrolls = 1);
|
|
+ void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
|
|
+ void aes_round(FloatRegister input, FloatRegister subkey);
|
|
// ISB may be needed because of a safepoint
|
|
void maybe_isb() { isb(); }
|
|
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
|
|
new file mode 100644
|
|
index 000000000..1db79c97a
|
|
--- /dev/null
|
|
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
|
|
@@ -0,0 +1,685 @@
|
|
+/*
|
|
+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "precompiled.hpp"
|
|
+
|
|
+#include "asm/assembler.hpp"
|
|
+#include "asm/assembler.inline.hpp"
|
|
+#include "macroAssembler_aarch64.hpp"
|
|
+#include "memory/resourceArea.hpp"
|
|
+#include "runtime/stubRoutines.hpp"
|
|
+
|
|
+void MacroAssembler::aesecb_decrypt(Register from, Register to, Register key, Register keylen) {
|
|
+ Label L_doLast;
|
|
+
|
|
+ ld1(v0, T16B, from); // get 16 bytes of input
|
|
+
|
|
+ ld1(v5, T16B, post(key, 16));
|
|
+ rev32(v5, T16B, v5);
|
|
+
|
|
+ ld1(v1, v2, v3, v4, T16B, post(key, 64));
|
|
+ rev32(v1, T16B, v1);
|
|
+ rev32(v2, T16B, v2);
|
|
+ rev32(v3, T16B, v3);
|
|
+ rev32(v4, T16B, v4);
|
|
+ aesd(v0, v1);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v2);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v3);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v4);
|
|
+ aesimc(v0, v0);
|
|
+
|
|
+ ld1(v1, v2, v3, v4, T16B, post(key, 64));
|
|
+ rev32(v1, T16B, v1);
|
|
+ rev32(v2, T16B, v2);
|
|
+ rev32(v3, T16B, v3);
|
|
+ rev32(v4, T16B, v4);
|
|
+ aesd(v0, v1);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v2);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v3);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v4);
|
|
+ aesimc(v0, v0);
|
|
+
|
|
+ ld1(v1, v2, T16B, post(key, 32));
|
|
+ rev32(v1, T16B, v1);
|
|
+ rev32(v2, T16B, v2);
|
|
+
|
|
+ cmpw(keylen, 44);
|
|
+ br(Assembler::EQ, L_doLast);
|
|
+
|
|
+ aesd(v0, v1);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v2);
|
|
+ aesimc(v0, v0);
|
|
+
|
|
+ ld1(v1, v2, T16B, post(key, 32));
|
|
+ rev32(v1, T16B, v1);
|
|
+ rev32(v2, T16B, v2);
|
|
+
|
|
+ cmpw(keylen, 52);
|
|
+ br(Assembler::EQ, L_doLast);
|
|
+
|
|
+ aesd(v0, v1);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v2);
|
|
+ aesimc(v0, v0);
|
|
+
|
|
+ ld1(v1, v2, T16B, post(key, 32));
|
|
+ rev32(v1, T16B, v1);
|
|
+ rev32(v2, T16B, v2);
|
|
+
|
|
+ bind(L_doLast);
|
|
+
|
|
+ aesd(v0, v1);
|
|
+ aesimc(v0, v0);
|
|
+ aesd(v0, v2);
|
|
+
|
|
+ eor(v0, T16B, v0, v5);
|
|
+
|
|
+ st1(v0, T16B, to);
|
|
+
|
|
+ // Preserve the address of the start of the key
|
|
+ sub(key, key, keylen, LSL, exact_log2(sizeof (jint)));
|
|
+}
|
|
+
|
|
+// Load expanded key into v17..v31
|
|
+void MacroAssembler::aesenc_loadkeys(Register key, Register keylen) {
|
|
+ Label L_loadkeys_44, L_loadkeys_52;
|
|
+ cmpw(keylen, 52);
|
|
+ br(Assembler::LO, L_loadkeys_44);
|
|
+ br(Assembler::EQ, L_loadkeys_52);
|
|
+
|
|
+ ld1(v17, v18, T16B, post(key, 32));
|
|
+ rev32(v17, T16B, v17);
|
|
+ rev32(v18, T16B, v18);
|
|
+ bind(L_loadkeys_52);
|
|
+ ld1(v19, v20, T16B, post(key, 32));
|
|
+ rev32(v19, T16B, v19);
|
|
+ rev32(v20, T16B, v20);
|
|
+ bind(L_loadkeys_44);
|
|
+ ld1(v21, v22, v23, v24, T16B, post(key, 64));
|
|
+ rev32(v21, T16B, v21);
|
|
+ rev32(v22, T16B, v22);
|
|
+ rev32(v23, T16B, v23);
|
|
+ rev32(v24, T16B, v24);
|
|
+ ld1(v25, v26, v27, v28, T16B, post(key, 64));
|
|
+ rev32(v25, T16B, v25);
|
|
+ rev32(v26, T16B, v26);
|
|
+ rev32(v27, T16B, v27);
|
|
+ rev32(v28, T16B, v28);
|
|
+ ld1(v29, v30, v31, T16B, post(key, 48));
|
|
+ rev32(v29, T16B, v29);
|
|
+ rev32(v30, T16B, v30);
|
|
+ rev32(v31, T16B, v31);
|
|
+
|
|
+ // Preserve the address of the start of the key
|
|
+ sub(key, key, keylen, LSL, exact_log2(sizeof (jint)));
|
|
+}
|
|
+
|
|
+// NeoverseTM N1 Software Optimization Guide:
|
|
+// Adjacent AESE/AESMC instruction pairs and adjacent AESD/AESIMC
|
|
+// instruction pairs will exhibit the performance characteristics
|
|
+// described in Section 4.6.
|
|
+void MacroAssembler::aes_round(FloatRegister input, FloatRegister subkey) {
|
|
+ aese(input, subkey); aesmc(input, input);
|
|
+}
|
|
+
|
|
+// KernelGenerator
|
|
+//
|
|
+// The abstract base class of an unrolled function generator.
|
|
+// Subclasses override generate(), length(), and next() to generate
|
|
+// unrolled and interleaved functions.
|
|
+//
|
|
+// The core idea is that a subclass defines a method which generates
|
|
+// the base case of a function and a method to generate a clone of it,
|
|
+// shifted to a different set of registers. KernelGenerator will then
|
|
+// generate several interleaved copies of the function, with each one
|
|
+// using a different set of registers.
|
|
+
|
|
+// The subclass must implement three methods: length(), which is the
|
|
+// number of instruction bundles in the intrinsic, generate(int n)
|
|
+// which emits the nth instruction bundle in the intrinsic, and next()
|
|
+// which takes an instance of the generator and returns a version of it,
|
|
+// shifted to a new set of registers.
|
|
+
|
|
+class KernelGenerator: public MacroAssembler {
|
|
+protected:
|
|
+ const int _unrolls;
|
|
+public:
|
|
+ KernelGenerator(Assembler *as, int unrolls)
|
|
+ : MacroAssembler(as->code()), _unrolls(unrolls) { }
|
|
+ virtual void generate(int index) = 0;
|
|
+ virtual int length() = 0;
|
|
+ virtual KernelGenerator *next() = 0;
|
|
+ int unrolls() { return _unrolls; }
|
|
+ void unroll();
|
|
+};
|
|
+
|
|
+void KernelGenerator::unroll() {
|
|
+ ResourceMark rm;
|
|
+ KernelGenerator **generators
|
|
+ = NEW_RESOURCE_ARRAY(KernelGenerator *, unrolls());
|
|
+
|
|
+ generators[0] = this;
|
|
+ for (int i = 1; i < unrolls(); i++) {
|
|
+ generators[i] = generators[i-1]->next();
|
|
+ }
|
|
+
|
|
+ for (int j = 0; j < length(); j++) {
|
|
+ for (int i = 0; i < unrolls(); i++) {
|
|
+ generators[i]->generate(j);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+// An unrolled and interleaved generator for AES encryption.
|
|
+class AESKernelGenerator: public KernelGenerator {
|
|
+ Register _from, _to;
|
|
+ const Register _keylen;
|
|
+ FloatRegister _data;
|
|
+ const FloatRegister _subkeys;
|
|
+ bool _once;
|
|
+ Label _rounds_44, _rounds_52;
|
|
+
|
|
+public:
|
|
+ AESKernelGenerator(Assembler *as, int unrolls,
|
|
+ Register from, Register to, Register keylen, FloatRegister data,
|
|
+ FloatRegister subkeys, bool once = true)
|
|
+ : KernelGenerator(as, unrolls),
|
|
+ _from(from), _to(to), _keylen(keylen), _data(data),
|
|
+ _subkeys(subkeys), _once(once) {
|
|
+ }
|
|
+
|
|
+ virtual void generate(int index) {
|
|
+ switch (index) {
|
|
+ case 0:
|
|
+ if (_from != noreg) {
|
|
+ ld1(_data, T16B, _from); // get 16 bytes of input
|
|
+ }
|
|
+ break;
|
|
+ case 1:
|
|
+ if (_once) {
|
|
+ cmpw(_keylen, 52);
|
|
+ br(Assembler::LO, _rounds_44);
|
|
+ br(Assembler::EQ, _rounds_52);
|
|
+ }
|
|
+ break;
|
|
+ case 2: aes_round(_data, _subkeys + 0); break;
|
|
+ case 3: aes_round(_data, _subkeys + 1); break;
|
|
+ case 4:
|
|
+ if (_once) bind(_rounds_52);
|
|
+ break;
|
|
+ case 5: aes_round(_data, _subkeys + 2); break;
|
|
+ case 6: aes_round(_data, _subkeys + 3); break;
|
|
+ case 7:
|
|
+ if (_once) bind(_rounds_44);
|
|
+ break;
|
|
+ case 8: aes_round(_data, _subkeys + 4); break;
|
|
+ case 9: aes_round(_data, _subkeys + 5); break;
|
|
+ case 10: aes_round(_data, _subkeys + 6); break;
|
|
+ case 11: aes_round(_data, _subkeys + 7); break;
|
|
+ case 12: aes_round(_data, _subkeys + 8); break;
|
|
+ case 13: aes_round(_data, _subkeys + 9); break;
|
|
+ case 14: aes_round(_data, _subkeys + 10); break;
|
|
+ case 15: aes_round(_data, _subkeys + 11); break;
|
|
+ case 16: aes_round(_data, _subkeys + 12); break;
|
|
+ case 17: aese(_data, _subkeys + 13); break;
|
|
+ case 18: eor(_data, T16B, _data, _subkeys + 14); break;
|
|
+ case 19:
|
|
+ if (_to != noreg) {
|
|
+ st1(_data, T16B, _to);
|
|
+ }
|
|
+ break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual KernelGenerator *next() {
|
|
+ return new AESKernelGenerator(this, _unrolls,
|
|
+ _from, _to, _keylen,
|
|
+ _data + 1, _subkeys, /*once*/false);
|
|
+ }
|
|
+
|
|
+ virtual int length() { return 20; }
|
|
+};
|
|
+
|
|
+// Uses expanded key in v17..v31
|
|
+// Returns encrypted values in inputs.
|
|
+// If to != noreg, store value at to; likewise from
|
|
+// Preserves key, keylen
|
|
+// Increments from, to
|
|
+// Input data in v0, v1, ...
|
|
+// unrolls controls the number of times to unroll the generated function
|
|
+void MacroAssembler::aesecb_encrypt(Register from, Register to, Register keylen,
|
|
+ FloatRegister data, int unrolls) {
|
|
+ AESKernelGenerator(this, unrolls, from, to, keylen, data, v17) .unroll();
|
|
+}
|
|
+
|
|
+// ghash_multiply and ghash_reduce are the non-unrolled versions of
|
|
+// the GHASH function generators.
|
|
+void MacroAssembler::ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
|
|
+ FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
|
|
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3) {
|
|
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
|
|
+ // multiplication in three 128-bit multiplications and a few
|
|
+ // additions.
|
|
+ //
|
|
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
|
|
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
|
|
+ //
|
|
+ // Inputs:
|
|
+ //
|
|
+ // A0 in a.d[0] (subkey)
|
|
+ // A1 in a.d[1]
|
|
+ // (A1+A0) in a1_xor_a0.d[0]
|
|
+ //
|
|
+ // B0 in b.d[0] (state)
|
|
+ // B1 in b.d[1]
|
|
+
|
|
+ ext(tmp1, T16B, b, b, 0x08);
|
|
+ pmull2(result_hi, T1Q, b, a, T2D); // A1*B1
|
|
+ eor(tmp1, T16B, tmp1, b); // (B1+B0)
|
|
+ pmull(result_lo, T1Q, b, a, T1D); // A0*B0
|
|
+ pmull(tmp2, T1Q, tmp1, a1_xor_a0, T1D); // (A1+A0)(B1+B0)
|
|
+
|
|
+ ext(tmp1, T16B, result_lo, result_hi, 0x08);
|
|
+ eor(tmp3, T16B, result_hi, result_lo); // A1*B1+A0*B0
|
|
+ eor(tmp2, T16B, tmp2, tmp1);
|
|
+ eor(tmp2, T16B, tmp2, tmp3);
|
|
+
|
|
+ // Register pair <result_hi:result_lo> holds the result of carry-less multiplication
|
|
+ ins(result_hi, D, tmp2, 0, 1);
|
|
+ ins(result_lo, D, tmp2, 1, 0);
|
|
+}
|
|
+
|
|
+void MacroAssembler::ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
|
|
+ FloatRegister p, FloatRegister vzr, FloatRegister t1) {
|
|
+ const FloatRegister t0 = result;
|
|
+
|
|
+ // The GCM field polynomial f is z^128 + p(z), where p =
|
|
+ // z^7+z^2+z+1.
|
|
+ //
|
|
+ // z^128 === -p(z) (mod (z^128 + p(z)))
|
|
+ //
|
|
+ // so, given that the product we're reducing is
|
|
+ // a == lo + hi * z^128
|
|
+ // substituting,
|
|
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
|
|
+ //
|
|
+ // we reduce by multiplying hi by p(z) and subtracting the result
|
|
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
|
|
+ // bits we can do this with two 64-bit multiplications, lo*p and
|
|
+ // hi*p.
|
|
+
|
|
+ pmull2(t0, T1Q, hi, p, T2D);
|
|
+ ext(t1, T16B, t0, vzr, 8);
|
|
+ eor(hi, T16B, hi, t1);
|
|
+ ext(t1, T16B, vzr, t0, 8);
|
|
+ eor(lo, T16B, lo, t1);
|
|
+ pmull(t0, T1Q, hi, p, T1D);
|
|
+ eor(result, T16B, lo, t0);
|
|
+}
|
|
+
|
|
+class GHASHMultiplyGenerator: public KernelGenerator {
|
|
+ FloatRegister _result_lo, _result_hi, _b,
|
|
+ _a, _vzr, _a1_xor_a0, _p,
|
|
+ _tmp1, _tmp2, _tmp3;
|
|
+
|
|
+public:
|
|
+ GHASHMultiplyGenerator(Assembler *as, int unrolls,
|
|
+ FloatRegister result_lo, FloatRegister result_hi,
|
|
+ /* offsetted registers */
|
|
+ FloatRegister b,
|
|
+ /* non-offsetted (shared) registers */
|
|
+ FloatRegister a, FloatRegister a1_xor_a0, FloatRegister p, FloatRegister vzr,
|
|
+ /* offsetted (temp) registers */
|
|
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3)
|
|
+ : KernelGenerator(as, unrolls),
|
|
+ _result_lo(result_lo), _result_hi(result_hi), _b(b),
|
|
+ _a(a), _vzr(vzr), _a1_xor_a0(a1_xor_a0), _p(p),
|
|
+ _tmp1(tmp1), _tmp2(tmp2), _tmp3(tmp3) { }
|
|
+
|
|
+ static const int register_stride = 7;
|
|
+
|
|
+ virtual void generate(int index) {
|
|
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
|
|
+ // multiplication in three 128-bit multiplications and a few
|
|
+ // additions.
|
|
+ //
|
|
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
|
|
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
|
|
+ //
|
|
+ // Inputs:
|
|
+ //
|
|
+ // A0 in a.d[0] (subkey)
|
|
+ // A1 in a.d[1]
|
|
+ // (A1+A0) in a1_xor_a0.d[0]
|
|
+ //
|
|
+ // B0 in b.d[0] (state)
|
|
+ // B1 in b.d[1]
|
|
+
|
|
+ switch (index) {
|
|
+ case 0: ext(_tmp1, T16B, _b, _b, 0x08); break;
|
|
+ case 1: pmull2(_result_hi, T1Q, _b, _a, T2D); // A1*B1
|
|
+ break;
|
|
+ case 2: eor(_tmp1, T16B, _tmp1, _b); // (B1+B0)
|
|
+ break;
|
|
+ case 3: pmull(_result_lo, T1Q, _b, _a, T1D); // A0*B0
|
|
+ break;
|
|
+ case 4: pmull(_tmp2, T1Q, _tmp1, _a1_xor_a0, T1D); // (A1+A0)(B1+B0)
|
|
+ break;
|
|
+
|
|
+ case 5: ext(_tmp1, T16B, _result_lo, _result_hi, 0x08); break;
|
|
+ case 6: eor(_tmp3, T16B, _result_hi, _result_lo); // A1*B1+A0*B0
|
|
+ break;
|
|
+ case 7: eor(_tmp2, T16B, _tmp2, _tmp1); break;
|
|
+ case 8: eor(_tmp2, T16B, _tmp2, _tmp3); break;
|
|
+
|
|
+ // Register pair <_result_hi:_result_lo> holds the _result of carry-less multiplication
|
|
+ case 9: ins(_result_hi, D, _tmp2, 0, 1); break;
|
|
+ case 10: ins(_result_lo, D, _tmp2, 1, 0); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual KernelGenerator *next() {
|
|
+ GHASHMultiplyGenerator *result
|
|
+ = new GHASHMultiplyGenerator(this, _unrolls, _result_lo, _result_hi,
|
|
+ _b, _a, _a1_xor_a0, _p, _vzr,
|
|
+ _tmp1, _tmp2, _tmp3);
|
|
+ result->_result_lo += register_stride;
|
|
+ result->_result_hi += register_stride;
|
|
+ result->_b += register_stride;
|
|
+ result->_tmp1 += register_stride;
|
|
+ result->_tmp2 += register_stride;
|
|
+ result->_tmp3 += register_stride;
|
|
+ return result;
|
|
+ }
|
|
+
|
|
+ virtual int length() { return 11; }
|
|
+};
|
|
+
|
|
+// Reduce the 128-bit product in hi:lo by the GCM field polynomial.
|
|
+// The FloatRegister argument called data is optional: if it is a
|
|
+// valid register, we interleave LD1 instructions with the
|
|
+// reduction. This is to reduce latency next time around the loop.
|
|
+class GHASHReduceGenerator: public KernelGenerator {
|
|
+ FloatRegister _result, _lo, _hi, _p, _vzr, _data, _t1;
|
|
+ int _once;
|
|
+public:
|
|
+ GHASHReduceGenerator(Assembler *as, int unrolls,
|
|
+ /* offsetted registers */
|
|
+ FloatRegister result, FloatRegister lo, FloatRegister hi,
|
|
+ /* non-offsetted (shared) registers */
|
|
+ FloatRegister p, FloatRegister vzr, FloatRegister data,
|
|
+ /* offsetted (temp) registers */
|
|
+ FloatRegister t1)
|
|
+ : KernelGenerator(as, unrolls),
|
|
+ _result(result), _lo(lo), _hi(hi),
|
|
+ _p(p), _vzr(vzr), _data(data), _t1(t1), _once(true) { }
|
|
+
|
|
+ static const int register_stride = 7;
|
|
+
|
|
+ virtual void generate(int index) {
|
|
+ const FloatRegister t0 = _result;
|
|
+
|
|
+ switch (index) {
|
|
+ // The GCM field polynomial f is z^128 + p(z), where p =
|
|
+ // z^7+z^2+z+1.
|
|
+ //
|
|
+ // z^128 === -p(z) (mod (z^128 + p(z)))
|
|
+ //
|
|
+ // so, given that the product we're reducing is
|
|
+ // a == lo + hi * z^128
|
|
+ // substituting,
|
|
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
|
|
+ //
|
|
+ // we reduce by multiplying hi by p(z) and subtracting the _result
|
|
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
|
|
+ // bits we can do this with two 64-bit multiplications, lo*p and
|
|
+ // hi*p.
|
|
+
|
|
+ case 0: pmull2(t0, T1Q, _hi, _p, T2D); break;
|
|
+ case 1: ext(_t1, T16B, t0, _vzr, 8); break;
|
|
+ case 2: eor(_hi, T16B, _hi, _t1); break;
|
|
+ case 3: ext(_t1, T16B, _vzr, t0, 8); break;
|
|
+ case 4: eor(_lo, T16B, _lo, _t1); break;
|
|
+ case 5: pmull(t0, T1Q, _hi, _p, T1D); break;
|
|
+ case 6: eor(_result, T16B, _lo, t0); break;
|
|
+ default: ShouldNotReachHere();
|
|
+ }
|
|
+
|
|
+ // Sprinkle load instructions into the generated instructions
|
|
+ if (_data->is_valid() && _once) {
|
|
+ assert(length() >= unrolls(), "not enough room for inteleaved loads");
|
|
+ if (index < unrolls()) {
|
|
+ ld1((_data + index*register_stride), T16B, post(r2, 0x10));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virtual KernelGenerator *next() {
|
|
+ GHASHReduceGenerator *result
|
|
+ = new GHASHReduceGenerator(this, _unrolls,
|
|
+ _result, _lo, _hi, _p, _vzr, _data, _t1);
|
|
+ result->_result += register_stride;
|
|
+ result->_hi += register_stride;
|
|
+ result->_lo += register_stride;
|
|
+ result->_t1 += register_stride;
|
|
+ result->_once = false;
|
|
+ return result;
|
|
+ }
|
|
+
|
|
+ int length() { return 7; }
|
|
+};
|
|
+
|
|
+// Perform a GHASH multiply/reduce on a single FloatRegister.
|
|
+void MacroAssembler::ghash_modmul(FloatRegister result,
|
|
+ FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
|
|
+ FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
|
|
+ FloatRegister t1, FloatRegister t2, FloatRegister t3) {
|
|
+ ghash_multiply(result_lo, result_hi, a, b, a1_xor_a0, t1, t2, t3);
|
|
+ ghash_reduce(result, result_lo, result_hi, p, vzr, t1);
|
|
+}
|
|
+
|
|
+// Interleaved GHASH processing.
|
|
+//
|
|
+// Clobbers all vector registers.
|
|
+//
|
|
+void MacroAssembler::ghash_processBlocks_wide(address field_polynomial, Register state,
|
|
+ Register subkeyH,
|
|
+ Register data, Register blocks, int unrolls) {
|
|
+ int register_stride = 7;
|
|
+
|
|
+ // Bafflingly, GCM uses little-endian for the byte order, but
|
|
+ // big-endian for the bit order. For example, the polynomial 1 is
|
|
+ // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
|
|
+ //
|
|
+ // So, we must either reverse the bytes in each word and do
|
|
+ // everything big-endian or reverse the bits in each byte and do
|
|
+ // it little-endian. On AArch64 it's more idiomatic to reverse
|
|
+ // the bits in each byte (we have an instruction, RBIT, to do
|
|
+ // that) and keep the data in little-endian bit order through the
|
|
+ // calculation, bit-reversing the inputs and outputs.
|
|
+
|
|
+ assert(unrolls * register_stride < 32, "out of registers");
|
|
+
|
|
+ FloatRegister a1_xor_a0 = v28;
|
|
+ FloatRegister Hprime = v29;
|
|
+ FloatRegister vzr = v30;
|
|
+ FloatRegister p = v31;
|
|
+ eor(vzr, T16B, vzr, vzr); // zero register
|
|
+
|
|
+ ldrq(p, field_polynomial); // The field polynomial
|
|
+
|
|
+ ldrq(v0, Address(state));
|
|
+ ldrq(Hprime, Address(subkeyH));
|
|
+
|
|
+ rev64(v0, T16B, v0); // Bit-reverse words in state and subkeyH
|
|
+ rbit(v0, T16B, v0);
|
|
+ rev64(Hprime, T16B, Hprime);
|
|
+ rbit(Hprime, T16B, Hprime);
|
|
+
|
|
+ // Powers of H -> Hprime
|
|
+
|
|
+ Label already_calculated, done;
|
|
+ {
|
|
+ // The first time around we'll have to calculate H**2, H**3, etc.
|
|
+ // Look at the largest power of H in the subkeyH array to see if
|
|
+ // it's already been calculated.
|
|
+ ldp(rscratch1, rscratch2, Address(subkeyH, 16 * (unrolls - 1)));
|
|
+ orr(rscratch1, rscratch1, rscratch2);
|
|
+ cbnz(rscratch1, already_calculated);
|
|
+
|
|
+ orr(v6, T16B, Hprime, Hprime); // Start with H in v6 and Hprime
|
|
+ for (int i = 1; i < unrolls; i++) {
|
|
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
|
|
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
|
|
+ ghash_modmul(/*result*/v6, /*result_lo*/v5, /*result_hi*/v4, /*b*/v6,
|
|
+ Hprime, vzr, a1_xor_a0, p,
|
|
+ /*temps*/v1, v3, v2);
|
|
+ rev64(v1, T16B, v6);
|
|
+ rbit(v1, T16B, v1);
|
|
+ strq(v1, Address(subkeyH, 16 * i));
|
|
+ }
|
|
+ b(done);
|
|
+ }
|
|
+ {
|
|
+ bind(already_calculated);
|
|
+
|
|
+ // Load the largest power of H we need into v6.
|
|
+ ldrq(v6, Address(subkeyH, 16 * (unrolls - 1)));
|
|
+ rev64(v6, T16B, v6);
|
|
+ rbit(v6, T16B, v6);
|
|
+ }
|
|
+ bind(done);
|
|
+
|
|
+ orr(Hprime, T16B, v6, v6); // Move H ** unrolls into Hprime
|
|
+
|
|
+ // Hprime contains (H ** 1, H ** 2, ... H ** unrolls)
|
|
+ // v0 contains the initial state. Clear the others.
|
|
+ for (int i = 1; i < unrolls; i++) {
|
|
+ int ofs = register_stride * i;
|
|
+ eor(ofs+v0, T16B, ofs+v0, ofs+v0); // zero each state register
|
|
+ }
|
|
+
|
|
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
|
|
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
|
|
+
|
|
+ // Load #unrolls blocks of data
|
|
+ for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) {
|
|
+ ld1(v2+ofs, T16B, post(data, 0x10));
|
|
+ }
|
|
+
|
|
+ // Register assignments, replicated across 4 clones, v0 ... v23
|
|
+ //
|
|
+ // v0: input / output: current state, result of multiply/reduce
|
|
+ // v1: temp
|
|
+ // v2: input: one block of data (the ciphertext)
|
|
+ // also used as a temp once the data has been consumed
|
|
+ // v3: temp
|
|
+ // v4: output: high part of product
|
|
+ // v5: output: low part ...
|
|
+ // v6: unused
|
|
+ //
|
|
+ // Not replicated:
|
|
+ //
|
|
+ // v28: High part of H xor low part of H'
|
|
+ // v29: H' (hash subkey)
|
|
+ // v30: zero
|
|
+ // v31: Reduction polynomial of the Galois field
|
|
+
|
|
+ // Inner loop.
|
|
+ // Do the whole load/add/multiply/reduce over all our data except
|
|
+ // the last few rows.
|
|
+ {
|
|
+ Label L_ghash_loop;
|
|
+ bind(L_ghash_loop);
|
|
+
|
|
+ // Prefetching doesn't help here. In fact, on Neoverse N1 it's worse.
|
|
+ // prfm(Address(data, 128), PLDL1KEEP);
|
|
+
|
|
+ // Xor data into current state
|
|
+ for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) {
|
|
+ rbit((v2+ofs), T16B, (v2+ofs));
|
|
+ eor((v2+ofs), T16B, v0+ofs, (v2+ofs)); // bit-swapped data ^ bit-swapped state
|
|
+ }
|
|
+
|
|
+ // Generate fully-unrolled multiply-reduce in two stages.
|
|
+
|
|
+ (new GHASHMultiplyGenerator(this, unrolls,
|
|
+ /*result_lo*/v5, /*result_hi*/v4, /*data*/v2,
|
|
+ Hprime, a1_xor_a0, p, vzr,
|
|
+ /*temps*/v1, v3, /* reuse b*/v2))->unroll();
|
|
+
|
|
+ // NB: GHASHReduceGenerator also loads the next #unrolls blocks of
|
|
+ // data into v0, v0+ofs, the current state.
|
|
+ (new GHASHReduceGenerator (this, unrolls,
|
|
+ /*result*/v0, /*lo*/v5, /*hi*/v4, p, vzr,
|
|
+ /*data*/v2, /*temp*/v3))->unroll();
|
|
+
|
|
+ sub(blocks, blocks, unrolls);
|
|
+ cmp(blocks, (unsigned char)(unrolls * 2));
|
|
+ br(GE, L_ghash_loop);
|
|
+ }
|
|
+
|
|
+ // Merge the #unrolls states. Note that the data for the next
|
|
+ // iteration has already been loaded into v4, v4+ofs, etc...
|
|
+
|
|
+ // First, we multiply/reduce each clone by the appropriate power of H.
|
|
+ for (int i = 0; i < unrolls; i++) {
|
|
+ int ofs = register_stride * i;
|
|
+ ldrq(Hprime, Address(subkeyH, 16 * (unrolls - i - 1)));
|
|
+
|
|
+ rbit(v2+ofs, T16B, v2+ofs);
|
|
+ eor(v2+ofs, T16B, ofs+v0, v2+ofs); // bit-swapped data ^ bit-swapped state
|
|
+
|
|
+ rev64(Hprime, T16B, Hprime);
|
|
+ rbit(Hprime, T16B, Hprime);
|
|
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
|
|
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
|
|
+ ghash_modmul(/*result*/v0+ofs, /*result_lo*/v5+ofs, /*result_hi*/v4+ofs, /*b*/v2+ofs,
|
|
+ Hprime, vzr, a1_xor_a0, p,
|
|
+ /*temps*/v1+ofs, v3+ofs, /* reuse b*/v2+ofs);
|
|
+ }
|
|
+
|
|
+ // Then we sum the results.
|
|
+ for (int i = 0; i < unrolls - 1; i++) {
|
|
+ int ofs = register_stride * i;
|
|
+ eor(v0, T16B, v0, v0 + register_stride + ofs);
|
|
+ }
|
|
+
|
|
+ sub(blocks, blocks, (unsigned char)unrolls);
|
|
+
|
|
+ // And finally bit-reverse the state back to big endian.
|
|
+ rev64(v0, T16B, v0);
|
|
+ rbit(v0, T16B, v0);
|
|
+ st1(v0, T16B, state);
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
|
index 2e2e8ae78..c024dec55 100644
|
|
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
|
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
|
|
@@ -2804,6 +2804,266 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+ // CTR AES crypt.
|
|
+ // Arguments:
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source byte array address
|
|
+ // c_rarg1 - destination byte array address
|
|
+ // c_rarg2 - K (key) in little endian int array
|
|
+ // c_rarg3 - counter vector byte array address
|
|
+ // c_rarg4 - input length
|
|
+ // c_rarg5 - saved encryptedCounter start
|
|
+ // c_rarg6 - saved used length
|
|
+ //
|
|
+ // Output:
|
|
+ // r0 - input length
|
|
+ //
|
|
+ address generate_counterMode_AESCrypt() {
|
|
+ const Register in = c_rarg0;
|
|
+ const Register out = c_rarg1;
|
|
+ const Register key = c_rarg2;
|
|
+ const Register counter = c_rarg3;
|
|
+ const Register saved_len = c_rarg4, len = r10;
|
|
+ const Register saved_encrypted_ctr = c_rarg5;
|
|
+ const Register used_ptr = c_rarg6, used = r12;
|
|
+
|
|
+ const Register offset = r7;
|
|
+ const Register keylen = r11;
|
|
+
|
|
+ const unsigned char block_size = 16;
|
|
+ const int bulk_width = 4;
|
|
+ // NB: bulk_width can be 4 or 8. 8 gives slightly faster
|
|
+ // performance with larger data sizes, but it also means that the
|
|
+ // fast path isn't used until you have at least 8 blocks, and up
|
|
+ // to 127 bytes of data will be executed on the slow path. For
|
|
+ // that reason, and also so as not to blow away too much icache, 4
|
|
+ // blocks seems like a sensible compromise.
|
|
+
|
|
+ // Algorithm:
|
|
+ //
|
|
+ // if (len == 0) {
|
|
+ // goto DONE;
|
|
+ // }
|
|
+ // int result = len;
|
|
+ // do {
|
|
+ // if (used >= blockSize) {
|
|
+ // if (len >= bulk_width * blockSize) {
|
|
+ // CTR_large_block();
|
|
+ // if (len == 0)
|
|
+ // goto DONE;
|
|
+ // }
|
|
+ // for (;;) {
|
|
+ // 16ByteVector v0 = counter;
|
|
+ // embeddedCipher.encryptBlock(v0, 0, encryptedCounter, 0);
|
|
+ // used = 0;
|
|
+ // if (len < blockSize)
|
|
+ // break; /* goto NEXT */
|
|
+ // 16ByteVector v1 = load16Bytes(in, offset);
|
|
+ // v1 = v1 ^ encryptedCounter;
|
|
+ // store16Bytes(out, offset);
|
|
+ // used = blockSize;
|
|
+ // offset += blockSize;
|
|
+ // len -= blockSize;
|
|
+ // if (len == 0)
|
|
+ // goto DONE;
|
|
+ // }
|
|
+ // }
|
|
+ // NEXT:
|
|
+ // out[outOff++] = (byte)(in[inOff++] ^ encryptedCounter[used++]);
|
|
+ // len--;
|
|
+ // } while (len != 0);
|
|
+ // DONE:
|
|
+ // return result;
|
|
+ //
|
|
+ // CTR_large_block()
|
|
+ // Wide bulk encryption of whole blocks.
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
|
|
+ const address start = __ pc();
|
|
+ __ enter();
|
|
+
|
|
+ Label DONE, CTR_large_block, large_block_return;
|
|
+ __ ldrw(used, Address(used_ptr));
|
|
+ __ cbzw(saved_len, DONE);
|
|
+
|
|
+ __ mov(len, saved_len);
|
|
+ __ mov(offset, 0);
|
|
+
|
|
+ // Compute #rounds for AES based on the length of the key array
|
|
+ __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
|
+
|
|
+ __ aesenc_loadkeys(key, keylen);
|
|
+
|
|
+ {
|
|
+ Label L_CTR_loop, NEXT;
|
|
+
|
|
+ __ bind(L_CTR_loop);
|
|
+
|
|
+ __ cmp(used, block_size);
|
|
+ __ br(__ LO, NEXT);
|
|
+
|
|
+ // Maybe we have a lot of data
|
|
+ __ subsw(rscratch1, len, bulk_width * block_size);
|
|
+ __ br(__ HS, CTR_large_block);
|
|
+ __ BIND(large_block_return);
|
|
+ __ cbzw(len, DONE);
|
|
+
|
|
+ // Setup the counter
|
|
+ __ movi(v4, __ T4S, 0);
|
|
+ __ movi(v5, __ T4S, 1);
|
|
+ __ ins(v4, __ S, v5, 3, 3); // v4 contains { 0, 0, 0, 1 }
|
|
+
|
|
+ __ ld1(v0, __ T16B, counter); // Load the counter into v0
|
|
+ __ rev32(v16, __ T16B, v0);
|
|
+ __ addv(v16, __ T4S, v16, v4);
|
|
+ __ rev32(v16, __ T16B, v16);
|
|
+ __ st1(v16, __ T16B, counter); // Save the incremented counter back
|
|
+
|
|
+ {
|
|
+ // We have fewer than bulk_width blocks of data left. Encrypt
|
|
+ // them one by one until there is less than a full block
|
|
+ // remaining, being careful to save both the encrypted counter
|
|
+ // and the counter.
|
|
+
|
|
+ Label inner_loop;
|
|
+ __ bind(inner_loop);
|
|
+ // Counter to encrypt is in v0
|
|
+ __ aesecb_encrypt(noreg, noreg, keylen);
|
|
+ __ st1(v0, __ T16B, saved_encrypted_ctr);
|
|
+
|
|
+ // Do we have a remaining full block?
|
|
+
|
|
+ __ mov(used, 0);
|
|
+ __ cmp(len, block_size);
|
|
+ __ br(__ LO, NEXT);
|
|
+
|
|
+ // Yes, we have a full block
|
|
+ __ ldrq(v1, Address(in, offset));
|
|
+ __ eor(v1, __ T16B, v1, v0);
|
|
+ __ strq(v1, Address(out, offset));
|
|
+ __ mov(used, block_size);
|
|
+ __ add(offset, offset, block_size);
|
|
+
|
|
+ __ subw(len, len, block_size);
|
|
+ __ cbzw(len, DONE);
|
|
+
|
|
+ // Increment the counter, store it back
|
|
+ __ orr(v0, __ T16B, v16, v16);
|
|
+ __ rev32(v16, __ T16B, v16);
|
|
+ __ addv(v16, __ T4S, v16, v4);
|
|
+ __ rev32(v16, __ T16B, v16);
|
|
+ __ st1(v16, __ T16B, counter); // Save the incremented counter back
|
|
+
|
|
+ __ b(inner_loop);
|
|
+ }
|
|
+
|
|
+ __ BIND(NEXT);
|
|
+
|
|
+ // Encrypt a single byte, and loop.
|
|
+ // We expect this to be a rare event.
|
|
+ __ ldrb(rscratch1, Address(in, offset));
|
|
+ __ ldrb(rscratch2, Address(saved_encrypted_ctr, used));
|
|
+ __ eor(rscratch1, rscratch1, rscratch2);
|
|
+ __ strb(rscratch1, Address(out, offset));
|
|
+ __ add(offset, offset, 1);
|
|
+ __ add(used, used, 1);
|
|
+ __ subw(len, len,1);
|
|
+ __ cbnzw(len, L_CTR_loop);
|
|
+ }
|
|
+
|
|
+ __ bind(DONE);
|
|
+ __ strw(used, Address(used_ptr));
|
|
+ __ mov(r0, saved_len);
|
|
+
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret(lr);
|
|
+
|
|
+ // Bulk encryption
|
|
+
|
|
+ __ BIND (CTR_large_block);
|
|
+ assert(bulk_width == 4 || bulk_width == 8, "must be");
|
|
+
|
|
+ if (bulk_width == 8) {
|
|
+ __ sub(sp, sp, 4 * 16);
|
|
+ __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
|
|
+ }
|
|
+ __ sub(sp, sp, 4 * 16);
|
|
+ __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
|
|
+ RegSet saved_regs = (RegSet::of(in, out, offset)
|
|
+ + RegSet::of(saved_encrypted_ctr, used_ptr, len));
|
|
+ __ push(saved_regs, sp);
|
|
+ __ andr(len, len, -16 * bulk_width); // 8/4 encryptions, 16 bytes per encryption
|
|
+ __ add(in, in, offset);
|
|
+ __ add(out, out, offset);
|
|
+
|
|
+ // Keys should already be loaded into the correct registers
|
|
+
|
|
+ __ ld1(v0, __ T16B, counter); // v0 contains the first counter
|
|
+ __ rev32(v16, __ T16B, v0); // v16 contains byte-reversed counter
|
|
+
|
|
+ // AES/CTR loop
|
|
+ {
|
|
+ Label L_CTR_loop;
|
|
+ __ BIND(L_CTR_loop);
|
|
+
|
|
+ // Setup the counters
|
|
+ __ movi(v8, __ T4S, 0);
|
|
+ __ movi(v9, __ T4S, 1);
|
|
+ __ ins(v8, __ S, v9, 3, 3); // v8 contains { 0, 0, 0, 1 }
|
|
+
|
|
+ for (FloatRegister f = v0; f < v0 + bulk_width; f++) {
|
|
+ __ rev32(f, __ T16B, v16);
|
|
+ __ addv(v16, __ T4S, v16, v8);
|
|
+ }
|
|
+
|
|
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16));
|
|
+
|
|
+ // Encrypt the counters
|
|
+ __ aesecb_encrypt(noreg, noreg, keylen, v0, bulk_width);
|
|
+
|
|
+ if (bulk_width == 8) {
|
|
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16));
|
|
+ }
|
|
+
|
|
+ // XOR the encrypted counters with the inputs
|
|
+ for (int i = 0; i < bulk_width; i++) {
|
|
+ __ eor(v0 + i, __ T16B, v0 + i, v8 + i);
|
|
+ }
|
|
+
|
|
+ // Write the encrypted data
|
|
+ __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16));
|
|
+ if (bulk_width == 8) {
|
|
+ __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16));
|
|
+ }
|
|
+
|
|
+ __ subw(len, len, 16 * bulk_width);
|
|
+ __ cbnzw(len, L_CTR_loop);
|
|
+ }
|
|
+
|
|
+ // Save the counter back where it goes
|
|
+ __ rev32(v16, __ T16B, v16);
|
|
+ __ st1(v16, __ T16B, counter);
|
|
+
|
|
+ __ pop(saved_regs, sp);
|
|
+
|
|
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
|
|
+ if (bulk_width == 8) {
|
|
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
|
|
+ }
|
|
+
|
|
+ __ andr(rscratch1, len, -16 * bulk_width);
|
|
+ __ sub(len, len, rscratch1);
|
|
+ __ add(offset, offset, rscratch1);
|
|
+ __ mov(used, 16);
|
|
+ __ strw(used, Address(used_ptr));
|
|
+ __ b(large_block_return);
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+
|
|
// Arguments:
|
|
//
|
|
// Inputs:
|
|
@@ -3677,6 +3937,56 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+ address generate_ghash_processBlocks_wide() {
|
|
+ address small = generate_ghash_processBlocks();
|
|
+
|
|
+ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks_wide");
|
|
+ __ align(wordSize * 2);
|
|
+ address p = __ pc();
|
|
+ __ emit_int64(0x87); // The low-order bits of the field
|
|
+ // polynomial (i.e. p = z^7+z^2+z+1)
|
|
+ // repeated in the low and high parts of a
|
|
+ // 128-bit vector
|
|
+ __ emit_int64(0x87);
|
|
+
|
|
+ __ align(CodeEntryAlignment);
|
|
+ address start = __ pc();
|
|
+
|
|
+ Register state = c_rarg0;
|
|
+ Register subkeyH = c_rarg1;
|
|
+ Register data = c_rarg2;
|
|
+ Register blocks = c_rarg3;
|
|
+
|
|
+ const int unroll = 4;
|
|
+
|
|
+ __ cmp(blocks, (unsigned char)(unroll * 2));
|
|
+ __ br(__ LT, small);
|
|
+
|
|
+ if (unroll > 1) {
|
|
+ // Save state before entering routine
|
|
+ __ sub(sp, sp, 4 * 16);
|
|
+ __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
|
|
+ __ sub(sp, sp, 4 * 16);
|
|
+ __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
|
|
+ }
|
|
+
|
|
+ __ ghash_processBlocks_wide(p, state, subkeyH, data, blocks, unroll);
|
|
+
|
|
+ if (unroll > 1) {
|
|
+ // And restore state
|
|
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
|
|
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
|
|
+ }
|
|
+
|
|
+ __ cmp(blocks, 0u);
|
|
+ __ br(__ GT, small);
|
|
+
|
|
+ __ ret(lr);
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+
|
|
// Continuation point for throwing of implicit exceptions that are
|
|
// not handled in the current activation. Fabricates an exception
|
|
// oop and initiates normal exception dispatching in this
|
|
@@ -4687,6 +4997,15 @@ class StubGenerator: public StubCodeGenerator {
|
|
StubRoutines::_montgomerySquare = g.generate_multiply();
|
|
}
|
|
|
|
+ // generate GHASH intrinsics code
|
|
+ if (UseGHASHIntrinsics) {
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks_wide();
|
|
+ } else {
|
|
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
|
+ }
|
|
+ }
|
|
+
|
|
if (UseAESIntrinsics) {
|
|
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
|
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
|
@@ -4694,9 +5013,8 @@ class StubGenerator: public StubCodeGenerator {
|
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
|
|
}
|
|
|
|
- // generate GHASH intrinsics code
|
|
- if (UseGHASHIntrinsics) {
|
|
- StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
|
|
}
|
|
|
|
if (UseSHA1Intrinsics) {
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
|
|
index d1c312ab3..05619ce7f 100644
|
|
--- a/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
|
|
+++ b/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
|
|
@@ -37,7 +37,7 @@ static bool returns_to_call_stub(address return_pc) {
|
|
|
|
enum platform_dependent_constants {
|
|
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
|
|
- code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
|
|
+ code_size2 = 32000 // simply increase if too small (assembler will crash if too small)
|
|
};
|
|
|
|
class aarch64 {
|
|
diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
|
|
index 9808337a0..de636fb83 100644
|
|
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
|
|
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
|
|
@@ -233,12 +233,21 @@ void VM_Version::get_processor_features() {
|
|
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
|
|
UseAES = true;
|
|
}
|
|
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
} else {
|
|
if (UseAES) {
|
|
- warning("UseAES specified, but not supported on this CPU");
|
|
+ warning("AES instructions are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAES, false);
|
|
}
|
|
if (UseAESIntrinsics) {
|
|
- warning("UseAESIntrinsics specified, but not supported on this CPU");
|
|
+ warning("AES intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
|
+ }
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ warning("AES/CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
}
|
|
}
|
|
|
|
diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
|
|
index b5ce1cfa9..fea8b1f87 100644
|
|
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
|
|
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
|
|
@@ -194,6 +194,11 @@ void VM_Version::initialize() {
|
|
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
|
}
|
|
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ warning("AES/CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+
|
|
if (UseGHASHIntrinsics) {
|
|
warning("GHASH intrinsics are not available on this CPU");
|
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
|
diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
|
|
index bd893e138..08d7a7311 100644
|
|
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
|
|
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
|
|
@@ -319,6 +319,11 @@ void VM_Version::initialize() {
|
|
}
|
|
}
|
|
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ warning("AES/CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+
|
|
// GHASH/GCM intrinsics
|
|
if (has_vis3() && (UseVIS > 2)) {
|
|
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
|
|
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
|
|
index 1759ecdfd..ddc1acfd8 100644
|
|
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
|
|
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
|
|
@@ -2373,20 +2373,52 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
|
|
|
|
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
|
|
assert(VM_Version::supports_sse4_1(), "");
|
|
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
|
|
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
|
|
emit_int8(0x16);
|
|
emit_int8((unsigned char)(0xC0 | encode));
|
|
emit_int8(imm8);
|
|
}
|
|
|
|
+void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
|
|
+ emit_int8(0x16);
|
|
+ emit_operand(src, dst);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
|
|
assert(VM_Version::supports_sse4_1(), "");
|
|
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
|
|
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
|
|
emit_int8(0x16);
|
|
emit_int8((unsigned char)(0xC0 | encode));
|
|
emit_int8(imm8);
|
|
}
|
|
|
|
+void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
|
|
+ emit_int8(0x16);
|
|
+ emit_operand(src, dst);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
+void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A);
|
|
+ emit_int8((unsigned char)0x15);
|
|
+ emit_operand(src, dst);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
+void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A);
|
|
+ emit_int8(0x14);
|
|
+ emit_operand(src, dst);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
|
|
assert(VM_Version::supports_sse4_1(), "");
|
|
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
|
|
@@ -2395,6 +2427,14 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
|
|
emit_int8(imm8);
|
|
}
|
|
|
|
+void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
|
|
+ emit_int8(0x22);
|
|
+ emit_operand(dst,src);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
|
|
assert(VM_Version::supports_sse4_1(), "");
|
|
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
|
|
@@ -2403,6 +2443,30 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
|
|
emit_int8(imm8);
|
|
}
|
|
|
|
+void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
|
|
+ emit_int8(0x22);
|
|
+ emit_operand(dst, src);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
+void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
|
|
+ assert(VM_Version::supports_sse2(), "");
|
|
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F);
|
|
+ emit_int8((unsigned char)0xC4);
|
|
+ emit_operand(dst, src);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
+void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
|
|
+ assert(VM_Version::supports_sse4_1(), "");
|
|
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
|
|
+ emit_int8(0x20);
|
|
+ emit_operand(dst, src);
|
|
+ emit_int8(imm8);
|
|
+}
|
|
+
|
|
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
|
|
assert(VM_Version::supports_sse4_1(), "");
|
|
InstructionMark im(this);
|
|
@@ -3075,6 +3139,12 @@ void Assembler::xorl(Register dst, Register src) {
|
|
emit_arith(0x33, 0xC0, dst, src);
|
|
}
|
|
|
|
+void Assembler::xorb(Register dst, Address src) {
|
|
+ InstructionMark im(this);
|
|
+ prefix(src, dst);
|
|
+ emit_int8(0x32);
|
|
+ emit_operand(dst, src);
|
|
+}
|
|
|
|
// AVX 3-operands scalar float-point arithmetic instructions
|
|
|
|
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
|
|
index 5ea01311e..c2e70bc2a 100644
|
|
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp
|
|
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
|
|
@@ -1479,10 +1479,20 @@ private:
|
|
// SSE 4.1 extract
|
|
void pextrd(Register dst, XMMRegister src, int imm8);
|
|
void pextrq(Register dst, XMMRegister src, int imm8);
|
|
+ void pextrd(Address dst, XMMRegister src, int imm8);
|
|
+ void pextrq(Address dst, XMMRegister src, int imm8);
|
|
+ void pextrb(Address dst, XMMRegister src, int imm8);
|
|
+ // SSE 2 extract
|
|
+ void pextrw(Address dst, XMMRegister src, int imm8);
|
|
|
|
// SSE 4.1 insert
|
|
void pinsrd(XMMRegister dst, Register src, int imm8);
|
|
void pinsrq(XMMRegister dst, Register src, int imm8);
|
|
+ void pinsrd(XMMRegister dst, Address src, int imm8);
|
|
+ void pinsrq(XMMRegister dst, Address src, int imm8);
|
|
+ void pinsrb(XMMRegister dst, Address src, int imm8);
|
|
+ // SSE 2 insert
|
|
+ void pinsrw(XMMRegister dst, Address src, int imm8);
|
|
|
|
// SSE4.1 packed move
|
|
void pmovzxbw(XMMRegister dst, XMMRegister src);
|
|
@@ -1687,6 +1697,8 @@ private:
|
|
void xorl(Register dst, Address src);
|
|
void xorl(Register dst, Register src);
|
|
|
|
+ void xorb(Register dst, Address src);
|
|
+
|
|
void xorq(Register dst, Address src);
|
|
void xorq(Register dst, Register src);
|
|
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
|
|
index 2e5599807..f555f3326 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
|
|
@@ -2153,6 +2153,17 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+ address generate_counter_shuffle_mask() {
|
|
+ __ align(16);
|
|
+ StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
|
|
+ address start = __ pc();
|
|
+ __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
|
|
+ __ emit_data(0x08090a0b, relocInfo::none, 0);
|
|
+ __ emit_data(0x04050607, relocInfo::none, 0);
|
|
+ __ emit_data(0x00010203, relocInfo::none, 0);
|
|
+ return start;
|
|
+ }
|
|
+
|
|
// Utility routine for loading a 128-bit key word in little endian format
|
|
// can optionally specify that the shuffle mask is already in an xmmregister
|
|
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
|
|
@@ -2178,6 +2189,31 @@ class StubGenerator: public StubCodeGenerator {
|
|
__ aesdec(xmmdst, xmmtmp);
|
|
}
|
|
|
|
+ // Utility routine for increase 128bit counter (iv in CTR mode)
|
|
+ // XMM_128bit, D3, D2, D1, D0
|
|
+ void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
|
|
+ __ pextrd(reg, xmmdst, 0x0);
|
|
+ __ addl(reg, inc_delta);
|
|
+ __ pinsrd(xmmdst, reg, 0x0);
|
|
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
|
|
+
|
|
+ __ pextrd(reg, xmmdst, 0x01); // Carry-> D1
|
|
+ __ addl(reg, 0x01);
|
|
+ __ pinsrd(xmmdst, reg, 0x01);
|
|
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
|
|
+
|
|
+ __ pextrd(reg, xmmdst, 0x02); // Carry-> D2
|
|
+ __ addl(reg, 0x01);
|
|
+ __ pinsrd(xmmdst, reg, 0x02);
|
|
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
|
|
+
|
|
+ __ pextrd(reg, xmmdst, 0x03); // Carry -> D3
|
|
+ __ addl(reg, 0x01);
|
|
+ __ pinsrd(xmmdst, reg, 0x03);
|
|
+
|
|
+ __ BIND(next_block); // next instruction
|
|
+ }
|
|
+
|
|
|
|
// Arguments:
|
|
//
|
|
@@ -2719,6 +2755,309 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+
|
|
+ // CTR AES crypt.
|
|
+ // In 32-bit stub, parallelize 4 blocks at a time
|
|
+ // Arguments:
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source byte array address
|
|
+ // c_rarg1 - destination byte array address
|
|
+ // c_rarg2 - K (key) in little endian int array
|
|
+ // c_rarg3 - counter vector byte array address
|
|
+ // c_rarg4 - input length
|
|
+ //
|
|
+ // Output:
|
|
+ // rax - input length
|
|
+ //
|
|
+ address generate_counterMode_AESCrypt_Parallel() {
|
|
+ assert(UseAES, "need AES instructions and misaligned SSE support");
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
|
|
+ address start = __ pc();
|
|
+ const Register from = rsi; // source array address
|
|
+ const Register to = rdx; // destination array address
|
|
+ const Register key = rcx; // key array address
|
|
+ const Register counter = rdi; // counter byte array initialized from initvector array address
|
|
+
|
|
+ // and left with the results of the last encryption block
|
|
+ const Register len_reg = rbx;
|
|
+ const Register pos = rax;
|
|
+
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+ handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi
|
|
+
|
|
+ // load registers from incoming parameters
|
|
+ const Address from_param(rbp, 8+0);
|
|
+ const Address to_param (rbp, 8+4);
|
|
+ const Address key_param (rbp, 8+8);
|
|
+ const Address rvec_param (rbp, 8+12);
|
|
+ const Address len_param (rbp, 8+16);
|
|
+ const Address saved_counter_param(rbp, 8 + 20);
|
|
+ const Address used_addr_param(rbp, 8 + 24);
|
|
+
|
|
+ __ movptr(from , from_param);
|
|
+ __ movptr(to , to_param);
|
|
+ //__ movptr(key, key_param);
|
|
+ //__ movptr(counter, rvec_param);
|
|
+ __ movptr(len_reg , len_param);
|
|
+ //__ movptr(pos, 0);
|
|
+
|
|
+ // Use the partially used encrpyted counter from last invocation
|
|
+ Label L_exit_preLoop, L_preLoop_start;
|
|
+
|
|
+ // Use the registers 'counter' and 'key' here in this preloop
|
|
+ // to hold of last 2 params 'used' and 'saved_encCounter_start'
|
|
+ Register used = counter;
|
|
+ Register saved_encCounter_start = key;
|
|
+ Register used_addr = saved_encCounter_start;
|
|
+
|
|
+ __ movptr(used_addr, used_addr_param);
|
|
+ __ movptr(used, Address(used_addr, 0));
|
|
+ __ movptr(saved_encCounter_start, saved_counter_param);
|
|
+
|
|
+ __ BIND(L_preLoop_start);
|
|
+ __ cmpptr(used, 16);
|
|
+ __ jcc(Assembler::aboveEqual, L_exit_preLoop);
|
|
+ __ cmpptr(len_reg, 0);
|
|
+ __ jcc(Assembler::lessEqual, L_exit_preLoop);
|
|
+ __ movb(rax, Address(saved_encCounter_start, used));
|
|
+ __ xorb(rax, Address(from, 0));
|
|
+ __ movb(Address(to, 0), rax);
|
|
+ __ addptr(from, 1);
|
|
+ __ addptr(to, 1);
|
|
+ __ addptr(used, 1);
|
|
+ __ subptr(len_reg, 1);
|
|
+
|
|
+ __ jmp(L_preLoop_start);
|
|
+
|
|
+ __ BIND(L_exit_preLoop);
|
|
+ __ movptr(used_addr, used_addr_param);
|
|
+ __ movptr(used_addr, used_addr_param);
|
|
+ __ movl(Address(used_addr, 0), used);
|
|
+
|
|
+ // load the parameters 'key' and 'counter'
|
|
+ __ movptr(key, key_param);
|
|
+ __ movptr(counter, rvec_param);
|
|
+
|
|
+ // xmm register assignments for the loops below
|
|
+ const XMMRegister xmm_curr_counter = xmm0;
|
|
+ const XMMRegister xmm_counter_shuf_mask = xmm1; // need to be reloaded
|
|
+ const XMMRegister xmm_key_shuf_mask = xmm2; // need to be reloaded
|
|
+ const XMMRegister xmm_key = xmm3;
|
|
+ const XMMRegister xmm_result0 = xmm4;
|
|
+ const XMMRegister xmm_result1 = xmm5;
|
|
+ const XMMRegister xmm_result2 = xmm6;
|
|
+ const XMMRegister xmm_result3 = xmm7;
|
|
+ const XMMRegister xmm_from0 = xmm1; //reuse XMM register
|
|
+ const XMMRegister xmm_from1 = xmm2;
|
|
+ const XMMRegister xmm_from2 = xmm3;
|
|
+ const XMMRegister xmm_from3 = xmm4;
|
|
+
|
|
+ //for key_128, key_192, key_256
|
|
+ const int rounds[3] = {10, 12, 14};
|
|
+ Label L_singleBlockLoopTop[3];
|
|
+ Label L_multiBlock_loopTop[3];
|
|
+ Label L_key192_top, L_key256_top;
|
|
+ Label L_incCounter[3][4]; // 3: different key length, 4: 4 blocks at a time
|
|
+ Label L_incCounter_single[3]; //for single block, key128, key192, key256
|
|
+ Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
|
|
+ Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
|
|
+
|
|
+ Label L_exit;
|
|
+ const int PARALLEL_FACTOR = 4; //because of the limited register number
|
|
+
|
|
+ // initialize counter with initial counter
|
|
+ __ movdqu(xmm_curr_counter, Address(counter, 0x00));
|
|
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
|
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase
|
|
+
|
|
+ // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
|
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
|
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
|
+ __ cmpl(rax, 52);
|
|
+ __ jcc(Assembler::equal, L_key192_top);
|
|
+ __ cmpl(rax, 60);
|
|
+ __ jcc(Assembler::equal, L_key256_top);
|
|
+
|
|
+ //key128 begins here
|
|
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
|
|
+
|
|
+#define CTR_DoFour(opc, src_reg) \
|
|
+ __ opc(xmm_result0, src_reg); \
|
|
+ __ opc(xmm_result1, src_reg); \
|
|
+ __ opc(xmm_result2, src_reg); \
|
|
+ __ opc(xmm_result3, src_reg);
|
|
+
|
|
+ // k == 0 : generate code for key_128
|
|
+ // k == 1 : generate code for key_192
|
|
+ // k == 2 : generate code for key_256
|
|
+ for (int k = 0; k < 3; ++k) {
|
|
+ //multi blocks starts here
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ BIND(L_multiBlock_loopTop[k]);
|
|
+ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
|
|
+ __ jcc(Assembler::less, L_singleBlockLoopTop[k]);
|
|
+
|
|
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
|
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
|
+
|
|
+ //load, then increase counters
|
|
+ CTR_DoFour(movdqa, xmm_curr_counter);
|
|
+ __ push(rbx);
|
|
+ inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]);
|
|
+ inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]);
|
|
+ inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]);
|
|
+ inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]);
|
|
+ __ pop (rbx);
|
|
+
|
|
+ load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. interleaving for better performance
|
|
+
|
|
+ CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR
|
|
+ CTR_DoFour(pxor, xmm_key); //PXOR with Round 0 key
|
|
+
|
|
+ for (int i = 1; i < rounds[k]; ++i) {
|
|
+ load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
|
|
+ CTR_DoFour(aesenc, xmm_key);
|
|
+ }
|
|
+ load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask);
|
|
+ CTR_DoFour(aesenclast, xmm_key);
|
|
+
|
|
+ // get next PARALLEL_FACTOR blocks into xmm_from registers
|
|
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
|
|
+ __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
|
|
+ __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
|
|
+
|
|
+ // PXOR with input text
|
|
+ __ pxor(xmm_result0, xmm_from0); //result0 is xmm4
|
|
+ __ pxor(xmm_result1, xmm_from1);
|
|
+ __ pxor(xmm_result2, xmm_from2);
|
|
+
|
|
+ // store PARALLEL_FACTOR results into the next 64 bytes of output
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
|
|
+
|
|
+ // do it here after xmm_result0 is saved, because xmm_from3 reuse the same register of xmm_result0.
|
|
+ __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
|
|
+ __ pxor(xmm_result3, xmm_from3);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
|
|
+
|
|
+ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
|
|
+ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
|
|
+ __ jmp(L_multiBlock_loopTop[k]);
|
|
+
|
|
+ // singleBlock starts here
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ BIND(L_singleBlockLoopTop[k]);
|
|
+ __ cmpptr(len_reg, 0);
|
|
+ __ jcc(Assembler::equal, L_exit);
|
|
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
|
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
|
+ __ movdqa(xmm_result0, xmm_curr_counter);
|
|
+ load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
|
|
+ __ push(rbx);//rbx is used for increasing counter
|
|
+ inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]);
|
|
+ __ pop (rbx);
|
|
+ __ pshufb(xmm_result0, xmm_counter_shuf_mask);
|
|
+ __ pxor(xmm_result0, xmm_key);
|
|
+ for (int i = 1; i < rounds[k]; i++) {
|
|
+ load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
|
|
+ __ aesenc(xmm_result0, xmm_key);
|
|
+ }
|
|
+ load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask);
|
|
+ __ aesenclast(xmm_result0, xmm_key);
|
|
+ __ cmpptr(len_reg, AESBlockSize);
|
|
+ __ jcc(Assembler::less, L_processTail_insr[k]);
|
|
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
|
|
+ __ pxor(xmm_result0, xmm_from0);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
|
|
+ __ addptr(pos, AESBlockSize);
|
|
+ __ subptr(len_reg, AESBlockSize);
|
|
+ __ jmp(L_singleBlockLoopTop[k]);
|
|
+
|
|
+ __ BIND(L_processTail_insr[k]);
|
|
+ __ addptr(pos, len_reg);
|
|
+ __ testptr(len_reg, 8);
|
|
+ __ jcc(Assembler::zero, L_processTail_4_insr[k]);
|
|
+ __ subptr(pos,8);
|
|
+ __ pinsrd(xmm_from0, Address(from, pos), 0);
|
|
+ __ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1);
|
|
+ __ BIND(L_processTail_4_insr[k]);
|
|
+ __ testptr(len_reg, 4);
|
|
+ __ jcc(Assembler::zero, L_processTail_2_insr[k]);
|
|
+ __ subptr(pos,4);
|
|
+ __ pslldq(xmm_from0, 4);
|
|
+ __ pinsrd(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_2_insr[k]);
|
|
+ __ testptr(len_reg, 2);
|
|
+ __ jcc(Assembler::zero, L_processTail_1_insr[k]);
|
|
+ __ subptr(pos, 2);
|
|
+ __ pslldq(xmm_from0, 2);
|
|
+ __ pinsrw(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_1_insr[k]);
|
|
+ __ testptr(len_reg, 1);
|
|
+ __ jcc(Assembler::zero, L_processTail_exit_insr[k]);
|
|
+ __ subptr(pos, 1);
|
|
+ __ pslldq(xmm_from0, 1);
|
|
+ __ pinsrb(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_exit_insr[k]);
|
|
+
|
|
+ __ movptr(saved_encCounter_start, saved_counter_param);
|
|
+ __ movdqu(Address(saved_encCounter_start, 0), xmm_result0);
|
|
+ __ pxor(xmm_result0, xmm_from0);
|
|
+
|
|
+ __ testptr(len_reg, 8);
|
|
+ __ jcc(Assembler::zero, L_processTail_4_extr[k]);
|
|
+ __ pextrd(Address(to, pos), xmm_result0, 0);
|
|
+ __ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1);
|
|
+ __ psrldq(xmm_result0, 8);
|
|
+ __ addptr(pos, 8);
|
|
+ __ BIND(L_processTail_4_extr[k]);
|
|
+ __ testptr(len_reg, 4);
|
|
+ __ jcc(Assembler::zero, L_processTail_2_extr[k]);
|
|
+ __ pextrd(Address(to, pos), xmm_result0, 0);
|
|
+ __ psrldq(xmm_result0, 4);
|
|
+ __ addptr(pos, 4);
|
|
+ __ BIND(L_processTail_2_extr[k]);
|
|
+ __ testptr(len_reg, 2);
|
|
+ __ jcc(Assembler::zero, L_processTail_1_extr[k]);
|
|
+ __ pextrb(Address(to, pos), xmm_result0, 0);
|
|
+ __ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1);
|
|
+ __ psrldq(xmm_result0, 2);
|
|
+ __ addptr(pos, 2);
|
|
+ __ BIND(L_processTail_1_extr[k]);
|
|
+ __ testptr(len_reg, 1);
|
|
+ __ jcc(Assembler::zero, L_processTail_exit_extr[k]);
|
|
+ __ pextrb(Address(to, pos), xmm_result0, 0);
|
|
+
|
|
+ __ BIND(L_processTail_exit_extr[k]);
|
|
+ __ movptr(used_addr, used_addr_param);
|
|
+ __ movl(Address(used_addr, 0), len_reg);
|
|
+ __ jmp(L_exit);
|
|
+ }
|
|
+
|
|
+ __ BIND(L_exit);
|
|
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
|
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
|
|
+ __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
|
|
+ handleSOERegisters(false /*restoring*/);
|
|
+ __ movptr(rax, len_param); // return length
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret(0);
|
|
+
|
|
+ __ BIND (L_key192_top);
|
|
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
|
|
+ __ jmp(L_multiBlock_loopTop[1]); //key192
|
|
+
|
|
+ __ BIND (L_key256_top);
|
|
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
|
|
+ __ jmp(L_multiBlock_loopTop[2]); //key192
|
|
+
|
|
+ return start;
|
|
+ }
|
|
+
|
|
+
|
|
// byte swap x86 long
|
|
address generate_ghash_long_swap_mask() {
|
|
__ align(CodeEntryAlignment);
|
|
@@ -3181,6 +3520,11 @@ class StubGenerator: public StubCodeGenerator {
|
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
|
|
}
|
|
|
|
+ if (UseAESCTRIntrinsics) {
|
|
+ StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
|
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
|
+ }
|
|
+
|
|
// Generate GHASH intrinsics code
|
|
if (UseGHASHIntrinsics) {
|
|
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
|
|
index c5811b28b..254f63392 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
|
|
@@ -3010,6 +3010,15 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+ address generate_counter_shuffle_mask() {
|
|
+ __ align(16);
|
|
+ StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
|
|
+ address start = __ pc();
|
|
+ __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
|
|
+ __ emit_data64(0x0001020304050607, relocInfo::none);
|
|
+ return start;
|
|
+ }
|
|
+
|
|
// Utility routine for loading a 128-bit key word in little endian format
|
|
// can optionally specify that the shuffle mask is already in an xmmregister
|
|
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
|
|
@@ -3021,6 +3030,18 @@ class StubGenerator: public StubCodeGenerator {
|
|
}
|
|
}
|
|
|
|
+ // Utility routine for increase 128bit counter (iv in CTR mode)
|
|
+ void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
|
|
+ __ pextrq(reg, xmmdst, 0x0);
|
|
+ __ addq(reg, inc_delta);
|
|
+ __ pinsrq(xmmdst, reg, 0x0);
|
|
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
|
|
+ __ pextrq(reg, xmmdst, 0x01); // Carry
|
|
+ __ addq(reg, 0x01);
|
|
+ __ pinsrq(xmmdst, reg, 0x01); //Carry end
|
|
+ __ BIND(next_block); // next instruction
|
|
+ }
|
|
+
|
|
// Arguments:
|
|
//
|
|
// Inputs:
|
|
@@ -3639,6 +3660,320 @@ class StubGenerator: public StubCodeGenerator {
|
|
return start;
|
|
}
|
|
|
|
+ // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
|
|
+ // to hide instruction latency
|
|
+ //
|
|
+ // Arguments:
|
|
+ //
|
|
+ // Inputs:
|
|
+ // c_rarg0 - source byte array address
|
|
+ // c_rarg1 - destination byte array address
|
|
+ // c_rarg2 - K (key) in little endian int array
|
|
+ // c_rarg3 - counter vector byte array address
|
|
+ // Linux
|
|
+ // c_rarg4 - input length
|
|
+ // c_rarg5 - saved encryptedCounter start
|
|
+ // rbp + 6 * wordSize - saved used length
|
|
+ // Windows
|
|
+ // rbp + 6 * wordSize - input length
|
|
+ // rbp + 7 * wordSize - saved encryptedCounter start
|
|
+ // rbp + 8 * wordSize - saved used length
|
|
+ //
|
|
+ // Output:
|
|
+ // rax - input length
|
|
+ //
|
|
+ address generate_counterMode_AESCrypt_Parallel() {
|
|
+ assert(UseAES, "need AES instructions and misaligned SSE support");
|
|
+ __ align(CodeEntryAlignment);
|
|
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
|
|
+ address start = __ pc();
|
|
+ const Register from = c_rarg0; // source array address
|
|
+ const Register to = c_rarg1; // destination array address
|
|
+ const Register key = c_rarg2; // key array address
|
|
+ const Register counter = c_rarg3; // counter byte array initialized from counter array address
|
|
+ // and left with the results of the last encryption block
|
|
+#ifndef _WIN64
|
|
+ const Register len_reg = c_rarg4;
|
|
+ const Register saved_encCounter_start = c_rarg5;
|
|
+ const Register used_addr = r10;
|
|
+ const Address used_mem(rbp, 2 * wordSize);
|
|
+ const Register used = r11;
|
|
+#else
|
|
+ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
|
|
+ const Address saved_encCounter_mem(rbp, 7 * wordSize); // length is on stack on Win64
|
|
+ const Address used_mem(rbp, 8 * wordSize); // length is on stack on Win64
|
|
+ const Register len_reg = r10; // pick the first volatile windows register
|
|
+ const Register saved_encCounter_start = r11;
|
|
+ const Register used_addr = r13;
|
|
+ const Register used = r14;
|
|
+#endif
|
|
+ const Register pos = rax;
|
|
+
|
|
+ const int PARALLEL_FACTOR = 6;
|
|
+ const XMMRegister xmm_counter_shuf_mask = xmm0;
|
|
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
|
|
+ const XMMRegister xmm_curr_counter = xmm2;
|
|
+
|
|
+ const XMMRegister xmm_key_tmp0 = xmm3;
|
|
+ const XMMRegister xmm_key_tmp1 = xmm4;
|
|
+
|
|
+ // registers holding the four results in the parallelized loop
|
|
+ const XMMRegister xmm_result0 = xmm5;
|
|
+ const XMMRegister xmm_result1 = xmm6;
|
|
+ const XMMRegister xmm_result2 = xmm7;
|
|
+ const XMMRegister xmm_result3 = xmm8;
|
|
+ const XMMRegister xmm_result4 = xmm9;
|
|
+ const XMMRegister xmm_result5 = xmm10;
|
|
+
|
|
+ const XMMRegister xmm_from0 = xmm11;
|
|
+ const XMMRegister xmm_from1 = xmm12;
|
|
+ const XMMRegister xmm_from2 = xmm13;
|
|
+ const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64.
|
|
+ const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. Because xmm_key_tmp0~1 are useless when loading input text
|
|
+ const XMMRegister xmm_from5 = xmm4;
|
|
+
|
|
+ //for key_128, key_192, key_256
|
|
+ const int rounds[3] = {10, 12, 14};
|
|
+ Label L_exit_preLoop, L_preLoop_start;
|
|
+ Label L_multiBlock_loopTop[3];
|
|
+ Label L_singleBlockLoopTop[3];
|
|
+ Label L__incCounter[3][6]; //for 6 blocks
|
|
+ Label L__incCounter_single[3]; //for single block, key128, key192, key256
|
|
+ Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
|
|
+ Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
|
|
+
|
|
+ Label L_exit;
|
|
+
|
|
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
|
|
+
|
|
+#ifdef _WIN64
|
|
+ // save the xmm registers which must be preserved 6-14
|
|
+ const int XMM_REG_NUM_KEY_LAST = 14;
|
|
+ __ subptr(rsp, -rsp_after_call_off * wordSize);
|
|
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
|
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
|
|
+ }
|
|
+
|
|
+ const Address r13_save(rbp, rdi_off * wordSize);
|
|
+ const Address r14_save(rbp, rsi_off * wordSize);
|
|
+
|
|
+ __ movptr(r13_save, r13);
|
|
+ __ movptr(r14_save, r14);
|
|
+
|
|
+ // on win64, fill len_reg from stack position
|
|
+ __ movl(len_reg, len_mem);
|
|
+ __ movptr(saved_encCounter_start, saved_encCounter_mem);
|
|
+ __ movptr(used_addr, used_mem);
|
|
+ __ movl(used, Address(used_addr, 0));
|
|
+#else
|
|
+ __ push(len_reg); // Save
|
|
+ __ movptr(used_addr, used_mem);
|
|
+ __ movl(used, Address(used_addr, 0));
|
|
+#endif
|
|
+
|
|
+ __ push(rbx); // Save RBX
|
|
+ __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
|
|
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
|
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
|
|
+ __ movptr(pos, 0);
|
|
+
|
|
+ // Use the partially used encrpyted counter from last invocation
|
|
+ __ BIND(L_preLoop_start);
|
|
+ __ cmpptr(used, 16);
|
|
+ __ jcc(Assembler::aboveEqual, L_exit_preLoop);
|
|
+ __ cmpptr(len_reg, 0);
|
|
+ __ jcc(Assembler::lessEqual, L_exit_preLoop);
|
|
+ __ movb(rbx, Address(saved_encCounter_start, used));
|
|
+ __ xorb(rbx, Address(from, pos));
|
|
+ __ movb(Address(to, pos), rbx);
|
|
+ __ addptr(pos, 1);
|
|
+ __ addptr(used, 1);
|
|
+ __ subptr(len_reg, 1);
|
|
+
|
|
+ __ jmp(L_preLoop_start);
|
|
+
|
|
+ __ BIND(L_exit_preLoop);
|
|
+ __ movl(Address(used_addr, 0), used);
|
|
+
|
|
+ // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
|
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
|
+ __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
|
+ __ cmpl(rbx, 52);
|
|
+ __ jcc(Assembler::equal, L_multiBlock_loopTop[1]);
|
|
+ __ cmpl(rbx, 60);
|
|
+ __ jcc(Assembler::equal, L_multiBlock_loopTop[2]);
|
|
+
|
|
+#define CTR_DoSix(opc, src_reg) \
|
|
+ __ opc(xmm_result0, src_reg); \
|
|
+ __ opc(xmm_result1, src_reg); \
|
|
+ __ opc(xmm_result2, src_reg); \
|
|
+ __ opc(xmm_result3, src_reg); \
|
|
+ __ opc(xmm_result4, src_reg); \
|
|
+ __ opc(xmm_result5, src_reg);
|
|
+
|
|
+ // k == 0 : generate code for key_128
|
|
+ // k == 1 : generate code for key_192
|
|
+ // k == 2 : generate code for key_256
|
|
+ for (int k = 0; k < 3; ++k) {
|
|
+ //multi blocks starts here
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ BIND(L_multiBlock_loopTop[k]);
|
|
+ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
|
|
+ __ jcc(Assembler::less, L_singleBlockLoopTop[k]);
|
|
+ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
|
|
+
|
|
+ //load, then increase counters
|
|
+ CTR_DoSix(movdqa, xmm_curr_counter);
|
|
+ inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]);
|
|
+ inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]);
|
|
+ inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]);
|
|
+ inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]);
|
|
+ inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]);
|
|
+ inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]);
|
|
+ CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR
|
|
+ CTR_DoSix(pxor, xmm_key_tmp0); //PXOR with Round 0 key
|
|
+
|
|
+ //load two ROUND_KEYs at a time
|
|
+ for (int i = 1; i < rounds[k]; ) {
|
|
+ load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask);
|
|
+ load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask);
|
|
+ CTR_DoSix(aesenc, xmm_key_tmp1);
|
|
+ i++;
|
|
+ if (i != rounds[k]) {
|
|
+ CTR_DoSix(aesenc, xmm_key_tmp0);
|
|
+ } else {
|
|
+ CTR_DoSix(aesenclast, xmm_key_tmp0);
|
|
+ }
|
|
+ i++;
|
|
+ }
|
|
+
|
|
+ // get next PARALLEL_FACTOR blocks into xmm_result registers
|
|
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
|
|
+ __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
|
|
+ __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
|
|
+ __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
|
|
+ __ movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize));
|
|
+ __ movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize));
|
|
+
|
|
+ __ pxor(xmm_result0, xmm_from0);
|
|
+ __ pxor(xmm_result1, xmm_from1);
|
|
+ __ pxor(xmm_result2, xmm_from2);
|
|
+ __ pxor(xmm_result3, xmm_from3);
|
|
+ __ pxor(xmm_result4, xmm_from4);
|
|
+ __ pxor(xmm_result5, xmm_from5);
|
|
+
|
|
+ // store 6 results into the next 64 bytes of output
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5);
|
|
+
|
|
+ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
|
|
+ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
|
|
+ __ jmp(L_multiBlock_loopTop[k]);
|
|
+
|
|
+ // singleBlock starts here
|
|
+ __ align(OptoLoopAlignment);
|
|
+ __ BIND(L_singleBlockLoopTop[k]);
|
|
+ __ cmpptr(len_reg, 0);
|
|
+ __ jcc(Assembler::lessEqual, L_exit);
|
|
+ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
|
|
+ __ movdqa(xmm_result0, xmm_curr_counter);
|
|
+ inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]);
|
|
+ __ pshufb(xmm_result0, xmm_counter_shuf_mask);
|
|
+ __ pxor(xmm_result0, xmm_key_tmp0);
|
|
+ for (int i = 1; i < rounds[k]; i++) {
|
|
+ load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask);
|
|
+ __ aesenc(xmm_result0, xmm_key_tmp0);
|
|
+ }
|
|
+ load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask);
|
|
+ __ aesenclast(xmm_result0, xmm_key_tmp0);
|
|
+ __ cmpptr(len_reg, AESBlockSize);
|
|
+ __ jcc(Assembler::less, L_processTail_insr[k]);
|
|
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
|
|
+ __ pxor(xmm_result0, xmm_from0);
|
|
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
|
|
+ __ addptr(pos, AESBlockSize);
|
|
+ __ subptr(len_reg, AESBlockSize);
|
|
+ __ jmp(L_singleBlockLoopTop[k]);
|
|
+ __ BIND(L_processTail_insr[k]);
|
|
+ __ addptr(pos, len_reg);
|
|
+ __ testptr(len_reg, 8);
|
|
+ __ jcc(Assembler::zero, L_processTail_4_insr[k]);
|
|
+ __ subptr(pos,8);
|
|
+ __ pinsrq(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_4_insr[k]);
|
|
+ __ testptr(len_reg, 4);
|
|
+ __ jcc(Assembler::zero, L_processTail_2_insr[k]);
|
|
+ __ subptr(pos,4);
|
|
+ __ pslldq(xmm_from0, 4);
|
|
+ __ pinsrd(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_2_insr[k]);
|
|
+ __ testptr(len_reg, 2);
|
|
+ __ jcc(Assembler::zero, L_processTail_1_insr[k]);
|
|
+ __ subptr(pos, 2);
|
|
+ __ pslldq(xmm_from0, 2);
|
|
+ __ pinsrw(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_1_insr[k]);
|
|
+ __ testptr(len_reg, 1);
|
|
+ __ jcc(Assembler::zero, L_processTail_exit_insr[k]);
|
|
+ __ subptr(pos, 1);
|
|
+ __ pslldq(xmm_from0, 1);
|
|
+ __ pinsrb(xmm_from0, Address(from, pos), 0);
|
|
+ __ BIND(L_processTail_exit_insr[k]);
|
|
+
|
|
+ __ movdqu(Address(saved_encCounter_start, 0), xmm_result0);
|
|
+ __ pxor(xmm_result0, xmm_from0);
|
|
+
|
|
+ __ testptr(len_reg, 8);
|
|
+ __ jcc(Assembler::zero, L_processTail_4_extr[k]);
|
|
+ __ pextrq(Address(to, pos), xmm_result0, 0);
|
|
+ __ psrldq(xmm_result0, 8);
|
|
+ __ addptr(pos, 8);
|
|
+ __ BIND(L_processTail_4_extr[k]);
|
|
+ __ testptr(len_reg, 4);
|
|
+ __ jcc(Assembler::zero, L_processTail_2_extr[k]);
|
|
+ __ pextrd(Address(to, pos), xmm_result0, 0);
|
|
+ __ psrldq(xmm_result0, 4);
|
|
+ __ addptr(pos, 4);
|
|
+ __ BIND(L_processTail_2_extr[k]);
|
|
+ __ testptr(len_reg, 2);
|
|
+ __ jcc(Assembler::zero, L_processTail_1_extr[k]);
|
|
+ __ pextrw(Address(to, pos), xmm_result0, 0);
|
|
+ __ psrldq(xmm_result0, 2);
|
|
+ __ addptr(pos, 2);
|
|
+ __ BIND(L_processTail_1_extr[k]);
|
|
+ __ testptr(len_reg, 1);
|
|
+ __ jcc(Assembler::zero, L_processTail_exit_extr[k]);
|
|
+ __ pextrb(Address(to, pos), xmm_result0, 0);
|
|
+
|
|
+ __ BIND(L_processTail_exit_extr[k]);
|
|
+ __ movl(Address(used_addr, 0), len_reg);
|
|
+ __ jmp(L_exit);
|
|
+
|
|
+ }
|
|
+
|
|
+ __ BIND(L_exit);
|
|
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
|
|
+ __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
|
|
+ __ pop(rbx); // pop the saved RBX.
|
|
+#ifdef _WIN64
|
|
+ // restore regs belonging to calling function
|
|
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
|
|
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
|
|
+ }
|
|
+ __ movl(rax, len_mem);
|
|
+ __ movptr(r13, r13_save);
|
|
+ __ movptr(r14, r14_save);
|
|
+#else
|
|
+ __ pop(rax); // return 'len'
|
|
+#endif
|
|
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
|
|
+ __ ret(0);
|
|
+ return start;
|
|
+ }
|
|
|
|
// byte swap x86 long
|
|
address generate_ghash_long_swap_mask() {
|
|
@@ -4239,12 +4574,15 @@ class StubGenerator: public StubCodeGenerator {
|
|
// don't bother generating these AES intrinsic stubs unless global flag is set
|
|
if (UseAESIntrinsics) {
|
|
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
|
|
-
|
|
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
|
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
|
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
|
}
|
|
+ if (UseAESCTRIntrinsics){
|
|
+ StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
|
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
|
+ }
|
|
|
|
// Generate GHASH intrinsics code
|
|
if (UseGHASHIntrinsics) {
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
|
|
index 9b0d8fc75..617879377 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
|
|
@@ -33,6 +33,7 @@
|
|
|
|
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
|
|
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
|
+address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
|
|
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
|
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
|
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
|
|
index bb160486c..70b5a34ac 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
|
|
@@ -33,6 +33,10 @@
|
|
static address _verify_mxcsr_entry;
|
|
// shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
|
|
static address _key_shuffle_mask_addr;
|
|
+
|
|
+ //shuffle mask for big-endian 128-bit integers
|
|
+ static address _counter_shuffle_mask_addr;
|
|
+
|
|
// masks and table for CRC32
|
|
static uint64_t _crc_by128_masks[];
|
|
static juint _crc_table[];
|
|
@@ -43,6 +47,7 @@
|
|
public:
|
|
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
|
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
|
+ static address counter_shuffle_mask_addr() { return _counter_shuffle_mask_addr; }
|
|
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
|
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
|
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
|
|
index bca5d493c..538f83e69 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
|
|
@@ -31,7 +31,7 @@
|
|
|
|
enum platform_dependent_constants {
|
|
code_size1 = 9000, // simply increase if too small (assembler will crash if too small)
|
|
- code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
|
|
+ code_size2 = 25800 // simply increase if too small (assembler will crash if too small)
|
|
};
|
|
|
|
class x86 {
|
|
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
|
|
index b048fd74e..f963cd2f8 100644
|
|
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
|
|
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
|
|
@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _
|
|
|
|
enum platform_dependent_constants {
|
|
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
|
|
- code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
|
|
+ code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
|
|
};
|
|
|
|
class x86 {
|
|
diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
|
|
index 46b3e32ea..ce3037d76 100644
|
|
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
|
|
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
|
|
@@ -573,6 +573,28 @@ void VM_Version::get_processor_features() {
|
|
}
|
|
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
|
}
|
|
+
|
|
+ // --AES-CTR begins--
|
|
+ if (!UseAESIntrinsics) {
|
|
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+ } else {
|
|
+ if(supports_sse4_1() && UseSSE >= 4) {
|
|
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
|
|
+ }
|
|
+ } else {
|
|
+ // The AES-CTR intrinsic stubs require AES instruction support (of course)
|
|
+ // but also require sse4.1 mode or higher for instructions it use.
|
|
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
|
|
+ }
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+ }
|
|
+ // --AES-CTR ends--
|
|
}
|
|
} else if (UseAES || UseAESIntrinsics) {
|
|
if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
|
|
@@ -583,6 +605,10 @@ void VM_Version::get_processor_features() {
|
|
warning("AES intrinsics are not available on this CPU");
|
|
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
|
}
|
|
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ warning("AES-CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
}
|
|
|
|
// Use CLMUL instructions if available.
|
|
@@ -606,6 +632,16 @@ void VM_Version::get_processor_features() {
|
|
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
|
|
}
|
|
|
|
+ if (UseAESIntrinsics) {
|
|
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
|
|
+ UseAESCTRIntrinsics = true;
|
|
+ }
|
|
+ } else if (UseAESCTRIntrinsics) {
|
|
+ if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics))
|
|
+ warning("AES/CTR intrinsics are not available on this CPU");
|
|
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
|
+ }
|
|
+
|
|
// GHASH/GCM intrinsics
|
|
if (UseCLMUL && (UseSSE > 2)) {
|
|
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
|
|
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
|
index 942d172a1..4ca2a3ad4 100644
|
|
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
|
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
|
|
@@ -846,6 +846,10 @@
|
|
do_name( decrypt_name, "implDecrypt") \
|
|
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \
|
|
\
|
|
+ do_class(com_sun_crypto_provider_counterMode, "com/sun/crypto/provider/CounterMode") \
|
|
+ do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
|
|
+ do_name( crypt_name, "implCrypt") \
|
|
+ \
|
|
/* support for sun.security.provider.SHA */ \
|
|
do_class(sun_security_provider_sha, "sun/security/provider/SHA") \
|
|
do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R) \
|
|
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
|
|
index 6f8ffe608..a0e497f08 100644
|
|
--- a/hotspot/src/share/vm/opto/escape.cpp
|
|
+++ b/hotspot/src/share/vm/opto/escape.cpp
|
|
@@ -952,6 +952,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
|
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
|
|
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
|
|
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
|
|
+ strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
|
|
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
|
|
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
|
|
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
|
|
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
|
|
index bb721f6f1..2add82dd1 100644
|
|
--- a/hotspot/src/share/vm/opto/library_call.cpp
|
|
+++ b/hotspot/src/share/vm/opto/library_call.cpp
|
|
@@ -196,6 +196,7 @@ class LibraryCallKit : public GraphKit {
|
|
return generate_method_call(method_id, true, false);
|
|
}
|
|
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
|
|
+ Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
|
|
|
|
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
|
|
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
|
|
@@ -309,7 +310,9 @@ class LibraryCallKit : public GraphKit {
|
|
bool inline_reference_get();
|
|
bool inline_aescrypt_Block(vmIntrinsics::ID id);
|
|
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
|
|
+ bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
|
|
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
|
|
+ Node* inline_counterMode_AESCrypt_predicate();
|
|
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
|
|
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
|
|
bool inline_ghash_processBlocks();
|
|
@@ -558,6 +561,13 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
|
predicates = 1;
|
|
break;
|
|
|
|
+ case vmIntrinsics::_counterMode_AESCrypt:
|
|
+ if (!UseAESCTRIntrinsics) {
|
|
+ return NULL;
|
|
+ }
|
|
+ predicates = 1;
|
|
+ break;
|
|
+
|
|
case vmIntrinsics::_sha_implCompress:
|
|
if (!UseSHA1Intrinsics) return NULL;
|
|
break;
|
|
@@ -950,6 +960,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
|
|
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
|
|
|
|
+ case vmIntrinsics::_counterMode_AESCrypt:
|
|
+ return inline_counterMode_AESCrypt(intrinsic_id());
|
|
+
|
|
case vmIntrinsics::_sha_implCompress:
|
|
case vmIntrinsics::_sha2_implCompress:
|
|
case vmIntrinsics::_sha5_implCompress:
|
|
@@ -1021,6 +1034,8 @@ Node* LibraryCallKit::try_to_predicate(int predicate) {
|
|
return inline_cipherBlockChaining_AESCrypt_predicate(false);
|
|
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
|
|
return inline_cipherBlockChaining_AESCrypt_predicate(true);
|
|
+ case vmIntrinsics::_counterMode_AESCrypt:
|
|
+ return inline_counterMode_AESCrypt_predicate();
|
|
case vmIntrinsics::_digestBase_implCompressMB:
|
|
return inline_digestBase_implCompressMB_predicate(predicate);
|
|
|
|
@@ -6581,6 +6596,39 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field
|
|
return loadedField;
|
|
}
|
|
|
|
+Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
|
|
+ bool is_exact = true, bool is_static = false,
|
|
+ ciInstanceKlass * fromKls = NULL) {
|
|
+ if (fromKls == NULL) {
|
|
+ const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
|
|
+ assert(tinst != NULL, "obj is null");
|
|
+ assert(tinst->klass()->is_loaded(), "obj is not loaded");
|
|
+ assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
|
|
+ fromKls = tinst->klass()->as_instance_klass();
|
|
+ }
|
|
+ else {
|
|
+ assert(is_static, "only for static field access");
|
|
+ }
|
|
+ ciField* field = fromKls->get_field_by_name(ciSymbol::make(fieldName),
|
|
+ ciSymbol::make(fieldTypeString),
|
|
+ is_static);
|
|
+
|
|
+ assert(field != NULL, "undefined field");
|
|
+ assert(!field->is_volatile(), "not defined for volatile fields");
|
|
+
|
|
+ if (is_static) {
|
|
+ const TypeInstPtr* tip = TypeInstPtr::make(fromKls->java_mirror());
|
|
+ fromObj = makecon(tip);
|
|
+ }
|
|
+
|
|
+ // Next code copied from Parse::do_get_xxx():
|
|
+
|
|
+ // Compute address and memory type.
|
|
+ int offset = field->offset_in_bytes();
|
|
+ Node *adr = basic_plus_adr(fromObj, fromObj, offset);
|
|
+
|
|
+ return adr;
|
|
+}
|
|
|
|
//------------------------------inline_aescrypt_Block-----------------------
|
|
bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
|
|
@@ -6747,6 +6795,90 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
|
|
return true;
|
|
}
|
|
|
|
+//------------------------------inline_counterMode_AESCrypt-----------------------
|
|
+bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
|
|
+ assert(UseAES, "need AES instruction support");
|
|
+ if (!UseAESCTRIntrinsics) return false;
|
|
+
|
|
+ address stubAddr = NULL;
|
|
+ const char *stubName = NULL;
|
|
+ if (id == vmIntrinsics::_counterMode_AESCrypt) {
|
|
+ stubAddr = StubRoutines::counterMode_AESCrypt();
|
|
+ stubName = "counterMode_AESCrypt";
|
|
+ }
|
|
+ if (stubAddr == NULL) return false;
|
|
+
|
|
+ Node* counterMode_object = argument(0);
|
|
+ Node* src = argument(1);
|
|
+ Node* src_offset = argument(2);
|
|
+ Node* len = argument(3);
|
|
+ Node* dest = argument(4);
|
|
+ Node* dest_offset = argument(5);
|
|
+
|
|
+ // (1) src and dest are arrays.
|
|
+ const Type* src_type = src->Value(&_gvn);
|
|
+ const Type* dest_type = dest->Value(&_gvn);
|
|
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
|
|
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
|
|
+ assert(top_src != NULL && top_src->klass() != NULL &&
|
|
+ top_dest != NULL && top_dest->klass() != NULL, "args are strange");
|
|
+
|
|
+ // checks are the responsibility of the caller
|
|
+ Node* src_start = src;
|
|
+ Node* dest_start = dest;
|
|
+ if (src_offset != NULL || dest_offset != NULL) {
|
|
+ assert(src_offset != NULL && dest_offset != NULL, "");
|
|
+ src_start = array_element_address(src, src_offset, T_BYTE);
|
|
+ dest_start = array_element_address(dest, dest_offset, T_BYTE);
|
|
+ }
|
|
+
|
|
+ // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
|
|
+ // (because of the predicated logic executed earlier).
|
|
+ // so we cast it here safely.
|
|
+ // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
|
|
+ Node* embeddedCipherObj = load_field_from_object(counterMode_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
|
|
+ if (embeddedCipherObj == NULL) return false;
|
|
+ // cast it to what we know it will be at runtime
|
|
+ const TypeInstPtr* tinst = _gvn.type(counterMode_object)->isa_instptr();
|
|
+ assert(tinst != NULL, "CTR obj is null");
|
|
+ assert(tinst->klass()->is_loaded(), "CTR obj is not loaded");
|
|
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
|
|
+ assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
|
|
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
|
|
+ const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
|
|
+ const TypeOopPtr* xtype = aklass->as_instance_type();
|
|
+ Node* aescrypt_object = new (C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
|
|
+ aescrypt_object = _gvn.transform(aescrypt_object);
|
|
+ // we need to get the start of the aescrypt_object's expanded key array
|
|
+ Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
|
|
+ if (k_start == NULL) return false;
|
|
+ // similarly, get the start address of the r vector
|
|
+ Node* obj_counter = load_field_from_object(counterMode_object, "counter", "[B", /*is_exact*/ false);
|
|
+ if (obj_counter == NULL) return false;
|
|
+ Node* cnt_start = array_element_address(obj_counter, intcon(0), T_BYTE);
|
|
+
|
|
+ Node* saved_encCounter = load_field_from_object(counterMode_object, "encryptedCounter", "[B", /*is_exact*/ false);
|
|
+ if (saved_encCounter == NULL) return false;
|
|
+ Node* saved_encCounter_start = array_element_address(saved_encCounter, intcon(0), T_BYTE);
|
|
+ Node* used = field_address_from_object(counterMode_object, "used", "I", /*is_exact*/ false);
|
|
+
|
|
+ Node* ctrCrypt;
|
|
+ if (Matcher::pass_original_key_for_aes()) {
|
|
+ // no SPARC version for AES/CTR intrinsics now.
|
|
+ return false;
|
|
+ }
|
|
+ // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
|
|
+ ctrCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
|
|
+ OptoRuntime::counterMode_aescrypt_Type(),
|
|
+ stubAddr, stubName, TypePtr::BOTTOM,
|
|
+ src_start, dest_start, k_start, cnt_start, len, saved_encCounter_start, used);
|
|
+
|
|
+ // return cipher length (int)
|
|
+ Node* retvalue = _gvn.transform(new (C) ProjNode(ctrCrypt, TypeFunc::Parms));
|
|
+ set_result(retvalue);
|
|
+ return true;
|
|
+}
|
|
+
|
|
//------------------------------get_key_start_from_aescrypt_object-----------------------
|
|
Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
|
|
#ifdef PPC64
|
|
@@ -6841,6 +6973,48 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt
|
|
return _gvn.transform(region);
|
|
}
|
|
|
|
+//----------------------------inline_counterMode_AESCrypt_predicate----------------------------
|
|
+// Return node representing slow path of predicate check.
|
|
+// the pseudo code we want to emulate with this predicate is:
|
|
+// for encryption:
|
|
+// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
|
|
+// for decryption:
|
|
+// if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
|
|
+// note cipher==plain is more conservative than the original java code but that's OK
|
|
+//
|
|
+
|
|
+Node* LibraryCallKit::inline_counterMode_AESCrypt_predicate() {
|
|
+ // The receiver was checked for NULL already.
|
|
+ Node* objCTR = argument(0);
|
|
+
|
|
+ // Load embeddedCipher field of CipherBlockChaining object.
|
|
+ Node* embeddedCipherObj = load_field_from_object(objCTR, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
|
|
+
|
|
+ // get AESCrypt klass for instanceOf check
|
|
+ // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
|
|
+ // will have same classloader as CipherBlockChaining object
|
|
+ const TypeInstPtr* tinst = _gvn.type(objCTR)->isa_instptr();
|
|
+ assert(tinst != NULL, "CTRobj is null");
|
|
+ assert(tinst->klass()->is_loaded(), "CTRobj is not loaded");
|
|
+
|
|
+ // we want to do an instanceof comparison against the AESCrypt class
|
|
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
|
|
+ if (!klass_AESCrypt->is_loaded()) {
|
|
+ // if AESCrypt is not even loaded, we never take the intrinsic fast path
|
|
+ Node* ctrl = control();
|
|
+ set_control(top()); // no regular fast path
|
|
+ return ctrl;
|
|
+ }
|
|
+
|
|
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
|
|
+ Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
|
|
+ Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
|
|
+ Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
|
|
+ Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
|
|
+
|
|
+ return instof_false; // even if it is NULL
|
|
+}
|
|
+
|
|
//------------------------------inline_ghash_processBlocks
|
|
bool LibraryCallKit::inline_ghash_processBlocks() {
|
|
address stubAddr;
|
|
diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
|
|
index 0a86211ba..1c51be19b 100644
|
|
--- a/hotspot/src/share/vm/opto/runtime.cpp
|
|
+++ b/hotspot/src/share/vm/opto/runtime.cpp
|
|
@@ -1021,6 +1021,35 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
|
|
return TypeFunc::make(domain, range);
|
|
}
|
|
|
|
+//for counterMode calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
|
|
+const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
|
|
+ // create input type (domain)
|
|
+ int num_args = 7;
|
|
+ if (Matcher::pass_original_key_for_aes()) {
|
|
+ num_args = 8;
|
|
+ }
|
|
+ int argcnt = num_args;
|
|
+ const Type** fields = TypeTuple::fields(argcnt);
|
|
+ int argp = TypeFunc::Parms;
|
|
+ fields[argp++] = TypePtr::NOTNULL; // src
|
|
+ fields[argp++] = TypePtr::NOTNULL; // dest
|
|
+ fields[argp++] = TypePtr::NOTNULL; // k array
|
|
+ fields[argp++] = TypePtr::NOTNULL; // counter array
|
|
+ fields[argp++] = TypeInt::INT; // src len
|
|
+ fields[argp++] = TypePtr::NOTNULL; // saved_encCounter
|
|
+ fields[argp++] = TypePtr::NOTNULL; // saved used addr
|
|
+ if (Matcher::pass_original_key_for_aes()) {
|
|
+ fields[argp++] = TypePtr::NOTNULL; // original k array
|
|
+ }
|
|
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
|
|
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
|
|
+ // returning cipher len (int)
|
|
+ fields = TypeTuple::fields(1);
|
|
+ fields[TypeFunc::Parms + 0] = TypeInt::INT;
|
|
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
|
|
+ return TypeFunc::make(domain, range);
|
|
+}
|
|
+
|
|
/*
|
|
* void implCompress(byte[] buf, int ofs)
|
|
*/
|
|
diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
|
|
index 47133d58c..f27e7d507 100644
|
|
--- a/hotspot/src/share/vm/opto/runtime.hpp
|
|
+++ b/hotspot/src/share/vm/opto/runtime.hpp
|
|
@@ -299,6 +299,7 @@ private:
|
|
|
|
static const TypeFunc* aescrypt_block_Type();
|
|
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
|
|
+ static const TypeFunc* counterMode_aescrypt_Type();
|
|
|
|
static const TypeFunc* sha_implCompress_Type();
|
|
static const TypeFunc* digestBase_implCompressMB_Type();
|
|
diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
|
|
index 65dfcf69b..91e52f033 100644
|
|
--- a/hotspot/src/share/vm/runtime/globals.hpp
|
|
+++ b/hotspot/src/share/vm/runtime/globals.hpp
|
|
@@ -734,6 +734,9 @@ class CommandLineFlags {
|
|
product(bool, UseAESIntrinsics, false, \
|
|
"Use intrinsics for AES versions of crypto") \
|
|
\
|
|
+ product(bool, UseAESCTRIntrinsics, false, \
|
|
+ "Use intrinsics for the paralleled version of AES/CTR crypto") \
|
|
+ \
|
|
product(bool, UseSHA1Intrinsics, false, \
|
|
"Use intrinsics for SHA-1 crypto hash function") \
|
|
\
|
|
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
|
index f2106d13a..d66237137 100644
|
|
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
|
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
|
|
@@ -124,6 +124,7 @@ address StubRoutines::_aescrypt_encryptBlock = NULL;
|
|
address StubRoutines::_aescrypt_decryptBlock = NULL;
|
|
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
|
|
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
|
|
+address StubRoutines::_counterMode_AESCrypt = NULL;
|
|
address StubRoutines::_ghash_processBlocks = NULL;
|
|
|
|
address StubRoutines::_sha1_implCompress = NULL;
|
|
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
|
index 16075d9f4..9fb589540 100644
|
|
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
|
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
|
|
@@ -202,6 +202,7 @@ class StubRoutines: AllStatic {
|
|
static address _aescrypt_decryptBlock;
|
|
static address _cipherBlockChaining_encryptAESCrypt;
|
|
static address _cipherBlockChaining_decryptAESCrypt;
|
|
+ static address _counterMode_AESCrypt;
|
|
static address _ghash_processBlocks;
|
|
|
|
static address _sha1_implCompress;
|
|
@@ -370,6 +371,7 @@ class StubRoutines: AllStatic {
|
|
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
|
|
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
|
|
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
|
|
+ static address counterMode_AESCrypt() { return _counterMode_AESCrypt; }
|
|
static address ghash_processBlocks() { return _ghash_processBlocks; }
|
|
|
|
static address sha1_implCompress() { return _sha1_implCompress; }
|
|
diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp
|
|
index 3f2bfeb74..842b5840d 100644
|
|
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp
|
|
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp
|
|
@@ -815,6 +815,7 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
|
|
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
|
|
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
|
|
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
|
|
+ static_field(StubRoutines, _counterMode_AESCrypt, address) \
|
|
static_field(StubRoutines, _ghash_processBlocks, address) \
|
|
static_field(StubRoutines, _updateBytesCRC32, address) \
|
|
static_field(StubRoutines, _crc_table_adr, address) \
|
|
diff --git a/hotspot/test/compiler/7184394/TestAESBase.java b/hotspot/test/compiler/7184394/TestAESBase.java
|
|
index 5c3e6881e..afda2a1f7 100644
|
|
--- a/hotspot/test/compiler/7184394/TestAESBase.java
|
|
+++ b/hotspot/test/compiler/7184394/TestAESBase.java
|
|
@@ -106,8 +106,8 @@ abstract public class TestAESBase {
|
|
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
|
|
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
|
|
|
|
- // CBC init
|
|
- if (mode.equals("CBC")) {
|
|
+ // CBC or CTR init
|
|
+ if (mode.equals("CBC") || mode.equals("CTR")) {
|
|
IvParameterSpec initVector = new IvParameterSpec(iv);
|
|
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
|
|
algParams = cipher.getParameters();
|
|
diff --git a/hotspot/test/compiler/7184394/TestAESMain.java b/hotspot/test/compiler/7184394/TestAESMain.java
|
|
index ddd8eeaef..65949420a 100644
|
|
--- a/hotspot/test/compiler/7184394/TestAESMain.java
|
|
+++ b/hotspot/test/compiler/7184394/TestAESMain.java
|
|
@@ -48,6 +48,13 @@
|
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
|
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
|
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencOutputOffset=1 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DdecOutputOffset=1 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
|
|
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
|
|
*
|
|
* @author Tom Deneau
|
|
*/
|
|
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java b/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
|
|
index aea9336c9..c2bd38a71 100644
|
|
--- a/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
|
|
+++ b/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
|
|
@@ -39,10 +39,10 @@ import java.security.InvalidKeyException;
|
|
* @author Andreas Sterbenz
|
|
* @since 1.4.2
|
|
*/
|
|
-final class CounterMode extends FeedbackCipher {
|
|
+class CounterMode extends FeedbackCipher {
|
|
|
|
// current counter value
|
|
- private final byte[] counter;
|
|
+ final byte[] counter;
|
|
|
|
// encrypted bytes of the previous counter value
|
|
private final byte[] encryptedCounter;
|
|
@@ -137,7 +137,7 @@ final class CounterMode extends FeedbackCipher {
|
|
* <code>cipherOffset</code>.
|
|
*
|
|
* @param in the buffer with the input data to be encrypted
|
|
- * @param inOffset the offset in <code>plain</code>
|
|
+ * @param inOff the offset in <code>plain</code>
|
|
* @param len the length of the input data
|
|
* @param out the buffer for the result
|
|
* @param outOff the offset in <code>cipher</code>
|
|
@@ -176,6 +176,11 @@ final class CounterMode extends FeedbackCipher {
|
|
RangeUtil.nullAndBoundsCheck(in, inOff, len);
|
|
RangeUtil.nullAndBoundsCheck(out, outOff, len);
|
|
|
|
+ return implCrypt(in, inOff, len, out, outOff);
|
|
+ }
|
|
+
|
|
+ // Implementation of crpyt() method. Possibly replaced with a compiler intrinsic.
|
|
+ private int implCrypt(byte[] in, int inOff, int len, byte[] out, int outOff) {
|
|
int result = len;
|
|
while (len-- > 0) {
|
|
if (used >= blockSize) {
|
|
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java b/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
|
|
index f8a3eaa0a..6a394e448 100644
|
|
--- a/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
|
|
+++ b/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
|
+ * Copyright (c) 2013, 2017 Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
@@ -29,52 +29,43 @@
|
|
|
|
package com.sun.crypto.provider;
|
|
|
|
-import java.security.*;
|
|
-import javax.crypto.*;
|
|
+import javax.crypto.IllegalBlockSizeException;
|
|
import static com.sun.crypto.provider.AESConstants.AES_BLOCK_SIZE;
|
|
|
|
/**
|
|
* This class represents the GCTR function defined in NIST 800-38D
|
|
- * under section 6.5. It needs to be constructed w/ an initialized
|
|
- * cipher object, and initial counter block(ICB). Given an input X
|
|
- * of arbitrary length, it processes and returns an output which has
|
|
- * the same length as X. The invariants of this class are:
|
|
- *
|
|
- * (1) The length of intialCounterBlk (and also of its clones, e.g.,
|
|
- * fields counter and counterSave) is equal to AES_BLOCK_SIZE.
|
|
- *
|
|
- * (2) After construction, the field counter never becomes null, it
|
|
- * always contains a byte array of length AES_BLOCK_SIZE.
|
|
+ * under section 6.5. With a given cipher object and initial counter
|
|
+ * block, a counter mode operation is performed. Blocksize is limited
|
|
+ * to 16 bytes.
|
|
*
|
|
* If any invariant is broken, failures can occur because the
|
|
* AESCrypt.encryptBlock method can be intrinsified on the HotSpot VM
|
|
* (see JDK-8067648 for details).
|
|
*
|
|
+ * The counter mode operations can be intrinsified and parallelized
|
|
+ * by using CounterMode.implCrypt() if HotSpot VM supports it on the
|
|
+ * architecture.
|
|
+ *
|
|
* <p>This function is used in the implementation of GCM mode.
|
|
*
|
|
* @since 1.8
|
|
*/
|
|
-final class GCTR {
|
|
-
|
|
- // these fields should not change after the object has been constructed
|
|
- private final SymmetricCipher aes;
|
|
- private final byte[] icb;
|
|
-
|
|
- // the current counter value
|
|
- private byte[] counter;
|
|
+final class GCTR extends CounterMode {
|
|
|
|
- // needed for save/restore calls
|
|
- private byte[] counterSave = null;
|
|
-
|
|
- // NOTE: cipher should already be initialized
|
|
GCTR(SymmetricCipher cipher, byte[] initialCounterBlk) {
|
|
- this.aes = cipher;
|
|
+ super(cipher);
|
|
if (initialCounterBlk.length != AES_BLOCK_SIZE) {
|
|
throw new RuntimeException("length of initial counter block (" + initialCounterBlk.length +
|
|
") not equal to AES_BLOCK_SIZE (" + AES_BLOCK_SIZE + ")");
|
|
}
|
|
- this.icb = initialCounterBlk;
|
|
- this.counter = icb.clone();
|
|
+
|
|
+ iv = initialCounterBlk;
|
|
+ reset();
|
|
+ }
|
|
+
|
|
+ @Override
|
|
+ String getFeedback() {
|
|
+ return "GCTR";
|
|
}
|
|
|
|
// input must be multiples of 128-bit blocks when calling update
|
|
@@ -89,23 +80,11 @@ final class GCTR {
|
|
throw new RuntimeException("output buffer too small");
|
|
}
|
|
|
|
- byte[] encryptedCntr = new byte[AES_BLOCK_SIZE];
|
|
-
|
|
- int numOfCompleteBlocks = inLen / AES_BLOCK_SIZE;
|
|
- for (int i = 0; i < numOfCompleteBlocks; i++) {
|
|
- aes.encryptBlock(counter, 0, encryptedCntr, 0);
|
|
- for (int n = 0; n < AES_BLOCK_SIZE; n++) {
|
|
- int index = (i * AES_BLOCK_SIZE + n);
|
|
- out[outOfs + index] =
|
|
- (byte) ((in[inOfs + index] ^ encryptedCntr[n]));
|
|
- }
|
|
- GaloisCounterMode.increment32(counter);
|
|
- }
|
|
- return inLen;
|
|
+ return encrypt(in, inOfs, inLen, out, outOfs);
|
|
}
|
|
|
|
// input can be arbitrary size when calling doFinal
|
|
- protected int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
|
+ int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
|
int outOfs) throws IllegalBlockSizeException {
|
|
try {
|
|
if (inLen < 0) {
|
|
@@ -118,7 +97,7 @@ final class GCTR {
|
|
if (lastBlockSize != 0) {
|
|
// do the last partial block
|
|
byte[] encryptedCntr = new byte[AES_BLOCK_SIZE];
|
|
- aes.encryptBlock(counter, 0, encryptedCntr, 0);
|
|
+ embeddedCipher.encryptBlock(counter, 0, encryptedCntr, 0);
|
|
for (int n = 0; n < lastBlockSize; n++) {
|
|
out[outOfs + completeBlkLen + n] =
|
|
(byte) ((in[inOfs + completeBlkLen + n] ^
|
|
@@ -131,28 +110,4 @@ final class GCTR {
|
|
}
|
|
return inLen;
|
|
}
|
|
-
|
|
- /**
|
|
- * Resets the content of this object to when it's first constructed.
|
|
- */
|
|
- void reset() {
|
|
- System.arraycopy(icb, 0, counter, 0, icb.length);
|
|
- counterSave = null;
|
|
- }
|
|
-
|
|
- /**
|
|
- * Save the current content of this object.
|
|
- */
|
|
- void save() {
|
|
- this.counterSave = this.counter.clone();
|
|
- }
|
|
-
|
|
- /**
|
|
- * Restores the content of this object to the previous saved one.
|
|
- */
|
|
- void restore() {
|
|
- if (this.counterSave != null) {
|
|
- this.counter = this.counterSave;
|
|
- }
|
|
- }
|
|
}
|
|
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java b/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
|
|
index dc42e6bbf..78f0723d7 100644
|
|
--- a/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
|
|
+++ b/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
|
|
@@ -122,10 +122,10 @@ final class GHASH {
|
|
|
|
}
|
|
|
|
- /* subkeyH and state are stored in long[] for GHASH intrinsic use */
|
|
+ /* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */
|
|
|
|
- // hash subkey H; should not change after the object has been constructed
|
|
- private final long[] subkeyH;
|
|
+ // hashtable subkeyHtbl; holds 2*9 powers of subkeyH computed using carry-less multiplication
|
|
+ private long[] subkeyHtbl;
|
|
|
|
// buffer for storing hash
|
|
private final long[] state;
|
|
@@ -147,9 +147,9 @@ final class GHASH {
|
|
throw new ProviderException("Internal error");
|
|
}
|
|
state = new long[2];
|
|
- this.subkeyH = new long[2];
|
|
- this.subkeyH[0] = getLong(subkeyH, 0);
|
|
- this.subkeyH[1] = getLong(subkeyH, 8);
|
|
+ subkeyHtbl = new long[2*9];
|
|
+ subkeyHtbl[0] = getLong(subkeyH, 0);
|
|
+ subkeyHtbl[1] = getLong(subkeyH, 8);
|
|
}
|
|
|
|
/**
|
|
@@ -192,8 +192,8 @@ final class GHASH {
|
|
if (inLen == 0) {
|
|
return;
|
|
}
|
|
- ghashRangeCheck(in, inOfs, inLen, state, subkeyH);
|
|
- processBlocks(in, inOfs, inLen/AES_BLOCK_SIZE, state, subkeyH);
|
|
+ ghashRangeCheck(in, inOfs, inLen, state, subkeyHtbl);
|
|
+ processBlocks(in, inOfs, inLen/AES_BLOCK_SIZE, state, subkeyHtbl);
|
|
}
|
|
|
|
private static void ghashRangeCheck(byte[] in, int inOfs, int inLen, long[] st, long[] subH) {
|
|
@@ -217,8 +217,8 @@ final class GHASH {
|
|
throw new RuntimeException("internal state has invalid length: " +
|
|
st.length);
|
|
}
|
|
- if (subH.length != 2) {
|
|
- throw new RuntimeException("internal subkeyH has invalid length: " +
|
|
+ if (subH.length != 18) {
|
|
+ throw new RuntimeException("internal subkeyHtbl has invalid length: " +
|
|
subH.length);
|
|
}
|
|
}
|
|
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java b/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
|
|
index ab93e3097..dd2618455 100644
|
|
--- a/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
|
|
+++ b/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
|
|
@@ -439,6 +439,8 @@ public final class SSLSocketImpl
|
|
if (!conContext.isNegotiated) {
|
|
readHandshakeRecord();
|
|
}
|
|
+ } catch (InterruptedIOException iioe) {
|
|
+ handleException(iioe);
|
|
} catch (IOException ioe) {
|
|
throw conContext.fatal(Alert.HANDSHAKE_FAILURE,
|
|
"Couldn't kickstart handshaking", ioe);
|
|
@@ -1309,12 +1311,11 @@ public final class SSLSocketImpl
|
|
}
|
|
} catch (SSLException ssle) {
|
|
throw ssle;
|
|
+ } catch (InterruptedIOException iioe) {
|
|
+ // don't change exception in case of timeouts or interrupts
|
|
+ throw iioe;
|
|
} catch (IOException ioe) {
|
|
- if (!(ioe instanceof SSLException)) {
|
|
- throw new SSLException("readHandshakeRecord", ioe);
|
|
- } else {
|
|
- throw ioe;
|
|
- }
|
|
+ throw new SSLException("readHandshakeRecord", ioe);
|
|
}
|
|
}
|
|
|
|
@@ -1375,6 +1376,9 @@ public final class SSLSocketImpl
|
|
}
|
|
} catch (SSLException ssle) {
|
|
throw ssle;
|
|
+ } catch (InterruptedIOException iioe) {
|
|
+ // don't change exception in case of timeouts or interrupts
|
|
+ throw iioe;
|
|
} catch (IOException ioe) {
|
|
if (!(ioe instanceof SSLException)) {
|
|
throw new SSLException("readApplicationRecord", ioe);
|
|
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java b/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
|
|
index 401822759..ab5712acc 100644
|
|
--- a/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
|
|
+++ b/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
|
|
@@ -26,6 +26,7 @@
|
|
package sun.security.ssl;
|
|
|
|
import java.io.EOFException;
|
|
+import java.io.InterruptedIOException;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
@@ -47,37 +48,31 @@ import sun.security.ssl.SSLCipher.SSLReadCipher;
|
|
final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
private InputStream is = null;
|
|
private OutputStream os = null;
|
|
- private final byte[] temporary = new byte[1024];
|
|
+ private final byte[] header = new byte[headerSize];
|
|
+ private int headerOff = 0;
|
|
+ // Cache for incomplete record body.
|
|
+ private ByteBuffer recordBody = ByteBuffer.allocate(1024);
|
|
|
|
private boolean formatVerified = false; // SSLv2 ruled out?
|
|
|
|
// Cache for incomplete handshake messages.
|
|
private ByteBuffer handshakeBuffer = null;
|
|
|
|
- private boolean hasHeader = false; // Had read the record header
|
|
-
|
|
SSLSocketInputRecord(HandshakeHash handshakeHash) {
|
|
super(handshakeHash, SSLReadCipher.nullTlsReadCipher());
|
|
}
|
|
|
|
@Override
|
|
int bytesInCompletePacket() throws IOException {
|
|
- if (!hasHeader) {
|
|
- // read exactly one record
|
|
- try {
|
|
- int really = read(is, temporary, 0, headerSize);
|
|
- if (really < 0) {
|
|
- // EOF: peer shut down incorrectly
|
|
- return -1;
|
|
- }
|
|
- } catch (EOFException eofe) {
|
|
- // The caller will handle EOF.
|
|
- return -1;
|
|
- }
|
|
- hasHeader = true;
|
|
+ // read header
|
|
+ try {
|
|
+ readHeader();
|
|
+ } catch (EOFException eofe) {
|
|
+ // The caller will handle EOF.
|
|
+ return -1;
|
|
}
|
|
|
|
- byte byteZero = temporary[0];
|
|
+ byte byteZero = header[0];
|
|
int len = 0;
|
|
|
|
/*
|
|
@@ -93,9 +88,9 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
* Last sanity check that it's not a wild record
|
|
*/
|
|
if (!ProtocolVersion.isNegotiable(
|
|
- temporary[1], temporary[2], false)) {
|
|
+ header[1], header[2], false)) {
|
|
throw new SSLException("Unrecognized record version " +
|
|
- ProtocolVersion.nameOf(temporary[1], temporary[2]) +
|
|
+ ProtocolVersion.nameOf(header[1], header[2]) +
|
|
" , plaintext connection?");
|
|
}
|
|
|
|
@@ -109,8 +104,8 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
/*
|
|
* One of the SSLv3/TLS message types.
|
|
*/
|
|
- len = ((temporary[3] & 0xFF) << 8) +
|
|
- (temporary[4] & 0xFF) + headerSize;
|
|
+ len = ((header[3] & 0xFF) << 8) +
|
|
+ (header[4] & 0xFF) + headerSize;
|
|
} else {
|
|
/*
|
|
* Must be SSLv2 or something unknown.
|
|
@@ -121,11 +116,11 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
*/
|
|
boolean isShort = ((byteZero & 0x80) != 0);
|
|
|
|
- if (isShort && ((temporary[2] == 1) || (temporary[2] == 4))) {
|
|
+ if (isShort && ((header[2] == 1) || (header[2] == 4))) {
|
|
if (!ProtocolVersion.isNegotiable(
|
|
- temporary[3], temporary[4], false)) {
|
|
+ header[3], header[4], false)) {
|
|
throw new SSLException("Unrecognized record version " +
|
|
- ProtocolVersion.nameOf(temporary[3], temporary[4]) +
|
|
+ ProtocolVersion.nameOf(header[3], header[4]) +
|
|
" , plaintext connection?");
|
|
}
|
|
|
|
@@ -138,9 +133,9 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
//
|
|
// int mask = (isShort ? 0x7F : 0x3F);
|
|
// len = ((byteZero & mask) << 8) +
|
|
- // (temporary[1] & 0xFF) + (isShort ? 2 : 3);
|
|
+ // (header[1] & 0xFF) + (isShort ? 2 : 3);
|
|
//
|
|
- len = ((byteZero & 0x7F) << 8) + (temporary[1] & 0xFF) + 2;
|
|
+ len = ((byteZero & 0x7F) << 8) + (header[1] & 0xFF) + 2;
|
|
} else {
|
|
// Gobblygook!
|
|
throw new SSLException(
|
|
@@ -160,34 +155,41 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
return null;
|
|
}
|
|
|
|
- if (!hasHeader) {
|
|
- // read exactly one record
|
|
- int really = read(is, temporary, 0, headerSize);
|
|
- if (really < 0) {
|
|
- throw new EOFException("SSL peer shut down incorrectly");
|
|
- }
|
|
- hasHeader = true;
|
|
- }
|
|
+ // read header
|
|
+ readHeader();
|
|
|
|
- Plaintext plaintext = null;
|
|
- if (!formatVerified) {
|
|
- formatVerified = true;
|
|
+ Plaintext[] plaintext = null;
|
|
+ boolean cleanInBuffer = true;
|
|
+ try {
|
|
+ if (!formatVerified) {
|
|
+ formatVerified = true;
|
|
|
|
- /*
|
|
- * The first record must either be a handshake record or an
|
|
- * alert message. If it's not, it is either invalid or an
|
|
- * SSLv2 message.
|
|
- */
|
|
- if ((temporary[0] != ContentType.HANDSHAKE.id) &&
|
|
- (temporary[0] != ContentType.ALERT.id)) {
|
|
- hasHeader = false;
|
|
- return handleUnknownRecord(temporary);
|
|
+ /*
|
|
+ * The first record must either be a handshake record or an
|
|
+ * alert message. If it's not, it is either invalid or an
|
|
+ * SSLv2 message.
|
|
+ */
|
|
+ if ((header[0] != ContentType.HANDSHAKE.id) &&
|
|
+ (header[0] != ContentType.ALERT.id)) {
|
|
+ plaintext = handleUnknownRecord();
|
|
+ }
|
|
}
|
|
- }
|
|
|
|
- // The record header should has consumed.
|
|
- hasHeader = false;
|
|
- return decodeInputRecord(temporary);
|
|
+ // The record header should has consumed.
|
|
+ if (plaintext == null) {
|
|
+ plaintext = decodeInputRecord();
|
|
+ }
|
|
+ } catch(InterruptedIOException e) {
|
|
+ // do not clean header and recordBody in case of Socket Timeout
|
|
+ cleanInBuffer = false;
|
|
+ throw e;
|
|
+ } finally {
|
|
+ if (cleanInBuffer) {
|
|
+ headerOff = 0;
|
|
+ recordBody.clear();
|
|
+ }
|
|
+ }
|
|
+ return plaintext;
|
|
}
|
|
|
|
@Override
|
|
@@ -200,9 +202,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
this.os = outputStream;
|
|
}
|
|
|
|
- // Note that destination may be null
|
|
- private Plaintext[] decodeInputRecord(
|
|
- byte[] header) throws IOException, BadPaddingException {
|
|
+ private Plaintext[] decodeInputRecord() throws IOException, BadPaddingException {
|
|
byte contentType = header[0]; // pos: 0
|
|
byte majorVersion = header[1]; // pos: 1
|
|
byte minorVersion = header[2]; // pos: 2
|
|
@@ -227,30 +227,27 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
}
|
|
|
|
//
|
|
- // Read a complete record.
|
|
+ // Read a complete record and store in the recordBody
|
|
+ // recordBody is used to cache incoming record and restore in case of
|
|
+ // read operation timedout
|
|
//
|
|
- ByteBuffer destination = ByteBuffer.allocate(headerSize + contentLen);
|
|
- int dstPos = destination.position();
|
|
- destination.put(temporary, 0, headerSize);
|
|
- while (contentLen > 0) {
|
|
- int howmuch = Math.min(temporary.length, contentLen);
|
|
- int really = read(is, temporary, 0, howmuch);
|
|
- if (really < 0) {
|
|
- throw new EOFException("SSL peer shut down incorrectly");
|
|
+ if (recordBody.position() == 0) {
|
|
+ if (recordBody.capacity() < contentLen) {
|
|
+ recordBody = ByteBuffer.allocate(contentLen);
|
|
}
|
|
-
|
|
- destination.put(temporary, 0, howmuch);
|
|
- contentLen -= howmuch;
|
|
+ recordBody.limit(contentLen);
|
|
+ } else {
|
|
+ contentLen = recordBody.remaining();
|
|
}
|
|
- destination.flip();
|
|
- destination.position(dstPos + headerSize);
|
|
+ readFully(contentLen);
|
|
+ recordBody.flip();
|
|
|
|
if (SSLLogger.isOn && SSLLogger.isOn("record")) {
|
|
SSLLogger.fine(
|
|
"READ: " +
|
|
ProtocolVersion.nameOf(majorVersion, minorVersion) +
|
|
" " + ContentType.nameOf(contentType) + ", length = " +
|
|
- destination.remaining());
|
|
+ recordBody.remaining());
|
|
}
|
|
|
|
//
|
|
@@ -259,7 +256,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
ByteBuffer fragment;
|
|
try {
|
|
Plaintext plaintext =
|
|
- readCipher.decrypt(contentType, destination, null);
|
|
+ readCipher.decrypt(contentType, recordBody, null);
|
|
fragment = plaintext.fragment;
|
|
contentType = plaintext.contentType;
|
|
} catch (BadPaddingException bpe) {
|
|
@@ -368,8 +365,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
};
|
|
}
|
|
|
|
- private Plaintext[] handleUnknownRecord(
|
|
- byte[] header) throws IOException, BadPaddingException {
|
|
+ private Plaintext[] handleUnknownRecord() throws IOException, BadPaddingException {
|
|
byte firstByte = header[0];
|
|
byte thirdByte = header[2];
|
|
|
|
@@ -411,32 +407,29 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
}
|
|
|
|
int msgLen = ((header[0] & 0x7F) << 8) | (header[1] & 0xFF);
|
|
-
|
|
- ByteBuffer destination = ByteBuffer.allocate(headerSize + msgLen);
|
|
- destination.put(temporary, 0, headerSize);
|
|
- msgLen -= 3; // had read 3 bytes of content as header
|
|
- while (msgLen > 0) {
|
|
- int howmuch = Math.min(temporary.length, msgLen);
|
|
- int really = read(is, temporary, 0, howmuch);
|
|
- if (really < 0) {
|
|
- throw new EOFException("SSL peer shut down incorrectly");
|
|
+ if (recordBody.position() == 0) {
|
|
+ if (recordBody.capacity() < (headerSize + msgLen)) {
|
|
+ recordBody = ByteBuffer.allocate(headerSize + msgLen);
|
|
}
|
|
-
|
|
- destination.put(temporary, 0, howmuch);
|
|
- msgLen -= howmuch;
|
|
+ recordBody.limit(headerSize + msgLen);
|
|
+ recordBody.put(header, 0, headerSize);
|
|
+ } else {
|
|
+ msgLen = recordBody.remaining();
|
|
}
|
|
- destination.flip();
|
|
+ msgLen -= 3; // had read 3 bytes of content as header
|
|
+ readFully(msgLen);
|
|
+ recordBody.flip();
|
|
|
|
/*
|
|
* If we can map this into a V3 ClientHello, read and
|
|
* hash the rest of the V2 handshake, turn it into a
|
|
* V3 ClientHello message, and pass it up.
|
|
*/
|
|
- destination.position(2); // exclude the header
|
|
- handshakeHash.receive(destination);
|
|
- destination.position(0);
|
|
+ recordBody.position(2); // exclude the header
|
|
+ handshakeHash.receive(recordBody);
|
|
+ recordBody.position(0);
|
|
|
|
- ByteBuffer converted = convertToClientHello(destination);
|
|
+ ByteBuffer converted = convertToClientHello(recordBody);
|
|
|
|
if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
|
|
SSLLogger.fine(
|
|
@@ -456,28 +449,42 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
|
|
}
|
|
}
|
|
|
|
- // Read the exact bytes of data, otherwise, return -1.
|
|
- private static int read(InputStream is,
|
|
- byte[] buffer, int offset, int len) throws IOException {
|
|
- int n = 0;
|
|
- while (n < len) {
|
|
- int readLen = is.read(buffer, offset + n, len - n);
|
|
- if (readLen < 0) {
|
|
- if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
|
|
- SSLLogger.fine("Raw read: EOF");
|
|
- }
|
|
- return -1;
|
|
+ // Read the exact bytes of data, otherwise, throw IOException.
|
|
+ private int readFully(int len) throws IOException {
|
|
+ int end = len + recordBody.position();
|
|
+ int off = recordBody.position();
|
|
+ try {
|
|
+ while (off < end) {
|
|
+ off += read(is, recordBody.array(), off, end - off);
|
|
}
|
|
+ } finally {
|
|
+ recordBody.position(off);
|
|
+ }
|
|
+ return len;
|
|
+ }
|
|
+
|
|
+ // Read SSL record header, otherwise, throw IOException.
|
|
+ private int readHeader() throws IOException {
|
|
+ while (headerOff < headerSize) {
|
|
+ headerOff += read(is, header, headerOff, headerSize - headerOff);
|
|
+ }
|
|
+ return headerSize;
|
|
+ }
|
|
|
|
+ private static int read(InputStream is, byte[] buf, int off, int len) throws IOException {
|
|
+ int readLen = is.read(buf, off, len);
|
|
+ if (readLen < 0) {
|
|
if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
|
|
- ByteBuffer bb = ByteBuffer.wrap(buffer, offset + n, readLen);
|
|
- SSLLogger.fine("Raw read", bb);
|
|
+ SSLLogger.fine("Raw read: EOF");
|
|
}
|
|
-
|
|
- n += readLen;
|
|
+ throw new EOFException("SSL peer shut down incorrectly");
|
|
}
|
|
|
|
- return n;
|
|
+ if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
|
|
+ ByteBuffer bb = ByteBuffer.wrap(buf, off, readLen);
|
|
+ SSLLogger.fine("Raw read", bb);
|
|
+ }
|
|
+ return readLen;
|
|
}
|
|
|
|
// Try to use up the input stream without impact the performance too much.
|
|
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLTransport.java b/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
|
|
index b3d03b370..78e13ea2c 100644
|
|
--- a/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
|
|
+++ b/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
|
|
@@ -27,6 +27,7 @@ package sun.security.ssl;
|
|
|
|
import java.io.EOFException;
|
|
import java.io.IOException;
|
|
+import java.io.InterruptedIOException;
|
|
import java.nio.ByteBuffer;
|
|
import javax.crypto.AEADBadTagException;
|
|
import javax.crypto.BadPaddingException;
|
|
@@ -134,6 +135,9 @@ interface SSLTransport {
|
|
} catch (EOFException eofe) {
|
|
// rethrow EOFException, the call will handle it if needed.
|
|
throw eofe;
|
|
+ } catch (InterruptedIOException iioe) {
|
|
+ // don't close the Socket in case of timeouts or interrupts.
|
|
+ throw iioe;
|
|
} catch (IOException ioe) {
|
|
throw context.fatal(Alert.UNEXPECTED_MESSAGE, ioe);
|
|
}
|
|
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
|
|
new file mode 100644
|
|
index 000000000..258672f59
|
|
--- /dev/null
|
|
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
|
|
@@ -0,0 +1,128 @@
|
|
+/*
|
|
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+package org.openjdk.bench.javax.crypto.full;
|
|
+
|
|
+import org.openjdk.jmh.annotations.Benchmark;
|
|
+import org.openjdk.jmh.annotations.Param;
|
|
+import org.openjdk.jmh.annotations.Setup;
|
|
+
|
|
+import javax.crypto.Cipher;
|
|
+import javax.crypto.spec.GCMParameterSpec;
|
|
+import javax.crypto.spec.SecretKeySpec;
|
|
+
|
|
+/**
|
|
+ * This performance test runs AES/GCM encryption and decryption using byte[]
|
|
+ * as input and output buffers for single and multi-part testing.
|
|
+ *
|
|
+ * This test rotates the IV and creates a new GCMParameterSpec for each encrypt
|
|
+ * benchmark operation
|
|
+ */
|
|
+
|
|
+public class AESGCMBench extends CryptoBase {
|
|
+
|
|
+ @Param({"128"})
|
|
+ private int keyLength;
|
|
+
|
|
+ @Param({"1024", "1500", "4096", "16384"})
|
|
+ private int dataSize;
|
|
+
|
|
+ byte[] encryptedData;
|
|
+ byte[] in, out;
|
|
+ private Cipher encryptCipher;
|
|
+ private Cipher decryptCipher;
|
|
+ SecretKeySpec ks;
|
|
+ GCMParameterSpec gcm_spec;
|
|
+ byte[] iv;
|
|
+
|
|
+ private static final int IV_BUFFER_SIZE = 32;
|
|
+ private static final int IV_MODULO = IV_BUFFER_SIZE - 16;
|
|
+ int iv_index = 0;
|
|
+ int updateLen = 0;
|
|
+
|
|
+ private int next_iv_index() {
|
|
+ int r = iv_index;
|
|
+ iv_index = (iv_index + 1) % IV_MODULO;
|
|
+ return r;
|
|
+ }
|
|
+
|
|
+ @Setup
|
|
+ public void setup() throws Exception {
|
|
+ setupProvider();
|
|
+
|
|
+ // Setup key material
|
|
+ byte[] keystring = fillSecureRandom(new byte[keyLength / 8]);
|
|
+ ks = new SecretKeySpec(keystring, "AES");
|
|
+ iv = fillSecureRandom(new byte[IV_BUFFER_SIZE]);
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+
|
|
+ // Setup Cipher classes
|
|
+ encryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ decryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+
|
|
+ // Setup input/output buffers
|
|
+ in = fillRandom(new byte[dataSize]);
|
|
+ encryptedData = new byte[encryptCipher.getOutputSize(in.length)];
|
|
+ out = new byte[encryptedData.length];
|
|
+ encryptCipher.doFinal(in, 0, in.length, encryptedData, 0);
|
|
+ updateLen = in.length / 2;
|
|
+
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void encrypt() throws Exception {
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ encryptCipher.doFinal(in, 0, in.length, out, 0);
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void encryptMultiPart() throws Exception {
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ int outOfs = encryptCipher.update(in, 0, updateLen, out, 0);
|
|
+ encryptCipher.doFinal(in, updateLen, in.length - updateLen,
|
|
+ out, outOfs);
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void decrypt() throws Exception {
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+ decryptCipher.doFinal(encryptedData, 0, encryptedData.length, out, 0);
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void decryptMultiPart() throws Exception {
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+ decryptCipher.update(encryptedData, 0, updateLen, out, 0);
|
|
+ decryptCipher.doFinal(encryptedData, updateLen,
|
|
+ encryptedData.length - updateLen, out, 0);
|
|
+ }
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
|
|
new file mode 100644
|
|
index 000000000..cb6d20c51
|
|
--- /dev/null
|
|
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
|
|
@@ -0,0 +1,163 @@
|
|
+/*
|
|
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation. Oracle designates this
|
|
+ * particular file as subject to the "Classpath" exception as provided
|
|
+ * by Oracle in the LICENSE file that accompanied this code.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+package org.openjdk.bench.javax.crypto.full;
|
|
+
|
|
+import org.openjdk.jmh.annotations.Benchmark;
|
|
+import org.openjdk.jmh.annotations.Param;
|
|
+import org.openjdk.jmh.annotations.Setup;
|
|
+
|
|
+import javax.crypto.Cipher;
|
|
+import javax.crypto.spec.GCMParameterSpec;
|
|
+import javax.crypto.spec.SecretKeySpec;
|
|
+import java.nio.ByteBuffer;
|
|
+
|
|
+/**
|
|
+ * This performance test runs AES/GCM encryption and decryption using heap and
|
|
+ * direct ByteBuffers as input and output buffers for single and multi-part
|
|
+ * operations.
|
|
+ *
|
|
+ * This test rotates the IV and creates a new GCMParameterSpec for each encrypt
|
|
+ * benchmark operation
|
|
+ */
|
|
+
|
|
+public class AESGCMByteBuffer extends CryptoBase {
|
|
+
|
|
+ @Param({"128"})
|
|
+ private int keyLength;
|
|
+
|
|
+ @Param({"1024", "1500", "4096", "16384"})
|
|
+ private int dataSize;
|
|
+
|
|
+ @Param({"direct", "heap"})
|
|
+ private String dataMethod;
|
|
+
|
|
+ byte[] data;
|
|
+ ByteBuffer encryptedData;
|
|
+ ByteBuffer in, out;
|
|
+ private Cipher encryptCipher;
|
|
+ private Cipher decryptCipher;
|
|
+ SecretKeySpec ks;
|
|
+ GCMParameterSpec gcm_spec;
|
|
+ byte[] iv;
|
|
+
|
|
+ private static final int IV_BUFFER_SIZE = 32;
|
|
+ private static final int IV_MODULO = IV_BUFFER_SIZE - 16;
|
|
+ int iv_index = 0;
|
|
+ int updateLen = 0;
|
|
+
|
|
+ private int next_iv_index() {
|
|
+ int r = iv_index;
|
|
+ iv_index = (iv_index + 1) % IV_MODULO;
|
|
+ return r;
|
|
+ }
|
|
+
|
|
+ @Setup
|
|
+ public void setup() throws Exception {
|
|
+ setupProvider();
|
|
+
|
|
+ // Setup key material
|
|
+ byte[] keystring = fillSecureRandom(new byte[keyLength / 8]);
|
|
+ ks = new SecretKeySpec(keystring, "AES");
|
|
+ iv = fillSecureRandom(new byte[IV_BUFFER_SIZE]);
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+
|
|
+ // Setup Cipher classes
|
|
+ encryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ decryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+
|
|
+ // Setup input/output buffers
|
|
+ data = fillRandom(new byte[dataSize]);
|
|
+ if (dataMethod.equalsIgnoreCase("direct")) {
|
|
+ in = ByteBuffer.allocateDirect(data.length);
|
|
+ in.put(data);
|
|
+ in.flip();
|
|
+ encryptedData = ByteBuffer.allocateDirect(
|
|
+ encryptCipher.getOutputSize(data.length));
|
|
+ out = ByteBuffer.allocateDirect(encryptedData.capacity());
|
|
+ } else if (dataMethod.equalsIgnoreCase("heap")) {
|
|
+ in = ByteBuffer.wrap(data);
|
|
+ encryptedData = ByteBuffer.allocate(
|
|
+ encryptCipher.getOutputSize(data.length));
|
|
+ out = ByteBuffer.allocate(encryptedData.capacity());
|
|
+ }
|
|
+
|
|
+ encryptCipher.doFinal(in, encryptedData);
|
|
+ encryptedData.flip();
|
|
+ in.flip();
|
|
+ updateLen = in.remaining() / 2;
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void encrypt() throws Exception {
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ encryptCipher.doFinal(in, out);
|
|
+ out.flip();
|
|
+ in.flip();
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void encryptMultiPart() throws Exception {
|
|
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
|
|
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
|
|
+ in.limit(updateLen);
|
|
+ encryptCipher.update(in, out);
|
|
+ in.limit(in.capacity());
|
|
+ encryptCipher.doFinal(in, out);
|
|
+ out.flip();
|
|
+ in.flip();
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void decrypt() throws Exception {
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+ decryptCipher.doFinal(encryptedData, out);
|
|
+ encryptedData.flip();
|
|
+ out.flip();
|
|
+ }
|
|
+
|
|
+ @Benchmark
|
|
+ public void decryptMultiPart() throws Exception {
|
|
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
|
|
+ encryptCipher.getParameters().
|
|
+ getParameterSpec(GCMParameterSpec.class));
|
|
+
|
|
+ int len = encryptedData.remaining();
|
|
+ encryptedData.limit(updateLen);
|
|
+ decryptCipher.update(encryptedData, out);
|
|
+ encryptedData.limit(len);
|
|
+
|
|
+ decryptCipher.doFinal(encryptedData, out);
|
|
+ encryptedData.flip();
|
|
+ out.flip();
|
|
+ }
|
|
+
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
|
|
new file mode 100644
|
|
index 000000000..4af12703b
|
|
--- /dev/null
|
|
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
|
|
@@ -0,0 +1,102 @@
|
|
+/*
|
|
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+package org.openjdk.bench.javax.crypto.full;
|
|
+
|
|
+import org.openjdk.jmh.annotations.BenchmarkMode;
|
|
+import org.openjdk.jmh.annotations.Fork;
|
|
+import org.openjdk.jmh.annotations.Measurement;
|
|
+import org.openjdk.jmh.annotations.Mode;
|
|
+import org.openjdk.jmh.annotations.OutputTimeUnit;
|
|
+import org.openjdk.jmh.annotations.Param;
|
|
+import org.openjdk.jmh.annotations.Scope;
|
|
+import org.openjdk.jmh.annotations.Setup;
|
|
+import org.openjdk.jmh.annotations.State;
|
|
+import org.openjdk.jmh.annotations.Warmup;
|
|
+
|
|
+import javax.crypto.BadPaddingException;
|
|
+import javax.crypto.Cipher;
|
|
+import javax.crypto.IllegalBlockSizeException;
|
|
+import javax.crypto.NoSuchPaddingException;
|
|
+import java.security.NoSuchAlgorithmException;
|
|
+import java.security.Provider;
|
|
+import java.security.SecureRandom;
|
|
+import java.security.Security;
|
|
+import java.util.Random;
|
|
+import java.util.concurrent.TimeUnit;
|
|
+
|
|
+
|
|
+@Fork(jvmArgsAppend = {"-XX:+AlwaysPreTouch"}, value = 5)
|
|
+@Warmup(iterations = 3, time = 3)
|
|
+@Measurement(iterations = 8, time = 2)
|
|
+@OutputTimeUnit(TimeUnit.SECONDS)
|
|
+@State(Scope.Thread)
|
|
+@BenchmarkMode(Mode.Throughput)
|
|
+public class CryptoBase {
|
|
+
|
|
+ @Param({""})
|
|
+ private String provider;
|
|
+
|
|
+ public Provider prov = null;
|
|
+
|
|
+ @Setup
|
|
+ public void setupProvider() {
|
|
+ if (provider != null && !provider.isEmpty()) {
|
|
+ prov = Security.getProvider(provider);
|
|
+ if (prov == null) {
|
|
+ throw new RuntimeException("Can't find prodiver \"" + provider + "\"");
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public static Cipher makeCipher(Provider prov, String algorithm) throws NoSuchPaddingException, NoSuchAlgorithmException {
|
|
+ return (prov == null) ? Cipher.getInstance(algorithm) : Cipher.getInstance(algorithm, prov);
|
|
+ }
|
|
+
|
|
+ public static byte[][] fillRandom(byte[][] data) {
|
|
+ Random rnd = new Random();
|
|
+ for (byte[] d : data) {
|
|
+ rnd.nextBytes(d);
|
|
+ }
|
|
+ return data;
|
|
+ }
|
|
+
|
|
+ public static byte[] fillRandom(byte[] data) {
|
|
+ Random rnd = new Random();
|
|
+ rnd.nextBytes(data);
|
|
+ return data;
|
|
+ }
|
|
+
|
|
+ public static byte[] fillSecureRandom(byte[] data) {
|
|
+ SecureRandom rnd = new SecureRandom();
|
|
+ rnd.nextBytes(data);
|
|
+ return data;
|
|
+ }
|
|
+
|
|
+ public static byte[][] fillEncrypted(byte[][] data, Cipher encryptCipher) throws BadPaddingException, IllegalBlockSizeException {
|
|
+ byte[][] encryptedData = new byte[data.length][];
|
|
+ for (int i = 0; i < encryptedData.length; i++) {
|
|
+ encryptedData[i] = encryptCipher.doFinal(data[i]);
|
|
+ }
|
|
+ return encryptedData;
|
|
+ }
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
|
|
new file mode 100644
|
|
index 000000000..a21b0c87f
|
|
--- /dev/null
|
|
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
|
|
@@ -0,0 +1,36 @@
|
|
+/*
|
|
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+package org.openjdk.bench.javax.crypto.small;
|
|
+
|
|
+import org.openjdk.jmh.annotations.Param;
|
|
+
|
|
+public class AESGCMBench extends
|
|
+ org.openjdk.bench.javax.crypto.full.AESGCMBench {
|
|
+
|
|
+ @Param({"128"})
|
|
+ private int keyLength;
|
|
+
|
|
+ @Param({"1024"})
|
|
+ private int dataSize;
|
|
+
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
|
|
new file mode 100644
|
|
index 000000000..2e389d300
|
|
--- /dev/null
|
|
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
|
|
@@ -0,0 +1,36 @@
|
|
+/*
|
|
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
|
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
+ *
|
|
+ * This code is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 only, as
|
|
+ * published by the Free Software Foundation.
|
|
+ *
|
|
+ * This code is distributed in the hope that it will be useful, but WITHOUT
|
|
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
+ * version 2 for more details (a copy is included in the LICENSE file that
|
|
+ * accompanied this code).
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License version
|
|
+ * 2 along with this work; if not, write to the Free Software Foundation,
|
|
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
+ *
|
|
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
+ * or visit www.oracle.com if you need additional information or have any
|
|
+ * questions.
|
|
+ */
|
|
+package org.openjdk.bench.javax.crypto.small;
|
|
+
|
|
+import org.openjdk.jmh.annotations.Param;
|
|
+
|
|
+public class AESGCMByteBuffer extends
|
|
+ org.openjdk.bench.javax.crypto.full.AESGCMByteBuffer {
|
|
+
|
|
+ @Param({"128"})
|
|
+ private int keyLength;
|
|
+
|
|
+ @Param({"1024"})
|
|
+ private int dataSize;
|
|
+
|
|
+}
|
|
\ No newline at end of file
|
|
diff --git a/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java b/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
|
|
index 3eb1d7b89..7678cc71f 100644
|
|
--- a/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
|
|
+++ b/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
|
|
@@ -26,8 +26,7 @@
|
|
|
|
/*
|
|
* @test
|
|
- * @bug 4836493
|
|
- * @ignore need further evaluation
|
|
+ * @bug 4836493 8239798
|
|
* @summary Socket timeouts for SSLSockets causes data corruption.
|
|
* @run main/othervm ClientTimeout
|
|
*/
|
|
diff --git a/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java b/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
|
|
index 3e626a257..5578ea725 100644
|
|
--- a/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
|
|
+++ b/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
|
|
@@ -36,7 +36,7 @@
|
|
|
|
import javax.net.ssl.*;
|
|
import java.io.*;
|
|
-import java.net.InetAddress;
|
|
+import java.net.*;
|
|
|
|
public class SSLExceptionForIOIssue implements SSLContextTemplate {
|
|
|
|
@@ -139,7 +139,7 @@ public class SSLExceptionForIOIssue implements SSLContextTemplate {
|
|
} catch (SSLProtocolException | SSLHandshakeException sslhe) {
|
|
clientException = sslhe;
|
|
System.err.println("unexpected client exception: " + sslhe);
|
|
- } catch (SSLException ssle) {
|
|
+ } catch (SSLException | SocketTimeoutException ssle) {
|
|
// the expected exception, ignore it
|
|
System.err.println("expected client exception: " + ssle);
|
|
} catch (Exception e) {
|
|
--
|
|
2.17.1
|
|
|