From 6926e9b83bc3f9b785d5298786a5c41e247b2a3f Mon Sep 17 00:00:00 2001
Date: Mon, 16 Oct 2023 10:56:30 +0800
Subject: [PATCH 1/5] 8308682: Enhance AES performance

Bug url: https://bugs.openjdk.org/browse/JDK-8308682
---
 .../src/cpu/aarch64/vm/assembler_aarch64.hpp   |   2 +
 .../cpu/aarch64/vm/stubGenerator_aarch64.cpp   | 451 +++++++++---------
 .../compiler/codegen/aes/CTR_Wraparound.java   | 169 +++++++
 3 files changed, 406 insertions(+), 216 deletions(-)
 create mode 100644 hotspot/test/compiler/codegen/aes/CTR_Wraparound.java
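
What this patch fixes: the AArch64 AES/CTR stub incremented the 128-bit big-endian
counter with a 32-bit lane add (rev32 followed by addv on T4S lanes), so a carry out
of the low 32 bits was silently dropped and the intrinsic's keystream diverged from
the pure-Java com.sun.crypto.provider.CounterMode once the counter crossed that
boundary. The new be_add_128_64 helper performs the increment as a 64-bit add and
propagates the carry into the high word. A scalar sketch of the difference (helper
names here are illustrative, not from the patch):

#include <cstdint>

struct Counter128 {          // 128-bit big-endian counter, split into two words
    uint64_t hi, lo;
};

// Old behaviour (sketch): rev32 + a 32-bit lane add increments only the
// low 32 bits of the counter; a carry out of bit 31 is dropped.
inline Counter128 add_32bit_lane(Counter128 c) {
    uint32_t low32 = static_cast<uint32_t>(c.lo) + 1;
    c.lo = (c.lo & 0xffffffff00000000ULL) | low32;
    return c;
}

// New behaviour (sketch of be_add_128_64): a full 64-bit add on the low
// word, with the overflow carried into the high word.
inline Counter128 be_add_128_64_scalar(Counter128 c, uint64_t inc) {
    uint64_t lo = c.lo + inc;
    uint64_t carry = (inc > lo) ? 1 : 0;   // the CMHI test in the stub
    return Counter128{ c.hi + carry, lo };
}

The vector code reaches the same result with addv/cmhi/ins/subv on 2x64-bit lanes;
the cmhi instruction it needs is added to the assembler first.
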
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
index 9202e61f8..b12095aca 100644
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -2140,6 +2140,8 @@ public:
   INSN(sshl, 0, 0b010001);
   INSN(ushl, 1, 0b010001);
 
+  INSN(cmhi, 1, 0b001101); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+
 #undef INSN
 
 #define INSN(NAME, opc, opc2) \
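
cmhi is the AArch64 SIMD unsigned "compare higher": each destination lane becomes
all-ones where the first source lane is greater than the second, and zero otherwise.
The stub generator below uses it on 2x64-bit lanes as an overflow detector and then
subtracts the resulting mask, which adds 1 exactly where a carry occurred. A one-lane
scalar model (function name is ours, not from the patch):

#include <cstdint>

// One 64-bit lane of CMHI: all-ones if a > b (unsigned), else zero.
inline uint64_t cmhi64(uint64_t a, uint64_t b) {
    return a > b ? ~UINT64_C(0) : UINT64_C(0);
}
// Note that x - cmhi64(a, b) == x + 1 when a > b, else x: subtracting the
// all-ones mask is how be_add_128_64 (below) applies the carry.
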
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
index 565fe559c..f61028d50 100644
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
@@ -2804,265 +2804,284 @@ class StubGenerator: public StubCodeGenerator {
     return start;
   }
 
+  // Big-endian 128-bit + 64-bit -> 128-bit addition.
+  // Inputs: 128-bits. in is preserved.
+  // The least-significant 64-bit word is in the upper dword of the vector
+  // inc (the 64-bit increment) is preserved. Its lower dword must be zero
+  // Output: result
+  void be_add_128_64(FloatRegister result, FloatRegister in,
+                     FloatRegister inc, FloatRegister tmp) {
+    assert_different_registers(result, tmp, inc);
+
+    __ addv(result, __ T2D, in, inc);     // Add inc to the least-significant dword of input
+    __ cmhi(tmp, __ T2D, inc, result);    // Check for result overflowing
+    __ ins(tmp, __ D, tmp, 0, 1);         // Move LSD of comparison result to MSD
+    __ ins(tmp, __ D, inc, 1, 0);         // Move 0 to LSD of comparison result
+    __ subv(result, __ T2D, result, tmp); // Subtract -1 from MSD if there was an overflow
+  }
+
   // CTR AES crypt.
-  // Arguments:
-  //
-  // Inputs:
-  //   c_rarg0 - source byte array address
-  //   c_rarg1 - destination byte array address
-  //   c_rarg2 - K (key) in little endian int array
-  //   c_rarg3 - counter vector byte array address
-  //   c_rarg4 - input length
-  //   c_rarg5 - saved encryptedCounter start
-  //   c_rarg6 - saved used length
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0 - source byte array address
+  //   c_rarg1 - destination byte array address
+  //   c_rarg2 - K (key) in little endian int array
+  //   c_rarg3 - counter vector byte array address
+  //   c_rarg4 - input length
+  //   c_rarg5 - saved encryptedCounter start
+  //   c_rarg6 - saved used length
+  //
+  // Output:
+  //   r0 - input length
+  //
+  address generate_counterMode_AESCrypt() {
+    const Register in = c_rarg0;
+    const Register out = c_rarg1;
+    const Register key = c_rarg2;
+    const Register counter = c_rarg3;
+    const Register saved_len = c_rarg4, len = r10;
+    const Register saved_encrypted_ctr = c_rarg5;
+    const Register used_ptr = c_rarg6, used = r12;
+
+    const Register offset = r7;
+    const Register keylen = r11;
+
+    const unsigned char block_size = 16;
+    const int bulk_width = 4;
+    // NB: bulk_width can be 4 or 8. 8 gives slightly faster
+    // performance with larger data sizes, but it also means that the
+    // fast path isn't used until you have at least 8 blocks, and up
+    // to 127 bytes of data will be executed on the slow path. For
+    // that reason, and also so as not to blow away too much icache, 4
+    // blocks seems like a sensible compromise.
+
+    // Algorithm:
   //
-  // Output:
-  //   r0 - input length
+    //    if (len == 0) {
+    //      goto DONE;
+    //    }
+    //    int result = len;
+    //    do {
+    //      if (used >= blockSize) {
+    //        if (len >= bulk_width * blockSize) {
+    //          CTR_large_block();
+    //          if (len == 0)
+    //            goto DONE;
+    //        }
+    //        for (;;) {
+    //          16ByteVector v0 = counter;
+    //          embeddedCipher.encryptBlock(v0, 0, encryptedCounter, 0);
+    //          used = 0;
+    //          if (len < blockSize)
+    //            break;    /* goto NEXT */
+    //          16ByteVector v1 = load16Bytes(in, offset);
+    //          v1 = v1 ^ encryptedCounter;
+    //          store16Bytes(out, offset);
+    //          used = blockSize;
+    //          offset += blockSize;
+    //          len -= blockSize;
+    //          if (len == 0)
+    //            goto DONE;
+    //        }
+    //      }
+    //    NEXT:
+    //      out[outOff++] = (byte)(in[inOff++] ^ encryptedCounter[used++]);
+    //      len--;
+    //    } while (len != 0);
+    //  DONE:
+    //    return result;
   //
-  address generate_counterMode_AESCrypt() {
-    const Register in = c_rarg0;
-    const Register out = c_rarg1;
-    const Register key = c_rarg2;
-    const Register counter = c_rarg3;
-    const Register saved_len = c_rarg4, len = r10;
-    const Register saved_encrypted_ctr = c_rarg5;
-    const Register used_ptr = c_rarg6, used = r12;
-
-    const Register offset = r7;
-    const Register keylen = r11;
-
-    const unsigned char block_size = 16;
-    const int bulk_width = 4;
-    // NB: bulk_width can be 4 or 8. 8 gives slightly faster
-    // performance with larger data sizes, but it also means that the
-    // fast path isn't used until you have at least 8 blocks, and up
-    // to 127 bytes of data will be executed on the slow path. For
-    // that reason, and also so as not to blow away too much icache, 4
-    // blocks seems like a sensible compromise.
-
-    // Algorithm:
-    //
-    //    if (len == 0) {
-    //      goto DONE;
-    //    }
-    //    int result = len;
-    //    do {
-    //      if (used >= blockSize) {
-    //        if (len >= bulk_width * blockSize) {
-    //          CTR_large_block();
-    //          if (len == 0)
-    //            goto DONE;
-    //        }
-    //        for (;;) {
-    //          16ByteVector v0 = counter;
-    //          embeddedCipher.encryptBlock(v0, 0, encryptedCounter, 0);
-    //          used = 0;
-    //          if (len < blockSize)
-    //            break;    /* goto NEXT */
-    //          16ByteVector v1 = load16Bytes(in, offset);
-    //          v1 = v1 ^ encryptedCounter;
-    //          store16Bytes(out, offset);
-    //          used = blockSize;
-    //          offset += blockSize;
-    //          len -= blockSize;
-    //          if (len == 0)
-    //            goto DONE;
-    //        }
-    //      }
-    //    NEXT:
-    //      out[outOff++] = (byte)(in[inOff++] ^ encryptedCounter[used++]);
-    //      len--;
-    //    } while (len != 0);
-    //  DONE:
-    //    return result;
-    //
-    // CTR_large_block()
-    //   Wide bulk encryption of whole blocks.
+    // CTR_large_block()
+    //   Wide bulk encryption of whole blocks.
 
-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
-    const address start = __ pc();
-    __ enter();
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
+    const address start = __ pc();
+    __ enter();
 
-    Label DONE, CTR_large_block, large_block_return;
-    __ ldrw(used, Address(used_ptr));
-    __ cbzw(saved_len, DONE);
+    Label DONE, CTR_large_block, large_block_return;
+    __ ldrw(used, Address(used_ptr));
+    __ cbzw(saved_len, DONE);
 
-    __ mov(len, saved_len);
-    __ mov(offset, 0);
+    __ mov(len, saved_len);
+    __ mov(offset, 0);
 
-    // Compute #rounds for AES based on the length of the key array
-    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // Compute #rounds for AES based on the length of the key array
+    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
 
-    __ aesenc_loadkeys(key, keylen);
+    __ aesenc_loadkeys(key, keylen);
 
-    {
-      Label L_CTR_loop, NEXT;
+    {
+      Label L_CTR_loop, NEXT;
 
-      __ bind(L_CTR_loop);
+      __ bind(L_CTR_loop);
 
-      __ cmp(used, block_size);
-      __ br(__ LO, NEXT);
+      __ cmp(used, block_size);
+      __ br(__ LO, NEXT);
 
-      // Maybe we have a lot of data
-      __ subsw(rscratch1, len, bulk_width * block_size);
-      __ br(__ HS, CTR_large_block);
-      __ BIND(large_block_return);
-      __ cbzw(len, DONE);
+      // Maybe we have a lot of data
+      __ subsw(rscratch1, len, bulk_width * block_size);
+      __ br(__ HS, CTR_large_block);
+      __ BIND(large_block_return);
+      __ cbzw(len, DONE);
 
-      // Setup the counter
-      __ movi(v4, __ T4S, 0);
-      __ movi(v5, __ T4S, 1);
-      __ ins(v4, __ S, v5, 3, 3); // v4 contains { 0, 0, 0, 1 }
+      // Setup the counter
+      __ movi(v4, __ T4S, 0);
+      __ movi(v5, __ T4S, 1);
+      __ ins(v4, __ S, v5, 2, 2); // v4 contains { 0, 1 }
 
-      __ ld1(v0, __ T16B, counter); // Load the counter into v0
-      __ rev32(v16, __ T16B, v0);
-      __ addv(v16, __ T4S, v16, v4);
-      __ rev32(v16, __ T16B, v16);
-      __ st1(v16, __ T16B, counter); // Save the incremented counter back
+      // 128-bit big-endian increment
+      __ ld1(v0, __ T16B, counter);
+      __ rev64(v16, __ T16B, v0);
+      be_add_128_64(v16, v16, v4, /*tmp*/v5);
+      __ rev64(v16, __ T16B, v16);
+      __ st1(v16, __ T16B, counter);
+      // Previous counter value is in v0
+      // v4 contains { 0, 1 }
 
-      {
-        // We have fewer than bulk_width blocks of data left. Encrypt
-        // them one by one until there is less than a full block
-        // remaining, being careful to save both the encrypted counter
-        // and the counter.
-
-        Label inner_loop;
-        __ bind(inner_loop);
-        // Counter to encrypt is in v0
-        __ aesecb_encrypt(noreg, noreg, keylen);
-        __ st1(v0, __ T16B, saved_encrypted_ctr);
-
-        // Do we have a remaining full block?
-
-        __ mov(used, 0);
-        __ cmp(len, block_size);
-        __ br(__ LO, NEXT);
-
-        // Yes, we have a full block
-        __ ldrq(v1, Address(in, offset));
-        __ eor(v1, __ T16B, v1, v0);
-        __ strq(v1, Address(out, offset));
-        __ mov(used, block_size);
-        __ add(offset, offset, block_size);
-
-        __ subw(len, len, block_size);
-        __ cbzw(len, DONE);
-
-        // Increment the counter, store it back
-        __ orr(v0, __ T16B, v16, v16);
-        __ rev32(v16, __ T16B, v16);
-        __ addv(v16, __ T4S, v16, v4);
-        __ rev32(v16, __ T16B, v16);
-        __ st1(v16, __ T16B, counter); // Save the incremented counter back
-
-        __ b(inner_loop);
-      }
+      {
+        // We have fewer than bulk_width blocks of data left. Encrypt
+        // them one by one until there is less than a full block
+        // remaining, being careful to save both the encrypted counter
+        // and the counter.
 
-      __ BIND(NEXT);
-
-      // Encrypt a single byte, and loop.
-      // We expect this to be a rare event.
-      __ ldrb(rscratch1, Address(in, offset));
-      __ ldrb(rscratch2, Address(saved_encrypted_ctr, used));
-      __ eor(rscratch1, rscratch1, rscratch2);
-      __ strb(rscratch1, Address(out, offset));
-      __ add(offset, offset, 1);
-      __ add(used, used, 1);
-      __ subw(len, len,1);
-      __ cbnzw(len, L_CTR_loop);
-    }
+        Label inner_loop;
+        __ bind(inner_loop);
+        // Counter to encrypt is in v0
+        __ aesecb_encrypt(noreg, noreg, keylen);
+        __ st1(v0, __ T16B, saved_encrypted_ctr);
 
-    __ bind(DONE);
-    __ strw(used, Address(used_ptr));
-    __ mov(r0, saved_len);
+        // Do we have a remaining full block?
 
-    __ leave(); // required for proper stackwalking of RuntimeStub frame
-    __ ret(lr);
+        __ mov(used, 0);
+        __ cmp(len, block_size);
+        __ br(__ LO, NEXT);
 
-    // Bulk encryption
+        // Yes, we have a full block
+        __ ldrq(v1, Address(in, offset));
+        __ eor(v1, __ T16B, v1, v0);
+        __ strq(v1, Address(out, offset));
+        __ mov(used, block_size);
+        __ add(offset, offset, block_size);
 
-    __ BIND (CTR_large_block);
-    assert(bulk_width == 4 || bulk_width == 8, "must be");
+        __ subw(len, len, block_size);
+        __ cbzw(len, DONE);
 
-    if (bulk_width == 8) {
-      __ sub(sp, sp, 4 * 16);
-      __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
+        // Increment the counter, store it back
+        __ orr(v0, __ T16B, v16, v16);
+        __ rev64(v16, __ T16B, v16);
+        be_add_128_64(v16, v16, v4, /*tmp*/v5);
+        __ rev64(v16, __ T16B, v16);
+        __ st1(v16, __ T16B, counter); // Save the incremented counter back
+
+        __ b(inner_loop);
      }
-    __ sub(sp, sp, 4 * 16);
-    __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
-    RegSet saved_regs = (RegSet::of(in, out, offset)
-                         + RegSet::of(saved_encrypted_ctr, used_ptr, len));
-    __ push(saved_regs, sp);
-    __ andr(len, len, -16 * bulk_width); // 8/4 encryptions, 16 bytes per encryption
-    __ add(in, in, offset);
-    __ add(out, out, offset);
 
-    // Keys should already be loaded into the correct registers
+      __ BIND(NEXT);
+
+      // Encrypt a single byte, and loop.
+      // We expect this to be a rare event.
+      __ ldrb(rscratch1, Address(in, offset));
+      __ ldrb(rscratch2, Address(saved_encrypted_ctr, used));
+      __ eor(rscratch1, rscratch1, rscratch2);
+      __ strb(rscratch1, Address(out, offset));
+      __ add(offset, offset, 1);
+      __ add(used, used, 1);
+      __ subw(len, len,1);
+      __ cbnzw(len, L_CTR_loop);
+    }
 
-    __ ld1(v0, __ T16B, counter); // v0 contains the first counter
-    __ rev32(v16, __ T16B, v0); // v16 contains byte-reversed counter
+    __ bind(DONE);
+    __ strw(used, Address(used_ptr));
+    __ mov(r0, saved_len);
 
-    // AES/CTR loop
-    {
-      Label L_CTR_loop;
-      __ BIND(L_CTR_loop);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
 
-      // Setup the counters
-      __ movi(v8, __ T4S, 0);
-      __ movi(v9, __ T4S, 1);
-      __ ins(v8, __ S, v9, 3, 3); // v8 contains { 0, 0, 0, 1 }
+    // Bulk encryption
 
-      for (FloatRegister f = v0; f < v0 + bulk_width; f++) {
-        __ rev32(f, __ T16B, v16);
-        __ addv(v16, __ T4S, v16, v8);
-      }
+    __ BIND (CTR_large_block);
+    assert(bulk_width == 4 || bulk_width == 8, "must be");
 
-      __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16));
+    if (bulk_width == 8) {
+      __ sub(sp, sp, 4 * 16);
+      __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
+    }
+    __ sub(sp, sp, 4 * 16);
+    __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
+    RegSet saved_regs = (RegSet::of(in, out, offset)
+                         + RegSet::of(saved_encrypted_ctr, used_ptr, len));
+    __ push(saved_regs, sp);
+    __ andr(len, len, -16 * bulk_width); // 8/4 encryptions, 16 bytes per encryption
+    __ add(in, in, offset);
+    __ add(out, out, offset);
 
-      // Encrypt the counters
-      __ aesecb_encrypt(noreg, noreg, keylen, v0, bulk_width);
+    // Keys should already be loaded into the correct registers
 
-      if (bulk_width == 8) {
-        __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16));
-      }
+    __ ld1(v0, __ T16B, counter); // v0 contains the first counter
+    __ rev64(v16, __ T16B, v0); // v16 contains byte-reversed counter
 
-      // XOR the encrypted counters with the inputs
-      for (int i = 0; i < bulk_width; i++) {
-        __ eor(v0 + i, __ T16B, v0 + i, v8 + i);
-      }
+    // AES/CTR loop
+    {
+      Label L_CTR_loop;
+      __ BIND(L_CTR_loop);
 
-      // Write the encrypted data
-      __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16));
-      if (bulk_width == 8) {
-        __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16));
-      }
+      // Setup the counters
+      __ movi(v8, __ T4S, 0);
+      __ movi(v9, __ T4S, 1);
+      __ ins(v8, __ S, v9, 2, 2); // v8 contains { 0, 1 }
 
-      __ subw(len, len, 16 * bulk_width);
-      __ cbnzw(len, L_CTR_loop);
+      for (FloatRegister f = v0; f < v0 + bulk_width; f++) {
+        __ rev64(f, __ T16B, v16);
+        be_add_128_64(v16, v16, v8, /*tmp*/v9);
      }
 
-    // Save the counter back where it goes
-    __ rev32(v16, __ T16B, v16);
-    __ st1(v16, __ T16B, counter);
+      __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16));
 
-    __ pop(saved_regs, sp);
+      // Encrypt the counters
+      __ aesecb_encrypt(noreg, noreg, keylen, v0, bulk_width);
 
-    __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
      if (bulk_width == 8) {
-      __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
+        __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16));
      }
 
-    __ andr(rscratch1, len, -16 * bulk_width);
-    __ sub(len, len, rscratch1);
-    __ add(offset, offset, rscratch1);
-    __ mov(used, 16);
-    __ strw(used, Address(used_ptr));
-    __ b(large_block_return);
+      // XOR the encrypted counters with the inputs
+      for (int i = 0; i < bulk_width; i++) {
+        __ eor(v0 + i, __ T16B, v0 + i, v8 + i);
+      }
 
-    return start;
+      // Write the encrypted data
+      __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16));
+      if (bulk_width == 8) {
+        __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16));
+      }
+
+      __ subw(len, len, 16 * bulk_width);
+      __ cbnzw(len, L_CTR_loop);
    }
 
+    // Save the counter back where it goes
+    __ rev64(v16, __ T16B, v16);
+    __ st1(v16, __ T16B, counter);
+
+    __ pop(saved_regs, sp);
+
+    __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
+    if (bulk_width == 8) {
+      __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
+    }
+
+    __ andr(rscratch1, len, -16 * bulk_width);
+    __ sub(len, len, rscratch1);
+    __ add(offset, offset, rscratch1);
+    __ mov(used, 16);
+    __ strw(used, Address(used_ptr));
+    __ b(large_block_return);
+
+    return start;
+  }
+
 
 
   // Arguments:
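
For reference, a lane-for-lane scalar model of the five instructions in
be_add_128_64 above (struct and function names are ours; after rev64, lane d1
is the "upper dword" holding the counter's least-significant word, and inc is
{0, increment} as the stub's comments require):

#include <cstdint>

struct V2D { uint64_t d0, d1; };   // d0 = high word, d1 = low word

V2D be_add_128_64_model(V2D in, V2D inc) {
    V2D result, tmp;
    // addv T2D: two independent 64-bit lane adds
    result.d0 = in.d0 + inc.d0;               // high word + 0
    result.d1 = in.d1 + inc.d1;               // low word + increment
    // cmhi T2D: all-ones where inc > result (unsigned), i.e. lane overflow
    tmp.d0 = inc.d0 > result.d0 ? ~0ULL : 0;  // always 0 (inc.d0 == 0)
    tmp.d1 = inc.d1 > result.d1 ? ~0ULL : 0;  // carry out of the low word
    // ins D[0] <- tmp D[1]: route the low-word overflow mask to the high lane
    tmp.d0 = tmp.d1;
    // ins D[1] <- inc D[0]: clear the low lane (inc's lower dword must be zero)
    tmp.d1 = inc.d0;
    // subv T2D: high word - (-1) == high word + 1 when there was a carry
    result.d0 -= tmp.d0;
    result.d1 -= tmp.d1;
    return result;
}

The two ins moves are what confine the carry to the high word: without them
the final subtraction would also disturb the freshly computed low word.
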
diff --git a/hotspot/test/compiler/codegen/aes/CTR_Wraparound.java b/hotspot/test/compiler/codegen/aes/CTR_Wraparound.java
new file mode 100644
index 000000000..f578b432c
--- /dev/null
+++ b/hotspot/test/compiler/codegen/aes/CTR_Wraparound.java
@@ -0,0 +1,169 @@
+import javax.crypto.Cipher;
+import javax.crypto.spec.IvParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+import java.lang.reflect.Executable;
+import java.util.Arrays;
+import java.util.Random;
+import java.util.concurrent.Callable;
+
+/**
+ * @test
+ * @bug 8308682
+ * @summary Check for 128-bit AES/CTR wraparound
+ * @library /testlibrary /testlibrary/whitebox /compiler/whitebox /compiler/testlibrary
+ * @build CTR_Wraparound
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   CTR_Wraparound 32
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   CTR_Wraparound 1009
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   CTR_Wraparound 2048
+ */
+
+public class CTR_Wraparound extends CompilerWhiteBoxTest {
+    private static final String ALGO = "AES/CTR/NoPadding";
+    private static final int LOOPS = 100000;
+    private int length;
+    private int maxOffset;
+
+    public CTR_Wraparound(int len,int offset){
+        super(new CTR_WraparoundTestCase());
+        length = len;
+        maxOffset = offset;
+    }
+
+    public static class CTR_WraparoundTestCase implements TestCase {
+
+        public String name() {
+            return "CTR_WraparoundTestCase";
+        }
+
+        public Executable getExecutable(){
+            try {
+                return Class.forName("com.sun.crypto.provider.CounterMode").getDeclaredMethod("implCrypt", byte[].class, int.class, int.class, byte[].class, int.class);
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException("Test bug, method unavailable. " + e);
+            } catch (ClassNotFoundException e) {
+                throw new RuntimeException("Test bug, class unavailable. " + e);
+            }
+        }
+
+        public Callable<Integer> getCallable() {
+            return null;
+        }
+
+        public boolean isOsr() {
+            return false;
+        }
+
+    }
+
+    private static boolean isServerVM(String VMName) { return VMName.toLowerCase().contains("server");}
+
+
+
+    protected static boolean checkIntrinsicForCompilationLevel(Executable method, int compLevel) {
+        boolean intrinsicEnabled = Boolean.valueOf(getVMOption("UseAESCTRIntrinsics"));
+        boolean intrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(method,
+                compLevel);
+        if(intrinsicAvailable && intrinsicEnabled){
+            return true;
+        }
+        return false;
+    }
+
+    public static void main(String[] args) throws Exception {
+        int length = Integer.parseInt(args[0]);
+        int maxOffset = 60;
+        if (args.length > 1) {
+            maxOffset = Integer.parseInt(args[1]);
+            System.out.println("InitialOffset = " + maxOffset);
+        }
+        new CTR_Wraparound(length,maxOffset).test();
+    }
+
+    @Override
+    protected void test() throws Exception {
+
+        String VMName = System.getProperty("java.vm.name");
+        Executable intrinsicMethod = testCase.getExecutable();
+        boolean isIntrinsicEnabled = false;
+        if (isServerVM(VMName)) {
+            if (TIERED_COMPILATION) {
+                isIntrinsicEnabled = checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+            }
+            isIntrinsicEnabled = checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_FULL_OPTIMIZATION);
+        } else {
+            isIntrinsicEnabled = checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+        }
+        if(!isIntrinsicEnabled){
+            return;
+        }
+
+
+        long SEED = Long.getLong("jdk.test.lib.random.seed", new Random().nextLong());
+        Random random = new Random(SEED);
+
+        byte[] keyBytes = new byte[32];
+        Arrays.fill(keyBytes, (byte)0xff);
+        SecretKeySpec key = new SecretKeySpec(keyBytes, "AES");
+
+        byte[] ivBytes = new byte[16];
+
+        Arrays.fill(ivBytes, (byte)0xff);
+
+        byte[][] plaintext = new byte[maxOffset][];
+        byte[][] ciphertext = new byte[maxOffset][];
+
+        for (int offset = 0; offset < maxOffset; offset++) {
+            ivBytes[ivBytes.length - 1] = (byte)-offset;
+            IvParameterSpec iv = new IvParameterSpec(ivBytes);
+
+            Cipher encryptCipher = Cipher.getInstance(ALGO);
+            Cipher decryptCipher = Cipher.getInstance(ALGO);
+
+            encryptCipher.init(Cipher.ENCRYPT_MODE, key, iv);
+            decryptCipher.init(Cipher.DECRYPT_MODE, key, iv);
+
+            plaintext[offset] = new byte[length];
+            ciphertext[offset] = new byte[length];
+            random.nextBytes(plaintext[offset]);
+
+            byte[] decrypted = new byte[length];
+
+            encryptCipher.doFinal(plaintext[offset], 0, length, ciphertext[offset]);
+            decryptCipher.doFinal(ciphertext[offset], 0, length, decrypted);
+
+            if (!Arrays.equals(plaintext[offset], decrypted)) {
+                throw new Exception("mismatch in setup at offset " + offset);
+            }
+        }
+
+        for (int offset = 0; offset < maxOffset; offset++) {
+            ivBytes[ivBytes.length - 1] = (byte)-offset;
+            IvParameterSpec iv = new IvParameterSpec(ivBytes);
+
+            Cipher encryptCipher = Cipher.getInstance(ALGO);
+
+            encryptCipher.init(Cipher.ENCRYPT_MODE, key, iv);
+
+            byte[] encrypted = new byte[length];
+
+            for (int i = 0; i < LOOPS; i++) {
+                encryptCipher.doFinal(plaintext[offset], 0, length, encrypted);
+                if (!Arrays.equals(ciphertext[offset], encrypted)) {
+                    throw new Exception("array mismatch at offset " + offset
+                            + " with length " + length);
+                }
+            }
+        }
+    }
+}
-- 
2.19.1
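
The test drives the stub straight into the wraparound: every IV byte is 0xff and
the last byte is set to (byte)-offset, so the counter starts at most offset
increments below the carry boundary of its low 64-bit word (and, with the high
word also all ones, below the full 128-bit wrap). Any dropped carry then shows
up as a keystream mismatch across the 100000 repeated encryptions. A standalone
sketch of the counters involved (values mirror the test's setup; code is ours):

#include <cstdint>
#include <cstdio>

int main() {
    for (int offset = 1; offset <= 3; offset++) {
        // IV = ff..ff with the last byte (byte)-offset, as in the test
        uint64_t hi = ~0ULL;
        uint64_t lo = 0xffffffffffffff00ULL | (0x100 - offset);
        for (int block = 0; block <= offset; block++) {  // one increment per block
            uint64_t next = lo + 1;
            if (next < lo) hi++;     // the carry the old stub dropped
            lo = next;
        }
        printf("offset %d: counter after %d blocks = %016llx%016llx\n",
               offset, offset + 1, (unsigned long long)hi, (unsigned long long)lo);
    }
    return 0;
}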