openssl/backport-Always-end-BN_mod_exp_mont_consttime-with-normal-Mon.patch
2022-12-21 10:51:12 +08:00

407 lines
25 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 0ed27fb7a8d85685cb671bf0a1e41bcdfc2624dc Mon Sep 17 00:00:00 2001
From: Tomas Mraz <tomas@openssl.org>
Date: Thu, 9 Jun 2022 12:34:55 +0200
Subject: [PATCH] Always end BN_mod_exp_mont_consttime with normal Montgomery
reduction.
This partially fixes a bug where, on x86_64, BN_mod_exp_mont_consttime
would sometimes return m, the modulus, when it should have returned
zero. Thanks to Guido Vranken for reporting it. It is only a partial fix
because the same bug also exists in the "rsaz" codepath.
The bug only affects zero outputs (with non-zero inputs), so we believe
it has no security impact on our cryptographic functions.
The fx is to delete lowercase bn_from_montgomery altogether, and have the
mont5 path use the same BN_from_montgomery ending as the non-mont5 path.
This only impacts the final step of the whole exponentiation and has no
measurable perf impact.
See the original BoringSSL commit
https://boringssl.googlesource.com/boringssl/+/13c9d5c69d04485a7a8840c12185c832026c8315
for further analysis.
Original-author: David Benjamin <davidben@google.com>
Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/18511)
---
crypto/bn/asm/x86_64-mont5.pl | 196 -------------------------
crypto/bn/bn_exp.c | 44 +++---
test/recipes/10-test_bn_data/bnmod.txt | 67 +++++++++
3 files changed, 93 insertions(+), 214 deletions(-)
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 8c37d132e4..cc7b610145 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -2101,193 +2101,6 @@ __bn_post4x_internal:
.size __bn_post4x_internal,.-__bn_post4x_internal
___
}
-{
-$code.=<<___;
-.globl bn_from_montgomery
-.type bn_from_montgomery,\@abi-omnipotent
-.align 32
-bn_from_montgomery:
-.cfi_startproc
- testl \$7,`($win64?"48(%rsp)":"%r9d")`
- jz bn_from_mont8x
- xor %eax,%eax
- ret
-.cfi_endproc
-.size bn_from_montgomery,.-bn_from_montgomery
-
-.type bn_from_mont8x,\@function,6
-.align 32
-bn_from_mont8x:
-.cfi_startproc
- .byte 0x67
- mov %rsp,%rax
-.cfi_def_cfa_register %rax
- push %rbx
-.cfi_push %rbx
- push %rbp
-.cfi_push %rbp
- push %r12
-.cfi_push %r12
- push %r13
-.cfi_push %r13
- push %r14
-.cfi_push %r14
- push %r15
-.cfi_push %r15
-.Lfrom_prologue:
-
- shl \$3,${num}d # convert $num to bytes
- lea ($num,$num,2),%r10 # 3*$num in bytes
- neg $num
- mov ($n0),$n0 # *n0
-
- ##############################################################
- # Ensure that stack frame doesn't alias with $rptr+3*$num
- # modulo 4096, which covers ret[num], am[num] and n[num]
- # (see bn_exp.c). The stack is allocated to aligned with
- # bn_power5's frame, and as bn_from_montgomery happens to be
- # last operation, we use the opportunity to cleanse it.
- #
- lea -320(%rsp,$num,2),%r11
- mov %rsp,%rbp
- sub $rptr,%r11
- and \$4095,%r11
- cmp %r11,%r10
- jb .Lfrom_sp_alt
- sub %r11,%rbp # align with $aptr
- lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
- jmp .Lfrom_sp_done
-
-.align 32
-.Lfrom_sp_alt:
- lea 4096-320(,$num,2),%r10
- lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
- sub %r10,%r11
- mov \$0,%r10
- cmovc %r10,%r11
- sub %r11,%rbp
-.Lfrom_sp_done:
- and \$-64,%rbp
- mov %rsp,%r11
- sub %rbp,%r11
- and \$-4096,%r11
- lea (%rbp,%r11),%rsp
- mov (%rsp),%r10
- cmp %rbp,%rsp
- ja .Lfrom_page_walk
- jmp .Lfrom_page_walk_done
-
-.Lfrom_page_walk:
- lea -4096(%rsp),%rsp
- mov (%rsp),%r10
- cmp %rbp,%rsp
- ja .Lfrom_page_walk
-.Lfrom_page_walk_done:
-
- mov $num,%r10
- neg $num
-
- ##############################################################
- # Stack layout
- #
- # +0 saved $num, used in reduction section
- # +8 &t[2*$num], used in reduction section
- # +32 saved *n0
- # +40 saved %rsp
- # +48 t[2*$num]
- #
- mov $n0, 32(%rsp)
- mov %rax, 40(%rsp) # save original %rsp
-.cfi_cfa_expression %rsp+40,deref,+8
-.Lfrom_body:
- mov $num,%r11
- lea 48(%rsp),%rax
- pxor %xmm0,%xmm0
- jmp .Lmul_by_1
-
-.align 32
-.Lmul_by_1:
- movdqu ($aptr),%xmm1
- movdqu 16($aptr),%xmm2
- movdqu 32($aptr),%xmm3
- movdqa %xmm0,(%rax,$num)
- movdqu 48($aptr),%xmm4
- movdqa %xmm0,16(%rax,$num)
- .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr
- movdqa %xmm1,(%rax)
- movdqa %xmm0,32(%rax,$num)
- movdqa %xmm2,16(%rax)
- movdqa %xmm0,48(%rax,$num)
- movdqa %xmm3,32(%rax)
- movdqa %xmm4,48(%rax)
- lea 64(%rax),%rax
- sub \$64,%r11
- jnz .Lmul_by_1
-
- movq $rptr,%xmm1
- movq $nptr,%xmm2
- .byte 0x67
- mov $nptr,%rbp
- movq %r10, %xmm3 # -num
-___
-$code.=<<___ if ($addx);
- mov OPENSSL_ia32cap_P+8(%rip),%r11d
- and \$0x80108,%r11d
- cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
- jne .Lfrom_mont_nox
-
- lea (%rax,$num),$rptr
- call __bn_sqrx8x_reduction
- call __bn_postx4x_internal
-
- pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- jmp .Lfrom_mont_zero
-
-.align 32
-.Lfrom_mont_nox:
-___
-$code.=<<___;
- call __bn_sqr8x_reduction
- call __bn_post4x_internal
-
- pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- jmp .Lfrom_mont_zero
-
-.align 32
-.Lfrom_mont_zero:
- mov 40(%rsp),%rsi # restore %rsp
-.cfi_def_cfa %rsi,8
- movdqa %xmm0,16*0(%rax)
- movdqa %xmm0,16*1(%rax)
- movdqa %xmm0,16*2(%rax)
- movdqa %xmm0,16*3(%rax)
- lea 16*4(%rax),%rax
- sub \$32,$num
- jnz .Lfrom_mont_zero
-
- mov \$1,%rax
- mov -48(%rsi),%r15
-.cfi_restore %r15
- mov -40(%rsi),%r14
-.cfi_restore %r14
- mov -32(%rsi),%r13
-.cfi_restore %r13
- mov -24(%rsi),%r12
-.cfi_restore %r12
- mov -16(%rsi),%rbp
-.cfi_restore %rbp
- mov -8(%rsi),%rbx
-.cfi_restore %rbx
- lea (%rsi),%rsp
-.cfi_def_cfa_register %rsp
-.Lfrom_epilogue:
- ret
-.cfi_endproc
-.size bn_from_mont8x,.-bn_from_mont8x
-___
-}
}}}
if ($addx) {{{
@@ -3894,10 +3707,6 @@ mul_handler:
.rva .LSEH_begin_bn_power5
.rva .LSEH_end_bn_power5
.rva .LSEH_info_bn_power5
-
- .rva .LSEH_begin_bn_from_mont8x
- .rva .LSEH_end_bn_from_mont8x
- .rva .LSEH_info_bn_from_mont8x
___
$code.=<<___ if ($addx);
.rva .LSEH_begin_bn_mulx4x_mont_gather5
@@ -3929,11 +3738,6 @@ $code.=<<___;
.byte 9,0,0,0
.rva mul_handler
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # HandlerData[]
-.align 8
-.LSEH_info_bn_from_mont8x:
- .byte 9,0,0,0
- .rva mul_handler
- .rva .Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue # HandlerData[]
___
$code.=<<___ if ($addx);
.align 8
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 8c54ab005c..e21dcff027 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -900,14 +900,21 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
#if defined(OPENSSL_BN_ASM_MONT5)
if (window == 5 && top > 1) {
/*
- * This optimization uses ideas from http://eprint.iacr.org/2011/239,
- * specifically optimization of cache-timing attack countermeasures
- * and pre-computation optimization.
- */
-
- /*
- * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
- * 512-bit RSA is hardly relevant, we omit it to spare size...
+ * This optimization uses ideas from https://eprint.iacr.org/2011/239,
+ * specifically optimization of cache-timing attack countermeasures,
+ * pre-computation optimization, and Almost Montgomery Multiplication.
+ *
+ * The paper discusses a 4-bit window to optimize 512-bit modular
+ * exponentiation, used in RSA-1024 with CRT, but RSA-1024 is no longer
+ * important.
+ *
+ * |bn_mul_mont_gather5| and |bn_power5| implement the "almost"
+ * reduction variant, so the values here may not be fully reduced.
+ * They are bounded by R (i.e. they fit in |top| words), not |m|.
+ * Additionally, we pass these "almost" reduced inputs into
+ * |bn_mul_mont|, which implements the normal reduction variant.
+ * Given those inputs, |bn_mul_mont| may not give reduced
+ * output, but it will still produce "almost" reduced output.
*/
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const void *table, const BN_ULONG *np,
@@ -919,9 +926,6 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const void *table, const BN_ULONG *np,
const BN_ULONG *n0, int num, int power);
int bn_get_bits5(const BN_ULONG *ap, int off);
- int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
- const BN_ULONG *not_used, const BN_ULONG *np,
- const BN_ULONG *n0, int num);
BN_ULONG *n0 = mont->n0, *np;
@@ -1010,14 +1014,18 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
- ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
tmp.top = top;
- bn_correct_top(&tmp);
- if (ret) {
- if (!BN_copy(rr, &tmp))
- ret = 0;
- goto err; /* non-zero ret means it's not error */
- }
+ /*
+ * The result is now in |tmp| in Montgomery form, but it may not be
+ * fully reduced. This is within bounds for |BN_from_montgomery|
+ * (tmp < R <= m*R) so it will, when converting from Montgomery form,
+ * produce a fully reduced result.
+ *
+ * This differs from Figure 2 of the paper, which uses AMM(h, 1) to
+ * convert from Montgomery form with unreduced output, followed by an
+ * extra reduction step. In the paper's terminology, we replace
+ * steps 9 and 10 with MM(h, 1).
+ */
} else
#endif
{
diff --git a/test/recipes/10-test_bn_data/bnmod.txt b/test/recipes/10-test_bn_data/bnmod.txt
index 6c94a0f025..69f8af43d5 100644
--- a/test/recipes/10-test_bn_data/bnmod.txt
+++ b/test/recipes/10-test_bn_data/bnmod.txt
@@ -2474,6 +2474,73 @@ A = 9025e6183706105e948b1b0edf922f9011b9e11887d70adb00b26f272b9e76a38f3099084d9c
E = d7e6df5d755284929b986cd9b61c9c2c8843f24c711fbdbae1a468edcae159400943725570726cdc92b3ea94f9f206729516fdda83e31d815b0c7720e7598a91d992273e3bd8ac413b441d8f1dfe5aa7c3bf3ef573adc38292676217467731e6cf440a59611b8110af88d3e62f60209b513b01fbb69a097458ad02096b5e38f0
M = e4e784aa1fa88625a43ba0185a153a929663920be7fe674a4d33c943d3b898cff051482e7050a070cede53be5e89f31515772c7aea637576f99f82708f89d9e244f6ad3a24a02cbe5c0ff7bcf2dad5491f53db7c3f2698a7c41b44f086652f17bb05fe4c5c0a92433c34086b49d7e1825b28bab6c5a9bd0bc95b53d659afa0d7
+# The following inputs trigger an edge case between Montgomery reduction and the
+# "almost" reduction variant from https://eprint.iacr.org/2011/239
+ModExp = 00
+A = 19c7bc9b97c6083cd7b8d1cd001452c9b67983247169c6532047eb7fc8933014dbf69fee7a358769f1429802c8ea89d4f9ca6ba6f368fbdb1fa5717b4a00
+E = bbc7e09147408571050e8d0c634682c5863b7e8a573626648902cff12e590c74f5a23ecce39732266bc15b8afbd6c48a48c83fbdc33947515cc0b6e4fb98ae2cd730e58f951fec8be7e2e3c74f4506c7fd7e29bdb28675fe8a59789ab1148e931a2ebd2d36f78bc241682a3d8083d8ff538858cd240c5a693936e5a391dc9d77118062a3f868c058440a4192267faaaba91112f45eee5842060febbf9353a6d3e7f7996573209136a5506062ea23d74067f08c613f3ff74bade25f8c3368e6dba84eae672eac11be1137fc514924fcab8c82e46d092bd047dcbadaa48c67a096ec1a04f392a8511e6acbad9954949b703e71ff837337b594055ae6f3c0fc154447a687c9ac8a2cdfd64a2e680c6ff21254735af7f5eb6b43f0bce86bda55a04143a991711081435ed4f4a89b23fc3a588022b7a8543db4bf5c8ac93603367c750ff2191f59a716340fab49bb7544759c8d846465eec1438e76395f73e7b5e945f31f1b87fefa854a0d208846eaab5fa27144fd039911608bab0eaee80f1d3553dfa2d9ba95268479b97a059613660df5ad79796e0b272244aca90ccc13449ec15c206eeed7b60405a4c5cfdf5da5d136c27fa9385d810ad198dfe794ffce9955e10520efea1e2eb794e379401b9affd863b9566ce941c4726755574a1b1946acf0090bfb93f37dd55f524485bbba7fa84b53addfde01ae1de9c57fe50d4b708dd0fa45d02af398b3d05c6d17f84c11e9aacdbe0b146cad6ddbd877731e26a17f3ebed459560d12ed7a6abc2ea6fe922e69d2622ef11b6b245b9ba8f0940faaa671a4beb727be5393a94dafaeff7221b29183e7418f4c5bb95a6a586c93dbc8ce0236d9dbe26c40513611b4141fed66599adbfb20fc30e09a4815e4159f65a6708f34584a7a77b3843941cd61a6917dcc3d07a3dfb5a2cb108bacea7e782f2111b4d22ecaaeff469ecd0da371df1ac5e9bf6df6ccba2d3a9f393d597499eaca2c206bfb81c3426c5fe45bcf16e38aecd246a319a1f37041c638b75a4839517e43a6d01bee7d85eaeedbce13cd15699d3ee42c7414cfed576590e4fb6ddb6edd3e1957efaf039bfe8b9dc75869b1f93abff15cae8b234161070fa3542303c2ed35ca66083d0ac299b81182317a2a3985269602b1fa1e822fcbda48e686d80b273f06b0a702ca7f42cbbbd2fc2b3601422c8bff6302eda3c61b293049636002649b16f3c1f0be2b6599d66493a4497cd795b10a2ab8220fafad24fa90e1bfcf39ecce337e705695c7a224bf9f445a287d6aab221341659ca4be7861f6ac4c9d33dac811e6
+M = 519b6e57781d40d897ec0c1b648d195526726b295438c9a70928ac25979563d72db91c8c42298a33b572edecdf40904c68a23337aa5341b56e92b0da5041
+
+# To fully exercise BN_mod_exp_mont_consttime codepaths, we generate inputs at
+# different bitwidths. rsaz-avx2.pl only runs at 1024-bit moduli, and
+# x86_64-mont5.pl unrolls 8 64-bit words at a time, so we want to capture both
+# multiples of 512- and non-multiples. Also include moduli that are not quite a
+# full word.
+# 512-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 8f42c9e9e351ba9b32ab0cf69da43f4acf7028d19cff6e5059ea0e3fcc97c97f36a31470044737d4c0c933ac441ecb29e32c81401523afdac7de9c3fd8493c97
+
+# 1024-bit
+# TODO(davidben): This test breaks the RSAZ implementation. Fix it and enable
+# this test.
+# ModExp = 00
+# A = 800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002f
+# E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+# M = 9da8dc26fdf4d2e49833b240ee552beb7a6e251caa91bfb5d6cafaf8ed9461877fda8f6ac299036d35806bc1ae7872e54eaac1ec6bee6d02c6621a9cf8883b3abc33c49b3e601203e0e86ef8f0562412cc689ee2670704583909ca6d7774c9f9f9f4d77d37fedef9cb51d207cb629ec02fa03b526fd6594bfa8f2da71238a0b7
+
+# 1025-bit
+ModExp = 00
+A = 010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 010223abfdda02e84e11cec8ee7fc784fa135733935f7b9054bb70f1f06d234d76dcf3beed55c7f39e955dc1fef2b65009240fd02f7a1b27a78fc2867144bf666efb929856db9f671c356c4c67a068a70fe83c52eebda03668872fd270d0794f0771d217fb6b93b12529a944f7f0496a9158757c55b8ee14f803f1d2d887e2f561
+
+# 1088-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = e91f6d748773cb212a23aa348125615123b1800c9ea222c9374c757702ae4140fa333790ed8f6bf60a1d7dda65c2767cc5f33e32e333d19fbfb5a2b85795757c9ca070268763a618e9d33873d28a89bf88acd209efbb15b80cd33b92a6b3a682e1c91782fc24fb86ddff4f809219c977b54b99359094bbcc51dfe17b992ab24b74a17950ad754281
+
+# 1472-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = a8770362f4bfe4fc1ab0e52705c11a9b6ba235d5a5f22197c2d68e27ed18426ede3316af706aa79bcf943dbd51459eb15ae1f9386216b3f3a847f94440a65b97659bc5ba2adb67173714ecaa886c0b926d7a64ea45576f9d2171784ce7e801724d5b0abfd93357d538ea7ad3ad89a74f4660bdb66dfb5f684dcf00402e3cdf0ab58afd867c943c8f47b80268a789456aa7c50a619dd2f9f5e3f74b5d810f0f8dadbf4ad5b917cdcb156c4c132611c8b3b035118a9e03551f
+
+# 1536-bit
+ModExp = 00
+A = 800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 878cd000778f927b2f1a4b8bac86efd282079a7ac0d25e09ffd2f72fbc282e65e233929d2457c7b1d63c56fb706cdfa04fb87e654c578c98d7cf59c2293dc5641086b68db4867105981daaf147a0ee91f6932ef064deae4142c19e58d50c0686f0eaf778be72450f89a98b4680bbc5ffab942195e44dd20616150fd1deca058068ca31ab2f861e99082588f17a2025bf5e536150142fca3187a259c791fc721430f24d7e338f8dc02e693a7e694d42775e80f7f7c03600b6ae86b4aba2b0e991
+
+# 2048-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 9f40a7535c561208ecb38e17c9336d9bc8484d335901b2cd42759cf03689227f6992f10cb6b586d767fbcdf30e9d82a0eda60d2694ccd0194fa96b50b56e0cdeec1951ea9e58b07e334a7f108841a0ab28256917fecea561388807ed124a17386a7a7b501f9cbf3404247a76948d0561e48137d3f9669e36f175731796aeaf78851f7d866917f661422186a4814aa35c066b5a90b9cfc918af769a9f0bb30c12581027df64ac328a0f07dbd20adb704479f6d0f233a131828c71bab19c3c34795ea4fb68aa632c6f688e5b3b84413c9031d8dc251003a590dec0dd09bfa6109ed4570701439b6f265b84ac2170c317357b5fbe5535e2bbdd93c1aacfdaa28c85
+
+# 3072-bit
+ModExp = 00
+A = 80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001d
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = c23dfd244a58a668d514498a705c8f8f548311b24f0f98b023d2d33632534c2ae948d6641d41fd7a29fbbd594bfc7fdd6e8162cbb3056af3075347b6fc8876458d33a9d0ffdbcdf482de0c73d1310fd8fa8f9f92dd0dbb0e2034e98a30f6c11b482f7476c5b593f673a322b1130daa4314e9074270dce1076436f0d56cf196afcbb235a9a7b3ac85b9062e85fc0e63a12c468c787019f6805f9faab64fc6a0babc80785d88740243f11366bffb40ccbe8b2bb7a99a2c8238a6f656bb0117d7b2602aa400f4d77de5f93c673f13264ca70de949454e3e3f261993c1aa427e8ef4f507af744f71f3b4aaf3c981d44cc1bfb1eb1151168762b242b740573df698e500d99612e17dc760f7b3bf7c235e39e81ad7edbe6c07dbb8b139745bb394d61cb799bcafec5de074932b0b2d74797e779ac8d81f63a2b2e9baa229dfaa7f90f34ffade1d2ad022a3407d35eb2d7477c6ae8ad100f6e95c05b4f947c1fabfb11a17add384e6b4cd3a02fd9b43f46805c6c74e366b74aa3b766be7a5fbbd67fa81
+
+# 4096-bit
+ModExp = 00
+A = 8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001
+E = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+M = 8030411ecbddcb0fe4e76fd6b5bf542e8b015d1610cf96130ded12ba2cda0641bd9692080f218ea8b0d751845b519d95b843542ec8d2a07f1f93afe3189b69a4f35c983011c7f7928c3df458cc3eae85c36e6934a4b1bc0a67c8a521de336642c49e10a7ffa8d0af911aacc19e3900449161940f139220e099a150dcaf0ff96ffff6e726c1ac139969103cf6a828ac3adf0301506aa02787b4f570d5dde53a34acab8fec6fa94760abf16ee99954371ad65a6e899daab87b95811d069404991de9abe064ebbddf886e970f10d260c899dda940191a82d4c8bd36651363aff5493f4f59e700007dcadf37ebea7fcfd7600d16617ffea0d9ae659446d851d93c564e50e558f734c894d735fa273770703dab62844d9f01badf632f3d14a00f739c022c9be95f54e9cea46ec6da7cb11f4602e06962951c48204726b7f120ddbd0eb3566dc8d1e6f195a9196e96db33322d088b43aecffe9b4df182dd016aca0bd14f1c56cd1a18b89165c027029862b09ffd78e92ab614349c4fd67f49cb12cd33d0728930d0538bda57acef1365a73cc8fbac7d463b9e3c3bae0bb6224b080cdb8b5cd47d546d53111fdc22b7ff679bcfe27192920ee163b2be337d8cccc93b4de7d2d31934b9c0e97af291dcc1135b4a473bd37114eec3ba75c411887b57799d3188e7353f33a4d31735ebfc9fcfc044985148dd96da3876a5ab7ea7a404b411
# These test vectors satisfy (ModSqrt * ModSqrt) mod P = A mod P with P a prime.
# ModSqrt is in [0, (P-1)/2].
--
2.17.1