tidb/add-riscv-support-for-vendor.patch

diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_386.s vendor/github.com/remyoudompheng/bigfft/arith_386.s
--- vendor/github.com/remyoudompheng/bigfft/arith_386.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_386.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,36 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	JMP	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-TEXT ·subVV(SB),NOSPLIT,$0
-	JMP	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	JMP	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-	JMP	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	JMP	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	JMP	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	JMP	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	JMP	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_amd64.s vendor/github.com/remyoudompheng/bigfft/arith_amd64.s
--- vendor/github.com/remyoudompheng/bigfft/arith_amd64.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_amd64.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,38 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	JMP	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-// (same as addVV except for SBBQ instead of ADCQ and label names)
-TEXT ·subVV(SB),NOSPLIT,$0
-	JMP	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	JMP	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
-TEXT ·subVW(SB),NOSPLIT,$0
-	JMP	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	JMP	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	JMP	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	JMP	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	JMP	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_arm64.s vendor/github.com/remyoudompheng/bigfft/arith_arm64.s
--- vendor/github.com/remyoudompheng/bigfft/arith_arm64.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_arm64.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,36 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	B	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-TEXT ·subVV(SB),NOSPLIT,$0
-	B	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	B	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-	B	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	B	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	B	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	B	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	B	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_arm.s vendor/github.com/remyoudompheng/bigfft/arith_arm.s
--- vendor/github.com/remyoudompheng/bigfft/arith_arm.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_arm.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,36 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	B	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-TEXT ·subVV(SB),NOSPLIT,$0
-	B	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	B	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-	B	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	B	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	B	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	B	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	B	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_decl.go vendor/github.com/remyoudompheng/bigfft/arith_decl.go
--- vendor/github.com/remyoudompheng/bigfft/arith_decl.go	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_decl.go	2023-07-19 10:40:33.208904639 +0800
@@ -4,13 +4,30 @@

 package bigfft

-import . "math/big"
+import (
+	"math/big"
+	_ "unsafe"
+)

-// implemented in arith_$GOARCH.s
+type Word = big.Word
+
+//go:linkname addVV math/big.addVV
 func addVV(z, x, y []Word) (c Word)
+
+//go:linkname subVV math/big.subVV
 func subVV(z, x, y []Word) (c Word)
+
+//go:linkname addVW math/big.addVW
 func addVW(z, x []Word, y Word) (c Word)
+
+//go:linkname subVW math/big.subVW
 func subVW(z, x []Word, y Word) (c Word)
+
+//go:linkname shlVU math/big.shlVU
 func shlVU(z, x []Word, s uint) (c Word)
+
+//go:linkname mulAddVWW math/big.mulAddVWW
 func mulAddVWW(z, x []Word, y, r Word) (c Word)
+
+//go:linkname addMulVVW math/big.addMulVVW
 func addMulVVW(z, x []Word, y Word) (c Word)
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s
--- vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_mips64x.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,40 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-// +build mips64 mips64le
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	JMP	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-// (same as addVV except for SBBQ instead of ADCQ and label names)
-TEXT ·subVV(SB),NOSPLIT,$0
-	JMP	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	JMP	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
-TEXT ·subVW(SB),NOSPLIT,$0
-	JMP	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	JMP	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	JMP	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	JMP	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	JMP	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s
--- vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_mipsx.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,40 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-// +build mips mipsle
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	JMP	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-// (same as addVV except for SBBQ instead of ADCQ and label names)
-TEXT ·subVV(SB),NOSPLIT,$0
-	JMP	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	JMP	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-// (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
-TEXT ·subVW(SB),NOSPLIT,$0
-	JMP	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	JMP	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	JMP	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	JMP	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	JMP	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s
--- vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/arith_ppc64x.s	1970-01-01 08:00:00.000000000 +0800
@@ -1,38 +0,0 @@
-// Trampolines to math/big assembly implementations.
-
-// +build ppc64 ppc64le
-
-#include "textflag.h"
-
-// func addVV(z, x, y []Word) (c Word)
-TEXT ·addVV(SB),NOSPLIT,$0
-	BR	math∕big·addVV(SB)
-
-// func subVV(z, x, y []Word) (c Word)
-TEXT ·subVV(SB),NOSPLIT,$0
-	BR	math∕big·subVV(SB)
-
-// func addVW(z, x []Word, y Word) (c Word)
-TEXT ·addVW(SB),NOSPLIT,$0
-	BR	math∕big·addVW(SB)
-
-// func subVW(z, x []Word, y Word) (c Word)
-TEXT ·subVW(SB),NOSPLIT,$0
-	BR	math∕big·subVW(SB)
-
-// func shlVU(z, x []Word, s uint) (c Word)
-TEXT ·shlVU(SB),NOSPLIT,$0
-	BR	math∕big·shlVU(SB)
-
-// func shrVU(z, x []Word, s uint) (c Word)
-TEXT ·shrVU(SB),NOSPLIT,$0
-	BR	math∕big·shrVU(SB)
-
-// func mulAddVWW(z, x []Word, y, r Word) (c Word)
-TEXT ·mulAddVWW(SB),NOSPLIT,$0
-	BR	math∕big·mulAddVWW(SB)
-
-// func addMulVVW(z, x []Word, y Word) (c Word)
-TEXT ·addMulVVW(SB),NOSPLIT,$0
-	BR	math∕big·addMulVVW(SB)
-
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/bench vendor/github.com/remyoudompheng/bigfft/benchmarks/bench
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/bench	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/bench	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,20 @@
+# Benchmark of combined FFT and math/big (using threshold)
+# Run on a Core i5-4590 (Go 1.8)
+BenchmarkMul_1kb      	 3000000	       525 ns/op
+BenchmarkMul_10kb     	  100000	     19274 ns/op
+BenchmarkMul_50kb     	    5000	    213375 ns/op
+BenchmarkMul_100kb    	    2000	    651794 ns/op
+BenchmarkMul_1Mb      	     200	   8546244 ns/op
+BenchmarkMul_5Mb      	      30	  49127283 ns/op
+BenchmarkMul_10Mb     	      10	 109888838 ns/op
+BenchmarkMul_20Mb     	       5	 227088971 ns/op
+BenchmarkMul_50Mb     	       2	 731298339 ns/op
+BenchmarkMul_100Mb    	       1	1480340166 ns/op
+
+BenchmarkMul_1x5Mb    	      50	  28872973 ns/op
+BenchmarkMul_1x10Mb   	      20	  58841416 ns/op
+BenchmarkMul_1x20Mb   	      10	 124189252 ns/op
+BenchmarkMul_1x50Mb   	       3	 349402586 ns/op
+BenchmarkMul_5x20Mb   	      10	 153528843 ns/op
+BenchmarkMul_5x50Mb   	       3	 348753322 ns/op
+
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.big vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.big
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.big	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.big	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,19 @@
+# Benchmarks run on a Core i5-4590 (Go 1.8)
+BenchmarkMul_1kb      	 5000000	       298 ns/op
+BenchmarkMul_10kb     	  100000	     14562 ns/op
+BenchmarkMul_50kb     	   10000	    204698 ns/op
+BenchmarkMul_100kb    	    2000	    636230 ns/op
+BenchmarkMul_1Mb      	      50	  25950594 ns/op
+BenchmarkMul_5Mb      	       5	 314799939 ns/op
+BenchmarkMul_10Mb     	       2	 943065686 ns/op
+BenchmarkMul_20Mb     	       1	2837283743 ns/op
+BenchmarkMul_50Mb     	       1	14329431306 ns/op
+BenchmarkMul_100Mb    	       1	42590328264 ns/op
+
+BenchmarkMul_1x5Mb    	      10	 126106007 ns/op
+BenchmarkMul_1x10Mb   	       5	 248876061 ns/op
+BenchmarkMul_1x20Mb   	       3	 492849546 ns/op
+BenchmarkMul_1x50Mb   	       1	1249673962 ns/op
+BenchmarkMul_5x20Mb   	       1	1261943492 ns/op
+BenchmarkMul_5x50Mb   	       1	3098019651 ns/op
+
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.fft vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.fft
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.fft	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.fft	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,20 @@
+# Benchmarks using the mulFFT function only.
+# Run on a Core i5-4590 (Go 1.8)
+BenchmarkMul_1kb      	  200000	      9737 ns/op
+BenchmarkMul_10kb     	   10000	    105408 ns/op
+BenchmarkMul_50kb     	    3000	    584090 ns/op
+BenchmarkMul_100kb    	    2000	    973130 ns/op
+BenchmarkMul_1Mb      	     200	   8622463 ns/op
+BenchmarkMul_5Mb      	      30	  48602728 ns/op
+BenchmarkMul_10Mb     	      10	 109184721 ns/op
+BenchmarkMul_20Mb     	       5	 227053895 ns/op
+BenchmarkMul_50Mb     	       2	 727421044 ns/op
+BenchmarkMul_100Mb    	       1	1550029484 ns/op
+
+BenchmarkMul_1x5Mb    	      50	  28827150 ns/op
+BenchmarkMul_1x10Mb   	      20	  58097775 ns/op
+BenchmarkMul_1x20Mb   	      10	 124998246 ns/op
+BenchmarkMul_1x50Mb   	       3	 350045770 ns/op
+BenchmarkMul_5x20Mb   	      10	 160220847 ns/op
+BenchmarkMul_5x50Mb   	       3	 350824154 ns/op
+
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.gmp vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.gmp
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.gmp	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/bench.gmp	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,21 @@
+# These benchmarks were produced using gmpbench at
+# http://gmplib.org/gmpbench.html
+# and converted to the go test output format.
+# Numbers are for a Core i5-4590 with GMP 6.1.2
+BenchmarkMul_1kb        47143107	       175 ns/op
+BenchmarkMul_10kb	 1321291	      7573 ns/op
+BenchmarkMul_50kb	  125645	     79693 ns/op
+BenchmarkMul_100kb	   47298	    211500 ns/op
+BenchmarkMul_1Mb	    2950	   3344500 ns/op
+BenchmarkMul_5Mb	     413	  23920000 ns/op
+BenchmarkMul_10Mb	     164          60606000 ns/op
+BenchmarkMul_20Mb	      78	 127700000 ns/op
+BenchmarkMul_50Mb	       8	 352100000 ns/op
+BenchmarkMul_100Mb	       4	 746270000 ns/op
+
+BenchmarkMul_1x5Mb        884          11670000 ns/op
+BenchmarkMul_1x10Mb       337          27174000 ns/op
+BenchmarkMul_1x20Mb       195          52630000 ns/op
+BenchmarkMul_1x50Mb        70         131000000 ns/op
+BenchmarkMul_5x20Mb       134          74188000 ns/op
+BenchmarkMul_5x50Mb        49         207770000 ns/op
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.big vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.big
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.big	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.big	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,10 @@
+Benchmarks on a Core i5-4590
+
+BenchmarkScan1k-4      	  200000	      9995 ns/op	      32 B/op	       1 allocs/op
+BenchmarkScan10k-4     	   10000	    175356 ns/op	      57 B/op	       1 allocs/op
+BenchmarkScan100k-4    	     200	   9427422 ns/op	  117499 B/op	       6 allocs/op
+BenchmarkScan1M-4      	       1	1776707489 ns/op	2197961776 B/op	   10386 allocs/op
+BenchmarkScan2M-4      	       1	6865499995 ns/op	8708998320 B/op	   20774 allocs/op
+BenchmarkScan5M-4      	       1	42641034189 ns/op	54105679664 B/op	   51925 allocs/op
+BenchmarkScan10M-4     	       1	151975273589 ns/op	215978795792 B/op	  103837 allocs/op
+
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.bigfft vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.bigfft
--- vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.bigfft	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/benchmarks/scan.bigfft	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,10 @@
+Benchmarks on a Core i5-4590
+
+BenchmarkScan1k-4     	  200000	     10876 ns/op	    2784 B/op	      13 allocs/op
+BenchmarkScan10k-4    	    5000	    243806 ns/op	   86796 B/op	      64 allocs/op
+BenchmarkScan100k-4   	     200	   6780545 ns/op	 1733425 B/op	     332 allocs/op
+BenchmarkScan1M-4     	      10	 144867502 ns/op	41509963 B/op	    3130 allocs/op
+BenchmarkScan2M-4     	       3	 346540778 ns/op	94912754 B/op	    6213 allocs/op
+BenchmarkScan5M-4     	       1	1069878799 ns/op	278606280 B/op	   15444 allocs/op
+BenchmarkScan10M-4    	       1	2693328580 ns/op	625284488 B/op	   30842 allocs/op
+
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/calibrate_test.go vendor/github.com/remyoudompheng/bigfft/calibrate_test.go
--- vendor/github.com/remyoudompheng/bigfft/calibrate_test.go	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/calibrate_test.go	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,159 @@
+// Usage: go test -run=TestCalibrate -calibrate
+
+package bigfft
+
+import (
+	"flag"
+	"fmt"
+	"testing"
+	"time"
+)
+
+var calibrate = flag.Bool("calibrate", false, "run calibration test")
+
+// measureMul benchmarks math/big versus FFT multiplication of two
+// th-bit operands and returns the time per operation of each.
+func measureMul(th int) (tBig, tFFT time.Duration) {
+	bigRes := testing.Benchmark(func(b *testing.B) {
+		benchmarkMulBig(b, th, th)
+	})
+	fftRes := testing.Benchmark(func(b *testing.B) {
+		benchmarkMulFFT(b, th, th)
+	})
+	tBig, tFFT = time.Duration(bigRes.NsPerOp()), time.Duration(fftRes.NsPerOp())
+	return tBig, tFFT
+}
+
+// roundDur truncates d to whole milliseconds above 100ms, else to whole microseconds.
+func roundDur(d time.Duration) time.Duration {
+	if d > 100*time.Millisecond {
+		return d.Truncate(time.Millisecond)
+	}
+	return d.Truncate(time.Microsecond)
+}
+
+// TestCalibrateThreshold prints the speedup of FFT over math/big at
+// nine operand sizes spread between lower and upper, then narrows that
+// range and repeats, for at most three rounds. It measures nothing and
+// only logs a notice unless the -calibrate flag is set.
+func TestCalibrateThreshold(t *testing.T) {
+	if !*calibrate {
+		t.Log("not calibrating, use -calibrate to do so.")
+		return
+	}
+
+	lower := int(1e3)   // math/big is faster at this size.
+	upper := int(300e3) // FFT is faster at this size.
+
+	var (
+		sizes    [9]int
+		speedups [9]float64
+	)
+
+	for round := 0; round < 3; round++ {
+		for step := 1; step <= 9; step++ {
+			sz := ((10-step)*lower + step*upper) / 10
+			big, fft := measureMul(sz)
+			spd := float64(big) / float64(fft)
+			sizes[step-1] = sz
+			speedups[step-1] = spd
+			fmt.Printf("speedup of FFT over math/big at size %d bits: %.2f (%s vs %s)\n",
+				sz, spd, roundDur(big), roundDur(fft))
+		}
+
+		narrow := false
+		// Raise lower to the last leading size where FFT is clearly slower.
+		for i := 0; i < len(speedups) && speedups[i] < .98; i++ {
+			lower, narrow = sizes[i], true
+		}
+		// Drop upper to the first trailing size where FFT is clearly faster.
+		for i := len(speedups) - 1; i >= 0 && speedups[i] > 1.02; i-- {
+			upper, narrow = sizes[i], true
+		}
+		if lower >= upper {
+			panic("impossible")
+		}
+		if !narrow || (upper-lower) <= 10 {
+			break
+		}
+	}
+	fmt.Printf("sizes: %d\n", sizes)
+	fmt.Printf("speedups: %.2f\n", speedups)
+}
+
+// measureFFTSize benchmarks splitting two random w-word numbers with
+// polyFromNat for parameter k, multiplying them, and converting back.
+func measureFFTSize(w int, k uint) time.Duration {
+	res := testing.Benchmark(func(b *testing.B) {
+		x, y := rndNat(w), rndNat(w)
+		m := (w+w)>>k + 1
+		for n := b.N; n > 0; n-- {
+			xp := polyFromNat(x, k, m)
+			yp := polyFromNat(y, k, m)
+			prod := xp.Mul(&yp)
+			_ = prod.Int()
+		}
+	})
+	return time.Duration(res.NsPerOp())
+}
+
+// TestCalibrateFFT prints, for each k from 3 to 16, how much faster
+// measureFFTSize runs with k+1 than with k at nine word sizes between
+// lows[k] and his[k], narrowing that range for at most three rounds.
+// It only logs a notice unless the -calibrate flag is set.
+func TestCalibrateFFT(t *testing.T) {
+	if !*calibrate {
+		t.Log("not calibrating, use -calibrate to do so.")
+		return
+	}
+
+	lows := [...]int{10, 10, 10, 10,
+		20, 50, 100, 200, 500, // 8
+		1000, 2000, 5000, 10000, // 12
+		20000, 50000, 100e3, 200e3, // 16
+	}
+	his := [...]int{100, 100, 100, 200,
+		500, 1000, 2000, 5000, 10000, // 8
+		50e3, 100e3, 200e3, 800e3, // 12
+		2e6, 5e6, 10e6, 20e6, // 16
+	}
+	for k := uint(3); k <= 16; k++ {
+		// Measure the speedup between k and k+1
+		low := lows[k] // FFT of size 1<<k known to be faster
+		hi := his[k]   // FFT of size 2<<k known to be faster
+		var (
+			sizes    [9]int
+			speedups [9]float64
+		)
+
+		for round := 0; round < 3; round++ {
+			for step := 1; step <= 9; step++ {
+				sz := ((10-step)*low + step*hi) / 10
+				t1, t2 := measureFFTSize(sz, k), measureFFTSize(sz, k+1)
+				spd := float64(t1) / float64(t2)
+				sizes[step-1] = sz
+				speedups[step-1] = spd
+				fmt.Printf("speedup of %d vs %d at size %d words: %.2f (%s vs %s)\n",
+					k+1, k, sz, spd, roundDur(t1), roundDur(t2))
+			}
+
+			narrow := false
+			// Raise low to the last leading size where k is clearly faster.
+			for i := 0; i < len(speedups) && speedups[i] < .98; i++ {
+				low, narrow = sizes[i], true
+			}
+			// Drop hi to the first trailing size where k+1 is clearly faster.
+			for i := len(speedups) - 1; i >= 0 && speedups[i] > 1.02; i-- {
+				hi, narrow = sizes[i], true
+			}
+			if low >= hi {
+				panic("impossible")
+			}
+			if !narrow || (hi-low) <= 10 {
+				break
+			}
+		}
+		fmt.Printf("sizes: %d\n", sizes)
+		fmt.Printf("speedups: %.2f\n", speedups)
+	}
+}
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/fermat_test.go vendor/github.com/remyoudompheng/bigfft/fermat_test.go
--- vendor/github.com/remyoudompheng/bigfft/fermat_test.go	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/fermat_test.go	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,153 @@
+package bigfft
+
+import (
+	"fmt"
+	"math/big"
+	"testing"
+)
+
+type (
+	Int = big.Int
+)
+
+// parseHex parses s, whose base is taken from its prefix, and returns
+// its words zero-padded to span more than bits bits. Panics on bad input.
+func parseHex(s string, bits int) fermat {
+	z, ok := new(Int).SetString(s, 0)
+	if !ok {
+		panic(s)
+	}
+	f := fermat(z.Bits())
+	for bits >= len(f)*_W {
+		f = append(f, 0)
+	}
+	return f
+}
+
+// compare reports a test error when a and b differ as integers.
+func compare(t *testing.T, prefix string, a, b fermat) {
+	x := new(Int).SetBits(a)
+	y := new(Int).SetBits(b)
+	if x.Cmp(y) != 0 {
+		t.Errorf("%s: %x != %x", prefix, x, y)
+	}
+}
+
+func TestFermatShift(t *testing.T) {
+	const n = 4 // operand width in words; reference results are reduced mod 2^(n*_W)+1
+	f := make(fermat, n+1)
+	for i := 0; i < n; i++ {
+		f[i] = Word(rnd.Int63())
+	}
+	b := big.NewInt(1) // b is built up to the modulus 2^(n*_W) + 1
+	b = b.Lsh(b, uint(n*_W))
+	b = b.Add(b, big.NewInt(1))
+	z := make(fermat, len(f)) // Test with uninitialized z.
+	for shift := -2048; shift < 2048; shift++ {
+		z.Shift(f, shift)
+
+		z2 := new(Int)
+		z2.SetBits(f) // NOTE(review): SetBits shares f's backing array with z2 — confirm the in-place Lsh/Mod below cannot corrupt f
+		if shift < 0 {
+			s2 := (-shift) % (2 * n * _W) // 2^(2*n*_W) is 1 mod b, so a left shift by 2*n*_W-s2 stands in for a right shift by s2
+			z2 = z2.Lsh(z2, uint(2*n*_W-s2))
+		} else {
+			z2 = z2.Lsh(z2, uint(shift))
+		}
+		z2 = z2.Mod(z2, b)
+		compare(t, fmt.Sprintf("shift %d", shift), z, z2.Bits())
+	}
+}
+
+func TestFermatShiftHalf(t *testing.T) {
+	const n = 3 // operand is n all-ones words; reference results are reduced mod 2^(n*_W)+1
+	f := make(fermat, n+1)
+	for i := 0; i < n; i++ {
+		f[i] = ^Word(0)
+	}
+	b := big.NewInt(1) // b is built up to the modulus 2^(n*_W) + 1
+	b = b.Lsh(b, uint(n*_W))
+	b = b.Add(b, big.NewInt(1))
+	z := make(fermat, len(f)) // Test with uninitialized z.
+	tmp := make(fermat, len(f))
+	tmp2 := make(fermat, len(f))
+	for shift := 0; shift < 16384; shift++ {
+		// Apply ShiftHalf twice; the result is checked against one full shift by shift bits.
+		z.ShiftHalf(f, shift, tmp)
+		copy(tmp, z)
+		z.ShiftHalf(tmp, shift, tmp2)
+
+		z2 := new(Int)
+		z2 = z2.Lsh(new(Int).SetBits(f), uint(shift))
+		z2 = z2.Mod(z2, b)
+		compare(t, fmt.Sprintf("shift %d", shift), z, z2.Bits())
+	}
+}
+
+type test struct{ a, b, c fermat }
+
+// addTests is a series of mod 2^256+1 tests.
+var addTests = []test{
+	{
+		parseHex("0x5555555555555555555555555555555555555555555555555555555555555555", 256),
+		parseHex("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", 256),
+		parseHex("0x10000000000000000000000000000000000000000000000000000000000000000", 256),
+	},
+	{
+		parseHex("0x5555555555555555555555555555555555555555555555555555555555555555", 256),
+		parseHex("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 256),
+		parseHex("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 256),
+	},
+	{
+		parseHex("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 256),
+		parseHex("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 256),
+		parseHex("0x5555555555555555555555555555555555555555555555555555555555555553", 256),
+	},
+}
+
+func TestFermatAdd(t *testing.T) {
+	for i, item := range addTests { // each case expects Add(a, b) to equal c
+		z := make(fermat, len(item.a))
+		z = z.Add(item.a, item.b)
+		compare(t, fmt.Sprintf("addTests[%d]", i), z, item.c)
+	}
+}
+
+var mulTests = []test{
+	{ // 3^400 = 3^200 * 3^200
+		parseHex("0xc21a937a76f3432ffd73d97e447606b683ecf6f6e4a7ae223c2578e26c486a03", 256),
+		parseHex("0xc21a937a76f3432ffd73d97e447606b683ecf6f6e4a7ae223c2578e26c486a03", 256),
+		parseHex("0x0e65f4d3508036eaca8faa2b8194ace009c863e44bdc040c459a7127bf8bcc62", 256),
+	},
+	{ // 2^256 * 2^256 mod (2^256+1) = 1.
+		parseHex("0x10000000000000000000000000000000000000000000000000000000000000000", 256),
+		parseHex("0x10000000000000000000000000000000000000000000000000000000000000000", 256),
+		parseHex("0x1", 256),
+	},
+	{ // (2^256-1) * (2^256-1) mod (2^256+1) = 4.
+		parseHex("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 256),
+		parseHex("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 256),
+		parseHex("0x4", 256),
+	},
+	{ // 1<<(64W) * 1<<(64W) mod (1<<64W+1) = 1
+		fermat{64: 1},
+		fermat{64: 1},
+		fermat{0: 1},
+	},
+	{
+		// Test case from issue 1. One of the squares of the Fourier
+		// transforms was miscomputed.
+		// The input number is made of 18 words, but we are working modulo 2^1280+1
+		parseHex("0xfffffffffffffffffffffffeffffffffffffffffffffffffffffffffffff00000000000000000000000100000000000000000000000000000000000000000000000000000000fffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff000100000000000000000000000100000000000000000000000000000000fffefffffffffffffffffffd", 1280),
+		parseHex("0xfffffffffffffffffffffffeffffffffffffffffffffffffffffffffffff00000000000000000000000100000000000000000000000000000000000000000000000000000000fffeffffffffffffffffffffffffffffffffffffffffffffffffffffffff000100000000000000000000000100000000000000000000000000000000fffefffffffffffffffffffd", 1280),
+		parseHex("0xfffe00000003fffc0000000000000000fff40003000000000000000000060001fffffffd0001fffffffffffffffe000dfffbfffffffffffffffffffafffe0000000200000000000000000002fff60002fffffffffffffffa00060001ffffffff0000000000000000fffc0007fffe0000000000000007fff8fffdfffffffffffffffffffa00000004fffa0000fffffffffffffff600080000000000000000000a", 1280),
+	},
+}
+
+func TestFermatMul(t *testing.T) {
+	for i, item := range mulTests {
+		z := make(fermat, 3*len(item.a)) // NOTE(review): presumably oversized to give Mul scratch room — confirm
+		z = z.Mul(item.a, item.b)
+		compare(t, fmt.Sprintf("mulTests[%d]", i), z, item.c) // each case expects Mul(a, b) to equal c
+	}
+}
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/fft_test.go vendor/github.com/remyoudompheng/bigfft/fft_test.go
--- vendor/github.com/remyoudompheng/bigfft/fft_test.go	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/fft_test.go	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,354 @@
+package bigfft
+
+import (
+	"fmt"
+	"math/big"
+	"math/rand"
+	"testing"
+)
+
+// cmpnat compares x and y as integers, logging the differing words on mismatch.
+func cmpnat(t *testing.T, x, y nat) int {
+	a, b := new(Int).SetBits(x), new(Int).SetBits(y)
+	c := a.Cmp(b)
+	if c == 0 {
+		return c
+	}
+	t.Logf("a.len=%d, b.len=%d", a.BitLen(), b.BitLen())
+	for i := 0; i < len(x) || i < len(y); i++ {
+		var u, v Word
+		if i < len(x) {
+			u = x[i]
+		}
+		if i < len(y) {
+			v = y[i]
+		}
+		if diff := u ^ v; diff != 0 {
+			t.Logf("diff at word %d: %x", i, diff)
+		}
+	}
+	return c
+}
+
+// TestRoundTripIntPoly checks that polyFromNat followed by Int returns the input.
+func TestRoundTripIntPoly(t *testing.T) {
+	const step = 500
+	iters := 4
+	if testing.Short() {
+		iters = 2
+	}
+	// Sizes 12800 and 34300 may cause problems.
+	for size := 300; size < 50000; size += step {
+		n := make(nat, size)
+		for iter := 0; iter < iters; iter++ {
+			for p := range n {
+				n[p] = Word(rand.Int63())
+			}
+			k, m := fftSize(n, nil)
+			pol := polyFromNat(n, k, m)
+			if got := pol.Int(); cmpnat(t, n, got) != 0 {
+				t.Errorf("different n and n2, size=%d, iter=%d", size, iter)
+			}
+		}
+	}
+}
+
+func TestFourierSizes(t *testing.T) {
+	sizes := []int{
+		2e3, 3e3, 5e3, 7e3, 10e3, 14e3,
+		2e4, 3e4, 5e4, 7e4, 10e4, 14e4,
+		2e5, 3e5, 5e5, 7e5, 10e5, 14e5,
+		2e6, 3e6, 5e6, 7e6, 10e6, 14e6,
+		2e7, 3e7, 5e7, 7e7, 10e7, 14e7,
+		2e8, 3e8, 5e8, 7e8, 10e8, 14e8,
+	}
+	for _, s := range sizes { // s is the size in bits of both operands
+		k, m := fftSize(make(nat, s/_W), make(nat, s/_W))
+		v := valueSize(k, m, 2)
+		t.Logf("bits=%d => FFT size %d, chunk size = %d, value size = %d",
+			s, 1<<k, m, v)
+		needed := 2*m*_W + int(k) // bits in a product of two m-word chunks, plus k
+		got := v * _W
+		t.Logf("inefficiency: value/chunk_product=%.2f, fftsize/inputsize=%.2f",
+			float64(got)/float64(needed), float64(v<<k)/float64(2*s/_W))
+		if v > 3*m { // fail when a value is more than three times the chunk size
+			t.Errorf("FFT word size %d >> input word size %d", v, m)
+		}
+	}
+}
+
+// testFourier runs cmpFourier both ways on 1<<k random, then saturated, N-word coefficients.
+func testFourier(t *testing.T, N int, k uint) {
+	// Random coefficients
+	src := make([]fermat, 1<<k)
+	for i := range src {
+		src[i] = make(fermat, N+1)
+		for p := range src[i][:N] {
+			src[i][p] = Word(rnd.Int63())
+		}
+	}
+	cmpFourier(t, N, k, src, false)
+	cmpFourier(t, N, k, src, true)
+	// Saturated coefficients (b^N-1)
+	for _, coef := range src {
+		for p := range coef[:N] {
+			coef[p] = ^Word(0)
+		}
+	}
+	cmpFourier(t, N, k, src, false)
+	cmpFourier(t, N, k, src, true)
+}
+
+// cmpFourier computes the transform of src (inverse if set) with a naive
+// double loop and reports an error for each output differing from fourier's.
+func cmpFourier(t *testing.T, N int, k uint, src []fermat, inverse bool) {
+	t.Logf("testFourier(t, %d, %d, inverse=%v)", N, k, inverse)
+	ωshift := (4 * N * _W) >> k
+	if inverse {
+		ωshift = -ωshift
+	}
+	dst1 := make([]fermat, 1<<k)
+	dst2 := make([]fermat, 1<<k)
+	for i := range src {
+		dst1[i] = make(fermat, N+1)
+		dst2[i] = make(fermat, N+1)
+	}
+
+	// naive transform
+	tmp := make(fermat, N+1)
+	tmp2 := make(fermat, N+1)
+	for i := range src {
+		for j := range dst1 {
+			tmp.ShiftHalf(src[i], i*j*ωshift, tmp2)
+			dst1[j].Add(dst1[j], tmp) // accumulate the shifted src[i] into output j
+		}
+	}
+
+	// fast transform
+	fourier(dst2, src, inverse, N, k)
+
+	for i := range src {
+		if cmpnat(t, nat(dst1[i]), nat(dst2[i])) != 0 {
+			var x, y Int
+			x.SetBits(dst1[i])
+			y.SetBits(dst2[i])
+			t.Errorf("difference in dst[%d]: %x %x", i, &x, &y)
+		}
+	}
+}
+
+func TestFourier(t *testing.T) {
+	// 1-word transforms.
+	testFourier(t, 1, 2)
+	testFourier(t, 1, 3)
+	testFourier(t, 1, 4)
+
+	// 2-word transforms
+	testFourier(t, 2, 2)
+	testFourier(t, 2, 3)
+	testFourier(t, 2, 4)
+	testFourier(t, 2, 8)
+
+	testFourier(t, 4, 4) // 4-word transforms
+	testFourier(t, 4, 5)
+	testFourier(t, 4, 6)
+	testFourier(t, 4, 8)
+
+	// Test a few limit cases (odd N with k=8, 64-bit words only). This is when
+	// N*WordSize is a multiple of 1<<(k-2) but not 1<<(k-1)
+	if _W == 64 {
+		testFourier(t, 1, 8)
+		testFourier(t, 3, 8)
+		testFourier(t, 5, 8)
+		testFourier(t, 7, 8)
+		testFourier(t, 9, 8)
+		testFourier(t, 11, 8)
+	}
+}
+
+// TestRoundTripPolyValues checks that Transform followed by InvTransform recovers the input number.
+func TestRoundTripPolyValues(t *testing.T) {
+	Size := 100000
+	if testing.Short() {
+		Size = 50
+	}
+	// Build a polynomial from an integer.
+	n := make(nat, Size)
+	for p := range n {
+		n[p] = Word(rand.Int63())
+	}
+	k, m := fftSize(n, nil)
+	pol := polyFromNat(n, k, m)
+
+	// Transform it.
+	f := valueSize(k, m, 1)
+	values := pol.Transform(f)
+
+	// Inverse transform.
+	pol2 := values.InvTransform()
+	pol2.m = m // NOTE(review): presumably InvTransform does not carry the chunk size over — confirm
+
+	t.Logf("k=%d, m=%d", k, m)
+
+	// Evaluate and compare.
+	n2 := pol2.Int()
+	if cmpnat(t, n, n2) != 0 {
+		t.Errorf("different n and n2")
+	}
+}
+
+var rnd = rand.New(rand.NewSource(0x43de683f473542af))
+
+func rndNat(n int) nat {
+	x := make(nat, n)
+	for i := 0; i < n; i++ {
+		x[i] = Word(rnd.Int63()<<1 + rnd.Int63n(2)) // 63 random bits shifted up by one, plus a random low bit
+	}
+	return x
+}
+
+// TestMul checks Mul against math/big's Int.Mul on random operands of several sizes.
+func TestMul(t *testing.T) {
+	iters := 10
+	if testing.Short() {
+		iters = 1
+	}
+	bitSizes := []int{1e3, 5e3, 15e3, 25e3, 70e3, 200e3, 500e3}
+	var x, y Int
+	for ; iters > 0; iters-- {
+		for _, xBits := range bitSizes {
+			for _, yBits := range bitSizes {
+				x.SetBits(rndNat(xBits / _W))
+				y.SetBits(rndNat(yBits / _W))
+				want := new(Int).Mul(&x, &y)
+				got := Mul(&x, &y)
+				if want.Cmp(got) != 0 {
+					t.Errorf("z (%d bits) != z2 (%d bits)", want.BitLen(), got.BitLen())
+					logbig(t, new(Int).Xor(want, got))
+				}
+			}
+		}
+	}
+}
+
+func logbig(t *testing.T, n *Int) { // logs n in hexadecimal, split into rows
+	s := fmt.Sprintf("%x", n)
+	for len(s) > 64 { // emit full 64-character rows while more than 64 characters remain
+		t.Log(s[:64])
+		s = s[64:]
+	}
+	t.Log(s) // remaining tail, at most 64 characters
+}
+
+func benchmarkMulBig(b *testing.B, sizex, sizey int) { // times Int.Mul on two random operands
+	mulx := rndNat(sizex / _W)
+	muly := rndNat(sizey / _W)
+	b.ResetTimer() // exclude random operand generation from the measurement
+	var x, y, z Int
+	x.SetBits(mulx)
+	y.SetBits(muly)
+	for i := 0; i < b.N; i++ {
+		z.Mul(&x, &y) // z is reused as the receiver on every iteration
+	}
+}
+
+func benchmarkMulFFT(b *testing.B, sizex, sizey int) { // times mulFFT on two random operands
+	mulx := rndNat(sizex / _W)
+	muly := rndNat(sizey / _W)
+	b.ResetTimer() // exclude random operand generation from the measurement
+	var x, y Int
+	x.SetBits(mulx)
+	y.SetBits(muly)
+	for i := 0; i < b.N; i++ {
+		_ = mulFFT(&x, &y) // mulFFT is called directly; the result is discarded
+	}
+}
+
+func BenchmarkMulBig_1kb(b *testing.B)   { benchmarkMulBig(b, 1e3, 1e3) } // size arguments are divided by _W in benchmarkMulBig
+func BenchmarkMulBig_10kb(b *testing.B)  { benchmarkMulBig(b, 1e4, 1e4) }
+func BenchmarkMulBig_50kb(b *testing.B)  { benchmarkMulBig(b, 5e4, 5e4) }
+func BenchmarkMulBig_100kb(b *testing.B) { benchmarkMulBig(b, 1e5, 1e5) }
+func BenchmarkMulBig_200kb(b *testing.B) { benchmarkMulBig(b, 2e5, 2e5) }
+func BenchmarkMulBig_500kb(b *testing.B) { benchmarkMulBig(b, 5e5, 5e5) }
+func BenchmarkMulBig_1Mb(b *testing.B)   { benchmarkMulBig(b, 1e6, 1e6) }
+func BenchmarkMulBig_2Mb(b *testing.B)   { benchmarkMulBig(b, 2e6, 2e6) }
+func BenchmarkMulBig_5Mb(b *testing.B)   { benchmarkMulBig(b, 5e6, 5e6) }
+func BenchmarkMulBig_10Mb(b *testing.B)  { benchmarkMulBig(b, 10e6, 10e6) }
+func BenchmarkMulBig_20Mb(b *testing.B)  { benchmarkMulBig(b, 20e6, 20e6) }
+func BenchmarkMulBig_50Mb(b *testing.B)  { benchmarkMulBig(b, 50e6, 50e6) }
+func BenchmarkMulBig_100Mb(b *testing.B) { benchmarkMulBig(b, 100e6, 100e6) }
+
+func BenchmarkMulFFT_1kb(b *testing.B)   { benchmarkMulFFT(b, 1e3, 1e3) } // size arguments are divided by _W in benchmarkMulFFT
+func BenchmarkMulFFT_10kb(b *testing.B)  { benchmarkMulFFT(b, 1e4, 1e4) }
+func BenchmarkMulFFT_50kb(b *testing.B)  { benchmarkMulFFT(b, 5e4, 5e4) }
+func BenchmarkMulFFT_100kb(b *testing.B) { benchmarkMulFFT(b, 1e5, 1e5) }
+func BenchmarkMulFFT_200kb(b *testing.B) { benchmarkMulFFT(b, 2e5, 2e5) }
+func BenchmarkMulFFT_500kb(b *testing.B) { benchmarkMulFFT(b, 5e5, 5e5) }
+func BenchmarkMulFFT_1Mb(b *testing.B)   { benchmarkMulFFT(b, 1e6, 1e6) }
+func BenchmarkMulFFT_2Mb(b *testing.B)   { benchmarkMulFFT(b, 2e6, 2e6) }
+func BenchmarkMulFFT_5Mb(b *testing.B)   { benchmarkMulFFT(b, 5e6, 5e6) }
+func BenchmarkMulFFT_10Mb(b *testing.B)  { benchmarkMulFFT(b, 10e6, 10e6) }
+func BenchmarkMulFFT_20Mb(b *testing.B)  { benchmarkMulFFT(b, 20e6, 20e6) }
+func BenchmarkMulFFT_50Mb(b *testing.B)  { benchmarkMulFFT(b, 50e6, 50e6) }
+func BenchmarkMulFFT_100Mb(b *testing.B) { benchmarkMulFFT(b, 100e6, 100e6) }
+func BenchmarkMulFFT_200Mb(b *testing.B) { benchmarkMulFFT(b, 200e6, 200e6) }
+func BenchmarkMulFFT_500Mb(b *testing.B) { benchmarkMulFFT(b, 500e6, 500e6) }
+func BenchmarkMulFFT_1Gb(b *testing.B)   { benchmarkMulFFT(b, 1e9, 1e9) }
+
+func benchmarkMul(b *testing.B, sizex, sizey int) { // times the package-level Mul on two random operands
+	mulx := rndNat(sizex / _W)
+	muly := rndNat(sizey / _W)
+	b.ResetTimer() // exclude random operand generation from the measurement
+	for i := 0; i < b.N; i++ {
+		var x, y Int // unlike benchmarkMulBig/benchmarkMulFFT, the Int values are set up inside the timed loop
+		x.SetBits(mulx)
+		y.SetBits(muly)
+		_ = Mul(&x, &y)
+	}
+}
+
+func BenchmarkMul_50kb(b *testing.B)  { benchmarkMul(b, 5e4, 5e4) } // size arguments are divided by _W in benchmarkMul
+func BenchmarkMul_100kb(b *testing.B) { benchmarkMul(b, 1e5, 1e5) }
+func BenchmarkMul_200kb(b *testing.B) { benchmarkMul(b, 2e5, 2e5) }
+func BenchmarkMul_500kb(b *testing.B) { benchmarkMul(b, 5e5, 5e5) }
+func BenchmarkMul_1Mb(b *testing.B)   { benchmarkMul(b, 1e6, 1e6) }
+func BenchmarkMul_2Mb(b *testing.B)   { benchmarkMul(b, 2e6, 2e6) }
+func BenchmarkMul_5Mb(b *testing.B)   { benchmarkMul(b, 5e6, 5e6) }
+func BenchmarkMul_10Mb(b *testing.B)  { benchmarkMul(b, 10e6, 10e6) }
+func BenchmarkMul_20Mb(b *testing.B)  { benchmarkMul(b, 20e6, 20e6) }
+func BenchmarkMul_50Mb(b *testing.B)  { benchmarkMul(b, 50e6, 50e6) }
+func BenchmarkMul_100Mb(b *testing.B) { benchmarkMul(b, 100e6, 100e6) }
+
+// Unbalanced multiplication benchmarks: the two operands have different sizes, run through Mul, Int.Mul and mulFFT.
+func BenchmarkMul_1x5Mb(b *testing.B)  { benchmarkMul(b, 1e6, 5e6) }
+func BenchmarkMul_1x10Mb(b *testing.B) { benchmarkMul(b, 1e6, 10e6) }
+func BenchmarkMul_1x20Mb(b *testing.B) { benchmarkMul(b, 1e6, 20e6) }
+func BenchmarkMul_1x50Mb(b *testing.B) { benchmarkMul(b, 1e6, 50e6) }
+func BenchmarkMul_5x20Mb(b *testing.B) { benchmarkMul(b, 5e6, 20e6) }
+func BenchmarkMul_5x50Mb(b *testing.B) { benchmarkMul(b, 5e6, 50e6) }
+
+func BenchmarkMulBig_1x5Mb(b *testing.B)  { benchmarkMulBig(b, 1e6, 5e6) }
+func BenchmarkMulBig_1x10Mb(b *testing.B) { benchmarkMulBig(b, 1e6, 10e6) }
+func BenchmarkMulBig_1x20Mb(b *testing.B) { benchmarkMulBig(b, 1e6, 20e6) }
+func BenchmarkMulBig_1x50Mb(b *testing.B) { benchmarkMulBig(b, 1e6, 50e6) }
+func BenchmarkMulBig_5x20Mb(b *testing.B) { benchmarkMulBig(b, 5e6, 20e6) }
+func BenchmarkMulBig_5x50Mb(b *testing.B) { benchmarkMulBig(b, 5e6, 50e6) }
+
+func BenchmarkMulFFT_1x5Mb(b *testing.B)  { benchmarkMulFFT(b, 1e6, 5e6) }
+func BenchmarkMulFFT_1x10Mb(b *testing.B) { benchmarkMulFFT(b, 1e6, 10e6) }
+func BenchmarkMulFFT_1x20Mb(b *testing.B) { benchmarkMulFFT(b, 1e6, 20e6) }
+func BenchmarkMulFFT_1x50Mb(b *testing.B) { benchmarkMulFFT(b, 1e6, 50e6) }
+func BenchmarkMulFFT_5x20Mb(b *testing.B) { benchmarkMulFFT(b, 5e6, 20e6) }
+func BenchmarkMulFFT_5x50Mb(b *testing.B) { benchmarkMulFFT(b, 5e6, 50e6) }
+
+func TestIssue1(t *testing.T) {
+	e := big.NewInt(1)
+	e.SetBit(e, 132048, 1)
+	e.Sub(e, big.NewInt(4)) // e == 1<<132048 - 3 (bit 0 was already set by NewInt(1))
+	g := big.NewInt(0).Set(e)
+	e.Mul(e, e)
+	g = Mul(g, g) // same square via the package-level Mul; must equal e computed by big.Int.Mul
+	if g.Cmp(e) != 0 {
+		t.Fatal("incorrect Mul result")
+	}
+}
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/README vendor/github.com/remyoudompheng/bigfft/README
--- vendor/github.com/remyoudompheng/bigfft/README	2021-08-20 15:12:42.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/README	2023-07-19 10:40:33.208904639 +0800
@@ -1,3 +1,14 @@
+This library is a toy proof-of-concept implementation of the
+well-known Schönhage-Strassen method for multiplying integers.
+It is not expected to have a real-life use case outside number
+theory computations, nor is it expected to be used in any production
+system.
+
+If you are using it in your project, you may want to carefully
+examine the actual requirement or problem you are trying to solve.
+
+# Comparison with the standard library and GMP
+
 Benchmarking math/big vs. bigfft

 Number size    old ns/op    new ns/op    delta
diff -ur --new-file vendor/github.com/remyoudompheng/bigfft/scan_test.go vendor/github.com/remyoudompheng/bigfft/scan_test.go
--- vendor/github.com/remyoudompheng/bigfft/scan_test.go	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/remyoudompheng/bigfft/scan_test.go	2023-07-19 10:40:33.208904639 +0800
@@ -0,0 +1,67 @@
+package bigfft
+
+import (
+	"math/big"
+	"testing"
+	"time"
+)
+
+func TestScan(t *testing.T) {
+	for size := 10; size <= 1e5; size += 191 { // digit counts 10, 201, 392, ... up to 1e5
+		s := rndStr(size)
+		x, ok := new(big.Int).SetString(s, 10) // reference value parsed by math/big
+		if !ok {
+			t.Fatal("cannot parse", s)
+		}
+		t0 := time.Now() // timer starts after the reference parse, so the logged duration excludes it
+		y := FromDecimalString(s)
+		if x.Cmp(y) != 0 {
+			t.Errorf("failed at size %d", size)
+		} else {
+			t.Logf("OK for size %d in %s", size, time.Since(t0))
+		}
+	}
+}
+
+func BenchmarkScanFast1k(b *testing.B)   { benchmarkScanFast(1e3, b) } // first argument is the number of decimal digits
+func BenchmarkScanFast10k(b *testing.B)  { benchmarkScanFast(10e3, b) }
+func BenchmarkScanFast100k(b *testing.B) { benchmarkScanFast(100e3, b) }
+func BenchmarkScanFast1M(b *testing.B)   { benchmarkScanFast(1e6, b) }
+func BenchmarkScanFast2M(b *testing.B)   { benchmarkScanFast(2e6, b) }
+func BenchmarkScanFast5M(b *testing.B)   { benchmarkScanFast(5e6, b) }
+func BenchmarkScanFast10M(b *testing.B)  { benchmarkScanFast(10e6, b) }
+
+//func BenchmarkScanFast100M(b *testing.B) { benchmarkScanFast(100e6, b) }
+
+func benchmarkScanFast(n int, b *testing.B) { // times FromDecimalString on a random n-digit string
+	s := rndStr(n) // NOTE(review): no b.ResetTimer follows, so input generation is part of the timed region
+	var x *big.Int
+	for i := 0; i < b.N; i++ {
+		x = FromDecimalString(s)
+	}
+	_ = x // reference x so the compiler does not report it as unused
+}
+
+func BenchmarkScanBig1k(b *testing.B)   { benchmarkScanBig(1e3, b) } // first argument is the number of decimal digits
+func BenchmarkScanBig10k(b *testing.B)  { benchmarkScanBig(10e3, b) }
+func BenchmarkScanBig100k(b *testing.B) { benchmarkScanBig(100e3, b) }
+func BenchmarkScanBig1M(b *testing.B)   { benchmarkScanBig(1e6, b) }
+func BenchmarkScanBig2M(b *testing.B)   { benchmarkScanBig(2e6, b) }
+func BenchmarkScanBig5M(b *testing.B)   { benchmarkScanBig(5e6, b) }
+func BenchmarkScanBig10M(b *testing.B)  { benchmarkScanBig(10e6, b) }
+
+func benchmarkScanBig(n int, b *testing.B) { // times big.Int.SetString (base 10) on a random n-digit string
+	s := rndStr(n)
+	var x big.Int
+	for i := 0; i < b.N; i++ {
+		x.SetString(s, 10) // both return values (result and ok flag) are ignored
+	}
+}
+
+func rndStr(n int) string { // returns a string of n random decimal digits
+	x := make([]byte, n)
+	for i := 0; i < n; i++ {
+		x[i] = '0' + byte(rnd.Intn(10)) // ASCII digit '0'..'9'; any position, including the first, may be '0'
+	}
+	return string(x)
+}
diff -ur --new-file vendor/github.com/shirou/gopsutil/host/host_linux_riscv64.go vendor/github.com/shirou/gopsutil/host/host_linux_riscv64.go
--- vendor/github.com/shirou/gopsutil/host/host_linux_riscv64.go	1970-01-01 08:00:00.000000000 +0800
+++ vendor/github.com/shirou/gopsutil/host/host_linux_riscv64.go	2023-07-18 21:42:35.443531356 +0800
@@ -0,0 +1,49 @@
+// Created by cgo -godefs - DO NOT EDIT
+// cgo -godefs types_linux.go
+
+package host
+
+const (
+	sizeofPtr      = 0x8
+	sizeofShort    = 0x2
+	sizeofInt      = 0x4
+	sizeofLong     = 0x8
+	sizeofLongLong = 0x8
+	sizeOfUtmp     = 0x180 // 384 bytes: the naturally aligned size of the utmp struct declared in this file
+)
+
+type (
+	_C_short     int16
+	_C_int       int32
+	_C_long      int64 // 8 bytes, consistent with sizeofLong
+	_C_long_long int64
+)
+
+type utmp struct {
+	Type              int16
+	Pid               int32
+	Line              [32]int8
+	Id                [4]int8
+	User              [32]int8
+	Host              [256]int8
+	Exit              exit_status
+	Session           int32
+	Tv                _Ctype_struct___0 // two int32 fields (Sec, Usec), not the int64-based timeval type
+	Addr_v6           [4]int32
+	X__glibc_reserved [20]uint8
+}
+
+type exit_status struct { // type of the utmp.Exit field
+	Termination int16
+	Exit        int16
+}
+
+type timeval struct { // 64-bit Sec/Usec pair; utmp.Tv in this file uses _Ctype_struct___0 instead
+	Sec  int64
+	Usec int64
+}
+
+type _Ctype_struct___0 struct { // type of the utmp.Tv field: 32-bit Sec/Usec pair
+	Sec  int32
+	Usec int32
+}