Index: wrk-4.2.0/obj/LuaJIT-2.1/.gitattributes
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/.gitattributes
@@ -0,0 +1 @@
+/.relver export-subst
Index: wrk-4.2.0/obj/LuaJIT-2.1/.relver
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/.relver
@@ -0,0 +1 @@
+$Format:%ct$
Index: wrk-4.2.0/obj/LuaJIT-2.1/COPYRIGHT
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/COPYRIGHT
+++ wrk-4.2.0/obj/LuaJIT-2.1/COPYRIGHT
@@ -1,7 +1,7 @@
 ===============================================================================
 LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
 
-Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
Index: wrk-4.2.0/obj/LuaJIT-2.1/Makefile
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/Makefile
+++ wrk-4.2.0/obj/LuaJIT-2.1/Makefile
@@ -10,16 +10,21 @@
 # For MSVC, please follow the instructions given in src/msvcbuild.bat.
 # For MinGW and Cygwin, cd to src and run make with the Makefile there.
 #
-# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
 MINVER=  1
-RELVER=  0
-PREREL=  -beta3
-VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
 ABIVER=  5.1
 
+# LuaJIT uses rolling releases. The release version is based on the time of
+# the latest git commit. The 'git' command must be available during the build.
+RELVER= $(shell cat src/luajit_relver.txt 2>/dev/null || : )
+# Note: setting it with := doesn't work, since it will change during the build.
+
+MMVERSION= $(MAJVER).$(MINVER)
+VERSION= $(MMVERSION).$(RELVER)
+
 ##############################################################################
 #
 # Change the installation path as needed. This automatically adjusts
@@ -33,9 +38,10 @@ DPREFIX= $(DESTDIR)$(PREFIX)
 INSTALL_BIN=   $(DPREFIX)/bin
 INSTALL_LIB=   $(DPREFIX)/$(MULTILIB)
 INSTALL_SHARE= $(DPREFIX)/share
-INSTALL_INC=   $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER)
+INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION)
+INSTALL_INC=   $(INSTALL_DEFINC)
 
-INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION)
+INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION)
 INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit
 INSTALL_LMODD= $(INSTALL_SHARE)/lua
 INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER)
@@ -49,10 +55,10 @@ INSTALL_TSYMNAME= luajit
 INSTALL_ANAME= libluajit-$(ABIVER).a
 INSTALL_SOSHORT1= libluajit-$(ABIVER).so
 INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER)
-INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER)
+INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION)
 INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib
 INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib
-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib
+INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib
 INSTALL_PCNAME= luajit.pc
 
 INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME)
@@ -77,7 +83,11 @@ INSTALL_F= install -m 0644
 UNINSTALL= $(RM)
 LDCONFIG= ldconfig -n 2>/dev/null
 SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \
-            -e "s|^multilib=.*|multilib=$(MULTILIB)|"
+	    -e "s|^multilib=.*|multilib=$(MULTILIB)|" \
+	    -e "s|^relver=.*|relver=$(RELVER)|"
+ifneq ($(INSTALL_DEFINC),$(INSTALL_INC))
+  SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|"
+endif
 
 FILE_T= luajit
 FILE_A= libluajit.a
@@ -88,7 +98,10 @@ FILES_INC= lua.h lualib.h lauxlib.h luac
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
 	      dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
 	      dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
-	      dis_mips64.lua dis_mips64el.lua vmdef.lua
+	      dis_mips64.lua dis_mips64el.lua \
+	      dis_mips64r6.lua dis_mips64r6el.lua \
+		  dis_riscv.lua dis_riscv64.lua \
+	      vmdef.lua
 
 ifeq (,$(findstring Windows,$(OS)))
   HOST_SYS:= $(shell uname -s)
@@ -109,9 +122,9 @@ endif
 INSTALL_DEP= src/luajit
 
 default all $(INSTALL_DEP):
-	@echo "==== Building LuaJIT $(VERSION) ===="
+	@echo "==== Building LuaJIT $(MMVERSION) ===="
 	$(MAKE) -C src
-	@echo "==== Successfully built LuaJIT $(VERSION) ===="
+	@echo "==== Successfully built LuaJIT $(MMVERSION) ===="
 
 install: $(INSTALL_DEP)
 	@echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ===="
@@ -130,18 +143,12 @@ install: $(INSTALL_DEP)
 	  $(RM) $(FILE_PC).tmp
 	cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
 	cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
+	$(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
 	@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
-	@echo ""
-	@echo "Note: the development releases deliberately do NOT install a symlink for luajit"
-	@echo "You can do this now by running this command (with sudo):"
-	@echo ""
-	@echo "  $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
-	@echo ""
-
 
 uninstall:
 	@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
-	$(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+	$(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
 	for file in $(FILES_JITLIB); do \
 	  $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
 	  done
@@ -155,8 +162,9 @@ uninstall:
 ##############################################################################
 
 amalg:
-	@echo "Building LuaJIT $(VERSION)"
+	@echo "==== Building LuaJIT $(MMVERSION) (amalgamation) ===="
 	$(MAKE) -C src amalg
+	@echo "==== Successfully built LuaJIT $(MMVERSION) (amalgamation) ===="
 
 clean:
 	$(MAKE) -C src clean
Index: wrk-4.2.0/obj/LuaJIT-2.1/README
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/README
+++ wrk-4.2.0/obj/LuaJIT-2.1/README
@@ -1,11 +1,11 @@
-README for LuaJIT 2.1.0-beta3
------------------------------
+README for LuaJIT 2.1
+---------------------
 
 LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
 
 Project Homepage: https://luajit.org/
 
-LuaJIT is Copyright (C) 2005-2021 Mike Pall.
+LuaJIT is Copyright (C) 2005-2023 Mike Pall.
 LuaJIT is free software, released under the MIT license.
 See full Copyright Notice in the COPYRIGHT file or in luajit.h.
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/README.md
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/README.md
@@ -0,0 +1,31 @@
+# LJRV - LuaJIT RISC-V 64 Port
+
+LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
+RISC-V is a free and open ISA enabling a new era of processor innovation.
+
+## Introduction
+
+LJRV is an ongoing porting project of LuaJIT to the RISC-V 64-bit architecture by PLCT Lab, ISCAS.
+The ultimate goal is to provide a RISC-V 64 LuaJIT implementation and have it upstreamed to the official LuaJIT repository.
+
+## Progress
+
+- [x] Interpreter Runtime
+- [x] JIT Compiler
+
+LJRV is still of beta quality, particularly the JIT compiler.
+For production usage, we suggest disabling the JIT compiler during compilation by setting `XCFLAGS+= -DLUAJIT_DISABLE_JIT` in the Makefile or as an environment variable.
+
+## Bug Report
+
+Please report bugs to [Issues](https://github.com/ruyisdk/LuaJIT/issues).
+
+## Copyright
+
+LuaJIT is Copyright (C) 2005-2023 Mike Pall.
+LuaJIT is free software, released under the MIT license.
+See full Copyright Notice in the COPYRIGHT file or in luajit.h.
+
+LJRV is Copyright (C) 2022-2023 PLCT Lab, ISCAS. Contributed by gns.
+LJRV is free software, released under the MIT license.
+LJRV is part of RuyiSDK.
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/bluequad-print.css
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/bluequad-print.css
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/bluequad-print.css
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2021 Mike Pall.
+/* Copyright (C) 2004-2023 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/bluequad.css
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/bluequad.css
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/bluequad.css
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004-2021 Mike Pall.
+/* Copyright (C) 2004-2023 Mike Pall.
  *
  * You are welcome to use the general ideas of this design for your own sites.
  * But please do not steal the stylesheet, the layout or the color scheme.
@@ -206,11 +206,9 @@ img.right {
 .ext {
   color: #ff8000;
 }
-.new {
-  font-size: 6pt;
-  vertical-align: middle;
-  background: #ff8000;
-  color: #ffffff;
+.note {
+  padding: 0.5em 1em;
+  border-left: 3px solid #bfcfff;
 }
 #site {
   clear: both;
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/contact.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/contact.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/contact.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Contact</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -44,11 +46,9 @@
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -84,10 +84,17 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D")
 </p>
 </noscript>
 
+<p><i>
+Note: I cannot reply to GMail, Google Workspace, Outlook or Office365
+mail addresses, since they prefer to mindlessly filter out mails sent
+from small domains using independent mail servers, such as mine. If you
+don't like that, please complain to Google or Microsoft, not me.
+</i></p>
+
 <h2>Copyright</h2>
 <p>
 All documentation is
-Copyright &copy; 2005-2021 Mike Pall.
+Copyright &copy; 2005-2023 Mike Pall.
 </p>
 
 
@@ -95,7 +102,7 @@ Copyright &copy; 2005-2021 Mike Pall.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_buffer.html
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_buffer.html
@@ -0,0 +1,689 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>String Buffer Library</title>
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+.lib {
+  vertical-align: middle;
+  margin-left: 5px;
+  padding: 0 5px;
+  font-size: 60%;
+  border-radius: 5px;
+  background: #c5d5ff;
+  color: #000;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>String Buffer Library</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a class="current" href="ext_buffer.html">String Buffers</a>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+The string buffer library allows <b>high-performance manipulation of
+string-like data</b>.
+</p>
+<p>
+Unlike Lua strings, which are constants, string buffers are
+<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
+can be stored, formatted and encoded into a string buffer and later
+converted, extracted or decoded.
+</p>
+<p>
+The convenient string buffer API simplifies common string manipulation
+tasks, that would otherwise require creating many intermediate strings.
+String buffers improve performance by eliminating redundant memory
+copies, object creation, string interning and garbage collection
+overhead. In conjunction with the FFI library, they allow zero-copy
+operations.
+</p>
+<p>
+The string buffer library also includes a high-performance
+<a href="#serialize">serializer</a> for Lua objects.
+</p>
+
+<h2 id="use">Using the String Buffer Library</h2>
+<p>
+The string buffer library is built into LuaJIT by default, but it's not
+loaded by default. Add this to the start of every Lua file that needs
+one of its functions:
+</p>
+<pre class="code">
+local buffer = require("string.buffer")
+</pre>
+<p>
+The convention for the syntax shown on this page is that <tt>buffer</tt>
+refers to the buffer library and <tt>buf</tt> refers to an individual
+buffer object.
+</p>
+<p>
+Please note the difference between a Lua function call, e.g.
+<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
+<tt>buf:reset()</tt> (with a colon).
+</p>
+
+<h3 id="buffer_object">Buffer Objects</h3>
+<p>
+A buffer object is a garbage-collected Lua object. After creation with
+<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
+When the last reference to a buffer object is gone, it will eventually
+be freed by the garbage collector, along with the allocated buffer
+space.
+</p>
+<p>
+Buffers operate like a FIFO (first-in first-out) data structure. Data
+can be appended (written) to the end of the buffer and consumed (read)
+from the front of the buffer. These operations may be freely mixed.
+</p>
+<p>
+The buffer space that holds the characters is managed automatically
+&mdash; it grows as needed and already consumed space is recycled. Use
+<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more
+control.
+</p>
+<p>
+The maximum size of a single buffer is the same as the maximum size of a
+Lua string, which is slightly below two gigabytes. For huge data sizes,
+neither strings nor buffers are the right data structure &mdash; use the
+FFI library to directly map memory or files up to the virtual memory
+limit of your OS.
+</p>
+
+<h3 id="buffer_overview">Buffer Method Overview</h3>
+<ul>
+<li>
+The <tt>buf:put*()</tt>-like methods append (write) characters to the
+end of the buffer.
+</li>
+<li>
+The <tt>buf:get*()</tt>-like methods consume (read) characters from the
+front of the buffer.
+</li>
+<li>
+Other methods, like <tt>buf:tostring()</tt> only read the buffer
+contents, but don't change the buffer.
+</li>
+<li>
+The <tt>buf:set()</tt> method allows zero-copy consumption of a string
+or an FFI cdata object as a buffer.
+</li>
+<li>
+The FFI-specific methods allow zero-copy read/write-style operations or
+modifying the buffer contents in-place. Please check the
+<a href="#ffi_caveats">FFI caveats</a> below, too.
+</li>
+<li>
+Methods that don't need to return anything specific, return the buffer
+object itself as a convenience. This allows method chaining, e.g.:
+<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt>
+</li>
+</ul>
+
+<h2 id="create">Buffer Creation and Management</h2>
+
+<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br>
+local buf = buffer.new([options])</tt></h3>
+<p>
+Creates a new buffer object.
+</p>
+<p>
+The optional <tt>size</tt> argument ensures a minimum initial buffer
+size. This is strictly an optimization when the required buffer size is
+known beforehand. The buffer space will grow as needed, in any case.
+</p>
+<p>
+The optional table <tt>options</tt> sets various
+<a href="#serialize_options">serialization options</a>.
+</p>
+
+<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
+<p>
+Reset (empty) the buffer. The allocated buffer space is not freed and
+may be reused.
+</p>
+
+<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
+<p>
+The buffer space of the buffer object is freed. The object itself
+remains intact, empty and may be reused.
+</p>
+<p>
+Note: you normally don't need to use this method. The garbage collector
+automatically frees the buffer space, when the buffer object is
+collected. Use this method, if you need to free the associated memory
+immediately.
+</p>
+
+<h2 id="write">Buffer Writers</h2>
+
+<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3>
+<p>
+Appends a string <tt>str</tt>, a number <tt>num</tt> or any object
+<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer.
+Multiple arguments are appended in the given order.
+</p>
+<p>
+Appending a buffer to a buffer is possible and short-circuited
+internally. But it still involves a copy. Better combine the buffer
+writes to use a single buffer.
+</p>
+
+<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3>
+<p>
+Appends the formatted arguments to the buffer. The <tt>format</tt>
+string supports the same options as <tt>string.format()</tt>.
+</p>
+
+<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+Appends the given <tt>len</tt> number of bytes from the memory pointed
+to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
+be convertible to a (constant) pointer.
+</p>
+
+<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
+buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+This method allows zero-copy consumption of a string or an FFI cdata
+object as a buffer. It stores a reference to the passed string
+<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer
+space originally allocated is freed. This is <i>not</i> an append
+operation, unlike the <tt>buf:put*()</tt> methods.
+</p>
+<p>
+After calling this method, the buffer behaves as if
+<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata,&nbsp;len)</tt>
+had been called. However, the data is only referenced and not copied, as
+long as the buffer is only consumed.
+</p>
+<p>
+In case the buffer is written to later on, the referenced data is copied
+and the object reference is removed (copy-on-write semantics).
+</p>
+<p>
+The stored reference is an anchor for the garbage collector and keeps the
+originally passed string or FFI cdata object alive.
+</p>
+
+<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br>
+<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3>
+<p>
+The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of
+write space in the buffer. It returns an <tt>uint8_t&nbsp;*</tt> FFI
+cdata pointer <tt>ptr</tt> that points to this space.
+</p>
+<p>
+The available length in bytes is returned in <tt>len</tt>. This is at
+least <tt>size</tt> bytes, but may be more to facilitate efficient
+buffer growth. You can either make use of the additional space or ignore
+<tt>len</tt> and only use <tt>size</tt> bytes.
+</p>
+<p>
+The <tt>commit</tt> method appends the <tt>used</tt> bytes of the
+previously returned write space to the buffer data.
+</p>
+<p>
+This pair of methods allows zero-copy use of C read-style APIs:
+</p>
+<pre class="code">
+local MIN_SIZE = 65536
+repeat
+  local ptr, len = buf:reserve(MIN_SIZE)
+  local n = C.read(fd, ptr, len)
+  if n == 0 then break end -- EOF.
+  if n &lt; 0 then error("read error") end
+  buf:commit(n)
+until false
+</pre>
+<p>
+The reserved write space is <i>not</i> initialized. At least the
+<tt>used</tt> bytes <b>must</b> be written to before calling the
+<tt>commit</tt> method. There's no need to call the <tt>commit</tt>
+method, if nothing is added to the buffer (e.g. on error).
+</p>
+
+<h2 id="read">Buffer Readers</h2>
+
+<h3 id="buffer_length"><tt>len = #buf</tt></h3>
+<p>
+Returns the current length of the buffer data in bytes.
+</p>
+
+<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3>
+<p>
+The Lua concatenation operator <tt>..</tt> also accepts buffers, just
+like strings or numbers. It always returns a string and not a buffer.
+</p>
+<p>
+Note that although this is supported for convenience, this thwarts one
+of the main reasons to use buffers, which is to avoid string
+allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>.
+</p>
+<p>
+Mixing this with unrelated objects that have a <tt>__concat</tt>
+metamethod may not work, since these probably only expect strings.
+</p>
+
+<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3>
+<p>
+Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
+length of the buffer data.
+</p>
+
+<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3>
+<p>
+Consumes the buffer data and returns one or more strings. If called
+without arguments, the whole buffer data is consumed. If called with a
+number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument
+consumes the remaining buffer space (this only makes sense as the last
+argument). Multiple arguments consume the buffer data in the given
+order.
+</p>
+<p>
+Note: a zero length or no remaining buffer data returns an empty string
+and not <tt>nil</tt>.
+</p>
+
+<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
+str = tostring(buf)</tt></h3>
+<p>
+Creates a string from the buffer data, but doesn't consume it. The
+buffer remains unchanged.
+</p>
+<p>
+Buffer objects also define a <tt>__tostring</tt> metamethod. This means
+buffers can be passed to the global <tt>tostring()</tt> function and
+many other functions that accept this in place of strings. The important
+internal uses in functions like <tt>io.write()</tt> are short-circuited
+to avoid the creation of an intermediate string object.
+</p>
+
+<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3>
+<p>
+Returns an <tt>uint8_t&nbsp;*</tt> FFI cdata pointer <tt>ptr</tt> that
+points to the buffer data. The length of the buffer data in bytes is
+returned in <tt>len</tt>.
+</p>
+<p>
+The returned pointer can be directly passed to C functions that expect a
+buffer and a length. You can also do bytewise reads
+(<tt>local&nbsp;x&nbsp;=&nbsp;ptr[i]</tt>) or writes
+(<tt>ptr[i]&nbsp;=&nbsp;0x40</tt>) of the buffer data.
+</p>
+<p>
+In conjunction with the <tt>skip</tt> method, this allows zero-copy use
+of C write-style APIs:
+</p>
+<pre class="code">
+repeat
+  local ptr, len = buf:ref()
+  if len == 0 then break end
+  local n = C.write(fd, ptr, len)
+  if n &lt; 0 then error("write error") end
+  buf:skip(n)
+until n >= len
+</pre>
+<p>
+Unlike Lua strings, buffer data is <i>not</i> implicitly
+zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
+expect zero-terminated strings. If you're not using <tt>len</tt>, then
+you're doing something wrong.
+</p>
+
+<h2 id="serialize">Serialization of Lua Objects</h2>
+<p>
+The following functions and methods allow <b>high-speed serialization</b>
+(encoding) of a Lua object into a string and decoding it back to a Lua
+object. This allows convenient storage and transport of <b>structured
+data</b>.
+</p>
+<p>
+The encoded data is in an <a href="#serialize_format">internal binary
+format</a>. The data can be stored in files, binary-transparent
+databases or transmitted to other LuaJIT instances across threads,
+processes or networks.
+</p>
+<p>
+Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
+server-class system, even when serializing many small objects. Decoding
+speed is mostly constrained by object creation cost.
+</p>
+<p>
+The serializer handles most Lua types, common FFI number types and
+nested structures. Functions, thread objects, other FFI cdata and full
+userdata cannot be serialized (yet).
+</p>
+<p>
+The encoder serializes nested structures as trees. Multiple references
+to a single object will be stored separately and create distinct objects
+after decoding. Circular references cause an error.
+</p>
+
+<h3 id="serialize_methods">Serialization Functions and Methods</h3>
+
+<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
+buf = buf:encode(obj)</tt></h3>
+<p>
+Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
+function returns a string <tt>str</tt>. The buffer method appends the
+encoding to the buffer.
+</p>
+<p>
+<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
+need to be a Lua table.
+</p>
+<p>
+This function may throw an error when attempting to serialize
+unsupported object types, circular references or deeply nested tables.
+</p>
+
+<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
+obj = buf:decode()</tt></h3>
+<p>
+The stand-alone function deserializes (decodes) the string
+<tt>str</tt>, the buffer method deserializes one object from the
+buffer. Both return a Lua object <tt>obj</tt>.
+</p>
+<p>
+The returned object may be any of the supported Lua types &mdash;
+even <tt>nil</tt>.
+</p>
+<p>
+This function may throw an error when fed with malformed or incomplete
+encoded data. The stand-alone function throws when there's left-over
+data after decoding a single top-level object. The buffer method leaves
+any left-over data in the buffer.
+</p>
+<p>
+Attempting to deserialize an FFI type will throw an error, if the FFI
+library is not built-in or has not been loaded, yet.
+</p>
+
+<h3 id="serialize_options">Serialization Options</h3>
+<p>
+The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain
+the following members (all optional):
+</p>
+<ul>
+<li>
+<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that
+commonly occur as table keys of objects you are serializing. These keys
+are compactly encoded as indexes during serialization. A well-chosen
+dictionary saves space and improves serialization performance.
+</li>
+<li>
+<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
+for the table objects you are serializing.
+</li>
+</ul>
+<p>
+<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
+to be an array of tables. Both starting at index 1 and without holes (no
+<tt>nil</tt> in between). The tables are anchored in the buffer object and
+internally modified into a two-way index (don't do this yourself, just pass
+a plain array). The tables must not be modified after they have been passed
+to <tt>buffer.new()</tt>.
+</p>
+<p>
+The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
+decoder must be the same. Put the most common entries at the front. Extend
+at the end to ensure backwards-compatibility &mdash; older encodings can
+then still be read. You may also set some indexes to <tt>false</tt> to
+explicitly drop backwards-compatibility. Old encodings that use these
+indexes will throw an error when decoded.
+</p>
+<p>
+Metatables that are not found in the <tt>metatable</tt> dictionary are
+ignored when encoding. Decoding returns a table with a <tt>nil</tt>
+metatable.
+</p>
+<p>
+Note: parsing and preparation of the options table is somewhat
+expensive. Create a buffer object only once and recycle it for multiple
+uses. Avoid mixing encoder and decoder buffers, since the
+<tt>buf:set()</tt> method frees the already allocated buffer space:
+</p>
+<pre class="code">
+local options = {
+  dict = { "commonly", "used", "string", "keys" },
+}
+local buf_enc = buffer.new(options)
+local buf_dec = buffer.new(options)
+
+local function encode(obj)
+  return buf_enc:reset():encode(obj):get()
+end
+
+local function decode(str)
+  return buf_dec:set(str):decode()
+end
+</pre>
+
+<h3 id="serialize_stream">Streaming Serialization</h3>
+<p>
+In some contexts, it's desirable to do piecewise serialization of large
+datasets, also known as <i>streaming</i>.
+</p>
+<p>
+This serialization format can be safely concatenated and supports streaming.
+Multiple encodings can simply be appended to a buffer and later decoded
+individually:
+</p>
+<pre class="code">
+local buf = buffer.new()
+buf:encode(obj1)
+buf:encode(obj2)
+local copy1 = buf:decode()
+local copy2 = buf:decode()
+</pre>
+<p>
+Here's how to iterate over a stream:
+</p>
+<pre class="code">
+while #buf ~= 0 do
+  local obj = buf:decode()
+  -- Do something with obj.
+end
+</pre>
+<p>
+Since the serialization format doesn't prepend a length to its encoding,
+network applications may need to transmit the length, too.
+</p>
+
+<h3 id="serialize_format">Serialization Format Specification</h3>
+<p>
+This serialization format is designed for <b>internal use</b> by LuaJIT
+applications. Serialized data is upwards-compatible and portable across
+all supported LuaJIT platforms.
+</p>
+<p>
+It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
+embedded zeroes and stores embedded Lua string objects unmodified, which
+are 8-bit-clean, too. Encoded data can be safely concatenated for
+streaming and later decoded one top-level object at a time.
+</p>
+<p>
+The encoding is reasonably compact, but tuned for maximum performance,
+not for minimum space usage. It compresses well with any of the common
+byte-oriented data compression algorithms.
+</p>
+<p>
+Although documented here for reference, this format is explicitly
+<b>not</b> intended to be a 'public standard' for structured data
+interchange across computer languages (like JSON or MessagePack). Please
+do not use it as such.
+</p>
+<p>
+The specification is given below as a context-free grammar with a
+top-level <tt>object</tt> as the starting point. Alternatives are
+separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
+Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
+either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
+suffix.
+</p>
+<pre>
+object    → nil | false | true
+          | null | lightud32 | lightud64
+          | int | num | tab | tab_mt
+          | int64 | uint64 | complex
+          | string
+
+nil       → 0x00
+false     → 0x01
+true      → 0x02
+
+null      → 0x03                            // NULL lightuserdata
+lightud32 → 0x04 data.I                   // 32 bit lightuserdata
+lightud64 → 0x05 data.L                   // 64 bit lightuserdata
+
+int       → 0x06 int.I                                 // int32_t
+num       → 0x07 double.L
+
+tab       → 0x08                                   // Empty table
+          | 0x09 h.U h*{object object}          // Key/value hash
+          | 0x0a a.U a*object                    // 0-based array
+          | 0x0b a.U a*object h.U h*{object object}      // Mixed
+          | 0x0c a.U (a-1)*object                // 1-based array
+          | 0x0d a.U (a-1)*object h.U h*{object object}  // Mixed
+tab_mt    → 0x0e (index-1).U tab          // Metatable dict entry
+
+int64     → 0x10 int.L                             // FFI int64_t
+uint64    → 0x11 uint.L                           // FFI uint64_t
+complex   → 0x12 re.L im.L                         // FFI complex
+
+string    → (0x20+len).U len*char.B
+          | 0x0f (index-1).U                 // String dict entry
+
+.B = 8 bit
+.I = 32 bit little-endian
+.L = 64 bit little-endian
+.U = prefix-encoded 32 bit unsigned number n:
+     0x00..0xdf   → n.B
+     0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
+   0x1fe0..       → 0xff n.I
+</pre>
+
+<h2 id="error">Error handling</h2>
+<p>
+Many of the buffer methods can throw an error. Out-of-memory or usage
+errors are best caught with an outer wrapper for larger parts of code.
+There's not much one can do after that, anyway.
+</p>
+<p>
+OTOH, you may want to catch some errors individually. Buffer methods need
+to receive the buffer object as the first argument. The Lua colon-syntax
+<tt>obj:method()</tt> does that implicitly. But to wrap a method with
+<tt>pcall()</tt>, the arguments need to be passed like this:
+</p>
+<pre class="code">
+local ok, err = pcall(buf.encode, buf, obj)
+if not ok then
+  -- Handle error in err.
+end
+</pre>
+
+<h2 id="ffi_caveats">FFI caveats</h2>
+<p>
+The string buffer library has been designed to work well together with
+the FFI library. But due to the low-level nature of the FFI library,
+some care needs to be taken:
+</p>
+<p>
+First, please remember that FFI pointers are zero-indexed. The space
+returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the
+returned pointer and ends before <tt>len</tt> bytes after that.
+</p>
+<p>
+I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
+is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid
+index at all. The returned pointer may even be <tt>NULL</tt>.
+</p>
+<p>
+The space pointed to by the returned pointer is only valid as long as
+the buffer is not modified in any way (neither append, nor consume, nor
+reset, etc.). The pointer is also not a GC anchor for the buffer object
+itself.
+</p>
+<p>
+Buffer data is only guaranteed to be byte-aligned. Casting the returned
+pointer to a data type with higher alignment may cause unaligned
+accesses. It depends on the CPU architecture whether this is allowed or
+not (it's always OK on x86/x64 and mostly OK on other modern
+architectures).
+</p>
+<p>
+FFI pointers or references do not count as GC anchors for an underlying
+object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is
+anchored by <tt>buf:set(array,&nbsp;len)</tt>, but not by
+<tt>buf:set(array+offset,&nbsp;len)</tt>. The addition of the offset
+creates a new pointer, even when the offset is zero. In this case, you
+need to make sure there's still a reference to the original array as
+long as its contents are in use by the buffer.
+</p>
+<p>
+Even though each LuaJIT VM instance is single-threaded (but you can
+create multiple VMs), FFI data structures can be accessed concurrently.
+Be careful when reading/writing FFI cdata from/to buffers to avoid
+concurrent accesses or modifications. In particular, the memory
+referenced by <tt>buf:set(cdata,&nbsp;len)</tt> must not be modified
+while buffer readers are working on it. Shared, but read-only memory
+mappings of files are OK, but only if the file does not change.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2023
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_c_api.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_c_api.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_c_api.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Lua/C API Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a class="current" href="ext_c_api.html">Lua/C API</a>
@@ -44,11 +46,9 @@
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -105,7 +105,7 @@ Turn the whole JIT compiler on or off or
 This sets the mode for the function at the stack index <tt>idx</tt> or
 the parent of the calling function (<tt>idx = 0</tt>). It either
 enables JIT compilation for a function, disables it and flushes any
-already compiled code or only flushes already compiled code. This
+already compiled code, or only flushes already compiled code. This
 applies recursively to all sub-functions of the function with
 <tt>LUAJIT_MODE_ALLFUNC</tt> or only to the sub-functions with
 <tt>LUAJIT_MODE_ALLSUBFUNC</tt>.
@@ -124,7 +124,7 @@ traces which link to it.
 This mode defines a wrapper function for calls to C functions. If
 called with <tt>LUAJIT_MODE_ON</tt>, the stack index at <tt>idx</tt>
 must be a <tt>lightuserdata</tt> object holding a pointer to the wrapper
-function. From now on all C functions are called through the wrapper
+function. From now on, all C functions are called through the wrapper
 function. If called with <tt>LUAJIT_MODE_OFF</tt> this mode is turned
 off and all C functions are directly called.
 </p>
@@ -173,7 +173,7 @@ Also note that this mechanism is not wit
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_ffi.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -44,11 +46,9 @@
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -155,7 +155,7 @@ call the binding function. Phew!
 <h2 id="cdata">Motivating Example: Using C Data Structures</h2>
 <p>
 The FFI library allows you to create and access C&nbsp;data
-structures. Of course the main use for this is for interfacing with
+structures. Of course, the main use for this is for interfacing with
 C&nbsp;functions. But they can be used stand-alone, too.
 </p>
 <p>
@@ -167,7 +167,7 @@ implemented with a big table holding lot
 both a substantial memory overhead as well as a performance overhead.
 </p>
 <p>
-Here's a sketch of a library that operates on color images plus a
+Here's a sketch of a library that operates on color images, plus a
 simple benchmark. First, the plain Lua version:
 </p>
 <pre class="code">
@@ -182,7 +182,7 @@ local function image_ramp_green(n)
   return img
 end
 
-local function image_to_grey(img, n)
+local function image_to_gray(img, n)
   for i=1,n do
     local y = floor(0.3*img[i].red + 0.59*img[i].green + 0.11*img[i].blue)
     img[i].red = y; img[i].green = y; img[i].blue = y
@@ -192,14 +192,14 @@ end
 local N = 400*400
 local img = image_ramp_green(N)
 for i=1,1000 do
-  image_to_grey(img, N)
+  image_to_gray(img, N)
 end
 </pre>
 <p>
 This creates a table with 160.000 pixels, each of which is a table
-holding four number values in the range of 0-255. First an image with
+holding four number values in the range of 0-255. First, an image with
 a green ramp is created (1D for simplicity), then the image is
-converted to greyscale 1000 times. Yes, that's silly, but I was in
+converted to grayscale 1000 times. Yes, that's silly, but I was in
 need of a simple example ...
 </p>
 <p>
@@ -306,7 +306,7 @@ be more compact and faster. This is cert
 ~1.7x). Switching to a struct-of-arrays would help, too.
 </p>
 <p style="font-size: 8pt;">
-However the resulting code would be less idiomatic and rather
+However, the resulting code would be less idiomatic and rather
 error-prone. And it still doesn't get even close to the performance of
 the FFI version of the code. Also, high-level data structures cannot
 be easily passed to other C&nbsp;functions, especially I/O functions,
@@ -316,7 +316,7 @@ without undue conversion penalties.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_api.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_ffi_api.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_api.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>ffi.* API Functions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -42,6 +42,8 @@ td.abiparam { font-weight: bold; width:
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -49,11 +51,9 @@ td.abiparam { font-weight: bold; width:
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -119,7 +119,7 @@ separated by semicolons. The trailing se
 declaration may be omitted.
 </p>
 <p>
-Please note that external symbols are only <em>declared</em>, but they
+Please note that external symbols are only <em>declared</em>, but they
 are <em>not bound</em> to any specific address, yet. Binding is
 achieved with C&nbsp;library namespaces (see below).
 </p>
@@ -207,7 +207,7 @@ parse the cdecl only once and get its ct
 <tt>ffi.typeof()</tt>. Then use the ctype as a constructor repeatedly.
 </p>
 <p style="font-size: 8pt;">
-Please note that an anonymous <tt>struct</tt> declaration implicitly
+Please note that an anonymous <tt>struct</tt> declaration implicitly
 creates a new and distinguished ctype every time you use it for
 <tt>ffi.new()</tt>. This is probably <b>not</b> what you want,
 especially if you create more than one cdata object. Different anonymous
@@ -254,12 +254,12 @@ afterwards. Neither the contents of the
 contents of an <tt>__index</tt> table (if any) may be modified
 afterwards. The associated metatable automatically applies to all uses
 of this type, no matter how the objects are created or where they
-originate from. Note that pre-defined operations on types have
+originate from. Note that predefined operations on types have
 precedence (e.g. declared field names cannot be overridden).
 </p>
 <p>
 All standard Lua metamethods are implemented. These are called directly,
-without shortcuts and on any mix of types. For binary operations, the
+without shortcuts, and on any mix of types. For binary operations, the
 left operand is checked first for a valid ctype metamethod. The
 <tt>__gc</tt> metamethod only applies to <tt>struct</tt>/<tt>union</tt>
 types and performs an implicit <a href="#ffi_gc"><tt>ffi.gc()</tt></a>
@@ -463,8 +463,10 @@ otherwise. The following parameters are
 <tr class="odd">
 <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
 <tr class="even">
-<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<td class="abiparam">pauth</td><td class="abidesc">Pointer authentication ABI</td></tr>
 <tr class="odd">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="even">
 <td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
 </table>
 
@@ -490,7 +492,7 @@ have some extra methods:
 <p>
 Free the resources associated with a callback. The associated Lua
 function is unanchored and may be garbage collected. The callback
-function pointer is no longer valid and must not be called anymore
+function pointer is no longer valid and must not be called again
 (it may be reused by a subsequently created callback).
 </p>
 
@@ -556,7 +558,7 @@ named <tt>i</tt>.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_semantics.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_ffi_semantics.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_semantics.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Semantics</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -42,6 +42,8 @@ td.convop { font-style: italic; width: 4
 <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -49,11 +51,9 @@ td.convop { font-style: italic; width: 4
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -86,7 +86,7 @@ footprint. It's used by the <a href="ext
 functions</a> to declare C&nbsp;types or external symbols.
 </p>
 <p>
-It's only purpose is to parse C&nbsp;declarations, as found e.g. in
+Its only purpose is to parse C&nbsp;declarations, as found e.g. in
 C&nbsp;header files. Although it does evaluate constant expressions,
 it's <em>not</em> a C&nbsp;compiler. The body of <tt>inline</tt>
 C&nbsp;function definitions is simply ignored.
@@ -163,7 +163,7 @@ function declarations.</li>
 
 </ul>
 <p>
-The following C&nbsp;types are pre-defined by the C&nbsp;parser (like
+The following C&nbsp;types are predefined by the C&nbsp;parser (like
 a <tt>typedef</tt>, except re-declarations will be ignored):
 </p>
 <ul>
@@ -581,9 +581,9 @@ ffi.new("struct nested", {x=1,y={2,3}})
 
 <h2 id="cdata_ops">Operations on cdata Objects</h2>
 <p>
-All of the standard Lua operators can be applied to cdata objects or a
+All standard Lua operators can be applied to cdata objects or a
 mix of a cdata object and another Lua object. The following list shows
-the pre-defined operations.
+the predefined operations.
 </p>
 <p>
 Reference types are dereferenced <em>before</em> performing each of
@@ -591,7 +591,7 @@ the operations below &mdash; the operati
 C&nbsp;type pointed to by the reference.
 </p>
 <p>
-The pre-defined operations are always tried first before deferring to a
+The predefined operations are always tried first before deferring to a
 metamethod or index table (if any) for the corresponding ctype (except
 for <tt>__new</tt>). An error is raised if the metamethod lookup or
 index table lookup fails.
@@ -641,7 +641,7 @@ assigning to an index of a vector raises
 </ul>
 <p>
 A ctype object can be indexed with a string key, too. The only
-pre-defined operation is reading scoped constants of
+predefined operation is reading scoped constants of
 <tt>struct</tt>/<tt>union</tt> types. All other accesses defer
 to the corresponding metamethods or index tables (if any).
 </p>
@@ -654,7 +654,7 @@ certain optimizations.
 <p>
 As a consequence, the <em>elements</em> of complex numbers and
 vectors are immutable. But the elements of an aggregate holding these
-types <em>may</em> be modified of course. I.e. you cannot assign to
+types <em>may</em> be modified, of course. I.e. you cannot assign to
 <tt>foo.c.im</tt>, but you can assign a (newly created) complex number
 to <tt>foo.c</tt>.
 </p>
@@ -673,8 +673,8 @@ through unions is explicitly detected an
 to <tt>ffi.new(ct, ...)</tt>, unless a <tt>__new</tt> metamethod is
 defined. The <tt>__new</tt> metamethod is called with the ctype object
 plus any other arguments passed to the constructor. Note that you have to
-use <tt>ffi.new</tt> inside of it, since calling <tt>ct(...)</tt> would
-cause infinite recursion.</li>
+use <tt>ffi.new</tt> inside the metamethod, since calling <tt>ct(...)</tt>
+would cause infinite recursion.</li>
 
 <li><b>C&nbsp;function call</b>: a cdata function or cdata function
 pointer can be called. The passed arguments are
@@ -685,7 +685,7 @@ variable argument part of vararg C&nbsp;
 C&nbsp;function is called and the return value (if any) is
 <a href="#convert_tolua">converted to a Lua object</a>.<br>
 On Windows/x86 systems, <tt>__stdcall</tt> functions are automatically
-detected and a function declared as <tt>__cdecl</tt> (the default) is
+detected, and a function declared as <tt>__cdecl</tt> (the default) is
 silently fixed up after the first call.</li>
 
 </ul>
@@ -695,7 +695,7 @@ silently fixed up after the first call.<
 
 <li><b>Pointer arithmetic</b>: a cdata pointer/array and a cdata
 number or a Lua number can be added or subtracted. The number must be
-on the right hand side for a subtraction. The result is a pointer of
+on the right-hand side for a subtraction. The result is a pointer of
 the same type with an address plus or minus the number value
 multiplied by the element size in bytes. An error is raised if the
 element size is undefined.</li>
@@ -710,7 +710,7 @@ operators (<tt>+&nbsp;-&nbsp;*&nbsp;/&nb
 minus) can be applied to two cdata numbers, or a cdata number and a
 Lua number. If one of them is an <tt>uint64_t</tt>, the other side is
 converted to an <tt>uint64_t</tt> and an unsigned arithmetic operation
-is performed. Otherwise both sides are converted to an
+is performed. Otherwise, both sides are converted to an
 <tt>int64_t</tt> and a signed arithmetic operation is performed. The
 result is a boxed 64&nbsp;bit cdata object.<br>
 
@@ -757,7 +757,7 @@ which is compatible with any other point
 <li><b>64&nbsp;bit integer comparison</b>: two cdata numbers, or a
 cdata number and a Lua number can be compared with each other. If one
 of them is an <tt>uint64_t</tt>, the other side is converted to an
-<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise
+<tt>uint64_t</tt> and an unsigned comparison is performed. Otherwise,
 both sides are converted to an <tt>int64_t</tt> and a signed
 comparison is performed.<br>
 
@@ -782,9 +782,9 @@ keys!</b>
 A cdata object is treated like any other garbage-collected object and
 is hashed and compared by its address for table indexing. Since
 there's no interning for cdata value types, the same value may be
-boxed in different cdata objects with different addresses. Thus
+boxed in different cdata objects with different addresses. Thus,
 <tt>t[1LL+1LL]</tt> and <tt>t[2LL]</tt> usually <b>do not</b> point to
-the same hash slot and they certainly <b>do not</b> point to the same
+the same hash slot, and they certainly <b>do not</b> point to the same
 hash slot as <tt>t[2]</tt>.
 </p>
 <p>
@@ -806,7 +806,7 @@ the resulting Lua number as a key when i
 One obvious benefit: <tt>t[tonumber(2LL)]</tt> <b>does</b> point to
 the same slot as <tt>t[2]</tt>.</li>
 
-<li>Otherwise use either <tt>tostring()</tt> on 64&nbsp;bit integers
+<li>Otherwise, use either <tt>tostring()</tt> on 64&nbsp;bit integers
 or complex numbers or combine multiple fields of a cdata aggregate to
 a Lua string (e.g. with
 <a href="ext_ffi_api.html#ffi_string"><tt>ffi.string()</tt></a>). Then
@@ -814,7 +814,7 @@ use the resulting Lua string as a key wh
 
 <li>Create your own specialized hash table implementation using the
 C&nbsp;types provided by the FFI library, just like you would in
-C&nbsp;code. Ultimately this may give much better performance than the
+C&nbsp;code. Ultimately, this may give much better performance than the
 other alternatives or what a generic by-value hash table could
 possibly provide.</li>
 
@@ -880,7 +880,7 @@ garbage collector will automatically fre
 the end of the next GC cycle).
 </p>
 <p>
-Please note that pointers themselves are cdata objects, however they
+Please note that pointers themselves are cdata objects; however, they
 are <b>not</b> followed by the garbage collector. So e.g. if you
 assign a cdata array to a pointer, you must keep the cdata object
 holding the array alive as long as the pointer is still in use:
@@ -929,18 +929,18 @@ of the function pointer and the Lua func
 </p>
 <p>
 This can happen implicitly due to the usual conversions, e.g. when
-passing a Lua function to a function pointer argument. Or you can use
+passing a Lua function to a function pointer argument. Or, you can use
 <tt>ffi.cast()</tt> to explicitly cast a Lua function to a
 C&nbsp;function pointer.
 </p>
 <p>
-Currently only certain C&nbsp;function types can be used as callback
+Currently, only certain C&nbsp;function types can be used as callback
 functions. Neither C&nbsp;vararg functions nor functions with
 pass-by-value aggregate argument or result types are supported. There
-are no restrictions for the kind of Lua functions that can be called
+are no restrictions on the kind of Lua functions that can be called
 from the callback &mdash; no checks for the proper number of arguments
 are made. The return value of the Lua function will be converted to the
-result type and an error will be thrown for invalid conversions.
+result type, and an error will be thrown for invalid conversions.
 </p>
 <p>
 It's allowed to throw errors across a callback invocation, but it's not
@@ -1001,7 +1001,7 @@ convention cannot be automatically detec
 <tt>__stdcall</tt> calls <em>to</em> Windows functions.
 </p>
 <p>
-For some use cases it's necessary to free up the resources or to
+For some use cases, it's necessary to free up the resources or to
 dynamically redirect callbacks. Use an explicit cast to a
 C&nbsp;function pointer and keep the resulting cdata object. Then use
 the <a href="ext_ffi_api.html#callback_free"><tt>cb:free()</tt></a>
@@ -1054,7 +1054,7 @@ GUI application, which waits for user in
 </p>
 <p>
 For new designs <b>avoid push-style APIs</b>: a C&nbsp;function repeatedly
-calling a callback for each result. Instead <b>use pull-style APIs</b>:
+calling a callback for each result. Instead, <b>use pull-style APIs</b>:
 call a C&nbsp;function repeatedly to get a new result. Calls from Lua
 to C via the FFI are much faster than the other way round. Most well-designed
 libraries already use pull-style APIs (read/write, get/put).
@@ -1073,7 +1073,7 @@ function.
 </p>
 <p>
 Indexing a C&nbsp;library namespace object with a symbol name (a Lua
-string) automatically binds it to the library. First the symbol type
+string) automatically binds it to the library. First, the symbol type
 is resolved &mdash; it must have been declared with
 <a href="ext_ffi_api.html#ffi_cdef"><tt>ffi.cdef</tt></a>. Then the
 symbol address is resolved by searching for the symbol name in the
@@ -1128,7 +1128,7 @@ Performance notice: the JIT compiler spe
 namespace objects and to the strings used to index it. This
 effectively turns function cdata objects into constants. It's not
 useful and actually counter-productive to explicitly cache these
-function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH it
+function objects, e.g. <tt>local strlen = ffi.C.strlen</tt>. OTOH, it
 <em>is</em> useful to cache the namespace itself, e.g. <tt>local C =
 ffi.C</tt>.
 </p>
@@ -1153,14 +1153,14 @@ This behavior is inevitable, since the g
 interoperability with C&nbsp;code. Adding extra safety measures, like
 bounds checks, would be futile. There's no way to detect
 misdeclarations of C&nbsp;functions, since shared libraries only
-provide symbol names, but no type information. Likewise there's no way
+provide symbol names, but no type information. Likewise, there's no way
 to infer the valid range of indexes for a returned pointer.
 </p>
 <p>
 Again: the FFI library is a low-level library. This implies it needs
 to be used with care, but it's flexibility and performance often
 outweigh this concern. If you're a C or C++ developer, it'll be easy
-to apply your existing knowledge. OTOH writing code for the FFI
+to apply your existing knowledge. OTOH, writing code for the FFI
 library is not for the faint of heart and probably shouldn't be the
 first exercise for someone with little experience in Lua, C or C++.
 </p>
@@ -1188,7 +1188,7 @@ currently incomplete:
 <li>C&nbsp;declarations are not passed through a C&nbsp;pre-processor,
 yet.</li>
 <li>The C&nbsp;parser is able to evaluate most constant expressions
-commonly found in C&nbsp;header files. However it doesn't handle the
+commonly found in C&nbsp;header files. However, it doesn't handle the
 full range of C&nbsp;expression semantics and may fail for some
 obscure constructs.</li>
 <li><tt>static const</tt> declarations only work for integer types
@@ -1246,7 +1246,7 @@ compiled.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_tutorial.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_ffi_tutorial.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_ffi_tutorial.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>FFI Tutorial</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -44,6 +44,8 @@ td.idiomlua b { font-weight: normal; col
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -51,11 +53,9 @@ td.idiomlua b { font-weight: normal; col
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -83,7 +83,7 @@ of its functions:
 local ffi = require("ffi")
 </pre>
 <p>
-Please note this doesn't define an <tt>ffi</tt> variable in the table
+Please note, this doesn't define an <tt>ffi</tt> variable in the table
 of globals &mdash; you really need to use the local variable. The
 <tt>require</tt> function ensures the library is only loaded once.
 </p>
@@ -192,7 +192,7 @@ don't need to declare them as such.
 <span class="mark">&#9316;</span> The <tt>poll()</tt>
 function takes a couple more arguments we're not going to use. You can
 simply use <tt>nil</tt> to pass a <tt>NULL</tt> pointer and <tt>0</tt>
-for the <tt>nfds</tt> parameter. Please note that the
+for the <tt>nfds</tt> parameter. Please note that the
 number&nbsp;<tt>0</tt> <em>does not convert to a pointer value</em>,
 unlike in C++. You really have to pass pointers to pointer arguments
 and numbers to number arguments.
@@ -289,12 +289,12 @@ Here's the step-by-step explanation:
 <p>
 <span class="mark">&#9312;</span> This defines some of the
 C&nbsp;functions provided by zlib. For the sake of this example, some
-type indirections have been reduced and it uses the pre-defined
+type indirections have been reduced and it uses the predefined
 fixed-size integer types, while still adhering to the zlib API/ABI.
 </p>
 <p>
 <span class="mark">&#9313;</span> This loads the zlib shared
-library. On POSIX systems it's named <tt>libz.so</tt> and usually
+library. On POSIX systems, it's named <tt>libz.so</tt> and usually
 comes pre-installed. Since <tt>ffi.load()</tt> automatically adds any
 missing standard prefixes/suffixes, we can simply load the
 <tt>"z"</tt> library. On Windows it's named <tt>zlib1.dll</tt> and
@@ -322,7 +322,7 @@ actual length that was used.
 <p>
 In C you'd pass in the address of a local variable
 (<tt>&amp;buflen</tt>). But since there's no address-of operator in
-Lua, we'll just pass in a one-element array. Conveniently it can be
+Lua, we'll just pass in a one-element array. Conveniently, it can be
 initialized with the maximum buffer size in one step. Calling the
 actual <tt>zlib.compress2</tt> function is then straightforward.
 </p>
@@ -346,7 +346,7 @@ for garbage collection and string intern
 <span class="mark">&#9317;</span> The <tt>uncompress</tt>
 functions does the exact opposite of the <tt>compress</tt> function.
 The compressed data doesn't include the size of the original string,
-so this needs to be passed in. Otherwise no surprises here.
+so this needs to be passed in. Otherwise, no surprises here.
 </p>
 <p>
 <span class="mark">&#9318;</span> The code, that makes use
@@ -380,7 +380,7 @@ Ok, so the <tt>ffi.*</tt> functions gene
 wherever you'd want to use a number. That's why we get a away with
 passing <tt>n</tt> to <tt>ffi.string()</tt> above. But other Lua
 library functions or modules don't know how to deal with this. So for
-maximum portability one needs to use <tt>tonumber()</tt> on returned
+maximum portability, one needs to use <tt>tonumber()</tt> on returned
 <tt>long</tt> results before passing them on. Otherwise the
 application might work on some systems, but would fail in a POSIX/x64
 environment.
@@ -452,7 +452,7 @@ the origin.
 </p>
 <p>
 <span class="mark">&#9315;</span> If we run out of operators, we can
-define named methods, too. Here the <tt>__index</tt> table defines an
+define named methods, too. Here, the <tt>__index</tt> table defines an
 <tt>area</tt> function. For custom indexing needs, one might want to
 define <tt>__index</tt> and <tt>__newindex</tt> <em>functions</em> instead.
 </p>
@@ -466,13 +466,13 @@ be used e.g. to create an array of point
 apply to any and all uses of this type.
 </p>
 <p>
-Please note that the association with a metatable is permanent and
+Please note, that the association with a metatable is permanent and
 <b>the metatable must not be modified afterwards!</b> Ditto for the
 <tt>__index</tt> table.
 </p>
 <p>
 <span class="mark">&#9317;</span> Here are some simple usage examples
-for the point type and their expected results. The pre-defined
+for the point type and their expected results. The predefined
 operations (such as <tt>a.x</tt>) can be freely mixed with the newly
 defined metamethods. Note that <tt>area</tt> is a method and must be
 called with the Lua syntax for methods: <tt>a:area()</tt>, not
@@ -481,7 +481,7 @@ called with the Lua syntax for methods:
 <p>
 The C&nbsp;type metamethod mechanism is most useful when used in
 conjunction with C&nbsp;libraries that are written in an object-oriented
-style. Creators return a pointer to a new instance and methods take an
+style. Creators return a pointer to a new instance, and methods take an
 instance pointer as the first argument. Sometimes you can just point
 <tt>__index</tt> to the library namespace and <tt>__gc</tt> to the
 destructor and you're done. But often enough you'll want to add
@@ -567,7 +567,7 @@ end
 </pre>
 <p>
 This turns them into indirect calls and generates bigger and slower
-machine code. Instead you'll want to cache the namespace itself and
+machine code. Instead, you'll want to cache the namespace itself and
 rely on the JIT compiler to eliminate the lookups:
 </p>
 <pre class="code">
@@ -587,7 +587,7 @@ it to a local variable in the function s
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_jit.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_jit.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_jit.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>jit.* Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a class="current" href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -44,11 +46,9 @@
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -135,7 +135,9 @@ Contains the LuaJIT version string.
 <h3 id="jit_version_num"><tt>jit.version_num</tt></h3>
 <p>
 Contains the version number of the LuaJIT core. Version xx.yy.zz
-is represented by the decimal number xxyyzz.
+is represented by the decimal number xxyyzz.<br>
+<b>DEPRECATED after the switch to
+<a href="https://luajit.org/status.html#release"><span class="ext">&raquo;</span>&nbsp;rolling releases</a>. zz is frozen at 99.</b>
 </p>
 
 <h3 id="jit_os"><tt>jit.os</tt></h3>
@@ -152,7 +154,7 @@ Contains the target architecture name:
 
 <h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
 <p>
-This sub-module provides the backend for the <tt>-O</tt> command line
+This submodule provides the backend for the <tt>-O</tt> command line
 option.
 </p>
 <p>
@@ -172,7 +174,7 @@ which was one of the ways to enable opti
 
 <h2 id="jit_util"><tt>jit.util.*</tt> &mdash; JIT compiler introspection</h2>
 <p>
-This sub-module holds functions to introspect the bytecode, generated
+This submodule holds functions to introspect the bytecode, generated
 traces, the IR and the generated machine code. The functionality
 provided by this module is still in flux and therefore undocumented.
 </p>
@@ -185,7 +187,7 @@ if you want to know more.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_profiler.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/ext_profiler.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/ext_profiler.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Profiler</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -37,6 +37,8 @@
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -44,11 +46,9 @@
 <a class="current" href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -156,7 +156,7 @@ To see how much time is spent in differe
 Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
 views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
 spent in a VM state or zone vs. hotspots. This can be used to answer
-questions like "Which time consuming functions are only interpreted?" or
+questions like "Which time-consuming functions are only interpreted?" or
 "What's the garbage collector overhead for a specific function?".
 </p>
 <p>
@@ -215,7 +215,7 @@ local profile = require("jit.profile")
 This module can be used to implement your own higher-level profiler.
 A typical profiling run starts the profiler, captures stack dumps in
 the profiler callback, adds them to a hash table to aggregate the number
-of samples, stops the profiler and then analyzes all of the captured
+of samples, stops the profiler and then analyzes all captured
 stack dumps. Other parameters can be sampled in the profiler callback,
 too. But it's important not to spend too much time in the callback,
 since this may skew the statistics.
@@ -269,9 +269,9 @@ returns a string with a stack dump for t
 formatted according to the <tt>fmt</tt> argument:
 </p>
 <ul>
-<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise,
 only the file name is used.</li>
-<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise,
 use module:line.</li>
 <li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
 <li><tt>l</tt> &mdash; Dump module:line.</li>
@@ -349,7 +349,7 @@ use.
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/extensions.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/extensions.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/extensions.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -54,6 +54,8 @@ td.excinterop {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -61,16 +63,15 @@ td.excinterop {
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
 </div>
 <div id="main">
+
 <p>
 LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
 <a href="https://www.lua.org/manual/5.1/manual.html#5"><span class="ext">&raquo;</span>&nbsp;standard Lua
@@ -86,7 +87,7 @@ or LuaJIT.
 </p>
 <p>
 LuaJIT extends the standard Lua VM with new functionality and adds
-several extension modules. Please note this page is only about
+several extension modules. Please note, this page is only about
 <em>functional</em> enhancements and not about performance enhancements,
 such as the optimized VM, the faster interpreter or the JIT compiler.
 </p>
@@ -195,7 +196,7 @@ usage. See also the
 </p>
 <p>
 The generated bytecode is portable and can be loaded on any architecture
-that LuaJIT supports, independent of word size or endianess. However the
+that LuaJIT supports, independent of word size or endianness. However, the
 bytecode compatibility versions must match. Bytecode stays compatible
 for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
 minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
@@ -227,7 +228,7 @@ avoids managing backlinks, saves an allo
 incremental array/hash part growth.
 </p>
 <p>
-Please note this function is meant for very specific situations. In most
+Please note, this function is meant for very specific situations. In most
 cases it's better to replace the (usually single) link with a new table
 and let the GC do its work.
 </p>
@@ -237,7 +238,7 @@ and let the GC do its work.
 LuaJIT uses a Tausworthe PRNG with period 2^223 to implement
 <tt>math.random()</tt> and <tt>math.randomseed()</tt>. The quality of
 the PRNG results is much superior compared to the standard Lua
-implementation which uses the platform-specific ANSI rand().
+implementation, which uses the platform-specific ANSI rand().
 </p>
 <p>
 The PRNG generates the same sequences from the same seeds on all
@@ -255,7 +256,7 @@ Important: Neither this nor any other PR
 <h3 id="io"><tt>io.*</tt> functions handle 64&nbsp;bit file offsets</h3>
 <p>
 The file I/O functions in the standard <tt>io.*</tt> library handle
-64&nbsp;bit file offsets. In particular this means it's possible
+64&nbsp;bit file offsets. In particular, this means it's possible
 to open files larger than 2&nbsp;Gigabytes and to reposition or obtain
 the current file position for offsets beyond 2&nbsp;GB
 (<tt>fp:seek()</tt> method).
@@ -392,29 +393,19 @@ the toolchain used to compile LuaJIT:
 <td class="excinterop">Interoperability</td>
 </tr>
 <tr class="odd separate">
-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+, Clang</td>
+<td class="excplatform">External frame unwinding</td>
+<td class="exccompiler">GCC, Clang, MSVC</td>
 <td class="excinterop"><b style="color: #00a000;">Full</b></td>
 </tr>
 <tr class="even">
-<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
-<td class="exccompiler">GCC, Clang</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
-<tr class="odd">
-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
+<td class="excplatform">Internal frame unwinding + DWARF2</td>
 <td class="exccompiler">GCC, Clang</td>
 <td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
-<tr class="even">
-<td class="excplatform">Windows/x64</td>
-<td class="exccompiler">MSVC</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
-</tr>
 <tr class="odd">
-<td class="excplatform">Windows/x86</td>
-<td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+<td class="excplatform">Windows 64 bit</td>
+<td class="exccompiler">non-MSVC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
 </tr>
 <tr class="even">
 <td class="excplatform">Other platforms</td>
@@ -470,7 +461,7 @@ C++ destructors.</li>
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/faq.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/faq.html
+++ /dev/null
@@ -1,185 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-<title>Frequently Asked Questions (FAQ)</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
-<meta name="Language" content="en">
-<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
-<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
-<style type="text/css">
-dd { margin-left: 1.5em; }
-</style>
-</head>
-<body>
-<div id="site">
-<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
-</div>
-<div id="head">
-<h1>Frequently Asked Questions (FAQ)</h1>
-</div>
-<div id="nav">
-<ul><li>
-<a href="luajit.html">LuaJIT</a>
-<ul><li>
-<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
-</li><li>
-<a href="install.html">Installation</a>
-</li><li>
-<a href="running.html">Running</a>
-</li></ul>
-</li><li>
-<a href="extensions.html">Extensions</a>
-<ul><li>
-<a href="ext_ffi.html">FFI Library</a>
-<ul><li>
-<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
-</li><li>
-<a href="ext_ffi_api.html">ffi.* API</a>
-</li><li>
-<a href="ext_ffi_semantics.html">FFI Semantics</a>
-</li></ul>
-</li><li>
-<a href="ext_jit.html">jit.* Library</a>
-</li><li>
-<a href="ext_c_api.html">Lua/C API</a>
-</li><li>
-<a href="ext_profiler.html">Profiler</a>
-</li></ul>
-</li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a class="current" href="faq.html">FAQ</a>
-</li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
-</li><li>
-<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
-</li></ul>
-</div>
-<div id="main">
-<dl id="info">
-<dt>Q: Where can I learn more about LuaJIT and Lua?</dt>
-<dd>
-<ul style="padding: 0;">
-<li>The <a href="https://luajit.org/list.html"><span class="ext">&raquo;</span>&nbsp;LuaJIT mailing list</a> focuses on topics
-related to LuaJIT.</li>
-<li>The <a href="http://wiki.luajit.org/"><span class="ext">&raquo;</span>&nbsp;LuaJIT wiki</a> gathers community
-resources about LuaJIT.</li>
-<li>News about Lua itself can be found at the
-<a href="https://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
-The mailing list archives are worth checking out for older postings
-about LuaJIT.</li>
-<li>The <a href="https://lua.org"><span class="ext">&raquo;</span>&nbsp;main Lua.org site</a> has complete
-<a href="https://www.lua.org/docs.html"><span class="ext">&raquo;</span>&nbsp;documentation</a> of the language
-and links to books and papers about Lua.</li>
-<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a>
-has information about diverse topics.</li>
-</ul>
-</dl>
-
-<dl id="tech">
-<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt>
-<dd>
-Please use the following Google Scholar searches to find relevant papers:<br>
-Search for: <a href="https://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">&raquo;</span>&nbsp;Trace Compiler</a><br>
-Search for: <a href="https://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">&raquo;</span>&nbsp;JIT Compiler</a><br>
-Search for: <a href="https://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">&raquo;</span>&nbsp;Dynamic Language Optimizations</a><br>
-Search for: <a href="https://scholar.google.com/scholar?q=SSA+Form"><span class="ext">&raquo;</span>&nbsp;SSA Form</a><br>
-Search for: <a href="https://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">&raquo;</span>&nbsp;Linear Scan Register Allocation</a><br>
-Here is a list of the <a href="http://lua-users.org/lists/lua-l/2009-11/msg00089.html"><span class="ext">&raquo;</span>&nbsp;innovative features in LuaJIT</a>.<br>
-And, you know, reading the source is of course the only way to enlightenment.
-</dd>
-</dl>
-
-<dl id="arg">
-<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br>
-Q: My vararg functions fail after switching to LuaJIT!</dt>
-<dd>LuaJIT is compatible to the Lua 5.1 language standard. It doesn't
-support the implicit <tt>arg</tt> parameter for old-style vararg
-functions from Lua 5.0.<br>Please convert your code to the
-<a href="https://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">&raquo;</span>&nbsp;Lua 5.1
-vararg syntax</a>.</dd>
-</dl>
-
-<dl id="x87">
-<dt>Q: Why do I get this error: "bad FPU precision"?<br>
-<dt>Q: I get weird behavior after initializing Direct3D.<br>
-<dt>Q: Some FPU operations crash after I load a Delphi DLL.<br>
-</dt>
-<dd>
-
-DirectX/Direct3D (up to version 9) sets the x87 FPU to single-precision
-mode by default. This violates the Windows ABI and interferes with the
-operation of many programs &mdash; LuaJIT is affected, too. Please make
-sure you always use the <tt>D3DCREATE_FPU_PRESERVE</tt> flag when
-initializing Direct3D.<br>
-
-Direct3D version 10 or higher do not show this behavior anymore.
-Consider testing your application with older versions, too.<br>
-
-Similarly, the Borland/Delphi runtime modifies the FPU control word and
-enables FP exceptions. Of course this violates the Windows ABI, too.
-Please check the Delphi docs for the Set8087CW method.
-
-</dl>
-
-<dl id="ctrlc">
-<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
-<dd>The interrupt signal handler sets a Lua debug hook. But this is
-ignored by compiled code. If your program is running in a tight loop
-and never falls back to the interpreter, the debug hook never runs and
-can't throw the "interrupted!" error.<br>
-You have to press Ctrl-C twice to get stop your program. That's similar
-to when it's stuck running inside a C function under the Lua interpreter.</dd>
-</dl>
-
-<dl id="sandbox">
-<dt>Q: Can Lua code be safely sandboxed?</dt>
-<dd>
-Maybe for an extremly restricted subset of Lua and if you relentlessly
-scrutinize every single interface function you offer to the untrusted code.<br>
-
-Although Lua provides some sandboxing functionality (<tt>setfenv()</tt>, hooks),
-it's very hard to get this right even for the Lua core libraries. Of course,
-you'll need to inspect any extension library, too. And there are libraries
-that are inherently unsafe, e.g. the <a href="ext_ffi.html">FFI library</a>.<br>
-
-More reading material at the <a href="http://lua-users.org/wiki/SandBoxes"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a> and <a href="https://en.wikipedia.org/wiki/Sandbox_(computer_security)"><span class="ext">&raquo;</span>&nbsp;Wikipedia</a>.<br><br>
-
-Relatedly, <b>loading untrusted bytecode is not safe!</b><br>
-
-It's trivial to crash the Lua or LuaJIT VM with maliciously crafted bytecode.
-This is well known and there's no bytecode verification on purpose, so please
-don't report a bug about it. Check the <tt>mode</tt> parameter for the
-<tt>load*()</tt> functions to disable loading of bytecode.<br><br>
-
-<b>In general, the only promising approach is to sandbox Lua code at the
-process level and not the VM level.</b>
-</dd>
-</dl>
-
-<dl id="arch">
-<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
-<dd>Because it's a compiler &mdash; it needs to generate native
-machine code. This means the code generator must be ported to each
-architecture. And the fast interpreter is written in assembler and
-must be ported, too. This is quite an undertaking.<br>
-The <a href="install.html">install documentation</a> shows the supported
-architectures.<br>
-Other architectures may follow based on sufficient user demand and
-market-relevance of the architecture. Sponsoring is required to develop
-the port itself, to integrate it and to continuously maintain it in the
-actively developed branches.</dd>
-</dl>
-<br class="flush">
-</div>
-<div id="foot">
-<hr class="hide">
-Copyright &copy; 2005-2021
-<span class="noprint">
-&middot;
-<a href="contact.html">Contact</a>
-</span>
-</div>
-</body>
-</html>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/install.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/install.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/install.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Installation</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -14,25 +14,20 @@ table.compat {
 }
 table.compat td {
   border: 1px solid #bfcfff;
-  height: 2.5em;
+  height: 1.5em;
 }
 table.compat tr.compathead td {
   font-weight: bold;
   border-bottom: 2px solid #bfcfff;
 }
-tr.compathead td.compatos {
-  vertical-align: top;
+td.compatname {
+  width: 10%;
 }
-table.compat td.compatcpu {
-  width: 18%;
-  border-right: 2px solid #bfcfff;
+td.compatbits {
+  width: 5%;
 }
-td.compatos {
+td.compatx {
   width: 21%;
-  vertical-align: middle;
-}
-td.compatno {
-  background-color: #d0d0d0;
 }
 </style>
 </head>
@@ -65,6 +60,8 @@ td.compatno {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -72,20 +69,18 @@ td.compatno {
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
 </div>
 <div id="main">
 <p>
-LuaJIT is only distributed as a source package. This page explains
-how to build and install LuaJIT with different operating systems
-and C&nbsp;compilers.
+LuaJIT is only distributed as source code &mdash; get it from the
+<a href="https://luajit.org/download.html"><span class="ext">&raquo;</span>&nbsp;git repository</a>. This page explains how to build
+and install the LuaJIT binary and library for different operating systems.
 </p>
 <p>
 For the impatient (on POSIX systems):
@@ -93,62 +88,24 @@ For the impatient (on POSIX systems):
 <pre class="code">
 make &amp;&amp; sudo make install
 </pre>
+
+<h2 id="req">Requirements</h2>
 <p>
-LuaJIT currently builds out-of-the box on most systems.
-Here's the compatibility matrix for the supported combinations of
-operating systems, CPUs and compilers:
+LuaJIT currently builds out of the box on most systems. Please check the
+supported operating systems and CPU architectures on the
+<a href="https://luajit.org/status.html"><span class="ext">&raquo;</span>&nbsp;status page</a>.
+</p>
+<p>
+Building LuaJIT requires a recent toolchain based on GCC, Clang/LLVM or
+MSVC++.
+</p>
+<p>
+The Makefile-based build system requires GNU Make and supports
+cross-builds.
+</p>
+<p>
+Batch files are provided for MSVC++ builds and console cross-builds.
 </p>
-<table class="compat">
-<tr class="compathead">
-<td class="compatcpu">CPU / OS</td>
-<td class="compatos"><a href="#posix">Linux</a> or<br><a href="#android">Android</a></td>
-<td class="compatos"><a href="#posix">*BSD, Other</a></td>
-<td class="compatos"><a href="#posix">macOS 10.4+</a> or<br><a href="#ios">iOS 3.0+</a></td>
-<td class="compatos"><a href="#windows">Windows 7<br>or later</a></td>
-</tr>
-<tr class="odd separate">
-<td class="compatcpu">x86 (32 bit)</td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos">MSVC<br>MinGW, Cygwin</td>
-</tr>
-<tr class="even">
-<td class="compatcpu">x64 (64 bit)</td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
-<td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
-</tr>
-<tr class="odd">
-<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
-<td class="compatos">GCC 4.2+</td>
-<td class="compatos">GCC 4.2+<br>PSP2 (<a href="#psvita">PS VITA</a>)</td>
-<td class="compatos">XCode 5.0+<br>Clang</td>
-<td class="compatos compatno">&nbsp;</td>
-</tr>
-<tr class="even">
-<td class="compatcpu"><a href="#cross2">ARM64<br>ARM64be</a></td>
-<td class="compatos">GCC 4.8+</td>
-<td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
-<td class="compatos compatno">&nbsp;</td>
-</tr>
-<tr class="odd">
-<td class="compatcpu"><a href="#cross2">PPC</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
-<td class="compatos compatno">&nbsp;</td>
-<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
-</tr>
-<tr class="even">
-<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64<br>MIPS64r6</a></td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos">GCC 4.3+</td>
-<td class="compatos compatno">&nbsp;</td>
-<td class="compatos compatno">&nbsp;</td>
-</tr>
-</table>
 
 <h2>Configuring LuaJIT</h2>
 <p>
@@ -157,7 +114,6 @@ Usually there is no need to tweak the se
 hold all user-configurable settings:
 </p>
 <ul>
-<li><tt>src/luaconf.h</tt> sets some configuration variables.</li>
 <li><tt>Makefile</tt> has settings for <b>installing</b> LuaJIT (POSIX
 only).</li>
 <li><tt>src/Makefile</tt> has settings for <b>compiling</b> LuaJIT
@@ -180,20 +136,12 @@ Please check the note about the
 <h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
 <h3>Prerequisites</h3>
 <p>
-Depending on your distribution, you may need to install a package for
-GCC, the development headers and/or a complete SDK. E.g. on a current
-Debian/Ubuntu, install <tt>libc6-dev</tt> with the package manager.
+Depending on your distribution, you may need to install a package for a
+compiler (GCC or Clang/LLVM), the development headers and/or a complete SDK.
+E.g. on a current Debian/Ubuntu, install <tt>build-essential</tt> with the
+package manager.
 </p>
-<p>
-The recommended way to fetch the latest version is to do a pull from
-the git repository. Alternatively download the latest source package of
-LuaJIT (pick the .tar.gz). Move it to a directory of your choice,
-open a terminal window and change to this directory. Now unpack the archive
-and change to the newly created directory:
-</p>
-<pre class="code">
-tar zxf LuaJIT-2.1.0-beta3.tar.gz
-cd LuaJIT-2.1.0-beta3</pre>
+
 <h3>Building LuaJIT</h3>
 <p>
 The supplied Makefiles try to auto-detect the settings needed for your
@@ -253,15 +201,10 @@ Either install one of the open source SD
 GCC plus the required development headers.
 Or install Microsoft's Visual Studio (MSVC).
 </p>
-<p>
-Next, pull from the git repository or download the source package and
-unpack it using an archive manager (e.g. the Windows Explorer) to
-a directory of your choice.
-</p>
 <h3>Building with MSVC</h3>
 <p>
-Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
-directory where you've unpacked the sources and run these commands:
+Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
+directory with the source code and run these commands:
 </p>
 <pre class="code">
 cd src
@@ -271,11 +214,14 @@ msvcbuild
 Check the <tt>msvcbuild.bat</tt> file for more options.
 Then follow the installation instructions below.
 </p>
+<p>
+For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
+</p>
 <h3>Building with MinGW or Cygwin</h3>
 <p>
 Open a command prompt window and make sure the MinGW or Cygwin programs
-are in your path. Then <tt>cd</tt> to the directory of the git repository
-or where you've unpacked the sources. Then run this command for MinGW:
+are in your path. Then <tt>cd</tt> to the directory of the git repository.
+Then run this command for MinGW:
 </p>
 <pre class="code">
 mingw32-make
@@ -377,15 +323,15 @@ make HOST_CC="gcc -m32" CROSS=arm-linux-
 make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
 
 # ARM64
-make CROSS=aarch64-linux-
+make CROSS=aarch64-linux-gnu-
 
 # PPC
 make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
 
 # MIPS32 big-endian
-make HOST_CC="gcc -m32" CROSS=mips-linux-
+make HOST_CC="gcc -m32" CROSS=mips-linux-gnu-
 # MIPS32 little-endian
-make HOST_CC="gcc -m32" CROSS=mipsel-linux-
+make HOST_CC="gcc -m32" CROSS=mipsel-linux-gnu-
 
 # MIPS64 big-endian
 make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
@@ -405,7 +351,8 @@ NDKCROSS=$NDKBIN/aarch64-linux-android-
 NDKCC=$NDKBIN/aarch64-linux-android21-clang
 make CROSS=$NDKCROSS \
      STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
-     TARGET_LD=$NDKCC
+     TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \
+     TARGET_STRIP=$NDKBIN/llvm-strip
 
 # Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
 NDKDIR=/opt/android/ndk
@@ -414,7 +361,8 @@ NDKCROSS=$NDKBIN/arm-linux-androideabi-
 NDKCC=$NDKBIN/armv7a-linux-androideabi16-clang
 make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
      STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
-     TARGET_LD=$NDKCC
+     TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \
+     TARGET_STRIP=$NDKBIN/llvm-strip
 </pre>
 <p>
 You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
@@ -438,8 +386,7 @@ make DEFAULT_CC=clang CROSS="$(dirname $
 <h3 id="consoles">Cross-compiling for consoles</h3>
 <p>
 Building LuaJIT for consoles requires both a supported host compiler
-(x86 or x64) and a cross-compiler (to PPC or ARM) from the official
-console SDK.
+(x86 or x64) and a cross-compiler from the official console SDK.
 </p>
 <p>
 Due to restrictions on consoles, the JIT compiler is disabled and only
@@ -460,45 +407,58 @@ To cross-compile for <b id="ps3">PS3</b>
 make HOST_CC="gcc -m32" CROSS=ppu-lv2-
 </pre>
 <p>
-To cross-compile for <b id="ps4">PS4</b> from a Windows host,
-open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
-<tt>cd</tt> to the directory where you've unpacked the sources and
-run the following commands:
+To cross-compile for the other consoles from a Windows host, open a
+"Native Tools Command Prompt for VS". You need to choose either the 32
+or the 64&nbsp;bit version of the host compiler to match the target.
+Then <tt>cd</tt> to the <tt>src</tt> directory below the source code
+and run the build command given in the table:
 </p>
-<pre class="code">
-cd src
-ps4build
-</pre>
-<p>
-To cross-compile for <b id="psvita">PS Vita</b> from a Windows host,
-open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
-<tt>cd</tt> to the directory where you've unpacked the sources and
-run the following commands:
-</p>
-<pre class="code">
-cd src
-psvitabuild
-</pre>
-<p>
-To cross-compile for <b id="xbox360">Xbox 360</b> from a Windows host,
-open a "Visual Studio .NET Command Prompt" (32&nbsp;bit host compiler),
-<tt>cd</tt> to the directory where you've unpacked the sources and run
-the following commands:
-</p>
-<pre class="code">
-cd src
-xedkbuild
-</pre>
+<table class="compat">
+<tr class="compathead">
+<td class="compatname">Console</td>
+<td class="compatbits">Bits</td>
+<td class="compatx">Build Command</td>
+</tr>
+<tr class="odd separate">
+<td class="compatname"><b id="ps4">PS4</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>ps4build</tt></td>
+</tr>
+<tr class="even">
+<td class="compatname"><b id="ps5">PS5</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>ps5build</tt></td>
+</tr>
+<tr class="odd">
+<td class="compatname"><b id="psvita">PS Vita</b></td>
+<td class="compatbits">32</td>
+<td class="compatx"><tt>psvitabuild</tt></td>
+</tr>
+<tr class="even">
+<td class="compatname"><b id="xbox360">Xbox 360</b></td>
+<td class="compatbits">32</td>
+<td class="compatx"><tt>xedkbuild</tt></td>
+</tr>
+<tr class="odd">
+<td class="compatname"><b id="xboxone">Xbox One</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>xb1build</tt></td>
+</tr>
+<tr class="even">
+<td class="compatname"><b id="nx32">Nintendo Switch NX32</b></td>
+<td class="compatbits">32</td>
+<td class="compatx"><tt>nxbuild</tt></td>
+</tr>
+<tr class="odd">
+<td class="compatname"><b id="nx64">Nintendo Switch NX64</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>nxbuild</tt></td>
+</tr>
+</table>
 <p>
-To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
-open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
-<tt>cd</tt> to the directory where you've unpacked the sources and run
-the following commands:
+Please check out the comments in the corresponding <tt>*.bat</tt>
+file for more options.
 </p>
-<pre class="code">
-cd src
-xb1build
-</pre>
 
 <h2 id="embed">Embedding LuaJIT</h2>
 <p>
@@ -540,7 +500,7 @@ allocator from your system (no support f
 of calling <tt>luaopen_base</tt> etc. directly.</li>
 <li>To change or extend the list of standard libraries to load, copy
 <tt>src/lib_init.c</tt> to your project and modify it accordingly.
-Make sure the <tt>jit</tt> library is loaded or the JIT compiler
+Make sure the <tt>jit</tt> library is loaded, or the JIT compiler
 will not be activated.</li>
 <li>The <tt>bit.*</tt> module for bitwise operations
 is already built-in. There's no need to statically link
@@ -559,7 +519,7 @@ in unspeakable ways.
 There should be absolutely no need to patch <tt>luaconf.h</tt> or any
 of the Makefiles. And please do not hand-pick files for your packages &mdash;
 simply use whatever <tt>make install</tt> creates. There's a reason
-for all of the files <em>and</em> directories it creates.
+for all the files <em>and</em> directories it creates.
 </p>
 <p>
 The build system uses GNU make and auto-detects most settings based on
@@ -611,7 +571,7 @@ to me (the upstream) and not you (the pa
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/luajit.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/luajit.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/luajit.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -67,30 +67,6 @@ table.fcompat td {
   background-image: -o-linear-gradient(#41bfbf 10%, #b0ffff 95%);
   background-image: -ms-linear-gradient(#41bfbf 10%, #b0ffff 95%);
 }
-table.stats td {
-  color: #ffffff;
-  background: #a0a0a0;
-  background-image: linear-gradient(#808080 10%, #d0d0d0 95%);
-  background-image: -moz-linear-gradient(#808080 10%, #d0d0d0 95%);
-  background-image: -webkit-linear-gradient(#808080 10%, #d0d0d0 95%);
-  background-image: -o-linear-gradient(#808080 10%, #d0d0d0 95%);
-  background-image: -ms-linear-gradient(#808080 10%, #d0d0d0 95%);
-}
-table.stats td.speed {
-  color: #ff4020;
-}
-table.stats td.kb {
-  color: #ffff80;
-  background: #808080;
-  background-image: linear-gradient(#606060 10%, #c0c0c0 95%);
-  background-image: -moz-linear-gradient(#606060 10%, #c0c0c0 95%);
-  background-image: -webkit-linear-gradient(#606060 10%, #c0c0c0 95%);
-  background-image: -o-linear-gradient(#606060 10%, #c0c0c0 95%);
-  background-image: -ms-linear-gradient(#606060 10%, #c0c0c0 95%);
-}
-table.feature small {
-  font-size: 50%;
-}
 </style>
 </head>
 <body>
@@ -122,6 +98,8 @@ table.feature small {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -129,11 +107,9 @@ table.feature small {
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="faq.html">FAQ</a>
-</li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
@@ -146,7 +122,7 @@ Lua is a powerful, dynamic and light-wei
 It may be embedded or used as a general-purpose, stand-alone language.
 </p>
 <p>
-LuaJIT is Copyright &copy; 2005-2021 Mike Pall, released under the
+LuaJIT is Copyright &copy; 2005-2023 Mike Pall, released under the
 <a href="https://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT open source license</a>.
 </p>
 <p>
@@ -160,7 +136,7 @@ LuaJIT is Copyright &copy; 2005-2021 Mik
 <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
 </table>
 <table class="feature os os3">
-<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
+<tr><td>PS3</td><td>PS4<br>PS5</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td><td>Nintendo<br>Switch</td></tr>
 </table>
 <table class="feature compiler">
 <tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
@@ -173,23 +149,20 @@ LuaJIT is Copyright &copy; 2005-2021 Mik
 </table>
 
 <h2>Overview</h2>
-<table class="feature stats">
-<tr>
-<td class="speed">3x<br>-&nbsp;&nbsp;100x</td>
-<td class="kb">115&nbsp;<small>KB</small><br>VM</td>
-<td class="kb">90&nbsp;<small>KB</small><br>JIT</td>
-<td class="kloc">63&nbsp;<small>KLOC</small><br>C</td>
-<td class="kloc">24&nbsp;<small>KLOC</small><br>ASM</td>
-<td class="kloc">11&nbsp;<small>KLOC</small><br>Lua</td>
-</tr>
-</table>
 <p style="margin-top: 1em;">
 LuaJIT has been successfully used as a <b>scripting middleware</b> in
 games, appliances, network and graphics apps, numerical simulations,
-trading platforms and many other specialty applications. It scales from
-embedded devices, smartphones, desktops up to server farms. It combines
-high flexibility with high performance
-and an unmatched <b>low memory footprint</b>.
+trading platforms and many other specialty applications.
+</p>
+<p>
+LuaJIT is part of a hundred million web sites, huge SaaS installations,
+network switches, set-top boxes and other embedded devices. You've probably
+already used LuaJIT without knowing about it.
+</p>
+<p>
+LuaJIT scales from embedded devices, smartphones, desktops up to server
+farms. It combines high flexibility with high performance and an unmatched
+<b>low memory footprint</b>.
 </p>
 <p>
 LuaJIT has been in continuous development since 2005. It's widely
@@ -220,7 +193,7 @@ Please select a sub-topic in the navigat
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/running.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/running.html
+++ wrk-4.2.0/obj/LuaJIT-2.1/doc/running.html
@@ -1,9 +1,9 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
 <html>
 <head>
 <title>Running LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2023">
 <meta name="Language" content="en">
 <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
 <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
@@ -59,6 +59,8 @@ td.param_default {
 <a href="ext_ffi_semantics.html">FFI Semantics</a>
 </li></ul>
 </li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
 <a href="ext_jit.html">jit.* Library</a>
 </li><li>
 <a href="ext_c_api.html">Lua/C API</a>
@@ -66,16 +68,15 @@ td.param_default {
 <a href="ext_profiler.html">Profiler</a>
 </li></ul>
 </li><li>
-<a href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
+<a href="https://luajit.org/status.html">Status <span class="ext">&raquo;</span></a>
 </li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+<a href="https://luajit.org/faq.html">FAQ <span class="ext">&raquo;</span></a>
 </li><li>
 <a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
 </li></ul>
 </div>
 <div id="main">
+
 <p>
 LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on
 POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple
@@ -109,6 +110,7 @@ are accepted:
 <li><tt>-t type</tt> &mdash; Set output file type (default: auto-detect from output name).</li>
 <li><tt>-a arch</tt> &mdash; Override architecture for object files (default: native).</li>
 <li><tt>-o os</tt> &mdash; Override OS for object files (default: native).</li>
+<li><tt>-F name</tt> &mdash; Override filename (default: input filename).</li>
 <li><tt>-e chunk</tt> &mdash; Use chunk string as input.</li>
 <li><tt>-</tt> (a single minus sign) &mdash; Use stdin as input and/or stdout as output.</li>
 </ul>
@@ -118,7 +120,8 @@ file name:
 </p>
 <ul>
 <li><tt>c</tt> &mdash; C source file, exported bytecode data.</li>
-<li><tt>h</tt> &mdash; C header file, static bytecode data.</li>
+<li><tt>cc</tt> &mdash; C++ source file, exported bytecode data.</li>
+<li><tt>h</tt> &mdash; C/C++ header file, static bytecode data.</li>
 <li><tt>obj</tt> or <tt>o</tt> &mdash; Object file, exported bytecode data
 (OS- and architecture-specific).</li>
 <li><tt>raw</tt> or any other extension &mdash; Raw bytecode file (portable).
@@ -182,9 +185,9 @@ written in Lua. They are mainly used for
 itself. For a description of their options and output format, please
 read the comment block at the start of their source.
 They can be found in the <tt>lib</tt> directory of the source
-distribution or installed under the <tt>jit</tt> directory. By default
-this is <tt>/usr/local/share/luajit-2.1.0-beta3/jit</tt> on POSIX
-systems.
+distribution or installed under the <tt>jit</tt> directory. By default,
+this is <tt>/usr/local/share/luajit-XX.YY.ZZ/jit</tt> on POSIX
+systems (replace XX.YY.ZZ by the installed version).
 </p>
 
 <h3 id="opt_O"><tt>-O[level]</tt><br>
@@ -214,11 +217,17 @@ to a specific value.
 You can either use this option multiple times (like <tt>-Ocse
 -O-dce -Ohotloop=10</tt>) or separate several settings with a comma
 (like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from
-left to right and later settings override earlier ones. You can freely
+left to right, and later settings override earlier ones. You can freely
 mix the three forms, but note that setting an optimization level
 overrides all earlier flags.
 </p>
 <p>
+Note that <tt>-Ofma</tt> is not enabled by default at any level,
+because it affects floating-point result accuracy. Only enable this
+if you fully understand the trade-offs of FMA for performance (higher),
+determinism (lower) and numerical accuracy (higher).
+</p>
+<p>
 Here are the available flags and at what optimization levels they
 are enabled:
 </p>
@@ -250,6 +259,8 @@ are enabled:
 <td class="flag_name">sink</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Allocation/Store Sinking</td></tr>
 <tr class="even">
 <td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
+<tr class="odd">
+<td class="flag_name">fma </td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_desc">Fused multiply-add</td></tr>
 </table>
 <p>
 Here are the parameters and their default settings:
@@ -293,7 +304,7 @@ Here are the parameters and their defaul
 </div>
 <div id="foot">
 <hr class="hide">
-Copyright &copy; 2005-2021
+Copyright &copy; 2005-2023
 <span class="noprint">
 &middot;
 <a href="contact.html">Contact</a>
Index: wrk-4.2.0/obj/LuaJIT-2.1/doc/status.html
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/doc/status.html
+++ /dev/null
@@ -1,111 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-<title>Status</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-<meta name="Copyright" content="Copyright (C) 2005-2021">
-<meta name="Language" content="en">
-<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
-<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
-<style type="text/css">
-ul li { padding-bottom: 0.3em; }
-</style>
-</head>
-<body>
-<div id="site">
-<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
-</div>
-<div id="head">
-<h1>Status</h1>
-</div>
-<div id="nav">
-<ul><li>
-<a href="luajit.html">LuaJIT</a>
-<ul><li>
-<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
-</li><li>
-<a href="install.html">Installation</a>
-</li><li>
-<a href="running.html">Running</a>
-</li></ul>
-</li><li>
-<a href="extensions.html">Extensions</a>
-<ul><li>
-<a href="ext_ffi.html">FFI Library</a>
-<ul><li>
-<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
-</li><li>
-<a href="ext_ffi_api.html">ffi.* API</a>
-</li><li>
-<a href="ext_ffi_semantics.html">FFI Semantics</a>
-</li></ul>
-</li><li>
-<a href="ext_jit.html">jit.* Library</a>
-</li><li>
-<a href="ext_c_api.html">Lua/C API</a>
-</li><li>
-<a href="ext_profiler.html">Profiler</a>
-</li></ul>
-</li><li>
-<a class="current" href="status.html">Status</a>
-</li><li>
-<a href="faq.html">FAQ</a>
-</li><li>
-<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
-</li><li>
-<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
-</li></ul>
-</div>
-<div id="main">
-<p>
-This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt>
-directory in each git branch for the version-specific documentation.
-</p>
-<p>
-The currently developed branches are LuaJIT&nbsp;2.1 and LuaJIT&nbsp;2.0.
-</p>
-<p>
-LuaJIT&nbsp;2.0 is in feature-freeze &mdash; new features will only
-be added to LuaJIT&nbsp;2.1.
-</p>
-
-<h2>Current Status</h2>
-<p>
-LuaJIT ought to run all Lua&nbsp;5.1-compatible source code just fine.
-It's considered a serious bug if the VM crashes or produces unexpected
-results &mdash; please report this.
-</p>
-<p>
-Known incompatibilities and issues in LuaJIT&nbsp;2.0:
-</p>
-<ul>
-<li>
-There are some differences in <b>implementation-defined</b> behavior.
-These either have a good reason, are arbitrary design choices
-or are due to quirks in the VM. The latter cases may get fixed if a
-demonstrable need is shown.
-</li>
-<li>
-The Lua <b>debug API</b> is missing a couple of features (return
-hooks for non-Lua functions) and shows slightly different behavior
-in LuaJIT (no per-coroutine hooks, no tail call counting).
-</li>
-<li>
-Currently some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
-handled correctly. The error may fall through an on-trace
-<tt>pcall</tt> or it may be passed on to the function set with
-<tt>lua_atpanic</tt> on x64.
-</li>
-</ul>
-<br class="flush">
-</div>
-<div id="foot">
-<hr class="hide">
-Copyright &copy; 2005-2021
-<span class="noprint">
-&middot;
-<a href="contact.html">Contact</a>
-</span>
-</div>
-</body>
-</html>
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_arm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM ARM encoding engine.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -70,7 +70,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -87,7 +87,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -98,12 +97,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -123,7 +117,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -148,6 +142,7 @@ void dasm_setup(Dst_DECL, const void *ac
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -294,7 +289,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +366,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp - 4);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
@@ -393,7 +391,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_arm.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM ARM module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -9,9 +9,9 @@
 local _info = {
   arch =	"arm",
   description =	"DynASM ARM module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm64.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_arm64.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm64.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM ARM64 encoding engine.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -21,8 +21,9 @@ enum {
   /* The following actions need a buffer position. */
   DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
   /* The following actions also have an argument. */
-  DASM_REL_PC, DASM_LABEL_PC,
+  DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
   DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+  DASM_IMMV, DASM_VREG,
   DASM__MAX
 };
 
@@ -39,6 +40,7 @@ enum {
 #define DASM_S_RANGE_LG		0x13000000
 #define DASM_S_RANGE_PC		0x14000000
 #define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_RANGE_VREG	0x16000000
 #define DASM_S_UNDEF_LG		0x21000000
 #define DASM_S_UNDEF_PC		0x22000000
 
@@ -70,7 +72,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -87,7 +89,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -98,12 +99,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -123,7 +119,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -148,6 +144,7 @@ void dasm_setup(Dst_DECL, const void *ac
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -156,10 +153,10 @@ void dasm_setup(Dst_DECL, const void *ac
 #ifdef DASM_CHECKS
 #define CK(x, st) \
   do { if (!(x)) { \
-    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
 #define CKPL(kind, st) \
   do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
-    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+    D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
 #else
 #define CK(x, st)	((void)0)
 #define CKPL(kind, st)	((void)0)
@@ -188,7 +185,9 @@ static int dasm_imm13(int lo, int hi)
   unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
   unsigned long long m = 1ULL, a, b, c;
   if (n & 1) { n = ~n; inv = 1; }
-  a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
+  a = n & (unsigned long long)-(long long)n;
+  b = (n+a)&(unsigned long long)-(long long)(n+a);
+  c = (n+a-b)&(unsigned long long)-(long long)(n+a-b);
   xa = dasm_ffs(a); xb = dasm_ffs(b);
   if (c) {
     w = dasm_ffs(c) - xa;
@@ -247,7 +246,7 @@ void dasm_put(Dst_DECL, int start, ...)
 	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
 	D->section = &D->sections[n]; goto stop;
       case DASM_ESC: p++; ofs += 4; break;
-      case DASM_REL_EXT: break;
+      case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
       case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
       case DASM_REL_LG:
 	n = (ins & 2047) - 10; pl = D->lglabels + n;
@@ -268,6 +267,11 @@ void dasm_put(Dst_DECL, int start, ...)
 	  *pl = pos;
 	}
 	pos++;
+	if ((ins & 0x8000)) ofs += 8;
+	break;
+      case DASM_REL_A:
+	b[pos++] = n;
+	b[pos++] = va_arg(ap, int);
 	break;
       case DASM_LABEL_LG:
 	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
@@ -312,13 +316,21 @@ void dasm_put(Dst_DECL, int start, ...)
 	}
       case DASM_IMML: {
 #ifdef DASM_CHECKS
-	int scale = (p[-2] >> 30);
+	int scale = (ins & 3);
 	CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
 	   (unsigned int)(n+256) < 512, RANGE_I);
 #endif
 	b[pos++] = n;
 	break;
 	}
+      case DASM_IMMV:
+	ofs += 4;
+	b[pos++] = n;
+	break;
+      case DASM_VREG:
+	CK(n < 32, RANGE_VREG);
+	b[pos++] = n;
+	break;
       }
     }
   }
@@ -348,7 +360,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -375,8 +387,8 @@ int dasm_link(Dst_DECL, size_t *szp)
 	case DASM_REL_LG: case DASM_REL_PC: pos++; break;
 	case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
 	case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
-	case DASM_IMML: pos++; break;
-	case DASM_IMM13X: pos += 2; break;
+	case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
+	case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
 	}
       }
       stop: (void)0;
@@ -391,7 +403,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
 #ifdef DASM_CHECKS
 #define CK(x, st) \
-  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+  do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
 #else
 #define CK(x, st)	((void)0)
 #endif
@@ -423,10 +435,15 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
 	  goto patchrel;
 	case DASM_ALIGN:
-	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
+	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    ptrdiff_t na = (ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4;
+	    n = (int)na;
+	    CK((ptrdiff_t)n == na, RANGE_REL);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
@@ -446,10 +463,26 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  } else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
 	    CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
 	    cp[-1] |= ((n << 3) & 0x0007ffe0);
+	  } else if ((ins & 0x8000)) {  /* absolute */
+	    cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
+	    cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
+	    cp += 2;
 	  }
 	  break;
+	case DASM_REL_A: {
+	  ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
+	  if ((ins & 0x3000) == 0x3000) {  /* ADRP */
+	    ins &= ~0x1000;
+	    na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
+	  } else {
+	    na = na - (ptrdiff_t)cp + 4;
+	  }
+	  n = (int)na;
+	  CK((ptrdiff_t)n == na, RANGE_REL);
+	  goto patchrel;
+	}
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:
@@ -468,11 +501,17 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  cp[-1] |= (dasm_imm13(n, *b++) << 10);
 	  break;
 	case DASM_IMML: {
-	  int scale = (p[-2] >> 30);
+	  int scale = (ins & 3);
 	  cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
 	    ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
 	  break;
 	  }
+	case DASM_IMMV:
+	  *cp++ = n;
+	  break;
+	case DASM_VREG:
+	  cp[-1] |= (n & 0x1f) << (ins & 0x1f);
+	  break;
 	default: *cp++ = ins; break;
 	}
       }
@@ -512,7 +551,7 @@ int dasm_checkstep(Dst_DECL, int secmatc
   }
   if (D->status == DASM_S_OK && secmatch >= 0 &&
       D->section != &D->sections[secmatch])
-    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
   return D->status;
 }
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_arm64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_arm64.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM ARM64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -9,9 +9,9 @@
 local _info = {
   arch =	"arm",
   description =	"DynASM ARM64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -23,12 +23,12 @@ local _M = { _info = _info }
 local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
 local assert, setmetatable, rawget = assert, setmetatable, rawget
 local _s = string
-local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local format, byte, char = _s.format, _s.byte, _s.char
 local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
 local concat, sort, insert = table.concat, table.sort, table.insert
 local bit = bit or require("bit")
 local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
-local ror, tohex = bit.ror, bit.tohex
+local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
 
 -- Inherited tables and callbacks.
 local g_opt, g_arch
@@ -39,7 +39,9 @@ local wline, werror, wfatal, wwarn
 local action_names = {
   "STOP", "SECTION", "ESC", "REL_EXT",
   "ALIGN", "REL_LG", "LABEL_LG",
-  "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
+  "REL_PC", "LABEL_PC", "REL_A",
+  "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
+  "VREG",
 }
 
 -- Maximum number of section buffer positions for dasm_put().
@@ -246,9 +248,12 @@ local map_cond = {
 
 local parse_reg_type
 
-local function parse_reg(expr)
+local function parse_reg(expr, shift, no_vreg)
   if not expr then werror("expected register name") end
   local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
+  if not tname then
+    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
+  end
   local tp = map_type[tname or expr]
   if tp then
     local reg = ovreg or tp.reg
@@ -266,18 +271,28 @@ local function parse_reg(expr)
       elseif parse_reg_type ~= rt then
 	werror("register size mismatch")
       end
-      return r, tp
+      return shl(r, shift), tp
     end
   end
+  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
+  if vreg then
+    if not parse_reg_type then
+      parse_reg_type = vrt
+    elseif parse_reg_type ~= vrt then
+      werror("register size mismatch")
+    end
+    if not no_vreg then waction("VREG", shift, vreg) end
+    return 0
+  end
   werror("bad register name `"..expr.."'")
 end
 
 local function parse_reg_base(expr)
   if expr == "sp" then return 0x3e0 end
-  local base, tp = parse_reg(expr)
+  local base, tp = parse_reg(expr, 5)
   if parse_reg_type ~= "x" then werror("bad register type") end
   parse_reg_type = false
-  return shl(base, 5), tp
+  return base, tp
 end
 
 local parse_ctx = {}
@@ -297,7 +312,7 @@ local function parse_number(n)
   local code = loadenv("return "..n)
   if code then
     local ok, y = pcall(code)
-    if ok then return y end
+    if ok and type(y) == "number" then return y end
   end
   return nil
 end
@@ -403,7 +418,7 @@ local function parse_imm_load(imm, scale
     end
     werror("out of range immediate `"..imm.."'")
   else
-    waction("IMML", 0, imm)
+    waction("IMML", scale, imm)
     return 0
   end
 end
@@ -462,6 +477,7 @@ end
 
 local function parse_load(params, nparams, n, op)
   if params[n+2] then werror("too many operands") end
+  local scale = shr(op, 30)
   local pn, p2 = params[n], params[n+1]
   local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
   if not p1 then
@@ -470,14 +486,13 @@ local function parse_load(params, nparam
       if reg and tailr ~= "" then
 	local base, tp = parse_reg_base(reg)
 	if tp then
-	  waction("IMML", 0, format(tp.ctypefmt, tailr))
+	  waction("IMML", scale, format(tp.ctypefmt, tailr))
 	  return op + base
 	end
       end
     end
     werror("expected address operand")
   end
-  local scale = shr(op, 30)
   if p2 then
     if wb == "!" then werror("bad use of '!'") end
     op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
@@ -494,7 +509,7 @@ local function parse_load(params, nparam
 	op = op + parse_imm_load(imm, scale)
       else
 	local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
-	op = op + shl(parse_reg(p2b), 16) + 0x00200800
+	op = op + parse_reg(p2b, 16) + 0x00200800
 	if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
 	  werror("bad index register type")
 	end
@@ -534,7 +549,7 @@ end
 local function parse_load_pair(params, nparams, n, op)
   if params[n+2] then werror("too many operands") end
   local pn, p2 = params[n], params[n+1]
-  local scale = shr(op, 30) == 0 and 2 or 3
+  local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
   local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
   if not p1 then
     if not p2 then
@@ -561,14 +576,14 @@ local function parse_load_pair(params, n
 end
 
 local function parse_label(label, def)
-  local prefix = sub(label, 1, 2)
+  local prefix = label:sub(1, 2)
   -- =>label (pc label reference)
   if prefix == "=>" then
-    return "PC", 0, sub(label, 3)
+    return "PC", 0, label:sub(3)
   end
   -- ->name (global label reference)
   if prefix == "->" then
-    return "LG", map_global[sub(label, 3)]
+    return "LG", map_global[label:sub(3)]
   end
   if def then
     -- [1-9] (local label definition)
@@ -586,8 +601,11 @@ local function parse_label(label, def)
     if extname then
       return "EXT", map_extern[extname]
     end
+    -- &expr (pointer)
+    if label:sub(1, 1) == "&" then
+      return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
+    end
   end
-  werror("bad label `"..label.."'")
 end
 
 local function branch_type(op)
@@ -620,24 +638,24 @@ local function alias_bfx(p)
 end
 
 local function alias_bfiz(p)
-  parse_reg(p[1])
+  parse_reg(p[1], 0, true)
   if parse_reg_type == "w" then
-    p[3] = "#-("..p[3]:sub(2)..")%32"
+    p[3] = "#(32-("..p[3]:sub(2).."))%32"
     p[4] = "#("..p[4]:sub(2)..")-1"
   else
-    p[3] = "#-("..p[3]:sub(2)..")%64"
+    p[3] = "#(64-("..p[3]:sub(2).."))%64"
     p[4] = "#("..p[4]:sub(2)..")-1"
   end
 end
 
 local alias_lslimm = op_alias("ubfm_4", function(p)
-  parse_reg(p[1])
+  parse_reg(p[1], 0, true)
   local sh = p[3]:sub(2)
   if parse_reg_type == "w" then
-    p[3] = "#-("..sh..")%32"
+    p[3] = "#(32-("..sh.."))%32"
     p[4] = "#31-("..sh..")"
   else
-    p[3] = "#-("..sh..")%64"
+    p[3] = "#(64-("..sh.."))%64"
     p[4] = "#63-("..sh..")"
   end
 end)
@@ -788,8 +806,8 @@ map_op = {
   ["ldrsw_*"] = "98000000DxB|b8800000DxL",
   -- NOTE: ldur etc. are handled by ldr et al.
 
-  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
-  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
+  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
   ["ldpsw_*"] = "68400000DAxP",
 
   -- Branches.
@@ -805,6 +823,13 @@ map_op = {
   tbz_3  = "36000000DTBw|36000000DTBx",
   tbnz_3 = "37000000DTBw|37000000DTBx",
 
+  -- ARM64e: Pointer authentication codes (PAC).
+  blraaz_1  = "d63f081fNx",
+  braa_2    = "d71f0800NDx",
+  braaz_1   = "d61f081fNx",
+  pacibsp_0 = "d503237f",
+  retab_0   = "d65f0fff",
+
   -- Miscellaneous instructions.
   -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
   -- TODO: sys, sysl, ic, dc, at, tlbi
@@ -881,25 +906,25 @@ end
 
 -- Handle opcodes defined with template strings.
 local function parse_template(params, template, nparams, pos)
-  local op = tonumber(sub(template, 1, 8), 16)
+  local op = tonumber(template:sub(1, 8), 16)
   local n = 1
   local rtt = {}
 
   parse_reg_type = false
 
   -- Process each character.
-  for p in gmatch(sub(template, 9), ".") do
+  for p in gmatch(template:sub(9), ".") do
     local q = params[n]
     if p == "D" then
-      op = op + parse_reg(q); n = n + 1
+      op = op + parse_reg(q, 0); n = n + 1
     elseif p == "N" then
-      op = op + shl(parse_reg(q), 5); n = n + 1
+      op = op + parse_reg(q, 5); n = n + 1
     elseif p == "M" then
-      op = op + shl(parse_reg(q), 16); n = n + 1
+      op = op + parse_reg(q, 16); n = n + 1
     elseif p == "A" then
-      op = op + shl(parse_reg(q), 10); n = n + 1
+      op = op + parse_reg(q, 10); n = n + 1
     elseif p == "m" then
-      op = op + shl(parse_reg(params[n-1]), 16)
+      op = op + parse_reg(params[n-1], 16)
 
     elseif p == "p" then
       if q == "sp" then params[n] = "@x31" end
@@ -917,7 +942,7 @@ local function parse_template(params, te
 	werror("bad register type")
       end
       parse_reg_type = false
-    elseif p == "x" or p == "w" or p == "d" or p == "s" then
+    elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
       if parse_reg_type ~= p then
 	werror("register size mismatch")
       end
@@ -930,8 +955,14 @@ local function parse_template(params, te
 
     elseif p == "B" then
       local mode, v, s = parse_label(q, false); n = n + 1
+      if not mode then werror("bad label `"..q.."'") end
       local m = branch_type(op)
-      waction("REL_"..mode, v+m, s, 1)
+      if mode == "A" then
+	waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
+	actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
+      else
+	waction("REL_"..mode, v+m, s, 1)
+      end
 
     elseif p == "I" then
       op = op + parse_imm12(q); n = n + 1
@@ -977,8 +1008,8 @@ function op_template(params, template, n
   if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
 
   -- Limit number of section buffer positions used by a single dasm_put().
-  -- A single opcode needs a maximum of 3 positions.
-  if secpos+3 > maxsecpos then wflush() end
+  -- A single opcode needs a maximum of 4 positions.
+  if secpos+4 > maxsecpos then wflush() end
   local pos = wpos()
   local lpos, apos, spos = #actlist, #actargs, secpos
 
@@ -990,9 +1021,11 @@ function op_template(params, template, n
     actlist[lpos+1] = nil
     actlist[lpos+2] = nil
     actlist[lpos+3] = nil
+    actlist[lpos+4] = nil
     actargs[apos+1] = nil
     actargs[apos+2] = nil
     actargs[apos+3] = nil
+    actargs[apos+4] = nil
   end
   error(err, 0)
 end
@@ -1036,23 +1069,50 @@ map_op[".label_1"] = function(params)
   if not params then return "[1-9] | ->global | =>pcexpr" end
   if secpos+1 > maxsecpos then wflush() end
   local mode, n, s = parse_label(params[1], true)
-  if mode == "EXT" then werror("bad label definition") end
+  if not mode or mode == "EXT" then werror("bad label definition") end
   waction("LABEL_"..mode, n, s, 1)
 end
 
 ------------------------------------------------------------------------------
 
 -- Pseudo-opcodes for data storage.
-map_op[".long_*"] = function(params)
+local function op_data(params)
   if not params then return "imm..." end
+  local sz = params.op == ".long" and 4 or 8
   for _,p in ipairs(params) do
-    local n = tonumber(p)
-    if not n then werror("bad immediate `"..p.."'") end
-    if n < 0 then n = n + 2^32 end
-    wputw(n)
+    local imm = parse_number(p)
+    if imm then
+      local n = tobit(imm)
+      if n == imm or (n < 0 and n + 2^32 == imm) then
+	wputw(n < 0 and n + 2^32 or n)
+	if sz == 8 then
+	  wputw(imm < 0 and 0xffffffff or 0)
+	end
+      elseif sz == 4 then
+	werror("bad immediate `"..p.."'")
+      else
+	imm = nil
+      end
+    end
+    if not imm then
+      local mode, v, s = parse_label(p, false)
+      if sz == 4 then
+	if mode then werror("label does not fit into .long") end
+	waction("IMMV", 0, p)
+      elseif mode and mode ~= "A" then
+	waction("REL_"..mode, v+0x8000, s, 1)
+      else
+	if mode == "A" then p = s end
+	waction("IMMV", 0, format("(unsigned int)(%s)", p))
+	waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p))
+      end
+    end
     if secpos+2 > maxsecpos then wflush() end
   end
 end
+map_op[".long_*"] = op_data
+map_op[".quad_*"] = op_data
+map_op[".addr_*"] = op_data
 
 -- Alignment pseudo-opcode.
 map_op[".align_1"] = function(params)
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_mips.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM MIPS encoding engine.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *ac
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -155,10 +150,10 @@ void dasm_setup(Dst_DECL, const void *ac
 #ifdef DASM_CHECKS
 #define CK(x, st) \
   do { if (!(x)) { \
-    D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
 #define CKPL(kind, st) \
   do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
-    D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+    D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
 #else
 #define CK(x, st)	((void)0)
 #define CKPL(kind, st)	((void)0)
@@ -273,7 +268,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -314,7 +309,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
 #ifdef DASM_CHECKS
 #define CK(x, st) \
-  do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+  do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
 #else
 #define CK(x, st)	((void)0)
 #endif
@@ -349,7 +344,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
@@ -366,7 +364,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  }
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMMS:
@@ -414,7 +412,7 @@ int dasm_checkstep(Dst_DECL, int secmatc
   }
   if (D->status == DASM_S_OK && secmatch >= 0 &&
       D->section != &D->sections[secmatch])
-    D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
   return D->status;
 }
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_mips.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM MIPS32/MIPS64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -12,9 +12,9 @@ local mipsr6 = _map_def.MIPSR6
 local _info = {
   arch =	mips64 and "mips64" or "mips",
   description =	"DynASM MIPS32/MIPS64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2020-01-20",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_mips64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_mips64.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM MIPS64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_ppc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_ppc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_ppc.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM PPC/PPC64 encoding engine.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -69,7 +69,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -86,7 +86,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -97,12 +96,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -122,7 +116,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -147,6 +141,7 @@ void dasm_setup(Dst_DECL, const void *ac
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -277,7 +272,7 @@ int dasm_link(Dst_DECL, size_t *szp)
 
   { /* Handle globals not defined in this translation unit. */
     int idx;
-    for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
       int n = D->lglabels[idx];
       /* Undefined label: Collapse rel chain and replace with marker (< 0). */
       while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -353,7 +348,10 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
 	  break;
 	case DASM_REL_LG:
-	  CK(n >= 0, UNDEF_LG);
+	  if (n < 0) {
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp);
+	    goto patchrel;
+	  }
 	  /* fallthrough */
 	case DASM_REL_PC:
 	  CK(n >= 0, UNDEF_PC);
@@ -365,7 +363,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  cp[-1] |= ((n+4) & ((ins & 2048) ? 0x0000fffc: 0x03fffffc));
 	  break;
 	case DASM_LABEL_LG:
-	  ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+	  ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n);
 	  break;
 	case DASM_LABEL_PC: break;
 	case DASM_IMM:
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_ppc.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_ppc.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_ppc.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM PPC/PPC64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 --
 -- Support for various extensions contributed by Caio Souza Oliveira.
@@ -11,9 +11,9 @@
 local _info = {
   arch =	"ppc",
   description =	"DynASM PPC module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_proto.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_proto.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_proto.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM encoding engine prototypes.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -10,8 +10,8 @@
 #include <stddef.h>
 #include <stdarg.h>
 
-#define DASM_IDENT	"DynASM 1.4.0"
-#define DASM_VERSION	10400	/* 1.4.0 */
+#define DASM_IDENT	"DynASM 1.5.0"
+#define DASM_VERSION	10500	/* 1.5.0 */
 
 #ifndef Dst_DECL
 #define Dst_DECL	dasm_State **Dst
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv.h
@@ -0,0 +1,433 @@
+/*
+** DynASM RISC-V encoding engine.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH		"riscv"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d)	0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS		25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK		0x00000000
+#define DASM_S_NOMEM		0x01000000
+#define DASM_S_PHASE		0x02000000
+#define DASM_S_MATCH_SEC	0x03000000
+#define DASM_S_RANGE_I		0x11000000
+#define DASM_S_RANGE_SEC	0x12000000
+#define DASM_S_RANGE_LG		0x13000000
+#define DASM_S_RANGE_PC		0x14000000
+#define DASM_S_RANGE_REL	0x15000000
+#define DASM_S_UNDEF_LG		0x21000000
+#define DASM_S_UNDEF_PC		0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos)	((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos)	((pos)&0xff000000)
+#define DASM_SEC2POS(sec)	((sec)<<24)
+#define DASM_POS2SEC(pos)	((pos)>>24)
+#define DASM_POS2PTR(D, pos)	(D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;		/* Biased buffer pointer (negative section bias). */
+  int *buf;		/* True buffer pointer. */
+  size_t bsize;		/* Buffer size in bytes. */
+  int pos;		/* Biased buffer position. */
+  int epos;		/* End of biased buffer position - max single put. */
+  int ofs;		/* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;			/* Allocated size of this structure. */
+  dasm_ActList actionlist;	/* Current actionlist pointer. */
+  int *lglabels;		/* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;		/* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;		/* Array of globals. */
+  dasm_Section *section;	/* Pointer to active section. */
+  size_t codesize;		/* Total size of all code sections. */
+  int maxsection;		/* 0 <= sectionidx < maxsection. */
+  int status;			/* Status code. */
+  dasm_Section sections[1];	/* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms)	(sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state: allocate a state block sized for maxsection sections. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));  /* Macro is defined outside this file. */
+  D = Dst_REF;
+  D->psize = psz;  /* Kept so dasm_free() can hand the size to DASM_M_FREE. */
+  D->lglabels = NULL;  /* Label arrays are allocated later by dasm_setupglobal()/dasm_growpc(). */
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));  /* No section has a buffer yet. */
+}
+
+/* Free DynASM state: section buffers, both label arrays, then the state block. */
+void dasm_free(Dst_DECL)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  for (i = 0; i < D->maxsection; i++)
+    if (D->sections[i].buf)  /* Sections that never grew still have a NULL buffer. */
+      DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+  if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+  if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+  DASM_M_FREE(Dst, D, D->psize);  /* D->globals is not released here. */
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+  dasm_State *D = Dst_REF;
+  D->globals = gl;  /* Array passed in by the caller; dasm_encode() stores global label addresses into it. */
+  DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));  /* 10 slots precede the globals; dasm_link() scans globals from index 10. */
+}
+
+/* Grow PC label array to hold maxpc entries. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+  dasm_State *D = Dst_REF;
+  size_t osz = D->pcsize;  /* Size in bytes before growing. */
+  DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+  memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);  /* Zero only the newly added tail. */
+}
+
+/* Setup encoder: bind the action list, clear status and labels, rewind all sections. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+  dasm_State *D = Dst_REF;
+  int i;
+  D->actionlist = (dasm_ActList)actionlist;
+  D->status = DASM_S_OK;
+  D->section = &D->sections[0];  /* Section 0 is the active section initially. */
+  memset((void *)D->lglabels, 0, D->lgsize);  /* Not NULL-guarded, unlike pclabels below. */
+  if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+  for (i = 0; i < D->maxsection; i++) {
+    D->sections[i].pos = DASM_SEC2POS(i);  /* Position = section number in the top 8 bits, index 0. */
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;  /* Biased so rbuf[pos] addresses buf[index]. */
+    D->sections[i].ofs = 0;
+  }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) { \
+    D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+  do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+    D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st)	((void)0)
+#define CKPL(kind, st)	((void)0)
+#endif
+
+static int dasm_imms(int n)  /* Range check for a signed 12-bit immediate. */
+{
+  return (n >= -2048 && n < 2048) ? n : 4096;  /* 4096 is the out-of-range marker tested by dasm_put(). */
+}
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+  va_list ap;
+  dasm_State *D = Dst_REF;
+  dasm_ActList p = D->actionlist + start;
+  dasm_Section *sec = D->section;
+  int pos = sec->pos, ofs = sec->ofs;
+  int *b;
+
+  if (pos >= sec->epos) {  /* Grow the section buffer before this put can overrun it. */
+    DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+      sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+    sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+    sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+  }
+
+  b = sec->rbuf;
+  b[pos++] = start;  /* Every put first records its action list offset. */
+
+  va_start(ap, start);
+  while (1) {
+    unsigned int ins = *p++;
+    unsigned int action = (ins >> 20);
+    if (action >= DASM__MAX || (ins & 0xf)) {
+      ofs += 4;  /* Not an action: a plain 4-byte instruction word. */
+    } else {
+      ins >>= 4;  /* Drop the zero low nibble; the rest is the action argument. */
+      int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;  /* Only actions from REL_PC on take a vararg. */
+      switch (action) {
+      case DASM_STOP: goto stop;
+      case DASM_SECTION:
+	n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+	D->section = &D->sections[n]; goto stop;
+      case DASM_ESC: p++; ofs += 4; break;  /* Skip the escaped word; it still occupies 4 bytes. */
+      case DASM_REL_EXT: break;
+      case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;  /* Assume maximum padding; dasm_link() shrinks it. */
+      case DASM_REL_LG:
+	n = (ins & 2047) - 10; pl = D->lglabels + n;
+	/* Bkwd rel or global. */
+	if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+	pl += 10; n = *pl;
+	if (n < 0) n = 0;  /* Start new chain for fwd rel if label exists. */
+	goto linkrel;
+      case DASM_REL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putrel:
+	n = *pl;
+	if (n < 0) {  /* Label exists. Get label pos and store it. */
+	  b[pos] = -n;
+	} else {
+      linkrel:
+	  b[pos] = n;  /* Else link to rel chain, anchored at label. */
+	  *pl = pos;
+	}
+	pos++;
+	break;
+      case DASM_LABEL_LG:
+	pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+      case DASM_LABEL_PC:
+	pl = D->pclabels + n; CKPL(pc, PC);
+      putlabel:
+	n = *pl;  /* n > 0: Collapse rel chain and replace with label pos. */
+	while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+  }
+	*pl = -pos;  /* Label exists now. */
+	b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	break;
+      case DASM_IMM:  /* Argument layout: bit 15 signed, bits 10-14 scale, bits 5-9 width, bits 0-4 shift. */
+#ifdef DASM_CHECKS
+	CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+#endif
+	n >>= ((ins>>10)&31);  /* Scale the value down before storing it. */
+#ifdef DASM_CHECKS
+	if (ins & 0x8000)
+	  CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+	else
+	  CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+	b[pos++] = n;
+	break;
+      case DASM_IMMS:  /* Immediate that dasm_encode() splits across two instruction fields. */
+#ifdef DASM_CHECKS
+        CK(dasm_imms(n) != 4096, RANGE_I);
+#endif
+	      b[pos++] = n;
+	      break;
+      }
+    }
+  }
+stop:
+  va_end(ap);
+  sec->pos = pos;  /* Write the cursors back to the section this put started in. */
+  sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. Total code size is returned in *szp. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+  dasm_State *D = Dst_REF;
+  int secnum;
+  int ofs = 0;  /* Running adjustment; holds the total code size at the end. */
+
+#ifdef DASM_CHECKS
+  *szp = 0;
+  if (D->status != DASM_S_OK) return D->status;
+  {
+    int pc;
+    for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+      if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;  /* A positive entry is a still-open reference chain. */
+  }
+#endif
+
+  { /* Handle globals not defined in this translation unit. */
+    int idx;
+    for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+      int n = D->lglabels[idx];
+      /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+      while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+    }
+  }
+
+  /* Combine all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->rbuf;
+    int pos = DASM_SEC2POS(secnum);
+    int lastpos = sec->pos;
+
+    while (pos != lastpos) {
+      dasm_ActList p = D->actionlist + b[pos++];  /* Each recorded put starts with its action list offset. */
+      while (1) {
+	  unsigned int ins = *p++;
+	  unsigned int action = (ins >> 20);
+	  if (ins & 0xf) continue; else ins >>= 4;  /* Words with a non-zero low nibble are instructions: skip. */
+	  switch (action) {
+	  case DASM_STOP: case DASM_SECTION: goto stop;
+	  case DASM_ESC: p++; break;
+	  case DASM_REL_EXT: break;
+	  case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;  /* Take back padding pass 1 over-estimated. */
+	  case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+	  case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;  /* Turn the pass 1 estimate into the final offset. */
+	  case DASM_IMM: case DASM_IMMS: pos++; break;
+	  }
+      }
+      stop: (void)0;
+    }
+    ofs += sec->ofs;  /* Next section starts right after current section. */
+  }
+
+  D->codesize = ofs;  /* Total size of all code sections */
+  *szp = ofs;
+  return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+  do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st)	((void)0)
+#endif
+
+/* Pass 3: Encode sections into buffer. Returns DASM_S_OK, DASM_S_PHASE or (with DASM_CHECKS) a range/undef status. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+  dasm_State *D = Dst_REF;
+  char *base = (char *)buffer;
+  unsigned int *cp = (unsigned int *)buffer;  /* Output cursor; cp[-1] is the last emitted instruction. */
+  int secnum;
+
+  /* Encode all code sections. No support for data sections (yet). */
+  for (secnum = 0; secnum < D->maxsection; secnum++) {
+    dasm_Section *sec = D->sections + secnum;
+    int *b = sec->buf;
+    int *endb = sec->rbuf + sec->pos;
+
+    while (b != endb) {
+      dasm_ActList p = D->actionlist + *b++;  /* Each recorded put starts with its action list offset. */
+      while (1) {
+	unsigned int ins = *p++;
+	if (ins & 0xf) { *cp++ = ins; continue; }  /* Non-zero low nibble: plain instruction word. */
+	unsigned int action = (ins >> 20);
+	unsigned int val = (ins >> 4);
+	int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;  /* Actions from ALIGN on own a buffer slot. */
+	switch (action) {
+	case DASM_STOP: case DASM_SECTION: goto stop;
+	case DASM_ESC: *cp++ = *p++; break;
+	case DASM_REL_EXT:
+	  n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1);
+	  goto patchrel;
+	case DASM_ALIGN:  /* Pad with the RISC-V NOP (addi x0,x0,0); 0x60000000 is the PPC NOP and is illegal here. */
+	  val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x00000013;
+	  break;
+	case DASM_REL_LG:
+	  if (n < 0) {  /* Negative marker left by dasm_link(): global not defined in this unit. */
+	    n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4);
+	    goto patchrel;
+	  }
+	  /* fallthrough */
+	case DASM_REL_PC:
+	  CK(n >= 0, UNDEF_PC);
+	  n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;  /* +4: relative to the instruction itself, not to cp. */
+	patchrel:
+	  if (val & 2048) { /* B */
+	    CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000)
+	           |  ((n << 7)  & 0x00000f00) | ((n >> 4)  & 0x00000080);
+	  } else { /* J */
+	    CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+	    cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000)
+	           |  ((n << 9)  & 0x00100000) | (n & 0x000ff000);
+	  }
+	  break;
+	case DASM_LABEL_LG:
+	  val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n);  /* Only labels numbered 20 and up are exported. */
+	  break;
+	case DASM_LABEL_PC: break;
+	case DASM_IMM:
+	  cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31);  /* Mask to the field width, then shift into place. */
+	  break;
+	case DASM_IMMS:
+	  cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80));  /* n[11:5] to bits 31-25, n[4:0] to bits 11-7. */
+	  break;
+	default: *cp++ = ins; break;  /* Zero low nibble but not an action: still an instruction word. */
+	}
+      }
+      stop: (void)0;
+    }
+  }
+
+  if (base + D->codesize != (char *)cp)  /* Check for phase errors. */
+    return DASM_S_PHASE;
+  return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. Returns the stored offset, -1 if undefined, -2 if unused or out of range. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+  dasm_State *D = Dst_REF;
+  if (pc*sizeof(int) < D->pcsize) {
+    int pos = D->pclabels[pc];
+    if (pos < 0) return *DASM_POS2PTR(D, -pos);  /* Defined: the entry is the negated buffer position. */
+    if (pos > 0) return -1;  /* Undefined. */
+  }
+  return -2;  /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. Returns the (possibly updated) status. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+  dasm_State *D = Dst_REF;
+  if (D->status == DASM_S_OK) {
+    int i;
+    for (i = 1; i <= 9; i++) {  /* Slots 1..9 of the local/global label array. */
+      if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }  /* Positive: reference chain still open. */
+      D->lglabels[i] = 0;  /* Reset the slot for the next step. */
+    }
+  }
+  if (D->status == DASM_S_OK && secmatch >= 0 &&
+      D->section != &D->sections[secmatch])  /* A negative secmatch skips the section check. */
+    D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
+  return D->status;
+}
+#endif
+
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv.lua
@@ -0,0 +1,981 @@
+------------------------------------------------------------------------------
+-- DynASM RISC-V module.
+--
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+local riscv32 = riscv32
+local riscv64 = riscv64
+
+-- Module information:
+local _info = {
+  arch =	riscv32 and "riscv32" or riscv64 and "riscv64",
+  description =	"DynASM RISC-V module",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2022-07-12",
+  author =	"Mike Pall",
+  license =	"MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable = assert, setmetatable
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch = _s.match, _s.gmatch
+local concat, sort = table.concat, table.sort
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
+
+local function __orderedIndexGen(t) -- Return a sorted array holding every key of t.
+    local orderedIndex = {}
+    for key in pairs(t) do
+        table.insert(orderedIndex, key)
+    end
+    table.sort( orderedIndex ) -- Default `<` comparison.
+    return orderedIndex
+end
+
+local function __orderedNext(t, state) -- Iterator step: return the key after `state` in sorted order, plus its value.
+    local key = nil
+    if state == nil then -- First call: build the sorted key list and stash it inside t itself.
+        t.__orderedIndex = __orderedIndexGen(t)
+        key = t.__orderedIndex[1]
+    else
+        local j = 0
+        for _,_ in pairs(t.__orderedIndex) do j = j + 1 end -- Count the entries of the key list.
+        for i = 1, j do -- Linear search for the previous key; no early exit.
+            if t.__orderedIndex[i] == state then
+                key = t.__orderedIndex[i+1]
+            end
+        end
+    end
+
+    if key then
+        return key, t[key]
+    end
+
+    t.__orderedIndex = nil -- Past the last key: remove the stashed list again.
+    return
+end
+
+local function opairs(t) -- Like pairs(t), but visits keys in sorted order (for use in a generic `for`).
+    return __orderedNext, t, nil
+end
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+  "STOP", "SECTION", "ESC", "REL_EXT",
+  "ALIGN", "REL_LG", "LABEL_LG",
+  "REL_PC", "LABEL_PC", "IMM", "IMMS",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+  map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers (hex and decimal) to `out`, in action_names order.
+local function dumpactions(out)
+  out:write("DynASM encoding engine action codes:\n")
+  for n,name in ipairs(action_names) do
+    local num = map_action[name]
+    out:write(format("  %-10s %02X  %d\n", name, num, num))
+  end
+  out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array named `name` (a lone STOP if the list is empty).
+local function writeactions(out, name)
+  local nn, list = #actlist, actlist
+  if nn == 0 then nn = 1; list = { map_action.STOP } end -- Index 1, not 0: the write below reads entry nn.
+  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+  for i = 1,nn-1 do
+    assert(out:write("0x", tohex(list[i]), ",\n"))
+  end
+  assert(out:write("0x", tohex(list[nn]), "\n};\n\n")) -- Final entry carries no trailing comma.
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list. The word must be an integer in the unsigned 32 bit range.
+local function wputxw(n)
+  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+  local w = assert(map_action[action], "bad action name `"..action.."'")
+  wputxw(w * 0x100000 + (val or 0) * 16) -- Action number from bit 20 up, val from bit 4 up, low nibble zero.
+  if a then actargs[#actargs+1] = a end -- `a` becomes one more argument of the next dasm_put().
+  if a or num then secpos = secpos + (num or 1) end -- Count the section buffer positions used.
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+  if #actlist == actargs[1] then return end -- Nothing to flush.
+  if not term then waction("STOP") end -- Terminate action list.
+  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) -- Emit the C call with all pending arguments.
+  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+  secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put word, preceded by an ESC action when its low nibble is zero (actions are written with a zero low nibble).
+local function wputw(n)
+  if band(n, 0xf) == 0 then waction("ESC") end
+  wputxw(n)
+end
+
+-- Reserve position for word. Returns the reserved index into the action list.
+local function wpos()
+  local pos = #actlist+1
+  actlist[pos] = "" -- Placeholder until wputpos() stores the real word.
+  return pos
+end
+
+-- Store word to reserved position. Unlike wputxw(), negative values down to -0x80000000 are accepted.
+local function wputpos(pos, n)
+  assert(n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+  actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20 -- Global label numbers start at 20.
+local map_global = setmetatable({}, { __index = function(t, name)
+  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+  local n = next_global
+  if n > 2047 then werror("too many global labels") end
+  next_global = n + 1
+  t[name] = n -- Cache the number so __index only fires on the first lookup.
+  return n
+end})
+
+-- Dump global labels to `out`, one per line in numbering order. `lvl` is not used.
+local function dumpglobals(out, lvl)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- Invert to number -> name.
+  out:write("Global labels:\n")
+  for i=20,next_global-1 do
+    out:write(format("  %s\n", t[i]))
+  end
+  out:write("\n")
+end
+
+-- Write global label enum: one `prefix..name` enumerator per label, closed by `prefix.._MAX`.
+local function writeglobals(out, prefix)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- Invert to number -> name.
+  out:write("enum {\n")
+  for i=20,next_global-1 do
+    out:write("  ", prefix, t[i], ",\n")
+  end
+  out:write("  ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names as a NULL-terminated C string array called `name`.
+local function writeglobalnames(out, name)
+  local t = {}
+  for name, n in pairs(map_global) do t[n] = name end -- The loop variable shadows the parameter only inside this loop.
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=20,next_global-1 do
+    out:write("  \"", t[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0 -- Extern label numbers start at 0.
+local map_extern_ = {} -- Reverse map: extern label number -> name.
+local map_extern = setmetatable({}, { __index = function(t, name)
+  -- No restrictions on the name for now.
+  local n = next_extern
+  if n > 2047 then werror("too many extern labels") end
+  next_extern = n + 1
+  t[name] = n -- Cache the number so __index only fires on the first lookup.
+  map_extern_[n] = name
+  return n
+end})
+
+-- Dump extern labels to `out`, one per line in numbering order. `lvl` is not used.
+local function dumpexterns(out, lvl)
+  out:write("Extern labels:\n")
+  for i=0,next_extern-1 do
+    out:write(format("  %s\n", map_extern_[i]))
+  end
+  out:write("\n")
+end
+
+-- Write extern label names as a NULL-terminated C string array called `name`.
+local function writeexternnames(out, name)
+  out:write("static const char *const ", name, "[] = {\n")
+  for i=0,next_extern-1 do
+    out:write("  \"", map_extern_[i], "\",\n")
+  end
+  out:write("  (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = {
+  ra = "x1", sp = "x2",
+} -- Ext. register name -> int. name.
+
+local map_type = {}		-- Type name -> { ctype, reg }
+local ctypenum = 0		-- Type number (for Dt... macros).
+
+-- Reverse defines for registers: x1 -> ra, x2 -> sp; any other name is returned unchanged.
+function _M.revdef(s)
+  if s == "x1" then return "ra"
+  elseif s == "x2" then return "sp" end
+  return s
+end
+
+------------------------------------------------------------------------------
+
+-- Template strings for RISC-V instructions.
+local map_op = {}
+
+local map_op_rv32imafd = {
+
+  -- DASM pseudo-instrs
+  empty_0 = "ffffffff",
+  call_1 = "7fffffffJ",
+
+  -- RV32I
+  lui_2 = "00000037DU",
+  auipc_2 = "00000017DA",
+
+  jal_2  = "0000006fDJ",
+  jalr_3 = "00000067DRJ",
+  -- pseudo-instrs
+  j_1 = "0000006fJ",
+  jal_1 = "000000efJ",
+  jr_1 = "00000067R",
+  jalr_1 = "000000e7R",
+  jalr_2 = "000000e7RJ",
+
+  beq_3  = "00000063RrB",
+  bne_3  = "00001063RrB",
+  blt_3  = "00004063RrB",
+  bge_3  = "00005063RrB",
+  bltu_3 = "00006063RrB",
+  bgeu_3 = "00007063RrB",
+  -- pseudo-instrs
+  bnez_2 = "00001063RB",
+  beqz_2 = "00000063RB",
+  blez_2 = "00005063rB",
+  bgez_2 = "00005063RB",
+  bltz_2 = "00004063RB",
+  bgtz_2 = "00004063rB",
+  bgt_3 = "00004063rRB",
+  ble_3 = "00005063rRB",
+  bgtu_3 = "00006063rRB",
+  bleu_3 = "00007063rRB",
+
+  lb_2  = "00000003DL",
+  lh_2  = "00001003DL",
+  lw_2  = "00002003DL",
+  lbu_2 = "00004003DL",
+  lhu_2 = "00005003DL",
+
+  sb_2 = "00000023rS",
+  sh_2 = "00001023rS",
+  sw_2 = "00002023rS",
+
+  addi_3  = "00000013DRI",
+  slti_3  = "00002013DRI",
+  sltiu_3 = "00003013DRI",
+  xori_3 = "00004013DRI",
+  ori_3 = "00006013DRI",
+  andi_3 = "00007013DRI",
+  slli_3 = "00001013DRi",
+  srli_3 = "00005013DRi",
+  srai_3 = "40005013DRi",
+  -- pseudo-instrs
+  seqz_2 = "00103013DR",
+  ["zext.b_2"] = "0ff07013DR",
+
+  add_3 = "00000033DRr",
+  sub_3 = "40000033DRr",
+  sll_3 = "00001033DRr",
+  slt_3 = "00002033DRr",
+  sltu_3 = "00003033DRr",
+  xor_3 = "00004033DRr",
+  srl_3 = "00005033DRr",
+  sra_3 = "40005033DRr",
+  or_3 = "00006033DRr",
+  and_3 = "00007033DRr",
+  -- pseudo-instrs
+  snez_2 = "00003033Dr",
+  sltz_2 = "00002033DR",
+  sgtz_2 = "00002033Dr",
+
+  ecall_0 = "00000073",
+  ebreak_0 = "00100073",
+
+  nop_0 = "00000013",
+  li_2 = "00000013DI",
+  mv_2 = "00000013DR",
+  not_2 = "fff04013DR",
+  neg_2 = "40000033Dr",
+  ret_0 = "00008067",
+
+  -- RV32M
+  mul_3    = "02000033DRr",
+  mulh_3   = "02001033DRr",
+  mulhsu_3 = "02002033DRr",
+  mulhu_3  = "02003033DRr",
+  div_3  = "02004033DRr",
+  divu_3 = "02005033DRr",
+  rem_3  = "02006033DRr",
+  remu_3 = "02007033DRr",
+
+  -- RV32A
+  ["lr.w_2"] = "c0000053FR",
+  ["sc.w_2"] = "c0001053FRr",
+  ["amoswap.w_3"] = "c0002053FRr",
+  ["amoadd.w_3"] = "c0003053FRr",
+  ["amoxor.w_3"] = "c0004053FRr",
+  ["amoor.w_3"] = "c0005053FRr",
+  ["amoand.w_3"] = "c0006053FRr",
+  ["amomin.w_3"] = "c0007053FRr",
+  ["amomax.w_3"] = "c0008053FRr",
+  ["amominu.w_3"] = "c0009053FRr",
+  ["amomaxu.w_3"] = "c000a053FRr",
+
+  -- RV32F
+  ["flw_2"] = "00002007FL",
+  ["fsw_2"] = "00002027gS",
+
+  ["fmadd.s_4"]  = "00000043FGgH",
+  ["fmsub.s_4"]  = "00000047FGgH",
+  ["fnmsub.s_4"] = "0000004bFGgH",
+  ["fnmadd.s_4"] = "0000004fFGgH",
+  ["fmadd.s_5"]  = "00000043FGgHM",
+  ["fmsub.s_5"]  = "00000047FGgHM",
+  ["fnmsub.s_5"] = "0000004bFGgHM",
+  ["fnmadd.s_5"] = "0000004fFGgHM",
+
+  ["fadd.s_3"]  = "00000053FGg",
+  ["fsub.s_3"]  = "08000053FGg",
+  ["fmul.s_3"]  = "10000053FGg",
+  ["fdiv.s_3"]  = "18000053FGg",
+  ["fsqrt.s_2"] = "58000053FG",
+  ["fadd.s_4"]  = "00000053FGgM",
+  ["fsub.s_4"]  = "08000053FGgM",
+  ["fmul.s_4"]  = "10000053FGgM",
+  ["fdiv.s_4"]  = "18000053FGgM",
+  ["fsqrt.s_3"] = "58000053FGM",
+
+  ["fsgnj.s_3"]  = "20000053FGg",
+  ["fsgnjn.s_3"] = "20001053FGg",
+  ["fsgnjx.s_3"] = "20002053FGg",
+
+  ["fmin.s_3"] = "28000053FGg",
+  ["fmax.s_3"] = "28001053FGg",
+
+  ["fcvt.w.s_2"]  = "c0000053DG",
+  ["fcvt.wu.s_2"] = "c0100053DG",
+  ["fcvt.w.s_3"]  = "c0000053DGM",
+  ["fcvt.wu.s_3"] = "c0100053DGM",
+  ["fmv.x.w_2"] = "e0000053DG",
+
+  ["feq.s_3"] = "a0002053DGg",
+  ["flt.s_3"] = "a0001053DGg",
+  ["fle.s_3"] = "a0000053DGg",
+
+  ["fclass.s_2"] = "e0001053DG",
+
+  ["fcvt.s.w_2"]  = "d0000053FR",
+  ["fcvt.s.wu_2"] = "d0100053FR",
+  ["fcvt.s.w_3"]  = "d0000053FRM",
+  ["fcvt.s.wu_3"] = "d0100053FRM",
+  ["fmv.w.x_2"] = "f0000053FR",
+
+  -- RV32D
+  ["fld_2"] = "00003007FL",
+  ["fsd_2"] = "00003027gS",
+  
+  ["fmadd.d_4"]  = "02000043FGgH",
+  ["fmsub.d_4"]  = "02000047FGgH",
+  ["fnmsub.d_4"] = "0200004bFGgH",
+  ["fnmadd.d_4"] = "0200004fFGgH",
+  ["fmadd.d_5"]  = "02000043FGgHM",
+  ["fmsub.d_5"]  = "02000047FGgHM",
+  ["fnmsub.d_5"] = "0200004bFGgHM",
+  ["fnmadd.d_5"] = "0200004fFGgHM",
+
+  ["fadd.d_3"]  = "02000053FGg",
+  ["fsub.d_3"]  = "0a000053FGg",
+  ["fmul.d_3"]  = "12000053FGg",
+  ["fdiv.d_3"]  = "1a000053FGg",
+  ["fsqrt.d_2"] = "5a000053FG",
+  ["fadd.d_4"]  = "02000053FGgM",
+  ["fsub.d_4"]  = "0a000053FGgM",
+  ["fmul.d_4"]  = "12000053FGgM",
+  ["fdiv.d_4"]  = "1a000053FGgM",
+  ["fsqrt.d_3"] = "5a000053FGM",
+
+  ["fsgnj.d_3"]  = "22000053FGg",
+  ["fsgnjn.d_3"] = "22001053FGg",
+  ["fsgnjx.d_3"] = "22002053FGg",
+  ["fmin.d_3"] = "2a000053FGg",
+  ["fmax.d_3"] = "2a001053FGg",
+  ["fcvt.s.d_2"] = "40100053FG",
+  ["fcvt.d.s_2"] = "42000053FG",
+  ["feq.d_3"] = "a2002053DGg",
+  ["flt.d_3"] = "a2001053DGg",
+  ["fle.d_3"] = "a2000053DGg",
+  ["fclass.d_2"] = "e2001053DG",
+  ["fcvt.w.d_2"]  = "c2000053DG",
+  ["fcvt.wu.d_2"] = "c2100053DG",
+  ["fcvt.d.w_2"]  = "d2000053FR",
+  ["fcvt.d.wu_2"] = "d2100053FR",
+  ["fcvt.w.d_3"]  = "c2000053DGM",
+  ["fcvt.wu.d_3"] = "c2100053DGM",
+  ["fcvt.d.w_3"]  = "d2000053FRM",
+  ["fcvt.d.wu_3"] = "d2100053FRM",
+
+  ["fmv.d_2"] = "22000053FY",
+  ["fneg.d_2"] = "22001053FY",
+  ["fabs.d_2"] = "22002053FY",
+
+}
+
+local map_op_rv64imafd = {
+
+  -- RV64I
+  lwu_2 = "00006003DL",
+  ld_2  = "00003003DL",
+
+  sd_2 = "00003023rS",
+
+  slli_3 = "00001013DRj",
+  srli_3 = "00005013DRj",
+  srai_3 = "40005013DRj",
+
+  addiw_3 = "0000001bDRI",
+  slliw_3 = "0000101bDRi",
+  srliw_3 = "0000501bDRi",
+  sraiw_3 = "4000501bDRi",
+
+  addw_3 = "0000003bDRr",
+  subw_3 = "4000003bDRr",
+  sllw_3 = "0000103bDRr",
+  srlw_3 = "0000503bDRr",
+  sraw_3 = "4000503bDRr",
+
+  negw_2 = "4000003bDr",
+  ["sext.w_2"] = "0000001bDR",
+
+  -- RV64M
+  mulw_3  = "0200003bDRr",
+  divw_3  = "0200403bDRr",
+  divuw_3 = "0200503bDRr",
+  remw_3  = "0200603bDRr",
+  remuw_3 = "0200703bDRr",
+
+  -- RV64A
+  ["lr.d_2"] = "c2000053FR",
+  ["sc.d_2"] = "c2001053FRr",
+  ["amoswap.d_3"] = "c2002053FRr",
+  ["amoadd.d_3"] = "c2003053FRr",
+  ["amoxor.d_3"] = "c2004053FRr",
+  ["amoor.d_3"] = "c2005053FRr",
+  ["amoand.d_3"] = "c2006053FRr",
+  ["amomin.d_3"] = "c2007053FRr",
+  ["amomax.d_3"] = "c2008053FRr",
+  ["amominu.d_3"] = "c2009053FRr",
+  ["amomaxu.d_3"] = "c200a053FRr",
+
+  -- RV64F
+  ["fcvt.l.s_2"]  = "c0200053DG",
+  ["fcvt.lu.s_2"] = "c0300053DG",
+  ["fcvt.l.s_3"]  = "c0200053DGM",
+  ["fcvt.lu.s_3"] = "c0300053DGM",
+  ["fcvt.s.l_2"]  = "d0200053FR",
+  ["fcvt.s.lu_2"] = "d0300053FR",
+  ["fcvt.s.l_3"]  = "d0200053FRM",
+  ["fcvt.s.lu_3"] = "d0300053FRM",
+
+  -- RV64D
+  ["fcvt.l.d_2"]  = "c2200053DG",
+  ["fcvt.lu.d_2"] = "c2300053DG",
+  ["fcvt.l.d_3"]  = "c2200053DGM",
+  ["fcvt.lu.d_3"] = "c2300053DGM",
+  ["fmv.x.d_2"]   = "e2000053DG",
+  ["fcvt.d.l_2"]  = "d2200053FR",
+  ["fcvt.d.lu_2"] = "d2300053FR",
+  ["fcvt.d.l_3"]  = "d2200053FRM",
+  ["fcvt.d.lu_3"] = "d2300053FRM",
+  ["fmv.d.x_2"]   = "f2000053FR",
+
+}
+
+local map_op_zicsr = {
+  csrrw_3 = "00001073DCR",
+  csrrs_3 = "00002073DCR",
+  csrrc_3 = "00003073DCR",
+  csrrwi_3 = "00005073DCu",
+  csrrsi_3 = "00006073DCu",
+  csrrci_3 = "00007073DCu",
+
+  -- pseudo-ops
+  csrrw_2 = "00001073DC",
+  csrrs_2 = "00002073CR",
+  csrrc_2 = "00003073CR",
+  csrrwi_2 = "00005073Cu",
+  csrrsi_2 = "00006073Cu",
+  csrrci_2 = "00007073Cu",
+
+  rdinstret_1 = "C0202073D",
+  rdcycle_1 = "C0002073D",
+  rdtime_1 = "C0102073D",
+  rdinstreth_1 = "C8202073D",
+  rdcycleh_1 = "C8002073D",
+  rdtimeh_1 = "C8102073D",
+
+  frcsr_1 = "00302073D",
+  fscsr_2 = "00301073DR",
+  fscsr_1 = "00301073R",
+  frrm_1 = "00202073D",
+  fsrm_2 = "00201073DR",
+  fsrm_1 = "00201073R",
+  fsrmi_2 = "00205073Du",
+  fsrmi_1 = "00205073u",
+  frflags_1 = "00102073D",
+  fsflags_2 = "00101073DR",
+  fsflagsi_2 = "00105073Du",
+  fsflagsi_1 = "00105073u",
+}
+
+local map_op_zifencei = {
+  ["fence.i_3"] = "0000100fDRI",
+}
+
+local list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr }
+local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr }
+
+if riscv32 then for _, map in opairs(list_map_op_rv32) do
+  for k, v in pairs(map) do map_op[k] = v end
+  end
+end
+if riscv64 then for _, map in opairs(list_map_op_rv64) do
+  for k, v in pairs(map) do map_op[k] = v end
+  end
+end
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr) -- Parse an integer register operand; returns its number (0..31) and the type entry, if any.
+  local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$") -- "type:xN" form with a register override.
+  local tp = map_type[tname or expr]
+  if tp then
+    local reg = ovreg or tp.reg -- An explicit override wins over the type's own register.
+    if not reg then
+      werror("type `"..(tname or expr).."' needs a register override")
+    end
+    expr = reg
+  end
+  local r = match(expr, "^x([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r, tp end -- The pattern also admits 32..39, so check the range.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_fpr(expr) -- Parse an "fN" register operand; returns N (0..31) or raises an error.
+  local r = match(expr, "^f([1-3]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 31 then return r end -- The pattern also admits 32..39, so check the range.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed, action) -- Encode an immediate field, or defer it to an action.
+  local n = tonumber(imm)
+  if n then -- Numeric literal: range-check and encode it right now.
+    local m = sar(n, scale)
+    if shl(m, scale) == n then -- The low `scale` bits must be zero.
+      if signed then
+          local s = sar(m, bits-1) -- 0 for a non-negative value that fits, -1 for a negative one that fits.
+          if s == 0 then return shl(m, shift)
+          elseif s == -1 then return shl(m + shl(1, bits), shift) end -- Wrap the negative value into `bits` bits.
+      else
+          if sar(m, bits) == 0 then return shl(m, shift) end
+      end
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
+           match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else -- Anything else is passed through as an argument of the emitted action.
+    waction(action or "IMM",
+        (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
+    return 0 -- The field stays zero in the template word.
+  end
+end
+
+local function parse_csr(expr) -- Parse a decimal CSR number; returns it (0..4095) or raises an error.
+  local r = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$")
+  if r then
+    r = tonumber(r)
+    if r <= 4095 then return r end -- The pattern admits up to 4999, so check the range.
+  end
+  werror("bad register name `"..expr.."'")
+end
+
+local function parse_imms(imm) -- Encode a signed 12 bit immediate split into two fields, or defer it to an IMMS action.
+  local n = tonumber(imm)
+  if n then
+    if n >= -2048 and n < 2048 then
+      local imm5, imm7 = band(n, 0x1f), shr(band(n, 0xfe0), 5) -- Low 5 bits and the upper 7 bits.
+      return shl(imm5, 7) + shl(imm7, 25) -- Low part from bit 7, high part from bit 25.
+    end
+    werror("out of range immediate `"..imm.."'")
+  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
+         match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
+    werror("expected immediate operand, got register")
+  else
+    waction("IMMS", 0, imm); return 0 -- Non-numeric: pass it through as the action's argument.
+  end
+end
+
+local function parse_rm(mode)
+  -- Translate a rounding mode mnemonic into its numeric encoding.
+  -- Unknown mnemonics are reported as an error.
+  local encoding = { rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7 }
+  local rm = encoding[mode]
+  if rm then return rm end
+  werror("bad rounding mode `"..mode.."'")
+end
+
+local function parse_disp(disp, mode) -- Parse a memory operand; mode is "load" or "store".
+  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") -- First form: imm(reg).
+  if imm then
+    local r = shl(parse_gpr(reg), 15) -- Base register is placed at bit 15.
+    local extname = match(imm, "^extern%s+(%S+)$")
+    if extname then
+      waction("REL_EXT", map_extern[extname], nil, 1) -- Offset comes from an extern reference.
+      return r
+    else
+      if mode == "load" then
+        return r + parse_imm(imm, 12, 20, 0, true) -- Signed 12 bit immediate at bit 20.
+      elseif mode == "store" then
+        return r + parse_imms(imm) -- Split immediate handled by parse_imms.
+      else
+        werror("bad displacement mode '"..mode.."'")
+      end
+    end
+  end
+  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") -- Second form: register followed by a tail.
+  if reg and tailr ~= "" then
+    local r, tp = parse_gpr(reg)
+    if tp then -- A tail is only accepted when parse_gpr reports a type.
+      if mode == "load" then
+          waction("IMM", 32768+12*32+20, format(tp.ctypefmt, tailr)) -- Same descriptor as parse_imm(_, 12, 20, 0, true).
+      elseif mode == "store" then
+          waction("IMMS", 0, format(tp.ctypefmt, tailr)) -- Offset expression built from the type's format.
+      else
+        werror("bad displacement mode '"..mode.."'")
+      end
+      return shl(r, 15)
+    end
+  end
+  werror("bad displacement `"..disp.."'") -- Neither form matched.
+end
+
+local function parse_label(label, def)
+  local kind, name = sub(label, 1, 2), sub(label, 3)
+  if kind == "=>" then
+    -- =>label: reference to a pc label, the expression is passed through.
+    return "PC", 0, name
+  elseif kind == "->" then
+    -- ->name: reference to a global label.
+    return "LG", map_global[name]
+  elseif def then
+    -- [1-9]: definition of a local label.
+    if match(label, "^[1-9]$") then
+      return "LG", 10+tonumber(label)
+    end
+  else
+    -- [<>][1-9]: reference to a local label.
+    local dir, lnum = match(label, "^([<>])([1-9])$")
+    if dir == ">" then -- Forward references use 1-9.
+      return "LG", lnum + 0
+    elseif dir then -- Backward references use 11-19.
+      return "LG", lnum + 10
+    end
+    -- extern name: reference to an external symbol.
+    local extname = match(label, "^extern%s+(%S+)$")
+    if extname then
+      return "EXT", map_extern[extname]
+    end
+  end
+  werror("bad label `"..label.."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+  if not params then return sub(template, 9) end -- Without params: return the operand letters of the template.
+  local op = tonumber(sub(template, 1, 8), 16) -- First 8 chars: base opcode word in hex.
+  local n = 1 -- Index of the next operand in params.
+
+  -- Limit number of section buffer positions used by a single dasm_put().
+  -- A single opcode needs a maximum of 2 positions (ins/ext).
+  if secpos+2 > maxsecpos then wflush() end
+  local pos = wpos()
+
+  -- Process each character: every letter consumes one operand and adds
+  -- its encoded field to the opcode word (or emits an action).
+  for p in gmatch(sub(template, 9), ".") do
+    if p == "D" then  -- gpr rd
+      op = op + shl(parse_gpr(params[n]), 7); n = n + 1
+    elseif p == "R" then  -- gpr rs1
+      op = op + shl(parse_gpr(params[n]), 15); n = n + 1
+    elseif p == "r" then  -- gpr rs2
+      op = op + shl(parse_gpr(params[n]), 20); n = n + 1
+    elseif p == "F" then  -- fpr rd
+      op = op + shl(parse_fpr(params[n]), 7); n = n + 1
+    elseif p == "G" then  -- fpr rs1
+      op = op + shl(parse_fpr(params[n]), 15); n = n + 1
+    elseif p == "g" then  -- fpr rs2
+      op = op + shl(parse_fpr(params[n]), 20); n = n + 1
+    elseif p == "H" then  -- fpr rs3
+      op = op + shl(parse_fpr(params[n]), 27); n = n + 1
+    elseif p == "C" then  -- csr
+      op = op + shl(parse_csr(params[n]), 20); n = n + 1
+    elseif p == "M" then  -- fpr rounding mode
+      op = op + shl(parse_rm(params[n]), 12); n = n + 1
+    elseif p == "Y" then  -- fpr pseudo-op (same register used as rs1 and rs2)
+      local r = parse_fpr(params[n])
+      op = op + shl(r, 15) + shl(r, 20); n = n + 1
+    elseif p == "I" then  -- I-type imm12
+      op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1
+    elseif p == "i" then  -- I-type shamt5
+      op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1
+    elseif p == "j" then  -- I-type shamt6
+      op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1
+    elseif p == "u" then  -- I-type uimm
+      op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1
+    elseif p == "U" then  -- U-type imm20
+      op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1
+    elseif p == "L" then  -- load
+      op = op + parse_disp(params[n], "load"); n = n + 1
+    elseif p == "S" then  -- store
+      op = op + parse_disp(params[n], "store"); n = n + 1
+    elseif p == "B" or p == "J" then  -- control flow
+      local mode, m, s = parse_label(params[n], false)
+      if p == "B" then m = m + 2048 end -- NOTE(review): presumably flags a B-type branch for the encoder; confirm.
+      waction("REL_"..mode, m, s, 1); n = n + 1
+    elseif p == "A" then  -- AUIPC
+      local mode, m, s = parse_label(params[n], false)
+      waction("REL_"..mode, m, s, 1); n = n + 1
+    else
+      assert(false) -- Unknown template letter: internal error.
+    end
+  end
+  wputpos(pos, op) -- Emit the assembled word at the position obtained from wpos().
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode marking where the action list is written to the output.
+map_op[".actionlist_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Passed on as given; no syntax check is done.
+  wline(function(out) writeactions(out, cvar) end)
+end
+
+-- Pseudo-opcode marking where the global enum is written to the output.
+map_op[".globals_1"] = function(params)
+  if not params then return "prefix" end
+  local enumprefix = params[1] -- Passed on as given; no syntax check is done.
+  wline(function(out) writeglobals(out, enumprefix) end)
+end
+
+-- Pseudo-opcode marking where the global names are written to the output.
+map_op[".globalnames_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Passed on as given; no syntax check is done.
+  wline(function(out) writeglobalnames(out, cvar) end)
+end
+
+-- Pseudo-opcode marking where the extern names are written to the output.
+map_op[".externnames_1"] = function(params)
+  if not params then return "cvar" end
+  local cvar = params[1] -- Passed on as given; no syntax check is done.
+  wline(function(out) writeexternnames(out, cvar) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+  if not params then return "[1-9] | ->global | =>pcexpr" end
+  if secpos+1 > maxsecpos then wflush() end -- Flush first if the position limit would be exceeded.
+  local kind, num, expr = parse_label(params[1], true)
+  if kind == "EXT" then werror("bad label definition") end
+  waction("LABEL_"..kind, num, expr, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+  if not params then return "imm..." end
+  for _,word in ipairs(params) do
+    local val = tonumber(word)
+    if not val then werror("bad immediate `"..word.."'") end
+    -- Negative values are stored as val + 2^32.
+    wputw(val < 0 and val + 2^32 or val)
+    if secpos+2 > maxsecpos then wflush() end
+  end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+  if not params then return "numpow2" end
+  if secpos+1 > maxsecpos then wflush() end
+  -- Valid alignments are the powers of two from 2 up to 256.
+  -- A lookup table replaces repeated halving; the accepted set is the same.
+  local valid = {
+    [2] = true, [4] = true, [8] = true, [16] = true,
+    [32] = true, [64] = true, [128] = true, [256] = true,
+  }
+  local align = tonumber(params[1])
+  if align and valid[align] then
+    waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+    return
+  end
+  -- Anything else, including a non-numeric operand, is rejected.
+  werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+  if not params then
+    if nparams == 2 then return "name, ctype" end
+    return "name, ctype, reg"
+  end
+  -- reg stays nil when only two operands were given.
+  local name, ctype, reg = params[1], params[2], params[3]
+  if not match(name, "^[%a_][%w_]*$") then
+    werror("bad type name `"..name.."'")
+  end
+  if map_type[name] then
+    werror("duplicate type `"..name.."'")
+  end
+  -- Expose #name as a define giving the C size of the type (kept in map_archdef).
+  map_archdef["#"..name] = "sizeof("..ctype..")"
+  -- Register the type under the next type number and emit its shortcut define.
+  local num = ctypenum + 1
+  local entry = { ctype = ctype, reg = reg }
+  entry.ctypefmt = format("Dt%X(%%s)", num)
+  map_type[name] = entry
+  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+  ctypenum = num -- The counter only advances after the define was written.
+end
+-- The two operand form omits the register and shares the implementation.
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Write all defined types, sorted by name, with their C type and register.
+local function dumptypes(out, lvl)
+  local names = {}
+  for name in pairs(map_type) do names[#names+1] = name end
+  sort(names)
+  out:write("Type definitions:\n")
+  for i=1,#names do
+    local name = names[i]
+    local tp = map_type[name]
+    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
+  end
+  out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section: emit a SECTION action for section number num, then flush.
+function _M.section(num)
+  waction("SECTION", num)
+  wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description: a header line built from _info, then the action list via dumpactions.
+function _M.dumparch(out)
+  out:write(format("DynASM %s version %s, released %s\n\n",
+    _info.arch, _info.version, _info.release))
+  dumpactions(out)
+end
+
+-- Dump all user defined elements: types, then globals, then externs, each written to out.
+function _M.dumpdef(out, lvl)
+  dumptypes(out, lvl)
+  dumpglobals(out, lvl)
+  dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core: takes wline/werror/wfatal/wwarn, returns wflush.
+function _M.passcb(wl, we, wf, ww)
+  wline, werror, wfatal, wwarn = wl, we, wf, ww -- Stored in the module-level callback variables.
+  return wflush
+end
+
+-- Setup the arch-specific module: remember the architecture name and options in g_arch/g_opt.
+function _M.setup(arch, opt)
+  g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps by chaining them through __index metatables.
+function _M.mergemaps(map_coreop, map_def)
+  setmetatable(map_op, { __index = map_coreop }) -- Opcodes missing here fall back to the core ops.
+  setmetatable(map_def, { __index = map_archdef }) -- Defines missing in map_def fall back to map_archdef.
+  return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv32.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv32.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM RISC-V 32 module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 32 bit mode for the combined RISC-V module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+riscv32 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_riscv")
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv64.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_riscv64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM RISC-V 64 module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined RISC-V module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+riscv64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_riscv")
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_x64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x64.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 -- This module just sets 64 bit mode for the combined x86/x64 module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x86.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_x86.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x86.h
@@ -1,6 +1,6 @@
 /*
 ** DynASM x86 encoding engine.
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 ** Released under the MIT license. See dynasm.lua for full copyright notice.
 */
 
@@ -68,7 +68,7 @@ struct dasm_State {
   size_t lgsize;
   int *pclabels;		/* PC label chains/pos ptrs. */
   size_t pcsize;
-  void **globals;		/* Array of globals (bias -10). */
+  void **globals;		/* Array of globals. */
   dasm_Section *section;	/* Pointer to active section. */
   size_t codesize;		/* Total size of all code sections. */
   int maxsection;		/* 0 <= sectionidx < maxsection. */
@@ -85,7 +85,6 @@ void dasm_init(Dst_DECL, int maxsection)
 {
   dasm_State *D;
   size_t psz = 0;
-  int i;
   Dst_REF = NULL;
   DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
   D = Dst_REF;
@@ -96,12 +95,7 @@ void dasm_init(Dst_DECL, int maxsection)
   D->pcsize = 0;
   D->globals = NULL;
   D->maxsection = maxsection;
-  for (i = 0; i < maxsection; i++) {
-    D->sections[i].buf = NULL;  /* Need this for pass3. */
-    D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
-    D->sections[i].bsize = 0;
-    D->sections[i].epos = 0;  /* Wrong, but is recalculated after resize. */
-  }
+  memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section));
 }
 
 /* Free DynASM state. */
@@ -121,7 +115,7 @@ void dasm_free(Dst_DECL)
 void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
 {
   dasm_State *D = Dst_REF;
-  D->globals = gl - 10;  /* Negative bias to compensate for locals. */
+  D->globals = gl;
   DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
 }
 
@@ -146,6 +140,7 @@ void dasm_setup(Dst_DECL, const void *ac
   if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
   for (i = 0; i < D->maxsection; i++) {
     D->sections[i].pos = DASM_SEC2POS(i);
+    D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos;
     D->sections[i].ofs = 0;
   }
 }
@@ -239,8 +234,11 @@ void dasm_put(Dst_DECL, int start, ...)
 	}
 	pos++;
 	ofs += 4;  /* Maximum offset needed. */
-	if (action == DASM_REL_LG || action == DASM_REL_PC)
+	if (action == DASM_REL_LG || action == DASM_REL_PC) {
 	  b[pos++] = ofs;  /* Store pass1 offset estimate. */
+	} else if (sizeof(ptrdiff_t) == 8) {
+	  ofs += 4;
+	}
 	break;
       case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
       case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -365,10 +363,22 @@ int dasm_link(Dst_DECL, size_t *szp)
   do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
 #define dasmd(x) \
   do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#define dasmq(x) \
+  do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
 #else
 #define dasmw(x)	do { dasmb(x); dasmb((x)>>8); } while (0)
 #define dasmd(x)	do { dasmw(x); dasmw((x)>>16); } while (0)
+#define dasmq(x)	do { dasmd(x); dasmd((x)>>32); } while (0)
 #endif
+static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)  /* Write pointer-sized x at cp. */
+{
+  if (sizeof(ptrdiff_t) == 8)
+    dasmq((unsigned long long)x);  /* 8 byte write; the macro advances cp by 8. */
+  else
+    dasmd((unsigned int)x);  /* 4 byte write; the macro advances cp by 4. */
+  return cp;  /* Hand the advanced write pointer back to the caller. */
+}
+#define dasma(x)	(cp = dasma_(cp, (x)))
 
 /* Pass 3: Encode sections. */
 int dasm_encode(Dst_DECL, void *buffer)
@@ -430,7 +440,7 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  break;
 	}
 	case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
-	  b++; n = (int)(ptrdiff_t)D->globals[-n];
+	  b++; n = (int)(ptrdiff_t)D->globals[-n-10];
 	  /* fallthrough */
 	case DASM_REL_A: rel_a:
 	  n -= (unsigned int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
@@ -443,17 +453,18 @@ int dasm_encode(Dst_DECL, void *buffer)
 	  goto wb;
 	}
 	case DASM_IMM_LG:
-	  p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+	  p++;
+	  if (n < 0) { dasma((ptrdiff_t)D->globals[-n-10]); break; }
 	  /* fallthrough */
 	case DASM_IMM_PC: {
 	  int *pb = DASM_POS2PTR(D, n);
-	  n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
-	  goto wd;
+	  dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
+	  break;
 	}
 	case DASM_LABEL_LG: {
 	  int idx = *p++;
 	  if (idx >= 10)
-	    D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+	    D->globals[idx-10] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
 	  break;
 	}
 	case DASM_LABEL_PC: case DASM_SETLABEL: break;
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x86.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dasm_x86.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dasm_x86.lua
@@ -1,7 +1,7 @@
 ------------------------------------------------------------------------------
 -- DynASM x86/x64 module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See dynasm.lua for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -11,9 +11,9 @@ local x64 = x64
 local _info = {
   arch =	x64 and "x64" or "x86",
   description =	"DynASM x86/x64 module",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   license =	"MIT",
 }
@@ -484,6 +484,22 @@ local function wputdarg(n)
   end
 end
 
+-- Put signed or unsigned qword or arg.
+local function wputqarg(n)
+  if type(n) ~= "number" then
+    -- Expression: emit the low and the high dword as two separate actions.
+    waction("IMM_D", format("(unsigned int)(%s)", n))
+    waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
+    return
+  end
+  -- Literal: this is only used for numbers from -2^31..2^32-1.
+  for shift=0,16,8 do wputb(band(shr(n, shift), 255)) end
+  wputb(shr(n, 24))
+  -- The upper dword is the sign extension of the value.
+  local sign = n < 0 and 255 or 0
+  for _=1,4 do wputb(sign) end
+end
+
 -- Put operand-size dependent number or arg (defaults to dword).
 local function wputszarg(sz, n)
   if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -663,10 +679,16 @@ local function opmodestr(op, args)
 end
 
 -- Convert number to valid integer or nil.
-local function toint(expr)
+local function toint(expr, isqword)
   local n = tonumber(expr)
   if n then
-    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
+    if n % 1 ~= 0 then
+      werror("not an integer number `"..expr.."'")
+    elseif isqword then
+      if n < -2147483648 or n > 2147483647 then
+	n = nil -- Handle it as an expression to avoid precision loss.
+      end
+    elseif n < -2147483648 or n > 4294967295 then
       werror("bad integer number `"..expr.."'")
     end
     return n
@@ -749,7 +771,7 @@ local function rtexpr(expr)
 end
 
 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-local function parseoperand(param)
+local function parseoperand(param, isqword)
   local t = {}
 
   local expr = param
@@ -810,7 +832,7 @@ local function parseoperand(param)
       if t.disp then break end
 
       -- [reg+xreg...]
-      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
+      local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
       xreg, t.xreg, tp = rtexpr(xreg)
       if not t.xreg then
 	-- [reg+-expr]
@@ -837,7 +859,7 @@ local function parseoperand(param)
       t.disp = dispexpr(tailx)
     else
       -- imm or opsize*imm
-      local imm = toint(expr)
+      local imm = toint(expr, isqword)
       if not imm and sub(expr, 1, 1) == "*" and t.opsize then
 	imm = toint(sub(expr, 2))
 	if imm then
@@ -1952,7 +1974,7 @@ local function dopattern(pat, args, sz,
 	local a = args[narg]
 	narg = narg + 1
 	local mode, imm = a.mode, a.imm
-	if mode == "iJ" and not match("iIJ", c) then
+	if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
 	  werror("bad operand size for label")
 	end
 	if c == "S" then
@@ -2144,14 +2166,16 @@ end
 local function op_data(params)
   if not params then return "imm..." end
   local sz = sub(params.op, 2, 2)
-  if sz == "a" then sz = addrsize end
+  if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
   for _,p in ipairs(params) do
-    local a = parseoperand(p)
+    local a = parseoperand(p, sz == "q")
     if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
       werror("bad mode or size in `"..p.."'")
     end
     if a.mode == "iJ" then
       wputlabel("IMM_", a.imm, 1)
+    elseif sz == "q" then
+      wputqarg(a.imm)
     else
       wputszarg(sz, a.imm)
     end
@@ -2163,7 +2187,11 @@ map_op[".byte_*"] = op_data
 map_op[".sbyte_*"] = op_data
 map_op[".word_*"] = op_data
 map_op[".dword_*"] = op_data
+map_op[".qword_*"] = op_data
 map_op[".aword_*"] = op_data
+map_op[".long_*"] = op_data
+map_op[".quad_*"] = op_data
+map_op[".addr_*"] = op_data
 
 ------------------------------------------------------------------------------
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dynasm.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/dynasm/dynasm.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/dynasm/dynasm.lua
@@ -2,7 +2,7 @@
 -- DynASM. A dynamic assembler for code generation engines.
 -- Originally designed and implemented for LuaJIT.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- See below for full copyright notice.
 ------------------------------------------------------------------------------
 
@@ -10,14 +10,14 @@
 local _info = {
   name =	"DynASM",
   description =	"A dynamic assembler for code generation engines",
-  version =	"1.4.0",
-  vernum =	 10400,
-  release =	"2015-10-18",
+  version =	"1.5.0",
+  vernum =	 10500,
+  release =	"2021-05-02",
   author =	"Mike Pall",
   url =		"https://luajit.org/dynasm.html",
   license =	"MIT",
   copyright =	[[
-Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
Index: wrk-4.2.0/obj/LuaJIT-2.1/etc/luajit.1
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/etc/luajit.1
+++ wrk-4.2.0/obj/LuaJIT-2.1/etc/luajit.1
@@ -74,7 +74,7 @@ luajit \-jv \-e "for i=1,10 do for j=1,1
 Runs some nested loops and shows the resulting traces.
 .SH COPYRIGHT
 .PP
-\fBLuaJIT\fR is Copyright \(co 2005-2021 Mike Pall.
+\fBLuaJIT\fR is Copyright \(co 2005-2023 Mike Pall.
 .br
 \fBLuaJIT\fR is open source software, released under the MIT license.
 .SH SEE ALSO
Index: wrk-4.2.0/obj/LuaJIT-2.1/etc/luajit.pc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/etc/luajit.pc
+++ wrk-4.2.0/obj/LuaJIT-2.1/etc/luajit.pc
@@ -1,8 +1,8 @@
 # Package information for LuaJIT to be used by pkg-config.
 majver=2
 minver=1
-relver=0
-version=${majver}.${minver}.${relver}-beta3
+relver=ROLLING
+version=${majver}.${minver}.${relver}
 abiver=5.1
 
 prefix=/usr/local
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/.gitignore
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/.gitignore
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/.gitignore
@@ -1,4 +1,6 @@
 luajit
+luajit.h
+luajit_relver.txt
 lj_bcdef.h
 lj_ffdef.h
 lj_libdef.h
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/Makefile
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/Makefile
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/Makefile
@@ -7,12 +7,11 @@
 # Also works with MinGW and Cygwin on Windows.
 # Please check msvcbuild.bat for building with MSVC on Windows.
 #
-# Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 ##############################################################################
 
 MAJVER=  2
 MINVER=  1
-RELVER=  0
 ABIVER=  5.1
 NODOTABIVER= 51
 
@@ -53,6 +52,7 @@ CCOPT_arm=
 CCOPT_arm64=
 CCOPT_ppc=
 CCOPT_mips=
+CCOPT_riscv64=
 #
 CCDEBUG=
 # Uncomment the next line to generate debug information:
@@ -211,7 +211,7 @@ TARGET_CC= $(STATIC_CC)
 TARGET_STCC= $(STATIC_CC)
 TARGET_DYNCC= $(DYNAMIC_CC)
 TARGET_LD= $(CROSS)$(CC)
-TARGET_AR= $(CROSS)ar rcus 2>/dev/null
+TARGET_AR= $(CROSS)ar rcus
 TARGET_STRIP= $(CROSS)strip
 
 TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib)
@@ -234,7 +234,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_X
 TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
 TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
 
-TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
+TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
 ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
   TARGET_LJARCH= x64
 else
@@ -268,6 +268,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(T
     TARGET_LJARCH= mips
   endif
 else
+ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= riscv32
+else
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
+    TARGET_LJARCH= riscv64
+else
   $(error Unsupported target architecture)
 endif
 endif
@@ -275,6 +281,8 @@ endif
 endif
 endif
 endif
+endif
+endif
 
 ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
   TARGET_SYS= PS3
@@ -311,6 +319,7 @@ ifeq (Windows,$(TARGET_SYS))
   TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME)
   TARGET_DYNXLDOPTS=
 else
+  TARGET_AR+= 2>/dev/null
 ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1))
   TARGET_XCFLAGS+= -fno-stack-protector
 endif
@@ -319,23 +328,27 @@ ifeq (Darwin,$(TARGET_SYS))
     $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
   endif
   TARGET_STRIP+= -x
+  TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
-  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
-  ifeq (x64,$(TARGET_LJARCH))
-    TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
-    TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
-  endif
+  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
 else
 ifeq (iOS,$(TARGET_SYS))
   TARGET_STRIP+= -x
   TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
   TARGET_DYNXLDOPTS=
-  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+  TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).255
   ifeq (arm64,$(TARGET_LJARCH))
     TARGET_XCFLAGS+= -fno-omit-frame-pointer
   endif
 else
+  ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+    # Find out whether the target toolchain always generates unwind tables.
+    TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
+    ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+      TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+    endif
+  endif
   ifneq (SunOS,$(TARGET_SYS))
     ifneq (PS3,$(TARGET_SYS))
       TARGET_XLDFLAGS+= -Wl,-E
@@ -383,10 +396,11 @@ MINILUA_O= host/minilua.o
 MINILUA_LIBS= -lm
 MINILUA_T= host/minilua
 MINILUA_X= $(MINILUA_T)
+MINILUA_DEP=
 
 ifeq (,$(HOST_LUA))
   HOST_LUA= $(MINILUA_X)
-  DASM_DEP= $(MINILUA_T)
+  MINILUA_DEP= $(MINILUA_T)
 endif
 
 DASM_DIR= ../dynasm
@@ -428,6 +442,10 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TA
   DASM_AFLAGS+= -D NO_UNWIND
   TARGET_ARCH+= -DLUAJIT_NO_UNWIND
 endif
+ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D PAUTH
+  TARGET_ARCH+= -DLJ_ABI_PAUTH=1
+endif
 DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
 ifeq (Windows,$(TARGET_SYS))
   DASM_AFLAGS+= -D WIN
@@ -459,12 +477,26 @@ ifeq (ppc,$(TARGET_LJARCH))
     DASM_AFLAGS+= -D PPE -D TOC
   endif
 endif
+ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D RISCV32
+endif
+ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH)))
+  DASM_AFLAGS+= -D RISCV64
+endif
 endif
 endif
 
 DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
 DASM_DASC= vm_$(DASM_ARCH).dasc
 
+GIT= git
+ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
+  GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+else
+  GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
+endif
+GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
+
 BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \
 	   host/buildvm_lib.o host/buildvm_fold.o
 BUILDVM_T= host/buildvm
@@ -479,13 +511,15 @@ LJVM_BOUT= $(LJVM_S)
 LJVM_MODE= elfasm
 
 LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
-	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+	 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
+	 lib_buffer.o
 LJLIB_C= $(LJLIB_O:.o=.c)
 
 LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
-	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
+	  lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
+	  lj_api.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
@@ -509,8 +543,8 @@ LUAJIT_T= luajit
 
 ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T)
 ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \
-	    host/buildvm_arch.h
-ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP)
+	    host/buildvm_arch.h luajit.h
+ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) luajit_relver.txt $(LIB_VMDEFP)
 WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk
 ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
 
@@ -634,7 +668,12 @@ $(MINILUA_T): $(MINILUA_O)
 	$(E) "HOSTLINK  $@"
 	$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
 
-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
+luajit.h: $(MINILUA_DEP) $(GIT_DEP) luajit_rolling.h
+	$(E) "VERSION   $@"
+	$(Q)$(GIT_RELVER)
+	$(Q)$(HOST_LUA) host/genversion.lua
+
+host/buildvm_arch.h: $(DASM_DASC) $(MINILUA_DEP) lj_arch.h lua.h luaconf.h
 	$(E) "DYNASM    $@"
 	$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/Makefile.dep
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/Makefile.dep
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/Makefile.dep
@@ -1,15 +1,19 @@
 lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_vmevent.h
 lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
- lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
- lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_strfmt.h lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
+ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
+ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
 lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
  lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
  lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
  lj_ffdef.h lj_lib.h lj_libdef.h
+lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
+ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
 lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
  lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
  lj_libdef.h
@@ -48,10 +52,10 @@ lj_api.o: lj_api.c lj_obj.h lua.h luacon
  lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
  lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
 lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
- lj_asm_*.h
+ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
+ lj_prng.h lj_emit_*.h lj_asm_*.h
 lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
 lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
  lj_bcdef.h
@@ -77,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h
  lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h
 lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
- lj_ccallback.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
+ lj_cdata.h lj_cconv.h lj_ccallback.h
 lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
 lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
@@ -110,32 +114,32 @@ lj_err.o: lj_err.c lj_obj.h lua.h luacon
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
  lj_traceerr.h lj_vm.h lj_strfmt.h
 lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
+ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
+ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
 lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
  lj_traceerr.h lj_vm.h
 lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
  lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
- lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_vmevent.h
 lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
  lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
- lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_prng.h
+ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
  lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
  lj_strfmt.h
 lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
- lj_bcdump.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
+ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
 lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
  lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
@@ -170,15 +174,18 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h lu
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
  lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
  lj_vm.h lj_vmevent.h
+lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
  lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
-lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
  lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
  lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
+lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
 lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
  lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
@@ -189,9 +196,10 @@ lj_state.o: lj_state.c lj_obj.h lua.h lu
  lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
  lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
 lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
+ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
 lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
 lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -204,7 +212,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h lu
  lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
  lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
 lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_udata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
 lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
  lj_vm.h lj_vmevent.h
@@ -214,25 +222,25 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lau
  lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
  lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
  lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
- lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
- lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
- lj_prng.c lj_prng.h lj_state.c lj_lex.h lj_alloc.h luajit.h \
- lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
- lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c \
- lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \
- lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \
- lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \
- lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
- lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
- lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
- lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
- lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
- lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lj_traceerr.h lj_vm.h lj_vmevent.h lj_err.c lj_debug.h lj_ff.h \
+ lj_ffdef.h lj_strfmt.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c \
+ lj_buf.c lj_str.c lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c \
+ lj_strscan.h lj_lib.h lj_debug.c lj_prng.c lj_state.c lj_lex.h \
+ lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h lj_profile.h \
+ lj_vmevent.c lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c \
+ lj_serialize.c lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h \
+ lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
+ lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
+ lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
+ lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
+ lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
+ lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
+ lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
+ lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
+ lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
+ lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
+ lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
+ lib_ffi.c lib_buffer.c lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** This is a tool to build the hand-tuned assembler code required for
 ** LuaJIT's bytecode interpreter. It supports a variety of output formats
@@ -18,8 +18,10 @@
 #include "lj_obj.h"
 #include "lj_gc.h"
 #include "lj_bc.h"
+#if LJ_HASJIT
 #include "lj_ir.h"
 #include "lj_ircall.h"
+#endif
 #include "lj_frame.h"
 #include "lj_dispatch.h"
 #if LJ_HASFFI
@@ -65,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx,
 #include "../dynasm/dasm_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "../dynasm/dasm_mips.h"
+#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64
+#include "../dynasm/dasm_riscv.h"
 #else
 #error "No support for this architecture (yet)"
 #endif
@@ -250,6 +254,7 @@ BCDEF(BCNAME)
   NULL
 };
 
+#if LJ_HASJIT
 const char *const ir_names[] = {
 #define IRNAME(name, m, m1, m2)	#name,
 IRDEF(IRNAME)
@@ -290,7 +295,9 @@ static const char *const trace_errors[]
 #include "lj_traceerr.h"
   NULL
 };
+#endif
 
+#if LJ_HASJIT
 static const char *lower(char *buf, const char *s)
 {
   char *p = buf;
@@ -301,6 +308,7 @@ static const char *lower(char *buf, cons
   *p = '\0';
   return buf;
 }
+#endif
 
 /* Emit C source code for bytecode-related definitions. */
 static void emit_bcdef(BuildCtx *ctx)
@@ -318,15 +326,19 @@ static void emit_bcdef(BuildCtx *ctx)
 /* Emit VM definitions as Lua code for debug modules. */
 static void emit_vmdef(BuildCtx *ctx)
 {
+#if LJ_HASJIT
   char buf[80];
+#endif
   int i;
   fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
+  fprintf(ctx->fp, "assert(require(\"jit\").version == \"%s\", \"LuaJIT core/library version mismatch\")\n\n", LUAJIT_VERSION);
   fprintf(ctx->fp, "return {\n\n");
 
   fprintf(ctx->fp, "bcnames = \"");
   for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
   fprintf(ctx->fp, "\",\n\n");
 
+#if LJ_HASJIT
   fprintf(ctx->fp, "irnames = \"");
   for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
   fprintf(ctx->fp, "\",\n\n");
@@ -355,6 +367,7 @@ static void emit_vmdef(BuildCtx *ctx)
   for (i = 0; trace_errors[i]; i++)
     fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
   fprintf(ctx->fp, "},\n\n");
+#endif
 }
 
 /* -- Argument parsing ---------------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm.h
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _BUILDVM_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_asm.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm_asm.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_asm.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: Assembler source code emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
@@ -97,9 +97,15 @@ static void emit_asm_words(BuildCtx *ctx
 #if LJ_TARGET_ARM64 && LJ_BE
     ins = lj_bswap(ins);  /* ARM64 instructions are always little-endian. */
 #endif
-    if ((i & 15) == 0)
+    if ((i & 15) == 0) {
+#if LJ_TARGET_RISCV64
+      while (ins == 0xffffffffu) { i += 4; ins = *(uint32_t *)(p+i); }
+#endif
       fprintf(ctx->fp, "\t.long 0x%08x", ins);
-    else
+    } else
+#if LJ_TARGET_RISCV64
+    if (ins != 0xffffffffu)
+#endif
       fprintf(ctx->fp, ",0x%08x", ins);
     if ((i & 15) == 12) putc('\n', ctx->fp);
   }
@@ -156,6 +162,21 @@ static void emit_asm_wordreloc(BuildCtx
 	  "Error: unsupported opcode %08x for %s symbol relocation.\n",
 	  ins, sym);
   exit(1);
+#elif LJ_TARGET_RISCV64
+  if (ins == 0x7fffffffu) {  /* Exact marker value: emit a 'call' to sym. */
+    fprintf(ctx->fp, "\tcall %s\n", sym);
+  } else if ((ins & 0x7f) == 0x17u) {  /* AUIPC opcode; rd is bits 7..11. */
+    fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym);
+  } else if ((ins & 0x7f) == 0x67u) {  /* JALR opcode; rd: bits 7..11, rs1: bits 15..19. */
+    fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym);
+  } else if ((ins & 0x7f) == 0x6fu) {  /* JAL opcode; rd is bits 7..11. */
+    fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym);
+  } else {  /* Anything else cannot be re-emitted symbolically: abort the build. */
+    fprintf(stderr,
+  	    "Error: unsupported opcode %08x for %s symbol relocation.\n",
+  	    ins, sym);
+    exit(1);
+  }
 #else
 #error "missing relocation support for this architecture"
 #endif
@@ -243,6 +264,15 @@ void emit_asm(BuildCtx *ctx)
 
   fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
   fprintf(ctx->fp, "\t.text\n");
+#if LJ_TARGET_MIPS32 && !LJ_ABI_SOFTFP
+  fprintf(ctx->fp, "\t.module fp=32\n");
+#endif
+#if LJ_TARGET_MIPS
+  fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n");
+#endif
+#if LJ_TARGET_RISCV64
+  fprintf(ctx->fp, ".option arch, -c\n.option norelax\n");
+#endif
   emit_asm_align(ctx, 4);
 
 #if LJ_TARGET_PS3
@@ -269,9 +299,6 @@ void emit_asm(BuildCtx *ctx)
 	  ".pad #28\n");
 #endif
 #endif
-#if LJ_TARGET_MIPS
-  fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
-#endif
 
   for (i = rel = 0; i < ctx->nsym; i++) {
     int32_t ofs = ctx->sym[i].ofs;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_fold.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm_fold.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_fold.c
@@ -1,10 +1,11 @@
 /*
 ** LuaJIT VM builder: IR folding hash table generator.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
 #include "lj_obj.h"
+#if LJ_HASJIT
 #include "lj_ir.h"
 
 /* Context for the folding hash table generator. */
@@ -226,4 +227,10 @@ void emit_fold(BuildCtx *ctx)
 
   makehash(ctx);
 }
+#else
+void emit_fold(BuildCtx *ctx)
+{
+  UNUSED(ctx);
+}
+#endif
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_lib.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm_lib.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_lib.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: library definition compiler.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "buildvm.h"
@@ -379,12 +379,21 @@ void emit_lib(BuildCtx *ctx)
       /* Simplistic pre-processor. Only handles top-level #if/#endif. */
       if (buf[0] == '#' && buf[1] == 'i' && buf[2] == 'f') {
 	int ok = 1;
-	if (!strcmp(buf, "#if LJ_52\n"))
+	size_t len = strlen(buf);  /* len >= 3 here: buf starts with "#if". */
+	if (buf[len-1] == '\n') {  /* Strip the trailing LF ... */
+	  buf[len-1] = 0;
+	  if (buf[len-2] == '\r') {  /* ... and the CR of a CRLF line ending. */
+	    buf[len-2] = 0;
+	  }
+	}
+	if (!strcmp(buf, "#if LJ_52"))
 	  ok = LJ_52;
-	else if (!strcmp(buf, "#if LJ_HASJIT\n"))
+	else if (!strcmp(buf, "#if LJ_HASJIT"))
 	  ok = LJ_HASJIT;
-	else if (!strcmp(buf, "#if LJ_HASFFI\n"))
+	else if (!strcmp(buf, "#if LJ_HASFFI"))
 	  ok = LJ_HASFFI;
+	else if (!strcmp(buf, "#if LJ_HASBUFFER"))
+	  ok = LJ_HASBUFFER;
 	if (!ok) {
 	  int lvl = 1;
 	  while (fgets(buf, sizeof(buf), fp) != NULL) {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_libbc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm_libbc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_libbc.h
@@ -4,42 +4,67 @@ static const int libbc_endian = 0;
 
 static const uint8_t libbc_code[] = {
 #if LJ_FR2
-0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
-0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
-16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
-0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
-128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
-0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
-0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
-0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
-8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
-0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
-0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
-2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
-3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
-0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
-41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
-18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
-6,252,127,76,4,2,0,0
+/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
+220,203,178,130,4,
+/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
+198,190,199,252,3,
+/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
+BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
+BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
+BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
+/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
+2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0,
+BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
+BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
+/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
+0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
+BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
+BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
+BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
+BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
+BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
+/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
+2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
+4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
+128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
+BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
+BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
+BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
+11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
 #else
-0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
-0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
-16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
-0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
-128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
-0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
-0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
-0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
-8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
-0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
-0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
-2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
-3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
-0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
-41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
-18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
-6,252,127,76,4,2,0,0
+/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
+220,203,178,130,4,
+/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
+198,190,199,252,3,
+/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
+BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
+BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
+BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
+/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
+2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0,
+BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
+BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
+/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
+0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
+BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
+BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
+BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
+BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
+BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
+/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
+2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
+4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
+128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
+BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
+BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
+BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
+11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
 #endif
+0
 };
 
 static const struct { const char *name; int ofs; } libbc_map[] = {
@@ -48,9 +73,9 @@ static const struct { const char *name;
 {"string_len",50},
 {"table_foreachi",69},
 {"table_foreach",136},
-{"table_getn",207},
-{"table_remove",226},
-{"table_move",355},
-{NULL,502}
+{"table_getn",213},
+{"table_remove",232},
+{"table_move",361},
+{NULL,508}
 };
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_peobj.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/buildvm_peobj.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/buildvm_peobj.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM builder: PE object emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Only used for building on Windows, since we cannot assume the presence
 ** of a suitable assembler. The host and target byte order must match.
@@ -9,7 +9,7 @@
 #include "buildvm.h"
 #include "lj_bc.h"
 
-#if LJ_TARGET_X86ORX64
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
 
 /* Context for PE object emitter. */
 static char *strtab;
@@ -93,6 +93,17 @@ typedef struct PEsymaux {
 #define PEOBJ_RELOC_ADDR32NB	0x03
 #define PEOBJ_RELOC_OFS		0
 #define PEOBJ_TEXT_FLAGS	0x60500020  /* 60=r+x, 50=align16, 20=code. */
+#define PEOBJ_PDATA_NRELOC	6
+#define PEOBJ_XDATA_SIZE	(8*2+4+6*2)
+#elif LJ_TARGET_ARM64
+#define PEOBJ_ARCH_TARGET	0xaa64
+#define PEOBJ_RELOC_REL32	0x03  /* MS: BRANCH26. */
+#define PEOBJ_RELOC_DIR32	0x01
+#define PEOBJ_RELOC_ADDR32NB	0x02
+#define PEOBJ_RELOC_OFS		(-4)
+#define PEOBJ_TEXT_FLAGS	0x60500020  /* 60=r+x, 50=align16, 20=code. */
+#define PEOBJ_PDATA_NRELOC	4
+#define PEOBJ_XDATA_SIZE	(4+24+4 +4+8)
 #endif
 
 /* Section numbers (0-based). */
@@ -100,7 +111,7 @@ enum {
   PEOBJ_SECT_ABS = -2,
   PEOBJ_SECT_UNDEF = -1,
   PEOBJ_SECT_TEXT,
-#if LJ_TARGET_X64
+#ifdef PEOBJ_PDATA_NRELOC
   PEOBJ_SECT_PDATA,
   PEOBJ_SECT_XDATA,
 #elif LJ_TARGET_X86
@@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
   uint32_t sofs;
   int i, nrsym;
   union { uint8_t b; uint32_t u; } host_endian;
+#ifdef PEOBJ_PDATA_NRELOC
+  uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
+#endif
 
   sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
 
@@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
   /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
   pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
 
-#if LJ_TARGET_X64
+#ifdef PEOBJ_PDATA_NRELOC
   memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
   pesect[PEOBJ_SECT_PDATA].ofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
+  sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
   pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
+  sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
   pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
 
   memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
   pesect[PEOBJ_SECT_XDATA].ofs = sofs;
-  sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2);  /* See below. */
+  sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE);  /* See below. */
   pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
   sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
   /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
   */
   nrsym = ctx->nrelocsym;
   pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
-#if LJ_TARGET_X64
+#ifdef PEOBJ_PDATA_NRELOC
   pehdr.nsyms += 1;  /* Symbol for lj_err_unwind_win. */
 #endif
 
@@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
 
 #if LJ_TARGET_X64
   { /* Write .pdata section. */
-    uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
     uint32_t pdata[3];  /* Start of .text, end of .text and .xdata. */
     PEreloc reloc;
     pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@@ -308,6 +321,87 @@ void emit_peobj(BuildCtx *ctx)
     reloc.type = PEOBJ_RELOC_ADDR32NB;
     owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
   }
+#elif LJ_TARGET_ARM64
+  /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
+  { /* Write .pdata section. */
+    uint32_t pdata[4];  /* Two entries of two words; each word gets one reloc below. */
+    PEreloc reloc;
+    pdata[0] = 0;  /* Entry 1: offset from the start of .text. */
+    pdata[1] = 0;  /* Entry 1: offset from the start of .xdata. */
+    pdata[2] = fcofs;  /* Entry 2: offset of the last symbol (vm_ffi_call) in .text. */
+    pdata[3] = 4+24+4;  /* Entry 2: skip record 1 (header + 24 code bytes + handler word). */
+    owrite(ctx, &pdata, sizeof(pdata));
+    /* Start of .text and start of .xdata. */
+    reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    /* Start of vm_ffi_call and start of second part of .xdata. */
+    reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+    reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
+  { /* Write .xdata section. */
+    uint32_t u32;
+    uint8_t *p, uwc[24];  /* p is the write cursor into the unwind code buffer uwc. */
+    PEreloc reloc;
+
+#define CBE16(x)	(*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)  /* 16 bit big-endian store. */
+#define CALLOC_S(s)	(*p++ = ((s) >> 4))  /* s < 512 */
+#define CSAVE_FPLR(o)	(*p++ = 0x40 | ((o) >> 3))  /* o <= 504 */
+#define CSAVE_REGP(r,o)	CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
+#define CSAVE_REGS(r1,r2,o1) do { \
+  int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
+} while (0)
+#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
+#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
+#define CSAVE_FREGS(r1,r2,o1) do { \
+  int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
+} while (0)
+#define CADD_FP(s)	CBE16(0xe200 | ((s) >> 3))  /* s < 8*256 */
+#define CODE_NOP	0xe3
+#define CODE_END	0xe4
+#define CEND_ALIGN	do { \
+  *p++ = CODE_END; \
+  while ((p - uwc) & 3) *p++ = CODE_NOP; \
+} while (0)
+
+    /* Unwind codes for .text section with handler. */
+    p = uwc;
+    CADD_FP(192);		/* +2 */
+    CSAVE_REGS(19, 28, 176);	/* +5*2 */
+    CSAVE_FREGS(8, 15, 96);	/* +4*2 */
+    CSAVE_FPLR(192);		/* +1 */
+    CALLOC_S(208);		/* +1 */
+    CEND_ALIGN;			/* +1 +1 -> 24 */
+
+    u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);  /* Code words | X (handler) | length/4. */
+    owrite(ctx, &u32, 4);
+    owrite(ctx, &uwc, 24);
+
+    u32 = 0;  /* Handler RVA to be relocated at 4 + 24. */
+    owrite(ctx, &u32, 4);
+
+    /* Unwind codes for vm_ffi_call without handler. */
+    p = uwc;
+    CADD_FP(16);		/* +2 */
+    CSAVE_FPLR(16);		/* +1 */
+    CSAVE_REGPX(19, -32);	/* +2 */
+    CEND_ALIGN;			/* +1 +2 -> 8 */
+
+    u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);  /* Code words | length/4, X bit clear. */
+    owrite(ctx, &u32, 4);
+    owrite(ctx, &uwc, 8);
+
+    reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
+    reloc.type = PEOBJ_RELOC_ADDR32NB;
+    owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+  }
 #elif LJ_TARGET_X86
   /* Write .sxdata section. */
   for (i = 0; i < nrsym; i++) {
@@ -339,7 +433,7 @@ void emit_peobj(BuildCtx *ctx)
       emit_peobj_sym(ctx, ctx->relocsym[i], 0,
 		     PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
 
-#if LJ_TARGET_X64
+#ifdef PEOBJ_PDATA_NRELOC
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
     emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
     emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/genlibbc.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/genlibbc.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/genlibbc.lua
@@ -2,7 +2,7 @@
 -- Lua script to dump the bytecode of the library functions written in Lua.
 -- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 
@@ -55,7 +55,7 @@ local function transform_lua(code)
   end)
   code = string.gsub(code, "PAIRS%((.-)%)", function(var)
     fixup.PAIRS = true
-    return format("nil, %s, 0", var)
+    return format("nil, %s, 0x4dp80", var)
   end)
   return "return "..code, fixup
 end
@@ -79,9 +79,11 @@ local name2itype = {
   str = 5, func = 9, tab = 12, int = 14, num = 15
 }
 
-local BC = {}
+local BC, BCN = {}, {}  -- BC: opcode name -> number; BCN: opcode number -> name.
 for i=0,#bcnames/6-1 do
-  BC[string.gsub(string.sub(bcnames, i*6+1, i*6+6), " ", "")] = i
+  local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "")  -- 6-char slot i of bcnames, spaces removed.
+  BC[name] = i
+  BCN[i] = name
 end
 local xop, xra = isbe and 3 or 0, isbe and 2 or 1
 local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
@@ -96,6 +98,7 @@ local function fixup_dump(dump, fixup)
   p = read_uleb128(p)
   p = read_uleb128(p)
   p, sizebc = read_uleb128(p)
+  local startbc = tonumber(p - start)
   local rawtab = {}
   for i=0,sizebc-1 do
     local op = p[xop]
@@ -129,7 +132,10 @@ local function fixup_dump(dump, fixup)
     end
     p = p + 4
   end
-  return ffi.string(start, n)
+  local ndump = ffi.string(start, n)
+  -- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX.
+  ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f")
+  return { dump = ndump, startbc = startbc, sizebc = sizebc }
 end
 
 local function find_defs(src)
@@ -149,24 +155,46 @@ local function gen_header(defs)
   local function w(x) t[#t+1] = x end
   w("/* This is a generated file. DO NOT EDIT! */\n\n")
   w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
-  local s = ""
-  for _,name in ipairs(defs) do
-    s = s .. defs[name]
+  local s, sb = "", ""
+  for i,name in ipairs(defs) do
+    local d = defs[name]
+    s = s .. d.dump
+    sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
+	    .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
+	    .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
   end
   w("static const uint8_t libbc_code[] = {\n")
   local n = 0
   for i=1,#s do
     local x = string.byte(s, i)
-    w(x); w(",")
-    n = n + (x < 10 and 2 or (x < 100 and 3 or 4))
-    if n >= 75 then n = 0; w("\n") end
+    local xb = string.byte(sb, i)
+    if xb == 255 then
+      local name = BCN[x]
+      local m = #name + 4
+      if n + m > 78 then n = 0; w("\n") end
+      n = n + m
+      w("BC_"); w(name)
+    else
+      local m = x < 10 and 2 or (x < 100 and 3 or 4)
+      if xb == 0 then
+	if n + m > 78 then n = 0; w("\n") end
+      else
+	local name = defs[xb]:gsub("_", ".")
+	if n ~= 0 then w("\n") end
+	w("/* "); w(name); w(" */ ")
+	n = #name + 7
+      end
+      n = n + m
+      w(x)
+    end
+    w(",")
   end
-  w("0\n};\n\n")
+  w("\n0\n};\n\n")
   w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
   local m = 0
   for _,name in ipairs(defs) do
     w('{"'); w(name); w('",'); w(m) w('},\n')
-    m = m + #defs[name]
+    m = m + #defs[name].dump
   end
   w("{NULL,"); w(m); w("}\n};\n\n")
   return table.concat(t)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/genminilua.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/genminilua.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/genminilua.lua
@@ -2,7 +2,7 @@
 -- Lua script to generate a customized, minified version of Lua.
 -- The resulting 'minilua' is used for the build process of LuaJIT.
 ----------------------------------------------------------------------------
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 
@@ -327,6 +327,12 @@ local function rename_tokens2(src)
   return gsub(src, "ZY([%w_]+)", "union %1")
 end
 
+local function fix_bugs_and_warnings(src)  -- Apply three textual patches to the generated C source string.
+ src = gsub(src, "(luaD_checkstack%(L,p%->maxstacksize)%)", "%1+p->numparams)")  -- Add p->numparams to the checked stack size.
+ src = gsub(src, "if%(sep==%-1%)(return'%[';)\nelse (luaX_lexerror%b();)", "if (sep!=-1)%2\n%1")  -- Error call first, then an unconditional return'['.
+ return gsub(src, "(default:{\nNode%*n=mainposition)", "/*fallthrough*/\n%1")  -- Mark the fallthrough; also returns gsub's match count.
+end
+
 local function func_gather(src)
   local nodes, list = {}, {}
   local pos, len = 1, #src
@@ -425,5 +431,6 @@ src = rename_tokens1(src)
 src = func_collect(src)
 src = rename_tokens2(src)
 src = restore_strings(src)
+src = fix_bugs_and_warnings(src)
 src = merge_header(src, license)
 io.write(src)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/genversion.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/genversion.lua
@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- Lua script to embed the rolling release version in luajit.h.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local arg = {...}  -- Script arguments; each one is optional.
+local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"  -- Input: header text containing the ROLLING placeholder.
+local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"  -- Input: text file holding the release number.
+local FILE_LUAJIT_H = arg[3] or "luajit.h"  -- Output: header written by this script.
+
+local function file_read(file)  -- Return the entire contents of 'file' as a string.
+  local fp = assert(io.open(file, "rb"), "run from the wrong directory")  -- Any open failure raises with this message.
+  local data = assert(fp:read("*a"))
+  fp:close()
+  return data
+end
+
+local function file_write_mod(file, data)  -- Write 'data' to 'file' unless the file already holds exactly 'data'.
+  local fp = io.open(file, "rb")
+  if fp then  -- File exists and is readable: compare before rewriting.
+    local odata = assert(fp:read("*a"))
+    fp:close()
+    if odata == data then return end  -- Unchanged: skip the write (presumably to keep the mtime for make -- confirm).
+  end
+  fp = assert(io.open(file, "wb"))
+  assert(fp:write(data))
+  assert(fp:close())  -- Raise on close errors too.
+end
+
+local text = file_read(FILE_ROLLING_H)
+local relver = file_read(FILE_RELVER_TXT):match("(%d+)")  -- First run of decimal digits, or nil if there is none.
+
+if relver then
+  text = text:gsub("ROLLING", relver)  -- Replace every occurrence of the placeholder.
+else
+  io.stderr:write([[
+**** WARNING Cannot determine rolling release version from git log.
+**** WARNING The 'git' command must be available during the build.
+]])
+  file_write_mod(FILE_RELVER_TXT, "ROLLING\n") -- Fallback for install target.
+end
+
+file_write_mod(FILE_LUAJIT_H, text)  -- Without a release number the text is written with the placeholder intact.
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/host/minilua.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/host/minilua.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/host/minilua.c
@@ -1134,7 +1134,7 @@ if(!cl->isC){
 CallInfo*ci;
 StkId st,base;
 Proto*p=cl->p;
-luaD_checkstack(L,p->maxstacksize);
+luaD_checkstack(L,p->maxstacksize+p->numparams);
 func=restorestack(L,funcr);
 if(!p->is_vararg){
 base=func+1;
@@ -1639,6 +1639,7 @@ lua_number2int(k,n);
 if(luai_numeq(cast_num(k),nvalue(key)))
 return luaH_getnum(t,k);
 }
+/*fallthrough*/
 default:{
 Node*n=mainposition(t,key);
 do{
@@ -2905,8 +2906,8 @@ if(sep>=0){
 read_long_string(ls,seminfo,sep);
 return TK_STRING;
 }
-else if(sep==-1)return'[';
-else luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
+else if (sep!=-1)luaX_lexerror(ls,"invalid long string delimiter",TK_STRING);
+return'[';
 }
 case'=':{
 next(ls);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/bc.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/bc.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/bc.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT bytecode listing module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -41,7 +41,6 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local bit = require("bit")
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/bcsave.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/bcsave.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/bcsave.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT module to save/list bytecode.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -11,7 +11,7 @@
 ------------------------------------------------------------------------------
 
 local jit = require("jit")
-assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20199, "LuaJIT core/library version mismatch")
 local bit = require("bit")
 
 -- Symbol name prefix for LuaJIT bytecode.
@@ -33,11 +33,12 @@ Save LuaJIT bytecode: luajit -b[options]
   -t type   Set output file type (default: auto-detect from output name).
   -a arch   Override architecture for object files (default: native).
   -o os     Override OS for object files (default: native).
+  -F name   Override filename (default: input filename).
   -e chunk  Use chunk string as input.
   --        Stop handling options.
   -         Use stdin as input and/or stdout as output.
 
-File types: c h obj o raw (default)
+File types: c cc h obj o raw (default)
 ]]
   os.exit(1)
 end
@@ -49,10 +50,22 @@ local function check(ok, ...)
   os.exit(1)
 end
 
-local function readfile(input)
+local function readfile(ctx, input)  -- Return 'input' compiled to a function; ctx.filename, if set, becomes the chunk name.
   if type(input) == "function" then return input end
-  if input == "-" then input = nil end
-  return check(loadfile(input))
+  if ctx.filename then  -- Name override: read the source manually so load() can be given the name.
+    local data
+    if input == "-" then
+      data = io.stdin:read("*a")
+    else
+      local fp = assert(io.open(input, "rb"))
+      data = assert(fp:read("*a"))
+      assert(fp:close())
+    end
+    return check(load(data, ctx.filename))
+  else
+    if input == "-" then input = nil end  -- loadfile(nil) reads from stdin.
+    return check(loadfile(input))
+  end
 end
 
 local function savefile(name, mode)
@@ -60,10 +73,15 @@ local function savefile(name, mode)
   return check(io.open(name, mode))
 end
 
+local function set_stdout_binary(ffi)  -- Switch stdout to binary mode via the C runtime's _setmode(); 'ffi' is the loaded FFI library.
+  ffi.cdef[[int _setmode(int fd, int mode);]]
+  ffi.C._setmode(1, 0x8000)  -- fd 1 is stdout; 0x8000 is _O_BINARY in the Microsoft C runtime.
+end
+
 ------------------------------------------------------------------------------
 
 local map_type = {
-  raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
+  raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
 }
 
 local map_arch = {
@@ -79,6 +97,7 @@ local map_arch = {
   mips64el =	{ e = "le", b = 64, m = 8, f = 0x80000007, },
   mips64r6 =	{ e = "be", b = 64, m = 8, f = 0xa0000407, },
   mips64r6el =	{ e = "le", b = 64, m = 8, f = 0xa0000407, },
+  riscv64 =    { e = "le", b = 64, m = 243, f = 0x00000004, },
 }
 
 local map_os = {
@@ -125,6 +144,11 @@ local function bcsave_tail(fp, output, s
 end
 
 local function bcsave_raw(output, s)
+  if output == "-" and jit.os == "Windows" then  -- Writing to stdout on a Windows host.
+    local ok, ffi = pcall(require, "ffi")  -- pcall: the FFI library may be unavailable.
+    check(ok, "FFI library required to write binary file to stdout")
+    set_stdout_binary(ffi)
+  end
   local fp = savefile(output, "wb")
   bcsave_tail(fp, output, s)
 end
@@ -446,18 +470,18 @@ typedef struct {
   uint32_t value;
 } mach_nlist;
 typedef struct {
-  uint32_t strx;
+  int32_t strx;
   uint8_t type, sect;
   uint16_t desc;
   uint64_t value;
 } mach_nlist_64;
 typedef struct
 {
-  uint32_t magic, nfat_arch;
+  int32_t magic, nfat_arch;
 } mach_fat_header;
 typedef struct
 {
-  uint32_t cputype, cpusubtype, offset, size, align;
+  int32_t cputype, cpusubtype, offset, size, align;
 } mach_fat_arch;
 typedef struct {
   struct {
@@ -491,6 +515,18 @@ typedef struct {
   mach_nlist sym_entry;
   uint8_t space[4096];
 } mach_fat_obj;
+typedef struct {
+  mach_fat_header fat;
+  mach_fat_arch fat_arch[2];
+  struct {
+    mach_header_64 hdr;
+    mach_segment_command_64 seg;
+    mach_section_64 sec;
+    mach_symtab_command sym;
+  } arch[2];
+  mach_nlist_64 sym_entry;
+  uint8_t space[4096];
+} mach_fat_obj_64;
 ]]
   local symname = '_'..LJBC_PREFIX..ctx.modname
   local isfat, is64, align, mobj = false, false, 4, "mach_obj"
@@ -499,7 +535,7 @@ typedef struct {
   elseif ctx.arch == "arm" then
     isfat, mobj = true, "mach_fat_obj"
   elseif ctx.arch == "arm64" then
-    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
+    is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
   else
     check(ctx.arch == "x86", "unsupported architecture for OSX")
   end
@@ -568,6 +604,9 @@ end
 local function bcsave_obj(ctx, output, s)
   local ok, ffi = pcall(require, "ffi")
   check(ok, "FFI library required to write this file type")
+  if output == "-" and jit.os == "Windows" then
+    set_stdout_binary(ffi)
+  end
   if ctx.os == "windows" then
     return bcsave_peobj(ctx, output, s, ffi)
   elseif ctx.os == "osx" then
@@ -579,13 +618,13 @@ end
 
 ------------------------------------------------------------------------------
 
-local function bclist(input, output)
-  local f = readfile(input)
+local function bclist(ctx, input, output)  -- Dump a bytecode listing of 'input' to 'output'.
+  local f = readfile(ctx, input)  -- ctx is passed so readfile can apply ctx.filename.
   require("jit.bc").dump(f, savefile(output, "w"), true)
 end
 
 local function bcsave(ctx, input, output)
-  local f = readfile(input)
+  local f = readfile(ctx, input)
   local s = string.dump(f, ctx.strip)
   local t = ctx.type
   if not t then
@@ -638,6 +677,8 @@ local function docmd(...)
 	    ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
 	  elseif opt == "o" then
 	    ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
+	  elseif opt == "F" then
+	    ctx.filename = "@"..tremove(arg, n)
 	  else
 	    usage()
 	  end
@@ -649,7 +690,7 @@ local function docmd(...)
   end
   if list then
     if #arg == 0 or #arg > 2 then usage() end
-    bclist(arg[1], arg[2] or "-")
+    bclist(ctx, arg[1], arg[2] or "-")
   else
     if #arg ~= 2 then usage() end
     bcsave(ctx, arg[1], arg[2])
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_arm.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT ARM disassembler module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_arm64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm64.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT ARM64 disassembler module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 --
 -- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
@@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted
     [0] = {
       shift = 29, mask = 3,
       [0] = {
-	shift = 21, mask = 7,
-	[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
-	"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+	shift = 21, mask = 1,
+	[0] = "andDNMSg", "bicDNMSg"
       },
       {
-	shift = 21, mask = 7,
-	[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
-	     "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+	shift = 21, mask = 1,
+	[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
       },
       {
-	shift = 21, mask = 7,
-	[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
-	"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+	shift = 21, mask = 1,
+	[0] = "eorDNMSg", "eonDNMSg"
       },
       {
-	shift = 21, mask = 7,
-	[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
-	"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+	shift = 21, mask = 1,
+	[0] = "ands|tstD0NMSg", "bicsDNMSg"
       }
     },
     false -- unallocated
@@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted
   {
     shift = 29, mask = 3,
     [0] = {
-      shift = 21, mask = 7,
-      [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
-      "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+      shift = 21, mask = 1,
+      [0] = "andDNMSg", "bicDNMSg"
     },
     {
-      shift = 21, mask = 7,
-      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
-      "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+      shift = 21, mask = 1,
+      [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
     },
     {
-      shift = 21, mask = 7,
-      [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
-      "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+      shift = 21, mask = 1,
+      [0] = "eorDNMSg", "eonDNMSg"
     },
     {
-      shift = 21, mask = 7,
-      [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
-      "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+      shift = 21, mask = 1,
+      [0] = "ands|tstD0NMSg", "bicsDNMSg"
     }
   }
 }
@@ -735,7 +727,7 @@ local map_cond = {
   "hi", "ls", "ge", "lt", "gt", "le", "al",
 }
 
-local map_shift = { [0] = "lsl", "lsr", "asr", }
+local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
 
 local map_extend = {
   [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
@@ -956,7 +948,7 @@ local function disass_ins(ctx)
     elseif p == "U" then
       local rn = map_regs.x[band(rshift(op, 5), 31)]
       local sz = band(rshift(op, 30), 3)
-      local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
+      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
       if imm12 ~= 0 then
 	x = "["..rn..", #"..imm12.."]"
       else
@@ -993,8 +985,7 @@ local function disass_ins(ctx)
 	x = x.."]"
       end
     elseif p == "P" then
-      local opcv, sh = rshift(op, 26), 2
-      if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+      local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
       local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
       local rn = map_regs.x[band(rshift(op, 5), 31)]
       local ind = band(rshift(op, 23), 3)
@@ -1089,7 +1080,7 @@ local function disass_ins(ctx)
 	  last = "#"..(sf+32 - immr)
 	  operands[#operands] = last
 	  x = x + 1
-	elseif x >= immr then
+	else
 	  name = a2
 	  x = x - immr + 1
 	end
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm64be.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_arm64be.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_arm64be.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT ARM64BE disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- ARM64 instructions are always little-endian. So just forward to the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mips.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS disassembler module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mips64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS64 disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the big-endian functions from the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64el.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mips64el.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64el.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS64EL disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64r6.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mips64r6.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64r6.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS64R6 disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the r6 big-endian functions from the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64r6el.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mips64r6el.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mips64r6el.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPS64R6EL disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the r6 little-endian functions from the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mipsel.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_mipsel.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_mipsel.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT MIPSEL disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the little-endian functions from the
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_ppc.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_ppc.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_ppc.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT PPC disassembler module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT/X license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_riscv.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_riscv.lua
@@ -0,0 +1,793 @@
+------------------------------------------------------------------------------
+-- LuaJIT RISC-V disassembler module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+--
+-- Contributed by Milos Poletanovic from Syrmia.com.
+------------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most standard RISC-V instructions.
+-- Mode is little-endian
+------------------------------------------------------------------------------
+
+local type = type
+local byte, format = string.byte, string.format
+local match, gmatch = string.match, string.gmatch
+local concat = table.concat
+local bit = require("bit")
+local band, bor, tohex = bit.band, bit.bor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+--RVC32 extension
+
+local map_quad0 = {  -- Compressed (RVC) quadrant 0; entries look like a mnemonic plus operand letters (decoder not shown).
+  shift = 13, mask = 7,  -- Bits 15:13 (funct3 of the 16-bit encoding).
+  [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn",  -- NOTE(review): slot 3 is c.flw on RV32C only; RV64C defines c.ld there -- confirm.
+  false, "c.fsdNMh", "c.swZMn", "c.fswNMn"  -- Slot 4 is reserved; NOTE(review): slot 7 is c.sd on RV64C -- confirm.
+}
+
+local map_sub2quad1 = {  -- Quadrant 1 register-register ALU ops; entry 3 of map_sub1quad1.
+  shift = 5, mask = 3,  -- Bits 6:5.
+  [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ"
+}
+
+local map_sub1quad1 = {  -- Quadrant 1, slot 4 of map_quad1: shifts, c.andi and the register-register sub-map.
+  shift = 10, mask = 3,  -- Bits 11:10.
+  [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1
+}
+
+local map_quad1 = {  -- Compressed (RVC) quadrant 1.
+  shift = 13, mask = 7,  -- Bits 15:13 (funct3).
+  [0] = {
+    shift = 7, mask = 31,  -- Bits 11:7 (rd field).
+    [0] = "c.nop", _ = "c.addiDx"  -- '_' is presumably the fallback for unlisted indices -- confirm against the decoder.
+  },
+  [1] = "c.jalT", [2] = "c.liDx",  -- NOTE(review): c.jal is RV32C only; RV64C defines c.addiw in slot 1 -- confirm.
+  [3] = {
+    shift = 7, mask = 31,  -- Bits 11:7 (rd field); rd == 2 selects c.addi16sp.
+    [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX",
+    _ = "c.luiDK"
+  },
+  [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq"
+}
+
+local map_sub1quad2 = {  -- Quadrant 2, slot 4 of map_quad2: c.jr/c.mv/c.ebreak/c.jalr/c.add.
+  shift = 12, mask = 1,  -- Bit 12.
+  [0] = {
+    shift = 2, mask = 31,  -- Bits 6:2 (rs2 field).
+    [0] = "c.jrD", _ = "c.mvDE"
+  },
+  [1] = {
+    shift = 2, mask = 31,  -- Bits 6:2 (rs2 field).
+    [0] = {
+      shift = 7, mask = 31,  -- Bits 11:7 (rd/rs1 field).
+      [0] = "c.ebreak", _ = "c.jalrD"
+    },
+   _ = "c.addDE"
+  }
+}
+
+local map_quad2 = {  -- Compressed (RVC) quadrant 2.
+  shift = 13, mask = 7,  -- Bits 15:13 (funct3).
+  [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY",  -- NOTE(review): slot 3 is c.ldsp on RV64C -- confirm.
+  [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu"  -- NOTE(review): slot 7 is c.sdsp on RV64C -- confirm.
+}
+
+local map_compr = {  -- Compressed maps by quadrant number (presumably the low two opcode bits -- confirm against the decoder).
+  [0] = map_quad0, map_quad1, map_quad2
+}
+
+--RV32M
+local map_mext = {  -- Multiply/divide ops; entry 1 of map_al.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr",
+  "divDRr", "divuDRr", "remDRr", "remuDRr"
+}
+
+--RV64M
+local map_mext64 = {  -- 32-bit-word multiply/divide ops; entry 1 of map_arithw_shiftw.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3); slots 1..3 are left undefined.
+  [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr",
+  [7] = "remuwDRr"
+}
+
+--RV32F, RV64F, RV32D, RV64D
+local map_fload = {  -- Floating-point loads; entry 7 of map_pri.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3, the access width).
+  [2] = "flwFL", [3] = "fldFL"
+}
+
+local map_fstore = {  -- Floating-point stores; entry 39 of map_pri.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3, the access width).
+  [2] = "fswSg", [3] = "fsdSg"
+}
+
+local map_fmadd = {  -- Fused multiply-add; entry 67 of map_pri.
+  shift = 25, mask = 3,  -- Bits 26:25 (fmt): 0 = single, 1 = double.
+  [0] = "fmadd.sFGgH", "fmadd.dFGgH"
+}
+
+local map_fmsub = {  -- Fused multiply-subtract; entry 71 of map_pri.
+  shift = 25, mask = 3,  -- Bits 26:25 (fmt): 0 = single, 1 = double.
+  [0] = "fmsub.sFGgH", "fmsub.dFGgH"
+}
+
+local map_fnmsub = {  -- Negated fused multiply-subtract.
+  shift = 25, mask = 3,  -- Bits 26:25 (fmt): 0 = single, 1 = double.
+  [0] = "fnmsub.sFGgH", "fnmsub.dFGgH"
+}
+
+local map_fnmadd = {  -- Negated fused multiply-add.
+  shift = 25, mask = 3,  -- Bits 26:25 (fmt): 0 = single, 1 = double.
+  [0] = "fnmadd.sFGgH", "fnmadd.dFGgH"
+}
+
+local map_fsgnjs = {  -- Single-precision sign injection; entry 16 of map_fext. Names after '|' look like aliases (handling not shown).
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6"
+}
+
+local map_fsgnjd = {  -- Double-precision sign injection; entry 17 of map_fext.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6"
+}
+
+local map_fms = {  -- Single-precision min/max; entry 20 of map_fext.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fmin.sFGg", "fmax.sFGg"
+}
+
+local map_fmd = {  -- Double-precision min/max; entry 21 of map_fext.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fmin.dFGg", "fmax.dFGg"
+}
+
+local map_fcomps = {  -- Single-precision compares; entry 80 of map_fext.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg"
+}
+
+local map_fcompd = {  -- Double-precision compares; entry 81 of map_fext.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+  [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg"
+}
+
+local map_fcvtwls = {  -- Single-precision float to integer conversions; entry 96 of map_fext.
+  shift = 20, mask = 31,  -- Bits 24:20 (the rs2 field selects the integer type).
+  [0] = "fcvt.w.sDG", "fcvt.wu.sDG", "fcvt.l.sDG", "fcvt.lu.sDG"
+}
+
+local map_fcvtwld = {  -- Double-precision float to integer conversions; entry 97 of map_fext.
+  shift = 20, mask = 31,  -- Bits 24:20.
+  [0] = "fcvt.w.dDG", "fcvt.wu.dDG", "fcvt.l.dDG", "fcvt.lu.dDG"
+}
+
+local map_fcvts = {  -- Integer to single-precision float conversions; entry 104 of map_fext.
+  shift = 20, mask = 31,  -- Bits 24:20.
+  [0] = "fcvt.s.wFR", "fcvt.s.wuFR", "fcvt.s.lFR", "fcvt.s.luFR"
+}
+
+local map_fcvtd = {  -- Integer to double-precision float conversions; entry 105 of map_fext.
+  shift = 20, mask = 31,  -- Bits 24:20.
+  [0] = "fcvt.d.wFR", "fcvt.d.wuFR", "fcvt.d.lFR", "fcvt.d.luFR"
+}
+
+local map_fext = {  -- Floating-point computational ops; entries are either strings or nested sub-maps.
+  shift = 25, mask = 127,  -- Bits 31:25 (funct7); even values are single, odd values double precision.
+  [0] = "fadd.sFGg", [1] = "fadd.dFGg", [4] = "fsub.sFGg", [5] = "fsub.dFGg",
+  [8] = "fmul.sFGg", [9] = "fmul.dFGg", [12] = "fdiv.sFGg", [13] = "fdiv.dFGg",
+  [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd,
+  [32] = "fcvt.s.dFG", [33] = "fcvt.d.sFG",[44] = "fsqrt.sFG", [45] = "fsqrt.dFG",
+  [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld,
+  [104] = map_fcvts, [105] = map_fcvtd,
+  [112] = {
+    shift = 12, mask = 7,  -- Bits 14:12 (funct3).
+    [0] = "fmv.x.wDG", "fclass.sDG"
+  },
+  [113] = {
+  shift = 12, mask = 7,
+    [0] = "fmv.x.dDG", "fclass.dDG"
+  },
+  [120] = "fmv.w.xFR", [121] = "fmv.d.xFR"
+}
+
+--RV32A, RV64A
+local map_aext = {  -- Atomic memory ops; entry 47 of map_pri.
+  shift = 27, mask = 31,  -- Bits 31:27 (funct5); each sub-map picks .w (2) or .d (3) from bits 14:12.
+  [0] = {
+    shift = 12, mask = 7,
+    [2] = "amoadd.wDrO", [3] = "amoadd.dDrO"
+  },
+  {
+    shift = 12, mask = 7,
+    [2] = "amoswap.wDrO", [3] = "amoswap.dDrO"
+  },
+  {
+    shift = 12, mask = 7,
+    [2] = "lr.wDO", [3] = "lr.dDO"
+  },
+  {
+    shift = 12, mask = 7,
+    [2] = "sc.wDrO", [3] = "sc.dDrO"
+  },
+  {
+    shift = 12, mask = 7,
+    [2] = "amoxor.wDrO", [3] = "amoxor.dDrO"
+  },
+  [8] = {
+    shift = 12, mask = 7,
+    [2] = "amoor.wDrO", [3] = "amoor.dDrO"
+  },
+  [12] = {
+    shift = 12, mask = 7,
+    [2] = "amoand.wDrO", [3] = "amoand.dDrO"
+  },
+  [16] = {
+    shift = 12, mask = 7,
+    [2] = "amomin.wDrO", [3] = "amomin.dDrO"
+  },
+  [20] = {
+    shift = 12, mask = 7,
+    [2] = "amomax.wDrO", [3] = "amomax.dDrO"
+  },
+  [24] = {
+    shift = 12, mask = 7,
+    [2] = "amominu.wDrO", [3] = "amominu.dDrO"
+  },
+  [28] = {
+   shift = 12, mask = 7,
+   [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO"
+  },
+}
+
+-- RV32I, RV64I
+local map_load = {  -- Integer loads; entry 3 of map_pri.
+  shift = 12, mask = 7,  -- Bits 14:12 (funct3: width and signedness).
+  [0] = "lbDL", "lhDL", "lwDL", "ldDL",
+  "lbuDL", "lhuDL", "lwuDL"
+}
+
+local map_ali = {
+  shift = 12, mask = 7,
+  [0] = {
+    shift = 7, mask = 0x1ffffff,
+    [0] = "nop", _ = "addi|li|mvDR0I2"
+  }
+  ,"slliDRi", "sltiDRI", "sltiu|seqzDRI5",
+  "xori|notDRI4",
+  {
+    shift = 26, mask = 63,
+    [0] = "srliDRi", [16] = "sraiDRi"
+  },
+  "oriDRI", "andiDRI"
+}
+
+local map_branch = {
+  shift = 12, mask = 7,
+  [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false,
+  "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB"
+}
+
+local map_store = {
+  shift = 12, mask = 7,
+  [0] = "sbSr", "shSr", "swSr", "sdSr"
+}
+
+local map_al = {
+  shift = 25, mask = 127,
+  [0] = {
+    shift = 12, mask = 7,
+    [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r",
+    "xorDRr", "srlDRr", "orDRr", "andDRr"
+  },
+  map_mext,
+  [32] = {
+    shift = 12, mask = 7,
+    [0] = "sub|negDR0r", [5] = "sraDRr"
+  }
+}
+
+-- RV64I only: 32-bit "w" forms of the immediate add and shifts
+local map_addi_shift = {
+  shift = 12, mask = 7,
+  [0] = "addiw|sext.wDRI0", "slliwDRi",
+  [5] = {
+    shift = 25, mask = 127,
+    [0] = "srliwDRi", [32] = "sraiwDRi"
+  }
+}
+
+local map_arithw_shiftw = {
+  shift = 25, mask = 127,
+  [0] = {
+    shift = 12, mask = 7,
+    [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr"
+  },
+  [1] = map_mext64,
+  [32] = {
+    shift = 12, mask = 7,
+    [0] = "subw|negwDR0r", [5] = "srawDRr"
+  }
+}
+
+local map_ecabre = {
+  shift = 12, mask = 7,
+  [0] = {
+   shift = 20, mask = 4095,
+   [0] = "ecall", "ebreak"
+  }
+}
+
+local map_fence = {
+  shift = 12, mask = 1,
+  [0] = "fence", --"fence.i" ZIFENCEI EXTENSION
+}
+
+local map_jalr = {
+  shift = 7, mask = 0x1ffffff,
+  _ = "jalr|jrDRI7", [256] = "ret"
+}
+
+local map_pri = {
+  [3] = map_load, [7] = map_fload, [15] = map_fence, [19] = map_ali,
+  [23] = "auipcDA", [27] = map_addi_shift,
+  [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_al,
+  [55] = "luiDU", [59] = map_arithw_shiftw, [67] = map_fmadd, [71] = map_fmsub,
+  [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext,
+  [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre
+}
+
+------------------------------------------------------------------------------
+
+local map_gpr = {
+  [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7",
+  "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+  "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+  "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31",
+}
+
+local map_fgpr = {
+  [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
+  "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
+  "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+  "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
+}
+
+------------------------------------------------------------------------------
+
+-- Output a formatted line (opcode + operands) and advance ctx.pos past it.
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]
+    if sym then extra = "\t->"..sym end
+  end
+  -- ctx.out is a function (create defaults it to io.write): call it directly.
+  if ctx.hexdump > 0 then
+    ctx.out(format("%08x  %s  %-7s %s%s\n",
+    ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))
+  else
+    ctx.out(format("%08x  %-7s %s%s\n",
+    ctx.addr+pos, text, concat(operands, ", "), extra))
+  end
+  -- The two low bits of the first byte give the width of the instruction
+  -- just printed: anything but 0b11 is a 16-bit encoding, otherwise 32-bit.
+  if band(byte(ctx.code, pos+1), 3) < 3 then
+    ctx.pos = pos + 2
+  else
+    ctx.pos = pos + 4
+  end
+end
+
+-- Fallback for unknown opcodes: print the raw word in ctx.op as ".long 0x<hex>".
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+local function get_le(ctx)
+  local code, first = ctx.code, ctx.pos + 1
+  -- The two low bits of the first byte select the encoding width:
+  -- anything other than 0b11 is a 16-bit instruction.
+  if band(byte(code, first), 3) < 3 then
+    local lo, hi = byte(code, first, first + 1)
+    return bor(lshift(hi, 8), lo)
+  end
+  -- 32-bit instruction: assemble four bytes, least significant first.
+  local b0, b1, b2, b3 = byte(code, first, first + 3)
+  return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+end
+
+local function parse_W(opcode)
+  -- Unsigned immediate scattered over opcode bits 12:5; put it back in order.
+  local imm_9_6 = lshift(band(rshift(opcode, 7), 15), 6)
+  local imm_5_4 = lshift(band(rshift(opcode, 11), 3), 4)
+  local imm_3 = lshift(band(rshift(opcode, 5), 1), 3)
+  local imm_2 = lshift(band(rshift(opcode, 6), 1), 2)
+  return bor(imm_9_6, imm_5_4, imm_3, imm_2)
+end
+
+local function parse_x(opcode)
+  -- Signed 6-bit immediate: imm[5] is opcode bit 12, imm[4:0] is bits 6:2.
+  local low5 = band(rshift(opcode, 2), 31)
+  if band(rshift(opcode, 12), 1) == 0 then
+    return low5
+  end
+  -- Negative: set imm[5] and replicate the sign through bits 31:6.
+  return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(1, 5), low5)
+end
+
+local function parse_X(opcode)
+  -- Signed immediate, a multiple of 16: imm[9] is opcode bit 12 (the sign),
+  -- imm[8:7] bits 4:3, imm[6] bit 5, imm[5] bit 2, imm[4] bit 6.
+  local imm = bor(
+    lshift(band(rshift(opcode, 3), 3), 7),
+    lshift(band(rshift(opcode, 5), 1), 6),
+    lshift(band(rshift(opcode, 2), 1), 5),
+    lshift(band(rshift(opcode, 6), 1), 4))
+  if band(rshift(opcode, 12), 1) == 1 then
+    -- Negative: fill bits 31:9 with ones.
+    imm = bor(imm, lshift(1, 31), lshift(0x3fffff, 9))
+  end
+  return imm
+end
+
+local function parse_S(opcode)
+  -- Signed 12-bit offset: imm[11:5] = opcode bits 31:25, imm[4:0] = bits 11:7.
+  local hi7 = band(rshift(opcode, 25), 127)
+  local imm = bor(lshift(hi7, 5), band(rshift(opcode, 7), 31))
+  -- imm[11] (top bit of hi7) is the sign: extend it through bits 31:12.
+  if band(rshift(hi7, 6), 1) == 1 then
+    imm = bor(imm, lshift(1, 31), lshift(0x7ffff, 12))
+  end
+  return imm
+end
+
+local function parse_B(opcode)
+  -- B-type offset: imm[12] = bit 31 (the sign), imm[11] = bit 7,
+  -- imm[10:5] = bits 30:25, imm[4:1] = bits 11:8, imm[0] = 0.
+  local part1 = band(rshift(opcode, 7), 1) --11
+  local part2 = band(rshift(opcode, 25), 63) --10:5
+  local part3 = band(rshift(opcode, 8), 15) --4:1
+  local imm = bor(lshift(part1, 11), lshift(part2, 5), lshift(part3, 1))
+  -- Sign-extend from imm[12]; testing imm[11] here would misdecode any
+  -- branch whose offset lies outside -2048..2046.
+  if band(rshift(opcode, 31), 1) == 1 then imm = bor(imm, lshift(-1, 12)) end
+  return imm
+end
+
+local function parse_q(opcode)
+  -- Signed, even pc-relative offset: imm[8] is opcode bit 12 (the sign),
+  -- imm[7:6] bits 6:5, imm[5] bit 2, imm[4:3] bits 11:10, imm[2:1] bits 4:3.
+  local imm = bor(
+    lshift(band(rshift(opcode, 5), 3), 6),
+    lshift(band(rshift(opcode, 2), 1), 5),
+    lshift(band(rshift(opcode, 10), 3), 3),
+    lshift(band(rshift(opcode, 3), 3), 1))
+  if band(rshift(opcode, 12), 1) == 1 then
+    -- Negative: fill bits 31:8 with ones.
+    imm = bor(imm, lshift(1, 31), lshift(0x7fffff, 8))
+  end
+  return imm
+end
+
+local function parse_J(opcode)
+  -- Signed, even pc-relative offset: imm[20] is opcode bit 31 (the sign),
+  -- imm[19:12] bits 19:12, imm[11] bit 20, imm[10:1] bits 30:21.
+  local imm = bor(
+    lshift(band(rshift(opcode, 12), 255), 12),
+    lshift(band(rshift(opcode, 20), 1), 11),
+    lshift(band(rshift(opcode, 21), 1023), 1))
+  if band(rshift(opcode, 31), 1) == 1 then
+    -- Negative: fill bits 31:20 with ones.
+    imm = bor(imm, lshift(1, 31), lshift(0x7ff, 20))
+  end
+  return imm
+end
+
+local function parse_T(opcode)
+  -- Signed, even pc-relative offset packed into opcode bits 12:2:
+  -- imm[11] is bit 12 (the sign), imm[10] bit 8, imm[9:8] bits 10:9,
+  -- imm[7] bit 6, imm[6] bit 7, imm[5] bit 2, imm[4] bit 11,
+  -- imm[3:1] bits 5:3.
+  local sign = band(rshift(opcode, 12), 1)
+  local imm = bor(
+    lshift(sign, 11),
+    lshift(band(rshift(opcode, 8), 1), 10),
+    lshift(band(rshift(opcode, 9), 3), 8),
+    lshift(band(rshift(opcode, 6), 1), 7),
+    lshift(band(rshift(opcode, 7), 1), 6),
+    lshift(band(rshift(opcode, 2), 1), 5),
+    lshift(band(rshift(opcode, 11), 1), 4),
+    lshift(band(rshift(opcode, 3), 7), 1))
+  if sign == 1 then
+    imm = bor(imm, lshift(1, 31), lshift(0x7ffff, 12)) -- fill bits 31:12
+  end
+  return imm
+end
+
+local function parse_K(opcode)
+  -- 20-bit field: the low five bits come from opcode bits 6:2; opcode bit 12
+  -- is the sign and, when set, is replicated through bits 19:5.
+  local low5 = band(rshift(opcode, 2), 31)
+  if band(rshift(opcode, 12), 1) == 0 then
+    return low5
+  end
+  return bor(lshift(0x7fff, 5), low5)
+end
+
+-- Disassemble a single instruction at ctx.pos and print it through putop.
+local function disass_ins(ctx)
+  local op = ctx:get()
+  local operands = {}
+  local last = nil
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat = 0
+  --for compressed instructions
+  if(band(op, 3) < 3) then
+    opat = ctx.map_compr[band(op, 3)]
+    while type(opat) ~= "string" do
+      if not opat then return unknown(ctx) end
+      -- Descend one level: index the sub-table by its own bit field.
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+    end
+  else
+    opat = ctx.map_pri[band(op,127)]
+    while type(opat) ~= "string" do
+      if not opat then return unknown(ctx) end
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+    end
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  -- When aliases exist, the operand pattern is what follows the alias list.
+  if altname then
+    pat = pat2
+  end
+
+  local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      x = map_gpr[band(rshift(op, 7), 31)]
+    elseif p == "F" then
+      x = map_fgpr[band(rshift(op, 7), 31)]
+    elseif p == "R" then
+      x = map_gpr[band(rshift(op, 15), 31)]
+    elseif p == "G" then
+      x = map_fgpr[band(rshift(op, 15), 31)]
+    elseif p == "r" then
+      x = map_gpr[band(rshift(op, 20), 31)]
+      if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then
+        local temp = last --because of the different order of the characters
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "g" then
+      x = map_fgpr[band(rshift(op, 20), 31)]
+      if(name == "fsw" or name == "fsd") then
+        local temp = last
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "Z" then
+      x = map_gpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "N" then
+      x = map_fgpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "M" then
+      x = map_gpr[8 + band(rshift(op, 7), 7)]
+    elseif p == "E" then
+      x = map_gpr[band(rshift(op, 2), 31)]
+    elseif p == "W" then
+      local uimm = parse_W(op)
+      x = format("%s,%d", "sp", uimm)
+    elseif p == "x" then
+      x = parse_x(op)
+    elseif p == "h" then
+      local part1 = band(rshift(op, 5), 3) --7:6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3))
+      operands[#operands] = format("%d(%s)", uimm, last)
+    elseif p == "X" then
+      local imm = parse_X(op)
+      x = format("%s,%d", "sp", imm)
+    elseif p == "O" then
+      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
+    elseif p == "H" then
+      x = map_fgpr[band(rshift(op, 27), 31)]
+    elseif p == "L" then
+      local register = map_gpr[band(rshift(op, 15), 31)]
+      local disp = arshift(op, 20)
+      x = format("%d(%s)", disp, register)
+    elseif p == "I" then
+      x = arshift(op, 20)
+      --different for jalr
+      if(name == "jalr") then
+        local reg = map_gpr[band(rshift(op, 15), 31)]
+        if(ctx.reltab[reg] == nil) then
+          operands[#operands] = format("%d(%s)", x, last)
+        else
+          local target = ctx.reltab[reg] + x
+          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
+          ctx.rel = target
+          ctx.reltab[reg] = nil --assume no reuses of the register
+        end
+        x = nil --not to add additional operand
+      end
+    elseif p == "i" then
+      --both for RV32I AND RV64I
+      local value = band(arshift(op, 20), 63)
+      x = string.format("0x%x", value)
+    elseif p == "S" then
+      local register = map_gpr[band(rshift(op, 15), 31)] --register
+      local imm = parse_S(op)
+      x = format("%d(%s)", imm, register)
+    elseif p == "n" then
+      local part1 = band(rshift(op, 5), 1) --6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local part3 = band(rshift(op, 6), 1) --2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3),
+                       lshift(part3, 2))
+      operands[#operands] = format("%d(%s)", uimm, last)
+    elseif p == "A" then
+      local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)]
+      ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12)
+      x = format("0x%x", value)
+    elseif p == "B" then
+      x = ctx.addr + ctx.pos + parse_B(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "U" then
+      local value = band(rshift(op, 12), 0xfffff)
+      x = string.format("0x%x", value)
+    elseif p == "Q" then
+      local part1 = band(rshift(op, 2), 7) --8:6
+      local part2 = band(rshift(op, 12), 1) --5
+      local part3 = band(rshift(op, 5), 3) --4:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
+                       lshift(part3, 3))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "q" then
+      x = ctx.addr + ctx.pos + parse_q(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "J" then
+      x = ctx.addr + ctx.pos + parse_J(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "K" then
+      local value = parse_K(op)
+      x = string.format("0x%x", value)
+    elseif p == "Y" then
+      local part1 = band(rshift(op, 2), 3) --7:6
+      local part2 = band(rshift(op, 12), 1) --5
+      local part3 = band(rshift(op, 4), 7) --4:2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
+                       lshift(part3, 2))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "1" then
+      local part1 = band(rshift(op, 12), 1) --5
+      local part2 = band(rshift(op, 2), 31) --4:0
+      local uimm = bor(lshift(0, 31), lshift(part1, 5), part2)
+      x = string.format("0x%x", uimm)
+    elseif p == "T" then
+      x = ctx.addr + ctx.pos + parse_T(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "t" then
+      local part1 = band(rshift(op, 7), 7) --8:6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "u" then
+      local part1 = band(rshift(op, 7), 3) --7:6
+      local part2 = band(rshift(op, 9), 15) --5:2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "V" then
+      x = map_fgpr[band(rshift(op, 2), 31)]
+    elseif p == "0" then --PSEUDOINSTRUCTIONS
+      if (last == "zero" or last == 0) then
+        local n = #operands
+        operands[n] = nil
+        last = operands[n-1]
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
+        if a1 then name, altname = a1, a2
+        else name = altname end
+        alias_done = true
+      end
+    elseif (p == "4") then
+      if(last == -1) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "5") then
+      if(last == 1) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "6") then
+      if(last == operands[#operands - 1]) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "7") then --jalr rs
+      -- The "I" step left "<imm>(<reg>)" here, optionally followed by a
+      -- " #0x..." target note. Only a zero displacement can be shortened,
+      -- and the note (if any) is kept after the register name.
+      local reg, note = match(operands[#operands], "^0%((.-)%)(.*)$")
+      local rd = operands[#operands - 1]
+      if reg and (rd == "ra" or rd == "zero") then
+        if rd == "zero" then name = altname end
+        operands[#operands] = nil
+        operands[#operands] = reg..note
+      end
+    elseif (p == "2" and alias_done == false) then
+      if (last == "zero" or last == 0) then
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
+        name = a2
+        operands[#operands] = nil
+      end
+    end
+    if x then operands[#operands+1] = x; last = x end
+  end
+  return putop(ctx, name, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble ctx.code from offset ofs (default 0) for len bytes (default: to the end).
+local function disass_block(ctx, ofs, len)
+  ofs = ofs or 0
+  local stop
+  if len then stop = ofs + len else stop = #ctx.code end
+  -- Instructions are 16 or 32 bits wide, so round both bounds down to an
+  -- even byte offset.
+  ctx.pos = ofs - ofs % 2
+  stop = stop - stop % 2
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create(code, addr, out)
+  return {
+    code = code,
+    addr = addr or 0,        -- added to ctx.pos when printing addresses
+    out = out or io.write,   -- output function, called with each line
+    symtab = {},             -- looked up by putop to annotate targets
+    reltab = {},             -- auipc results, consumed by a later jalr
+    disass = disass_block,
+    hexdump = 8,
+    get = get_le,
+    map_pri = map_pri,
+    map_compr = map_compr,
+  }
+end
+
+-- Simple API: disassemble all of code (a string), labelled from addr, via out.
+local function disass(code, addr, out)
+  create(code, addr, out):disass() -- no ofs: addr is a label, not an offset
+end
+
+-- Return register name for RID: below 32 comes from map_gpr, else "f"..(r-32).
+local function regname(r)
+  if not (r < 32) then return "f"..(r-32) end
+  return map_gpr[r]
+end
+
+-- Public module functions.
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_riscv64.lua
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_riscv64.lua
@@ -0,0 +1,16 @@
+----------------------------------------------------------------------------
+-- LuaJIT RISC-V 64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the default riscv little-endian functions from the
+-- RISC-V disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv")
+return {
+  create = dis_riscv.create,
+  disass = dis_riscv.disass,
+  regname = dis_riscv.regname
+}
\ No newline at end of file
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_x64.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_x64.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_x64.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x64 disassembler wrapper module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This module just exports the 64 bit functions from the combined
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_x86.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dis_x86.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dis_x86.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT x86/x64 disassembler module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 -- This is a helper module used by the LuaJIT machine code dumper module.
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dump.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/dump.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/dump.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT compiler dump module.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -55,7 +55,6 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -102,10 +101,12 @@ end
 local function fillsymtab(tr, nexit)
   local t = symtab
   if nexitsym == 0 then
+    local maskaddr = jit.arch == "arm" and -2
     local ircall = vmdef.ircall
     for i=0,#ircall do
       local addr = ircalladdr(i)
       if addr ~= 0 then
+	if maskaddr then addr = band(addr, maskaddr) end
 	if addr < 0 then addr = addr + 2^32 end
 	t[addr] = ircall[i]
       end
@@ -217,8 +218,10 @@ local function colorize_text(s)
   return s
 end
 
-local function colorize_ansi(s, t)
-  return format(colortype_ansi[t], s)
+local function colorize_ansi(s, t, extra)
+  local out = format(colortype_ansi[t], s)
+  if extra then out = "\027[3m"..out end
+  return out
 end
 
 local irtype_ansi = setmetatable({},
@@ -227,9 +230,10 @@ local irtype_ansi = setmetatable({},
 
 local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
 
-local function colorize_html(s, t)
+local function colorize_html(s, t, extra)
   s = gsub(s, "[<>&]", html_escape)
-  return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+  return format('<span class="irt_%s%s">%s</span>',
+		irtype_text[t], extra and " irt_extra" or "", s)
 end
 
 local irtype_html = setmetatable({},
@@ -254,6 +258,7 @@ span.irt_tab { color: #c00000; }
 span.irt_udt, span.irt_lud { color: #00c0c0; }
 span.irt_num { color: #4040c0; }
 span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
+span.irt_extra { font-style: italic; }
 </style>
 ]]
 
@@ -269,6 +274,7 @@ local litname = {
     if band(mode, 8) ~= 0 then s = s.."C" end
     if band(mode, 16) ~= 0 then s = s.."R" end
     if band(mode, 32) ~= 0 then s = s.."I" end
+    if band(mode, 64) ~= 0 then s = s.."K" end
     t[mode] = s
     return s
   end}),
@@ -277,15 +283,18 @@ local litname = {
     local s = irtype[band(mode, 31)]
     s = irtype[band(shr(mode, 5), 31)].."."..s
     if band(mode, 0x800) ~= 0 then s = s.." sext" end
-    local c = shr(mode, 14)
-    if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
+    local c = shr(mode, 12)
+    if c == 1 then s = s.." none"
+    elseif c == 2 then s = s.." index"
+    elseif c == 3 then s = s.." check" end
     t[mode] = s
     return s
   end}),
   ["FLOAD "] = vmdef.irfield,
   ["FREF  "] = vmdef.irfield,
   ["FPMATH"] = vmdef.irfpm,
-  ["BUFHDR"] = { [0] = "RESET", "APPEND" },
+  ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
+  ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
   ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
 }
 
@@ -345,7 +354,7 @@ local function formatk(tr, idx, sn)
   else
     s = tostring(k) -- For primitives.
   end
-  s = colorize(format("%-4s", s), t)
+  s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
   if slot then
     s = format("%s @%d", s, slot)
   end
@@ -365,7 +374,7 @@ local function printsnap(tr, snap)
 	out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
       else
 	local m, ot, op1, op2 = traceir(tr, ref)
-	out:write(colorize(format("%04d", ref), band(ot, 31)))
+	out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
       end
       out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
     else
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/p.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/p.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/p.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT profiler.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -41,7 +41,6 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local profile = require("jit.profile")
 local vmdef = require("jit.vmdef")
 local math = math
@@ -238,6 +237,7 @@ local function prof_finish()
     prof_count1 = nil
     prof_count2 = nil
     prof_ud = nil
+    if out ~= stdout then out:close() end
   end
 end
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/v.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/v.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/v.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- Verbose mode of the LuaJIT compiler.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
@@ -59,7 +59,6 @@
 
 -- Cache some library functions and objects.
 local jit = require("jit")
-assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
 local jutil = require("jit.util")
 local vmdef = require("jit.vmdef")
 local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/jit/zone.lua
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/jit/zone.lua
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/jit/zone.lua
@@ -1,7 +1,7 @@
 ----------------------------------------------------------------------------
 -- LuaJIT profiler zones.
 --
--- Copyright (C) 2005-2021 Mike Pall. All rights reserved.
+-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
 -- Released under the MIT license. See Copyright Notice in luajit.h
 ----------------------------------------------------------------------------
 --
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_aux.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_aux.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_aux.c
@@ -1,6 +1,6 @@
 /*
 ** Auxiliary library for the Lua/C API.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -21,6 +21,7 @@
 #include "lj_state.h"
 #include "lj_trace.h"
 #include "lj_lib.h"
+#include "lj_vmevent.h"
 
 #if LJ_TARGET_POSIX
 #include <sys/wait.h>
@@ -318,6 +319,18 @@ static int panic(lua_State *L)
   return 0;
 }
 
+#ifndef LUAJIT_DISABLE_VMEVENT
+static int error_finalizer(lua_State *L)
+{
+  const char *s = lua_tostring(L, -1);
+  fputs("ERROR in finalizer: ", stderr);
+  fputs(s ? s : "?", stderr);
+  fputc('\n', stderr);
+  fflush(stderr);
+  return 0;
+}
+#endif
+
 #ifdef LUAJIT_USE_SYSMALLOC
 
 #if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
@@ -339,7 +352,16 @@ static void *mem_alloc(void *ud, void *p
 LUALIB_API lua_State *luaL_newstate(void)
 {
   lua_State *L = lua_newstate(mem_alloc, NULL);
-  if (L) G(L)->panic = panic;
+  if (L) {
+    G(L)->panic = panic;
+#ifndef LUAJIT_DISABLE_VMEVENT
+    luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+    lua_pushcfunction(L, error_finalizer);
+    lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
+    G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
+    L->top--;
+#endif
+  }
   return L;
 }
 
@@ -353,7 +375,16 @@ LUALIB_API lua_State *luaL_newstate(void
 #else
   L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
 #endif
-  if (L) G(L)->panic = panic;
+  if (L) {
+    G(L)->panic = panic;
+#ifndef LUAJIT_DISABLE_VMEVENT
+    luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+    lua_pushcfunction(L, error_finalizer);
+    lua_rawseti(L, -2, VMEVENT_HASH(LJ_VMEVENT_ERRFIN));
+    G(L)->vmevmask = VMEVENT_MASK(LJ_VMEVENT_ERRFIN);
+    L->top--;
+#endif
+  }
   return L;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_base.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_base.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_base.c
@@ -1,6 +1,6 @@
 /*
 ** Base and coroutine library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -19,6 +19,7 @@
 #include "lj_gc.h"
 #include "lj_err.h"
 #include "lj_debug.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_meta.h"
@@ -75,9 +76,10 @@ LJLIB_ASM_(type)		LJLIB_REC(.)
 /* This solves a circular dependency problem -- change FF_next_N as needed. */
 LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
 
-LJLIB_ASM(next)
+LJLIB_ASM(next)			LJLIB_REC(.)
 {
   lj_lib_checktab(L, 1);
+  lj_err_msg(L, LJ_ERR_NEXTIDX);
   return FFH_UNREACHABLE;
 }
 
@@ -301,7 +303,7 @@ LJLIB_ASM(tonumber)		LJLIB_REC(.)
 	while (lj_char_isspace((unsigned char)(*ep))) ep++;
 	if (*ep == '\0') {
 	  if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
-	    if (neg) ul = (unsigned long)-(long)ul;
+	    if (neg) ul = ~ul+1u;
 	    setintV(L->base-1-LJ_FR2, (int32_t)ul);
 	  } else {
 	    lua_Number n = (lua_Number)ul;
@@ -406,10 +408,22 @@ LJLIB_CF(load)
   GCstr *name = lj_lib_optstr(L, 2);
   GCstr *mode = lj_lib_optstr(L, 3);
   int status;
-  if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
-    GCstr *s = lj_lib_checkstr(L, 1);
+  if (L->base < L->top &&
+      (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
+    const char *s;
+    MSize len;
+    if (tvisbuf(L->base)) {
+      SBufExt *sbx = bufV(L->base);
+      s = sbx->r;
+      len = sbufxlen(sbx);
+      if (!name) name = &G(L)->strempty;  /* Buffers are not NUL-terminated. */
+    } else {
+      GCstr *str = lj_lib_checkstr(L, 1);
+      s = strdata(str);
+      len = str->len;
+    }
     lua_settop(L, 4);  /* Ensure env arg exists. */
-    status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
+    status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
 			      mode ? strdata(mode) : NULL);
   } else {
     lj_lib_checkfunc(L, 1);
@@ -602,7 +616,10 @@ static int ffh_resume(lua_State *L, lua_
     setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
     return FFH_RES(2);
   }
-  lj_state_growstack(co, (MSize)(L->top - L->base));
+  if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
+    cTValue *msg = --co->top;
+    lj_err_callermsg(L, strVdata(msg));
+  }
   return FFH_RETRY;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_bit.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_bit.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_bit.c
@@ -1,6 +1,6 @@
 /*
 ** Bit manipulation library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_bit_c
@@ -155,7 +155,8 @@ LJLIB_CF(bit_tohex)		LJLIB_REC(.)
 #endif
   SBuf *sb = lj_buf_tmp_(L);
   SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~(uint32_t)n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
 #if LJ_HASFFI
   if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_buffer.c
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_buffer.c
@@ -0,0 +1,360 @@
+/*
+** Buffer library.
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_buffer_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#endif
+#include "lj_strfmt.h"
+#include "lj_serialize.h"
+#include "lj_lib.h"
+
+/* -- Helper functions ---------------------------------------------------- */
+
+/* Check that the first argument is a string buffer. */
+static SBufExt *buffer_tobuf(lua_State *L)
+{
+  if (!(L->base < L->top && tvisbuf(L->base)))
+    lj_err_argtype(L, 1, "buffer");
+  return bufV(L->base);
+}
+
+/* Like buffer_tobuf(), for writers: also calls setsbufXL_() on the buffer with L. */
+static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  setsbufXL_(sbx, L);
+  return sbx;
+}
+
+#define buffer_toudata(sbx)	((GCudata *)(sbx)-1)  /* One GCudata before sbx. */
+
+/* -- Buffer methods ------------------------------------------------------ */
+
+#define LJLIB_MODULE_buffer_method
+
+LJLIB_CF(buffer_method_free)
+{
+  SBufExt *buf = buffer_tobuf(L);  /* Raises unless arg 1 is a buffer. */
+  lj_bufx_free(L, buf);
+  L->top = L->base + 1;  /* Keep only the buffer object as the result. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_reset)		LJLIB_REC(.)
+{
+  SBufExt *buf = buffer_tobuf(L);  /* Raises unless arg 1 is a buffer. */
+  lj_bufx_reset(buf);
+  L->top = L->base + 1;  /* Keep only the buffer object as the result. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_skip)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  MSize skip = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+  if (skip < sbufxlen(sbx)) {
+    sbx->r += skip;  /* Partial skip: advance the read pointer only. */
+  } else {  /* Skipping everything that is buffered. */
+    if (!sbufiscow(sbx))
+      sbx->w = sbx->b;  /* Rewind the write pointer to the buffer start ... */
+    sbx->r = sbx->w;  /* ... and leave the buffer empty (r == w). */
+  }
+  /* Truncate the stack to the buffer object so it is the single result. */
+  L->top = L->base + 1;
+  return 1;
+}
+
+LJLIB_CF(buffer_method_set)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  GCobj *ref;
+  const char *p;  /* Start of the data the buffer will refer to. */
+  MSize len;  /* Length of that data in bytes. */
+#if LJ_HASFFI
+  if (tviscdata(L->base+1)) {  /* cdata in arg 2: length comes from arg 3. */
+    CTState *cts = ctype_cts(L);
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+		   L->base+1, CCF_ARG(2));
+    len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+  } else
+#endif
+  {  /* Otherwise arg 2 must pass lj_lib_checkstrx(); use its data and length. */
+    GCstr *str = lj_lib_checkstrx(L, 2);
+    p = strdata(str);
+    len = str->len;
+  }
+  lj_bufx_free(L, sbx);  /* Free before pointing at the new data. */
+  lj_bufx_set_cow(L, sbx, p, len);
+  ref = gcV(L->base+1);
+  setgcref(sbx->cowref, ref);  /* Store the arg 2 object in cowref ... */
+  lj_gc_objbarrier(L, buffer_toudata(sbx), ref);  /* ... with a GC barrier. */
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_put)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  ptrdiff_t arg, narg = L->top - L->base;
+  for (arg = 1; arg < narg; arg++) {  /* Append every argument after the buffer. */
+    cTValue *o = &L->base[arg], *mo = NULL;  /* mo != NULL: __tostring already tried. */
+  retry:
+    if (tvisstr(o)) {
+      lj_buf_putstr((SBuf *)sbx, strV(o));
+    } else if (tvisint(o)) {
+      lj_strfmt_putint((SBuf *)sbx, intV(o));
+    } else if (tvisnum(o)) {
+      lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
+    } else if (tvisbuf(o)) {
+      SBufExt *sbx2 = bufV(o);
+      if (sbx2 == sbx) lj_err_arg(L, (int)(arg+1), LJ_ERR_BUFFER_SELF);  /* No self-append. */
+      lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
+    } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+      /* Call __tostring metamethod inline. */
+      copyTV(L, L->top++, mo);
+      copyTV(L, L->top++, o);
+      lua_call(L, 1, 1);
+      o = &L->base[arg];  /* The stack may have been reallocated. */
+      copyTV(L, &L->base[arg], L->top-1);  /* Overwrite the argument with the result. */
+      L->top = L->base + narg;  /* Restore the original stack top. */
+      goto retry;  /* Retry with the result. */
+    } else {
+      lj_err_argtype(L, (int)(arg+1), "string/number/__tostring");
+    }
+    /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
+  }
+  L->top = L->base+1;  /* Chain buffer object. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_method_putf)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  /* NOTE(review): presumably arg 2 is the format string -- confirm in lj_strfmt.c. */
+  lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
+  L->top = L->base+1;  /* Chain buffer object. */
+  lj_gc_check(L); return 1;
+}
+
+LJLIB_CF(buffer_method_get)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  ptrdiff_t arg, narg = L->top - L->base;
+  if (narg == 1) {
+    narg++;
+    setnilV(L->top++);  /* get() is the same as get(nil). */
+  }
+  for (arg = 1; arg < narg; arg++) {  /* One result string per length argument. */
+    TValue *o = &L->base[arg];
+    MSize n = tvisnil(o) ? LJ_MAX_BUF :
+	      (MSize) lj_lib_checkintrange(L, (int)(arg+1), 0, LJ_MAX_BUF);
+    MSize len = sbufxlen(sbx);
+    if (n > len) n = len;  /* Clamp the request to sbufxlen(sbx). */
+    setstrV(L, o, lj_str_new(L, sbx->r, n));  /* Result replaces the argument slot. */
+    sbx->r += n;  /* Consume the bytes just returned. */
+  }
+  if (sbx->r == sbx->w && !sbufiscow(sbx)) sbx->r = sbx->w = sbx->b;  /* Drained: rewind. */
+  lj_gc_check(L);
+  return (int)(narg-1);
+}
+
+#if LJ_HASFFI
+LJLIB_CF(buffer_method_putcdata)	LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  const char *p;  /* Source pointer converted from the cdata in arg 2. */
+  MSize len;  /* Byte count taken from arg 3. */
+  if (tviscdata(L->base+1)) {
+    CTState *cts = ctype_cts(L);
+    lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+		   L->base+1, CCF_ARG(2));
+  } else {
+    lj_err_argtype(L, 2, "cdata");  /* Only cdata is accepted for arg 2. */
+  }
+  len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+  lj_buf_putmem((SBuf *)sbx, p, len);  /* Append len bytes starting at p. */
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_reserve)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+  GCcdata *cd;
+  lj_buf_more((SBuf *)sbx, sz);  /* NOTE(review): presumably ensures sz bytes of space. */
+  ctype_loadffi(L);
+  cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
+  *(void **)cdataptr(cd) = sbx->w;  /* The cdata holds the current write pointer. */
+  setcdataV(L, L->top++, cd);  /* First result: the pointer cdata. */
+  setintV(L->top++, sbufleft(sbx));  /* Second result: sbufleft(sbx). */
+  return 2;
+}
+
+LJLIB_CF(buffer_method_commit)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+  if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);  /* More than sbufleft(). */
+  sbx->w += len;  /* Advance the write pointer by len bytes. */
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_ref)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  GCcdata *cd;
+  ctype_loadffi(L);
+  cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
+  *(void **)cdataptr(cd) = sbx->r;  /* The cdata holds the current read pointer. */
+  setcdataV(L, L->top++, cd);  /* First result: the pointer cdata. */
+  setintV(L->top++, sbufxlen(sbx));  /* Second result: sbufxlen(sbx). */
+  return 2;
+}
+#endif
+
+LJLIB_CF(buffer_method_encode)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  cTValue *o = lj_lib_checkany(L, 2);  /* Arg 2 (any type) is the object to encode. */
+  lj_serialize_put(sbx, o);  /* Passes the buffer and the object to the serializer. */
+  lj_gc_check(L);
+  L->top = L->base+1;  /* Chain buffer object. */
+  return 1;
+}
+
+LJLIB_CF(buffer_method_decode)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobufw(L);
+  setnilV(L->top++);  /* Result slot, passed to lj_serialize_get() below. */
+  sbx->r = lj_serialize_get(sbx, L->top-1);  /* Return value is the new read pointer. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_method___gc)
+{
+  SBufExt *buf = buffer_tobuf(L);  /* Raises unless arg 1 is a buffer. */
+  lj_bufx_free(L, buf);
+  return 0;  /* No results. */
+}
+
+LJLIB_CF(buffer_method___tostring)	LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  /* New string from sbufxlen(sbx) bytes at the read pointer; r is not advanced. */
+  setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
+  lj_gc_check(L); return 1;
+}
+
+LJLIB_CF(buffer_method___len)		LJLIB_REC(.)
+{
+  SBufExt *sbx = buffer_tobuf(L);
+  setintV(L->top-1, (int32_t)sbufxlen(sbx));  /* Result: sbufxlen(sbx) as an integer. */
+  return 1;
+}
+
+LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+/* -- Buffer library functions -------------------------------------------- */
+
+#define LJLIB_MODULE_buffer
+
+LJLIB_PUSH(top-2) LJLIB_SET(!)  /* Set environment. */
+
+LJLIB_CF(buffer_new)
+{
+  MSize sz = 0;  /* Requested initial size; 0 means none requested. */
+  int targ = 1;  /* Argument index where the optional options table is expected. */
+  GCtab *env, *dict_str = NULL, *dict_mt = NULL;
+  GCudata *ud;
+  SBufExt *sbx;
+  if (L->base < L->top && !tvistab(L->base)) {  /* Arg 1 is not a table: size or nil. */
+    targ = 2;
+    if (!tvisnil(L->base))
+      sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
+  }
+  if (L->base+targ-1 < L->top) {  /* An argument is present at targ; must be a table. */
+    GCtab *options = lj_lib_checktab(L, targ);
+    cTValue *opt_dict, *opt_mt;
+    opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
+    if (opt_dict && tvistab(opt_dict)) {  /* Non-table "dict" values are ignored. */
+      dict_str = tabV(opt_dict);
+      lj_serialize_dict_prep_str(L, dict_str);
+    }
+    opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
+    if (opt_mt && tvistab(opt_mt)) {  /* Non-table "metatable" values are ignored. */
+      dict_mt = tabV(opt_mt);
+      lj_serialize_dict_prep_mt(L, dict_mt);
+    }
+  }
+  env = tabref(curr_func(L)->c.env);
+  ud = lj_udata_new(L, sizeof(SBufExt), env);
+  ud->udtype = UDTYPE_BUFFER;
+  /* NOBARRIER: The GCudata is new (marked white). */
+  setgcref(ud->metatable, obj2gco(env));  /* The function env is also the metatable. */
+  setudataV(L, L->top++, ud);  /* Push the new userdata as the result. */
+  sbx = (SBufExt *)uddata(ud);
+  lj_bufx_init(L, sbx);
+  setgcref(sbx->dict_str, obj2gco(dict_str));
+  setgcref(sbx->dict_mt, obj2gco(dict_mt));
+  if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);  /* Only when a nonzero size was given. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_encode)			LJLIB_REC(.)
+{
+  cTValue *o = lj_lib_checkany(L, 1);  /* Arg 1 (any type) is the object to encode. */
+  setstrV(L, L->top++, lj_serialize_encode(L, o));  /* Push the returned string. */
+  lj_gc_check(L);
+  return 1;
+}
+
+LJLIB_CF(buffer_decode)			LJLIB_REC(.)
+{
+  GCstr *str = lj_lib_checkstrx(L, 1);  /* Arg 1 must pass lj_lib_checkstrx(). */
+  setnilV(L->top++);  /* Result slot, passed to lj_serialize_decode() below. */
+  lj_serialize_decode(L, L->top-1, str);
+  lj_gc_check(L);
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+int luaopen_string_buffer(lua_State *L)
+{
+  LJ_LIB_REG(L, NULL, buffer_method);  /* Register the buffer methods first. */
+  lua_getfield(L, -1, "__tostring");
+  lua_setfield(L, -2, "tostring");  /* Same function stored again under "tostring". */
+  LJ_LIB_REG(L, NULL, buffer);  /* Then register the library functions. */
+  return 1;
+}
+
+#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_debug.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_debug.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_debug.c
@@ -1,6 +1,6 @@
 /*
 ** Debug library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_ffi.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_ffi.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_ffi.c
@@ -1,6 +1,6 @@
 /*
 ** FFI library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_ffi_c
@@ -573,6 +573,7 @@ LJLIB_CF(ffi_typeinfo)
       setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
     if (gcref(ct->name)) {
       GCstr *s = gco2str(gcref(ct->name));
+      if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
       setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
     }
     lj_gc_check(L);
@@ -638,7 +639,7 @@ LJLIB_CF(ffi_alignof)	LJLIB_REC(ffi_xof
   CTState *cts = ctype_cts(L);
   CTypeID id = ffi_checkctype(L, cts, NULL);
   CTSize sz = 0;
-  CTInfo info = lj_ctype_info(cts, id, &sz);
+  CTInfo info = lj_ctype_info_raw(cts, id, &sz);
   setintV(L->top-1, 1 << ctype_align(info));
   return 1;
 }
@@ -744,6 +745,9 @@ LJLIB_CF(ffi_abi)	LJLIB_REC(.)
 #if LJ_ABI_WIN
     "\003win"
 #endif
+#if LJ_ABI_PAUTH
+    "\005pauth"
+#endif
 #if LJ_TARGET_UWP
     "\003uwp"
 #endif
@@ -769,13 +773,13 @@ LJLIB_CF(ffi_metatype)
   CTypeID id = ffi_checkctype(L, cts, NULL);
   GCtab *mt = lj_lib_checktab(L, 2);
   GCtab *t = cts->miscmap;
-  CType *ct = ctype_get(cts, id);  /* Only allow raw types. */
+  CType *ct = ctype_raw(cts, id);
   TValue *tv;
   GCcdata *cd;
   if (!(ctype_isstruct(ct->info) || ctype_iscomplex(ct->info) ||
 	ctype_isvector(ct->info)))
     lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
-  tv = lj_tab_setinth(L, t, -(int32_t)id);
+  tv = lj_tab_setinth(L, t, -(int32_t)ctype_typeid(cts, ct));
   if (!tvisnil(tv))
     lj_err_caller(L, LJ_ERR_PROTMT);
   settabV(L, tv, mt);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_init.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_init.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_init.c
@@ -1,6 +1,6 @@
 /*
 ** Library initialization.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major parts taken verbatim from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_io.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_io.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_io.c
@@ -1,6 +1,6 @@
 /*
 ** I/O library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2011 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -60,12 +60,12 @@ static IOFileUD *io_tofile(lua_State *L)
   return iof;
 }
 
-static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
+static IOFileUD *io_stdfile(lua_State *L, ptrdiff_t id)
 {
   IOFileUD *iof = IOSTDF_IOF(L, id);
   if (iof->fp == NULL)
     lj_err_caller(L, LJ_ERR_IOSTDCL);
-  return iof->fp;
+  return iof;
 }
 
 static IOFileUD *io_file_new(lua_State *L)
@@ -178,7 +178,7 @@ static int io_file_readlen(lua_State *L,
     MSize n = (MSize)fread(buf, 1, m, fp);
     setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
     lj_gc_check(L);
-    return (n > 0 || m == 0);
+    return n > 0;
   } else {
     int c = getc(fp);
     ungetc(c, fp);
@@ -187,8 +187,9 @@ static int io_file_readlen(lua_State *L,
   }
 }
 
-static int io_file_read(lua_State *L, FILE *fp, int start)
+static int io_file_read(lua_State *L, IOFileUD *iof, int start)
 {
+  FILE *fp = iof->fp;
   int ok, n, nargs = (int)(L->top - L->base) - start;
   clearerr(fp);
   if (nargs == 0) {
@@ -224,8 +225,9 @@ static int io_file_read(lua_State *L, FI
   return n - start;
 }
 
-static int io_file_write(lua_State *L, FILE *fp, int start)
+static int io_file_write(lua_State *L, IOFileUD *iof, int start)
 {
+  FILE *fp = iof->fp;
   cTValue *tv;
   int status = 1;
   for (tv = L->base+start; tv < L->top; tv++) {
@@ -253,13 +255,11 @@ static int io_file_iter(lua_State *L)
     lj_err_caller(L, LJ_ERR_IOCLFL);
   L->top = L->base;
   if (n) {  /* Copy upvalues with options to stack. */
-    if (n > LUAI_MAXCSTACK)
-      lj_err_caller(L, LJ_ERR_STKOV);
     lj_state_checkstack(L, (MSize)n);
     memcpy(L->top, &fn->c.upvalue[1], n*sizeof(TValue));
     L->top += n;
   }
-  n = io_file_read(L, iof->fp, 0);
+  n = io_file_read(L, iof, 0);
   if (ferror(iof->fp))
     lj_err_callermsg(L, strVdata(L->top-2));
   if (tvisnil(L->base) && (iof->type & IOFILE_FLAG_CLOSE)) {
@@ -284,19 +284,25 @@ static int io_file_lines(lua_State *L)
 
 LJLIB_CF(io_method_close)
 {
-  IOFileUD *iof = L->base < L->top ? io_tofile(L) :
-		  IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+  IOFileUD *iof;
+  if (L->base < L->top) {
+    iof = io_tofile(L);
+  } else {
+    iof = IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
+    if (iof->fp == NULL)
+      lj_err_caller(L, LJ_ERR_IOCLFL);
+  }
   return io_file_close(L, iof);
 }
 
 LJLIB_CF(io_method_read)
 {
-  return io_file_read(L, io_tofile(L)->fp, 1);
+  return io_file_read(L, io_tofile(L), 1);
 }
 
 LJLIB_CF(io_method_write)		LJLIB_REC(io_write 0)
 {
-  return io_file_write(L, io_tofile(L)->fp, 1);
+  return io_file_write(L, io_tofile(L), 1);
 }
 
 LJLIB_CF(io_method_flush)		LJLIB_REC(io_flush 0)
@@ -433,7 +439,7 @@ LJLIB_CF(io_popen)
 LJLIB_CF(io_tmpfile)
 {
   IOFileUD *iof = io_file_new(L);
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
   iof->fp = NULL; errno = ENOSYS;
 #else
   iof->fp = tmpfile();
@@ -458,7 +464,7 @@ LJLIB_CF(io_write)		LJLIB_REC(io_write G
 
 LJLIB_CF(io_flush)		LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
 {
-  return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
+  return luaL_fileresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)->fp) == 0, NULL);
 }
 
 static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_jit.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_jit.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_jit.c
@@ -1,6 +1,6 @@
 /*
 ** JIT library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lib_jit_c
@@ -346,11 +346,7 @@ LJLIB_CF(jit_util_tracek)
       ir = &T->ir[ir->op1];
     }
 #if LJ_HASFFI
-    if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    if (ir->o == IR_KINT64) ctype_loadffi(L);
 #endif
     lj_ir_kvalue(L, L->top-2, ir);
     setintV(L->top-1, (int32_t)irt_type(ir->t));
@@ -426,7 +422,8 @@ LJLIB_CF(jit_util_ircalladdr)
 {
   uint32_t idx = (uint32_t)lj_lib_checkint(L, 1);
   if (idx < IRCALL__MAX) {
-    setintptrV(L->top-1, (intptr_t)(void *)lj_ir_callinfo[idx].func);
+    ASMFunction func = lj_ir_callinfo[idx].func;
+    setintptrV(L->top-1, (intptr_t)(void *)lj_ptr_strip(func));
     return 1;
   }
   return 0;
@@ -652,6 +649,81 @@ JIT_PARAMDEF(JIT_PARAMINIT)
 #include <sys/utsname.h>
 #endif
 
+#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX
+#include <setjmp.h>
+#include <signal.h>
+static sigjmp_buf sigbuf = {0};  /* Jump target set by sigsetjmp() in riscv_probe(). */
+static void detect_sigill(int sig)  /* Installed for SIGILL by jit_cpudetect(). */
+{
+  siglongjmp(sigbuf, 1);  /* Makes sigsetjmp() in riscv_probe() return 1. */
+}
+
+/* Probe for the C (compressed) extension; call through riscv_probe(). */
+static int riscv_compressed(void)
+{
+#if defined(__riscv_c) || defined(__riscv_compressed)
+  return 1;  /* Built with RVC: would have crashed before getting here. */
+#elif defined(__GNUC__)
+  /* c.nop; c.nop; -- only checks that the instructions execute. */
+  __asm__(".4byte 0x00010001");
+  return 1;
+#else
+  return 0;  /* No inline asm to probe with: report the extension as absent. */
+#endif
+}
+
+/* Probe for the Zba extension; call through riscv_probe(). */
+static int riscv_zba(void)
+{
+#if defined(__riscv_b) || defined(__riscv_zba)
+  return 1;  /* Built with Zba: would have crashed before getting here. */
+#elif defined(__GNUC__)
+  /* Don't bother verifying the result, just check if the instruction exists. */
+  /* add.uw zero, zero, zero */
+  __asm__(".4byte 0x0800003b");
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+/* Probe for Zbb by running sext.b and checking it; call through riscv_probe(). */
+static int riscv_zbb(void)
+{
+#if defined(__riscv_b) || defined(__riscv_zbb)
+  return 1;  /* Built with Zbb: would have crashed before getting here. */
+#elif defined(__GNUC__)
+  register int t asm ("a0");  /* Output operand of the asm below ("=r"). */
+  /* addi a0, zero, 255; sext.b a0, a0; -- sign-extending 0xff gives -1. */
+  __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x60451513" : "=r"(t));
+  return t < 0;
+#else
+  return 0;
+#endif
+}
+
+/* Probe T-Head XThead* via th.ext (XTheadBb); call through riscv_probe(). */
+static int riscv_xthead(void)
+{
+#if defined(__GNUC__)
+    register int t asm ("a0");  /* Output operand of the asm below ("=r"). */
+    /* C906/C910/C908 all have "xtheadc", a superset of XTheadBb, so assume */
+    /* XThead* is present if XTheadBb is. addi a0,zero,255; th.ext a0,a0,7,0; */
+    __asm__ volatile("addi a0, zero, 255\n\t.4byte 0x1c05250b" : "=r"(t));
+    return t == -1;		/* In case of collision with other vendor extensions. */
+#else
+    return 0;
+#endif
+}
+
+/* Run func() under sigsetjmp(); yield flag if it returns nonzero, else 0. */
+static uint32_t riscv_probe(int (*func)(void), uint32_t flag)
+{
+    if (sigsetjmp(sigbuf, 1) != 0) return 0;  /* Re-entered via siglongjmp(). */
+    return func() ? flag : 0;
+}
+#endif
+
 /* Arch-dependent CPU feature detection. */
 static uint32_t jit_cpudetect(void)
 {
@@ -723,6 +795,22 @@ static uint32_t jit_cpudetect(void)
   }
 #endif
 
+#elif LJ_TARGET_RISCV64
+#if LJ_HASJIT
+  /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */
+  struct sigaction old = {0}, act = {0};
+  act.sa_handler = detect_sigill;
+  sigaction(SIGILL, &act, &old);
+  flags |= riscv_probe(riscv_compressed, JIT_F_RVC);
+  flags |= riscv_probe(riscv_zba, JIT_F_RVZba);
+  flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb);
+  flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead);
+  sigaction(SIGILL, &old, NULL);
+
+  /* Detect V/P? */
+  /* V has no hardware available yet; P is not ratified yet. */
+#endif
+
 #else
 #error "Missing CPU detection for this architecture"
 #endif
@@ -746,7 +834,7 @@ LUALIB_API int luaopen_jit(lua_State *L)
 #endif
   lua_pushliteral(L, LJ_OS_NAME);
   lua_pushliteral(L, LJ_ARCH_NAME);
-  lua_pushinteger(L, LUAJIT_VERSION_NUM);
+  lua_pushinteger(L, LUAJIT_VERSION_NUM);  /* Deprecated. */
   lua_pushliteral(L, LUAJIT_VERSION);
   LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
 #if LJ_HASPROFILE
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_math.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_math.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_math.c
@@ -1,6 +1,6 @@
 /*
 ** Math library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <math.h>
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_os.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_os.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_os.c
@@ -1,6 +1,6 @@
 /*
 ** OS library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -76,7 +76,7 @@ LJLIB_CF(os_rename)
 
 LJLIB_CF(os_tmpname)
 {
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
   lj_err_caller(L, LJ_ERR_OSUNIQF);
   return 0;
 #else
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_package.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_package.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_package.c
@@ -1,6 +1,6 @@
 /*
 ** Package library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2012 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -57,7 +57,7 @@ static lua_CFunction ll_sym(lua_State *L
 
 static const char *ll_bcsym(void *lib, const char *sym)
 {
-#if defined(RTLD_DEFAULT)
+#if defined(RTLD_DEFAULT) && !defined(NO_RTLD_DEFAULT)
   if (lib == NULL) lib = RTLD_DEFAULT;
 #elif LJ_TARGET_OSX || LJ_TARGET_BSD
   if (lib == NULL) lib = (void *)(intptr_t)-2;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_string.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_string.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_string.c
@@ -1,6 +1,6 @@
 /*
 ** String library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -640,89 +640,14 @@ LJLIB_CF(string_gsub)
 
 /* ------------------------------------------------------------------------ */
 
-/* Emulate tostring() inline. */
-static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
-{
-  TValue *o = L->base+arg-1;
-  cTValue *mo;
-  lj_assertL(o < L->top, "bad usage");  /* Caller already checks for existence. */
-  if (LJ_LIKELY(tvisstr(o)))
-    return strV(o);
-  if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
-    copyTV(L, L->top++, mo);
-    copyTV(L, L->top++, o);
-    lua_call(L, 1, 1);
-    copyTV(L, L->base+arg-1, --L->top);
-    return NULL;  /* Buffer may be overwritten, retry. */
-  }
-  return lj_strfmt_obj(L, o);
-}
-
 LJLIB_CF(string_format)		LJLIB_REC(.)
 {
-  int arg, top = (int)(L->top - L->base);
-  GCstr *fmt;
-  SBuf *sb;
-  FormatState fs;
-  SFormat sf;
   int retry = 0;
-again:
-  arg = 1;
-  sb = lj_buf_tmp_(L);
-  fmt = lj_lib_checkstr(L, arg);
-  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
-  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
-    if (sf == STRFMT_LIT) {
-      lj_buf_putmem(sb, fs.str, fs.len);
-    } else if (sf == STRFMT_ERR) {
-      lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
-    } else {
-      if (++arg > top)
-	luaL_argerror(L, arg, lj_obj_typename[0]);
-      switch (STRFMT_TYPE(sf)) {
-      case STRFMT_INT:
-	if (tvisint(L->base+arg-1)) {
-	  int32_t k = intV(L->base+arg-1);
-	  if (sf == STRFMT_INT)
-	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
-	  else
-	    lj_strfmt_putfxint(sb, sf, k);
-	} else {
-	  lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
-	}
-	break;
-      case STRFMT_UINT:
-	if (tvisint(L->base+arg-1))
-	  lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
-	else
-	  lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
-	break;
-      case STRFMT_NUM:
-	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
-	break;
-      case STRFMT_STR: {
-	GCstr *str = string_fmt_tostring(L, arg, retry);
-	if (str == NULL)
-	  retry = 1;
-	else if ((sf & STRFMT_T_QUOTED))
-	  lj_strfmt_putquoted(sb, str);  /* No formatting. */
-	else
-	  lj_strfmt_putfstr(sb, sf, str);
-	break;
-	}
-      case STRFMT_CHAR:
-	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
-	break;
-      case STRFMT_PTR:  /* No formatting. */
-	lj_strfmt_putptr(sb, lj_obj_ptr(G(L), L->base+arg-1));
-	break;
-      default:
-	lj_assertL(0, "bad string format type");
-	break;
-      }
-    }
-  }
-  if (retry++ == 1) goto again;
+  SBuf *sb;
+  do {
+    sb = lj_buf_tmp_(L);
+    retry = lj_strfmt_putarg(L, sb, 1, -retry);
+  } while (retry > 0);
   setstrV(L, L->top-1, lj_buf_str(L, sb));
   lj_gc_check(L);
   return 1;
@@ -743,6 +668,9 @@ LUALIB_API int luaopen_string(lua_State
   setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
   settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
   mt->nomm = (uint8_t)(~(1u<<MM_index));
+#if LJ_HASBUFFER
+  lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
+#endif
   return 1;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lib_table.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lib_table.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lib_table.c
@@ -1,6 +1,6 @@
 /*
 ** Table library.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -159,7 +159,7 @@ LJLIB_CF(table_concat)		LJLIB_REC(.)
   SBuf *sb = lj_buf_tmp_(L);
   SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
   if (LJ_UNLIKELY(!sbx)) {  /* Error: bad element type. */
-    int32_t idx = (int32_t)(intptr_t)sbufP(sb);
+    int32_t idx = (int32_t)(intptr_t)sb->w;
     cTValue *o = lj_tab_getint(t, idx);
     lj_err_callerv(L, LJ_ERR_TABCAT,
 		   lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_alloc.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_alloc.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_alloc.c
@@ -330,7 +330,7 @@ static void *mmap_plain(size_t size)
 #define CALL_MMAP(prng, size)	mmap_plain(size)
 #endif
 
-#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 && !LJ_TARGET_PS5
 
 #include <sys/resource.h>
 
@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, siz
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE	0
 #define CALL_MREMAP_MAYMOVE	1
-#if LJ_64 && !LJ_GC64
+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64)
 #define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
 #else
 #define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_api.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_api.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_api.c
@@ -1,6 +1,6 @@
 /*
 ** Public Lua/C API.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L,
   if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
     return 0;  /* Stack overflow. */
   } else if (size > 0) {
-    lj_state_checkstack(L, (MSize)size);
+    int avail = (int)(mref(L->maxstack, TValue) - L->top);
+    if (size > avail &&
+	lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
+      L->top--;
+      return 0;  /* Out of memory. */
+    }
   }
   return 1;
 }
@@ -707,36 +712,10 @@ LUA_API void lua_pushboolean(lua_State *
   incr_top(L);
 }
 
-#if LJ_64
-static void *lightud_intern(lua_State *L, void *p)
-{
-  global_State *g = G(L);
-  uint64_t u = (uint64_t)p;
-  uint32_t up = lightudup(u);
-  uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
-  MSize segnum = g->gc.lightudnum;
-  if (segmap) {
-    MSize seg;
-    for (seg = 0; seg <= segnum; seg++)
-      if (segmap[seg] == up)  /* Fast path. */
-	return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
-    segnum++;
-  }
-  if (!((segnum-1) & segnum) && segnum != 1) {
-    if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)) lj_err_msg(L, LJ_ERR_BADLU);
-    lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
-    setmref(g->gc.lightudseg, segmap);
-  }
-  g->gc.lightudnum = segnum;
-  segmap[segnum] = up;
-  return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
-}
-#endif
-
 LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
 {
 #if LJ_64
-  p = lightud_intern(L, p);
+  p = lj_lightud_intern(L, p);
 #endif
   setrawlightudV(L->top, p);
   incr_top(L);
@@ -805,7 +784,7 @@ LUA_API void lua_concat(lua_State *L, in
 	L->top -= n;
 	break;
       }
-      n -= (int)(L->top - top);
+      n -= (int)(L->top - (top - 2*LJ_FR2));
       L->top = top+2;
       lj_vm_call(L, top, 1+1);
       L->top -= 1+LJ_FR2;
@@ -919,11 +898,13 @@ LUA_API int lua_next(lua_State *L, int i
   cTValue *t = index2adr(L, idx);
   int more;
   lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
-  more = lj_tab_next(L, tabV(t), L->top-1);
-  if (more) {
+  more = lj_tab_next(tabV(t), L->top-1, L->top-1);
+  if (more > 0) {
     incr_top(L);  /* Return new key and value slot. */
-  } else {  /* End of traversal. */
+  } else if (!more) {  /* End of traversal. */
     L->top--;  /* Remove key slot. */
+  } else {
+    lj_err_msg(L, LJ_ERR_NEXTIDX);
   }
   return more;
 }
@@ -1179,7 +1160,7 @@ static TValue *cpcall(lua_State *L, lua_
   setfuncV(L, top++, fn);
   if (LJ_FR2) setnilV(top++);
 #if LJ_64
-  ud = lightud_intern(L, ud);
+  ud = lj_lightud_intern(L, ud);
 #endif
   setrawlightudV(top++, ud);
   cframe_nres(L->cframe) = 1+0;  /* Zero results. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_arch.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_arch.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_arch.h
@@ -1,6 +1,6 @@
 /*
 ** Target architecture selection.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ARCH_H
@@ -31,6 +31,10 @@
 #define LUAJIT_ARCH_mips32	6
 #define LUAJIT_ARCH_MIPS64	7
 #define LUAJIT_ARCH_mips64	7
+#define LUAJIT_ARCH_RISCV32	8
+#define LUAJIT_ARCH_riscv32	8
+#define LUAJIT_ARCH_RISCV64	9
+#define LUAJIT_ARCH_riscv64	9
 
 /* Target OS. */
 #define LUAJIT_OS_OTHER		0
@@ -57,7 +61,7 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_X64
 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) || defined(_M_ARM64)
 #define LUAJIT_TARGET	LUAJIT_ARCH_ARM64
 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
 #define LUAJIT_TARGET	LUAJIT_ARCH_PPC
@@ -65,8 +69,12 @@
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS64
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET	LUAJIT_ARCH_MIPS32
+#elif defined(__riscv) && __riscv_xlen == 32
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV32
+#elif defined(__riscv) && __riscv_xlen == 64
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
 #else
-#error "No support for this architecture (yet)"
+#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
 #endif
 
 #endif
@@ -83,7 +91,7 @@
 #define LUAJIT_OS	LUAJIT_OS_OSX
 #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
        defined(__NetBSD__) || defined(__OpenBSD__) || \
-       defined(__DragonFly__)) && !defined(__ORBIS__)
+       defined(__DragonFly__)) && !defined(__ORBIS__) && !defined(__PROSPERO__)
 #define LUAJIT_OS	LUAJIT_OS_BSD
 #elif (defined(__sun__) && defined(__svr4__))
 #define LJ_TARGET_SOLARIS	1
@@ -93,6 +101,9 @@
 #elif defined(__CYGWIN__)
 #define LJ_TARGET_CYGWIN	1
 #define LUAJIT_OS	LUAJIT_OS_POSIX
+#elif defined(__QNX__)
+#define LJ_TARGET_QNX		1
+#define LUAJIT_OS	LUAJIT_OS_POSIX
 #else
 #define LUAJIT_OS	LUAJIT_OS_OTHER
 #endif
@@ -139,6 +150,13 @@
 #define NULL ((void*)0)
 #endif
 
+#ifdef __PROSPERO__
+#define LJ_TARGET_PS5		1
+#define LJ_TARGET_CONSOLE	1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
 #ifdef __psp2__
 #define LJ_TARGET_PSVITA	1
 #define LJ_TARGET_CONSOLE	1
@@ -155,6 +173,13 @@
 #define LJ_TARGET_GC64		1
 #endif
 
+#ifdef __NX__
+#define LJ_TARGET_NX		1
+#define LJ_TARGET_CONSOLE	1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
 #ifdef _UWP
 #define LJ_TARGET_UWP		1
 #if LUAJIT_TARGET == LUAJIT_ARCH_X64
@@ -170,14 +195,10 @@
 #define LJ_ARCH_NAME		"x86"
 #define LJ_ARCH_BITS		32
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X86		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	8
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNALIGNED	1
@@ -188,14 +209,10 @@
 #define LJ_ARCH_NAME		"x64"
 #define LJ_ARCH_BITS		64
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN		1
-#else
-#define LJ_ABI_WIN		0
-#endif
 #define LJ_TARGET_X64		1
 #define LJ_TARGET_X86ORX64	1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	16
 #define LJ_TARGET_JUMPRANGE	31	/* +-2^31 = +-2GB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -203,6 +220,8 @@
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_SINGLE_DUAL
 #ifndef LUAJIT_DISABLE_GC64
 #define LJ_TARGET_GC64		1
+#elif LJ_TARGET_OSX
+#error "macOS requires GC64 -- don't disable it"
 #endif
 
 #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
@@ -219,13 +238,14 @@
 #define LJ_ABI_EABI		1
 #define LJ_TARGET_ARM		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	14
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
 #define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR. */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 
-#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
+#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
 #define LJ_ARCH_VERSION		80
 #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
 #define LJ_ARCH_VERSION		70
@@ -247,8 +267,12 @@
 #define LJ_ARCH_NAME		"arm64"
 #define LJ_ARCH_ENDIAN		LUAJIT_LE
 #endif
+#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
+#define LJ_ABI_PAUTH		1
+#endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
+#define LJ_TARGET_EHRAREG	30
 #define LJ_TARGET_JUMPRANGE	27	/* +-2^27 = +-128MB */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -304,6 +328,7 @@
 
 #define LJ_TARGET_PPC		1
 #define LJ_TARGET_EHRETREG	3
+#define LJ_TARGET_EHRAREG	65
 #define LJ_TARGET_JUMPRANGE	25	/* +-2^25 = +-32MB */
 #define LJ_TARGET_MASKSHIFT	0
 #define LJ_TARGET_MASKROT	1
@@ -314,6 +339,7 @@
 #define LJ_ARCH_NOFFI		1
 #elif LJ_ARCH_BITS == 64
 #error "No support for PPC64"
+#undef LJ_TARGET_PPC
 #endif
 
 #if _ARCH_PWR7
@@ -406,6 +432,7 @@
 #endif
 #define LJ_TARGET_MIPS		1
 #define LJ_TARGET_EHRETREG	4
+#define LJ_TARGET_EHRAREG	31
 #define LJ_TARGET_JUMPRANGE	27	/* 2*2^27 = 256MB-aligned region */
 #define LJ_TARGET_MASKSHIFT	1
 #define LJ_TARGET_MASKROT	1
@@ -420,6 +447,30 @@
 #define LJ_ARCH_VERSION		10
 #endif
 
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV32
+#error "No support for RISC-V 32"
+
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
+#if defined(__riscv_float_abi_double)
+
+#define LJ_ARCH_NAME		"riscv64"
+#define LJ_ARCH_BITS		64
+#define LJ_ARCH_ENDIAN		LUAJIT_LE	/* Forget about BE for now */
+#define LJ_TARGET_RISCV64	1
+#define LJ_TARGET_GC64		1
+#define LJ_TARGET_EHRETREG	10
+#define LJ_TARGET_EHRAREG	1
+#define LJ_TARGET_JUMPRANGE	30	/* JAL +-2^20 = +-1MB,\
+        AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
+#define LJ_TARGET_MASKSHIFT	1
+#define LJ_TARGET_MASKROT	1
+#define LJ_TARGET_UNIFYROT	2	/* Want only IR_BROR, no ROLI */
+#define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
+
+#else
+#error "No support for RISC-V 64 Soft-float/Single-float"
+#endif
+
 #else
 #error "No target architecture defined"
 #endif
@@ -451,11 +502,17 @@
 #endif
 #endif
 #elif !LJ_TARGET_PS3
+#if __clang__
+#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5))
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
 #error "Need at least GCC 4.3 or newer"
 #endif
 #endif
 #endif
+#endif
 
 /* Check target-specific constraints. */
 #ifndef _BUILDVM_H
@@ -466,36 +523,52 @@
 #elif LJ_TARGET_ARM
 #if defined(__ARMEB__)
 #error "No support for big-endian ARM"
+#undef LJ_TARGET_ARM
 #endif
 #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
 #error "No support for Cortex-M CPUs"
+#undef LJ_TARGET_ARM
 #endif
 #if !(__ARM_EABI__ || LJ_TARGET_IOS)
 #error "Only ARM EABI or iOS 3.0+ ABI is supported"
+#undef LJ_TARGET_ARM
 #endif
 #elif LJ_TARGET_ARM64
 #if defined(_ILP32)
 #error "No support for ILP32 model on ARM64"
+#undef LJ_TARGET_ARM64
 #endif
 #elif LJ_TARGET_PPC
 #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
 #error "No support for little-endian PPC32"
+#undef LJ_TARGET_PPC
 #endif
 #if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
-#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
+#error "No support for PPC/e500, use LuaJIT 2.0"
+#undef LJ_TARGET_PPC
 #endif
 #elif LJ_TARGET_MIPS32
 #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
 #error "Only o32 ABI supported for MIPS32"
+#undef LJ_TARGET_MIPS
 #endif
 #if LJ_TARGET_MIPSR6
 /* Not that useful, since most available r6 CPUs are 64 bit. */
 #error "No support for MIPS32R6"
+#undef LJ_TARGET_MIPS
 #endif
 #elif LJ_TARGET_MIPS64
 #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
 /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
 #error "Only n64 ABI supported for MIPS64"
+#undef LJ_TARGET_MIPS
+#endif
+#elif LJ_TARGET_RISCV
+#if !defined(__riscv_float_abi_double)
+#error "Only RISC-V 64 double float supported for now"
+#endif
+#if defined(__riscv_compressed)
+#error "Compressed instructions not supported for now"
 #endif
 #endif
 #endif
@@ -551,6 +624,13 @@
 #define LJ_HASFFI		1
 #endif
 
+/* Disable or enable the string buffer extension. */
+#if defined(LUAJIT_DISABLE_BUFFER)
+#define LJ_HASBUFFER		0
+#else
+#define LJ_HASBUFFER		1
+#endif
+
 #if defined(LUAJIT_DISABLE_PROFILE)
 #define LJ_HASPROFILE		0
 #elif LJ_TARGET_POSIX
@@ -575,6 +655,10 @@
 #define LJ_SOFTFP		(!LJ_ARCH_HASFPU)
 #define LJ_SOFTFP32		(LJ_SOFTFP && LJ_32)
 
+#ifndef LJ_ABI_PAUTH
+#define LJ_ABI_PAUTH		0
+#endif
+
 #if LJ_ARCH_ENDIAN == LUAJIT_BE
 #define LJ_LE			0
 #define LJ_BE			1
@@ -611,13 +695,10 @@
 #define LJ_NO_SYSTEM		1
 #endif
 
-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
-/* NYI: no support for compact unwind specification, yet. */
-#define LUAJIT_NO_UNWIND	1
-#endif
-
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
-#define LJ_NO_UNWIND		1
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+#define LJ_ABI_WIN		1
+#else
+#define LJ_ABI_WIN		0
 #endif
 
 #if LJ_TARGET_WINDOWS
@@ -632,6 +713,22 @@ extern void *LJ_WIN_LOADLIBA(const char
 #endif
 #endif
 
+#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5
+#define LJ_NO_UNWIND		1
+#endif
+
+#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
+#define LJ_UNWIND_EXT		1
+#else
+#define LJ_UNWIND_EXT		0
+#endif
+
+#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
+#define LJ_UNWIND_JIT		1
+#else
+#define LJ_UNWIND_JIT		0
+#endif
+
 /* Compatibility with Lua 5.1 vs. 5.2. */
 #ifdef LUAJIT_ENABLE_LUA52COMPAT
 #define LJ_52			1
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm.c
@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_asm_c
@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_gc.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -28,6 +29,7 @@
 #include "lj_dispatch.h"
 #include "lj_vm.h"
 #include "lj_target.h"
+#include "lj_prng.h"
 
 #ifdef LUA_USE_ASSERT
 #include <stdio.h>
@@ -71,6 +73,8 @@ typedef struct ASMState {
   IRRef snaprename;	/* Rename highwater mark for snapshot check. */
   SnapNo snapno;	/* Current snapshot number. */
   SnapNo loopsnapno;	/* Loop snapshot number. */
+  int snapalloc;	/* Current snapshot needs allocation. */
+  BloomFilter snapfilt1, snapfilt2;	/* Filled with snapshot refs. */
 
   IRRef fuseref;	/* Fusion limit (loopref, 0 or FUSE_DISABLED). */
   IRRef sectref;	/* Section base reference (loopref or 0). */
@@ -84,11 +88,18 @@ typedef struct ASMState {
 
   MCode *mcbot;		/* Bottom of reserved MCode. */
   MCode *mctop;		/* Top of generated MCode. */
+  MCode *mctoporig;	/* Original top of generated MCode. */
   MCode *mcloop;	/* Pointer to loop MCode (or NULL). */
   MCode *invmcp;	/* Points to invertible loop branch (or NULL). */
   MCode *flagmcp;	/* Pending opportunity to merge flag setting ins. */
   MCode *realign;	/* Realign loop if not NULL. */
 
+#ifdef LUAJIT_RANDOM_RA
+  /* Randomize register allocation. OK for fuzz testing, not for production. */
+  uint64_t prngbits;
+  PRNGState prngstate;
+#endif
+
 #ifdef RID_NUM_KREF
   intptr_t krefk[RID_NUM_KREF];
 #endif
@@ -169,6 +180,41 @@ IRFLDEF(FLOFS)
   0
 };
 
+#ifdef LUAJIT_RANDOM_RA
+/* Return nbits random bits from the bit cache fed by the local PRNG state. */
+static uint32_t ra_random_bits(ASMState *as, uint32_t nbits) {
+  uint64_t b = as->prngbits;  /* Bits left over from earlier calls. */
+  uint32_t res = (1u << nbits) - 1u;  /* Mask selecting the low nbits. */
+  if (b <= res) b = lj_prng_u64(&as->prngstate) | (1ull << 63);  /* Refill. */
+  res &= (uint32_t)b;
+  as->prngbits = b >> nbits;  /* Drop the bits handed out by this call. */
+  return res;
+}
+
+/* Pick a random register from a register set (LUAJIT_RANDOM_RA builds only). */
+static Reg rset_pickrandom(ASMState *as, RegSet rs)
+{
+  Reg r = rset_pickbot_(rs);  /* Baseline pick (presumably lowest set bit). */
+  rs >>= r;  /* Rebase the set so the baseline pick becomes bit 0. */
+  if (rs > 1) {  /* More than one bit set? */
+    while (1) {  /* Rejection sampling: retry until a set bit is hit. */
+      /* We need to sample max. the GPR or FPR half of the set. */
+      uint32_t d = ra_random_bits(as, RSET_BITS-1);
+      if ((rs >> d) & 1) {
+	r += d;  /* Turn the offset back into a register number. */
+	break;
+      }
+    }
+  }
+  return r;
+}
+#define rset_picktop(rs)	rset_pickrandom(as, rs)
+#define rset_pickbot(rs)	rset_pickrandom(as, rs)
+#else
+#define rset_picktop(rs)	rset_picktop_(rs)
+#define rset_pickbot(rs)	rset_pickbot_(rs)
+#endif
+
 /* -- Target-specific instruction emitter --------------------------------- */
 
 #if LJ_TARGET_X86ORX64
@@ -181,6 +227,8 @@ IRFLDEF(FLOFS)
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_emit_mips.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_emit_riscv.h"
 #else
 #error "Missing instruction emitter for target CPU"
 #endif
@@ -560,7 +608,11 @@ static Reg ra_allock(ASMState *as, intpt
 	IRIns *ir = IR(ref);
 	if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
 #if LJ_GC64
+#if LJ_TARGET_ARM64
+	    (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
+#else
 	    (ir->o == IR_KINT && k == ir->i) ||
+#endif
 	    (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
 	    ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
 	     k == (intptr_t)ir_kptr(ir))
@@ -694,7 +746,14 @@ static void ra_rename(ASMState *as, Reg
   RA_DBGX((as, "rename    $f $r $r", regcost_ref(as->cost[up]), down, up));
   emit_movrr(as, ir, down, up);  /* Backwards codegen needs inverse move. */
   if (!ra_hasspill(IR(ref)->s)) {  /* Add the rename to the IR. */
-    ra_addrename(as, down, ref, as->snapno);
+    /*
+    ** The rename is effective at the subsequent (already emitted) exit
+    ** branch. This is for the current snapshot (as->snapno). Except if we
+    ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
+    ** then it belongs to the next snapshot.
+    ** See also the discussion at asm_snap_checkrename().
+    */
+    ra_addrename(as, down, ref, as->snapno + as->snapalloc);
   }
 }
 
@@ -807,11 +866,11 @@ static void ra_leftov(ASMState *as, Reg
 }
 #endif
 
-#if !LJ_64
 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
 static void ra_destpair(ASMState *as, IRIns *ir)
 {
   Reg destlo = ir->r, desthi = (ir+1)->r;
+  IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
   /* First spill unrelated refs blocking the destination registers. */
   if (!rset_test(as->freeset, RID_RETLO) &&
       destlo != RID_RETLO && desthi != RID_RETLO)
@@ -835,29 +894,29 @@ static void ra_destpair(ASMState *as, IR
   /* Check for conflicts and shuffle the registers as needed. */
   if (destlo == RID_RETHI) {
     if (desthi == RID_RETLO) {
-#if LJ_TARGET_X86
+#if LJ_TARGET_X86ORX64
       *--as->mcp = XI_XCHGa + RID_RETHI;
+      if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48;
 #else
-      emit_movrr(as, ir, RID_RETHI, RID_TMP);
-      emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-      emit_movrr(as, ir, RID_TMP, RID_RETLO);
+      emit_movrr(as, irx, RID_RETHI, RID_TMP);
+      emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+      emit_movrr(as, irx, RID_TMP, RID_RETLO);
 #endif
     } else {
-      emit_movrr(as, ir, RID_RETHI, RID_RETLO);
-      if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+      emit_movrr(as, irx, RID_RETHI, RID_RETLO);
+      if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
     }
   } else if (desthi == RID_RETLO) {
-    emit_movrr(as, ir, RID_RETLO, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   } else {
-    if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
-    if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+    if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+    if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
   }
   /* Restore spill slots (if any). */
   if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
   if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
 }
-#endif
 
 /* -- Snapshot handling --------- ----------------------------------------- */
 
@@ -892,7 +951,10 @@ static int asm_sunk_store(ASMState *as,
 static void asm_snap_alloc1(ASMState *as, IRRef ref)
 {
   IRIns *ir = IR(ref);
-  if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) {
+  if (!irref_isk(ref) && ir->r != RID_SUNK) {
+    bloomset(as->snapfilt1, ref);
+    bloomset(as->snapfilt2, hashrot(ref, ref + HASH_BIAS));
+    if (ra_used(ir)) return;
     if (ir->r == RID_SINK) {
       ir->r = RID_SUNK;
 #if LJ_HASFFI
@@ -947,11 +1009,12 @@ static void asm_snap_alloc1(ASMState *as
 }
 
 /* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
+  SnapShot *snap = &as->T->snap[snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
+  as->snapfilt1 = as->snapfilt2 = 0;
   for (n = 0; n < nent; n++) {
     SnapEntry sn = map[n];
     IRRef ref = snap_ref(sn);
@@ -960,7 +1023,7 @@ static void asm_snap_alloc(ASMState *as)
       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
 	lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
 		   "snap %d[%d] points to bad SOFTFP IR %04d",
-		   as->snapno, n, ref - REF_BIAS);
+		   snapno, n, ref - REF_BIAS);
 	asm_snap_alloc1(as, ref+1);
       }
     }
@@ -976,35 +1039,26 @@ static void asm_snap_alloc(ASMState *as)
 */
 static int asm_snap_checkrename(ASMState *as, IRRef ren)
 {
-  SnapShot *snap = &as->T->snap[as->snapno];
-  SnapEntry *map = &as->T->snapmap[snap->mapofs];
-  MSize n, nent = snap->nent;
-  for (n = 0; n < nent; n++) {
-    SnapEntry sn = map[n];
-    IRRef ref = snap_ref(sn);
-    if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) {
-      IRIns *ir = IR(ref);
-      ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
-      RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
-      return 1;  /* Found. */
-    }
+  if (bloomtest(as->snapfilt1, ren) &&  /* Filters set in asm_snap_alloc1(). */
+      bloomtest(as->snapfilt2, hashrot(ren, ren + HASH_BIAS))) {
+    IRIns *ir = IR(ren);  /* Both filter tests matched for this ref. */
+    ra_spill(as, ir);  /* Register renamed, so force a spill slot. */
+    RA_DBGX((as, "snaprensp $f $s", ren, ir->s));
+    return 1;  /* Found. */
   }
   return 0;  /* Not found. */
 }
 
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
 static void asm_snap_prep(ASMState *as)
 {
-  if (as->curins < as->snapref) {
-    do {
-      if (as->snapno == 0) return;  /* Called by sunk stores before snap #0. */
-      as->snapno--;
-      as->snapref = as->T->snap[as->snapno].ref;
-    } while (as->curins < as->snapref);
-    asm_snap_alloc(as);
+  if (as->snapalloc) {
+    /* Alloc on first invocation for each snapshot. */
+    as->snapalloc = 0;
+    asm_snap_alloc(as, as->snapno);
     as->snaprename = as->T->nins;
   } else {
-    /* Process any renames above the highwater mark. */
+    /* Check any renames above the highwater mark. */
     for (; as->snaprename < as->T->nins; as->snaprename++) {
       IRIns *ir = &as->T->ir[as->snaprename];
       if (asm_snap_checkrename(as, ir->op1))
@@ -1013,6 +1067,35 @@ static void asm_snap_prep(ASMState *as)
   }
 }
 
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
+{
+  if (as->curins < as->snapref) {
+    uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);  /* From orig. top. */
+    if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);  /* > 16 bit. */
+    do {
+      if (as->snapno == 0) return;  /* Already at snapshot #0. */
+      as->snapno--;
+      as->snapref = as->T->snap[as->snapno].ref;
+      as->T->snap[as->snapno].mcofs = (uint16_t)ofs;  /* Remember mcode ofs. */
+    } while (as->curins < as->snapref);  /* May have no ins in between. */
+    as->snapalloc = 1;  /* asm_snap_prep() allocates on its next call. */
+  }
+}
+
+/* Fixup snapshot mcode offsets. */
+static void asm_snap_fixup_mcofs(ASMState *as)
+{
+  uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);  /* Size below orig. top. */
+  SnapShot *snap = as->T->snap;
+  SnapNo i;
+  for (i = as->T->nsnap-1; i > 0; i--) {  /* Descending: reads snap[i-1]. */
+    /* Compute offset from mcode start and store in correct snapshot. */
+    snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
+  }
+  snap[0].mcofs = 0;  /* First snapshot gets offset zero. */
+}
+
 /* -- Miscellaneous helpers ----------------------------------------------- */
 
 /* Calculate stack adjustment. */
@@ -1057,6 +1140,7 @@ static void asm_snew(ASMState *as, IRIns
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
   IRRef args[3];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const char *str */
   args[2] = ir->op2;   /* size_t len      */
@@ -1069,6 +1153,7 @@ static void asm_tnew(ASMState *as, IRIns
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L    */
   args[1] = ASMREF_TMP1;  /* uint32_t ahsize */
   as->gcsteps++;
@@ -1081,6 +1166,7 @@ static void asm_tdup(ASMState *as, IRIns
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;  /* lua_State *L    */
   args[1] = ir->op1;   /* const GCtab *kt */
   as->gcsteps++;
@@ -1106,28 +1192,43 @@ static void asm_gcstep(ASMState *as, IRI
 
 /* -- Buffer operations --------------------------------------------------- */
 
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb);
+#endif
 
 static void asm_bufhdr(ASMState *as, IRIns *ir)
 {
   Reg sb = ra_dest(as, ir, RSET_GPR);
-  if ((ir->op2 & IRBUFHDR_APPEND)) {
+  switch (ir->op2) {
+  case IRBUFHDR_RESET: {
+    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+    IRIns irbp;
+    irbp.ot = IRT(0, IRT_PTR);  /* Buffer data pointer type. */
+    emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
+    emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
+    break;
+    }
+  case IRBUFHDR_APPEND: {
     /* Rematerialize const buffer pointer instead of likely spill. */
     IRIns *irp = IR(ir->op1);
     if (!(ra_hasreg(irp->r) || irp == ir-1 ||
 	  (irp == ir-2 && !ra_used(ir-1)))) {
-      while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
+      while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
 	irp = IR(irp->op1);
       if (irref_isk(irp->op1)) {
 	ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
 	ir = irp;
       }
     }
-  } else {
-    Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
-    /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
-    emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
-    emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
+    break;
+    }
+#if LJ_HASBUFFER
+  case IRBUFHDR_WRITE:
+    asm_bufhdr_write(as, sb);
+    break;
+#endif
+  default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
   }
 #if LJ_TARGET_X86ORX64
   ra_left(as, sb, ir->op1);
@@ -1179,7 +1280,7 @@ static void asm_bufput(ASMState *as, IRI
   if (args[1] == ASMREF_TMP1) {
     Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
     if (kchar == -129)
-      asm_tvptr(as, tmp, irs->op1);
+      asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
     else
       ra_allockreg(as, kchar, tmp);
   }
@@ -1201,6 +1302,7 @@ static void asm_tostr(ASMState *as, IRIn
 {
   const CCallInfo *ci;
   IRRef args[2];
+  asm_snap_prep(as);
   args[0] = ASMREF_L;
   as->gcsteps++;
   if (ir->op2 == IRTOSTR_NUM) {
@@ -1216,7 +1318,7 @@ static void asm_tostr(ASMState *as, IRIn
   asm_setupresult(as, ir, ci);  /* GCstr * */
   asm_gencall(as, ci, args);
   if (ir->op2 == IRTOSTR_NUM)
-    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
 }
 
 #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
@@ -1257,12 +1359,19 @@ static void asm_newref(ASMState *as, IRI
   IRRef args[3];
   if (ir->r == RID_SINK)
     return;
+  asm_snap_prep(as);
   args[0] = ASMREF_L;     /* lua_State *L */
   args[1] = ir->op1;      /* GCtab *t     */
   args[2] = ASMREF_TMP1;  /* cTValue *key */
   asm_setupresult(as, ir, ci);  /* TValue * */
   asm_gencall(as, ci, args);
-  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+  Reg r = ra_dest(as, ir, RSET_GPR);  /* Destination comes from the GPR set. */
+  asm_tvptr(as, r, ir->op1, ir->op2);  /* op1 is the ref, op2 the mode arg. */
+}
 
 static void asm_lref(ASMState *as, IRIns *ir)
@@ -1601,6 +1710,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_asm_mips.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_asm_riscv64.h"
 #else
 #error "Missing assembler for target CPU"
 #endif
@@ -1610,7 +1721,6 @@ static void asm_loop(ASMState *as)
 #if !LJ_SOFTFP32
 #if !LJ_TARGET_X86ORX64
 #define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
-#define asm_fppowi(as, ir)	asm_callid(as, ir, IRCALL_lj_vm_powi)
 #endif
 
 static void asm_pow(ASMState *as, IRIns *ir)
@@ -1621,10 +1731,7 @@ static void asm_pow(ASMState *as, IRIns
 					  IRCALL_lj_carith_powu64);
   else
 #endif
-  if (irt_isnum(IR(ir->op2)->t))
-    asm_callid(as, ir, IRCALL_pow);
-  else
-    asm_fppowi(as, ir);
+  asm_callid(as, ir, IRCALL_pow);
 }
 
 static void asm_div(ASMState *as, IRIns *ir)
@@ -1744,6 +1851,7 @@ static void asm_ir(ASMState *as, IRIns *
   case IR_NEWREF: asm_newref(as, ir); break;
   case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
   case IR_FREF: asm_fref(as, ir); break;
+  case IR_TMPREF: asm_tmpref(as, ir); break;
   case IR_STRREF: asm_strref(as, ir); break;
   case IR_LREF: asm_lref(as, ir); break;
 
@@ -1830,6 +1938,8 @@ static void asm_head_side(ASMState *as)
   IRRef1 sloadins[RID_MAX];
   RegSet allow = RSET_ALL;  /* Inverse of all coalesced registers. */
   RegSet live = RSET_EMPTY;  /* Live parent registers. */
+  RegSet pallow = RSET_GPR;  /* Registers needed by the parent stack check. */
+  Reg pbase;
   IRIns *irp = &as->parent->ir[REF_BASE];  /* Parent base. */
   int32_t spadj, spdelta;
   int pass2 = 0;
@@ -1838,10 +1948,13 @@ static void asm_head_side(ASMState *as)
 
   if (as->snapno && as->topslot > as->parent->topslot) {
     /* Force snap #0 alloc to prevent register overwrite in stack check. */
-    as->snapno = 0;
-    asm_snap_alloc(as);
+    asm_snap_alloc(as, 0);
+  }
+  pbase = asm_head_side_base(as, irp);
+  if (pbase != RID_NONE) {
+    rset_clear(allow, pbase);
+    rset_clear(pallow, pbase);
   }
-  allow = asm_head_side_base(as, irp, allow);
 
   /* Scan all parent SLOADs and collect register dependencies. */
   for (i = as->stopins; i > REF_BASE; i--) {
@@ -1871,6 +1984,7 @@ static void asm_head_side(ASMState *as)
       sloadins[rs] = (IRRef1)i;
       rset_set(live, rs);  /* Block live parent register. */
     }
+    if (!ra_hasspill(regsp_spill(rs))) rset_clear(pallow, regsp_reg(rs));
   }
 
   /* Calculate stack frame adjustment. */
@@ -1987,7 +2101,7 @@ static void asm_head_side(ASMState *as)
     ExitNo exitno = as->J->exitno;
 #endif
     as->T->topslot = (uint8_t)as->topslot;  /* Remember for child traces. */
-    asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno);
+    asm_stack_check(as, as->topslot, irp, pallow, exitno);
   }
 }
 
@@ -2078,6 +2192,9 @@ static void asm_setup_regsp(ASMState *as
 #endif
 
   ra_setup(as);
+#if LJ_TARGET_ARM64
+  ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
 
   /* Clear reg/sp for constants. */
   for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
@@ -2100,6 +2217,7 @@ static void asm_setup_regsp(ASMState *as
   as->snaprename = nins;
   as->snapref = nins;
   as->snapno = T->nsnap;
+  as->snapalloc = 0;
 
   as->stopins = REF_BASE;
   as->orignins = nins;
@@ -2148,6 +2266,10 @@ static void asm_setup_regsp(ASMState *as
       ir->prev = (uint16_t)REGSP_HINT((rload & 15));
       rload = lj_ror(rload, 4);
       continue;
+    case IR_TMPREF:
+      if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
+	as->evenspill = 4;  /* TMPREF OUT2 needs two TValues on the stack. */
+      break;
 #endif
     case IR_CALLXS: {
       CCallInfo ci;
@@ -2157,7 +2279,17 @@ static void asm_setup_regsp(ASMState *as
 	as->modset |= RSET_SCRATCH;
       continue;
       }
-    case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
+    case IR_CALLL:
+      /* lj_vm_next needs two TValues on the stack. */
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+      if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
+	as->evenspill = SPS_FIRST + 4;
+#else
+      if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
+	as->evenspill = 4;
+#endif
+      /* fallthrough */
+    case IR_CALLN: case IR_CALLA: case IR_CALLS: {
       const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
       ir->prev = asm_setup_call_slots(as, ir, ci);
       if (inloop)
@@ -2165,7 +2297,6 @@ static void asm_setup_regsp(ASMState *as
 		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
       }
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
     case IR_HIOP:
       switch ((ir-1)->o) {
 #if LJ_SOFTFP && LJ_TARGET_ARM
@@ -2176,7 +2307,7 @@ static void asm_setup_regsp(ASMState *as
 	}
 	break;
 #endif
-#if !LJ_SOFTFP && LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
       case IR_CONV:
 	if (irt_isfp((ir-1)->t)) {
 	  ir->prev = REGSP_HINT(RID_FPRET);
@@ -2184,7 +2315,7 @@ static void asm_setup_regsp(ASMState *as
 	}
 #endif
       /* fallthrough */
-      case IR_CALLN: case IR_CALLXS:
+      case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
 #if LJ_SOFTFP
       case IR_MIN: case IR_MAX:
 #endif
@@ -2195,7 +2326,6 @@ static void asm_setup_regsp(ASMState *as
 	break;
       }
       break;
-#endif
 #if LJ_SOFTFP
     case IR_MIN: case IR_MAX:
       if ((ir+1)->o != IR_HIOP) break;
@@ -2250,13 +2380,23 @@ static void asm_setup_regsp(ASMState *as
       }
       /* fallthrough */ /* for integer POW */
     case IR_DIV: case IR_MOD:
-      if (!irt_isnum(ir->t)) {
+      if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
+	ir->prev = REGSP_HINT(RID_RET);
+	if (inloop)
+	  as->modset |= (RSET_SCRATCH & RSET_GPR);
+	continue;
+      }
+      break;
+#if LJ_64 && LJ_SOFTFP
+    case IR_ADD: case IR_SUB: case IR_MUL:
+      if (irt_isnum(ir->t)) {
 	ir->prev = REGSP_HINT(RID_RET);
 	if (inloop)
 	  as->modset |= (RSET_SCRATCH & RSET_GPR);
 	continue;
       }
       break;
+#endif
     case IR_FPMATH:
 #if LJ_TARGET_X86ORX64
       if (ir->op2 <= IRFPM_TRUNC) {
@@ -2327,7 +2467,6 @@ void lj_asm_trace(jit_State *J, GCtrace
 {
   ASMState as_;
   ASMState *as = &as_;
-  MCode *origtop;
 
   /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
   {
@@ -2353,9 +2492,12 @@ void lj_asm_trace(jit_State *J, GCtrace
   as->realign = NULL;
   as->loopinv = 0;
   as->parent = J->parent ? traceref(J, J->parent) : NULL;
+#ifdef LUAJIT_RANDOM_RA
+  (void)lj_prng_u64(&J2G(J)->prng);  /* Ensure PRNG step between traces. */
+#endif
 
   /* Reserve MCode memory. */
-  as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
+  as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
   as->mcp = as->mctop;
   as->mclim = as->mcbot + MCLIM_REDZONE;
   asm_setup_target(as);
@@ -2394,6 +2536,10 @@ void lj_asm_trace(jit_State *J, GCtrace
 #endif
     as->ir = J->curfinal->ir;  /* Use the copied IR. */
     as->curins = J->cur.nins = as->orignins;
+#ifdef LUAJIT_RANDOM_RA
+    as->prngstate = J2G(J)->prng;  /* Must (re)start from identical state. */
+    as->prngbits = 0;
+#endif
 
     RA_DBG_START();
     RA_DBGX((as, "===== STOP ====="));
@@ -2417,6 +2563,7 @@ void lj_asm_trace(jit_State *J, GCtrace
       lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
 		 "IR %04d has unsplit 64 bit type",
 		 (int)(ir - as->ir) - REF_BIAS);
+      asm_snap_prev(as);
       if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
 	continue;  /* Dead-code elimination can be soooo easy. */
       if (irt_isguard(ir->t))
@@ -2450,6 +2597,9 @@ void lj_asm_trace(jit_State *J, GCtrace
       memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
 	     (T->nins - as->orignins) * sizeof(IRIns));  /* Copy RENAMEs. */
       T->nins = J->curfinal->nins;
+      /* Fill mcofs of any unprocessed snapshots. */
+      as->curins = REF_FIRST;
+      asm_snap_prev(as);
       break;  /* Done. */
     }
 
@@ -2468,13 +2618,16 @@ void lj_asm_trace(jit_State *J, GCtrace
   /* Set trace entry point before fixing up tail to allow link to self. */
   T->mcode = as->mcp;
   T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
-  if (!as->loopref)
+  if (as->loopref)
+    asm_loop_tail_fixup(as);
+  else
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
+  asm_snap_fixup_mcofs(as);
 #if LJ_TARGET_MCODE_FIXUP
   asm_mcode_fixup(T->mcode, T->szmcode);
 #endif
-  lj_mcode_sync(T->mcode, origtop);
+  lj_mcode_sync(T->mcode, as->mctoporig);
 }
 
 #undef IR
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm.h
@@ -1,6 +1,6 @@
 /*
 ** IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ASM_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_arm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm_arm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_arm.h
@@ -1,6 +1,6 @@
 /*
 ** ARM IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as,
 	*ofsp = (ofs & 255);  /* Mask out less bits to allow LDRD. */
 	return ra_allock(as, (ofs & ~255), allow);
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = 0;
+      return RID_SP;
     }
   }
   *ofsp = 0;
@@ -310,7 +313,11 @@ static void asm_fusexref(ASMState *as, A
 }
 
 #if !LJ_SOFTFP
-/* Fuse to multiply-add/sub instruction. */
+/*
+** Fuse to multiply-add/sub instruction.
+** VMLA rounds twice (UMA, not FMA) -- no need to check for JIT_F_OPT_FMA.
+** VFMA needs VFPv4, which is uncommon on the remaining ARM32 targets.
+*/
 static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
 {
   IRRef lref = ir->op1, rref = ir->op2;
@@ -498,6 +505,30 @@ static void asm_retf(ASMState *as, IRIns
   emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* NOTE(review): emit_* calls appear to run bottom-up; read body in reverse for execution order -- confirm. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* Scratch GPR, never sb itself. */
+  IRIns irgc;
+  int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);  /* Address of cur_L as a 32 bit value. */
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));  /* Write RID_TMP to the L field of sb. */
+  if ((as->flags & JIT_F_ARMV6T2)) {
+    emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);  /* NOTE(review): looks like a bitfield insert of tmp's low bits into RID_TMP -- confirm. */
+  } else {  /* Without ARMv6T2: mask tmp with SBUF_MASK_FLAG, then OR it into RID_TMP. */
+    emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+    emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
+  }
+  emit_lso(as, ARMI_LDR, RID_TMP,
+	   ra_allock(as, (addr & ~4095),
+		     rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+	   (addr & 4095));  /* Load cur_L: 4K-aligned base constant plus 12 bit offset. */
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* Read the L field of sb into tmp. */
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -666,35 +697,55 @@ static void asm_strto(ASMState *as, IRIn
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    } else {
+  if ((mode & IRTMPREF_IN1)) {  /* NOTE(review): presumably "ref supplies an input value" -- confirm. */
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {  /* With OUT1 the number is copied to [sp] rather than referenced in place. */
 #if LJ_SOFTFP
-      lj_assertA(0, "unsplit FP op");
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_dm(as, ARMI_MOV, dest, RID_SP);  /* dest = sp. */
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		 RID_SP, 0);  /* Loword of the constant to [sp]. */
+	emit_lso(as, ARMI_STR,
+		 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		 RID_SP, 4);  /* Hiword of the constant to [sp+4]. */
 #else
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_dm(as, ARMI_MOV, dest, RID_SP);  /* dest = sp. */
+	emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);  /* FP register to [sp]. */
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
 #endif
+      }
+    } else {
+      /* Otherwise use [sp] and [sp+4] to hold the TValue.
+      ** This assumes the following call has max. 4 args.
+      */
+      Reg type;
+      emit_dm(as, ARMI_MOV, dest, RID_SP);
+      if (!irt_ispri(ir->t)) {  /* Primitive types store no value word, only the type word below. */
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_lso(as, ARMI_STR, src, RID_SP, 0);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);  /* Type word comes from the following HIOP (ref+1). */
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);  /* Type word is a constant derived from ir->t. */
+      emit_lso(as, ARMI_STR, type, RID_SP, 4);
     }
   } else {
-    /* Otherwise use [sp] and [sp+4] to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
     emit_dm(as, ARMI_MOV, dest, RID_SP);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_lso(as, ARMI_STR, src, RID_SP, 0);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_lso(as, ARMI_STR, type, RID_SP, 4);
   }
 }
 
@@ -918,24 +969,32 @@ static void asm_hrefk(ASMState *as, IRIn
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);  /* Guarded, PGC-typed ref: needs the 'closed' check below. */
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, ARMI_LDR, dest, v);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guardcc(as, CC_NE);
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);  /* NOTE(review): presumably exits when 'closed' vs. 1 disagrees with the op (UREFC wants 1) -- confirm. */
       emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
-      emit_opk(as, ARMI_ADD, dest, uv,
+    }
+    if (ir->o == IR_UREFC)
+      emit_opk(as, ARMI_ADD, dest, dest,
 	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
-      emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    else
+      emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));  /* dest doubles as the upvalue pointer; no separate scratch register. */
+    if (guarded)
+      emit_lso(as, ARMI_LDRB, RID_TMP, dest,
+	       (int32_t)offsetof(GCupval, closed));  /* Load the 'closed' byte for the compare above. */
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);  /* Constant function: upvalue address is known now. */
+      emit_loadi(as, dest, k);
     } else {
-      emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));  /* Load uvptr[op2 >> 8] from the function object. */
     }
-    emit_lso(as, ARMI_LDR, uv, func,
-	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1086,6 +1145,7 @@ static void asm_ahuvload(ASMState *as, I
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
 		       (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   if (!hiop || type == RID_NONE) {
     rset_clear(allow, idx);
     if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1202,7 +1262,12 @@ dotypecheck:
       }
     }
     asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
-    emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+    if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+      emit_n(as, ARMI_CMN|ARMI_K12|1, type);
+      emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type);
+    } else {
+      emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+    }
   }
   if (ra_hasreg(dest)) {
 #if !LJ_SOFTFP
@@ -1837,15 +1902,15 @@ static void asm_int64comp(ASMState *as,
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
   if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
 #if LJ_SOFTFP
@@ -1876,6 +1941,7 @@ static void asm_hiop(ASMState *as, IRIns
       asm_xstore_(as, ir, 4);
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
 #if LJ_HASFFI
@@ -1894,6 +1960,9 @@ static void asm_hiop(ASMState *as, IRIns
     asm_intneg(as, ir, ARMI_RSC);
     asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
     break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
 #if LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1901,25 +1970,16 @@ static void asm_hiop(ASMState *as, IRIns
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -1938,6 +1998,7 @@ static void asm_prof(ASMState *as, IRIns
 static void asm_stack_check(ASMState *as, BCReg topslot,
 			    IRIns *irp, RegSet allow, ExitNo exitno)
 {
+  int savereg = 0;
   Reg pbase;
   uint32_t k;
   if (irp) {
@@ -1948,12 +2009,14 @@ static void asm_stack_check(ASMState *as
       pbase = rset_pickbot(allow);
     } else {
       pbase = RID_RET;
-      emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0);  /* Restore temp. register. */
+      savereg = 1;
     }
   } else {
     pbase = RID_BASE;
   }
   emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
+  if (savereg)
+    emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0);  /* Restore temp. register. */
   k = emit_isk12(0, (int32_t)(8*topslot));
   lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
   emit_n(as, ARMI_CMP^k, RID_TMP);
@@ -1965,7 +2028,7 @@ static void asm_stack_check(ASMState *as
     if (ra_hasspill(irp->s))
       emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
     emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
-    if (ra_hasspill(irp->s) && !allow)
+    if (savereg)
       emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
     emit_loadi(as, RID_TMP, (i & ~4095));
   } else {
@@ -2021,6 +2084,8 @@ static void asm_stack_restore(ASMState *
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
       }
@@ -2082,6 +2147,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop (lj_asm_trace() calls this if as->loopref). */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do in this backend; only marks the parameter as used. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Reload L register from g->cur_L. */
@@ -2107,7 +2178,7 @@ static void asm_head_root_base(ASMState
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir;
   asm_head_lreg(as);
@@ -2115,16 +2186,15 @@ static RegSet asm_head_side_base(ASMStat
   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
     ra_spill(as, ir);
   if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
+    return ra_dest(as, ir, RSET_GPR);
   } else {
     Reg r = irp->r;
     lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
     if (r != ir->r && !rset_test(as->freeset, r))
       ra_restore(as, regcost_ref(as->cost[r]));
     ra_destreg(as, ir, r);
+    return r;
   }
-  return allow;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
@@ -2193,7 +2263,7 @@ static Reg asm_setup_call_slots(ASMState
   }
   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
     as->evenspill = nslots;
-  return REGSP_HINT(RID_RET);
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_arm64.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm_arm64.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_arm64.h
@@ -1,6 +1,6 @@
 /*
 ** ARM64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
 ** Sponsored by Cisco Systems, Inc.
@@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A6
   emit_cond_branch(as, cc, target);
 }
 
-/* Emit test and branch instruction to exit for guard. */
-static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+/* Emit test and branch instruction to exit for guard, if in range. */
+static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
 {
   MCode *target = asm_exitstub_addr(as, as->snapno);
   MCode *p = as->mcp;
+  ptrdiff_t delta = target - p;  /* Distance to the exit stub, in MCode units. */
   if (LJ_UNLIKELY(p == as->invmcp)) {
+    if (as->orignins > 1023) return 0;  /* Delta might end up too large. */
     as->loopinv = 1;
-    *p = A64I_B | A64F_S26(target-p);
-    emit_tnb(as, ai^0x01000000u, r, bit, p-1);
-    return;
+    *p = A64I_B | A64F_S26(delta);  /* Overwrite *p with a branch to the exit stub. */
+    ai ^= 0x01000000u;  /* NOTE(review): presumably inverts the test (TBZ <-> TBNZ) -- confirm. */
+    target = p-1;
+  } else if (LJ_UNLIKELY(delta >= 0x1fff)) {  /* NOTE(review): presumably the test-and-branch range limit -- confirm. */
+    return 0;  /* Nothing emitted. */
   }
   emit_tnb(as, ai, r, bit, target);
+  return 1;  /* Test-and-branch emitted. */
 }
 
 /* Emit compare and branch instruction to exit for guard. */
@@ -198,6 +203,9 @@ static Reg asm_fuseahuref(ASMState *as,
 	  return RID_GL;
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
+      return RID_GL;
     }
   }
   *ofsp = 0;
@@ -208,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as,
 static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
 {
   IRIns *ir = IR(ref);
+  int logical = (ai & 0x1f000000) == 0x0a000000;
   if (ra_hasreg(ir->r)) {
     ra_noweak(as, ir->r);
     return A64F_M(ir->r);
   } else if (irref_isk(ref)) {
-    uint32_t m;
     int64_t k = get_k64val(as, ref);
-    if ((ai & 0x1f000000) == 0x0a000000)
-      m = emit_isk13(k, irt_is64(ir->t));
-    else
-      m = emit_isk12(k);
+    uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
+			   emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
     if (m)
       return m;
   } else if (mayfuse(as, ref)) {
@@ -229,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as
 		    (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
       IRIns *irl = IR(ir->op1);
       if (sh == A64SH_LSL &&
-	  irl->o == IR_CONV &&
+	  irl->o == IR_CONV && !logical &&
 	  irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
 	  shift <= 4 &&
 	  canfuse(as, irl)) {
@@ -239,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as
 	Reg m = ra_alloc1(as, ir->op1, allow);
 	return A64F_M(m) | A64F_SH(sh, shift);
       }
-    } else if (ir->o == IR_CONV &&
+    } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
+      Reg m = ra_alloc1(as, ir->op1, allow);
+      int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
+      return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
+    } else if (ir->o == IR_CONV && !logical &&
 	       ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
       Reg m = ra_alloc1(as, ir->op1, allow);
       return A64F_M(m) | A64F_EX(A64EX_SXTW);
@@ -334,7 +344,8 @@ static int asm_fusemadd(ASMState *as, IR
 {
   IRRef lref = ir->op1, rref = ir->op2;
   IRIns *irm;
-  if (lref != rref &&
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
        ra_noreg(irm->r)) ||
        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
@@ -415,13 +426,18 @@ static int asm_fuseorshift(ASMState *as,
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 {
   uint32_t n, nargs = CCI_XNARGS(ci);
-  int32_t ofs = 0;
+  int32_t spofs = 0, spalign = LJ_HASFFI && LJ_TARGET_OSX ? 0 : 7;
   Reg gpr, fpr = REGARG_FIRSTFPR;
-  if ((void *)ci->func)
-    emit_call(as, (void *)ci->func);
+  if (ci->func)
+    emit_call(as, ci->func);
   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
   gpr = REGARG_FIRSTGPR;
+#if LJ_HASFFI && LJ_ABI_WIN
+  if ((ci->flags & CCI_VARARG)) {
+    fpr = REGARG_LASTFPR+1;
+  }
+#endif
   for (n = 0; n < nargs; n++) { /* Setup args. */
     IRRef ref = args[n];
     IRIns *ir = IR(ref);
@@ -432,10 +448,21 @@ static void asm_gencall(ASMState *as, co
 		     "reg %d not free", fpr);  /* Must have been evicted. */
 	  ra_leftov(as, fpr, ref);
 	  fpr++;
+#if LJ_HASFFI && LJ_ABI_WIN
+	} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
+	  Reg rf = ra_alloc1(as, ref, RSET_FPR);
+	  emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
+#endif
 	} else {
 	  Reg r = ra_alloc1(as, ref, RSET_FPR);
-	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
-	  ofs += 8;
+	  int32_t al = spalign;
+#if LJ_HASFFI && LJ_TARGET_OSX
+	  al |= irt_isnum(ir->t) ? 7 : 3;
+#endif
+	  spofs = (spofs + al) & ~al;
+	  if (LJ_BE && al >= 7 && !irt_isnum(ir->t)) spofs += 4, al -= 4;
+	  emit_spstore(as, ir, r, spofs);
+	  spofs += al + 1;
 	}
       } else {
 	if (gpr <= REGARG_LASTGPR) {
@@ -445,10 +472,27 @@ static void asm_gencall(ASMState *as, co
 	  gpr++;
 	} else {
 	  Reg r = ra_alloc1(as, ref, RSET_GPR);
-	  emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
-	  ofs += 8;
+	  int32_t al = spalign;
+#if LJ_HASFFI && LJ_TARGET_OSX
+	  al |= irt_size(ir->t) - 1;
+#endif
+	  spofs = (spofs + al) & ~al;
+	  if (al >= 3) {
+	    if (LJ_BE && al >= 7 && !irt_is64(ir->t)) spofs += 4, al -= 4;
+	    emit_spstore(as, ir, r, spofs);
+	  } else {
+	    lj_assertA(al == 0 || al == 1, "size %d unexpected", al + 1);
+	    emit_lso(as, al ? A64I_STRH : A64I_STRB, r, RID_SP, spofs);
+	  }
+	  spofs += al + 1;
 	}
       }
+#if LJ_HASFFI && LJ_TARGET_OSX
+    } else {  /* Marker for start of varargs. */
+      gpr = REGARG_LASTGPR+1;
+      fpr = REGARG_LASTFPR+1;
+      spalign = 7;
+#endif
     }
   }
 }
@@ -457,8 +501,11 @@ static void asm_gencall(ASMState *as, co
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r); /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
   ra_evictset(as, drop); /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lj_assertA(!irt_ispri(ir->t), "PRI dest");
@@ -470,6 +517,8 @@ static void asm_setupresult(ASMState *as
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
+    } else if (hiop) {
+      ra_destpair(as, ir);
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -492,7 +541,7 @@ static void asm_callx(ASMState *as, IRIn
     ci.func = (ASMFunction)(ir_k64(irf)->u64);
   } else {  /* Need a non-argument register for indirect calls. */
     Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    emit_n(as, A64I_BLR, freg);
+    emit_n(as, A64I_BLR_AUTH, freg);
     ci.func = (ASMFunction)(void *)0;
   }
   asm_gencall(as, &ci, args);
@@ -509,8 +558,6 @@ static void asm_retf(ASMState *as, IRIns
   as->topslot -= (BCReg)delta;
   if ((int32_t)as->topslot < 0) as->topslot = 0;
   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
-  /* Need to force a spill on REF_BASE now to update the stack slot. */
-  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
   emit_setgl(as, base, jit_base);
   emit_addptr(as, base, -8*delta);
   asm_guardcc(as, CC_NE);
@@ -519,6 +566,21 @@ static void asm_retf(ASMState *as, IRIns
   emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* NOTE(review): emit_* calls appear to run bottom-up; read body in reverse for execution order -- confirm. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));  /* Scratch GPR, never sb itself. */
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));  /* Write RID_TMP to the L field of sb. */
+  emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);  /* NOTE(review): looks like a bitfield move of tmp's low bits into RID_TMP -- confirm. */
+  emit_getgl(as, RID_TMP, cur_L);  /* Load cur_L from the global state into RID_TMP. */
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* Read the L field of sb into tmp. */
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
@@ -602,7 +664,7 @@ static void asm_conv(ASMState *as, IRIns
 	emit_dn(as, A64I_SXTW, dest, left);
       }
     } else {
-      if (st64) {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	/* This is either a 32 bit reg/reg mov which zeroes the hiword
 	** or a load of the loword from a 64 bit address.
 	*/
@@ -619,25 +681,22 @@ static void asm_strto(ASMState *as, IRIn
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   IRRef args[2];
-  Reg dest = 0, tmp;
-  int destused = ra_used(ir);
+  Reg tmp;
   int32_t ofs = 0;
   ra_evictset(as, RSET_SCRATCH);
-  if (destused) {
+  if (ra_used(ir)) {
     if (ra_hasspill(ir->s)) {
       ofs = sps_scale(ir->s);
-      destused = 0;
       if (ra_hasreg(ir->r)) {
 	ra_free(as, ir->r);
 	ra_modified(as, ir->r);
 	emit_spload(as, ir, ir->r, ofs);
       }
     } else {
-      dest = ra_dest(as, ir, RSET_FPR);
+      Reg dest = ra_dest(as, ir, RSET_FPR);
+      emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
     }
   }
-  if (destused)
-    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
   asm_guardcnb(as, A64I_CBZ, RID_RET);
   args[0] = ir->op1; /* GCstr *str */
   args[1] = ASMREF_TMP1; /* TValue *n  */
@@ -675,22 +734,23 @@ static void asm_tvstore64(ASMState *as,
 }
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref)) {
-      /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+  if ((mode & IRTMPREF_IN1)) {  /* NOTE(review): presumably "ref supplies an input value" -- confirm. */
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {  /* Constant is referenced in place only without OUT1. */
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+	return;
+      }
+      emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);  /* Store the FP number at [dest]. */
     } else {
-      /* Otherwise force a spill and use the spill slot. */
-      emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+      asm_tvstore64(as, dest, 0, ref);  /* Store the non-number value at [dest]. */
     }
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    asm_tvstore64(as, dest, 0, ref);
-    ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
   }
+  /* g->tmptv holds the TValue(s). */
+  emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);  /* dest = RID_GL + K12-encoded offset of tmptv. */
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)
@@ -727,113 +787,75 @@ static void asm_href(ASMState *as, IRIns
   int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
-  Reg key = 0, tmp = RID_TMP;
-  Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+  Reg tmp = RID_TMP, type = RID_NONE, key, tkey;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
-  int isk = irref_isk(ir->op2);
+  int isk = irref_isk(refkey);
   IRType1 kt = irkey->t;
   uint32_t k = 0;
   uint32_t khash;
-  MCLabel l_end, l_loop, l_next;
+  MCLabel l_end, l_loop;
   rset_clear(allow, tab);
 
-  if (!isk) {
-    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-    rset_clear(allow, key);
-    if (!irt_isstr(kt)) {
-      tmp = ra_scratch(as, allow);
-      rset_clear(allow, tmp);
-    }
-  } else if (irt_isnum(kt)) {
-    int64_t val = (int64_t)ir_knum(irkey)->u64;
-    if (!(k = emit_isk12(val))) {
-      key = ra_allock(as, val, allow);
-      rset_clear(allow, key);
-    }
-  } else if (!irt_ispri(kt)) {
-    if (!(k = emit_isk12(irkey->i))) {
-      key = ra_alloc1(as, refkey, allow);
-      rset_clear(allow, key);
-    }
-  }
-
-  /* Allocate constants early. */
-  if (irt_isnum(kt)) {
-    if (!isk) {
-      tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
-      ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
-      rset_clear(allow, tisnum);
-    }
-  } else if (irt_isaddr(kt)) {
-    if (isk) {
-      int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
-      scr = ra_allock(as, kk, allow);
+  /* Allocate register for tkey outside of the loop. */
+  if (isk) {
+    int64_t kk;
+    if (irt_isaddr(kt)) {
+      kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+    } else if (irt_isnum(kt)) {
+      kk = (int64_t)ir_knum(irkey)->u64;
+      /* Assumes -0.0 is already canonicalized to +0.0. */
     } else {
-      scr = ra_scratch(as, allow);
+      lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+      kk = ~((int64_t)~irt_toitype(kt) << 47);
     }
-    rset_clear(allow, scr);
+    k = emit_isk12(kk);
+    tkey = k ? 0 : ra_allock(as, kk, allow);
   } else {
-    lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
-    type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
-    scr = ra_scratch(as, rset_clear(allow, type));
-    rset_clear(allow, scr);
+    tkey = ra_scratch(as, allow);
   }
 
   /* Key not found in chain: jump to exit (if merged) or load niltv. */
   l_end = emit_label(as);
   as->invmcp = NULL;
-  if (merge == IR_NE)
+  if (merge == IR_NE) {
     asm_guardcc(as, CC_AL);
-  else if (destused)
-    emit_loada(as, dest, niltvg(J2G(as->J)));
+  } else if (destused) {
+    uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
+    lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
+    emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
+  }
 
   /* Follow hash chain until the end. */
   l_loop = --as->mcp;
-  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
-  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
-  l_next = emit_label(as);
+  if (destused)
+    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
 
   /* Type and value comparison. */
   if (merge == IR_EQ)
     asm_guardcc(as, CC_EQ);
   else
     emit_cond_branch(as, CC_EQ, l_end);
+  emit_nm(as, A64I_CMPx^k, tmp, tkey);
+  if (!destused)
+    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
+  emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
+  *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
 
-  if (irt_isnum(kt)) {
-    if (isk) {
-      /* Assumes -0.0 is already canonicalized to +0.0. */
-      if (k)
-	emit_n(as, A64I_CMPx^k, tmp);
-      else
-	emit_nm(as, A64I_CMPx, key, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
-    } else {
-      emit_nm(as, A64I_FCMPd, key, ftmp);
-      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
-      emit_cond_branch(as, CC_LO, l_next);
-      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
-    }
-  } else if (irt_isaddr(kt)) {
-    if (isk) {
-      emit_nm(as, A64I_CMPx, scr, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+  /* Construct tkey as canonicalized or tagged key. */
+  if (!isk) {
+    if (irt_isnum(kt)) {
+      key = ra_alloc1(as, refkey, RSET_FPR);
+      emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
+      /* A64I_FMOV_R_D from key to tkey done below. */
     } else {
-      emit_nm(as, A64I_CMPx, tmp, scr);
-      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+      lj_assertA(irt_isaddr(kt), "bad HREF key type");
+      key = ra_alloc1(as, refkey, allow);
+      type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
+      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
     }
-  } else {
-    emit_nm(as, A64I_CMPw, scr, type);
-    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
   }
 
-  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
-  if (!isk && irt_isaddr(kt)) {
-    type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
-    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
-    rset_clear(allow, type);
-  }
   /* Load main position relative to tab->node into dest. */
   khash = isk ? ir_khash(as, irkey) : 1;
   if (khash == 0) {
@@ -847,7 +869,6 @@ static void asm_href(ASMState *as, IRIns
       emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
     } else if (irt_isstr(kt)) {
-      /* Fetch of str->sid is cheaper than ra_allock. */
       emit_dnm(as, A64I_ANDw, dest, dest, tmp);
       emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@@ -856,23 +877,18 @@ static void asm_href(ASMState *as, IRIns
       emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
       emit_dnm(as, A64I_SUBw, dest, dest, tmp);
       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
-      emit_dnm(as, A64I_EORw, dest, dest, tmp);
-      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
+      emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
       emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
-      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
       if (irt_isnum(kt)) {
+	emit_dnm(as, A64I_EORw, tmp, tkey, dest);
 	emit_dnm(as, A64I_ADDw, dest, dest, dest);
-	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
-	emit_dm(as, A64I_MOVw, tmp, dest);
-	emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
+	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
+	emit_nm(as, A64I_FCMPZd, (key & 31), 0);
+	emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
       } else {
-	checkmclim(as);
-	emit_dm(as, A64I_MOVw, tmp, key);
-	emit_dnm(as, A64I_EORw, dest, dest,
-		 ra_allock(as, irt_toitype(kt) << 15, allow));
-	emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
-	emit_dm(as, A64I_MOVx, dest, key);
+	emit_dnm(as, A64I_EORw, tmp, key, dest);
+	emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
       }
     }
   }
@@ -884,10 +900,10 @@ static void asm_hrefk(ASMState *as, IRIn
   IRIns *irkey = IR(kslot->op1);
   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
-  int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+  int bigofs = !emit_checkofs(A64I_LDRx, kofs);
   Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
-  Reg key, idx = node;
+  Reg idx = node;
   RegSet allow = rset_exclude(RSET_GPR, node);
   uint64_t k;
   lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@@ -906,34 +922,39 @@ static void asm_hrefk(ASMState *as, IRIn
   } else {
     k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
   }
-  key = ra_scratch(as, allow);
-  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
-  emit_lso(as, A64I_LDRx, key, idx, kofs);
+  emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
+  emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
   if (bigofs)
-    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+    emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
 }
 
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, A64I_LDRx, dest, v);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guardcc(as, CC_NE);
-      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
-      emit_opk(as, A64I_ADDx, dest, uv,
+    if (guarded)
+      asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
+    if (ir->o == IR_UREFC)
+      emit_opk(as, A64I_ADDx, dest, dest,
 	       (int32_t)offsetof(GCupval, tv), RSET_GPR);
-      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    else
+      emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_lso(as, A64I_LDRB, RID_TMP, dest,
+	       (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loadu64(as, dest, k);
     } else {
-      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
     }
-    emit_lso(as, A64I_LDRx, uv, func,
-	     (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1038,7 +1059,7 @@ static void asm_xstore(ASMState *as, IRI
 
 static void asm_ahuvload(ASMState *as, IRIns *ir)
 {
-  Reg idx, tmp, type;
+  Reg idx, tmp;
   int32_t ofs = 0;
   RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
   lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@@ -1057,18 +1078,20 @@ static void asm_ahuvload(ASMState *as, I
   } else {
     tmp = ra_scratch(as, gpr);
   }
-  type = ra_scratch(as, rset_clear(gpr, tmp));
-  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
+  rset_clear(gpr, idx);
+  if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   /* Always do the type check, even if the load result is unused. */
   asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
   if (irt_type(ir->t) >= IRT_NUM) {
     lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
 	       "bad load type %d", irt_type(ir->t));
     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
-	    ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+	    ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
   } else if (irt_isaddr(ir->t)) {
-    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
-    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
   } else if (irt_isnil(ir->t)) {
     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
   } else {
@@ -1176,7 +1199,7 @@ dotypecheck:
       tmp = ra_scratch(as, allow);
       rset_clear(allow, tmp);
     }
-    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
+    if (ra_hasreg(dest) && tmp != dest)
       emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
     /* Need type check, even if the load result is unused. */
     asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
@@ -1184,16 +1207,15 @@ dotypecheck:
       lj_assertA(irt_isinteger(t) || irt_isnum(t),
 		 "bad SLOAD type %d", irt_type(t));
       emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
-	      ra_allock(as, LJ_TISNUM << 15, allow), tmp);
+	      ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp);
     } else if (irt_isnil(t)) {
       emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
     } else if (irt_ispri(t)) {
       emit_nm(as, A64I_CMPx,
 	      ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
     } else {
-      Reg type = ra_scratch(as, allow);
-      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
-      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
+      emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
     }
     emit_lso(as, A64I_LDRx, tmp, base, ofs);
     return;
@@ -1261,17 +1283,14 @@ static void asm_tbar(ASMState *as, IRIns
 {
   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
-  Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
-		     rset_exclude(rset_exclude(RSET_GPR, tab), link));
   Reg mark = RID_TMP;
   MCLabel l_end = emit_label(as);
-  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
   emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
-  emit_lso(as, A64I_STRx, tab, gr,
-	   (int32_t)offsetof(global_State, gc.grayagain));
+  /* Keep STRx in the middle to avoid LDP/STP fusion with surrounding code. */
+  emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_setgl(as, tab, gc.grayagain);
   emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
-  emit_lso(as, A64I_LDRx, link, gr,
-	   (int32_t)offsetof(global_State, gc.grayagain));
+  emit_getgl(as, link, gc.grayagain);
   emit_cond_branch(as, CC_EQ, l_end);
   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
   emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
@@ -1282,7 +1301,6 @@ static void asm_obar(ASMState *as, IRIns
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
   IRRef args[2];
   MCLabel l_end;
-  RegSet allow = RSET_GPR;
   Reg obj, val, tmp;
   /* No need for other object barriers (yet). */
   lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@@ -1291,16 +1309,15 @@ static void asm_obar(ASMState *as, IRIns
   args[0] = ASMREF_TMP1;  /* global_State *g */
   args[1] = ir->op1;      /* TValue *tv      */
   asm_gencall(as, ci, args);
-  ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
+  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
   obj = IR(ir->op1)->r;
-  tmp = ra_scratch(as, rset_exclude(allow, obj));
-  emit_cond_branch(as, CC_EQ, l_end);
-  emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
   emit_cond_branch(as, CC_EQ, l_end);
   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
   val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
   emit_lso(as, A64I_LDRB, tmp, obj,
-     (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
   emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
 }
 
@@ -1342,12 +1359,12 @@ static int asm_swapops(ASMState *as, IRR
   if (irref_isk(lref))
     return 1;  /* But swap constants to the right. */
   ir = IR(rref);
-  if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
       (ir->o == IR_ADD && ir->op1 == ir->op2) ||
       (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
     return 0;  /* Don't swap fusable operands to the left. */
   ir = IR(lref);
-  if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+  if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
       (ir->o == IR_ADD && ir->op1 == ir->op2) ||
       (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
     return 1;  /* But swap fusable operands to the right. */
@@ -1393,13 +1410,12 @@ static void asm_intneg(ASMState *as, IRI
 static void asm_intmul(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   if (irt_isguard(ir->t)) {  /* IR_MULOV */
     asm_guardcc(as, CC_NE);
     emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
-    emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
-    emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
+    emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
     emit_dnm(as, A64I_SMULL, dest, right, left);
   } else {
     emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@@ -1659,16 +1675,15 @@ static void asm_intcomp(ASMState *as, IR
       if (asm_swapops(as, blref, brref)) {
 	Reg tmp = blref; blref = brref; brref = tmp;
       }
+      bleft = ra_alloc1(as, blref, RSET_GPR);
       if (irref_isk(brref)) {
 	uint64_t k = get_k64val(as, brref);
-	if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
-	  asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
-		       ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
+	if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
+	    asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
+			 emit_ctz64(k)))
 	  return;
-	}
 	m2 = emit_isk13(k, irt_is64(irl->t));
       }
-      bleft = ra_alloc1(as, blref, RSET_GPR);
       ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
       if (!m2)
 	m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@@ -1704,13 +1719,25 @@ static void asm_comp(ASMState *as, IRIns
 
 #define asm_equal(as, ir)	asm_comp(as, ir)
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-  UNUSED(as); UNUSED(ir);
-  lj_assertA(0, "unexpected HIOP");  /* Unused on 64 bit. */
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_CALLN:
+  case IR_CALLL:
+  case IR_CALLS:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+  }
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -1731,37 +1758,28 @@ static void asm_prof(ASMState *as, IRIns
 static void asm_stack_check(ASMState *as, BCReg topslot,
 			    IRIns *irp, RegSet allow, ExitNo exitno)
 {
-  Reg pbase;
   uint32_t k;
+  Reg pbase = RID_BASE;
   if (irp) {
-    if (!ra_hasspill(irp->s)) {
-      pbase = irp->r;
-      lj_assertA(ra_hasreg(pbase), "base reg lost");
-    } else if (allow) {
-      pbase = rset_pickbot(allow);
-    } else {
-      pbase = RID_RET;
-      emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
-    }
-  } else {
-    pbase = RID_BASE;
+    pbase = irp->r;
+    if (!ra_hasreg(pbase))
+      pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
   }
   emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
+  if (pbase & 0x80)  /* Restore temp. register. */
+    emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
   k = emit_isk12((8*topslot));
   lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
   emit_n(as, A64I_CMPx^k, RID_TMP);
-  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
+  emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
   emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
 	   (int32_t)offsetof(lua_State, maxstack));
-  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
-    if (ra_hasspill(irp->s))
-      emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
-    emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
-    if (ra_hasspill(irp->s) && !allow)
-      emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
-  } else {
-    emit_getgl(as, RID_TMP, cur_L);
+  if (pbase & 0x40) {
+    emit_getgl(as, (pbase & 31), jit_base);
+    if (pbase & 0x80)  /* Save temp register. */
+      emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
   }
+  emit_getgl(as, RID_TMP, cur_L);
 }
 
 /* Restore Lua stack from on-trace state. */
@@ -1781,7 +1799,14 @@ static void asm_stack_restore(ASMState *
     IRIns *ir = IR(ref);
     if ((sn & SNAP_NORESTORE))
       continue;
-    if (irt_isnum(ir->t)) {
+    if ((sn & SNAP_KEYINDEX)) {
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
+			       ra_alloc1(as, ref, allow);
+      rset_clear(allow, r);
+      emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
+      emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
+    } else if (irt_isnum(ir->t)) {
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
     } else {
@@ -1796,7 +1821,7 @@ static void asm_stack_restore(ASMState *
 
 /* Marker to prevent patching the GC check exit. */
 #define ARM64_NOPATCH_GC_CHECK \
-  (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
+  (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
 
 /* Check GC threshold and do one or more GC steps. */
 static void asm_gc_check(ASMState *as)
@@ -1804,7 +1829,7 @@ static void asm_gc_check(ASMState *as)
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
   IRRef args[2];
   MCLabel l_end;
-  Reg tmp1, tmp2;
+  Reg tmp2;
   ra_evictset(as, RSET_SCRATCH);
   l_end = emit_label(as);
   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
@@ -1813,17 +1838,14 @@ static void asm_gc_check(ASMState *as)
   args[0] = ASMREF_TMP1;  /* global_State *g */
   args[1] = ASMREF_TMP2;  /* MSize steps     */
   asm_gencall(as, ci, args);
-  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+  emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
   tmp2 = ra_releasetmp(as, ASMREF_TMP2);
   emit_loadi(as, tmp2, as->gcsteps);
   /* Jump around GC step if GC total < GC threshold. */
   emit_cond_branch(as, CC_LS, l_end);
   emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
-  emit_lso(as, A64I_LDRx, tmp2, tmp1,
-	   (int32_t)offsetof(global_State, gc.threshold));
-  emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
-	   (int32_t)offsetof(global_State, gc.total));
-  ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
+  emit_getgl(as, tmp2, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
   as->gcsteps = 0;
   checkmclim(as);
 }
@@ -1846,49 +1868,48 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
-/* -- Head of trace ------------------------------------------------------- */
-
-/* Reload L register from g->cur_L. */
-static void asm_head_lreg(ASMState *as)
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
 {
-  IRIns *ir = IR(ASMREF_L);
-  if (ra_used(ir)) {
-    Reg r = ra_dest(as, ir, RSET_GPR);
-    emit_getgl(as, r, cur_L);
-    ra_evictk(as);
-  }
+  UNUSED(as);  /* Nothing to do. */
 }
 
+/* -- Head of trace ------------------------------------------------------- */
+
 /* Coalesce BASE register for a root trace. */
 static void asm_head_root_base(ASMState *as)
 {
-  IRIns *ir;
-  asm_head_lreg(as);
-  ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
-    ra_spill(as, ir);
-  ra_destreg(as, ir, RID_BASE);
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (r != RID_BASE)
+      emit_movrr(as, ir, r, RID_BASE);
+  }
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
-  IRIns *ir;
-  asm_head_lreg(as);
-  ir = IR(REF_BASE);
-  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
-    ra_spill(as, ir);
-  if (ra_hasspill(irp->s)) {
-    rset_clear(allow, ra_dest(as, ir, allow));
-  } else {
-    Reg r = irp->r;
-    lj_assertA(ra_hasreg(r), "base reg lost");
-    rset_clear(allow, r);
-    if (r != ir->r && !rset_test(as->freeset, r))
-      ra_restore(as, regcost_ref(as->cost[r]));
-    ra_destreg(as, ir, r);
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      return r;  /* Same BASE register already coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      /* Move from coalesced parent reg. */
+      emit_movrr(as, ir, r, irp->r);
+      return irp->r;
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
@@ -1932,20 +1953,47 @@ static void asm_tail_prep(ASMState *as)
 /* Ensure there are enough stack slots for call arguments. */
 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
-  IRRef args[CCI_NARGS_MAX*2];
+#if LJ_HASFFI
   uint32_t i, nargs = CCI_XNARGS(ci);
-  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
-  asm_collectargs(as, ir, ci, args);
-  for (i = 0; i < nargs; i++) {
-    if (args[i] && irt_isfp(IR(args[i])->t)) {
-      if (nfpr > 0) nfpr--; else nslots += 2;
-    } else {
-      if (ngpr > 0) ngpr--; else nslots += 2;
+  if (nargs > (REGARG_NUMGPR < REGARG_NUMFPR ? REGARG_NUMGPR : REGARG_NUMFPR) ||
+      (LJ_TARGET_OSX && (ci->flags & CCI_VARARG))) {
+    IRRef args[CCI_NARGS_MAX*2];
+    int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+    int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
+    asm_collectargs(as, ir, ci, args);
+#if LJ_ABI_WIN
+    if ((ci->flags & CCI_VARARG)) nfpr = 0;
+#endif
+    for (i = 0; i < nargs; i++) {
+      int al = spalign;
+      if (!args[i]) {
+#if LJ_TARGET_OSX
+	/* Marker for start of varargs. */
+	nfpr = 0;
+	ngpr = 0;
+	spalign = 7;
+#endif
+      } else if (irt_isfp(IR(args[i])->t)) {
+	if (nfpr > 0) { nfpr--; continue; }
+#if LJ_ABI_WIN
+	if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
+#elif LJ_TARGET_OSX
+	al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
+#endif
+      } else {
+	if (ngpr > 0) { ngpr--; continue; }
+#if LJ_TARGET_OSX
+	al |= irt_size(IR(args[i])->t) - 1;
+#endif
+      }
+      spofs = (spofs + 2*al+1) & ~al;  /* Align and bump stack pointer. */
     }
+    nslots = (spofs + 3) >> 2;
+    if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+      as->evenspill = nslots;
   }
-  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
-    as->evenspill = nslots;
-  return REGSP_HINT(RID_RET);
+#endif
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_mips.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm_mips.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_mips.h
@@ -1,6 +1,6 @@
 /*
 ** MIPS IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns
 /* Setup spare long-range jump slots per mcarea. */
 static void asm_sparejump_setup(ASMState *as)
 {
-  MCode *mxp = as->mcbot;
-  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
+  MCode *mxp = as->mctop;
+  if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
+    mxp -= MIPS_SPAREJUMP*2;
     lj_assertA(MIPSI_NOP == 0, "bad NOP");
     memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
-    mxp += MIPS_SPAREJUMP*2;
-    lj_assertA(mxp < as->mctop, "MIPS_SPAREJUMP too big");
-    lj_mcode_sync(as->mcbot, mxp);
-    lj_mcode_commitbot(as->J, mxp);
-    as->mcbot = mxp;
-    as->mclim = as->mcbot + MCLIM_REDZONE;
+    as->mctop = mxp;
+  }
+}
+
+static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
+{
+  MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
+  int slot = MIPS_SPAREJUMP;
+  while (slot--) {
+    mxp -= 2;
+    if (*mxp == tjump) {
+      return mxp;
+    } else if (*mxp == MIPSI_NOP) {
+      *mxp = tjump;
+      return mxp;
+    }
   }
+  return NULL;
 }
 
 /* Setup exit stub after the end of each trace. */
@@ -181,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as,
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -336,19 +351,15 @@ static void asm_gencall(ASMState *as, co
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-#if LJ_32
   int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
-#endif
 #if !LJ_SOFTFP
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
 #endif
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
-#if LJ_32
   if (hiop && ra_hasreg((ir+1)->r))
     rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
-#endif
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lj_assertA(!irt_ispri(ir->t), "PRI dest");
@@ -377,10 +388,8 @@ static void asm_setupresult(ASMState *as
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -450,6 +459,27 @@ static void asm_retf(ASMState *as, IRIns
   emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  if ((as->flags & JIT_F_MIPSXXR2)) {
+    emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
+	      lj_fls(SBUF_MASK_FLAG), 0);
+  } else {
+    emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
+    emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
+  }
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -739,7 +769,7 @@ static void asm_conv(ASMState *as, IRIns
 	  }
 	}
       } else {
-	if (st64) {
+	if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	  /* This is either a 32 bit reg/reg mov which zeroes the hiword
 	  ** or a load of the loword from a 64 bit address.
 	  */
@@ -827,34 +857,63 @@ static void asm_tvstore64(ASMState *as,
 #endif
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, igcptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-#if LJ_32
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
-    }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+#if LJ_64
+	emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
+#else
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#endif
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+	emit_tsi(as, MIPSI_SDC1, (src & 31),  RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP32
+	lj_assertA(0, "unsplit FP op");
 #else
-    asm_tvstore64(as, dest, 0, ref);
-    emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
-	     (int32_t)(offsetof(global_State, tmptv)-32768));
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
 #endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+#if LJ_32
+      Reg type;
+      emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
+#else
+      asm_tvstore64(as, dest, 0, ref);
+      emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
+#endif
+    }
+  } else {
+    emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -909,11 +968,16 @@ static void asm_href(ASMState *as, IRIns
   MCLabel l_end, l_loop, l_next;
 
   rset_clear(allow, tab);
-#if LJ_SOFTFP32
-  if (!isk) {
-    key = ra_alloc1(as, refkey, allow);
-    rset_clear(allow, key);
-    if (irkey[1].o == IR_HIOP) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+  } else {
+    if (!irt_ispri(kt)) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+#if LJ_32
+    if (LJ_SOFTFP && irkey[1].o == IR_HIOP) {
       if (ra_hasreg((irkey+1)->r)) {
 	type = tmpnum = (irkey+1)->r;
 	tmp1 = ra_scratch(as, allow);
@@ -924,23 +988,11 @@ static void asm_href(ASMState *as, IRIns
       }
       rset_clear(allow, tmpnum);
     } else {
-      type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
+      type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
       rset_clear(allow, type);
     }
-  }
-#else
-  if (!LJ_SOFTFP && irt_isnum(kt)) {
-    key = ra_alloc1(as, refkey, RSET_FPR);
-    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
-  } else if (!irt_ispri(kt)) {
-    key = ra_alloc1(as, refkey, allow);
-    rset_clear(allow, key);
-#if LJ_32
-    type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
-    rset_clear(allow, type);
 #endif
   }
-#endif
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 #if LJ_64
@@ -953,10 +1005,10 @@ static void asm_href(ASMState *as, IRIns
     } else {
       int64_t k;
       if (isk && irt_isaddr(kt)) {
-	k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+	k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
       } else {
 	lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
-	k = ~((int64_t)~irt_toitype(ir->t) << 47);
+	k = ~((int64_t)~irt_toitype(kt) << 47);
       }
       cmp64 = ra_allock(as, k, allow);
       rset_clear(allow, cmp64);
@@ -1155,22 +1207,29 @@ nolo:
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
-      emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
-      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
-    } else {
-      emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
+    if (ir->o == IR_UREFC)
+      emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
+    else
+      emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, dest, o);
+    } else {
+      emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) +
+	       (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
     }
-    emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
-	     (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1285,8 +1344,8 @@ static void asm_fload(ASMState *as, IRIn
       }
     }
     ofs = field_ofs[ir->op2];
+    lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   }
-  lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
   emit_tsi(as, mi, dest, idx, ofs);
 }
 
@@ -1352,6 +1411,7 @@ static void asm_ahuvload(ASMState *as, I
 #endif
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
   rset_clear(allow, idx);
   if (irt_isnum(t)) {
     asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1524,7 +1584,7 @@ dotypecheck:
       asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
       emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
     } else {
-      Reg ktype = ra_allock(as, irt_toitype(t), allow);
+      Reg ktype = ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : irt_toitype(t), allow);
       asm_guard(as, MIPSI_BNE, type, ktype);
     }
   }
@@ -1542,6 +1602,10 @@ dotypecheck:
     if (irt_ispri(t)) {
       asm_guard(as, MIPSI_BNE, type,
 		ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
+    } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+      asm_guard(as, MIPSI_BNE, RID_TMP,
+		ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
+      emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 0);
     } else {
       if (irt_isnum(t)) {
 	asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1837,7 +1901,7 @@ static void asm_arithov(ASMState *as, IR
   lj_assertA(!irt_is64(ir->t), "bad usage");
   if (irref_isk(ir->op2)) {
     int k = IR(ir->op2)->i;
-    if (ir->o == IR_SUBOV) k = -k;
+    if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);
     if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
       left = ra_alloc1(as, ir->op1, RSET_GPR);
       asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -2327,15 +2391,15 @@ static void asm_comp64eq(ASMState *as, I
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
 #if LJ_HASFFI && !LJ_SOFTFP
@@ -2382,38 +2446,33 @@ static void asm_hiop(ASMState *as, IRIns
     }
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
-#if LJ_HASFFI
+#if LJ_32 && LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-#if LJ_SOFTFP
+#if LJ_32 && LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
   case IR_STRTO:
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused on MIPS64 or without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2513,12 +2572,29 @@ static void asm_stack_restore(ASMState *
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
       emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
 #else
-      asm_tvstore64(as, RID_BASE, ofs, ref);
+      if ((sn & SNAP_KEYINDEX)) {
+	RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+	int64_t kki = (int64_t)LJ_KEYINDEX << 32;
+	if (irref_isk(ref)) {
+	  emit_tsi(as, MIPSI_SD,
+		   ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
+		   RID_BASE, ofs);
+	} else {
+	  Reg src = ra_alloc1(as, ref, allow);
+	  Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
+	  emit_tsi(as, MIPSI_SD, RID_TMP, RID_BASE, ofs);
+	  emit_dst(as, MIPSI_DADDU, RID_TMP, src, rki);
+	}
+      } else {
+	asm_tvstore64(as, RID_BASE, ofs, ref);
+      }
 #endif
     }
     checkmclim(as);
@@ -2575,6 +2651,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  if (as->loopinv) as->mctop--;
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */
@@ -2582,7 +2664,6 @@ static void asm_head_root_base(ASMState
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -2593,25 +2674,24 @@ static void asm_head_root_base(ASMState
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
-  if (as->loopinv) as->mctop--;
   if (ra_hasreg(r)) {
     ra_free(as, r);
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
@@ -2704,21 +2784,17 @@ void lj_asm_patchexit(jit_State *J, GCtr
 	patchbranch:
 	  p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
 	  *p = MIPSI_NOP;  /* Replace the load of the exit number. */
-	  cstop = p;
+	  cstop = p+1;
 	  if (!cstart) cstart = p-1;
 	} else {  /* Branch out of range. Use spare jump slot in mcarea. */
-	  int i;
-	  for (i = (int)(sizeof(MCLink)/sizeof(MCode));
-	       i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
-	       i += 2) {
-	    if (mcarea[i] == tjump) {
-	      delta = mcarea+i - p;
-	      goto patchbranch;
-	    } else if (mcarea[i] == MIPSI_NOP) {
-	      mcarea[i] = tjump;
-	      cstart = mcarea+i;
-	      delta = mcarea+i - p;
+	  MCode *mcjump = asm_sparejump_use(mcarea, tjump);
+	  if (mcjump) {
+	    lj_mcode_sync(mcjump, mcjump+1);
+	    delta = mcjump - p;
+	    if (((delta + 0x8000) >> 16) == 0) {
 	      goto patchbranch;
+	    } else {
+	      lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
 	    }
 	  }
 	  /* Ignore jump slot overflow. Child trace is simply not attached. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_ppc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm_ppc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_ppc.h
@@ -1,6 +1,6 @@
 /*
 ** PPC IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Register allocator extensions --------------------------------------- */
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as,
 	  return ra_allock(as, ofs-(int16_t)ofs, allow);
 	}
       }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+      return RID_JGL;
     }
   }
   *ofsp = 0;
@@ -232,7 +235,8 @@ static int asm_fusemadd(ASMState *as, IR
 {
   IRRef lref = ir->op1, rref = ir->op2;
   IRIns *irm;
-  if (lref != rref &&
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
 	ra_noreg(irm->r)) ||
        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
@@ -337,10 +341,8 @@ static void asm_setupresult(ASMState *as
       } else {
 	ra_destreg(as, ir, RID_FPRET);
       }
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       ra_destreg(as, ir, RID_RET);
     }
@@ -389,6 +391,21 @@ static void asm_retf(ASMState *as, IRIns
   emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 #if !LJ_SOFTFP
@@ -567,28 +584,54 @@ static void asm_strto(ASMState *as, IRIn
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
-      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
-    else  /* Otherwise force a spill and use the spill slot. */
-      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
-    RegSet allow = rset_exclude(RSET_GPR, dest);
-    Reg type;
-    emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
-    if (!irt_ispri(ir->t)) {
-      Reg src = ra_alloc1(as, ref, allow);
-      emit_setgl(as, src, tmptv.gcr);
+  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+	lj_assertA(irref_isk(ref), "unsplit FP op");
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+		   tmptv.u32.lo);
+	emit_setgl(as,
+		   ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+		   tmptv.u32.hi);
+#else
+	Reg src = ra_alloc1(as, ref, RSET_FPR);
+	emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+	emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
+#endif
+      } else if (irref_isk(ref)) {
+	/* Use the number constant itself as a TValue. */
+	ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+      } else {
+#if LJ_SOFTFP
+	lj_assertA(0, "unsplit FP op");
+#else
+	/* Otherwise force a spill and use the spill slot. */
+	emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+#endif
+      }
+    } else {
+      /* Otherwise use g->tmptv to hold the TValue. */
+      Reg type;
+      emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+      if (!irt_ispri(ir->t)) {
+	Reg src = ra_alloc1(as, ref, RSET_GPR);
+	emit_setgl(as, src, tmptv.gcr);
+      }
+      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+	type = ra_alloc1(as, ref+1, RSET_GPR);
+      else
+	type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+      emit_setgl(as, type, tmptv.it);
     }
-    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
-      type = ra_alloc1(as, ref+1, allow);
-    else
-      type = ra_allock(as, irt_toitype(ir->t), allow);
-    emit_setgl(as, type, tmptv.it);
+  } else {
+    emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
   }
 }
 
@@ -797,23 +840,30 @@ static void asm_hrefk(ASMState *as, IRIn
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
   } else {
-    Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
-      asm_guardcc(as, CC_NE);
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
       emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
-      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
-      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    }
+    if (ir->o == IR_UREFC)
+      emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
+    else
+      emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loadi(as, dest, k);
     } else {
-      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
+      emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+	       (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
     }
-    emit_tai(as, PPCI_LWZ, uv, func,
-	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -894,7 +944,7 @@ static void asm_fload(ASMState *as, IRIn
   int32_t ofs;
   if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
     idx = RID_JGL;
-    ofs = (ir->op2 << 2) - 32768;
+    ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
   } else {
     idx = ra_alloc1(as, ir->op1, RSET_GPR);
     if (ir->op2 == IRFL_TAB_ARRAY) {
@@ -975,6 +1025,10 @@ static void asm_ahuvload(ASMState *as, I
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) {
+    ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
+	  ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
+  }
   if (irt_isnum(t)) {
     Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
     asm_guardcc(as, CC_GE);
@@ -1057,7 +1111,8 @@ static void asm_sload(ASMState *as, IRIn
   lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
 	     "inconsistent SLOAD variant");
   lj_assertA(LJ_DUALNUM ||
-	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
+	     !irt_isint(t) ||
+	     (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
 	     "bad SLOAD type");
 #if LJ_SOFTFP
   lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
@@ -1122,7 +1177,12 @@ dotypecheck:
   } else {
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       asm_guardcc(as, CC_NE);
-      emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
+      if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+	emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff));
+	emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16));
+      } else {
+	emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
+      }
       type = RID_TMP;
     }
     if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
@@ -1894,15 +1954,15 @@ static void asm_comp64(ASMState *as, IRI
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
 #if LJ_HASFFI && !LJ_SOFTFP
@@ -1937,12 +1997,16 @@ static void asm_hiop(ASMState *as, IRIns
     }
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
 #if LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+  case IR_CNEWI:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
 #if LJ_SOFTFP
   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1950,25 +2014,16 @@ static void asm_hiop(ASMState *as, IRIns
     if (!uselo)
       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
     break;
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+    /* Nothing to do here. Handled by lo op itself. */
+    break;
 #endif
-  case IR_CALLN:
-  case IR_CALLS:
-  case IR_CALLXS:
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
-#if LJ_SOFTFP
-  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
-  case IR_CNEWI:
-    /* Nothing to do here. Handled by lo op itself. */
-    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused without SOFTFP or FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2055,6 +2110,8 @@ static void asm_stack_restore(ASMState *
       } else if ((sn & SNAP_SOFTFPNUM)) {
 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
@@ -2113,6 +2170,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */
@@ -2130,7 +2193,7 @@ static void asm_head_root_base(ASMState
 }
 
 /* Coalesce BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2139,15 +2202,15 @@ static RegSet asm_head_side_base(ASMStat
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
-      rset_clear(allow, irp->r);
       emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_riscv64.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_riscv64.h
@@ -0,0 +1,1976 @@
+/*
+** RISC-V IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register for ref, recording hint first if ref has neither. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);  /* Allocate from the allowed set. */
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate a register for ref, or return RID_ZERO for a GPR constant 0. */
+static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
+      return RID_ZERO;  /* Non-FPR constant zero needs no allocation. */
+    r = ra_allocref(as, ref, allow);
+  } else {
+    ra_noweak(as, r);
+  }
+  return r;
+}
+
+/* Allocate both source regs of ir. Returns left | (right << 8). */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {  /* Left already has a register. */
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {  /* Only right has a register. */
+    ra_noweak(as, right);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {  /* Allocate the hinted operand first. */
+    right = ra_alloc1z(as, ir->op2, allow);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_alloc1z(as, ir->op1, allow);
+    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);  /* Callers unpack with >> 8 and & 255. */
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Copied from MIPS: AUIPC+JALR is expensive to set up in-place. */
+#define RISCV_SPAREJUMP		4  /* Slots per mcarea, 2 insns each. */
+
+/* Reserve EBREAK-filled spare jump slots if mctop is at the mcarea end. */
+
+static void asm_sparejump_setup(ASMState *as)
+{
+  MCode *mxp = as->mctop;
+  if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
+    for (int i = RISCV_SPAREJUMP*2; i--; )  /* Two words per slot. */
+      *--mxp = RISCVI_EBREAK;
+    as->mctop = mxp;
+  }
+}
+
+static MCode *asm_sparejump_use(MCode *mcarea, MCode *target)
+{  /* Find or fill a spare jump slot for target. Returns NULL if none. */
+  MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
+  int slot = RISCV_SPAREJUMP;
+  RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr;
+  while (slot--) {  /* Scan slots downwards from the end of the mcarea. */
+    mxp -= 2;
+    ptrdiff_t delta = (char *)target - (char *)mxp;
+    tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)),
+    tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+    if (mxp[0] == tauipc && mxp[1] == tjalr) {  /* Reuse existing jump. */
+      return mxp;
+    } else if (mxp[0] == tslot) {  /* Slot still holds EBREAK: claim it. */
+      mxp[0] = tauipc, mxp[1] = tjalr;
+      return mxp;
+    }
+  }
+  return NULL;  /* Every slot already holds a jump to another target. */
+}
+
+/* Setup exit stub after the end of each trace. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  for (i = nexits-1; (int32_t)i >= 0; i--)  /* One JAL per exit, to label 1. */
+    *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i)));
+  ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3);  /* From AUIPC. */
+  /* 1: sd ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */
+  *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno);
+  *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP)
+         | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta));
+  *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP)
+         | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta));
+  *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP);
+  as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+  /* Skip the 4-insn stub head. Keep in sync with exitstub_trace_addr(). */
+  return as->mctop + exitno + 4;
+}
+
+/* Emit guard: inverted branch (+8) skipping a JAL to the exit stub. */
+static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    as->mcp = ++p;
+    *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p);
+    riscvi = riscvi^RISCVF_FUNCT3(1);  /* Invert cond. */
+    target = p - 1;  /* Patch target later in asm_loop_fixup. */
+  }
+    ptrdiff_t delta = (char *)target - (char *)(p - 1);  /* From the JAL. */
+    *--p = RISCVI_JAL | RISCVF_IMMJ(delta);  /* Jump to target. */
+    *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
+    as->mcp = p;
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM	31
+
+/* Return 1 if no 'conflict' op lies strictly between ref and curins. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0;  /* Give up, ref is too far away. */
+  while (--i > ref)  /* Walk back from curins-1 down to ref+1. */
+    if (ir[i].o == conflict)
+      return 0;  /* Conflict found. */
+  return 1;  /* Ok, no conflict. */
+}
+
+/* Offset of the colocated array of a TNEW table, or 0 if not fusable. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;  /* Not a small TNEW, fusion disabled, or NEWREF in between. */
+}
+
+/* Fuse array/hash/upvalue ref into base reg (returned) + offset (*ofsp). */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;  /* 8 = presumably sizeof(TValue). */
+	  if (checki12(ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (checki12(ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+  intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g));
+	if (checki12(ofs)) {  /* Address uv->tv relative to RID_GL. */
+	  *ofsp = (int32_t)ofs;
+	  return RID_GL;
+	}
+      }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)offsetof(global_State, tmptv);
+      return RID_GL;
+    }
+  }
+  *ofsp = 0;  /* Not fused: use the ref's own register with zero offset. */
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand and emit it. */
+static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    intptr_t ofs2;
+    if (ir->o == IR_ADD) {
+      if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
+				 checki12(ofs2))) {
+	ref = ir->op1;
+	ofs = (int32_t)ofs2;
+      }
+    } else if (ir->o == IR_STRREF) {
+      ofs2 = 4096;  /* Fails checki12() below unless overwritten. */
+      lj_assertA(ofs == 0, "bad usage");
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs2 = ofs + get_kval(as, ir->op2);
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs2 = ofs + get_kval(as, ir->op1);
+	ref = ir->op2;
+      }
+      if (!checki12(ofs2)) {
+        /* NYI: Fuse ADD with constant. */
+        Reg right, left = ra_alloc2(as, ir, allow);
+        right = (left >> 8); left &= 255;
+        emit_lso(as, riscvi, rd, RID_TMP, ofs);
+        emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right);  /* RID_TMP = base. */
+        return;
+      }
+      ofs = ofs2;
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_lso(as, riscvi, rd, base, ofs);
+}
+
+/* Fuse integer multiply-accumulate. Returns 1 if fused, else 0. */
+
+static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_GPR);  /* rref: non-MUL op. */
+    Reg left = ra_alloc2(as, irm,
+       rset_exclude(rset_exclude(RSET_GPR, dest), add));
+    Reg right = (left >> 8); left &= 255;
+    emit_ds1s2(as, riscvi, dest, left, right);
+    if (dest != add) emit_mv(as, dest, add);  /* Copy addend into dest. */
+    return 1;
+  }
+  return 0;
+}
+
+/* Fuse FP multiply-add/sub (needs JIT_F_OPT_FMA). riscvir: MUL is op2. */
+
+static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+       ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+       (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);  /* rref: non-MUL op. */
+    Reg left = ra_alloc2(as, irm,
+       rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;
+    emit_ds1s2s3(as, riscvi, dest, left, right, add);
+    return 1;  /* Fused. */
+  }
+  return 0;  /* Not fused. */
+}
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{  /* args[] holds CCI_XNARGS(ci) refs; zero refs are skipped. */
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;  /* Offset of the next stack-passed argument. */
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)  /* No call is emitted here for a NULL target. */
+    emit_call(as, (void *)ci->func, 1);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) { /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    if (ref) {
+      if (irt_isfp(ir->t)) {
+        if (fpr <= REGARG_LASTFPR) {
+	  lj_assertA(rset_test(as->freeset, fpr),
+	             "reg %d not free", fpr);  /* Must have been evicted. */
+          ra_leftov(as, fpr, ref);
+	  fpr++; if(ci->flags & CCI_VARARG) gpr++;  /* Varargs also skip a GPR. */
+	} else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+	             "reg %d not free", gpr);  /* Must have been evicted. */
+          ra_leftov(as, gpr, ref);  /* FPRs exhausted: FP arg goes in a GPR. */
+	  gpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);  /* Pass on the stack. */
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      } else {
+        if (gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+	             "reg %d not free", gpr);  /* Must have been evicted. */
+          ra_leftov(as, gpr, ref);
+	  gpr++; if(ci->flags & CCI_VARARG) fpr++;  /* Varargs also skip an FPR. */
+	} else {
+	  Reg r = ra_alloc1z(as, ref, RSET_GPR);  /* Pass on the stack. */
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
+    if (irt_isfp(ir->t)) {
+      if ((ci->flags & CCI_CASTU64)) {  /* FP bits arrive in RID_RET. */
+        Reg dest = ra_dest(as, ir, RSET_FPR);
+  emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X,
+	        dest, RID_RET);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {  /* Result is split across ir and the following HIOP. */
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{  /* Call a C function whose address is an IR operand (constant or not). */
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)get_kval(as, func);
+  } else {  /* Need specific register for indirect calls. */
+    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
+    MCode *p = as->mcp;  /* Machine code is written backwards from mcp. */
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r);
+    if (r == RID_CFUNCADDR)  /* NOTE(review): both arms look equivalent. */
+      *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
+    else
+      *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
+    as->mcp = p;
+    ci.func = (ASMFunction)(void *)0;  /* Tells asm_gencall not to emit a call. */
+  }
+  asm_gencall(as, &ci, args);
+}
+
+static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
+{  /* Call helper lj_ir_callinfo[id] with a custom set of clobbered regs. */
+  /* The modified regs must match with the *.dasc implementation. */
+  RegSet drop = RID2RSET(RID_X6)|RID2RSET(RID_X7)|RID2RSET(RID_F10)|
+                RID2RSET(RID_F14)|RID2RSET(RID_F1)|RID2RSET(RID_F3)|
+                RID2RSET(RID_F4);
+  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);  /* Dest is set up below. */
+  ra_evictset(as, drop);
+  ra_destreg(as, ir, RID_FPRET);  /* Result comes back in RID_FPRET. */
+  emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
+  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);  /* Operand in first FP arg reg. */
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));  /* Expected return PC (constant). */
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));  /* Slots to pop. */
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);  /* Runs last: publish the new base. */
+  emit_addptr(as, base, -8*delta);
+  asm_guard(as, RISCVI_BNE, RID_TMP,  /* Exit if the loaded PC differs. */
+	    ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, RISCVI_LD, RID_TMP, base, -8);  /* Runs first: load frame PC. */
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{  /* Rewrite sb->L as cur_L combined with the flag bits already stored. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));  /* Runs last. */
+  emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp);  /* cur_L | flags. */
+  emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG);  /* Keep flag bits. */
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));  /* Runs first. */
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{  /* Guarded double (in left) to int32: convert, convert back, compare. */
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+  asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO);  /* cmp == 0: round trip differs. */
+  emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left);
+  emit_ds(as, RISCVI_FCVT_D_W, tmp, dest);  /* Back to double for the check. */
+  emit_ds(as, RISCVI_FCVT_W_D, dest, left);  /* Runs first. */
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{  /* dest = low 32 bits of the double sum op1 + op2. */
+  RegSet allow = RSET_FPR;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  emit_ds(as, RISCVI_FMV_X_W, dest, tmp);  /* Runs last: take the low word. */
+  emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{  /* Assemble CONV: ir->op1 is the source, ir->op2 encodes the source type. */
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+  /* FP result wanted in a GPR argument register: convert via a temp FPR. */
+  if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg ftmp = ra_scratch(as, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp);
+      emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
+	      ftmp, ra_alloc1(as, lref, RSET_FPR));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      RISCVIns riscvi = irt_isfloat(ir->t) ?
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
+	 (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
+	 (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
+      /* The bits moved to the GPR are those of the FP result, so the move
+      ** width follows the result type (ir->t), not the integer source. */
+      emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp);
+      emit_ds(as, riscvi, ftmp, left);
+    }
+  } else if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S,
+	      dest, ra_alloc1(as, lref, RSET_FPR));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      RISCVIns riscvi = irt_isfloat(ir->t) ?
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) :
+	 (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) :
+	(((IRT_IS64 >> st) & 1) ?
+	 (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) :
+	 (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU));
+      emit_ds(as, riscvi, dest, left);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+		 "bad type for checked CONV");
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      RISCVIns riscvi = irt_is64(ir->t) ?
+	(st == IRT_NUM ?
+	 (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) :
+	 (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) :
+	(st == IRT_NUM ?
+	 (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) :
+	 (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S));
+      emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left);  /* Truncate. */
+    }
+  } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, lref, RSET_GPR);
+    RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B :
+		      st == IRT_U8 ? RISCVI_ZEXT_B :
+		      st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H;
+    lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+    emit_ext(as, riscvi, dest, left);
+  } else {  /* 32/64 bit integer conversions. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (irt_is64(ir->t)) {
+      if (st64) {  /* 64/64 bit no-op (cast). */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      } else {  /* 32 to 64 bit extension. */
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	if ((ir->op2 & IRCONV_SEXT)) {  /* 32 to 64 bit sign extension. */
+	  emit_ext(as, RISCVI_SEXT_W, dest, left);
+	} else {  /* 32 to 64 bit zero extension. */
+	  emit_ext(as, RISCVI_ZEXT_W, dest, left);
+	}
+      }
+    } else {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
+	/* This is either a 32 bit reg/reg mov which zeroes the hiword
+	** or a load of the loword from a 64 bit address. */
+	Reg left = ra_alloc1(as, lref, RSET_GPR);
+	emit_ext(as, RISCVI_ZEXT_W, dest, left);
+      } else {  /* 32/32 bit no-op (cast). */
+	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      }
+    }
+  }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{  /* Call lj_strscan_num(str, &slot) and exit if it returns zero. */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  int32_t ofs = SPOFS_TMP;  /* Default target: the temp slot. */
+  RegSet drop = RSET_SCRATCH;
+  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
+  ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);  /* Use the spill slot if there is one. */
+  asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
+  args[0] = ir->op1;      /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n  */
+  asm_gencall(as, ci, args);
+  /* Store the result to the spill slot or temp slots. */
+  Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs);  /* tmp = sp + ofs. */
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Store tagged value for ref at base+ofs. */
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+{
+  RegSet allow = rset_exclude(RSET_GPR, base);
+  IRIns *ir = IR(ref);
+  lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+	     "store of IR type %d", irt_type(ir->t));
+  if (irref_isk(ref)) {  /* Constant: store its precomputed 64 bit TValue. */
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, ir);
+    emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
+  } else {
+    Reg src = ra_alloc1(as, ref, allow);
+    rset_clear(allow, src);
+    Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+    emit_lso(as, RISCVI_SD, RID_TMP, base, ofs);  /* Runs last. */
+    if (irt_isinteger(ir->t)) {
+      emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type);
+      emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src);  /* Drop upper 32 bits first. */
+    } else {
+      emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type);  /* Value + type tag. */
+    }
+  }
+}
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)	/* todo-new */
+{
+  if ((mode & IRTMPREF_IN1)) {  /* The value of ref must be stored first. */
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+  /* Use the number constant itself as a TValue. */
+  ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+  return;
+      }
+      emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0);
+    } else {
+      asm_tvstore64(as, dest, 0, ref);
+    }
+  }
+  /* g->tmptv holds the TValue(s). */
+  emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv));
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{  /* dest = address of an array slot: base + 8*index. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {  /* Constant index: try a single ADDI. */
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;  /* Nonzero ofs: address off the table. */
+    ofs += 8*IR(ir->op2)->i;
+    if (checki12(ofs)) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dsi(as, RISCVI_ADDI, dest, base, ofs);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_sh3add(as, dest, base, idx, RID_TMP);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2;
+  Reg cmp64 = RID_NONE;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
+  IRType1 kt = irkey->t;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+  rset_clear(allow, tab);
+  tmp1 = ra_scratch(as, allow);
+  rset_clear(allow, tmp1);
+  tmp2 = ra_scratch(as, allow);
+  rset_clear(allow, tmp2);
+
+  if (irt_isnum(kt)) {
+    key = ra_alloc1(as, refkey, RSET_FPR);
+    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
+  } else {
+    /* Allocate cmp64 register used for 64-bit comparisons */
+    if (!isk && irt_isaddr(kt)) {
+      cmp64 = tmp2;  /* Filled with key+type further below. */
+    } else {
+      int64_t k;
+      if (isk && irt_isaddr(kt)) {
+	k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+      } else {
+	lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+	k = ~((int64_t)~irt_toitype(kt) << 47);
+      }
+      cmp64 = ra_allock(as, k, allow);
+      rset_clear(allow, cmp64);
+    }
+    if (!irt_ispri(kt)) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  } 
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  int is_lend_exit = 0;  /* Set when l_end is an exit stub address. */
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO);  /* Always taken. */
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end. */
+  l_loop = --as->mcp;  /* Reserve one slot; the branch is patched in below. */
+  emit_mv(as, dest, tmp1);
+  emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ) {  /* Must match asm_guard(). */
+    l_end = asm_exitstub_addr(as, as->snapno);
+    is_lend_exit = 1;
+  }
+  if (irt_isnum(kt)) {
+    emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit);
+    emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key);
+    emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, 0);  /* Not a number. */
+    emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM));
+    emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47);
+    emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1);
+  } else {
+    emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit);
+  }
+  emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
+  *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO)
+          | RISCVF_IMMB((char *)as->mcp-(char *)l_loop);
+  if (!isk && irt_isaddr(kt)) {
+    type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
+    emit_ds1s2(as, RISCVI_ADD, tmp2, key, type);  /* cmp64 = key + type tag. */
+    rset_clear(allow, type);
+  }
+
+  /* Load main position relative to tab->node into dest. */
+  khash = isk ? ir_khash(as, irkey) : 1;
+  if (khash == 0) {
+    emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
+  } else {
+    Reg tmphash = tmp1;
+    if (isk)
+      tmphash = ra_allock(as, khash, allow);
+    /* node = tab->node + (idx*32-idx*8) */
+    emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1);
+    lj_assertA(sizeof(Node) == 24, "bad Node size");
+    emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1);
+    emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3);
+    emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5);
+    emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash);	/* idx = hash & tab->hmask */
+    emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node));
+    emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
+    if (isk) {
+      /* Nothing to do. */
+    } else if (irt_isstr(kt)) {
+      emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2);
+      emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f);
+      emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2);
+      emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f);
+      emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest);
+      emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1);
+      emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f);
+      if (irt_isnum(kt)) {
+	emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1);
+	emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32);	/* hi */
+	emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1);	/* lo */
+	emit_ds(as, RISCVI_FMV_X_D, tmp1, key);
+      } else {
+	checkmclim(as);
+	emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32);	/* hi */
+	emit_ext(as, RISCVI_SEXT_W, tmp2, key);	/* lo */
+	emit_ds1s2(as, RISCVI_ADD, tmp1, key, type);
+      }
+    }
+  }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{  /* Guard that a fixed hash slot still holds the expected constant key. */
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  int bigofs = !checki12(kofs);  /* Offset does not fit a 12 bit immediate. */
+  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  Reg idx = node;
+  int64_t k;
+  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+  if (bigofs) {  /* Address the key via dest = node + ofs (added below). */
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_dsi(as, RISCVI_ADDI, dest, node, ofs);
+  }
+  if (irt_ispri(irkey->t)) {
+    lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+  } else if (irt_isnum(irkey->t)) {
+    k = (int64_t)ir_knum(irkey)->u64;
+  } else {
+    k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
+  }
+  asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow));
+  emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs);  /* Load the key stored in slot. */
+  if (bigofs)
+    emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow));
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{  /* Handles both UREFO and UREFC; op2 >> 8 is the upvalue index. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {  /* Constant function: load uv->v. */
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR);
+  } else {
+    if (guarded)  /* Checks the 'closed' byte loaded into RID_TMP below. */
+      asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO);
+    if (ir->o == IR_UREFC)
+      emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
+    else
+      emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v));
+    if (guarded)
+      emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+    if (irref_isk(ir->op1)) {  /* Runs first: dest = the GCupval object. */
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, dest, o);
+    } else {
+      emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+         (int32_t)offsetof(GCfuncL, uvptr) +
+         (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
+    }
+  }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{  /* Emits nothing; only asserts that the FREF result is unused. */
+  UNUSED(as); UNUSED(ir);
+  lj_assertA(!ra_used(ir), "unfused FREF");
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{  /* dest = op1 + op2 + sizeof(GCstr). */
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg base = ra_alloc1(as, ir->op1, allow);
+  IRIns *irr = IR(ir->op2);
+  int32_t ofs = sizeof(GCstr);
+  rset_clear(allow, base);
+  if (irref_isk(ir->op2) && checki12(ofs + irr->i)) {  /* Fits one ADDI. */
+    emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i);
+  } else {
+    emit_dsi(as, RISCVI_ADDI, dest, dest, ofs);  /* Runs second: add header. */
+    emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow));
+  }
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static RISCVIns asm_fxloadins(IRIns *ir)
+{
+  IRType t = irt_type(ir->t);  /* Pick the load by the result's IR type. */
+  if (t == IRT_NUM) return RISCVI_FLD;
+  if (t == IRT_FLOAT) return RISCVI_FLW;
+  if (t == IRT_I8) return RISCVI_LB;
+  if (t == IRT_U8) return RISCVI_LBU;
+  if (t == IRT_I16) return RISCVI_LH;
+  if (t == IRT_U16) return RISCVI_LHU;
+  /* All remaining types use a 64 bit or 32 bit integer load. */
+  return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW;
+}
+
+static RISCVIns asm_fxstoreins(IRIns *ir)
+{
+  IRType t = irt_type(ir->t);  /* Signed and unsigned share a store. */
+  if (t == IRT_NUM) return RISCVI_FSD;
+  if (t == IRT_FLOAT) return RISCVI_FSW;
+  if (t == IRT_I8 || t == IRT_U8) return RISCVI_SB;
+  if (t == IRT_I16 || t == IRT_U16) return RISCVI_SH;
+  /* All remaining types use a 64 bit or 32 bit integer store. */
+  return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW;
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{  /* Load an object field (op2 = field id) or a GG_State word. */
+  RegSet allow = RSET_GPR;
+  Reg idx, dest = ra_dest(as, ir, allow);
+  rset_clear(allow, dest);
+  RISCVIns riscvi = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
+    idx = RID_GL;
+    ofs = (ir->op2 << 2) - GG_OFS(g);  /* Rebase from GG_State to g. */
+  } else {
+    idx = ra_alloc1(as, ir->op1, allow);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+	emit_dsi(as, RISCVI_ADDI, dest, idx, ofs);
+	return;
+      }
+    }
+    ofs = field_ofs[ir->op2];
+    lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
+  }
+  rset_clear(allow, idx);
+  emit_lso(as, riscvi, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  IRIns *irf = IR(ir->op1);  /* The field reference: object and field id. */
+  Reg val, obj;
+  if (ir->r == RID_SINK) return;  /* Sunk store: nothing to emit. */
+  /* Allocate the stored value before the object, then emit the store. */
+  val = ra_alloc1z(as, ir->op2, RSET_GPR);
+  obj = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, val));
+  lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
+  emit_lso(as, asm_fxstoreins(ir), val, obj, field_ofs[irf->op2]);
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{  /* Load from the address in op1; load width/kind follows ir->t. */
+  Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+  lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
+	     "unaligned XLOAD");
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+{  /* Store op2 to the address in op1, displaced by ofs. */
+  if (ir->r != RID_SINK) {  /* Sunk stores emit nothing. */
+    Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+	  	 rset_exclude(RSET_GPR, src), ofs);
+  }
+}
+
+#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{  /* Type-checked load of a tagged value; also handles IR_VLOAD. */
+  Reg dest = RID_NONE, type = RID_TMP, idx;
+  RegSet allow = RSET_GPR;
+  int32_t ofs = 0;
+  IRType1 t = ir->t;
+  if (ra_used(ir)) {
+    lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t),
+	       "bad load type %d", irt_type(ir->t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+    if (irt_isaddr(t)) {
+      emit_cleartp(as, dest, dest);  /* Runs after the guard: strip the tag. */
+    } else if (irt_isint(t))
+      emit_ext(as, RISCVI_SEXT_W, dest, dest);
+  }
+  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;  /* op2 is a slot index here. */
+  rset_clear(allow, idx);
+  if (irt_isnum(t)) {
+    asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);  /* Exit unless tag < TISNUM. */
+    emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
+  } else {
+    asm_guard(as, RISCVI_BNE, type,
+	      ra_allock(as, (int32_t)irt_toitype(t), allow));
+  }
+  if (ra_hasreg(dest)) {
+    if (irt_isnum(t)) {
+      emit_lso(as, RISCVI_FLD, dest, idx, ofs);
+      dest = type;  /* The tag check uses a separate GPR load. */
+    }
+  } else {
+    dest = type;  /* Result unused: load only for the type check. */
+  }
+  emit_dsshamt(as, RISCVI_SRAI, type, dest, 47);  /* Extract the type tag. */
+  emit_lso(as, RISCVI_LD, dest, idx, ofs);  /* Runs first: load 64 bit slot. */
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{  /* Store op2 as a tagged 64 bit value to the slot referenced by op1. */
+  RegSet allow = RSET_GPR;
+  Reg idx, src = RID_NONE, type = RID_NONE;
+  int32_t ofs = 0;
+  if (ir->r == RID_SINK)  /* Sunk stores emit nothing. */
+    return;
+  if (irt_isnum(ir->t)) {  /* Numbers are stored directly from an FPR. */
+    src = ra_alloc1(as, ir->op2, RSET_FPR);
+    idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+    emit_lso(as, RISCVI_FSD, src, idx, ofs);
+  } else {
+    Reg tmp = RID_TMP;
+    if (irt_ispri(ir->t)) {  /* Primitive: the whole TValue is a constant. */
+      tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+      rset_clear(allow, tmp);
+    } else {
+      src = ra_alloc1(as, ir->op2, allow);
+      rset_clear(allow, src);
+      type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+      rset_clear(allow, type);
+    }
+    idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+    emit_lso(as, RISCVI_SD, tmp, idx, ofs);  /* Runs last. */
+    if (ra_hasreg(src)) {
+      if (irt_isinteger(ir->t)) {
+	emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type);
+  emit_ext(as, RISCVI_ZEXT_W, tmp, src);
+      } else {
+	emit_ds1s2(as, RISCVI_ADD, tmp, src, type);  /* Value + type tag. */
+      }
+    }
+  }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{  /* Load a stack slot, with optional type check and number conversion. */
+  Reg dest = RID_NONE, type = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  IRType1 t = ir->t;
+  int32_t ofs = 8*((int32_t)ir->op1-2);  /* Slot offset relative to base. */
+  lj_assertA(checki12(ofs), "sload IR operand out of range");
+  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+	     "bad parent SLOAD");  /* Handled by asm_head_side(). */
+  lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+	     "inconsistent SLOAD variant");
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
+    lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
+	       "bad SLOAD type %d", irt_type(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    rset_clear(allow, dest);
+    base = ra_alloc1(as, REF_BASE, allow);
+    rset_clear(allow, base);
+    if (irt_isaddr(t)) { /* Clear type from pointers. */
+      emit_cleartp(as, dest, dest);
+    } else if (ir->op2 & IRSLOAD_CONVERT) {
+      if (irt_isint(t)) {
+	emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp);
+	/* If value is already loaded for type check, move it to FPR. */
+	if ((ir->op2 & IRSLOAD_TYPECHECK))
+	  emit_ds(as, RISCVI_FMV_D_X, tmp, dest);
+	else
+	  dest = tmp;  /* Load straight into the FP temp. */
+	t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+	emit_ds(as, RISCVI_FCVT_D_W, dest, tmp);
+	dest = tmp;  /* Load the integer into the GPR temp. */
+	t.irt = IRT_INT;  /* Check for original type. */
+      }
+    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+      /* Sign-extend integers. */
+      emit_ext(as, RISCVI_SEXT_W, dest, dest);
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+  rset_clear(allow, base);
+dotypecheck:
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    type = dest < RID_MAX_GPR ? dest : RID_TMP;  /* GPR holding the slot. */
+    if (irt_ispri(t)) {
+      asm_guard(as, RISCVI_BNE, type,
+		ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
+    } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+      asm_guard(as, RISCVI_BNE, RID_TMP,
+               ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
+      emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32);  /* Upper 32 bits. */
+    } else {
+      if (irt_isnum(t)) {
+        asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO);
+        emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM);
+	if (ra_hasreg(dest)) {
+	  emit_lso(as, RISCVI_FLD, dest, base, ofs);
+	}
+      } else {
+	asm_guard(as, RISCVI_BNE, RID_TMP,
+		  ra_allock(as, (int32_t)irt_toitype(t), allow));
+      }
+      emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47);  /* Extract the tag. */
+    }
+    emit_lso(as, RISCVI_LD, type, base, ofs);  /* Runs first: load the slot. */
+  } else if (ra_hasreg(dest)) {  /* Unchecked load of just the value. */
+    emit_lso(as, irt_isnum(t) ? RISCVI_FLD :
+             irt_isint(t) ? RISCVI_LW : RISCVI_LD,
+             dest, base, ofs);
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{  /* Allocate a cdata object (CNEW) or an initialized one (CNEWI). */
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[4];
+  RegSet drop = RSET_SCRATCH;
+  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+	     "bad CNEW/CNEWI operands");
+
+  as->gcsteps++;
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);
+  if (ra_used(ir))
+    ra_destreg(as, ir, RID_RET);  /* GCcdata * */
+
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);  /* Must survive the call. */
+    emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow),
+	     RID_RET, (sizeof(GCcdata)));
+    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
+  }
+
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct)));
+  emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid)));
+  emit_loadk12(as, RID_RET+1, ~LJ_TCDATA);
+  emit_loadk32(as, RID_TMP, id);
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+         ra_releasetmp(as, ASMREF_TMP1));
+}
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{  /* Table write barrier: a black table is put on the grayagain list. */
+  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+  Reg link = RID_TMP;
+  MCLabel l_end = emit_label(as);
+  emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist));
+  emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
+  emit_setgl(as, tab, gc.grayagain);	/* tab becomes the new list head. */
+  emit_getgl(as, link, gc.grayagain);	/* link = old list head. */
+  emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0);	/* Not black: skip. */
+  emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP);	/* Clear the black bit. */
+  emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
+  emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked)));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{  /* Upvalue write barrier: call lj_gc_barrieruv() only when required. */
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg obj, val, tmp;
+  /* No need for other object barriers (yet). */
+  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");	/* Closed upvalue. */
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ir->op1;      /* TValue *tv      */
+  asm_gencall(as, ci, args);
+  emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+  obj = IR(ir->op1)->r;
+  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+  emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, 0);	/* Upvalue not black. */
+  emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0);	/* Value not white. */
+  emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK);
+  emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
+  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+  emit_lso(as, RISCVI_LBU, tmp, obj,  /* obj points at uv->tv, so rebase. */
+	   ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)));
+  emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked)));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi)  /* FP binary op. */
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);  /* Packed register pair. */
+  right = (left >> 8); left &= 255;  /* Left in bits 0-7, right above. */
+  emit_ds1s2(as, riscvi, dest, left, right);
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi)  /* FP unary op. */
+{
+  Reg dest = ra_dest(as, ir, RSET_FPR);
+  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+  switch(riscvi) {
+    case RISCVI_FSQRT_S: case RISCVI_FSQRT_D:
+      emit_ds(as, riscvi, dest, left);  /* Single source operand. */
+      break;
+    case RISCVI_FMV_S: case RISCVI_FMV_D:
+    case RISCVI_FABS_S: case RISCVI_FABS_D:
+    case RISCVI_FNEG_S: case RISCVI_FNEG_D:
+      emit_ds1s2(as, riscvi, dest, left, left);  /* Source in both operand slots. */
+      break;
+    default:
+      lj_assertA(0, "bad fp unary instruction");  /* Any other opcode is a caller bug. */
+      return;
+  }
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)  /* FPMATH: op2 selects the function. */
+{
+  IRFPMathOp fpm = (IRFPMathOp)ir->op2;
+  if (fpm <= IRFPM_TRUNC)  /* Ops up to IRFPM_TRUNC go through asm_callround(). */
+    asm_callround(as, ir, IRCALL_lj_vm_floor + fpm);
+  else if (fpm == IRFPM_SQRT)  /* Square root is a single inline instruction. */
+    asm_fpunary(as, ir, RISCVI_FSQRT_D);
+  else  /* Everything else: call ID is IRCALL_lj_vm_floor offset by fpm. */
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+}
+
+/* Assemble ADD: FP add (or fused multiply-add), else integer add. */
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  IRType1 ty = ir->t;
+  Reg rd, rs1, rs2;
+  if (irt_isnum(ty)) {
+    if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D))
+      asm_fparith(as, ir, RISCVI_FADD_D);
+    return;
+  }
+  /* Integer add: with XThead, try asm_fusemac() using TH_MULA first. */
+  if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA))
+    return;
+  rd = ra_dest(as, ir, RSET_GPR);
+  rs1 = ra_hintalloc(as, ir->op1, rd, RSET_GPR);
+  if (irref_isk(ir->op2)) {
+    intptr_t imm = get_kval(as, ir->op2);
+    if (checki12(imm)) {  /* Constant fits the 12 bit immediate form. */
+      emit_dsi(as, irt_is64(ty) ? RISCVI_ADDI : RISCVI_ADDIW, rd, rs1, imm);
+      return;
+    }
+  }
+  rs2 = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, rs1));
+  if (irt_is64(ty))
+    emit_ds1s2(as, RISCVI_ADD, rd, rs1, rs2);
+  else
+    emit_ds1s2(as, RISCVI_ADDW, rd, rs1, rs2);
+}
+
+/* Assemble SUB: FP subtract (or fused multiply-sub), else integer subtract. */
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  Reg rd, pair;
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D))
+      asm_fparith(as, ir, RISCVI_FSUB_D);
+    return;
+  }
+  /* Integer subtract: with XThead, try asm_fusemac() using TH_MULS first. */
+  if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS))
+    return;
+  rd = ra_dest(as, ir, RSET_GPR);
+  pair = ra_alloc2(as, ir, RSET_GPR);  /* Left in bits 0-7, right above. */
+  emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, rd, pair & 255, pair >> 8);
+}
+
+/* Assemble MUL: FMUL.D for numbers, MUL (64 bit) or MULW for integers. */
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, RISCVI_FMUL_D);
+    return;
+  }
+  Reg rd = ra_dest(as, ir, RSET_GPR);
+  Reg pair = ra_alloc2(as, ir, RSET_GPR);  /* Left in bits 0-7, right above. */
+  emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_MUL : RISCVI_MULW, rd,
+	     pair & 255, pair >> 8);
+}
+
+static void asm_fpdiv(ASMState *as, IRIns *ir)  /* FP division. */
+{
+    asm_fparith(as, ir, RISCVI_FDIV_D);  /* Always the double-precision FDIV.D. */
+}
+
+/* Assemble NEG: FNEG.D for numbers, otherwise subtract from the zero reg. */
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, RISCVI_FNEG_D);
+    return;
+  }
+  Reg rd = ra_dest(as, ir, RSET_GPR);
+  Reg rs = ra_hintalloc(as, ir->op1, rd, RSET_GPR);
+  emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, rd, RID_ZERO, rs);
+}
+
+#define asm_abs(as, ir)		asm_fpunary(as, ir, RISCVI_FABS_D)	/* FP registers only. */
+
+static void asm_arithov(ASMState *as, IRIns *ir)  /* 32 bit ADDOV/SUBOV guards. */
+{
+  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
+  lj_assertA(!irt_is64(ir->t), "bad usage");
+  if (irref_isk(ir->op2)) {
+    int k = IR(ir->op2)->i;
+    if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);  /* x-k == x+(-k) */
+    if (checki12(k)) {	/* (dest < left) == (k >= 0 ? 1 : 0) */
+      left = ra_alloc1(as, ir->op1, RSET_GPR);
+      asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left);
+      emit_dsi(as, RISCVI_ADDIW, dest, left, k);  /* 32 bit add: ADDI would never wrap. */
+      if (dest == left) emit_mv(as, RID_TMP, left);  /* Keep the old left for the guard. */
+      return;
+    }
+  }
+  left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;  /* Left in bits 0-7, right above. */
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+						 right), dest));
+  asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO);  /* Exit if the sign bit is set. */
+  emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp);
+  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
+    emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
+  } else {  /* ((dest^left) & (dest^~right)) < 0 */
+    emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right);
+  }
+  emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left);
+  emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right);
+  if (dest == left || dest == right)  /* Save the operand that dest overwrites. */
+    emit_mv(as, RID_TMP, dest == left ? left : right);
+}
+
+#define asm_addov(as, ir)	asm_arithov(as, ir)	/* asm_arithov() checks ir->o. */
+#define asm_subov(as, ir)	asm_arithov(as, ir)	/* asm_arithov() checks ir->o. */
+
+static void asm_mulov(ASMState *as, IRIns *ir)  /* 32 bit multiply with overflow guard. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;  /* Left in bits 0-7, right above. */
+  asm_guard(as, RISCVI_BNE, dest, RID_TMP);  /* Exit if the two results differ. */
+  emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP);	/* dest: [31:0] sign-extended. */
+  emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right);	/* RID_TMP: full [63:0] product. */
+}
+
+static void asm_bnot(ASMState *as, IRIns *ir)  /* Bitwise NOT. */
+{
+  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
+  IRIns *irl = IR(ir->op1);
+  if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) {
+    left = ra_alloc2(as, irl, RSET_GPR);  /* Operands of the fused BXOR. */
+    right = (left >> 8); left &= 255;
+    emit_ds1s2(as, RISCVI_XNOR, dest, left, right);  /* ~(a ^ b) in one instruction. */
+  } else {
+    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    emit_ds(as, RISCVI_NOT, dest, left);
+  }
+}
+
+static void asm_bswap(ASMState *as, IRIns *ir)  /* Byte swap, 32 or 64 bit. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left);
+  if (as->flags & JIT_F_RVZbb) {
+    if (!irt_is64(ir->t))  /* 32 bit: shift the swapped word down from the top half. */
+      emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32);
+    emit_ds(as, RISCVI_REV8, dest, left);
+  } else if (as->flags & JIT_F_RVXThead) {
+    emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW,
+       dest, left);
+  } else if (irt_is64(ir->t)) {  /* Generic 64 bit swap with shifts and masks. */
+    Reg tmp1, tmp2, tmp3, tmp4;
+    tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
+    tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2);
+    tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3);
+    tmp4 = ra_scratch(as, allow);
+    emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4);  /* Final merge of the partial results. */
+    emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3);
+    emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);
+    emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40);
+    emit_dsshamt(as, RISCVI_SLLI, dest, left, 56);  /* Last read of left (dest may alias it). */
+    emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3);
+    emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP);
+    emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32);
+    emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24);
+    emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24);
+    emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2);
+    emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1);
+    emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3);
+    emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24);
+    emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24);
+    emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1);
+    emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8);
+    emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24);
+    emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3);
+    emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u));  /* tmp1 = 0xff0000 mask. */
+    emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP);
+    emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56);
+    emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00));
+    emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u));  /* RID_TMP = 0xff00 mask. */
+    emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40);
+  } else {  /* Generic 32 bit swap with shifts and masks. */
+    Reg tmp1, tmp2;
+    tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1);
+    tmp2 = ra_scratch(as, allow);
+    emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2);  /* Final merge of the partial results. */
+    emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1);
+    emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8);
+    emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24);  /* Last read of left (dest may alias it). */
+    emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2);
+    emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP);
+    emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP);
+    emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24);
+    emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00));
+    emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u));  /* RID_TMP = 0xff00 mask. */
+    emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8);
+  }
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin)
+{  /* riscvi: reg/reg form, riscvik: immediate form, riscvin: form used when fusing a BNOT. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left, right;
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  if (irref_isk(ir->op2)) {
+    intptr_t k = get_kval(as, ir->op2);
+    if (checki12(k)) {  /* Constant fits the 12 bit immediate form. */
+      left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+      emit_dsi(as, riscvik, dest, left, k);
+      return;
+    }
+  } else if (as->flags & JIT_F_RVZbb) {
+    if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) {  /* Fuse BNOT on the left. */
+      left = ra_alloc1(as, irl->op1, RSET_GPR);
+      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+      emit_ds1s2(as, riscvin, dest, right, left);  /* BNOT operand goes last. */
+      return;
+    } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) {  /* Fuse BNOT on the right. */
+      left = ra_alloc1(as, ir->op1, RSET_GPR);
+      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+      emit_ds1s2(as, riscvin, dest, left, right);
+      return;
+    }
+  }
+  left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);  /* Generic reg/reg fallback. */
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_ds1s2(as, riscvi, dest, left, right);
+}
+
+#define asm_band(as, ir)	asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN)  /* reg, imm, fused-BNOT */
+#define asm_bor(as, ir)	asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN)  /* reg, imm, fused-BNOT */
+#define asm_bxor(as, ir)	asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR)  /* reg, imm, fused-BNOT */
+
+static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik)
+{  /* riscvi: register shift/rotate opcode, riscvik: immediate form of the same op. */
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  uint32_t shmsk = irt_is64(ir->t) ? 63 : 31;  /* Shift count mask for the operand width. */
+  if (irref_isk(ir->op2)) {  /* Constant shifts. */
+    uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk);
+    switch (riscvik) {
+      case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI:
+      case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW:
+        emit_dsshamt(as, riscvik, dest, left, shift);
+        break;
+      case RISCVI_RORI: case RISCVI_RORIW:
+        emit_roti(as, riscvik, dest, left, RID_TMP, shift);  /* RID_TMP is passed as temp. */
+        break;
+      default:
+        lj_assertA(0, "bad shift instruction");
+        return;
+    }
+  } else {  /* Variable shifts: count in a register. */
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    switch (riscvi) {
+      case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL:
+      case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW:
+        emit_ds1s2(as, riscvi, dest, left, right);
+        break;
+      case RISCVI_ROR: case RISCVI_ROL:
+      case RISCVI_RORW: case RISCVI_ROLW:
+        emit_rot(as, riscvi, dest, left, right, RID_TMP);  /* RID_TMP is passed as temp. */
+        break;
+      default:
+        lj_assertA(0, "bad shift instruction");
+        return;
+    }
+  }
+}
+
+#define asm_bshl(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \
+  asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW))  /* Shift left; *W forms for 32 bit. */
+#define asm_bshr(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \
+  asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW))  /* Logical shift right. */
+#define asm_bsar(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \
+  asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW))  /* Arithmetic shift right. */
+#define asm_brol(as, ir)	lj_assertA(0, "unexpected BROL")  /* BROL is not assembled here. */
+#define asm_bror(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \
+  asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW))  /* Rotate right. */
+
+/* Assemble MIN/MAX. Code is emitted bottom-up, so the SLT at the end runs first. */
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
+{
+  if (irt_isnum(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;
+    emit_ds1s2(as, ismax ? RISCVI_FMAX_D : RISCVI_FMIN_D, dest, left, right);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    if (as->flags & JIT_F_RVZbb) {
+      emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right);
+    } else {  /* TMP = cond (0/1); the result is right if cond, else left. */
+      if (as->flags & JIT_F_RVXThead) {
+	if (left == right) {
+	  if (dest != left) emit_mv(as, dest, left);
+	} else if (dest == left) {  /* dest already holds left: replace if cond. */
+	  emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP);
+	} else {  /* dest = right first, then replaced by left if !cond. */
+	  emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP);
+	  if (dest != right) emit_mv(as, dest, right);
+	}
+      } else {  /* Branchless select with an all-ones/all-zeros mask in TMP. */
+	emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP);
+	if (dest != right) {  /* mask = cond-1: dest keeps left iff !cond. */
+	  emit_ds1s2(as, RISCVI_AND, RID_TMP, right, RID_TMP);
+	  emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP);
+	  emit_ds1s2(as, RISCVI_AND, dest, left, RID_TMP);
+	  emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1);
+	} else {  /* dest aliases right: mask = -cond, dest keeps right iff cond. */
+	  emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP);
+	  emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP);
+	  emit_ds1s2(as, RISCVI_AND, dest, right, RID_TMP);
+	  emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP);
+	}
+      }
+      emit_ds1s2(as, RISCVI_SLT, RID_TMP,
+         ismax ? left : right, ismax ? right : left);
+    }
+  }
+}
+
+#define asm_min(as, ir)		asm_min_max(as, ir, 0)	/* ismax = 0 */
+#define asm_max(as, ir)		asm_min_max(as, ir, 1)	/* ismax = 1 */
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* FP comparisons. */
+static void asm_fpcomp(ASMState *as, IRIns *ir)  /* Guard on an FP compare result in TMP. */
+{
+  IROp op = ir->o;
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;  /* Left in bits 0-7, right above. */
+  asm_guard(as, (op < IR_EQ ? (op&4) : (op&1))
+            ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);  /* Exit polarity per op. */
+  switch (op) {
+    case IR_LT: case IR_UGE:  /* TMP = left < right */
+      emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right);
+      break;
+    case IR_LE: case IR_UGT: case IR_ABC:  /* TMP = left <= right */
+      emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right);
+      break;
+    case IR_GT: case IR_ULE:  /* TMP = right < left */
+      emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left);
+      break;
+    case IR_GE: case IR_ULT:  /* TMP = right <= left */
+      emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left);
+      break;
+    case IR_EQ: case IR_NE:  /* TMP = left == right */
+      emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right);
+      break;
+    default:  /* Other ops emit no compare at all. */
+      break;
+  }
+}
+
+/* Integer comparisons. */
+static void asm_intcomp(ASMState *as, IRIns *ir)  /* Guards exit when the compare fails. */
+{
+  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
+  /*           00 01 10 11  100 101 110 111  */
+  IROp op = ir->o;
+  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+  if (op == IR_ABC) op = IR_UGT;  /* ABC is handled as an unsigned greater-than. */
+  if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
+    switch (op) {  /* Signed compare against 0: branch on the zero register. */
+      case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break;
+      case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break;
+      case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break;
+      case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break;
+      default: break;
+    }
+    return;
+  }
+  if (irref_isk(ir->op2)) {
+    intptr_t k = get_kval(as, ir->op2);
+    if ((op&2)) k++;  /* x <= k becomes x < k+1. NOTE(review): wraps for unsigned k == -1; confirm. */
+    if (checki12(k)) {
+      asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
+      emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k);  /* TMP = left < k */
+      return;
+    }
+  }
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1),
+             (op&2) ? right : left, (op&2) ? left : right);  /* LE/GT swap the operands. */
+}
+
+static void asm_comp(ASMState *as, IRIns *ir)  /* Comparison guard dispatch. */
+{
+  if (irt_isnum(ir->t))  /* Number type: FP compare. */
+    asm_fpcomp(as, ir);
+  else  /* Everything else: integer compare. */
+    asm_intcomp(as, ir);
+}
+
+/* Assemble EQ/NE guard: FP via asm_fpcomp(), integers via BEQ/BNE. */
+static void asm_equal(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpcomp(as, ir);
+    return;
+  }
+  Reg pair = ra_alloc2(as, ir, RSET_GPR);  /* Left in bits 0-7, right above. */
+  asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, pair & 255, pair >> 8);
+}
+
+/* -- Split register ops -------------------------------------------------- */
+
+/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+  /* HIOP is marked as a store because it needs its own DCE logic. */
+  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
+  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;  /* No DCE: keep both. */
+  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
+  switch ((ir-1)->o) {
+  case IR_CALLN:
+  case IR_CALLL:
+  case IR_CALLS:
+  case IR_CALLXS:  /* Only call results are accepted as the loword op here. */
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+  }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Exit if HOOK_PROFILE is set in hookmask. */
+{
+  UNUSED(ir);
+  asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);  /* Exit if the masked bit is set. */
+  emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, RISCVI_LBU, RID_TMP,
+         (int32_t)offsetof(global_State, hookmask));  /* Byte load of g->hookmask. */
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+			    IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
+  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
+  ExitNo oldsnap = as->snapno;
+  rset_clear(allow, pbase);
+  as->snapno = exitno;  /* The guard below is emitted with snapno set to exitno. */
+  asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
+  as->snapno = oldsnap;
+  if (allow) {
+    tmp = rset_pickbot(allow);
+    ra_modified(as, tmp);
+  } else {	/* allow == RSET_EMPTY */
+    tmp = RID_RET;
+    emit_lso(as, RISCVI_LD, tmp, RID_SP, 0);	/* Restore tmp1 register. */
+  }
+  emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));  /* TMP < 8*topslot? */
+  emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase);  /* TMP = maxstack - base */
+  emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack));
+  if (pbase == RID_TMP)  /* No register holds the parent base: load jit_base. */
+    emit_getgl(as, RID_TMP, jit_base);
+  emit_getgl(as, tmp, cur_L);
+  if (allow == RSET_EMPTY)  /* Spill temp register. */
+    emit_lso(as, RISCVI_SD, tmp, RID_SP, 0);
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Byte offset of the slot from RID_BASE. */
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {  /* Numbers are stored straight from an FPR. */
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs);
+    } else {
+      if ((sn & SNAP_KEYINDEX)) {  /* Value goes in the low word, LJ_KEYINDEX in the high word. */
+        RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+	int64_t kki = (int64_t)LJ_KEYINDEX << 32;
+	if (irref_isk(ref)) {  /* Constant: store the combined 64 bit value. */
+	  emit_lso(as, RISCVI_SD,
+       ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
+       RID_BASE, ofs);
+	} else {
+	  Reg src = ra_alloc1(as, ref, allow);
+	  Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
+	  emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs);
+	  emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki);  /* TMP = src + kki */
+	}
+      } else {
+        asm_tvstore64(as, RID_BASE, ofs, ref);
+      }
+    }
+    checkmclim(as);
+  }
+  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Marker (OR TMP, TMP, TMP) that keeps lj_asm_patchexit off the GC check exit. */
+#define RISCV_NOPATCH_GC_CHECK \
+  (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP))
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);  /* Target of the skip branch emitted further down. */
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO);	/* Assumes asm_snap_prep() already done. */
+  *--as->mcp = RISCV_NOPATCH_GC_CHECK;  /* Marker word placed right before the guard. */
+  args[0] = ASMREF_TMP1;  /* global_State *g */
+  args[1] = ASMREF_TMP2;  /* MSize steps     */
+  asm_gencall(as, ci, args);
+  emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+  tmp = ra_releasetmp(as, ASMREF_TMP2);
+  emit_loadi(as, tmp, as->gcsteps);
+  /* Jump around GC step if GC total < GC threshold. */
+  emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, 0);
+  emit_getgl(as, tmp, gc.threshold);
+  emit_getgl(as, RID_TMP, gc.total);
+  as->gcsteps = 0;  /* Reset the accumulated step count. */
+  checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)  /* Patch the loop-back jump to as->mcp. */
+{
+  MCode *p = as->mctop;
+  MCode *target = as->mcp;
+  ptrdiff_t delta;
+  if (as->loopinv) {  /* Inverted loop branch? */
+    delta = (char *)target - (char *)(p - 2);
+    /* asm_guard* already inverted the branch, and patched the final b. */
+    lj_assertA(checki21(delta), "branch target out of range");
+    p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta);  /* Keep low 12 bits, set J offset. */
+  } else {  /* Plain loop: write a JAL into the last slot. */
+    delta = (char *)target - (char *)(p - 1);
+    lj_assertA(checki21(delta), "branch target out of range");  /* Else IMMJ truncates. */
+    p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta);
+  }
+}
+
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)  /* Currently a no-op on this target. */
+{
+  UNUSED(as);  /* Nothing to do(?) */
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {  /* Nothing to do unless BASE has a register. */
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (r != RID_BASE)  /* Copy from the fixed RID_BASE register. */
+      emit_mv(as, r, RID_BASE);
+  }
+}
+
+/* Coalesce BASE register for a side trace. */
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)  /* irp: BASE ins of the parent. */
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      return r;  /* Same BASE register already coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      emit_mv(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
+  }
+  return RID_NONE;  /* No register was coalesced. */
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+  int32_t spadj = as->T->spadjust;
+  if (spadj == 0) {
+    p[-3] = RISCVI_NOP;
+    /* The slot is kept (mctop is unchanged) and simply holds a NOP. */
+  } else {
+    /* Patch stack adjustment. */
+    p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj);
+  }
+  /* Patch exit jump. */
+  ptrdiff_t delta = (char *)target - (char *)(p - 2);  /* Relative to the AUIPC at p[-2]. */
+  p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+  p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 2;  /* Leave room for exitstub. */
+  if (as->loopref) {  /* Loop: no stack adjustment slot is reserved. */
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+  p[0] = p[1] = RISCVI_NOP;  /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
+  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;  /* Free arg registers. */
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      if (nfpr > 0) {  /* FP arg in an FPR; varargs also consume a GPR. */
+        nfpr--; if(ci->flags & CCI_VARARG) ngpr--;
+      } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--;  /* Out of FPRs: use a GPR. */
+      else nslots += 2;  /* Otherwise the arg takes two stack slots. */
+    } else {
+      if (ngpr > 0) {  /* Non-FP arg in a GPR; varargs also consume an FPR. */
+        ngpr--; if(ci->flags & CCI_VARARG) nfpr--;
+      } else nslots += 2;
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);  /* Hint the return register. */
+}
+
+static void asm_setup_target(ASMState *as)  /* Target-specific setup for a trace. */
+{
+  asm_sparejump_setup(as);
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));  /* One more with a parent. */
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);  /* End of the trace's machine code. */
+  MCode *px = exitstub_trace_addr(T, exitno);  /* Exit stub the old jumps point to. */
+  MCode *cstart = NULL;  /* First patched instruction, for the final sync. */
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+
+  for (; p < pe; p++) {
+    /* Look for exitstub branch, replace with branch to target. */
+    ptrdiff_t odelta = (char *)px - (char *)(p+1),
+              ndelta = (char *)target - (char *)(p+1);
+    if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 &&  /* B-immediate of p[0] is 8. */
+         ((p[0] & 0x0000007fu) == 0x63u) &&  /* p[0] has the branch opcode. */
+         ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&  /* p[1] jumps to px. */
+         ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) ||
+        (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&
+         ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) {  /* 0x6f: JAL. */
+      lj_assertJ(checki32(ndelta), "branch target out of range");
+      /* Patch jump, if within range. */
+	    patchbranch:
+      if (checki21(ndelta)) { /* Patch jump */
+  p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta);
+  if (!cstart) cstart = p + 1;
+      } else {  /* Branch out of range. Use spare jump slot in mcarea. */
+  MCode *mcjump = asm_sparejump_use(mcarea, target);
+  if (mcjump) {
+	  lj_mcode_sync(mcjump, mcjump+2);
+    ndelta = (char *)mcjump - (char *)(p+1);  /* Retarget to the spare jump slot. */
+    if (checki21(ndelta)) {
+      goto patchbranch;
+    } else {
+      lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
+    }
+  }
+	/* Ignore jump slot overflow. Child trace is simply not attached. */
+      }
+    } else if (p+2 == pe) {  /* Last two instructions of the trace. */
+      if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) {  /* Still unpatched NOPs. */
+  ptrdiff_t delta = (char *)target - (char *)p;
+  lj_assertJ(checki32(delta), "jump target out of range");
+  p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+  p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+  if (!cstart) cstart = p;
+      }
+    }
+  }
+  if (cstart) lj_mcode_sync(cstart, px+1);  /* Sync only if something was patched. */
+  lj_mcode_patch(J, mcarea, 1);
+}
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_x86.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_asm_x86.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_asm_x86.h
@@ -1,6 +1,6 @@
 /*
 ** x86/x64 IR assembler (SSA IR -> machine code).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Guard handling ------------------------------------------------------ */
@@ -140,7 +140,8 @@ static IRRef asm_fuseabase(ASMState *as,
     }
   } else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
     /* Fuse base offset (vararg load). */
-    as->mrm.ofs = IR(irb->op2)->i;
+    IRIns *irk = IR(irb->op2);
+    as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
     return irb->op1;
   }
   return ref;  /* Otherwise use the given array base. */
@@ -216,10 +217,17 @@ static void asm_fuseahuref(ASMState *as,
 #endif
       }
       break;
+    case IR_TMPREF:
+#if LJ_GC64
+      as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
+      as->mrm.base = RID_DISPATCH;
+      as->mrm.idx = RID_NONE;
+#else
+      as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
+      as->mrm.base = as->mrm.idx = RID_NONE;
+#endif
+      return;
     default:
-      lj_assertA(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
-		 ir->o == IR_KKPTR,
-		 "bad IR op %d", ir->o);
       break;
     }
   }
@@ -478,8 +486,10 @@ static Reg asm_fuseload(ASMState *as, IR
 	asm_fusexref(as, ir->op1, xallow);
 	return RID_MRM;
       }
-    } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
+    } else if (ir->o == IR_VLOAD && IR(ir->op1)->o == IR_AREF &&
+	       !(LJ_GC64 && irt_isaddr(ir->t))) {
       asm_fuseahuref(as, ir->op1, xallow);
+      as->mrm.ofs += 8 * ir->op2;
       return RID_MRM;
     }
   }
@@ -651,7 +661,7 @@ static void asm_gencall(ASMState *as, co
 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 {
   RegSet drop = RSET_SCRATCH;
-  int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
   if (ra_hasreg(ir->r))
@@ -691,10 +701,8 @@ static void asm_setupresult(ASMState *as
 		  irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
       }
 #endif
-#if LJ_32
     } else if (hiop) {
       ra_destpair(as, ir);
-#endif
     } else {
       lj_assertA(!irt_ispri(ir->t), "PRI dest");
       ra_destreg(as, ir, RID_RET);
@@ -781,6 +789,21 @@ static void asm_retf(ASMState *as, IRIns
 #endif
 }
 
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+  emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L);
+  emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
 /* -- Type conversions ---------------------------------------------------- */
 
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
@@ -924,7 +947,7 @@ static void asm_conv(ASMState *as, IRIns
       }
     } else {
       Reg dest = ra_dest(as, ir, RSET_GPR);
-      if (st64) {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
 	Reg left = asm_fuseload(as, lref, RSET_GPR);
 	/* This is either a 32 bit reg/reg mov which zeroes the hiword
 	** or a load of the loword from a 64 bit address.
@@ -1050,47 +1073,48 @@ static void asm_strto(ASMState *as, IRIn
 /* -- Memory references --------------------------------------------------- */
 
 /* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
 {
-  IRIns *ir = IR(ref);
-  if (irt_isnum(ir->t)) {
-    /* For numbers use the constant itself or a spill slot as a TValue. */
-    if (irref_isk(ref))
-      emit_loada(as, dest, ir_knum(ir));
-    else
-      emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
-  } else {
-    /* Otherwise use g->tmptv to hold the TValue. */
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+	/* Use the number constant itself as a TValue. */
+	emit_loada(as, dest, ir_knum(ir));
+	return;
+      }
+      emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
+    } else {
 #if LJ_GC64
-    if (irref_isk(ref)) {
-      TValue k;
-      lj_ir_kvalue(as->J->L, &k, ir);
-      emit_movmroi(as, dest, 4, k.u32.hi);
-      emit_movmroi(as, dest, 0, k.u32.lo);
-    } else {
-      /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      if (irt_is64(ir->t)) {
-	emit_u32(as, irt_toitype(ir->t) << 15);
-	emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+      if (irref_isk(ref)) {
+	TValue k;
+	lj_ir_kvalue(as->J->L, &k, ir);
+	emit_movmroi(as, dest, 4, k.u32.hi);
+	emit_movmroi(as, dest, 0, k.u32.lo);
       } else {
-	/* Currently, no caller passes integers that might end up here. */
-	emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	/* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	if (irt_is64(ir->t)) {
+	  emit_u32(as, irt_toitype(ir->t) << 15);
+	  emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+	} else {
+	  emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+	}
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
       }
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    }
 #else
-    if (!irref_isk(ref)) {
-      Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
-      emit_movtomro(as, REX_64IR(ir, src), dest, 0);
-    } else if (!irt_ispri(ir->t)) {
-      emit_movmroi(as, dest, 0, ir->i);
-    }
-    if (!(LJ_64 && irt_islightud(ir->t)))
-      emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+      if (!irref_isk(ref)) {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+	emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+      } else if (!irt_ispri(ir->t)) {
+	emit_movmroi(as, dest, 0, ir->i);
+      }
+      if (!(LJ_64 && irt_islightud(ir->t)))
+	emit_movmroi(as, dest, 4, irt_toitype(ir->t));
 #endif
-    emit_loada(as, dest, &J2G(as->J)->tmptv);
+    }
   }
+  emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
 }
 
 static void asm_aref(ASMState *as, IRIns *ir)
@@ -1349,24 +1373,31 @@ static void asm_hrefk(ASMState *as, IRIn
 static void asm_uref(ASMState *as, IRIns *ir)
 {
   Reg dest = ra_dest(as, ir, RSET_GPR);
-  if (irref_isk(ir->op1)) {
+  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+  if (irref_isk(ir->op1) && !guarded) {
     GCfunc *fn = ir_kfunc(IR(ir->op1));
     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
     emit_rma(as, XO_MOV, dest|REX_GC64, v);
   } else {
     Reg uv = ra_scratch(as, RSET_GPR);
-    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
-    if (ir->o == IR_UREFC) {
+    if (ir->o == IR_UREFC)
       emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
-      asm_guardcc(as, CC_NE);
-      emit_i8(as, 1);
-      emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
-    } else {
+    else
       emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+    if (guarded) {
+      asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
+      emit_i8(as, 0);
+      emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+    }
+    if (irref_isk(ir->op1)) {
+      GCfunc *fn = ir_kfunc(IR(ir->op1));
+      GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+      emit_loada(as, uv, o);
+    } else {
+      emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
+	        (int32_t)offsetof(GCfuncL, uvptr) +
+	        (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
     }
-    emit_rmro(as, XO_MOV, uv|REX_GC64, func,
-	      (int32_t)offsetof(GCfuncL, uvptr) +
-	      (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
   }
 }
 
@@ -1524,6 +1555,7 @@ static void asm_ahuvload(ASMState *as, I
     Reg dest = asm_load_lightud64(as, ir, 1);
     if (ra_hasreg(dest)) {
       asm_fuseahuref(as, ir->op1, RSET_GPR);
+      if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
       emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
     }
     return;
@@ -1533,6 +1565,7 @@ static void asm_ahuvload(ASMState *as, I
     RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
     Reg dest = ra_dest(as, ir, allow);
     asm_fuseahuref(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
 #if LJ_GC64
     if (irt_isaddr(ir->t)) {
       emit_shifti(as, XOg_SHR|REX_64, dest, 17);
@@ -1560,6 +1593,7 @@ static void asm_ahuvload(ASMState *as, I
     }
 #endif
     asm_fuseahuref(as, ir->op1, gpr);
+    if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
   }
   /* Always do the type check, even if the load result is unused. */
   as->mrm.ofs += 4;
@@ -1675,7 +1709,8 @@ static void asm_sload(ASMState *as, IRIn
   lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
 	     "inconsistent SLOAD variant");
   lj_assertA(LJ_DUALNUM ||
-	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)),
+	     !irt_isint(t) ||
+	     (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
 	     "bad SLOAD type");
   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
     Reg left = ra_scratch(as, RSET_FPR);
@@ -1742,14 +1777,11 @@ static void asm_sload(ASMState *as, IRIn
   if ((ir->op2 & IRSLOAD_TYPECHECK)) {
     /* Need type check, even if the load result is unused. */
     asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
-    if (LJ_64 && irt_type(t) >= IRT_NUM) {
+    if ((LJ_64 && irt_type(t) >= IRT_NUM) || (ir->op2 & IRSLOAD_KEYINDEX)) {
       lj_assertA(irt_isinteger(t) || irt_isnum(t),
 		 "bad SLOAD type %d", irt_type(t));
-#if LJ_GC64
-      emit_u32(as, LJ_TISNUM << 15);
-#else
-      emit_u32(as, LJ_TISNUM);
-#endif
+      emit_u32(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX :
+		   LJ_GC64 ? (LJ_TISNUM << 15) : LJ_TISNUM);
       emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
 #if LJ_GC64
     } else if (irt_isnil(t)) {
@@ -1991,19 +2023,6 @@ static void asm_ldexp(ASMState *as, IRIn
   asm_x87load(as, ir->op2);
 }
 
-static void asm_fppowi(ASMState *as, IRIns *ir)
-{
-  /* The modified regs must match with the *.dasc implementation. */
-  RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
-  if (ra_hasreg(ir->r))
-    rset_clear(drop, ir->r);  /* Dest reg handled below. */
-  ra_evictset(as, drop);
-  ra_destreg(as, ir, RID_XMM0);
-  emit_call(as, lj_vm_powi_sse);
-  ra_left(as, RID_XMM0, ir->op1);
-  ra_left(as, RID_EAX, ir->op2);
-}
-
 static int asm_swapops(ASMState *as, IRIns *ir)
 {
   IRIns *irl = IR(ir->op1);
@@ -2584,15 +2603,15 @@ static void asm_comp_int64(ASMState *as,
 }
 #endif
 
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
 
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_32 && LJ_HASFFI
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && LJ_HASFFI
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
     if (usehi || uselo)
@@ -2606,8 +2625,10 @@ static void asm_hiop(ASMState *as, IRIns
       asm_fxstore(as, ir);
     return;
   }
+#endif
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
+#if LJ_32 && LJ_HASFFI
   case IR_ADD:
     as->flagmcp = NULL;
     as->curins--;
@@ -2630,20 +2651,16 @@ static void asm_hiop(ASMState *as, IRIns
     asm_neg_not(as, ir-1, XOg_NEG);
     break;
     }
-  case IR_CALLN:
-  case IR_CALLXS:
-    if (!uselo)
-      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
-    break;
   case IR_CNEWI:
     /* Nothing to do here. Handled by CNEWI itself. */
     break;
+#endif
+  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
   default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
   }
-#else
-  /* Unused on x64 or without FFI. */
-  UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP");
-#endif
 }
 
 /* -- Profiling ----------------------------------------------------------- */
@@ -2704,7 +2721,15 @@ static void asm_stack_restore(ASMState *
     IRIns *ir = IR(ref);
     if ((sn & SNAP_NORESTORE))
       continue;
-    if (irt_isnum(ir->t)) {
+    if ((sn & SNAP_KEYINDEX)) {
+      emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX);
+      if (irref_isk(ref)) {
+	emit_movmroi(as, RID_BASE, ofs, ir->i);
+      } else {
+	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+	emit_movtomro(as, src, RID_BASE, ofs);
+      }
+    } else if (irt_isnum(ir->t)) {
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
     } else {
@@ -2837,6 +2862,12 @@ static void asm_loop_fixup(ASMState *as)
   }
 }
 
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+  UNUSED(as);  /* Nothing to do. */
+}
+
 /* -- Head of trace ------------------------------------------------------- */
 
 /* Coalesce BASE register for a root trace. */
@@ -2854,7 +2885,7 @@ static void asm_head_root_base(ASMState
 }
 
 /* Coalesce or reload BASE register for a side trace. */
-static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
 {
   IRIns *ir = IR(REF_BASE);
   Reg r = ir->r;
@@ -2863,16 +2894,16 @@ static RegSet asm_head_side_base(ASMStat
     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
     if (irp->r == r) {
-      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
+      return r;  /* Same BASE register already coalesced. */
     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
       /* Move from coalesced parent reg. */
-      rset_clear(allow, irp->r);
       emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
+      return irp->r;
     } else {
       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
     }
   }
-  return allow;
+  return RID_NONE;
 }
 
 /* -- Tail of trace ------------------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_assert.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_assert.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_assert.c
@@ -1,6 +1,6 @@
 /*
 ** Internal assertions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_assert_c
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bc.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_bc.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bc.c
@@ -1,6 +1,6 @@
 /*
 ** Bytecode instruction modes.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bc_c
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_bc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bc.h
@@ -1,6 +1,6 @@
 /*
 ** Bytecode instruction format.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_BC_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcdump.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_bcdump.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcdump.h
@@ -1,6 +1,6 @@
 /*
 ** Bytecode dump definitions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_BCDUMP_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcread.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_bcread.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcread.c
@@ -1,6 +1,6 @@
 /*
 ** Bytecode reader.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bcread_c
@@ -53,11 +53,11 @@ static LJ_NOINLINE void bcread_fill(LexS
   do {
     const char *buf;
     size_t sz;
-    char *p = sbufB(&ls->sb);
+    char *p = ls->sb.b;
     MSize n = (MSize)(ls->pe - ls->p);
     if (n) {  /* Copy remainder to buffer. */
       if (sbuflen(&ls->sb)) {  /* Move down in buffer. */
-	lj_assertLS(ls->pe == sbufP(&ls->sb), "bad buffer pointer");
+	lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
 	if (ls->p != p) memmove(p, ls->p, n);
       } else {  /* Copy from buffer provided by reader. */
 	p = lj_buf_need(&ls->sb, len);
@@ -66,7 +66,7 @@ static LJ_NOINLINE void bcread_fill(LexS
       ls->p = p;
       ls->pe = p + n;
     }
-    setsbufP(&ls->sb, p + n);
+    ls->sb.w = p + n;
     buf = ls->rfunc(ls->L, ls->rdata, &sz);  /* Get more data from reader. */
     if (buf == NULL || sz == 0) {  /* EOF? */
       if (need) bcread_error(ls, LJ_ERR_BCBAD);
@@ -77,8 +77,8 @@ static LJ_NOINLINE void bcread_fill(LexS
     if (n) {  /* Append to buffer. */
       n += (MSize)sz;
       p = lj_buf_need(&ls->sb, n < len ? len : n);
-      memcpy(sbufP(&ls->sb), buf, sz);
-      setsbufP(&ls->sb, p + n);
+      memcpy(ls->sb.w, buf, sz);
+      ls->sb.w = p + n;
       ls->p = p;
       ls->pe = p + n;
     } else {  /* Return buffer provided by reader. */
@@ -399,11 +399,7 @@ static int bcread_header(LexState *ls)
   if ((flags & BCDUMP_F_FFI)) {
 #if LJ_HASFFI
     lua_State *L = ls->L;
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    ctype_loadffi(L);
 #else
     return 0;
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcwrite.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_bcwrite.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_bcwrite.c
@@ -1,6 +1,6 @@
 /*
 ** Bytecode writer.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_bcwrite_c
@@ -62,7 +62,7 @@ static void bcwrite_ktabk(BCWriteCtx *ct
       if (num == (lua_Number)k) {  /* -0 is never a constant. */
 	*p++ = BCDUMP_KTAB_INT;
 	p = lj_strfmt_wuleb128(p, k);
-	setsbufP(&ctx->sb, p);
+	ctx->sb.w = p;
 	return;
       }
     }
@@ -73,7 +73,7 @@ static void bcwrite_ktabk(BCWriteCtx *ct
     lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
     *p++ = BCDUMP_KTAB_NIL+~itype(o);
   }
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 }
 
 /* Write a template table. */
@@ -97,7 +97,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx
   /* Write number of array slots and hash slots. */
   p = lj_strfmt_wuleb128(p, narray);
   p = lj_strfmt_wuleb128(p, nhash);
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
   if (narray) {  /* Write array entries (may contain nil). */
     MSize i;
     TValue *o = tvref(t->array);
@@ -172,7 +172,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx,
       }
 #endif
     }
-    setsbufP(&ctx->sb, p);
+    ctx->sb.w = p;
   }
 }
 
@@ -189,7 +189,8 @@ static void bcwrite_knum(BCWriteCtx *ctx
       goto save_int;
     } else {
       /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
-      if (!LJ_DUALNUM) {  /* Narrow number constants to integers. */
+      if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
+	/* Narrow number constants to integers. */
 	lua_Number num = numV(o);
 	k = lj_num2int(num);
 	if (num == (lua_Number)k) {  /* -0 is never a constant. */
@@ -206,7 +207,7 @@ static void bcwrite_knum(BCWriteCtx *ctx
       p = lj_strfmt_wuleb128(p, o->u32.hi);
     }
   }
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 }
 
 /* Write bytecode instructions. */
@@ -281,7 +282,7 @@ static void bcwrite_proto(BCWriteCtx *ct
   /* Write bytecode instructions and upvalue refs. */
   p = bcwrite_bytecode(ctx, p, pt);
   p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
-  setsbufP(&ctx->sb, p);
+  ctx->sb.w = p;
 
   /* Write constants. */
   bcwrite_kgc(ctx, pt);
@@ -291,16 +292,16 @@ static void bcwrite_proto(BCWriteCtx *ct
   if (sizedbg) {
     p = lj_buf_more(&ctx->sb, sizedbg);
     p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
-    setsbufP(&ctx->sb, p);
+    ctx->sb.w = p;
   }
 
   /* Pass buffer to writer function. */
   if (ctx->status == 0) {
     MSize n = sbuflen(&ctx->sb) - 5;
     MSize nn = (lj_fls(n)+8)*9 >> 6;
-    char *q = sbufB(&ctx->sb) + (5 - nn);
+    char *q = ctx->sb.b + (5 - nn);
     p = lj_strfmt_wuleb128(q, n);  /* Fill in final size. */
-    lj_assertBCW(p == sbufB(&ctx->sb) + 5, "bad ULEB128 write");
+    lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
     ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
   }
 }
@@ -324,8 +325,8 @@ static void bcwrite_header(BCWriteCtx *c
     p = lj_strfmt_wuleb128(p, len);
     p = lj_buf_wmem(p, name, len);
   }
-  ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
-			   (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
+  ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
+			   (MSize)(p - ctx->sb.b), ctx->wdata);
 }
 
 /* Write footer of bytecode dump. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_buf.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_buf.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_buf.c
@@ -1,6 +1,6 @@
 /*
 ** Buffer handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_buf_c
@@ -20,12 +20,32 @@ static void buf_grow(SBuf *sb, MSize sz)
 {
   MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
   char *b;
+  GCSize flag;
   if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
   while (nsz < sz) nsz += nsz;
-  b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
-  setmref(sb->b, b);
-  setmref(sb->p, b + len);
-  setmref(sb->e, b + nsz);
+  flag = sbufflag(sb);
+  if ((flag & SBUF_FLAG_COW)) {  /* Copy-on-write semantics. */
+    lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
+    b = (char *)lj_mem_new(sbufL(sb), nsz);
+    setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
+    setgcrefnull(sbufX(sb)->cowref);
+    memcpy(b, sb->b, osz);
+  } else {
+    b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
+  }
+  if ((flag & SBUF_FLAG_EXT)) {
+    sbufX(sb)->r = sbufX(sb)->r - sb->b + b;  /* Adjust read pointer, too. */
+  }
+  /* Adjust buffer pointers. */
+  sb->b = b;
+  sb->w = b + len;
+  sb->e = b + nsz;
+  if ((flag & SBUF_FLAG_BORROW)) {  /* Adjust borrowed buffer pointers. */
+    SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
+    bsb->b = b;
+    bsb->w = b + len;
+    bsb->e = b + nsz;
+  }
 }
 
 LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
@@ -34,30 +54,51 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_nee
   if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
     lj_err_mem(sbufL(sb));
   buf_grow(sb, sz);
-  return sbufB(sb);
+  return sb->b;
 }
 
 LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
 {
-  MSize len = sbuflen(sb);
-  lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
-  if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
-    lj_err_mem(sbufL(sb));
-  buf_grow(sb, len + sz);
-  return sbufP(sb);
+  if (sbufisext(sb)) {
+    SBufExt *sbx = (SBufExt *)sb;
+    MSize len = sbufxlen(sbx);
+    if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+      lj_err_mem(sbufL(sbx));
+    if (len + sz > sbufsz(sbx)) {  /* Must grow. */
+      buf_grow((SBuf *)sbx, len + sz);
+    } else if (sbufiscow(sb) || sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
+      /* Also grow to avoid excessive compactions, if slack < size/8. */
+      buf_grow((SBuf *)sbx, sbuflen(sbx) + sz);  /* Not sbufxlen! */
+      return sbx->w;
+    }
+    if (sbx->r != sbx->b) {  /* Compact by moving down. */
+      memmove(sbx->b, sbx->r, len);
+      sbx->r = sbx->b;
+      sbx->w = sbx->b + len;
+      lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
+    }
+  } else {
+    MSize len = sbuflen(sb);
+    lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
+    if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+      lj_err_mem(sbufL(sb));
+    buf_grow(sb, len + sz);
+  }
+  return sb->w;
 }
 
 void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
 {
-  char *b = sbufB(sb);
-  MSize osz = (MSize)(sbufE(sb) - b);
+  char *b = sb->b;
+  MSize osz = (MSize)(sb->e - b);
   if (osz > 2*LJ_MIN_SBUF) {
-    MSize n = (MSize)(sbufP(sb) - b);
+    MSize n = (MSize)(sb->w - b);
     b = lj_mem_realloc(L, b, osz, (osz >> 1));
-    setmref(sb->b, b);
-    setmref(sb->p, b + n);
-    setmref(sb->e, b + (osz >> 1));
+    sb->b = b;
+    sb->w = b + n;
+    sb->e = b + (osz >> 1);
   }
+  lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
 }
 
 char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
@@ -67,30 +108,62 @@ char * LJ_FASTCALL lj_buf_tmp(lua_State
   return lj_buf_need(sb, sz);
 }
 
+#if LJ_HASBUFFER && LJ_HASJIT
+void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
+{
+  lua_State *L = sbufL(sbx);
+  lj_bufx_free(L, sbx);
+  lj_bufx_set_cow(L, sbx, p, len);
+  setgcref(sbx->cowref, ref);
+  lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
+}
+
+#if LJ_HASFFI
+MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
+{
+  lj_buf_more((SBuf *)sbx, sz);
+  return sbufleft(sbx);
+}
+#endif
+#endif
+
 /* -- Low-level buffer put operations ------------------------------------- */
 
 SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
 {
-  char *p = lj_buf_more(sb, len);
-  p = lj_buf_wmem(p, q, len);
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, len);
+  w = lj_buf_wmem(w, q, len);
+  sb->w = w;
   return sb;
 }
 
-SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+#if LJ_HASJIT || LJ_HASFFI
+static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
 {
-  char *p = lj_buf_more(sb, 1);
-  *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more2(sb, 1);
+  *w++ = (char)c;
+  sb->w = w;
   return sb;
 }
 
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+  char *w = sb->w;
+  if (LJ_LIKELY(w < sb->e)) {
+    *w++ = (char)c;
+    sb->w = w;
+    return sb;
+  }
+  return lj_buf_putchar2(sb, c);
+}
+#endif
+
 SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len);
-  p = lj_buf_wmem(p, strdata(s), len);
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, len);
+  w = lj_buf_wmem(w, strdata(s), len);
+  sb->w = w;
   return sb;
 }
 
@@ -99,47 +172,47 @@ SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *s
 SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s)+len-1;
-  while (p < e)
-    *p++ = *q--;
-  setsbufP(sb, p);
+  while (w < e)
+    *w++ = *q--;
+  sb->w = w;
   return sb;
 }
 
 SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s);
-  for (; p < e; p++, q++) {
+  for (; w < e; w++, q++) {
     uint32_t c = *(unsigned char *)q;
 #if LJ_TARGET_PPC
-    *p = c + ((c >= 'A' && c <= 'Z') << 5);
+    *w = c + ((c >= 'A' && c <= 'Z') << 5);
 #else
     if (c >= 'A' && c <= 'Z') c += 0x20;
-    *p = c;
+    *w = c;
 #endif
   }
-  setsbufP(sb, p);
+  sb->w = w;
   return sb;
 }
 
 SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
 {
   MSize len = s->len;
-  char *p = lj_buf_more(sb, len), *e = p+len;
+  char *w = lj_buf_more(sb, len), *e = w+len;
   const char *q = strdata(s);
-  for (; p < e; p++, q++) {
+  for (; w < e; w++, q++) {
     uint32_t c = *(unsigned char *)q;
 #if LJ_TARGET_PPC
-    *p = c - ((c >= 'a' && c <= 'z') << 5);
+    *w = c - ((c >= 'a' && c <= 'z') << 5);
 #else
     if (c >= 'a' && c <= 'z') c -= 0x20;
-    *p = c;
+    *w = c;
 #endif
   }
-  setsbufP(sb, p);
+  sb->w = w;
   return sb;
 }
 
@@ -148,21 +221,21 @@ SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr
   MSize len = s->len;
   if (rep > 0 && len) {
     uint64_t tlen = (uint64_t)rep * len;
-    char *p;
+    char *w;
     if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
       lj_err_mem(sbufL(sb));
-    p = lj_buf_more(sb, (MSize)tlen);
+    w = lj_buf_more(sb, (MSize)tlen);
     if (len == 1) {  /* Optimize a common case. */
       uint32_t c = strdata(s)[0];
-      do { *p++ = c; } while (--rep > 0);
+      do { *w++ = c; } while (--rep > 0);
     } else {
       const char *e = strdata(s) + len;
       do {
 	const char *q = strdata(s);
-	do { *p++ = *q++; } while (q < e);
+	do { *w++ = *q++; } while (q < e);
       } while (--rep > 0);
     }
-    setsbufP(sb, p);
+    sb->w = w;
   }
   return sb;
 }
@@ -173,27 +246,27 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t,
   if (i <= e) {
     for (;;) {
       cTValue *o = lj_tab_getint(t, i);
-      char *p;
+      char *w;
       if (!o) {
       badtype:  /* Error: bad element type. */
-	setsbufP(sb, (void *)(intptr_t)i);  /* Store failing index. */
+	sb->w = (char *)(intptr_t)i;  /* Store failing index. */
 	return NULL;
       } else if (tvisstr(o)) {
 	MSize len = strV(o)->len;
-	p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+	w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
       } else if (tvisint(o)) {
-	p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+	w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
       } else if (tvisnum(o)) {
-	p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
+	w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
       } else {
 	goto badtype;
       }
       if (i++ == e) {
-	setsbufP(sb, p);
+	sb->w = w;
 	break;
       }
-      if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
-      setsbufP(sb, p);
+      if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
+      sb->w = w;
     }
   }
   return sb;
@@ -203,7 +276,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t,
 
 GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
 {
-  return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
+  return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
 }
 
 /* Concatenate two strings. */
@@ -219,14 +292,14 @@ GCstr *lj_buf_cat2str(lua_State *L, GCst
 /* Read ULEB128 from buffer. */
 uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
 {
-  const uint8_t *p = (const uint8_t *)*pp;
-  uint32_t v = *p++;
+  const uint8_t *w = (const uint8_t *)*pp;
+  uint32_t v = *w++;
   if (LJ_UNLIKELY(v >= 0x80)) {
     int sh = 0;
     v &= 0x7f;
-    do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+    do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
   }
-  *pp = (const char *)p;
+  *pp = (const char *)w;
   return v;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_buf.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_buf.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_buf.h
@@ -1,6 +1,6 @@
 /*
 ** Buffer handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_BUF_H
@@ -10,16 +10,60 @@
 #include "lj_gc.h"
 #include "lj_str.h"
 
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-#define sbufB(sb)	(mref((sb)->b, char))
-#define sbufP(sb)	(mref((sb)->p, char))
-#define sbufE(sb)	(mref((sb)->e, char))
-#define sbufL(sb)	(mref((sb)->L, lua_State))
-#define sbufsz(sb)	((MSize)(sbufE((sb)) - sbufB((sb))))
-#define sbuflen(sb)	((MSize)(sbufP((sb)) - sbufB((sb))))
-#define sbufleft(sb)	((MSize)(sbufE((sb)) - sbufP((sb))))
-#define setsbufP(sb, q)	(setmref((sb)->p, (q)))
-#define setsbufL(sb, l)	(setmref((sb)->L, (l)))
+/* Resizable string buffers. */
+
+/* The SBuf struct definition is in lj_obj.h:
+**   char *w;	Write pointer.
+**   char *e;	End pointer.
+**   char *b;	Base pointer.
+**   MRef L;	lua_State, used for buffer resizing. Extension bits in 3 LSB.
+*/
+
+/* Extended string buffer. */
+typedef struct SBufExt {
+  SBufHeader;
+  union {
+    GCRef cowref;	/* Copy-on-write object reference. */
+    MRef bsb;		/* Borrowed string buffer. */
+  };
+  char *r;		/* Read pointer. */
+  GCRef dict_str;	/* Serialization string dictionary table. */
+  GCRef dict_mt;	/* Serialization metatable dictionary table. */
+  int depth;		/* Remaining recursion depth. */
+} SBufExt;
+
+#define sbufsz(sb)		((MSize)((sb)->e - (sb)->b))
+#define sbuflen(sb)		((MSize)((sb)->w - (sb)->b))
+#define sbufleft(sb)		((MSize)((sb)->e - (sb)->w))
+#define sbufxlen(sbx)		((MSize)((sbx)->w - (sbx)->r))
+#define sbufxslack(sbx)		((MSize)((sbx)->r - (sbx)->b))
+
+#define SBUF_MASK_FLAG		(7)
+#define SBUF_MASK_L		(~(GCSize)SBUF_MASK_FLAG)
+#define SBUF_FLAG_EXT		1	/* Extended string buffer. */
+#define SBUF_FLAG_COW		2	/* Copy-on-write buffer. */
+#define SBUF_FLAG_BORROW	4	/* Borrowed string buffer. */
+
+#define sbufL(sb) \
+  ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
+#define setsbufL(sb, l)		(setmref((sb)->L, (l)))
+#define setsbufXL(sb, l, flag) \
+  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
+#define setsbufXL_(sb, l) \
+  (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
+
+#define sbufflag(sb)		(mrefu((sb)->L))
+#define sbufisext(sb)		(sbufflag((sb)) & SBUF_FLAG_EXT)
+#define sbufiscow(sb)		(sbufflag((sb)) & SBUF_FLAG_COW)
+#define sbufisborrow(sb)	(sbufflag((sb)) & SBUF_FLAG_BORROW)
+#define sbufiscoworborrow(sb)	(sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
+#define sbufX(sb) \
+  (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
+#define setsbufflag(sb, flag)	(setmrefu((sb)->L, (flag)))
+
+#define tvisbuf(o) \
+  (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
+#define bufV(o)		check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
 
 /* Buffer management */
 LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
@@ -30,12 +74,12 @@ LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lu
 static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
 {
   setsbufL(sb, L);
-  setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
+  sb->w = sb->e = sb->b = NULL;
 }
 
 static LJ_AINLINE void lj_buf_reset(SBuf *sb)
 {
-  setmrefr(sb->p, sb->b);
+  sb->w = sb->b;
 }
 
 static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
@@ -48,26 +92,77 @@ static LJ_AINLINE SBuf *lj_buf_tmp_(lua_
 
 static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
 {
-  lj_mem_free(g, sbufB(sb), sbufsz(sb));
+  lj_assertG(!sbufisext(sb), "bad free of SBufExt");
+  lj_mem_free(g, sb->b, sbufsz(sb));
 }
 
 static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
 {
   if (LJ_UNLIKELY(sz > sbufsz(sb)))
     return lj_buf_need2(sb, sz);
-  return sbufB(sb);
+  return sb->b;
 }
 
 static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
 {
   if (LJ_UNLIKELY(sz > sbufleft(sb)))
     return lj_buf_more2(sb, sz);
-  return sbufP(sb);
+  return sb->w;
+}
+
+/* Extended buffer management */
+static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
+{  /* Initialize sbx as an empty extended buffer associated with L. */
+  memset(sbx, 0, sizeof(SBufExt));  /* Zero every field, including all pointers and dictionary refs. */
+  setsbufXL(sbx, L, SBUF_FLAG_EXT);  /* Store L together with the EXT flag. */
+}
+
+static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
+{  /* Make sbx use the storage of sb (no copy) and flag it as borrowed. */
+  setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
+  setmref(sbx->bsb, sb);  /* Remember which buffer the storage came from. */
+  sbx->r = sbx->w = sbx->b = sb->b;  /* Start empty: read and write pointers at the base; sb->w is ignored. */
+  sbx->e = sb->e;  /* Same end pointer as the borrowed buffer. */
+}
+
+static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
+				       const char *p, MSize len)
+{  /* Point sbx at len bytes of external data at p and flag it copy-on-write; cowref is not touched here. */
+  setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
+  sbx->r = sbx->b = (char *)p;  /* Read pointer at the base: all len bytes are unread. */
+  sbx->w = sbx->e = (char *)p + len;  /* w == e, so sbufleft() is zero. */
+}
+
+static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
+{  /* Empty the buffer; a COW buffer is additionally detached from its external data. */
+  if (sbufiscow(sbx)) {
+    setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));  /* Clear only the COW flag bit. */
+    setgcrefnull(sbx->cowref);
+    sbx->b = sbx->e = NULL;  /* Forget the external data; nothing is freed here. */
+  }
+  sbx->r = sbx->w = sbx->b;  /* Rewind read and write pointers to the base. */
 }
 
+static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
+{  /* Release the storage if sbx owns it, then return sbx to the empty extended state. */
+  if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));  /* Freed only when neither COW nor borrowed. */
+  setsbufXL(sbx, L, SBUF_FLAG_EXT);  /* Rewrites the L field with only the EXT flag set. */
+  setgcrefnull(sbx->cowref);
+  sbx->r = sbx->w = sbx->b = sbx->e = NULL;
+}
+
+#if LJ_HASBUFFER && LJ_HASJIT
+LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
+#if LJ_HASFFI
+LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
+#endif
+#endif
+
 /* Low-level buffer put operations */
 LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT || LJ_HASFFI
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
 LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
 
 static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
@@ -77,9 +172,9 @@ static LJ_AINLINE char *lj_buf_wmem(char
 
 static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
 {
-  char *p = lj_buf_more(sb, 1);
-  *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, 1);
+  *w++ = (char)c;
+  sb->w = w;
 }
 
 /* High-level buffer put operations */
@@ -97,7 +192,7 @@ LJ_FUNC uint32_t LJ_FASTCALL lj_buf_rule
 
 static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
 {
-  return lj_str_new(L, sbufB(sb), sbuflen(sb));
+  return lj_str_new(L, sb->b, sbuflen(sb));
 }
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_carith.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_carith.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_carith.c
@@ -1,6 +1,6 @@
 /*
 ** C data arithmetic.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -44,9 +44,13 @@ static int carith_checkarg(lua_State *L,
 	p = (uint8_t *)cdata_getptr(p, ct->size);
 	if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
       } else if (ctype_isfunc(ct->info)) {
+	CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
 	p = (uint8_t *)*(void **)p;
 	ct = ctype_get(cts,
 	  lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+	if (i) {  /* cts->tab may have been reallocated. */
+	  ca->ct[0] = ctype_get(cts, id0);
+	}
       }
       if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
       ca->ct[i] = ct;
@@ -207,7 +211,7 @@ static int carith_int64(lua_State *L, CT
       else
 	*up = lj_carith_powu64(u0, u1);
       break;
-    case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
+    case MM_unm: *up = ~u0+1u; break;
     default:
       lj_assertL(0, "bad metamethod %d", mm);
       break;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_carith.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_carith.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_carith.h
@@ -1,6 +1,6 @@
 /*
 ** C data arithmetic.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CARITH_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccall.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ccall.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccall.c
@@ -1,6 +1,6 @@
 /*
 ** FFI C call handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -20,12 +20,15 @@
 #if LJ_TARGET_X86
 /* -- x86 calling conventions --------------------------------------------- */
 
+#define CCALL_PUSH(arg) \
+  *(GPRArg *)((uint8_t *)cc->stack + nsp) = (GPRArg)(arg), nsp += CTSIZE_PTR
+
 #if LJ_ABI_WIN
 
 #define CCALL_HANDLE_STRUCTRET \
   /* Return structs bigger than 8 by reference (on stack only). */ \
   cc->retref = (sz > 8); \
-  if (cc->retref) cc->stack[nsp++] = (GPRArg)dp;
+  if (cc->retref) CCALL_PUSH(dp);
 
 #define CCALL_HANDLE_COMPLEXRET CCALL_HANDLE_STRUCTRET
 
@@ -40,7 +43,7 @@
     if (ngpr < maxgpr) \
       cc->gpr[ngpr++] = (GPRArg)dp; \
     else \
-      cc->stack[nsp++] = (GPRArg)dp; \
+      CCALL_PUSH(dp); \
   } else {  /* Struct with single FP field ends up in FPR. */ \
     cc->resx87 = ccall_classify_struct(cts, ctr); \
   }
@@ -56,7 +59,7 @@
   if (ngpr < maxgpr) \
     cc->gpr[ngpr++] = (GPRArg)dp; \
   else \
-    cc->stack[nsp++] = (GPRArg)dp;
+    CCALL_PUSH(dp);
 
 #endif
 
@@ -67,7 +70,7 @@
     if (ngpr < maxgpr) \
       cc->gpr[ngpr++] = (GPRArg)dp; \
     else \
-      cc->stack[nsp++] = (GPRArg)dp; \
+      CCALL_PUSH(dp); \
   }
 
 #endif
@@ -278,8 +281,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
       ngpr = maxgpr; \
     } else { \
       ngpr += n; \
@@ -334,7 +337,7 @@
   isfp = sz == 2*sizeof(float) ? 2 : 1;
 
 #define CCALL_HANDLE_REGARG \
-  if (LJ_TARGET_IOS && isva) { \
+  if (LJ_TARGET_OSX && isva) { \
     /* IOS: All variadic arguments are on the stack. */ \
   } else if (isfp) {  /* Try to pass argument in FPRs. */ \
     int n2 = ctype_isvector(d->info) ? 1 : \
@@ -345,10 +348,9 @@
       goto done; \
     } else { \
       nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
-      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
     } \
   } else {  /* Try to pass argument in GPRs. */ \
-    if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+    if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
       ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
       dp = &cc->gpr[ngpr]; \
@@ -356,7 +358,6 @@
       goto done; \
     } else { \
       ngpr = maxgpr;  /* Prevent reordering. */ \
-      if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
     } \
   }
 
@@ -471,8 +472,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-     nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-     if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+     nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+     if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
      ngpr = maxgpr; \
     } else { \
      ngpr += n; \
@@ -565,8 +566,8 @@
   if (ngpr < maxgpr) { \
     dp = &cc->gpr[ngpr]; \
     if (ngpr + n > maxgpr) { \
-      nsp += ngpr + n - maxgpr;  /* Assumes contiguous gpr/stack fields. */ \
-      if (nsp > CCALL_MAXSTACK) goto err_nyi;  /* Too many arguments. */ \
+      nsp += (ngpr + n - maxgpr) * CTSIZE_PTR;  /* Assumes contiguous gpr/stack fields. */ \
+      if (nsp > CCALL_SIZE_STACK) goto err_nyi;  /* Too many arguments. */ \
       ngpr = maxgpr; \
     } else { \
       ngpr += n; \
@@ -574,6 +575,97 @@
     goto done; \
   }
 
+#elif LJ_TARGET_RISCV64
+/* -- RISC-V lp64d calling conventions ------------------------------------ */
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size > 16 by reference. */ \
+  cc->retref = !(sz <= 16); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4) && (cl >> 8) <= 2) { \
+    CTSize i = (cl >> 8) - 1; \
+    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
+  } else { \
+    if (cl > 1) { \
+      sp = (uint8_t *)&cc->fpr[0]; \
+      if ((cl >> 8) > 2) \
+        sp = (uint8_t *)&cc->gpr[0]; \
+    } \
+      memcpy(dp, sp, ctr->size); \
+  } \
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 1 or 2 FPRs. */ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass long double complex by reference. */ \
+  if (sz == 2*sizeof(long double)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass complex in two FPRs or on stack. */ \
+  else if (sz == 2*sizeof(float)) { \
+    isfp = 2; \
+    sz = 2*CTSIZE_PTR; \
+  } else { \
+    isfp = 1; \
+    sz = 2*CTSIZE_PTR; \
+  }
+
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)&cc->fpr[0].f;
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass structs of size >16 by reference. */ \
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  nff = cl >> 8; \
+  if (sz > 16) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass struct in FPRs. */ \
+  if (cl > 1) { \
+    isfp = (cl & 4) ? 2 : 1; \
+  }
+
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp && (!isva)) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : \
+            isfp == 1 ? n : 2; \
+    if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else { \
+      if (ngpr + n2 <= maxgpr) { \
+       dp = &cc->gpr[ngpr]; \
+       ngpr += n2; \
+       goto done; \
+      } \
+    } \
+  } else {  /* Try to pass argument in GPRs. */ \
+      if (ngpr + n <= maxgpr) { \
+        dp = &cc->gpr[ngpr]; \
+        ngpr += n; \
+        goto done; \
+    } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -698,10 +790,11 @@ static int ccall_struct_arg(CCallState *
   lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
   if (ccall_struct_reg(cc, cts, dp, rcl)) {
     /* Register overflow? Pass on stack. */
-    MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
-    if (nsp + n > CCALL_MAXSTACK) return 1;  /* Too many arguments. */
-    cc->nsp = nsp + n;
-    memcpy(&cc->stack[nsp], dp, n*CTSIZE_PTR);
+    MSize nsp = cc->nsp, sz = rcl[1] ? 2*CTSIZE_PTR : CTSIZE_PTR;
+    if (nsp + sz > CCALL_SIZE_STACK)
+      return 1;  /* Too many arguments. */
+    cc->nsp = nsp + sz;
+    memcpy((uint8_t *)cc->stack + nsp, dp, sz);
   }
   return 0;  /* Ok. */
 }
@@ -889,6 +982,51 @@ static void ccall_copy_struct(CCallState
 
 #endif
 
+/* -- RISC-V ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_RISCV64
+
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{  /* Returns FP element size (4 or 8) + (FP element count << 8) for FP-only aggregates, else (size <= 16). */
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);  /* r: OR of FP element sizes; n: FP element count. */
+  while (ct->sib) {
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+	r |= sct->size;
+	if (!isu) n++; else if (n == 0) n = 1;  /* Union: keep the maximum instead of summing. */
+      } else if (ctype_iscomplex(sct->info)) {
+	r |= (sct->size >> 1);  /* Size of one complex component. */
+	if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+	goto substruct;
+      } else {
+	goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;  /* Any bitfield disqualifies the aggregate. */
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+	unsigned int s = ccall_classify_struct(cts, sct);
+	if (s <= 1) goto noth;  /* Nested aggregate is not FP-only. */
+	r |= (s & 255);
+	if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)  /* r is exactly 4 or 8 only if all FP elements share one size. */
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument. */
@@ -935,6 +1073,10 @@ static int ccall_set_args(lua_State *L,
 #endif
 #endif
 
+#if LJ_TARGET_RISCV64
+  int nff = 0;
+#endif
+
   /* Clear unused regs to get some determinism in case of misdeclaration. */
   memset(cc->gpr, 0, sizeof(cc->gpr));
 #if CCALL_NUM_FPR
@@ -983,6 +1125,14 @@ static int ccall_set_args(lua_State *L,
     fid = ctf->sib;
   }
 
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+  if ((ct->info & CTF_VARARG)) {
+    nsp -= maxgpr * CTSIZE_PTR;  /* May end up with negative nsp. */
+    ngpr = maxgpr;
+    nfpr = CCALL_NARG_FPR;
+  }
+#endif
+
   /* Walk through all passed arguments. */
   for (o = L->base+1, narg = 1; o < top; o++, narg++) {
     CTypeID did;
@@ -1019,25 +1169,31 @@ static int ccall_set_args(lua_State *L,
       CCALL_HANDLE_STRUCTARG
     } else if (ctype_iscomplex(d->info)) {
       CCALL_HANDLE_COMPLEXARG
-    } else {
+    } else if (!(CCALL_PACK_STACKARG && ctype_isenum(d->info))) {
       sz = CTSIZE_PTR;
     }
-    sz = (sz + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
-    n = sz / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
+    n = (sz + CTSIZE_PTR-1) / CTSIZE_PTR;  /* Number of GPRs or stack slots needed. */
 
     CCALL_HANDLE_REGARG  /* Handle register arguments. */
 
     /* Otherwise pass argument on stack. */
-    if (CCALL_ALIGN_STACKARG && !rp && (d->info & CTF_ALIGN) > CTALIGN_PTR) {
-      MSize align = (1u << ctype_align(d->info-CTALIGN_PTR)) -1;
-      nsp = (nsp + align) & ~align;  /* Align argument on stack. */
+    if (CCALL_ALIGN_STACKARG) {  /* Align argument on stack. */
+      MSize align = (1u << ctype_align(d->info)) - 1;
+      if (rp || (CCALL_PACK_STACKARG && isva && align < CTSIZE_PTR-1))
+	align = CTSIZE_PTR-1;
+      nsp = (nsp + align) & ~align;
     }
-    if (nsp + n > CCALL_MAXSTACK) {  /* Too many arguments. */
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+    /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
+    dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
+#else
+    dp = ((uint8_t *)cc->stack) + nsp;
+#endif
+    nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
+    if ((int32_t)nsp > CCALL_SIZE_STACK) {  /* Too many arguments. */
     err_nyi:
       lj_err_caller(L, LJ_ERR_FFI_NYICALL);
     }
-    dp = &cc->stack[nsp];
-    nsp += n;
     isva = 0;
 
   done:
@@ -1048,7 +1204,8 @@ static int ccall_set_args(lua_State *L,
     }
     lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
     /* Extend passed integers to 32 bits at least. */
-    if (ctype_isinteger_or_bool(d->info) && d->size < 4) {
+    if (ctype_isinteger_or_bool(d->info) && d->size < 4 &&
+	(!CCALL_PACK_STACKARG || !((uintptr_t)dp & 3))) {  /* Assumes LJ_LE. */
       if (d->info & CTF_UNSIGNED)
 	*(uint32_t *)dp = d->size == 1 ? (uint32_t)*(uint8_t *)dp :
 					 (uint32_t)*(uint16_t *)dp;
@@ -1060,7 +1217,11 @@ static int ccall_set_args(lua_State *L,
     if (isfp && d->size == sizeof(float))
       ((float *)dp)[1] = ((float *)dp)[0];  /* Floats occupy high slot. */
 #endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_RISCV64
+    if (isfp && d->size == sizeof(float))
+      ((uint32_t *)dp)[1] = 0xffffffffu;  /* Float NaN boxing */
+#endif
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
 #if LJ_TARGET_MIPS64
 	 || (isfp && nsp == 0)
@@ -1090,19 +1251,30 @@ static int ccall_set_args(lua_State *L,
       CTSize i = (sz >> 2) - 1;
       do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
     }
+#elif LJ_TARGET_RISCV64
+    if (isfp == 2 && nff <= 2) {
+      /* Split complex float into separate registers. */
+      CTSize i = (sz >> 2) - 1;
+      do {
+        ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i];
+      } while (i--);
+    }
 #else
     UNUSED(isfp);
 #endif
   }
   if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG);  /* Too few arguments. */
+#if LJ_TARGET_ARM64 && LJ_ABI_WIN
+  if ((int32_t)nsp < 0) nsp = 0;
+#endif
 
-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64
   cc->nfpr = nfpr;  /* Required for vararg functions. */
 #endif
-  cc->nsp = nsp;
-  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA)*CTSIZE_PTR;
-  if (nsp > CCALL_SPS_FREE)
-    cc->spadj += (((nsp-CCALL_SPS_FREE)*CTSIZE_PTR + 15u) & ~15u);
+  cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
+  cc->spadj = (CCALL_SPS_FREE + CCALL_SPS_EXTRA) * CTSIZE_PTR;
+  if (cc->nsp > CCALL_SPS_FREE * CTSIZE_PTR)
+    cc->spadj += (((cc->nsp - CCALL_SPS_FREE * CTSIZE_PTR) + 15u) & ~15u);
   return gcsteps;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccall.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ccall.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccall.h
@@ -1,6 +1,6 @@
 /*
 ** FFI C call handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CCALL_H
@@ -75,6 +75,9 @@ typedef union FPRArg {
 #define CCALL_NARG_FPR		8
 #define CCALL_NRET_FPR		4
 #define CCALL_SPS_FREE		0
+#if LJ_TARGET_OSX
+#define CCALL_PACK_STACKARG	1
+#endif
 
 typedef intptr_t GPRArg;
 typedef union FPRArg {
@@ -126,6 +129,21 @@ typedef union FPRArg {
   struct { LJ_ENDIAN_LOHI(float f; , float g;) };
 } FPRArg;
 
+#elif LJ_TARGET_RISCV64
+
+#define CCALL_NARG_GPR		8
+#define CCALL_NARG_FPR		8
+#define CCALL_NRET_GPR		2
+#define CCALL_NRET_FPR		2
+#define CCALL_SPS_EXTRA		3
+#define CCALL_SPS_FREE		1
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+  double d;
+  struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+} FPRArg;
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -139,6 +157,9 @@ typedef union FPRArg {
 #ifndef CCALL_ALIGN_STACKARG
 #define CCALL_ALIGN_STACKARG	1
 #endif
+#ifndef CCALL_PACK_STACKARG
+#define CCALL_PACK_STACKARG	0
+#endif
 #ifndef CCALL_ALIGN_CALLSTATE
 #define CCALL_ALIGN_CALLSTATE	8
 #endif
@@ -152,14 +173,15 @@ typedef union FPRArg {
 LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
 LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);
 
-#define CCALL_MAXSTACK		32
+#define CCALL_NUM_STACK		31
+#define CCALL_SIZE_STACK	(CCALL_NUM_STACK * CTSIZE_PTR)
 
 /* -- C call state -------------------------------------------------------- */
 
 typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
   void (*func)(void);		/* Pointer to called function. */
   uint32_t spadj;		/* Stack pointer adjustment. */
-  uint8_t nsp;			/* Number of stack slots. */
+  uint8_t nsp;			/* Number of bytes on stack. */
   uint8_t retref;		/* Return value by reference. */
 #if LJ_TARGET_X64
   uint8_t ngpr;			/* Number of arguments in GPRs. */
@@ -168,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE)
   uint8_t resx87;		/* Result on x87 stack: 1:float, 2:double. */
 #elif LJ_TARGET_ARM64
   void *retp;			/* Aggregate return pointer in x8. */
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64
   uint8_t nfpr;			/* Number of arguments in FPRs. */
 #endif
 #if LJ_32
@@ -178,7 +200,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE)
   FPRArg fpr[CCALL_NUM_FPR];	/* Arguments/results in FPRs. */
 #endif
   GPRArg gpr[CCALL_NUM_GPR];	/* Arguments/results in GPRs. */
-  GPRArg stack[CCALL_MAXSTACK];	/* Stack slots. */
+  GPRArg stack[CCALL_NUM_STACK];	/* Stack slots. */
 } CCallState;
 
 /* -- C call handling ----------------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccallback.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ccallback.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccallback.c
@@ -1,6 +1,6 @@
 /*
 ** FFI C callback handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs
 
 #define CALLBACK_MCODE_HEAD		52
 
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_MCODE_HEAD		68
+
 #else
 
 /* Missing support for this architecture. */
@@ -171,13 +175,13 @@ static void *callback_mcode_init(global_
 static void *callback_mcode_init(global_State *g, uint32_t *page)
 {
   uint32_t *p = page;
-  void *target = (void *)lj_vm_ffi_callback;
+  ASMFunction target = lj_vm_ffi_callback;
   MSize slot;
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
   *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
-  *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
+  *p++ = A64I_LE(A64I_BR_AUTH | A64F_N(RID_X11));
   *p++ = A64I_LE(A64I_NOP);
-  ((void **)p)[0] = target;
+  ((ASMFunction *)p)[0] = target;
   ((void **)p)[1] = g;
   p += 4;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_
   }
   return p;
 }
+#elif LJ_TARGET_RISCV64
+static void *callback_mcode_init(global_State *g, uint32_t *page)
+{  /* Fill page with a 17-instruction shared head followed by one 2-instruction stub per callback slot. */
+  uint32_t *p = page;
+  uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+  uintptr_t ug = (uintptr_t)(void *)g;
+  uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL;
+  uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL;
+  MSize slot;
+  *p++ = RISCVI_LUI  | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi));  /* Head: x6 = target, x7 = g; upper 32 bits first (LUI+ADDI). */
+  *p++ = RISCVI_LUI  | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi));
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi));
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);  /* Then shift in the low 32 bits as 11 + 11 + 10 bit chunks. */
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff);
+  *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff);
+  *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0);  /* Jump to the address built in x6. */
+  for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+    *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);  /* Per-slot stub: pass the slot number in x5 via LUI. */
+    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));  /* Jump back to the shared head at the page start. */
+    p++;
+  }
+  return p;  /* First word after the generated code. */
+}
 #else
 /* Missing support for this architecture. */
 #define callback_mcode_init(g, p)	(p)
@@ -256,6 +293,11 @@ static void *callback_mcode_init(global_
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS   MAP_ANON
 #endif
+#ifdef PROT_MPROTECT
+#define CCPROT_CREATE	(PROT_MPROTECT(PROT_EXEC))
+#else
+#define CCPROT_CREATE	0
+#endif
 
 #endif
 
@@ -271,7 +313,7 @@ static void callback_mcode_new(CTState *
   if (!p)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
 #elif LJ_TARGET_POSIX
-  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS,
+  p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
 	   -1, 0);
   if (p == MAP_FAILED)
     lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
@@ -409,7 +451,7 @@ void lj_ccallback_mcode_free(CTState *ct
       nfpr = CCALL_NARG_FPR;  /* Prevent reordering. */ \
     } \
   } else { \
-    if (!LJ_TARGET_IOS && n > 1) \
+    if (!LJ_TARGET_OSX && n > 1) \
       ngpr = (ngpr + 1u) & ~1u;  /* Align to regpair. */ \
     if (ngpr + n <= maxgpr) { \
       sp = &cts->cb.gpr[ngpr]; \
@@ -511,6 +553,31 @@ void lj_ccallback_mcode_free(CTState *ct
   if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
     ((float *)dp)[1] = *(float *)dp;
 
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) { \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  } else { \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#define CALLBACK_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    ((float *)dp)[1] = *(float *)dp;
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -657,7 +724,7 @@ static void callback_conv_result(CTState
 	*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
 					  (int32_t)*(int16_t *)dp;
     }
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
     if (ctr->size <= 4 &&
 	(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccallback.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ccallback.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ccallback.h
@@ -1,6 +1,6 @@
 /*
 ** FFI C callback handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CCALLBACK_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cconv.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cconv.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cconv.c
@@ -1,6 +1,6 @@
 /*
 ** C type conversions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -8,6 +8,7 @@
 #if LJ_HASFFI
 
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_tab.h"
 #include "lj_ctype.h"
 #include "lj_cdata.h"
@@ -568,7 +569,9 @@ void lj_cconv_ct_tv(CTState *cts, CType
     }
     s = ctype_raw(cts, sid);
     if (ctype_isfunc(s->info)) {
+      CTypeID did = ctype_typeid(cts, d);
       sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
+      d = ctype_get(cts, did);  /* cts->tab may have been reallocated. */
     } else {
       if (ctype_isenum(s->info)) s = ctype_child(cts, s);
       goto doconv;
@@ -619,6 +622,8 @@ void lj_cconv_ct_tv(CTState *cts, CType
     tmpptr = uddata(ud);
     if (ud->udtype == UDTYPE_IO_FILE)
       tmpptr = *(void **)tmpptr;
+    else if (ud->udtype == UDTYPE_BUFFER)
+      tmpptr = ((SBufExt *)tmpptr)->r;
   } else if (tvislightud(o)) {
     tmpptr = lightudV(cts->g, o);
   } else if (tvisfunc(o)) {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cconv.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cconv.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cconv.h
@@ -1,6 +1,6 @@
 /*
 ** C type conversions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CCONV_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cdata.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cdata.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cdata.c
@@ -1,6 +1,6 @@
 /*
 ** C data management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cdata.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cdata.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cdata.h
@@ -1,6 +1,6 @@
 /*
 ** C data management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CDATA_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_clib.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_clib.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_clib.c
@@ -1,6 +1,6 @@
 /*
 ** FFI C library loader.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -25,7 +25,7 @@
 #include <dlfcn.h>
 #include <stdio.h>
 
-#if defined(RTLD_DEFAULT)
+#if defined(RTLD_DEFAULT) && !defined(NO_RTLD_DEFAULT)
 #define CLIB_DEFHANDLE	RTLD_DEFAULT
 #elif LJ_TARGET_OSX || LJ_TARGET_BSD
 #define CLIB_DEFHANDLE	((void *)(intptr_t)-2)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_clib.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_clib.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_clib.h
@@ -1,6 +1,6 @@
 /*
 ** FFI C library loader.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CLIB_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cparse.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cparse.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cparse.c
@@ -1,6 +1,6 @@
 /*
 ** C declaration parser.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -133,9 +133,9 @@ LJ_NORET static void cp_errmsg(CPState *
     tokstr = NULL;
   } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
 	     tok >= CTOK_FIRSTDECL) {
-    if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
+    if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
     cp_save(cp, '\0');
-    tokstr = sbufB(&cp->sb);
+    tokstr = cp->sb.b;
   } else {
     tokstr = cp_tok2str(cp, tok);
   }
@@ -175,7 +175,7 @@ static CPToken cp_number(CPState *cp)
   TValue o;
   do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
   cp_save(cp, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), sbuflen(&cp->sb)-1,
+  fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
 			&o, STRSCAN_OPT_C);
   if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
   else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
@@ -279,7 +279,7 @@ static CPToken cp_string(CPState *cp)
     return CTOK_STRING;
   } else {
     if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
-    cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
+    cp->val.i32 = (int32_t)(char)*cp->sb.b;
     cp->val.id = CTID_INT32;
     return CTOK_INTEGER;
   }
@@ -468,7 +468,7 @@ static void cp_expr_sizeof(CPState *cp,
   } else {
     cp_expr_unary(cp, k);
   }
-  info = lj_ctype_info(cp->cts, k->id, &sz);
+  info = lj_ctype_info_raw(cp->cts, k->id, &sz);
   if (wantsz) {
     if (sz != CTSIZE_INVALID)
       k->u32 = sz;
@@ -488,7 +488,7 @@ static void cp_expr_prefix(CPState *cp,
   } else if (cp_opt(cp, '+')) {
     cp_expr_unary(cp, k);  /* Nothing to do (well, integer promotion). */
   } else if (cp_opt(cp, '-')) {
-    cp_expr_unary(cp, k); k->i32 = -k->i32;
+    cp_expr_unary(cp, k); k->i32 = (int32_t)(~(uint32_t)k->i32+1);
   } else if (cp_opt(cp, '~')) {
     cp_expr_unary(cp, k); k->i32 = ~k->i32;
   } else if (cp_opt(cp, '!')) {
@@ -1766,9 +1766,11 @@ static void cp_pragma(CPState *cp, BCLin
     cp_check(cp, '(');
     if (cp->tok == CTOK_IDENT) {
       if (cp_str_is(cp->str, "push")) {
-	if (cp->curpack < CPARSE_MAX_PACKSTACK) {
+	if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
 	  cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
 	  cp->curpack++;
+	} else {
+	  cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
 	}
       } else if (cp_str_is(cp->str, "pop")) {
 	if (cp->curpack > 0) cp->curpack--;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cparse.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_cparse.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_cparse.h
@@ -1,6 +1,6 @@
 /*
 ** C declaration parser.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CPARSE_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_crecord.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_crecord.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_crecord.c
@@ -1,6 +1,6 @@
 /*
 ** Trace recorder for C data operations.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_ffrecord_c
@@ -78,7 +78,7 @@ static CTypeID argv2ctype(jit_State *J,
     /* Specialize to the string containing the C type declaration. */
     emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, s));
     cp.L = J->L;
-    cp.cts = ctype_ctsG(J2G(J));
+    cp.cts = ctype_cts(J->L);
     oldtop = cp.cts->top;
     cp.srcname = strdata(s);
     cp.p = strdata(s);
@@ -616,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CTy
     sp = lj_ir_kptr(J, NULL);
   } else if (tref_isudata(sp)) {
     GCudata *ud = udataV(sval);
-    if (ud->udtype == UDTYPE_IO_FILE) {
+    if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
       TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
-      emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
-      sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE);
+      emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
+      sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
+		  ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
+						 IRFL_SBUF_R);
     } else {
       sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
     }
@@ -1024,8 +1026,26 @@ static void crec_alloc(jit_State *J, Rec
 	crec_ct_tv(J, dc, dp, sp, sval);
       }
     } else if (ctype_isstruct(d->info)) {
-      CTypeID fid = d->sib;
+      CTypeID fid;
       MSize i = 1;
+      if (!J->base[1]) {  /* Handle zero-fill of struct-of-NYI. */
+	fid = d->sib;
+	while (fid) {
+	  CType *df = ctype_get(cts, fid);
+	  fid = df->sib;
+	  if (ctype_isfield(df->info)) {
+	    CType *dc;
+	    if (!gcref(df->name)) continue;  /* Ignore unnamed fields. */
+	    dc = ctype_rawchild(cts, df);  /* Field type. */
+	    if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
+		  ctype_isenum(dc->info)))
+	      goto special;
+	  } else if (!ctype_isconstval(df->info)) {
+	    goto special;
+	  }
+	}
+      }
+      fid = d->sib;
       while (fid) {
 	CType *df = ctype_get(cts, fid);
 	fid = df->sib;
@@ -1098,6 +1118,8 @@ static TRef crec_call_args(jit_State *J,
     ngpr = 1;
   else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
     ngpr = 2;
+#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
+  int ngpr = CCALL_NARG_GPR;
 #endif
 
   /* Skip initial attributes. */
@@ -1123,6 +1145,14 @@ static TRef crec_call_args(jit_State *J,
     } else {
       if (!(ct->info & CTF_VARARG))
 	lj_trace_err(J, LJ_TRERR_NYICALL);  /* Too many arguments. */
+#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
+      if (ngpr >= 0) {
+	ngpr = -1;
+	args[n++] = TREF_NIL;  /* Marker for start of varargs. */
+	if (n >= CCI_NARGS_MAX)
+	  lj_trace_err(J, LJ_TRERR_NYICALL);
+      }
+#endif
       did = lj_ccall_ctid_vararg(cts, o);  /* Infer vararg type. */
     }
     d = ctype_raw(cts, did);
@@ -1131,6 +1161,15 @@ static TRef crec_call_args(jit_State *J,
       lj_trace_err(J, LJ_TRERR_NYICALL);
     tr = crec_ct_tv(J, d, 0, *base, o);
     if (ctype_isinteger_or_bool(d->info)) {
+#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
+      if (!ngpr) {
+	/* Fixed args passed on the stack use their unpromoted size. */
+	if (d->size != lj_ir_type_size[tref_type(tr)]) {
+	  lj_assertJ(d->size == 1 || d->size==2, "unexpected size %d", d->size);
+	  tr = emitconv(tr, d->size==1 ? IRT_U8 : IRT_U16, tref_type(tr), 0);
+	}
+      } else
+#endif
       if (d->size < 4) {
 	if ((d->info & CTF_UNSIGNED))
 	  tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_U8 : IRT_U16, 0);
@@ -1168,6 +1207,10 @@ static TRef crec_call_args(jit_State *J,
       }
     }
 #endif
+#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
+    if (!ctype_isfp(d->info) && ngpr) {
+      ngpr--;
+    }
 #endif
     args[n] = tr;
   }
@@ -1484,9 +1527,13 @@ void LJ_FASTCALL recff_cdata_arith(jit_S
 	if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
 	goto ok;
       } else if (ctype_isfunc(ct->info)) {
+	CTypeID id0 = i ? ctype_typeid(cts, s[0]) : 0;
 	tr = emitir(IRT(IR_FLOAD, IRT_PTR), tr, IRFL_CDATA_PTR);
 	ct = ctype_get(cts,
 	  lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+	if (i) {
+	  s[0] = ctype_get(cts, id0);  /* cts->tab may have been reallocated. */
+	}
 	goto ok;
       } else {
 	tr = emitir(IRT(IR_ADD, IRT_PTR), tr, lj_ir_kintp(J, sizeof(GCcdata)));
@@ -1855,7 +1902,8 @@ TRef recff_bit64_tohex(jit_State *J, Rec
   } else {
     n = id ? 16 : 8;
   }
-  if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+  if (n < 0) { n = (int32_t)(~n+1u); sf |= STRFMT_F_UPPER; }
+  if ((uint32_t)n > 254) n = 254;
   sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
   if (id) {
     tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
@@ -1893,6 +1941,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit
   }
 }
 
+TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
+{  /* Record a 64 bit field load from an int64_t/uint64_t cdata. */
+  CTypeID id = argv2cdata(J, tr, o)->ctypeid;
+  if (!(id == CTID_INT64 || id == CTID_UINT64))
+    lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Trace error for other ctypes. */
+  lj_needsplit(J);  /* NOTE(review): presumably flags 64 bit IR for SPLIT. */
+  return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
+		IRFL_CDATA_INT64);  /* Result type follows the signedness. */
+}
+
+#if LJ_HASBUFFER
+TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
+{  /* Record conversion of a cdata argument to the CTID_P_CVOID ctype. */
+  CTState *cts = ctype_ctsG(J2G(J));
+  if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);  /* cdata only. */
+  return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
+}
+
+TRef lj_crecord_topuint8(jit_State *J, TRef tr)
+{  /* Emit IR_CNEWI with ctype ID CTID_P_UINT8 and value tr. */
+  return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
+}
+#endif
+
 #undef IR
 #undef emitir
 #undef emitconv
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_crecord.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_crecord.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_crecord.h
@@ -1,6 +1,6 @@
 /*
 ** Trace recorder for C data operations.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CRECORD_H
@@ -33,6 +33,11 @@ LJ_FUNC int LJ_FASTCALL recff_bit64_shif
 LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
 
 LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
+#if LJ_HASBUFFER
+LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
+LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
+#endif
 #endif
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ctype.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ctype.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ctype.c
@@ -1,6 +1,6 @@
 /*
 ** C type management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include "lj_obj.h"
@@ -191,8 +191,20 @@ CTypeID lj_ctype_intern(CTState *cts, CT
   }
   id = cts->top;
   if (LJ_UNLIKELY(id >= cts->sizetab)) {
+#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
+    CType *ct;
+#endif
     if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
+#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
+    ct = lj_mem_newvec(cts->L, id+1, CType);
+    memcpy(ct, cts->tab, id*sizeof(CType));
+    memset(cts->tab, 0, id*sizeof(CType));
+    lj_mem_freevec(cts->g, cts->tab, cts->sizetab, CType);
+    cts->tab = ct;
+    cts->sizetab = id+1;
+#else
     lj_mem_growvec(cts->L, cts->tab, cts->sizetab, CTID_MAX, CType);
+#endif
   }
   cts->top = id+1;
   cts->tab[id].info = info;
@@ -333,6 +345,14 @@ CTInfo lj_ctype_info(CTState *cts, CType
   return qual;
 }
 
+/* Like lj_ctype_info(), but for a reference type query the referenced ID. */
+CTInfo lj_ctype_info_raw(CTState *cts, CTypeID id, CTSize *szp)
+{
+  CType *ct = ctype_get(cts, id);
+  if (ctype_isref(ct->info)) id = ctype_cid(ct->info);  /* One level only. */
+  return lj_ctype_info(cts, id, szp);
+}
+
 /* Get ctype metamethod. */
 cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm)
 {
@@ -562,7 +582,7 @@ GCstr *lj_ctype_repr_int64(lua_State *L,
   if (isunsigned) {
     *--p = 'U';
   } else if ((int64_t)n < 0) {
-    n = (uint64_t)-(int64_t)n;
+    n = ~n+1u;
     sign = 1;
   }
   do { *--p = (char)('0' + n % 10); } while (n /= 10);
@@ -583,7 +603,7 @@ GCstr *lj_ctype_repr_complex(lua_State *
   lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
   if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
   lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
-  lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i');
+  lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
   return lj_buf_str(L, sb);
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ctype.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ctype.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ctype.h
@@ -1,6 +1,6 @@
 /*
 ** C type management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_CTYPE_H
@@ -276,6 +276,8 @@ typedef struct CTState {
 #define CTTYDEFP(_)
 #endif
 
+#define CTF_LONG_IF8		(CTF_LONG * (sizeof(long) == 8))
+
 /* Common types. */
 #define CTTYDEF(_) \
   _(NONE,		0,	CT_ATTRIB, CTATTRIB(CTA_BAD)) \
@@ -289,8 +291,8 @@ typedef struct CTState {
   _(UINT16,		2,	CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \
   _(INT32,		4,	CT_NUM, CTALIGN(2)) \
   _(UINT32,		4,	CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \
-  _(INT64,		8,	CT_NUM, CTF_LONG|CTALIGN(3)) \
-  _(UINT64,		8,	CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \
+  _(INT64,		8,	CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \
+  _(UINT64,		8,	CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \
   _(FLOAT,		4,	CT_NUM, CTF_FP|CTALIGN(2)) \
   _(DOUBLE,		8,	CT_NUM, CTF_FP|CTALIGN(3)) \
   _(COMPLEX_FLOAT,	8,	CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \
@@ -298,6 +300,7 @@ typedef struct CTState {
   _(P_VOID,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_VOID) \
   _(P_CVOID,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_CVOID) \
   _(P_CCHAR,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
+  _(P_UINT8,	CTSIZE_PTR,	CT_PTR, CTALIGN_PTR|CTID_UINT8) \
   _(A_CCHAR,		-1,	CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
   _(CTYPEID,		4,	CT_ENUM, CTALIGN(2)|CTID_INT32) \
   CTTYDEFP(_) \
@@ -389,6 +392,16 @@ static LJ_AINLINE CTState *ctype_cts(lua
   return cts;
 }
 
+/* Load FFI library on-demand. Keeps L->top (saved as a stack offset). */
+#define ctype_loadffi(L) \
+  do { \
+    if (!ctype_ctsG(G(L))) {  /* No C type state yet. */ \
+      ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
+      luaopen_ffi(L); \
+      L->top = (TValue *)(mref(L->stack, char) + oldtop); \
+    } \
+  } while (0)
+
 /* Save and restore state of C type table. */
 #define LJ_CTYPE_SAVE(cts)	CTState savects_ = *(cts)
 #define LJ_CTYPE_RESTORE(cts) \
@@ -457,6 +470,7 @@ LJ_FUNC CType *lj_ctype_rawref(CTState *
 LJ_FUNC CTSize lj_ctype_size(CTState *cts, CTypeID id);
 LJ_FUNC CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem);
 LJ_FUNC CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp);
+LJ_FUNC CTInfo lj_ctype_info_raw(CTState *cts, CTypeID id, CTSize *szp);
 LJ_FUNC cTValue *lj_ctype_meta(CTState *cts, CTypeID id, MMS mm);
 LJ_FUNC GCstr *lj_ctype_repr(lua_State *L, CTypeID id, GCstr *name);
 LJ_FUNC GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_debug.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_debug.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_debug.c
@@ -1,6 +1,6 @@
 /*
 ** Debugging and introspection.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_debug_c
@@ -101,9 +101,12 @@ static BCPos debug_framepc(lua_State *L,
   pos = proto_bcpos(pt, ins) - 1;
 #if LJ_HASJIT
   if (pos > pt->sizebc) {  /* Undo the effects of lj_trace_exit for JLOOP. */
-    GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
-    lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected");
-    pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
+    if (bc_isret(bc_op(ins[-1]))) {
+      GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
+      pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
+    } else {
+      pos = NO_BCPOS;  /* Punt in case of stack overflow for stitched trace. */
+    }
   }
 #endif
   return pos;
@@ -648,7 +651,7 @@ void lj_debug_dumpstack(lua_State *L, SB
     level += dir;
   }
   if (lastlen)
-    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+    sb->w = sb->b + lastlen;  /* Zap trailing separator. */
 }
 #endif
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_debug.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_debug.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_debug.h
@@ -1,6 +1,6 @@
 /*
 ** Debugging and introspection.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_DEBUG_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_def.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_def.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_def.h
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT common internal definitions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_DEF_H
@@ -69,7 +69,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_UPVAL	60		/* Max. # of upvalues. */
 
 #define LJ_MAX_IDXCHAIN	100		/* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA	(5+2*LJ_FR2)	/* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA	(5+3*LJ_FR2)	/* Extra stack space (metamethods). */
 
 #define LJ_NUM_CBPAGE	1		/* Number of FFI callback pages. */
 
@@ -146,15 +146,9 @@ typedef uintptr_t BloomFilter;
 #define LJ_UNLIKELY(x)	__builtin_expect(!!(x), 0)
 
 #define lj_ffs(x)	((uint32_t)__builtin_ctz(x))
-/* Don't ask ... */
-#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
-static LJ_AINLINE uint32_t lj_fls(uint32_t x)
-{
-  uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
-}
-#else
 #define lj_fls(x)	((uint32_t)(__builtin_clz(x)^31))
-#endif
+#define lj_ffs64(x)	((uint32_t)__builtin_ctzll(x))
+#define lj_fls64(x)	((uint32_t)(__builtin_clzll(x)^63))
 
 #if defined(__arm__)
 static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
@@ -277,6 +271,23 @@ static LJ_AINLINE uint32_t lj_fls(uint32
 {
   unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
 }
+
+#if defined(_M_X64) || defined(_M_ARM64)
+unsigned char _BitScanForward64(unsigned long *, uint64_t);
+unsigned char _BitScanReverse64(unsigned long *, uint64_t);
+#pragma intrinsic(_BitScanForward64)
+#pragma intrinsic(_BitScanReverse64)
+
+static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
+{  /* Returns the index set by _BitScanForward64; x == 0 is not checked. */
+  unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
+}
+
+static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
+{  /* Returns the index set by _BitScanReverse64; x == 0 is not checked. */
+  unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
+}
+#endif
 #endif
 
 unsigned long _byteswap_ulong(unsigned long);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_dispatch.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_dispatch.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_dispatch.c
@@ -1,6 +1,6 @@
 /*
 ** Instruction dispatch handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_dispatch_c
@@ -68,6 +68,8 @@ void lj_dispatch_init(GG_State *GG)
   /* The JIT engine is off by default. luaopen_jit() turns it on. */
   disp[BC_FORL] = disp[BC_IFORL];
   disp[BC_ITERL] = disp[BC_IITERL];
+  /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */
+  disp[BC_ITERN] = &lj_vm_IITERN;
   disp[BC_LOOP] = disp[BC_ILOOP];
   disp[BC_FUNCF] = disp[BC_IFUNCF];
   disp[BC_FUNCV] = disp[BC_IFUNCV];
@@ -118,19 +120,21 @@ void lj_dispatch_update(global_State *g)
   mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
   if (oldmode != mode) {  /* Mode changed? */
     ASMFunction *disp = G2GG(g)->dispatch;
-    ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv;
+    ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv;
     g->dispatchmode = mode;
 
     /* Hotcount if JIT is on, but not while recording. */
     if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) {
       f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
       f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
+      f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]);
       f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
       f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
       f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
     } else {  /* Otherwise use the non-hotcounting instructions. */
       f_forl = disp[GG_LEN_DDISP+BC_IFORL];
       f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
+      f_itern = &lj_vm_IITERN;
       f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
       f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]);
       f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]);
@@ -138,6 +142,7 @@ void lj_dispatch_update(global_State *g)
     /* Init static counting instruction dispatch first (may be copied below). */
     disp[GG_LEN_DDISP+BC_FORL] = f_forl;
     disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
+    disp[GG_LEN_DDISP+BC_ITERN] = f_itern;
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
 
     /* Set dynamic instruction dispatch. */
@@ -165,6 +170,7 @@ void lj_dispatch_update(global_State *g)
       /* Otherwise set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
+      disp[BC_ITERN] = f_itern;
       disp[BC_LOOP] = f_loop;
       /* Set dynamic return dispatch. */
       if ((mode & DISPMODE_RET)) {
@@ -301,9 +307,9 @@ int luaJIT_setmode(lua_State *L, int idx
       } else {
 	return 0;  /* Failed. */
       }
-      g->bc_cfunc_ext = BCINS_AD(BC_FUNCCW, 0, 0);
+      setbc_op(&g->bc_cfunc_ext, BC_FUNCCW);
     } else {
-      g->bc_cfunc_ext = BCINS_AD(BC_FUNCC, 0, 0);
+      setbc_op(&g->bc_cfunc_ext, BC_FUNCC);
     }
     break;
   default:
@@ -447,7 +453,7 @@ static int call_init(lua_State *L, GCfun
     int numparams = pt->numparams;
     int gotparams = (int)(L->top - L->base);
     int need = pt->framesize;
-    if ((pt->flags & PROTO_VARARG)) need += 1+gotparams;
+    if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams;
     lj_state_checkstack(L, (MSize)need);
     numparams -= gotparams;
     return numparams >= 0 ? numparams : 0;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_dispatch.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_dispatch.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_dispatch.h
@@ -1,6 +1,6 @@
 /*
 ** Instruction dispatch handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_DISPATCH_H
@@ -31,7 +31,7 @@ extern double __divdf3(double a, double
 #define SFGOTDEF(_)
 #endif
 #if LJ_HASJIT
-#define JITGOTDEF(_)	_(lj_trace_exit) _(lj_trace_hot)
+#define JITGOTDEF(_)	_(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
 #else
 #define JITGOTDEF(_)
 #endif
@@ -46,7 +46,7 @@ extern double __divdf3(double a, double
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
   _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
-  _(lj_dispatch_profile) _(lj_err_throw) _(lj_err_run) \
+  _(lj_dispatch_profile) _(lj_err_throw) \
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
@@ -89,7 +89,7 @@ typedef uint16_t HotCount;
 typedef struct GG_State {
   lua_State L;				/* Main thread. */
   global_State g;			/* Global state. */
-#if LJ_TARGET_ARM
+#if LJ_TARGET_ARM && !LJ_TARGET_NX
   /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
   uint8_t align1[(16-sizeof(global_State))&15];
 #endif
@@ -99,7 +99,7 @@ typedef struct GG_State {
 #if LJ_HASJIT
   jit_State J;				/* JIT state. */
   HotCount hotcount[HOTCOUNT_SIZE];	/* Hot counters. */
-#if LJ_TARGET_ARM
+#if LJ_TARGET_ARM && !LJ_TARGET_NX
   /* Ditto for J. */
   uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_arm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_emit_arm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_arm.h
@@ -1,6 +1,6 @@
 /*
 ** ARM instruction emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Constant encoding --------------------------------------------------- */
@@ -157,7 +157,7 @@ static int emit_kdelta2(ASMState *as, Re
       if (other) {
 	int32_t delta = i - other;
 	uint32_t sh, inv = 0, k2, k;
-	if (delta < 0) { delta = -delta; inv = ARMI_ADD^ARMI_SUB; }
+	if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; }
 	sh = lj_ffs(delta) & ~1;
 	k2 = emit_isk12(0, delta & (255 << sh));
 	k = emit_isk12(0, delta & ~(255 << sh));
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_arm64.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_emit_arm64.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_arm64.h
@@ -1,6 +1,6 @@
 /*
 ** ARM64 instruction emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
 ** Sponsored by Cisco Systems, Inc.
@@ -20,49 +20,41 @@ static uint64_t get_k64val(ASMState *as,
   } else {
     lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
 	       "bad 64 bit const IR op %d", ir->o);
-    return ir->i;  /* Sign-extended. */
+    return (uint32_t)ir->i;  /* Zero-extended. */
   }
 }
 
 /* Encode constant in K12 format for data processing instructions. */
 static uint32_t emit_isk12(int64_t n)
 {
-  uint64_t k = (n < 0) ? -n : n;
-  uint32_t m = (n < 0) ? 0x40000000 : 0;
+  uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;  /* Unsigned negate. */
+  uint32_t m = n < 0 ? 0x40000000 : 0;  /* Extra bit merged in for n < 0. */
   if (k < 0x1000) {
-    return A64I_K12|m|A64F_U12(k);
+    return (uint32_t)(A64I_K12|m|A64F_U12(k));  /* Fits in 12 bits. */
   } else if ((k & 0xfff000) == k) {
-    return A64I_K12|m|0x400000|A64F_U12(k>>12);
+    return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));  /* 12 bits << 12. */
   }
   return 0;
 }
 
-#define emit_clz64(n)	__builtin_clzll(n)
-#define emit_ctz64(n)	__builtin_ctzll(n)
+#define emit_clz64(n)	(lj_fls64(n)^63)
+#define emit_ctz64(n)	lj_ffs64(n)
 
 /* Encode constant in K13 format for logical data processing instructions. */
 static uint32_t emit_isk13(uint64_t n, int is64)
 {
-  int inv = 0, w = 128, lz, tz;
-  if (n & 1) { n = ~n; w = 64; inv = 1; }  /* Avoid wrap-around of ones. */
-  if (!n) return 0;  /* Neither all-zero nor all-ones are allowed. */
-  do {  /* Find the repeat width. */
-    if (is64 && (uint32_t)(n^(n>>32))) break;
-    n = (uint32_t)n;
-    if (!n) return 0;  /* Ditto when passing n=0xffffffff and is64=0. */
-    w = 32; if ((n^(n>>16)) & 0xffff) break;
-    n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
-    n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
-    n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
-    n = n & 0x3; w = 2;
-  } while (0);
-  lz = emit_clz64(n);
-  tz = emit_ctz64(n);
-  if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
-  if (inv)
-    return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
-  else
-    return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
+  /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */
+  int rot, ones, size, immr, imms;  /* immr/imms: encoded fields. */
+  if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n;  /* Replicate low 32 bits. */
+  if ((n+1u) <= 1u) return 0;  /* Neither all-zero nor all-ones are allowed. */
+  rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64;
+  n = lj_ror(n, rot & 63);
+  ones = emit_ctz64(~n);  /* Length of the run of ones at bit 0. */
+  size = emit_clz64(n) + ones;  /* Candidate element size. */
+  if (lj_ror(n, size & 63) != n) return 0;  /* Non-repeating? */
+  immr = -rot & (size - 1);
+  imms = (-(size << 1) | (ones - 1)) & 63;
+  return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms);
 }
 
 static uint32_t emit_isfpk64(uint64_t n)
@@ -121,9 +113,20 @@ static int emit_checkofs(A64Ins ai, int6
   }
 }
 
-static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
+static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
+{  /* Encoding (minus Rd) that a neighbouring load/store at ofs would have. */
+  if (ofs >= 0) {
+    return ai | A64F_U12(ofs>>sc);  /* Subsequent lj_ror checks ofs. */
+  } else if (ofs >= -256) {
+    return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff);  /* 9 bit signed offset. */
+  } else {
+    return A64F_D(31);  /* Will mismatch prev. */
+  }
+}
+
+static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
+{
+  int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
   lj_assertA(ot, "load/store offset %d out of range", ofs);
   /* Combine LDR/STR pairs to LDP/STP. */
   if ((sc == 2 || sc == 3) &&
@@ -132,18 +135,16 @@ static void emit_lso(ASMState *as, A64In
     uint32_t prev = *as->mcp & ~A64F_D(31);
     int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
     A64Ins aip;
-    if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
-	prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
+    if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) {
       aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
-    } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
-	       prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
+    } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) {
       aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
       ofsm = ofs;
     } else {
       goto nopair;
     }
-    if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
-      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
+    if (lj_ror((unsigned int)ofsm + (64u<<sc), sc) <= 127u) {
+      *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
 	(ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
       return;
     }
@@ -158,13 +159,12 @@ nopair:
 /* -- Emit loads/stores --------------------------------------------------- */
 
 /* Prefer rematerialization of BASE/L from global_State over spills. */
-#define emit_canremat(ref)	((ref) <= ASMREF_L)
+#define emit_canremat(ref)	((ref) <= REF_BASE)
 
-/* Try to find an N-step delta relative to other consts with N < lim. */
-static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
+/* Try to find a one-step delta relative to other consts. */
+static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
 {
-  RegSet work = ~as->freeset & RSET_GPR;
-  if (lim <= 1) return 0;  /* Can't beat that. */
+  RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
   while (work) {
     Reg r = rset_picktop(work);
     IRRef ref = regcost_ref(as->cost[r]);
@@ -173,13 +173,14 @@ static int emit_kdelta(ASMState *as, Reg
       uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
 				     get_k64val(as, ref);
       int64_t delta = (int64_t)(k - kx);
+      if (!is64) delta = (int64_t)(int32_t)delta;  /* Sign-extend. */
       if (delta == 0) {
-	emit_dm(as, A64I_MOVx, rd, r);
+	emit_dm(as, is64|A64I_MOVw, rd, r);
 	return 1;
       } else {
-	uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
+	uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
 	if (k12) {
-	  emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
+	  emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
 	  return 1;
 	}
 	/* Do other ops or multi-step deltas pay off? Probably not.
@@ -192,77 +193,101 @@ static int emit_kdelta(ASMState *as, Reg
   return 0;  /* Failed. */
 }
 
-static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
+#define glofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
+#define mcpofs(as, k) \
+  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
+#define checkmcpofs(as, k) \
+  (A64F_S_OK(mcpofs(as, k)>>2, 19))
+
+/* Try to form a const as ADR or ADRP or ADRP + ADD. Returns 1 on success. */
+static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
+{
+  A64Ins ai = A64I_ADR;
+  int64_t ofs = mcpofs(as, k);  /* Byte delta from the next slot (mcp-1). */
+  if (!A64F_S_OK((uint64_t)ofs, 21)) {  /* Out of ADR range: try pages. */
+    uint64_t kpage = k & ~0xfffull;  /* k rounded down to a 4K boundary. */
+    MCode *adrp = as->mcp - 1 - (k != kpage);  /* One slot lower if ADD follows. */
+    ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;  /* Page delta. */
+    if (!A64F_S_OK(ofs, 21))
+      return 0;  /* Failed. */
+    if (k != kpage)
+      emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);  /* Low 12 bits. */
+    ai = A64I_ADRP;
+  }
+  emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd);  /* Low 2 bits + rest. */
+  return 1;
+}
+
+static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
+{
+  int zeros = 0, ones = 0, neg, lshift = 0;
+  int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
+  /* Count non-homogeneous 16 bit fragments. */
+  while (--i >= 0) {
+    uint32_t frag = (u64 >> i*16) & 0xffff;
+    zeros += (frag != 0);
+    ones += (frag != 0xffff);
+  }
+  neg = ones < zeros;  /* Use MOVN if it pays off. */
+  if ((neg ? ones : zeros) > 1) {  /* Need 2+ ins. Try 1 ins encodings. */
+    uint32_t k13 = emit_isk13(u64, is64);
+    if (k13) {
+      emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
+      return;
     }
-    neg = ones > zeros;  /* Use MOVN if it pays off. */
-    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
-      int shift = 0, lshift = 0;
-      uint64_t n64 = neg ? ~u64 : u64;
-      if (n64 != 0) {
-	/* Find first/last fragment to be filled. */
-	shift = (63-emit_clz64(n64)) & ~15;
-	lshift = emit_ctz64(n64) & ~15;
-      }
-      /* MOVK requires the original value (u64). */
-      while (shift > lshift) {
-	uint32_t u16 = (u64 >> shift) & 0xffff;
-	/* Skip fragments that are correctly filled by MOVN/MOVZ. */
-	if (u16 != (neg ? 0xffff : 0))
-	  emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
-	shift -= 16;
-      }
-      /* But MOVN needs an inverted value (n64). */
-      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
-		 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
+    if (emit_kdelta(as, rd, u64, is64)) {
+      return;
+    }
+    if (emit_kadrp(as, rd, u64)) {  /* Either 1 or 2 ins. */
+      return;
+    }
+  }
+  if (neg) {
+    u64 = ~u64;
+    if (!is64) u64 = (uint32_t)u64;
+  }
+  if (u64) {
+    /* Find first/last fragment to be filled. */
+    int shift = (63-emit_clz64(u64)) & ~15;
+    lshift = emit_ctz64(u64) & ~15;
+    for (; shift > lshift; shift -= 16) {
+      uint32_t frag = (u64 >> shift) & 0xffff;
+      if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */
+      if (neg) frag ^= 0xffff; /* MOVK requires the original value. */
+      emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
     }
   }
+  /* But MOVN needs an inverted value. */
+  emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
+	     A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
 }
 
 /* Load a 32 bit constant into a GPR. */
-#define emit_loadi(as, rd, i)	emit_loadk(as, rd, i, 0)
+#define emit_loadi(as, rd, i)	emit_loadk(as, rd, (uint32_t)i)
 
 /* Load a 64 bit constant into a GPR. */
-#define emit_loadu64(as, rd, i)	emit_loadk(as, rd, i, A64I_X)
-
-#define emit_loada(as, r, addr)	emit_loadu64(as, (r), (uintptr_t)(addr))
-
-#define glofs(as, k) \
-  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
-#define mcpofs(as, k) \
-  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
-#define checkmcpofs(as, k) \
-  (A64F_S_OK(mcpofs(as, k)>>2, 19))
+#define emit_loadu64(as, rd, i)	emit_loadk(as, rd, i)
 
 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
 
 /* Get/set from constant pointer. */
 static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
 {
-  /* First, check if ip + offset is in range. */
-  if ((ai & 0x00400000) && checkmcpofs(as, p)) {
+  Reg base = RID_GL;
+  int64_t ofs = glofs(as, p);
+  if (emit_checkofs(ai, ofs)) {
+    /* GL + offset, might subsequently fuse to LDP/STP. */
+  } else if (ai == A64I_LDRx && checkmcpofs(as, p)) {
+    /* IP + offset is cheaper than allock, but address must be in range. */
     emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
-  } else {
-    Reg base = RID_GL;  /* Next, try GL + offset. */
-    int64_t ofs = glofs(as, p);
-    if (!emit_checkofs(ai, ofs)) {  /* Else split up into base reg + offset. */
-      int64_t i64 = i64ptr(p);
-      base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
-      ofs = i64 & 0x7fffull;
-    }
-    emit_lso(as, ai, r, base, ofs);
+    return;
+  } else {  /* Split up into base reg + offset. */
+    int64_t i64 = i64ptr(p);
+    base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
+    ofs = i64 & 0x7fffull;
   }
+  emit_lso(as, ai, r, base, ofs);
 }
 
 /* Load 64 bit IR constant into register. */
@@ -346,16 +371,22 @@ static void emit_cnb(ASMState *as, A64In
 
 #define emit_jmp(as, target)	emit_branch(as, A64I_B, (target))
 
-static void emit_call(ASMState *as, void *target)
+static void emit_call(ASMState *as, ASMFunction target)
 {
   MCode *p = --as->mcp;
-  ptrdiff_t delta = (char *)target - (char *)p;
+#if LJ_ABI_PAUTH
+  char *targetp = ptrauth_auth_data((char *)target,
+				    ptrauth_key_function_pointer, 0);
+#else
+  char *targetp = (char *)target;
+#endif
+  ptrdiff_t delta = targetp - (char *)p;
   if (A64F_S_OK(delta>>2, 26)) {
     *p = A64I_BL | A64F_S26(delta>>2);
   } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
     Reg r = ra_allock(as, i64ptr(target),
 		      RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
-    *p = A64I_BLR | A64F_N(r);
+    *p = A64I_BLR_AUTH | A64F_N(r);
   }
 }
 
@@ -415,7 +446,8 @@ static void emit_addptr(ASMState *as, Re
 {
   if (ofs)
     emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
-		 ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
+		 ofs < 0 ? (int32_t)(~(uint32_t)ofs+1u) : ofs,
+		 rset_exclude(RSET_GPR, r));
 }
 
 #define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_mips.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_emit_mips.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_mips.h
@@ -1,6 +1,6 @@
 /*
 ** MIPS instruction emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #if LJ_64
@@ -70,7 +70,7 @@ static void emit_rotr(ASMState *as, Reg
   }
 }
 
-#if LJ_64
+#if LJ_64 || LJ_HASBUFFER
 static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
 		      uint32_t lsb)
 {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_ppc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_emit_ppc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_ppc.h
@@ -1,6 +1,6 @@
 /*
 ** PPC instruction emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Emit basic instructions --------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_riscv.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_riscv.h
@@ -0,0 +1,519 @@
+/*
+** RISC-V instruction emitter.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+static intptr_t get_k64val(ASMState *as, IRRef ref)
+{  /* Return the value of the constant IR instruction 'ref' as an intptr_t. */
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_KINT64) {
+    return (intptr_t)ir_kint64(ir)->u64;
+  } else if (ir->o == IR_KGC) {
+    return (intptr_t)ir_kgc(ir);
+  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+    return (intptr_t)ir_kptr(ir);
+  } else {  /* Only IR_KINT and IR_KNULL are expected here. */
+    lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
+               "bad 64 bit const IR op %d", ir->o);
+    return ir->i;  /* Sign-extended. */
+  }
+}
+
+#define get_kval(as, ref)       get_k64val(as, ref)
+
+/* -- Emit basic instructions --------------------------------------------- */
+
+static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2)
+{  /* Emit one ins with rd/rs1/rs2 fields. Code grows downwards (pre-decrement). */
+  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2);
+}
+
+#define emit_ds(as, riscvi, rd, rs1)         emit_r(as, riscvi, rd, rs1, 0)
+#define emit_ds2(as, riscvi, rd, rs2)         emit_r(as, riscvi, rd, 0, rs2)
+#define emit_ds1s2(as, riscvi, rd, rs1, rs2)         emit_r(as, riscvi, rd, rs1, rs2)
+
+static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3)
+{  /* Emit one ins with rd and three source register fields (rs1/rs2/rs3). */
+  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3);
+}
+
+#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3)         emit_r4(as, riscvi, rd, rs1, rs2, rs3)
+
+static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i)
+{  /* Emit one ins with rd/rs1 fields and 'i' truncated to its low 12 bits. */
+  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff);
+}
+
+#define emit_di(as, riscvi, rd, i)         emit_i(as, riscvi, rd, 0, i)
+#define emit_dsi(as, riscvi, rd, rs1, i)     emit_i(as, riscvi, rd, rs1, i)
+#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f)
+
+static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
+{  /* Emit one ins with rs1/rs2 fields (no rd) and 'i' truncated to 12 bits. */
+  *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff);
+}
+
+#define emit_s1s2i(as, riscvi, rs1, rs2, i)  emit_s(as, riscvi, rs1, rs2, i)
+
+/*
+static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i)
+{
+  *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe);
+}
+*/
+
+static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i)
+{  /* Emit one ins with an rd field and 'i' truncated to its low 20 bits. */
+  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff);
+}
+
+#define emit_du(as, riscvi, rd, i)           emit_u(as, riscvi, rd, i)
+
+/*
+static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i)
+{
+  *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe);
+}
+*/
+
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
+static Reg ra_scratch(ASMState *as, RegSet allow);
+
+static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs)
+{  /* Emit a load/store of 'data' at base+ofs. ofs must satisfy checki12(). */
+  lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs);
+  switch (riscvi) {
+    case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB:
+    case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU:
+    case RISCVI_FLW: case RISCVI_FLD:
+      emit_dsi(as, riscvi, data, base, ofs);  /* Loads: data goes in rd. */
+      break;
+    case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB:
+    case RISCVI_FSW: case RISCVI_FSD:
+      emit_s1s2i(as, riscvi, base, data, ofs);  /* Stores: data goes in rs2. */
+      break;
+    default: lj_assertA(0, "invalid lso"); break;  /* Nothing is emitted. */
+  }
+}
+
+static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp,
+                       int32_t shamt)
+{  /* Rotate rs1 right by a constant into rd. tmp is only written by the fallback. */
+  if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) {
+    if (as->flags & JIT_F_RVXThead) switch (riscvi) {  /* XThead wins over Zbb. */
+      case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break;
+      case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break;
+      default: lj_assertA(0, "invalid roti op"); break;
+    }
+    emit_dsshamt(as, riscvi, rd, rs1, shamt);
+  } else {  /* No rotate ins flagged: synthesize from two shifts and an OR. */
+    RISCVIns ai, bi;
+    int32_t shwid, shmsk;
+    switch (riscvi) {
+      case RISCVI_RORI:
+        ai = RISCVI_SRLI, bi = RISCVI_SLLI;
+        shwid = 64, shmsk = 63;
+        break;
+      case RISCVI_RORIW:
+        ai = RISCVI_SRLIW, bi = RISCVI_SLLIW;
+        shwid = 32, shmsk = 31;
+        break;
+      default:
+        lj_assertA(0, "invalid roti op");
+        return;
+    }
+    emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);  /* Emitted first, so it executes last. */
+    emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk);  /* Left part into rd. */
+    emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk);  /* Executes first: right part. */
+  }
+}
+
+static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp)
+{  /* Rotate rs1 by the amount in rs2 into rd. tmp is only written by the fallback. */
+  if (as->flags & JIT_F_RVZbb) {
+    emit_ds1s2(as, riscvi, rd, rs1, rs2);
+  } else {  /* Fallback: shift by rs2, shift the other way by -rs2, then OR. */
+    RISCVIns sai, sbi;
+    switch (riscvi) {
+      case RISCVI_ROL:
+        sai = RISCVI_SLL, sbi = RISCVI_SRL;
+        break;
+      case RISCVI_ROR:
+        sai = RISCVI_SRL, sbi = RISCVI_SLL;
+        break;
+      case RISCVI_ROLW:
+        sai = RISCVI_SLLW, sbi = RISCVI_SRLW;
+        break;
+      case RISCVI_RORW:
+        sai = RISCVI_SRLW, sbi = RISCVI_SLLW;
+        break;
+      default:
+        lj_assertA(0, "invalid rot op");
+        return;
+    }
+    if (rd == rs2) {  /* Executes bottom-up: NEG, sai, sbi, OR. */
+      emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
+      emit_ds1s2(as, sbi, tmp, rs1, tmp);  /* NOTE(review): reads rs1 after rd was written; assumes rd != rs1 here -- confirm. */
+      emit_ds1s2(as, sai, rd, rs1, rs2);
+      emit_ds2(as, RISCVI_NEG, tmp, rs2);  /* Executes first: tmp = -rs2. */
+    } else {  /* Executes bottom-up: NEG, sbi, sai, OR. */
+      emit_ds1s2(as, RISCVI_OR, rd, rd, tmp);
+      emit_ds1s2(as, sai, rd, rs1, rs2);
+      emit_ds1s2(as, sbi, tmp, rs1, tmp);
+      emit_ds2(as, RISCVI_NEG, tmp, rs2);  /* Executes first: tmp = -rs2. */
+    }
+  }
+}
+
+static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1)
+{  /* Zero/sign-extend rs1 into rd, picking an encoding from the CPU flags. */
+  if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) ||
+      (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) {
+    emit_ds(as, riscvi, rd, rs1);  /* Emit the requested op unchanged. */
+  } else if (as->flags & JIT_F_RVXThead) {
+    uint32_t hi, sext;  /* hi: top bit index to keep, sext: signed variant. */
+    switch (riscvi) {
+      case RISCVI_ZEXT_B:
+      case RISCVI_SEXT_W:
+        emit_ds(as, riscvi, rd, rs1);  /* Emitted as-is on every path. */
+        return;
+      case RISCVI_ZEXT_H:
+        hi = 15, sext = 0;
+        break;
+      case RISCVI_ZEXT_W:
+        hi = 31, sext = 0;
+        break;
+      case RISCVI_SEXT_B:
+        hi = 7, sext = 1;
+        break;
+      case RISCVI_SEXT_H:
+        hi = 15, sext = 1;
+        break;
+      default:
+        lj_assertA(0, "invalid ext op");
+        return;
+    }
+    emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU,
+      rd, rs1, hi << 6);  /* Presumably imm = (msb << 6) | lsb with lsb 0 -- confirm. */
+  } else {  /* Generic fallback: shift left, then shift right by the same amount. */
+    RISCVIns sli, sri;
+    int32_t shamt;
+    switch (riscvi) {
+      case RISCVI_ZEXT_B:
+      case RISCVI_SEXT_W:
+        emit_ds(as, riscvi, rd, rs1);  /* Emitted as-is on every path. */
+        return;
+      case RISCVI_ZEXT_H:
+        sli = RISCVI_SLLI, sri = RISCVI_SRLI;
+        shamt = 48;
+        break;
+      case RISCVI_ZEXT_W:
+        sli = RISCVI_SLLI, sri = RISCVI_SRLI;
+        shamt = 32;
+        break;
+      case RISCVI_SEXT_B:
+        sli = RISCVI_SLLI, sri = RISCVI_SRAI;
+        shamt = 56;
+        break;
+      case RISCVI_SEXT_H:
+        sli = RISCVI_SLLI, sri = RISCVI_SRAI;
+        shamt = 48;
+        break;
+      default:
+        lj_assertA(0, "invalid ext op");
+        return;
+    }
+    emit_dsshamt(as, sri, rd, rd, shamt);   /* Executes second: shift back down. */
+    emit_dsshamt(as, sli, rd, rs1, shamt);  /* Executes first: shift up into rd. */
+  }
+}
+
+static void emit_cleartp(ASMState *as, Reg rd, Reg rs1)
+{  /* rd = rs1 with the top 17 bits cleared (see the shift pair below). */
+  if (as->flags & JIT_F_RVXThead) {
+    emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6);  /* Presumably extracts bits 46..0 -- confirm. */
+  } else {
+    emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17);  /* Executes second. */
+    emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17);  /* Executes first. */
+  }
+}
+
+/*
+static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
+{
+  if (as->flags & JIT_F_RVZbb) {
+    emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2);
+  } else {
+    emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp);
+    emit_ds(as, RISCVI_NOT, tmp, rs2);
+  }
+}
+*/
+
+/*
+static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp)
+{
+  if (as->flags & JIT_F_RVZbb) {
+    emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2);
+  } else {
+    emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp);
+    emit_ds(as, RISCVI_NOT, tmp, rs2);
+  }
+}
+*/
+
+static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2)
+{  /* rd = ~(rs1 ^ rs2), as one ins with Zbb or as XOR followed by NOT. */
+  if (as->flags & JIT_F_RVZbb) {
+    emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2);
+  } else {
+    emit_ds(as, RISCVI_NOT, rd, rd);  /* Executes second. */
+    emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2);  /* Executes first. */
+  }
+}
+
+static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt)
+{  /* rd = rs1 + (rs2 << shamt). tmp is only written by the generic fallback. */
+  if (as->flags & JIT_F_RVZba) {
+    switch (shamt) {  /* Operands are swapped: the shifted reg goes in the rs1 field. */
+      case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break;
+      case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break;
+      case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break;
+      default: lj_assertA(0, "invalid shxadd shamt"); return;  /* Don't drop the ins silently. */
+    }
+  } else if (as->flags & JIT_F_RVXThead) {
+    emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2);  /* rs2 is passed as the low imm bits. */
+  } else {
+    emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp);  /* Executes second. */
+    emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt);  /* Executes first. */
+  }
+}
+
+#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1)
+#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2)
+#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3)
+
+static void emit_loadk12(ASMState *as, Reg rd, int32_t i)
+{  /* Load a small constant with a single ADDI whose rs1 field is 0. */
+  emit_di(as, RISCVI_ADDI, rd, i);
+}
+
+static void emit_loadk32(ASMState *as, Reg rd, int32_t i)
+{  /* Load a 32 bit constant: one ADDI if it fits 12 bits, else LUI + ADDI/XORI. */
+  if (checki12((int64_t)i)) {
+    emit_loadk12(as, rd, i);
+  } else {
+    if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0))
+      emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i));  /* Presumably avoids a sign-extended LUI result going negative -- confirm. */
+    else
+    emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i));  /* Executes second: add low part. */
+    emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i));  /* Executes first: high part. */
+  }
+}
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer rematerialization of BASE/L from global_State over spills. */
+#define emit_canremat(ref)	((ref) <= REF_BASE)
+
+
+/* Load a 32 bit constant into a GPR. Expands to an expression (no ';'). */
+#define emit_loadi(as, r, i)	emit_loadk32(as, r, i)
+
+/* Load a 64 bit constant into a GPR: upper 32 bits first, then shift in the lower 32. */
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+{
+  if (checki32((int64_t)u64)) {
+    emit_loadk32(as, r, (int32_t)u64);
+  } else {
+    uint32_t lo32 = u64 & 0xfffffffful;
+    RISCVIns instrs[7] = {0};  /* Filled from the lowest bits up; instrs[0] executes last. */
+    int shamt = 0, step = 0;  /* shamt: pending shift, step: number of ins built. */
+    for(int bit = 0; bit < 32; bit++) {
+      if (lo32 & (1u << bit)) {  /* Start a chunk of up to 11 bits at this set bit. */
+  if (shamt) instrs[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
+  int inc = bit+10 > 31 ? 31-bit : 10;  /* Clamp the chunk to bit 31. */
+  bit += inc, shamt = inc+1;  /* bit is now the top bit of the chunk. */
+  uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1);
+  uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0);
+  instrs[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload);
+      } else shamt++;  /* Zero bit: just widen the pending shift. */
+    }
+    if (shamt) instrs[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
+
+    if (step < 6) {  /* The tailored sequence is shorter than the generic one. */
+      for(int i = 0; i < step; i++)
+        *--as->mcp = instrs[i];
+    } else {  /* Generic 6 ins form: shifts by 11, 11 and 10 bits (32 in total). */
+      emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff);
+      emit_dsshamt(as, RISCVI_SLLI, r, r, 10);
+      emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff);
+      emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
+      emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff);
+      emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
+    }
+
+    uint32_t hi32 = u64 >> 32;  /* Emitted last, so it is loaded first at runtime. */
+    if (hi32 & 0xfff) emit_loadk32(as, r, hi32);
+    else emit_du(as, RISCVI_LUI, r, hi32 >> 12);  /* Low 12 bits are zero: LUI alone. */
+  }
+}
+
+#define emit_loada(as, r, addr)	emit_loadu64(as, (r), u64ptr((addr)))
+
+/* Get/set from constant pointer. The address is allocated to a register from 'allow'. */
+static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow)
+{
+  emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0);  /* Always offset 0. */
+}
+
+/* Load 64 bit IR constant into register (FPR targets are loaded via RID_TMP). */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+  const uint64_t *k = &ir_k64(ir)->u64;
+  Reg r64 = r;
+  if (rset_test(RSET_FPR, r)) {
+    r64 = RID_TMP;
+    emit_ds(as, RISCVI_FMV_D_X, r, r64);  /* Executes after the load below. */
+  }
+  emit_loadu64(as, r64, *k);
+}
+
+/* Get/set global_State fields, addressed as RID_GL + ofs. */
+static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs)
+{
+  emit_lso(as, riscvi, r, RID_GL, ofs);  /* emit_lso asserts checki12(ofs). */
+}
+
+#define emit_getgl(as, r, field) \
+  emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field))
+#define emit_setgl(as, r, field) \
+  emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field))
+
+/* Trace number is determined from per-trace exit stubs. */
+#define emit_setvmstate(as, i)		UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC. */
+#define emit_label(as)		((as)->mcp)
+
+static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump)
+{  /* Always emits two ins: NOP + branch, or inverted branch + JAL. */
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)(p - 1);  /* Relative to the upper slot. */
+  // lj_assertA(((delta + 0x10000) >> 13) == 0, "branch target out of range"); /* B */
+  lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */
+  if (checki13(delta) && !jump) {  /* Short form, unless the caller forces a jump. */
+    *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
+    *--p = RISCVI_NOP;  /* Padding in the lower slot, runs before the branch. */
+  } else {
+    *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poor man's trampoline. */
+    *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);  /* Presumably bit 12 inverts the condition; +8 skips the JAL -- confirm. */
+  }
+  as->mcp = p;
+}
+
+static void emit_jmp(ASMState *as, MCode *target)
+{  /* Unconditional jump. Always emits two ins: JAL + NOP or AUIPC + JALR. */
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)(p - 2);  /* Relative to the lower slot. */
+  // lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */
+  lj_assertA(checki32(delta), "jump target out of range"); /* AUIPC+JALR */
+  if (checki21(delta)) {
+    *--p = RISCVI_NOP;  /* Upper slot, placed after the JAL. */
+    *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
+  } else {  /* Far jump through RID_TMP. */
+    *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+    *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+  }
+  as->mcp = p;
+}
+
+#define emit_mv(as, dst, src) \
+  emit_ds(as, RISCVI_MV, (dst), (src))
+
+static void emit_call(ASMState *as, void *target, int needcfa)
+{  /* Emit a call: JAL, AUIPC + JALR or JALR via RID_CFUNCADDR, by distance. */
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)(p - 2);  /* Relative to the lower slot. */
+  if (checki21(delta)) {
+    *--p = RISCVI_NOP;  /* Upper slot, placed after the JAL. */
+    *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta);
+  } else if (checki32(delta)) {  /* PC-relative call through RID_TMP. */
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+    *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+    needcfa = 1;
+  } else {  /* Out of range: only one ins here, the address is loaded below. */
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0);
+    needcfa = 2;
+  }
+  as->mcp = p;
+  if (needcfa > 1)  /* Also taken when the caller passes needcfa > 1 for a JAL call. */
+    ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs. Regs below RID_MAX_GPR are treated as GPRs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+  if (src < RID_MAX_GPR && dst < RID_MAX_GPR)
+    emit_mv(as, dst, src);
+  else if (src < RID_MAX_GPR)  /* GPR source, non-GPR destination. */
+    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src);
+  else if (dst < RID_MAX_GPR)  /* Non-GPR source, GPR destination. */
+    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src);
+  else  /* Neither is a GPR: src is passed as both source operands. */
+    emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
+}
+
+/* Emit an arithmetic operation with a constant operand. tmp is clobbered if k needs a load. */
+static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
+         Reg tmp, intptr_t k)
+{
+  if (checki12(k)) emit_dsi(as, riscvi, dest, src, k);  /* Immediate form. */
+  else {
+    switch (riscvi) {  /* Map the immediate op to its register-register form. */
+      case RISCVI_ADDI: riscvi = RISCVI_ADD; break;
+      case RISCVI_XORI: riscvi = RISCVI_XOR; break;
+      case RISCVI_ORI: riscvi = RISCVI_OR; break;
+      case RISCVI_ANDI: riscvi = RISCVI_AND; break;
+      default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
+    }
+    emit_ds1s2(as, riscvi, dest, src, tmp);  /* Executes after the load of k. */
+    emit_loadu64(as, tmp, (uintptr_t)k);
+  }
+}
+
+/* Generic load of register with base and (small) offset address. Width follows ir->t. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)  /* GPR: LD for 64 bit types, else LW. */
+    emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs);
+  else  /* Otherwise: FLD for numbers, else FLW. */
+    emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs);
+}
+
+/* Generic store of register with base and (small) offset address. Width follows ir->t. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)  /* GPR: SD for 64 bit types, else SW. */
+    emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs);
+  else  /* Otherwise: FSD for numbers, else FSW. */
+    emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs);
+}
+
+/* Add offset to pointer. Emits nothing for ofs == 0. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+  if (ofs)
+    emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs);  /* RID_TMP is the scratch for large ofs. */
+}
+
+
+#define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_x86.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_emit_x86.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_emit_x86.h
@@ -1,6 +1,6 @@
 /*
 ** x86/x64 instruction emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* -- Emit basic instructions --------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_err.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_err.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_err.c
@@ -1,6 +1,6 @@
 /*
 ** Error handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_err_c
@@ -29,12 +29,18 @@
 ** Pros and Cons:
 **
 ** - EXT requires unwind tables for *all* functions on the C stack between
-**   the pcall/catch and the error/throw. This is the default on x64,
-**   but needs to be manually enabled on x86/PPC for non-C++ code.
+**   the pcall/catch and the error/throw. C modules used by Lua code can
+**   throw errors, so these need to have unwind tables, too. Transitively
+**   this applies to all system libraries used by C modules -- at least
+**   when they have callbacks which may throw an error.
 **
-** - INT is faster when actually throwing errors (but this happens rarely).
+** - INT is faster when actually throwing errors, but this happens rarely.
 **   Setting up error handlers is zero-cost in any case.
 **
+** - INT needs to save *all* callee-saved registers when entering the
+**   interpreter. EXT only needs to save those actually used inside the
+**   interpreter. JIT-compiled code may need to save some more.
+**
 ** - EXT provides full interoperability with C++ exceptions. You can throw
 **   Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
 **   C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -46,27 +52,38 @@
 **   the wrapper function feature. Lua errors thrown through C++ frames
 **   cannot be caught by C++ code and C++ destructors are not run.
 **
-** EXT is the default on x64 systems and on Windows, INT is the default on all
-** other systems.
-**
-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
-** and all C libraries that have callbacks which may be used to call back
-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
-**
-** EXT is mandatory on WIN64 since the calling convention has an abundance
-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
+** - EXT can handle errors from internal helper functions that are called
+**   from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
+**   INT has no choice but to call the panic handler, if this happens.
+**   Note: this is mainly relevant for out-of-memory errors.
+**
+** EXT is the default on all systems where the toolchain produces unwind
+** tables by default (*). This is hard-coded and/or detected in src/Makefile.
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
+**
+** INT is the default on all other systems.
+**
+** EXT can be manually enabled for toolchains that are able to produce
+** conforming unwind tables:
+**   "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
+** As explained above, *all* C code used directly or indirectly by LuaJIT
+** must be compiled with -funwind-tables (or -fexceptions). C++ code must
+** *not* be compiled with -fno-exceptions.
+**
+** If you're unsure whether error handling inside the VM works correctly,
+** try running this and check whether it prints "OK":
+**
+**   luajit -e "print(select(2, load('OK')):match('OK'))"
+**
+** (*) Originally, toolchains only generated unwind tables for C++ code. For
+** interoperability reasons, this can be manually enabled for plain C code,
+** too (with -funwind-tables). With the introduction of the x64 architecture,
+** the corresponding POSIX and Windows ABIs mandated unwind tables for all
+** code. Over the following years most desktop and server platforms have
+** enabled unwind tables by default on all architectures. OTOH mobile and
+** embedded platforms do not consistently mandate unwind tables.
 */
 
-#if (defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
-#define LJ_UNWIND_EXT	1
-#elif LJ_TARGET_WINDOWS
-#define LJ_UNWIND_EXT	1
-#endif
-
 /* -- Error messages ------------------------------------------------------ */
 
 /* Error message strings. */
@@ -157,12 +174,15 @@ static void *err_unwind(lua_State *L, vo
     case FRAME_PCALL:  /* FF pcall() frame. */
     case FRAME_PCALLH:  /* FF pcall() frame inside hook. */
       if (errcode) {
+	global_State *g;
 	if (errcode == LUA_YIELD) {
 	  frame = frame_prevd(frame);
 	  break;
 	}
+	g = G(L);
+	setgcref(g->cur_L, obj2gco(L));
 	if (frame_typep(frame) == FRAME_PCALL)
-	  hook_leave(G(L));
+	  hook_leave(g);
 	L->base = frame_prevd(frame) + 1;
 	L->cframe = cf;
 	unwindstack(L, L->base);
@@ -184,7 +204,198 @@ static void *err_unwind(lua_State *L, vo
 
 /* -- External frame unwinding -------------------------------------------- */
 
-#if (defined(__GNUC__) || defined(__clang__)) && !LJ_NO_UNWIND && !LJ_ABI_WIN
+#if LJ_ABI_WIN
+
+/*
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+*/
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_X86
+typedef void *UndocumentedDispatcherContext;  /* Unused on x86. */
+#else
+/* Taken from: http://www.nynaeve.net/?p=99 */
+typedef struct UndocumentedDispatcherContext {
+  ULONG64 ControlPc;
+  ULONG64 ImageBase;
+  PRUNTIME_FUNCTION FunctionEntry;
+  ULONG64 EstablisherFrame;
+  ULONG64 TargetIp;
+  PCONTEXT ContextRecord;
+  void (*LanguageHandler)(void);
+  PVOID HandlerData;
+  PUNWIND_HISTORY_TABLE HistoryTable;
+  ULONG ScopeIndex;
+  ULONG Fill0;
+} UndocumentedDispatcherContext;
+#endif
+
+/* Another wild guess. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+
+#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+/* Workaround for broken MinGW64 declaration. */
+VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+#define RtlUnwindEx RtlUnwindEx_FIXED
+#endif
+
+#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
+#define LJ_GCC_EXCODE		((DWORD)0x20474343)
+
+#define LJ_EXCODE		((DWORD)0xe24c4a00)
+#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
+#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
+#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+
+/* Windows exception handler for interpreter frame. */
+LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+{
+#if LJ_TARGET_X86
+  void *cf = (char *)f - CFRAME_OFS_SEH;
+#elif LJ_TARGET_ARM64
+  void *cf = (char *)f - CFRAME_SIZE;
+#else
+  void *cf = f;
+#endif
+  lua_State *L = cframe_L(cf);
+  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
+    if (rec->ExceptionCode == STATUS_LONGJUMP &&
+	rec->ExceptionRecord &&
+	LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) {
+      errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode);
+      if ((rec->ExceptionFlags & 0x20)) {  /* EH_TARGET_UNWIND */
+	/* Unwinding is about to finish; revert the ExceptionCode so that
+	** RtlRestoreContext does not try to restore from a _JUMP_BUFFER.
+	*/
+	rec->ExceptionCode = 0;
+      }
+    }
+    /* Unwind internal frames. */
+    err_unwind(L, cf, errcode);
+  } else {
+    void *cf2 = err_unwind(L, cf, 0);
+    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
+#if !LJ_TARGET_X86
+      EXCEPTION_RECORD rec2;
+#endif
+      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+	  rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if !LJ_TARGET_CYGWIN
+	__DestructExceptionObject(rec, 1);
+#endif
+	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* Don't catch access violations etc. */
+	return 1;  /* ExceptionContinueSearch */
+      }
+#if LJ_TARGET_X86
+      UNUSED(ctx);
+      UNUSED(dispatch);
+      /* Call all handlers for all lower C frames (including ourselves) again
+      ** with EH_UNWINDING set. Then call the specified function, passing cf
+      ** and errcode.
+      */
+      lj_vm_rtlunwind(cf, (void *)rec,
+	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+      /* lj_vm_rtlunwind does not return. */
+#else
+      if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+	/* For unwind purposes, wrap the EXCEPTION_RECORD in something that
+	** looks like a longjmp, so that MSVC will execute C++ destructors in
+	** the frames we unwind over. ExceptionInformation[0] should really
+	** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely.
+	** ExceptionFlags must be zeroed: rec2 is an uninitialized stack object.
+	*/
+	rec2.ExceptionCode = STATUS_LONGJUMP; rec2.ExceptionFlags = 0;
+	rec2.ExceptionRecord = rec;
+	rec2.ExceptionAddress = 0;
+	rec2.NumberParameters = 1;
+	rec2.ExceptionInformation[0] = (ULONG_PTR)ctx;
+	rec = &rec2;
+      }
+      /* Unwind the stack and call all handlers for all lower C frames
+      ** (including ourselves) again with EH_UNWINDING set. Then set
+      ** stack pointer = f, result = errcode and jump to the specified target.
+      */
+      RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+			      lj_vm_unwind_ff_eh :
+			      lj_vm_unwind_c_eh),
+		  rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord,
+		  dispatch->HistoryTable);
+      /* RtlUnwindEx should never return. */
+#endif
+    }
+  }
+  return 1;  /* ExceptionContinueSearch */
+}
+
+#if LJ_UNWIND_JIT
+
+#if LJ_TARGET_X64
+#define CONTEXT_REG_PC	Rip
+#elif LJ_TARGET_ARM64
+#define CONTEXT_REG_PC	Pc
+#else
+#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
+#endif
+
+/* Windows unwinder for JIT-compiled code. Only returns if unwinding fails. */
+static void err_unwind_win_jit(global_State *g, int errcode)
+{
+  CONTEXT ctx;
+  UNWIND_HISTORY_TABLE hist;
+
+  memset(&hist, 0, sizeof(hist));
+  RtlCaptureContext(&ctx);
+  while (1) {
+    DWORD64 frame, base, addr = ctx.CONTEXT_REG_PC;
+    void *hdata;
+    PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
+    if (!func) {  /* Found frame without .pdata: must be JIT-compiled code. */
+      ExitNo exitno;
+      uintptr_t stub = lj_trace_unwind(G2J(g), (uintptr_t)(addr - sizeof(MCode)), &exitno);
+      if (stub) {  /* Jump to side exit to unwind the trace. */
+	ctx.CONTEXT_REG_PC = stub;
+	G2J(g)->exitcode = errcode;
+	RtlRestoreContext(&ctx, NULL);  /* Does not return. */
+      }
+      break;
+    }
+    RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
+		     &ctx, &hdata, &frame, NULL);
+    if (!ctx.CONTEXT_REG_PC) break;  /* Caller PC of zero: end of call chain. */
+  }
+  /* Unwinding failed, if we end up here. */
+}
+#endif
+
+/* Raise Windows exception. */
+static void err_raise_ext(global_State *g, int errcode)
+{
+#if LJ_UNWIND_JIT
+  if (tvref(g->jit_base)) {
+    err_unwind_win_jit(g, errcode);
+    return;  /* Unwinding failed. */
+  }
+#elif LJ_HASJIT
+  /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
+  setmref(g->jit_base, NULL);
+#endif
+  UNUSED(g);
+  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+}
+
+#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
 
 /*
 ** We have to use our own definitions instead of the mandatory (!) unwind.h,
@@ -194,6 +405,7 @@ static void *err_unwind(lua_State *L, vo
 typedef struct _Unwind_Context _Unwind_Context;
 
 #define _URC_OK			0
+#define _URC_FATAL_PHASE2_ERROR	2
 #define _URC_FATAL_PHASE1_ERROR	3
 #define _URC_HANDLER_FOUND	6
 #define _URC_INSTALL_CONTEXT	7
@@ -213,9 +425,11 @@ typedef struct _Unwind_Exception
   void (*excleanup)(int, struct _Unwind_Exception *);
   uintptr_t p1, p2;
 } __attribute__((__aligned__)) _Unwind_Exception;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Exception
 
 extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
 extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
+extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
 extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
 extern void _Unwind_DeleteException(_Unwind_Exception *);
 extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -233,7 +447,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver
   lua_State *L;
   if (version != 1)
     return _URC_FATAL_PHASE1_ERROR;
-  UNUSED(uexclass);
   cf = (void *)_Unwind_GetCFA(ctx);
   L = cframe_L(cf);
   if ((actions & _UA_SEARCH_PHASE)) {
@@ -260,10 +473,10 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver
     if ((actions & _UA_FORCE_UNWIND)) {
       return _URC_CONTINUE_UNWIND;
     } else if (cf) {
+      ASMFunction ip;
       _Unwind_SetGR(ctx, LJ_TARGET_EHRETREG, errcode);
-      _Unwind_SetIP(ctx, (uintptr_t)(cframe_unwind_ff(cf) ?
-				     lj_vm_unwind_ff_eh :
-				     lj_vm_unwind_c_eh));
+      ip = cframe_unwind_ff(cf) ? lj_vm_unwind_ff_eh : lj_vm_unwind_c_eh;
+      _Unwind_SetIP(ctx, (uintptr_t)lj_ptr_strip(ip));
       return _URC_INSTALL_CONTEXT;
     }
 #if LJ_TARGET_X86ORX64
@@ -281,20 +494,150 @@ LJ_FUNCA int lj_err_unwind_dwarf(int ver
     ** it on non-x64 because the interpreter restores all callee-saved regs.
     */
     lj_err_throw(L, errcode);
+#if LJ_TARGET_X64
+#error "Broken build system -- only use the provided Makefiles!"
+#endif
 #endif
   }
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Exception static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+struct dwarf_eh_bases { void *tbase, *dbase, *func; };
+extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
+
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
+{
+#if !LJ_TARGET_OSX
+  /* Check disabled on MacOS due to brilliant software engineering at Apple. */
+  struct dwarf_eh_bases ehb;
+  lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
+#endif
+  /* Check disabled, because of broken Fedora/ARM64. See #722.
+  lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
+  */
+}
+#endif
 
-/* Raise DWARF2 exception. */
-static void err_raise_ext(int errcode)
+#if LJ_UNWIND_JIT
+/* DWARF2 personality handler for JIT-compiled code. */
+static int err_unwind_jit(int version, int actions,
+  uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
 {
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  static_uex.excleanup = NULL;
-  _Unwind_RaiseException(&static_uex);
+  /* NYI: FFI C++ exception interoperability. */
+  if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
+    return _URC_FATAL_PHASE1_ERROR;
+  if ((actions & _UA_SEARCH_PHASE)) {
+    return _URC_HANDLER_FOUND;
+  }
+  if ((actions & _UA_CLEANUP_PHASE)) {
+    global_State *g = *(global_State **)(uex+1);  /* Read from right behind uex. */
+    ExitNo exitno;
+    uintptr_t addr = _Unwind_GetIP(ctx);  /* Return address _after_ call. */
+    uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+    lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
+    if (stub) {  /* Jump to side exit to unwind the trace. */
+      G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
+#if LJ_TARGET_MIPS
+      _Unwind_SetGR(ctx, 4, stub);
+      _Unwind_SetGR(ctx, 5, exitno);
+      _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
+#else
+      _Unwind_SetIP(ctx, stub);
+#endif
+      return _URC_INSTALL_CONTEXT;
+    }
+    return _URC_FATAL_PHASE2_ERROR;
+  }
+  return _URC_FATAL_PHASE1_ERROR;
+}
+
+/* DWARF2 template frame info for JIT-compiled code.
+**
+** After copying the template to the start of the mcode segment,
+** the frame handler function and the code size are patched.
+** The frame handler always installs a new context to jump to the exit,
+** so don't bother to add any unwind opcodes.
+*/
+static const uint8_t err_frame_jit_template[] = {
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x1c : 0x14,  /* CIE length. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 1, 'z','P','R',0,  /* CIE mark, CIE version, augmentation. */
+  1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG,  /* Code/data align, RA. */
+#if LJ_64
+  10, 0, 0,0,0,0,0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,0,0,0,0,  /* Alignment. */
+#else
+  6, 0, 0,0,0,0, 0x1b,  /* Aug. data ABS handler, PCREL|SDATA4 code. */
+  0,  /* Alignment. */
+#endif
+#if LJ_BE
+  0,0,0,
+#endif
+  LJ_64 ? 0x14 : 0x10,  /* FDE length. */
+  0,0,0,
+  LJ_64 ? 0x24 : 0x1c,  /* CIE offset. */
+  0,0,0,
+  LJ_64 ? 0x14 : 0x10,  /* Code offset. After Final FDE. */
+#if LJ_LE
+  0,0,0,
+#endif
+  0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
+#if LJ_64
+  0,0,0,0,  /* Alignment. */
+#endif
+  0,0,0,0  /* Final FDE. */
+};
+
+#define ERR_FRAME_JIT_OFS_HANDLER	0x12
+#define ERR_FRAME_JIT_OFS_FDE		(LJ_64 ? 0x20 : 0x18)
+#define ERR_FRAME_JIT_OFS_CODE_SIZE	(LJ_64 ? 0x2c : 0x24)
+#if LJ_TARGET_OSX
+#define ERR_FRAME_JIT_OFS_REGISTER	ERR_FRAME_JIT_OFS_FDE
+#else
+#define ERR_FRAME_JIT_OFS_REGISTER	0
+#endif
+
+extern void __register_frame(const void *);
+extern void __deregister_frame(const void *);
+
+uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
+{
+  ASMFunction handler = (ASMFunction)err_unwind_jit;
+  memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));  /* Unpatched template. */
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+  handler = ptrauth_auth_and_resign(handler,
+    ptrauth_key_function_pointer, 0,
+    ptrauth_key_process_independent_code, info + ERR_FRAME_JIT_OFS_HANDLER);
+#else
+#error "missing pointer authentication support for this architecture"
+#endif
+#endif
+  memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));  /* Patch frame handler. */
+  *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =  /* Patch code size: sz minus everything up to the end of the frame info. */
+    (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
+  __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);  /* Must be undone by lj_err_deregister_mcode(). */
+#ifdef LUA_USE_ASSERT
+  {
+    struct dwarf_eh_bases ehb;
+    lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
+	       "bad JIT unwind table registration");
+  }
+#endif
+  return info + sizeof(err_frame_jit_template);  /* First byte after the frame info. */
+}
+
+void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
+{
+  UNUSED(base); UNUSED(sz);
+  __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
 }
 #endif
 
@@ -306,6 +649,7 @@ static void err_raise_ext(int errcode)
 #define _US_FORCE_UNWIND		8
 
 typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+#define UNWIND_EXCEPTION_TYPE	_Unwind_Control_Block
 
 struct _Unwind_Control_Block {
   uint64_t exclass;
@@ -364,136 +708,63 @@ LJ_FUNCA int lj_err_unwind_arm(int state
   }
   if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
     return _URC_FAILURE;
+#ifdef LUA_USE_ASSERT
+  /* We should never get here unless this is a forced unwind aka backtrace. */
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
+    _Unwind_SetGR(ctx, 0, 0xff33aa88);
+  }
+#endif
   return _URC_CONTINUE_UNWIND;
 }
 
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Control_Block static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
+extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
 
-static void err_raise_ext(int errcode)
+static int err_verify_bt(_Unwind_Context *ctx, int *got)
 {
-  memset(&static_uex, 0, sizeof(static_uex));
-  static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
-  _Unwind_RaiseException(&static_uex);
+  if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
+  else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
+  return _URC_OK;
 }
-#endif
 
-#endif /* LJ_TARGET_ARM */
-
-#elif LJ_ABI_WIN
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
+{
+  int got = 0;
+  _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
+  lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
+}
+#endif
 
 /*
-** Someone in Redmond owes me several days of my life. A lot of this is
-** undocumented or just plain wrong on MSDN. Some of it can be gathered
-** from 3rd party docs or must be found by trial-and-error. They really
-** don't want you to write your own language-specific exception handler
-** or to interact gracefully with MSVC. :-(
+** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
 **
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
+** The quirky ARM unwind API doesn't have __register_frame().
+** A potential workaround might involve _Unwind_Backtrace.
+** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
+** since they are built without unwind tables by default.
 */
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-#if LJ_TARGET_X64
-/* Taken from: http://www.nynaeve.net/?p=99 */
-typedef struct UndocumentedDispatcherContext {
-  ULONG64 ControlPc;
-  ULONG64 ImageBase;
-  PRUNTIME_FUNCTION FunctionEntry;
-  ULONG64 EstablisherFrame;
-  ULONG64 TargetIp;
-  PCONTEXT ContextRecord;
-  void (*LanguageHandler)(void);
-  PVOID HandlerData;
-  PUNWIND_HISTORY_TABLE HistoryTable;
-  ULONG ScopeIndex;
-  ULONG Fill0;
-} UndocumentedDispatcherContext;
-#else
-typedef void *UndocumentedDispatcherContext;
-#endif
-
-/* Another wild guess. */
-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
-
-#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
-/* Workaround for broken MinGW64 declaration. */
-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
-#define RtlUnwindEx RtlUnwindEx_FIXED
-#endif
+#endif /* LJ_TARGET_ARM */
 
-#define LJ_MSVC_EXCODE		((DWORD)0xe06d7363)
-#define LJ_GCC_EXCODE		((DWORD)0x20474343)
 
-#define LJ_EXCODE		((DWORD)0xe24c4a00)
-#define LJ_EXCODE_MAKE(c)	(LJ_EXCODE | (DWORD)(c))
-#define LJ_EXCODE_CHECK(cl)	(((cl) ^ LJ_EXCODE) <= 0xff)
-#define LJ_EXCODE_ERRCODE(cl)	((int)((cl) & 0xff))
+#if LJ_UNWIND_EXT
+static __thread struct {
+  UNWIND_EXCEPTION_TYPE ex;
+  global_State *g;
+} static_uex;
 
-/* Windows exception handler for interpreter frame. */
-LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
-  void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+/* Raise external exception. */
+static void err_raise_ext(global_State *g, int errcode)
 {
-#if LJ_TARGET_X64
-  void *cf = f;
-#else
-  void *cf = (char *)f - CFRAME_OFS_SEH;
-#endif
-  lua_State *L = cframe_L(cf);
-  int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
-		LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
-  if ((rec->ExceptionFlags & 6)) {  /* EH_UNWINDING|EH_EXIT_UNWIND */
-    /* Unwind internal frames. */
-    err_unwind(L, cf, errcode);
-  } else {
-    void *cf2 = err_unwind(L, cf, 0);
-    if (cf2) {  /* We catch it, so start unwinding the upper frames. */
-      if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
-	  rec->ExceptionCode == LJ_GCC_EXCODE) {
-#if LJ_TARGET_WINDOWS
-	__DestructExceptionObject(rec, 1);
-#endif
-	setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
-      } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
-	/* Don't catch access violations etc. */
-	return 1;  /* ExceptionContinueSearch */
-      }
-#if LJ_TARGET_X64
-      /* Unwind the stack and call all handlers for all lower C frames
-      ** (including ourselves) again with EH_UNWINDING set. Then set
-      ** rsp = cf, rax = errcode and jump to the specified target.
-      */
-      RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-			       lj_vm_unwind_ff_eh :
-			       lj_vm_unwind_c_eh),
-		  rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
-      /* RtlUnwindEx should never return. */
-#else
-      UNUSED(ctx);
-      UNUSED(dispatch);
-      /* Call all handlers for all lower C frames (including ourselves) again
-      ** with EH_UNWINDING set. Then call the specified function, passing cf
-      ** and errcode.
-      */
-      lj_vm_rtlunwind(cf, (void *)rec,
-	(cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
-	(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
-      /* lj_vm_rtlunwind does not return. */
-#endif
-    }
-  }
-  return 1;  /* ExceptionContinueSearch */
+  memset(&static_uex, 0, sizeof(static_uex));
+  static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
+  static_uex.g = g;
+  _Unwind_RaiseException(&static_uex.ex);
 }
 
-/* Raise Windows exception. */
-static void err_raise_ext(int errcode)
-{
-  RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
-}
+#endif
 
 #endif
 
@@ -504,22 +775,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_thro
 {
   global_State *g = G(L);
   lj_trace_abort(g);
-  setmref(g->jit_base, NULL);
   L->status = LUA_OK;
 #if LJ_UNWIND_EXT
-  err_raise_ext(errcode);
+  err_raise_ext(g, errcode);
   /*
   ** A return from this function signals a corrupt C stack that cannot be
   ** unwound. We have no choice but to call the panic function and exit.
   **
   ** Usually this is caused by a C function without unwind information.
-  ** This should never happen on x64, but may happen if you've manually
-  ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
-  ** non-C++ file with -funwind-tables.
+  ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
+  ** and forgot to recompile *every* non-C++ file with -funwind-tables.
   */
   if (G(L)->panic)
     G(L)->panic(L);
 #else
+#if LJ_HASJIT
+  setmref(g->jit_base, NULL);
+#endif
   {
     void *cf = err_unwind(L, NULL, errcode);
     if (cframe_unwind_ff(cf))
@@ -542,6 +814,11 @@ LJ_NOINLINE void lj_err_mem(lua_State *L
 {
   if (L->status == LUA_ERRERR+1)  /* Don't touch the stack during lua_open. */
     lj_vm_unwind_c(L->cframe, LUA_ERRMEM);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
+  if (curr_funcisL(L)) L->top = curr_topL(L);
   setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRMEM));
   lj_err_throw(L, LUA_ERRMEM);
 }
@@ -600,7 +877,7 @@ static ptrdiff_t finderrfunc(lua_State *
 /* Runtime error. */
 LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
 {
-  ptrdiff_t ef = finderrfunc(L);
+  ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
   if (ef) {
     TValue *errfunc = restorestack(L, ef);
     TValue *top = L->top;
@@ -619,12 +896,26 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(
   lj_err_throw(L, LUA_ERRRUN);
 }
 
+#if LJ_HASJIT
+LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
+{
+  if (errcode == LUA_ERRRUN)
+    lj_err_run(L);
+  else
+    lj_err_throw(L, errcode);
+}
+#endif
+
 /* Formatted runtime error message. */
 LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
 {
   const char *msg;
   va_list argp;
   va_start(argp, em);
+  if (LJ_HASJIT) {
+    TValue *base = tvref(G(L)->jit_base);
+    if (base) L->base = base;
+  }
   if (curr_funcisL(L)) L->top = curr_topL(L);
   msg = lj_strfmt_pushvf(L, err2msg(em), argp);
   va_end(argp);
@@ -699,25 +990,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_
 /* Error in context of caller. */
 LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
 {
-  TValue *frame = L->base-1;
-  TValue *pframe = NULL;
-  if (frame_islua(frame)) {
-    pframe = frame_prevl(frame);
-  } else if (frame_iscont(frame)) {
-    if (frame_iscont_fficb(frame)) {
-      pframe = frame;
-      frame = NULL;
-    } else {
-      pframe = frame_prevd(frame);
+  TValue *frame = NULL, *pframe = NULL;
+  if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
+    frame = L->base-1;
+    if (frame_islua(frame)) {
+      pframe = frame_prevl(frame);
+    } else if (frame_iscont(frame)) {
+      if (frame_iscont_fficb(frame)) {
+	pframe = frame;
+	frame = NULL;
+      } else {
+	pframe = frame_prevd(frame);
 #if LJ_HASFFI
-      /* Remove frame for FFI metamethods. */
-      if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
-	  frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
-	L->base = pframe+1;
-	L->top = frame;
-	setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
-      }
+	/* Remove frame for FFI metamethods. */
+	if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+	    frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+	  L->base = pframe+1;
+	  L->top = frame;
+	  setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
+	}
 #endif
+      }
     }
   }
   lj_debug_addloc(L, msg, pframe, frame);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_err.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_err.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_err.h
@@ -1,6 +1,6 @@
 /*
 ** Error handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_ERR_H
@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
 LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
 LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
 LJ_FUNC_NORET void lj_err_mem(lua_State *L);
-LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+#if LJ_HASJIT
+LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
+#endif
 LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
 LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
 			      BCLine line, ErrMsg em, va_list argp);
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State
 LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
 LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
 
+#if LJ_UNWIND_JIT && !LJ_ABI_WIN
+LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
+LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
+#else
+#define lj_err_register_mcode(base, sz, info)	(info)
+#define lj_err_deregister_mcode(base, sz, info)	UNUSED(base)
+#endif
+
+#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
+LJ_FUNC void lj_err_verify(void);
+#else
+#define lj_err_verify()		((void)0)
+#endif
+
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_errmsg.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_errmsg.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_errmsg.h
@@ -1,6 +1,6 @@
 /*
 ** VM error messages.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* This file may be included multiple times with different ERRDEF macros. */
@@ -67,6 +67,7 @@ ERRDEF(PROTMT,	"cannot change a protecte
 ERRDEF(UNPACK,	"too many results to unpack")
 ERRDEF(RDRSTR,	"reader function must return a string")
 ERRDEF(PRTOSTR,	LUA_QL("tostring") " must return a string to " LUA_QL("print"))
+ERRDEF(NUMRNG,	"number out of range")
 ERRDEF(IDXRNG,	"index out of range")
 ERRDEF(BASERNG,	"base out of range")
 ERRDEF(LVLRNG,	"level out of range")
@@ -179,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT,	"NYI: packed bit
 ERRDEF(FFI_NYICALL,	"NYI: cannot call this C function (yet)")
 #endif
 
+#if LJ_HASBUFFER
+/* String buffer errors. */
+ERRDEF(BUFFER_SELF,	"cannot put buffer into itself")
+ERRDEF(BUFFER_BADOPT,	"bad options table")
+ERRDEF(BUFFER_BADENC,	"cannot serialize " LUA_QS)
+ERRDEF(BUFFER_BADDEC,	"cannot deserialize tag 0x%02x")
+ERRDEF(BUFFER_BADDICTX,	"cannot deserialize dictionary index %d")
+ERRDEF(BUFFER_DEPTH,	"too deep to serialize")
+ERRDEF(BUFFER_DUPKEY,	"duplicate table key")
+ERRDEF(BUFFER_EOB,	"unexpected end of buffer")
+ERRDEF(BUFFER_LEFTOV,	"left-over data in buffer")
+#endif
+
 #undef ERRDEF
 
 /* Detecting unused error messages:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ff.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ff.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ff.h
@@ -1,6 +1,6 @@
 /*
 ** Fast function IDs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_FF_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ffrecord.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ffrecord.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ffrecord.c
@@ -1,6 +1,6 @@
 /*
 ** Fast function call recorder.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_ffrecord_c
@@ -11,6 +11,7 @@
 #if LJ_HASJIT
 
 #include "lj_err.h"
+#include "lj_buf.h"
 #include "lj_str.h"
 #include "lj_tab.h"
 #include "lj_frame.h"
@@ -28,6 +29,7 @@
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strfmt.h"
+#include "lj_serialize.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
 #define IR(ref)			(&J->cur.ir[(ref)])
@@ -107,6 +109,10 @@ static void recff_stitch(jit_State *J)
   const BCIns *pc = frame_pc(base-1);
   TValue *pframe = frame_prevl(base-1);
 
+  /* Check for this now. Throwing in lj_record_stop messes up the stack. */
+  if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
+    lj_trace_err(J, LJ_TRERR_SNAPOV);
+
   /* Move func + args up in Lua stack and insert continuation. */
   memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
   setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
@@ -182,6 +188,14 @@ static TRef recff_bufhdr(jit_State *J)
 		lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
 }
 
+/* Emit TMPREF. Integer refs are converted to numbers first if !LJ_DUALNUM. */
+static TRef recff_tmpref(jit_State *J, TRef tr, int mode)
+{
+  if (!LJ_DUALNUM && tref_isinteger(tr))
+    tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+  return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode);
+}
+
 /* -- Base library fast functions ----------------------------------------- */
 
 static void LJ_FASTCALL recff_assert(jit_State *J, RecordFFData *rd)
@@ -296,7 +310,7 @@ int32_t lj_ffrecord_select_mode(jit_Stat
     } else {
       TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
       TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
-      emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
+      emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#'));
     }
     return 0;
   } else {  /* select(n, ...) */
@@ -317,9 +331,9 @@ static void LJ_FASTCALL recff_select(jit
       ptrdiff_t n = (ptrdiff_t)J->maxslot;
       if (start < 0) start += n;
       else if (start > n) start = n;
-      rd->nres = n - start;
       if (start >= 1) {
 	ptrdiff_t i;
+	rd->nres = n - start;
 	for (i = 0; i < n - start; i++)
 	  J->base[i] = J->base[start+i];
       }  /* else: Interpreter will throw. */
@@ -455,6 +469,7 @@ static void LJ_FASTCALL recff_pcall(jit_
 #endif
     lj_record_call(J, 0, J->maxslot - 1);
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 
@@ -490,6 +505,7 @@ static void LJ_FASTCALL recff_xpcall(jit
     if (errcode)
       lj_err_throw(J->L, errcode);  /* Propagate errors. */
     rd->nres = -1;  /* Pending call. */
+    J->needsnap = 1;  /* Start catching on-trace errors. */
   }  /* else: Interpreter will throw. */
 }
 
@@ -505,6 +521,40 @@ static void LJ_FASTCALL recff_getfenv(ji
   recff_nyiu(J, rd);
 }
 
+static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd)
+{
+#if LJ_BE
+  /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+  ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+  */
+  recff_nyi(J, rd);
+#else
+  TRef tab = J->base[0];
+  if (tref_istab(tab)) {
+    RecordIndex ix;
+    cTValue *keyv;
+    ix.tab = tab;
+    if (tref_isnil(J->base[1])) {  /* Shortcut for start of traversal. */
+      ix.key = lj_ir_kint(J, 0);
+      keyv = niltvg(J2G(J));
+    } else {
+      TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
+      ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp);
+      keyv = &rd->argv[1];
+    }
+    copyTV(J->L, &ix.tabv, &rd->argv[0]);
+    ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv);
+    /* Omit the value, if not used by the caller. */
+    ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) &&
+		   bc_b(frame_pc(J->L->base-1)[-1])-1 < 2);
+    ix.mobj = 0;  /* We don't need the next index. */
+    rd->nres = lj_record_next(J, &ix);
+    J->base[0] = ix.key;
+    J->base[1] = ix.val;
+  }  /* else: Interpreter will throw. */
+#endif
+}
+
 /* -- Math library fast functions ----------------------------------------- */
 
 static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
@@ -588,8 +638,8 @@ static void LJ_FASTCALL recff_math_call(
 
 static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
 {
-  J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
-				 &rd->argv[0], &rd->argv[1]);
+  J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
+				   &rd->argv[0], &rd->argv[1], IR_POW);
   UNUSED(rd);
 }
 
@@ -707,7 +757,7 @@ static void LJ_FASTCALL recff_bit_tohex(
 #if LJ_HASFFI
   TRef hdr = recff_bufhdr(J);
   TRef tr = recff_bit64_tohex(J, rd, hdr);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 #else
   recff_nyiu(J, rd);  /* Don't bother working around this NYI. */
 #endif
@@ -833,8 +883,8 @@ static void LJ_FASTCALL recff_string_cha
   if (i > 1) {  /* Concatenate the strings, if there's more than one. */
     TRef hdr = recff_bufhdr(J), tr = hdr;
     for (i = 0; J->base[i] != 0; i++)
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
-    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
   } else if (i == 0) {
     J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
   }
@@ -852,19 +902,19 @@ static void LJ_FASTCALL recff_string_rep
     emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
     if (vrep > 1) {
       TRef hdr2 = recff_bufhdr(J);
-      TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
-      tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
-      str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
+      TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
+      tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
+      str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
     }
   }
   tr = hdr = recff_bufhdr(J);
   if (str2) {
-    tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
+    tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
     str = str2;
     rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
   }
   tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 
 static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
@@ -872,7 +922,7 @@ static void LJ_FASTCALL recff_string_op(
   TRef str = lj_ir_tostr(J, J->base[0]);
   TRef hdr = recff_bufhdr(J);
   TRef tr = lj_ir_call(J, rd->data, hdr, str);
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
 }
 
 static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
@@ -935,34 +985,40 @@ static void LJ_FASTCALL recff_string_fin
   }
 }
 
-static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
 {
-  TRef trfmt = lj_ir_tostr(J, J->base[0]);
-  GCstr *fmt = argv2str(J, &rd->argv[0]);
-  int arg = 1;
-  TRef hdr, tr;
+  ptrdiff_t arg = sbufx;
+  TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
+  GCstr *fmt = argv2str(J, &rd->argv[arg]);
   FormatState fs;
   SFormat sf;
   /* Specialize to the format string. */
   emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
-  tr = hdr = recff_bufhdr(J);
   lj_strfmt_init(&fs, strdata(fmt), fmt->len);
   while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {  /* Parse format. */
-    TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
+    TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
     TRef trsf = lj_ir_kint(J, (int32_t)sf);
     IRCallID id;
     switch (STRFMT_TYPE(sf)) {
     case STRFMT_LIT:
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		  lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
       break;
     case STRFMT_INT:
       id = IRCALL_lj_strfmt_putfnum_int;
     handle_int:
-      if (!tref_isinteger(tra))
+      if (!tref_isinteger(tra)) {
+#if LJ_HASFFI
+	if (tref_iscdata(tra)) {
+	  tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
+	  tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+	  break;
+	}
+#endif
 	goto handle_num;
+      }
       if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
       } else {
 #if LJ_HASFFI
@@ -989,10 +1045,11 @@ static void LJ_FASTCALL recff_string_for
     case STRFMT_STR:
       if (!tref_isstr(tra)) {
 	recff_nyiu(J, rd);  /* NYI: __tostring and non-string types for %s. */
+	/* NYI: also buffers. */
 	return;
       }
       if (sf == STRFMT_STR)  /* Shortcut for plain %s. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
       else if ((sf & STRFMT_T_QUOTED))
 	tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
       else
@@ -1001,7 +1058,7 @@ static void LJ_FASTCALL recff_string_for
     case STRFMT_CHAR:
       tra = lj_opt_narrow_toint(J, tra);
       if (sf == STRFMT_CHAR)  /* Shortcut for plain %c. */
-	tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
+	tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
 		    emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
       else
 	tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
@@ -1013,9 +1070,333 @@ static void LJ_FASTCALL recff_string_for
       return;
     }
   }
-  J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+  if (sbufx) {
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+  } else {
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+  }
 }
 
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{  /* Thin wrapper: record via recff_format with a fresh buffer header. */
+  recff_format(J, rd, recff_bufhdr(J), 0);  /* sbufx=0: format string is J->base[0], result is a BUFSTR. */
+}
+
+/* -- Buffer library fast functions --------------------------------------- */
+
+#if LJ_HASBUFFER
+
+static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
+{  /* Emit an FLOAD (type PGC) of the IRFL_SBUF_L field of buffer userdata 'ud'. */
+  return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
+}
+
+static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
+{  /* Emit FREF + FSTORE writing 'val' to the IRFL_SBUF_L field of 'ud'. */
+  TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
+  emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
+{  /* Emit an FLOAD (type PTR) of field 'fl' of buffer userdata 'ud'. */
+  return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
+}
+
+static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
+{  /* Emit FREF + FSTORE (type PTR) writing 'val' to field 'fl' of 'ud'. */
+  TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
+  emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
+{  /* Emit IR for the pointer difference trw - trr and return it as an int-typed TRef. */
+  TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
+  if (LJ_64)  /* Retype the INTP result to INT (IRCONV_NONE mode). */
+    len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  return len;
+}
+
+/* Emit typecheck for string buffer. */
+static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
+{  /* Returns J->base[arg]; raises trace error BADTYPE if the runtime argument is not a buffer. */
+  TRef trtype, ud = J->base[arg];
+  if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
+  trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
+  emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));  /* Guard: udtype == UDTYPE_BUFFER. */
+  J->needsnap = 1;  /* Request a snapshot after the guard. */
+  return ud;
+}
+
+/* Emit BUFHDR for write to extended string buffer. */
+static TRef recff_sbufx_write(jit_State *J, TRef ud)
+{  /* Returns the BUFHDR ref, which callers thread through BUFPUT/put calls. */
+  TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata)));  /* Payload follows the GCudata header. */
+  return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
+}
+
+/* Check for integer in range for the buffer API. */
+static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
+{  /* Returns an int-typed TRef for J->base[arg], guarded to be <= LJ_MAX_BUF (unsigned compare). */
+  TRef tr = J->base[arg];
+  TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
+  if (tref_isinteger(tr)) {
+    emitir(IRTGI(IR_ULE), tr, trlim);  /* Unsigned compare also rejects negative values. */
+  } else if (tref_isnum(tr)) {
+    tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);  /* Number -> int, any FP number accepted. */
+    emitir(IRTGI(IR_ULE), tr, trlim);
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {
+    tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
+    emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));  /* Range check on the 64 bit value first. */
+    tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);  /* Then retype to INT. */
+#else
+    UNUSED(rd);
+#endif
+  } else {
+    lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Any other argument type is not recorded. */
+  }
+  return tr;
+}
+
+static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
+{  /* Record buffer reset, specialized on whether the buffer is COW at recording time. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  SBufExt *sbx = bufV(&rd->argv[0]);
+  int iscow = (int)sbufiscow(sbx);  /* Runtime COW state of the recorded buffer. */
+  TRef trl = recff_sbufx_get_L(J, ud);
+  TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
+  TRef zeropgc = lj_ir_kintpgc(J, 0);
+  emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc);  /* Guard: on-trace flag matches 'iscow'. */
+  if (iscow) {  /* COW: clear the flag in L, zero w/e/b/r and the cowref field. */
+    TRef zerop = lj_ir_kintp(J, 0);
+    trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop);
+    recff_sbufx_set_L(J, ud, trl);
+    emitir(IRT(IR_FSTORE, IRT_PGC),
+	   emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop);
+  } else {  /* Not COW: set both w and r to the current b. */
+    TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
+  }
+}
+
+static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
+{  /* Record buffer skip: r += min(w - r, n), where n is the range-checked argument 1. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef len = recff_sbufx_len(J, trr, trw);
+  TRef trn = recff_sbufx_checkint(J, rd, 1);
+  len = emitir(IRTI(IR_MIN), len, trn);  /* Clamp to the w - r distance. */
+  trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
+{  /* Record buffer set: call lj_bufx_set with (pointer, length, referenced object). */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = J->base[1];
+  if (tref_isstr(tr)) {  /* String: pointer to offset 0 and its stored length. */
+    TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
+    TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#if LJ_HASFFI
+  } else if (tref_iscdata(tr)) {  /* Cdata: pointer from the cdata, length from argument 2. */
+    TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
+    TRef len = recff_sbufx_checkint(J, rd, 2);
+    lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#endif
+  }  /* else: Interpreter will throw. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
+{  /* Record buffer put: append every argument from J->base[1] on to the buffer. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) return;  /* No arguments: nothing to append. */
+  for (arg = 1; (tr = J->base[arg]); arg++) {  /* Pass 1: emit all checks before any write. */
+    if (tref_isudata(tr)) {
+      TRef ud2 = recff_sbufx_check(J, rd, arg);
+      emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);  /* Guard: source buffer differs from the target. */
+    }
+  }
+  for (arg = 1; (tr = J->base[arg]); arg++) {  /* Pass 2: emit the appends, threading trbuf. */
+    if (tref_isstr(tr)) {
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
+    } else if (tref_isnumber(tr)) {  /* Numbers are converted with TOSTR first. */
+      trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
+		     emitir(IRT(IR_TOSTR, IRT_STR), tr,
+			    tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
+    } else if (tref_isudata(tr)) {  /* Other buffer: copy its r..w range. */
+      TRef trr = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_R);
+      TRef trw = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_W);
+      TRef len = recff_sbufx_len(J, trr, trw);
+      trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
+    } else {
+      recff_nyiu(J, rd);  /* Other argument types are handled by the NYI fallback. */
+    }
+  }
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);  /* Mark the final buffer ref as used. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
+{  /* Record buffer putf via the shared format recorder, writing into the buffer. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  recff_format(J, rd, trbuf, 1);  /* sbufx=1: format string is J->base[1]. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
+{  /* Record buffer get: one new string per argument, advancing r after each. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tr;
+  ptrdiff_t arg;
+  if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }  /* No args: treat as a single nil arg. */
+  for (arg = 0; (tr = J->base[arg+1]); arg++) {  /* Pass 1: range-check all non-nil lengths. */
+    if (!tref_isnil(tr)) {
+      J->base[arg+1] = recff_sbufx_checkint(J, rd, arg+1);
+    }
+  }
+  for (arg = 0; (tr = J->base[arg+1]); arg++) {  /* Pass 2: result for argument arg+1 goes to slot arg. */
+    TRef len = recff_sbufx_len(J, trr, trw);
+    if (tref_isnil(tr)) {  /* nil: take everything from r up to w. */
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+      trr = trw;
+    } else {  /* Length given: take min(w - r, n) bytes. */
+      TRef tru;
+      len = emitir(IRTI(IR_MIN), len, tr);
+      tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+      J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+      trr = tru;  /* Doing the ADD before the SNEW generates better code. */
+    }
+    recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+  }
+  rd->nres = arg;  /* One result per processed argument. */
+}
+
+static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
+{  /* Record __tostring: new string from the r..w range; r and w are not modified. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
+}
+
+static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
+{  /* Record __len: result is the int-typed difference w - r. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = recff_sbufx_len(J, trr, trw);
+}
+
+#if LJ_HASFFI
+static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
+{  /* Record putcdata: append 'len' bytes (argument 2) from the pointer in argument 1. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
+  TRef len = recff_sbufx_checkint(J, rd, 2);
+  trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
+  emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);  /* Mark the call result as used. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
+{  /* Record reserve: call lj_bufx_more with the range-checked size from argument 1. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef trsz = recff_sbufx_checkint(J, rd, 1);
+  J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
+  J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));  /* w is loaded after the call. */
+  rd->nres = 2;  /* Results: uint8 pointer made from w, then the call's return value. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
+{  /* Record commit: w += len, guarded so that len does not exceed e - w. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef len = recff_sbufx_checkint(J, rd, 1);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
+  TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
+  if (LJ_64)  /* Retype the INTP difference to INT (IRCONV_NONE mode). */
+    left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+  emitir(IRTGI(IR_ULE), len, left);  /* Guard: len <= e - w (unsigned). */
+  trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
+}
+
+static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
+{  /* Record ref: return a uint8 pointer made from r and the length w - r. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+  TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+  J->base[0] = lj_crecord_topuint8(J, trr);
+  J->base[1] = recff_sbufx_len(J, trr, trw);
+  rd->nres = 2;  /* Two results: pointer and length. */
+}
+#endif
+
+static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
+{  /* Record the encode method: call lj_serialize_put on the buffer with argument 1. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);  /* Argument is passed to the call via a TMPREF input. */
+  lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
+  /* No IR_USE needed, since the call is a store. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
+{  /* Record the decode method: call lj_serialize_get, then load the typed result. */
+  TRef ud = recff_sbufx_check(J, rd, 0);
+  TRef trbuf = recff_sbufx_write(J, ud);
+  TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);  /* TMPREF output slot for the decoded value. */
+  TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);  /* Call result is later stored to r. */
+  IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));  /* Type is peeked from the runtime buffer at recording time. */
+  /* No IR_USE needed, since the call is a store. */
+  J->base[0] = lj_record_vload(J, tmp, 0, t);
+  /* The sbx->r store must be after the VLOAD type check, in case it fails. */
+  recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
+{  /* Record the encode library function: result is the lj_serialize_encode call on argument 0. */
+  TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1);
+  J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
+  /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
+  emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
+  UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
+{  /* Record the decode library function; only recorded when argument 0 is a string at runtime. */
+  if (tvisstr(&rd->argv[0])) {
+    GCstr *str = strV(&rd->argv[0]);
+    SBufExt sbx;
+    IRType t;
+    TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);  /* TMPREF output slot for the decoded value. */
+    TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
+    /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
+    ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
+    */
+    emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+    memset(&sbx, 0, sizeof(SBufExt));  /* Local SBufExt, used only to peek the result type below. */
+    lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
+    t = (IRType)lj_serialize_peektype(&sbx);  /* Type comes from the runtime string at recording time. */
+    J->base[0] = lj_record_vload(J, tmp, 0, t);
+  }  /* else: Interpreter will throw. */
+}
+
+#endif
+
 /* -- Table library fast functions ---------------------------------------- */
 
 static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
@@ -1054,7 +1435,7 @@ static void LJ_FASTCALL recff_table_conc
     TRef hdr = recff_bufhdr(J);
     TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
     emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
-    J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+    J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
   }  /* else: Interpreter will throw. */
   UNUSED(rd);
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ffrecord.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ffrecord.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ffrecord.h
@@ -1,6 +1,6 @@
 /*
 ** Fast function call recorder.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_FFRECORD_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_frame.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_frame.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_frame.h
@@ -1,6 +1,6 @@
 /*
 ** Stack frames.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_FRAME_H
@@ -192,12 +192,12 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
 #endif
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_ARM64
-#define CFRAME_OFS_ERRF		196
-#define CFRAME_OFS_NRES		200
-#define CFRAME_OFS_PREV		160
-#define CFRAME_OFS_L		176
-#define CFRAME_OFS_PC		168
-#define CFRAME_OFS_MULTRES	192
+#define CFRAME_OFS_ERRF		36
+#define CFRAME_OFS_NRES		40
+#define CFRAME_OFS_PREV		0
+#define CFRAME_OFS_L		16
+#define CFRAME_OFS_PC		8
+#define CFRAME_OFS_MULTRES	32
 #define CFRAME_SIZE		208
 #define CFRAME_SHIFT_MULTRES	3
 #elif LJ_TARGET_PPC
@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL
 #endif
 #define CFRAME_OFS_MULTRES	0
 #define CFRAME_SHIFT_MULTRES	3
+#elif LJ_TARGET_RISCV64
+#define CFRAME_OFS_ERRF		252
+#define CFRAME_OFS_NRES		248
+#define CFRAME_OFS_PREV		240
+#define CFRAME_OFS_L		232
+#define CFRAME_OFS_PC		224
+#define CFRAME_OFS_MULTRES	0
+#define CFRAME_SIZE		256
+#define CFRAME_SHIFT_MULTRES	3
 #else
 #error "Missing CFRAME_* definitions for this architecture"
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_func.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_func.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_func.c
@@ -1,6 +1,6 @@
 /*
 ** Function handling (prototypes, functions and upvalues).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_func.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_func.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_func.h
@@ -1,6 +1,6 @@
 /*
 ** Function handling (prototypes, functions and upvalues).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_FUNC_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gc.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_gc.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gc.c
@@ -1,6 +1,6 @@
 /*
 ** Garbage collector.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -27,6 +27,7 @@
 #include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
+#include "lj_vmevent.h"
 
 #define GCSTEPSIZE	1024u
 #define GCSWEEPMAX	40
@@ -65,6 +66,15 @@ static void gc_mark(global_State *g, GCo
     gray2black(o);  /* Userdata are never gray. */
     if (mt) gc_markobj(g, mt);
     gc_markobj(g, tabref(gco2ud(o)->env));
+    if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
+      SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
+      if (sbufiscow(sbx) && gcref(sbx->cowref))
+	gc_markobj(g, gcref(sbx->cowref));
+      if (gcref(sbx->dict_str))
+	gc_markobj(g, gcref(sbx->dict_str));
+      if (gcref(sbx->dict_mt))
+	gc_markobj(g, gcref(sbx->dict_mt));
+    }
   } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
     GCupval *uv = gco2uv(o);
     gc_marktv(g, uvval(uv));
@@ -512,8 +522,13 @@ static void gc_call_finalizer(global_Sta
   hook_restore(g, oldh);
   if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
   g->gc.threshold = oldt;  /* Restore GC threshold. */
-  if (errcode)
-    lj_err_throw(L, errcode);  /* Propagate errors. */
+  if (errcode) {
+    ptrdiff_t errobj = savestack(L, L->top-1);  /* Stack may be resized. */
+    lj_vmevent_send(L, ERRFIN,
+      copyTV(L, L->top++, restorestack(L, errobj));
+    );
+    L->top--;
+  }
 }
 
 /* Finalize one userdata or cdata object from the mmudata list. */
@@ -691,9 +706,12 @@ static size_t gc_onestep(lua_State *L)
     }
   case GCSfinalize:
     if (gcref(g->gc.mmudata) != NULL) {
+      GCSize old = g->gc.total;
       if (tvref(g->jit_base))  /* Don't call finalizers on trace. */
 	return LJ_MAX_MEM;
       gc_finalize(L);  /* Finalize one userdata object. */
+      if (old >= g->gc.total && g->gc.estimate > old - g->gc.total)
+	g->gc.estimate -= old - g->gc.total;
       if (g->gc.estimate > GCFINALIZECOST)
 	g->gc.estimate -= GCFINALIZECOST;
       return GCFINALIZECOST;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_gc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gc.h
@@ -1,6 +1,6 @@
 /*
 ** Garbage collector.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_GC_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gdbjit.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_gdbjit.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gdbjit.c
@@ -1,6 +1,6 @@
 /*
 ** Client for the GDB JIT API.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_gdbjit_c
@@ -306,6 +306,9 @@ enum {
 #elif LJ_TARGET_MIPS
   DW_REG_SP = 29,
   DW_REG_RA = 31,
+#elif LJ_TARGET_RISCV64
+  DW_REG_SP = 2,
+  DW_REG_RA = 1,
 #else
 #error "Unsupported target architecture"
 #endif
@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template =
   .machine = 20,
 #elif LJ_TARGET_MIPS
   .machine = 8,
+#elif LJ_TARGET_RISCV64
+  .machine = 243,
 #else
 #error "Unsupported target architecture"
 #endif
@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(G
       for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); }
       for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); }
     }
+#elif LJ_TARGET_RISCV64
+    {
+      int i;
+      for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); }
+      DB(DW_CFA_offset|9); DUV(17);
+      DB(DW_CFA_offset|8); DUV(18);
+      for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); }
+      DB(DW_CFA_offset|32|9); DUV(29);
+      DB(DW_CFA_offset|32|8); DUV(30);
+    }
 #else
 #error "Unsupported target architecture"
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gdbjit.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_gdbjit.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_gdbjit.h
@@ -1,6 +1,6 @@
 /*
 ** Client for the GDB JIT API.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_GDBJIT_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ir.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ir.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ir.c
@@ -1,6 +1,6 @@
 /*
 ** SSA IR (Intermediate Representation) emitter.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_ir_c
@@ -30,6 +30,7 @@
 #endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
+#include "lj_serialize.h"
 #include "lj_strfmt.h"
 #include "lj_prng.h"
 
@@ -147,7 +148,7 @@ TRef lj_ir_call(jit_State *J, IRCallID i
 }
 
 /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
-LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
+TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
 {
   lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
   ofs >>= 2;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ir.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ir.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ir.h
@@ -1,6 +1,6 @@
 /*
 ** SSA IR (Intermediate Representation) format.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_IR_H
@@ -76,8 +76,8 @@
   \
   _(ABS,	N , ref, ref) \
   _(LDEXP,	N , ref, ref) \
-  _(MIN,	C , ref, ref) \
-  _(MAX,	C , ref, ref) \
+  _(MIN,	N , ref, ref) \
+  _(MAX,	N , ref, ref) \
   _(FPMATH,	N , ref, lit) \
   \
   /* Overflow-checking arithmetic ops. */ \
@@ -95,6 +95,7 @@
   _(UREFO,	LW, ref, lit) \
   _(UREFC,	LW, ref, lit) \
   _(FREF,	R , ref, lit) \
+  _(TMPREF,	S , ref, lit) \
   _(STRREF,	N , ref, ref) \
   _(LREF,	L , ___, ___) \
   \
@@ -105,7 +106,7 @@
   _(FLOAD,	L , ref, lit) \
   _(XLOAD,	L , ref, lit) \
   _(SLOAD,	L , lit, lit) \
-  _(VLOAD,	L , ref, ___) \
+  _(VLOAD,	L , ref, lit) \
   _(ALEN,	L , ref, ref) \
   \
   _(ASTORE,	S , ref, ref) \
@@ -124,8 +125,8 @@
   \
   /* Buffer operations. */ \
   _(BUFHDR,	L , ref, lit) \
-  _(BUFPUT,	L , ref, ref) \
-  _(BUFSTR,	A , ref, ref) \
+  _(BUFPUT,	LW, ref, ref) \
+  _(BUFSTR,	AW, ref, ref) \
   \
   /* Barriers. */ \
   _(TBAR,	S , ref, ___) \
@@ -139,9 +140,9 @@
   _(STRTO,	N , ref, ___) \
   \
   /* Calls. */ \
-  _(CALLN,	N , ref, lit) \
-  _(CALLA,	A , ref, lit) \
-  _(CALLL,	L , ref, lit) \
+  _(CALLN,	NW, ref, lit) \
+  _(CALLA,	AW, ref, lit) \
+  _(CALLL,	LW, ref, lit) \
   _(CALLS,	S , ref, lit) \
   _(CALLXS,	S , ref, ref) \
   _(CARG,	N , ref, ref) \
@@ -204,9 +205,15 @@ IRFPMDEF(FPMENUM)
   _(UDATA_META,	offsetof(GCudata, metatable)) \
   _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
   _(UDATA_FILE,	sizeof(GCudata)) \
+  _(SBUF_W,	sizeof(GCudata) + offsetof(SBufExt, w)) \
+  _(SBUF_E,	sizeof(GCudata) + offsetof(SBufExt, e)) \
+  _(SBUF_B,	sizeof(GCudata) + offsetof(SBufExt, b)) \
+  _(SBUF_L,	sizeof(GCudata) + offsetof(SBufExt, L)) \
+  _(SBUF_REF,	sizeof(GCudata) + offsetof(SBufExt, cowref)) \
+  _(SBUF_R,	sizeof(GCudata) + offsetof(SBufExt, r)) \
   _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
   _(CDATA_PTR,	sizeof(GCcdata)) \
-  _(CDATA_INT, sizeof(GCcdata)) \
+  _(CDATA_INT,	sizeof(GCcdata)) \
   _(CDATA_INT64, sizeof(GCcdata)) \
   _(CDATA_INT64_4, sizeof(GCcdata) + 4)
 
@@ -217,6 +224,11 @@ IRFLDEF(FLENUM)
   IRFL__MAX
 } IRFieldID;
 
+/* TMPREF mode bits, stored in op2. */
+#define IRTMPREF_IN1		0x01	/* First input value. */
+#define IRTMPREF_OUT1		0x02	/* First output value. */
+#define IRTMPREF_OUT2		0x04	/* Second output value. */
+
 /* SLOAD mode bits, stored in op2. */
 #define IRSLOAD_PARENT		0x01	/* Coalesce with parent trace. */
 #define IRSLOAD_FRAME		0x02	/* Load 32 bits of ftsz. */
@@ -224,15 +236,17 @@ IRFLDEF(FLENUM)
 #define IRSLOAD_CONVERT		0x08	/* Number to integer conversion. */
 #define IRSLOAD_READONLY	0x10	/* Read-only, omit slot store. */
 #define IRSLOAD_INHERIT		0x20	/* Inherited by exits/side traces. */
+#define IRSLOAD_KEYINDEX	0x40	/* Table traversal key index. */
 
-/* XLOAD mode, stored in op2. */
-#define IRXLOAD_READONLY	1	/* Load from read-only data. */
-#define IRXLOAD_VOLATILE	2	/* Load from volatile data. */
-#define IRXLOAD_UNALIGNED	4	/* Unaligned load. */
+/* XLOAD mode bits, stored in op2. */
+#define IRXLOAD_READONLY	0x01	/* Load from read-only data. */
+#define IRXLOAD_VOLATILE	0x02	/* Load from volatile data. */
+#define IRXLOAD_UNALIGNED	0x04	/* Unaligned load. */
 
 /* BUFHDR mode, stored in op2. */
 #define IRBUFHDR_RESET		0	/* Reset buffer. */
 #define IRBUFHDR_APPEND		1	/* Append to buffer. */
+#define IRBUFHDR_WRITE		2	/* Write to string buffer. */
 
 /* CONV mode, stored in op2. */
 #define IRCONV_SRCMASK		0x001f	/* Source IRType. */
@@ -249,6 +263,7 @@ IRFLDEF(FLENUM)
 #define IRCONV_ANY    (1<<IRCONV_CSH)	/* Any FP number is ok. */
 #define IRCONV_INDEX  (2<<IRCONV_CSH)	/* Check + special backprop rules. */
 #define IRCONV_CHECK  (3<<IRCONV_CSH)	/* Number checked for integerness. */
+#define IRCONV_NONE   IRCONV_ANY	/* INT|*64 no conv, but change type. */
 
 /* TOSTR mode, stored in op2. */
 #define IRTOSTR_INT		0	/* Convert integer to string. */
@@ -481,6 +496,7 @@ typedef uint32_t TRef;
 #define TREF_REFMASK		0x0000ffff
 #define TREF_FRAME		0x00010000
 #define TREF_CONT		0x00020000
+#define TREF_KEYINDEX		0x00100000
 
 #define TREF(ref, t)		((TRef)((ref) + ((t)<<24)))
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ircall.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_ircall.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_ircall.h
@@ -1,6 +1,6 @@
 /*
 ** IR CALL* instruction definitions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_IRCALL_H
@@ -30,10 +30,12 @@ typedef struct CCallInfo {
 #define CCI_CALL_L		(IR_CALLL << CCI_OPSHIFT)
 #define CCI_CALL_S		(IR_CALLS << CCI_OPSHIFT)
 #define CCI_CALL_FN		(CCI_CALL_N|CCI_CC_FASTCALL)
+#define CCI_CALL_FA		(CCI_CALL_A|CCI_CC_FASTCALL)
 #define CCI_CALL_FL		(CCI_CALL_L|CCI_CC_FASTCALL)
 #define CCI_CALL_FS		(CCI_CALL_S|CCI_CC_FASTCALL)
 
 /* C call info flags. */
+#define CCI_T			(IRT_GUARD << CCI_OTSHIFT)  /* May throw. */
 #define CCI_L			0x0100	/* Implicit L arg. */
 #define CCI_CASTU64		0x0200	/* Cast u64 result to number. */
 #define CCI_NOFPRCLOBBER	0x0400	/* Does not clobber any FPRs. */
@@ -61,7 +63,7 @@ typedef struct CCallInfo {
 /* Helpers for conditional function definitions. */
 #define IRCALLCOND_ANY(x)		x
 
-#if LJ_TARGET_X86ORX64
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64
 #define IRCALLCOND_FPMATH(x)		NULL
 #else
 #define IRCALLCOND_FPMATH(x)		x
@@ -111,6 +113,18 @@ typedef struct CCallInfo {
 #define IRCALLCOND_FFI32(x)		NULL
 #endif
 
+#if LJ_HASBUFFER
+#define IRCALLCOND_BUFFER(x)		x	/* Enabled: expands to x. */
+#else
+#define IRCALLCOND_BUFFER(x)		NULL	/* Disabled: expands to NULL. */
+#endif
+
+#if LJ_HASBUFFER && LJ_HASFFI
+#define IRCALLCOND_BUFFFI(x)		x	/* Needs both buffer and FFI. */
+#else
+#define IRCALLCOND_BUFFFI(x)		NULL	/* Either one is missing. */
+#endif
+
 #if LJ_SOFTFP
 #define XA_FP		CCI_XA
 #define XA2_FP		(CCI_XA+CCI_XA)
@@ -139,39 +153,47 @@ typedef struct CCallInfo {
 #define IRCALLDEF(_) \
   _(ANY,	lj_str_cmp,		2,  FN, INT, CCI_NOFPRCLOBBER) \
   _(ANY,	lj_str_find,		4,   N, PGC, 0) \
-  _(ANY,	lj_str_new,		3,   S, STR, CCI_L) \
+  _(ANY,	lj_str_new,		3,   S, STR, CCI_L|CCI_T) \
   _(ANY,	lj_strscan_num,		2,  FN, INT, 0) \
-  _(ANY,	lj_strfmt_int,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_num,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_char,		2,  FN, STR, CCI_L) \
-  _(ANY,	lj_strfmt_putint,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putnum,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putquoted,	2,  FL, PGC, 0) \
-  _(ANY,	lj_strfmt_putfxint,	3,   L, PGC, XA_64) \
-  _(ANY,	lj_strfmt_putfnum_int,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfnum_uint,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfnum,	3,   L, PGC, XA_FP) \
-  _(ANY,	lj_strfmt_putfstr,	3,   L, PGC, 0) \
-  _(ANY,	lj_strfmt_putfchar,	3,   L, PGC, 0) \
-  _(ANY,	lj_buf_putmem,		3,   S, PGC, 0) \
-  _(ANY,	lj_buf_putstr,		2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putchar,		2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_reverse,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_lower,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_upper,	2,  FL, PGC, 0) \
-  _(ANY,	lj_buf_putstr_rep,	3,   L, PGC, 0) \
-  _(ANY,	lj_buf_puttab,		5,   L, PGC, 0) \
-  _(ANY,	lj_buf_tostr,		1,  FL, STR, 0) \
-  _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L) \
-  _(ANY,	lj_tab_new1,		2,  FS, TAB, CCI_L) \
-  _(ANY,	lj_tab_dup,		2,  FS, TAB, CCI_L) \
+  _(ANY,	lj_strfmt_int,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_num,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_char,		2,  FN, STR, CCI_L|CCI_T) \
+  _(ANY,	lj_strfmt_putint,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putnum,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putquoted,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putfxint,	3,   L, PGC, XA_64|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum_int,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum_uint,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfnum,	3,   L, PGC, XA_FP|CCI_T) \
+  _(ANY,	lj_strfmt_putfstr,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_strfmt_putfchar,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_buf_putmem,		3,   S, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr,		2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putchar,		2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_reverse,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_lower,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_upper,	2,  FL, PGC, CCI_T) \
+  _(ANY,	lj_buf_putstr_rep,	3,   L, PGC, CCI_T) \
+  _(ANY,	lj_buf_puttab,		5,   L, PGC, CCI_T) \
+  _(BUFFER,	lj_bufx_set,		4,   S, NIL, 0) \
+  _(BUFFFI,	lj_bufx_more,		2,  FS, INT, CCI_T) \
+  _(BUFFER,	lj_serialize_put,	2,  FS, PGC, CCI_T) \
+  _(BUFFER,	lj_serialize_get,	2,  FS, PTR, CCI_T) \
+  _(BUFFER,	lj_serialize_encode,	2,  FA, STR, CCI_L|CCI_T) \
+  _(BUFFER,	lj_serialize_decode,	3,   A, INT, CCI_L|CCI_T) \
+  _(ANY,	lj_buf_tostr,		1,  FL, STR, CCI_T) \
+  _(ANY,	lj_tab_new_ah,		3,   A, TAB, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_new1,		2,  FA, TAB, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_dup,		2,  FA, TAB, CCI_L|CCI_T) \
   _(ANY,	lj_tab_clear,		1,  FS, NIL, 0) \
-  _(ANY,	lj_tab_newkey,		3,   S, PGC, CCI_L) \
+  _(ANY,	lj_tab_newkey,		3,   S, PGC, CCI_L|CCI_T) \
+  _(ANY,	lj_tab_keyindex,	2,  FL, INT, 0) \
+  _(ANY,	lj_vm_next,		2,  FL, PTR, 0) \
   _(ANY,	lj_tab_len,		1,  FL, INT, 0) \
   _(ANY,	lj_tab_len_hint,	2,  FL, INT, 0) \
   _(ANY,	lj_gc_step_jit,		2,  FS, NIL, CCI_L) \
   _(ANY,	lj_gc_barrieruv,	2,  FS, NIL, 0) \
-  _(ANY,	lj_mem_newgco,		2,  FS, PGC, CCI_L) \
+  _(ANY,	lj_mem_newgco,		2,  FA, PGC, CCI_L|CCI_T) \
   _(ANY,	lj_prng_u64d,		1,  FS, NUM, CCI_CASTU64) \
   _(ANY,	lj_vm_modi,		2,  FN, INT, 0) \
   _(ANY,	log10,			1,   N, NUM, XA_FP) \
@@ -195,7 +217,6 @@ typedef struct CCallInfo {
   _(FPMATH,	sqrt,			1,   N, NUM, XA_FP) \
   _(ANY,	log,			1,   N, NUM, XA_FP) \
   _(ANY,	lj_vm_log2,		1,   N, NUM, XA_FP) \
-  _(ANY,	lj_vm_powi,		2,   N, NUM, XA_FP) \
   _(ANY,	pow,			2,   N, NUM, XA2_FP) \
   _(ANY,	atan2,			2,   N, NUM, XA2_FP) \
   _(ANY,	ldexp,			2,   N, NUM, XA_FP) \
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_iropt.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_iropt.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_iropt.h
@@ -1,6 +1,6 @@
 /*
 ** Common header for IR emitter and optimizations.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_IROPT_H
@@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
 #define lj_ir_kintp(J, k)	lj_ir_kint(J, (int32_t)(k))
 #endif
 
+#if LJ_GC64	/* NOTE(review): presumably an int const as wide as a GC ref. */
+#define lj_ir_kintpgc		lj_ir_kintp	/* Alias for lj_ir_kintp. */
+#else
+#define lj_ir_kintpgc		lj_ir_kint	/* Alias for lj_ir_kint. */
+#endif
+
 static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
 {
   TValue tv;
@@ -124,6 +130,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen
 LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
 LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
+LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
 LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
 
 /* Dead-store elimination. */
@@ -144,7 +151,6 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_Sta
 				 TValue *vb, TValue *vc, IROp op);
 LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
 LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
-LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
 LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
 
 /* Optimization passes. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_jit.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_jit.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_jit.h
@@ -1,12 +1,13 @@
 /*
 ** Common definitions for the JIT compiler.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_JIT_H
 #define _LJ_JIT_H
 
 #include "lj_obj.h"
+#if LJ_HASJIT
 #include "lj_ir.h"
 
 /* -- JIT engine flags ---------------------------------------------------- */
@@ -66,6 +67,15 @@
 #endif
 #endif
 
+#elif LJ_TARGET_RISCV64
+
+#define JIT_F_RVC		(JIT_F_CPU << 0)
+#define JIT_F_RVZba		(JIT_F_CPU << 1)
+#define JIT_F_RVZbb		(JIT_F_CPU << 2)
+#define JIT_F_RVXThead		(JIT_F_CPU << 3)
+
+#define JIT_F_CPUSTRING		"\003RVC\003Zba\003Zbb\006XThead"
+
 #else
 
 #define JIT_F_CPUSTRING		""
@@ -86,10 +96,11 @@
 #define JIT_F_OPT_ABC		(JIT_F_OPT << 7)
 #define JIT_F_OPT_SINK		(JIT_F_OPT << 8)
 #define JIT_F_OPT_FUSE		(JIT_F_OPT << 9)
+#define JIT_F_OPT_FMA		(JIT_F_OPT << 10)
 
 /* Optimizations names for -O. Must match the order above. */
 #define JIT_F_OPTSTRING	\
-  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
+  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse\3fma"
 
 /* Optimization levels set a fixed combination of flags. */
 #define JIT_F_OPT_0	0
@@ -98,11 +109,12 @@
 #define JIT_F_OPT_3	(JIT_F_OPT_2|\
   JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
 #define JIT_F_OPT_DEFAULT	JIT_F_OPT_3
+/* Note: FMA is not set by default. */
 
 /* -- JIT engine parameters ----------------------------------------------- */
 
 #if LJ_TARGET_WINDOWS || LJ_64
-/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
+/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
 #define JIT_P_sizemcode_DEFAULT		64
 #else
 /* Could go as low as 4K, but the mmap() overhead would be rather high. */
@@ -150,6 +162,7 @@ typedef enum {
   LJ_TRACE_IDLE,	/* Trace compiler idle. */
   LJ_TRACE_ACTIVE = 0x10,
   LJ_TRACE_RECORD,	/* Bytecode recording active. */
+  LJ_TRACE_RECORD_1ST,	/* Record 1st instruction, too. */
   LJ_TRACE_START,	/* New trace started. */
   LJ_TRACE_END,		/* End of trace. */
   LJ_TRACE_ASM,		/* Assemble trace. */
@@ -184,6 +197,7 @@ typedef struct MCLink {
 typedef struct SnapShot {
   uint32_t mapofs;	/* Offset into snapshot map. */
   IRRef1 ref;		/* First IR ref for this snapshot. */
+  uint16_t mcofs;	/* Offset into machine code in MCode units. */
   uint8_t nslots;	/* Number of valid slots. */
   uint8_t topslot;	/* Maximum frame extent. */
   uint8_t nent;		/* Number of compressed entries. */
@@ -199,12 +213,15 @@ typedef uint32_t SnapEntry;
 #define SNAP_CONT		0x020000	/* Continuation slot. */
 #define SNAP_NORESTORE		0x040000	/* No need to restore slot. */
 #define SNAP_SOFTFPNUM		0x080000	/* Soft-float number. */
+#define SNAP_KEYINDEX		0x100000	/* Traversal key index. */
 LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
 LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
+LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX);
 
 #define SNAP(slot, flags, ref)	(((SnapEntry)(slot) << 24) + (flags) + (ref))
 #define SNAP_TR(slot, tr) \
-  (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
+  (((SnapEntry)(slot) << 24) + \
+   ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK)))
 #if !LJ_FR2
 #define SNAP_MKPC(pc)		((SnapEntry)u32ptr(pc))
 #endif
@@ -265,6 +282,9 @@ typedef struct GCtrace {
   BCIns startins;	/* Original bytecode of starting instruction. */
   MSize szmcode;	/* Size of machine code. */
   MCode *mcode;		/* Start of machine code. */
+#if LJ_ABI_PAUTH
+  ASMFunction mcauth;	/* Start of machine code, with ptr auth applied. */
+#endif
   MSize mcloop;		/* Offset of loop start in machine code. */
   uint16_t nchild;	/* Number of child traces (root trace only). */
   uint16_t spadjust;	/* Stack pointer adjustment (offset in bytes). */
@@ -366,6 +386,7 @@ enum {
 #endif
   LJ_K64__MAX,
 };
+#define LJ_K64__USED	(LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
 
 enum {
 #if LJ_TARGET_X86ORX64
@@ -384,6 +405,7 @@ enum {
 #endif
   LJ_K32__MAX
 };
+#define LJ_K32__USED	(LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
 
 /* Get 16 byte aligned pointer to SIMD constant. */
 #define LJ_KSIMD(J, n) \
@@ -438,9 +460,13 @@ typedef struct jit_State {
   int32_t framedepth;	/* Current frame depth. */
   int32_t retdepth;	/* Return frame depth (count of RETF). */
 
+#if LJ_K32__USED
   uint32_t k32[LJ_K32__MAX];  /* Common 4 byte constants used by backends. */
+#endif
   TValue ksimd[LJ_KSIMD__MAX*2+1];  /* 16 byte aligned SIMD constants. */
+#if LJ_K64__USED
   TValue k64[LJ_K64__MAX];  /* Common 8 byte constants. */
+#endif
 
   IRIns *irbuf;		/* Temp. IR instruction buffer. Biased with REF_BIAS. */
   IRRef irtoplim;	/* Upper limit of instuction buffer (biased). */
@@ -485,6 +511,7 @@ typedef struct jit_State {
   const BCIns *startpc;	/* Bytecode PC of starting instruction. */
   TraceNo parent;	/* Parent of current side trace (0 for root traces). */
   ExitNo exitno;	/* Exit number in parent of current side trace. */
+  int exitcode;		/* Exit code from unwound trace. */
 
   BCIns *patchpc;	/* PC for pending re-patch. */
   BCIns patchins;	/* Instruction for pending re-patch. */
@@ -510,5 +537,6 @@ typedef struct jit_State {
 #else
 #define lj_assertJ(c, ...)	((void)J)
 #endif
+#endif
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lex.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_lex.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lex.c
@@ -1,6 +1,6 @@
 /*
 ** Lexical analyzer.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -105,7 +105,7 @@ static void lex_number(LexState *ls, TVa
     lex_savenext(ls);
   }
   lex_save(ls, '\0');
-  fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), sbuflen(&ls->sb)-1, tv,
+  fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
 	  (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
 	  (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
   if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -118,11 +118,7 @@ static void lex_number(LexState *ls, TVa
     GCcdata *cd;
     lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
 		"unexpected number format %d", fmt);
-    if (!ctype_ctsG(G(L))) {
-      ptrdiff_t oldtop = savestack(L, L->top);
-      luaopen_ffi(L);  /* Load FFI library on-demand. */
-      L->top = restorestack(L, oldtop);
-    }
+    ctype_loadffi(L);
     if (fmt == STRSCAN_IMAG) {
       cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
       ((double *)cdataptr(cd))[0] = 0;
@@ -180,7 +176,7 @@ static void lex_longstring(LexState *ls,
     }
   } endloop:
   if (tv) {
-    GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
+    GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
 				      sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
     setstrV(ls->L, tv, str);
   }
@@ -286,7 +282,7 @@ static void lex_string(LexState *ls, TVa
   }
   lex_savenext(ls);  /* Skip trailing delimiter. */
   setstrV(ls->L, tv,
-	  lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
+	  lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
 }
 
 /* -- Main lexical scanner ------------------------------------------------ */
@@ -306,7 +302,7 @@ static LexToken lex_scan(LexState *ls, T
       do {
 	lex_savenext(ls);
       } while (lj_char_isident(ls->c));
-      s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
+      s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
       setstrV(ls->L, tv, s);
       if (s->reserved > 0)  /* Reserved word? */
 	return TK_OFS + s->reserved;
@@ -496,7 +492,7 @@ void lj_lex_error(LexState *ls, LexToken
     tokstr = NULL;
   } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
     lex_save(ls, '\0');
-    tokstr = sbufB(&ls->sb);
+    tokstr = ls->sb.b;
   } else {
     tokstr = lj_lex_token2str(ls, tok);
   }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lex.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_lex.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lex.h
@@ -1,6 +1,6 @@
 /*
 ** Lexical analyzer.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_LEX_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lib.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_lib.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lib.c
@@ -1,6 +1,6 @@
 /*
 ** Library function support.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_lib_c
@@ -16,6 +16,9 @@
 #include "lj_func.h"
 #include "lj_bc.h"
 #include "lj_dispatch.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
 #include "lj_vm.h"
 #include "lj_strscan.h"
 #include "lj_strfmt.h"
@@ -301,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int na
   return def;
 }
 
+/* -- Strict type checks -------------------------------------------------- */
+
+/* The following type checks do not coerce between strings and numbers.
+** And they handle plain int64_t/uint64_t FFI numbers, too.
+*/
+
+#if LJ_HASBUFFER
+GCstr *lj_lib_checkstrx(lua_State *L, int narg)
+{
+  TValue *o = L->base + narg-1;  /* narg is 1-based: arg 1 is at L->base. */
+  if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);
+  return strV(o);  /* Assumes lj_err_argt() above does not return. */
+}
+
+/* Get integer arg (number or int64/uint64 cdata) in range a..b or throw. */
+int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
+{
+  TValue *o = L->base + narg-1;
+  lj_assertL(b >= 0, "expected range must be non-negative");
+  if (o < L->top) {  /* A missing argument is reported as a bad type. */
+    if (LJ_LIKELY(tvisint(o))) {
+      int32_t i = intV(o);
+      if (i >= a && i <= b) return i;
+    } else if (LJ_LIKELY(tvisnum(o))) {
+      /* No integerness check: the fraction is truncated. But check for the
+      ** int32_t range first: casting NaN or huge numbers is undefined. */
+      lua_Number n = numV(o);
+      if (n > -2147483649.0 && n < 2147483648.0 &&
+	  (int32_t)n >= a && (int32_t)n <= b) return (int32_t)n;
+#if LJ_HASFFI
+    } else if (tviscdata(o)) {
+      GCcdata *cd = cdataV(o);
+      if (cd->ctypeid == CTID_INT64) {
+	int64_t i = *(int64_t *)cdataptr(cd);
+	if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
+      } else if (cd->ctypeid == CTID_UINT64) {
+	uint64_t i = *(uint64_t *)cdataptr(cd);
+	if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;
+      } else {
+	goto badtype;
+      }
+#endif
+    } else {
+      goto badtype;
+    }
+    lj_err_arg(L, narg, LJ_ERR_NUMRNG);
+  }
+badtype:
+  lj_err_argt(L, narg, LUA_TNUMBER);
+  return 0;  /* unreachable */
+}
+#endif
+
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lib.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_lib.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_lib.h
@@ -1,6 +1,6 @@
 /*
 ** Library function support.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_LIB_H
@@ -46,6 +46,12 @@ LJ_FUNC GCtab *lj_lib_checktab(lua_State
 LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
 LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
 
+#if LJ_HASBUFFER
+LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
+LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
+				     int32_t a, int32_t b);
+#endif
+
 /* Avoid including lj_frame.h. */
 #if LJ_GC64
 #define lj_lib_upvalue(L, n) \
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_load.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_load.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_load.c
@@ -1,6 +1,6 @@
 /*
 ** Load and dump code.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <errno.h>
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_mcode.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_mcode.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_mcode.c
@@ -1,6 +1,6 @@
 /*
 ** Machine code management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_mcode_c
@@ -29,6 +29,11 @@
 #include <valgrind/valgrind.h>
 #endif
 
+#if LJ_TARGET_WINDOWS
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
 #if LJ_TARGET_IOS
 void sys_icache_invalidate(void *start, size_t len);
 #endif
@@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *en
 #endif
 #if LJ_TARGET_X86ORX64
   UNUSED(start); UNUSED(end);
+#elif LJ_TARGET_WINDOWS
+  FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start);
 #elif LJ_TARGET_IOS
   sys_icache_invalidate(start, (char *)end-(char *)start);
 #elif LJ_TARGET_PPC
@@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *en
 
 #if LJ_TARGET_WINDOWS
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
 #define MCPROT_RW	PAGE_READWRITE
 #define MCPROT_RX	PAGE_EXECUTE_READ
 #define MCPROT_RWX	PAGE_EXECUTE_READWRITE
@@ -97,10 +101,15 @@ static int mcode_setprot(void *p, size_t
 #define MCPROT_RW	(PROT_READ|PROT_WRITE)
 #define MCPROT_RX	(PROT_READ|PROT_EXEC)
 #define MCPROT_RWX	(PROT_READ|PROT_WRITE|PROT_EXEC)
+#ifdef PROT_MPROTECT	/* OS-specific; presumably NetBSD PaX MPROTECT. */
+#define MCPROT_CREATE	(PROT_MPROTECT(MCPROT_RWX))  /* OR'ed into mmap() prot. */
+#else
+#define MCPROT_CREATE	0	/* No extra mmap() protection bits. */
+#endif
 
 static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
 {
-  void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED) {
     if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
     p = NULL;
@@ -163,7 +172,7 @@ static void mcode_protect(jit_State *J,
 #define MCPROT_RUN	MCPROT_RX
 
 /* Protection twiddling failed. Probably due to kernel security. */
-static LJ_NOINLINE void mcode_protfail(jit_State *J)
+static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
 {
   lua_CFunction panic = J2G(J)->panic;
   if (panic) {
@@ -171,6 +180,7 @@ static LJ_NOINLINE void mcode_protfail(j
     setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
     panic(L);
   }
+  exit(EXIT_FAILURE);
 }
 
 /* Change protection of MCode area. */
@@ -238,7 +248,7 @@ static void *mcode_alloc(jit_State *J, s
 /* All memory addresses are reachable by relative jumps. */
 static void *mcode_alloc(jit_State *J, size_t sz)
 {
-#if defined(__OpenBSD__) || LJ_TARGET_UWP
+#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
   /* Allow better executable memory allocation for OpenBSD W^X mode. */
   void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
   if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
@@ -269,6 +279,7 @@ static void mcode_allocarea(jit_State *J
   ((MCLink *)J->mcarea)->next = oldarea;
   ((MCLink *)J->mcarea)->size = sz;
   J->szallmcarea += sz;
+  J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
 }
 
 /* Free all MCode areas. */
@@ -279,7 +290,9 @@ void lj_mcode_free(jit_State *J)
   J->szallmcarea = 0;
   while (mc) {
     MCode *next = ((MCLink *)mc)->next;
-    mcode_free(J, mc, ((MCLink *)mc)->size);
+    size_t sz = ((MCLink *)mc)->size;
+    lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
+    mcode_free(J, mc, sz);
     mc = next;
   }
 }
@@ -314,21 +327,21 @@ void lj_mcode_abort(jit_State *J)
 /* Set/reset protection to allow patching of MCode areas. */
 MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
 {
-#if LUAJIT_SECURITY_MCODE == 0
-  UNUSED(J); UNUSED(ptr); UNUSED(finish);
-  return NULL;
-#else
   if (finish) {
+#if LUAJIT_SECURITY_MCODE
     if (J->mcarea == ptr)
       mcode_protect(J, MCPROT_RUN);
     else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
       mcode_protfail(J);
+#endif
     return NULL;
   } else {
     MCode *mc = J->mcarea;
     /* Try current area first to use the protection cache. */
     if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
+#if LUAJIT_SECURITY_MCODE
       mcode_protect(J, MCPROT_GEN);
+#endif
       return mc;
     }
     /* Otherwise search through the list of MCode areas. */
@@ -336,13 +349,14 @@ MCode *lj_mcode_patch(jit_State *J, MCod
       mc = ((MCLink *)mc)->next;
       lj_assertJ(mc != NULL, "broken MCode area chain");
       if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
+#if LUAJIT_SECURITY_MCODE
 	if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
 	  mcode_protfail(J);
+#endif
 	return mc;
       }
     }
   }
-#endif
 }
 
 /* Limit of MCode reservation reached. */
@@ -353,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, siz
   sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
   sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
   maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
-  if ((size_t)need > sizemcode)
+  if (need * sizeof(MCode) > sizemcode)
     lj_trace_err(J, LJ_TRERR_MCODEOV);  /* Too long for any area. */
   if (J->szallmcarea + sizemcode > maxmcode)
     lj_trace_err(J, LJ_TRERR_MCODEAL);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_mcode.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_mcode.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_mcode.h
@@ -1,6 +1,6 @@
 /*
 ** Machine code management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_MCODE_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_meta.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_meta.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_meta.c
@@ -1,6 +1,6 @@
 /*
 ** Metamethod handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue
   int fromc = 0;
   if (left < 0) { left = -left; fromc = 1; }
   do {
-    if (!(tvisstr(top) || tvisnumber(top)) ||
-	!(tvisstr(top-1) || tvisnumber(top-1))) {
+    if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
+	!(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
       cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
       if (tvisnil(mo)) {
 	mo = lj_meta_lookup(L, top, MM_concat);
@@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue
       ** next step: [...][CAT stack ............]
       */
       TValue *e, *o = top;
-      uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+      uint64_t tlen = tvisstr(o) ? strV(o)->len :
+		      tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
       SBuf *sb;
       do {
-	o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
+	o--; tlen += tvisstr(o) ? strV(o)->len :
+		     tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
       } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
       if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
       sb = lj_buf_tmp_(L);
@@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue
 	  GCstr *s = strV(o);
 	  MSize len = s->len;
 	  lj_buf_putmem(sb, strdata(s), len);
+	} else if (tvisbuf(o)) {
+	  SBufExt *sbx = bufV(o);
+	  lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
 	} else if (tvisint(o)) {
 	  lj_strfmt_putint(sb, intV(o));
 	} else {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_meta.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_meta.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_meta.h
@@ -1,6 +1,6 @@
 /*
 ** Metamethod handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_META_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_obj.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_obj.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_obj.c
@@ -1,6 +1,6 @@
 /*
 ** Miscellaneous object handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_obj_c
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_obj.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_obj.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_obj.h
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT VM tags, values and objects.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -34,13 +34,17 @@ typedef struct MRef {
 
 #if LJ_GC64
 #define mref(r, t)	((t *)(void *)(r).ptr64)
+#define mrefu(r)	((r).ptr64)
 
 #define setmref(r, p)	((r).ptr64 = (uint64_t)(void *)(p))
+#define setmrefu(r, u)	((r).ptr64 = (uint64_t)(u))
 #define setmrefr(r, v)	((r).ptr64 = (v).ptr64)
 #else
 #define mref(r, t)	((t *)(void *)(uintptr_t)(r).ptr32)
+#define mrefu(r)	((r).ptr32)
 
 #define setmref(r, p)	((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
+#define setmrefu(r, u)	((r).ptr32 = (uint32_t)(u))
 #define setmrefr(r, v)	((r).ptr32 = (v).ptr32)
 #endif
 
@@ -153,11 +157,9 @@ typedef int32_t BCLine;  /* Bytecode lin
 typedef void (*ASMFunction)(void);
 
 /* Resizable string buffer. Need this here, details in lj_buf.h. */
+#define SBufHeader	char *w, *e, *b; MRef L
 typedef struct SBuf {
-  MRef p;		/* String buffer pointer. */
-  MRef e;		/* String buffer end pointer. */
-  MRef b;		/* String buffer base. */
-  MRef L;		/* lua_State, used for buffer resizing. */
+  SBufHeader;
 } SBuf;
 
 /* -- Tags and values ----------------------------------------------------- */
@@ -282,6 +284,9 @@ typedef const TValue cTValue;
 #define LJ_TISGCV		(LJ_TSTR+1)
 #define LJ_TISTABUD		LJ_TTAB
 
+/* Type marker for slot holding a traversal index. Must be lightuserdata. */
+#define LJ_KEYINDEX		0xfffe7fffu
+
 #if LJ_GC64
 #define LJ_GCVMASK		(((uint64_t)1 << 47) - 1)
 #endif
@@ -330,6 +335,7 @@ enum {
   UDTYPE_USERDATA,	/* Regular userdata. */
   UDTYPE_IO_FILE,	/* I/O library FILE. */
   UDTYPE_FFI_CLIB,	/* FFI C library namespace. */
+  UDTYPE_BUFFER,	/* String buffer. */
   UDTYPE__MAX
 };
 
@@ -407,7 +413,7 @@ typedef struct GCproto {
 #define PROTO_UV_IMMUTABLE	0x4000	/* Immutable upvalue. */
 
 #define proto_kgc(pt, idx) \
-  check_exp((uintptr_t)(intptr_t)(idx) >= (uintptr_t)-(intptr_t)(pt)->sizekgc, \
+  check_exp((uintptr_t)(intptr_t)(idx) >= ~(uintptr_t)(pt)->sizekgc+1u, \
 	    gcref(mref((pt)->k, GCRef)[(idx)]))
 #define proto_knumtv(pt, idx) \
   check_exp((uintptr_t)(idx) < (pt)->sizekn, &mref((pt)->k, TValue)[(idx)])
@@ -505,7 +511,7 @@ typedef struct GCtab {
 } GCtab;
 
 #define sizetabcolo(n)	((n)*sizeof(TValue) + sizeof(GCtab))
-#define tabref(r)	(&gcref((r))->tab)
+#define tabref(r)	((GCtab *)gcref((r)))
 #define noderef(r)	(mref((r), Node))
 #define nextnode(n)	(mref((n)->next, Node))
 #if LJ_GC64
@@ -839,6 +845,7 @@ static LJ_AINLINE void *lightudV(global_
   uint64_t seg = lightudseg(u);
   uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
   lj_assertG(tvislightud(o), "lightuserdata expected");
+  if (seg == (1 << LJ_LIGHTUD_BITS_SEG)-1) return NULL;
   lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg);
   return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u));
 }
@@ -920,7 +927,7 @@ static LJ_AINLINE void setgcV(lua_State
 }
 
 #define define_setV(name, type, tag) \
-static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \
+static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
 { \
   setgcV(L, o, obj2gco(v), tag); \
 }
@@ -1035,4 +1042,18 @@ LJ_DATA const char *const lj_obj_itypena
 LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
 LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
 
+#if LJ_ABI_PAUTH
+#if LJ_TARGET_ARM64
+#include <ptrauth.h>
+#define lj_ptr_sign(ptr, ctx) \
+  ptrauth_sign_unauthenticated((ptr), ptrauth_key_function_pointer, (ctx))
+#define lj_ptr_strip(ptr) ptrauth_strip((ptr), ptrauth_key_function_pointer)
+#else  /* !LJ_TARGET_ARM64 */
+#error "No support for pointer authentication for this architecture"
+#endif  /* LJ_TARGET_ARM64 */
+#else  /* !LJ_ABI_PAUTH: both macros just yield ptr unchanged. */
+#define lj_ptr_sign(ptr, ctx) (ptr)
+#define lj_ptr_strip(ptr) (ptr)
+#endif  /* LJ_ABI_PAUTH */
+
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_dce.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_dce.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_dce.c
@@ -1,6 +1,6 @@
 /*
 ** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_dce_c
@@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J)
     IRIns *ir = IR(ins);
     if (irt_ismarked(ir->t)) {
       irt_clearmark(ir->t);
-      pchain[ir->o] = &ir->prev;
     } else if (!ir_sideeff(ir)) {
       *pchain[ir->o] = ir->prev;  /* Reroute original instruction chain. */
       lj_ir_nop(ir);
       continue;
     }
+    pchain[ir->o] = &ir->prev;
     if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
     if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
   }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_fold.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_fold.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_fold.c
@@ -2,7 +2,7 @@
 ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
 ** ABCelim: Array Bounds Check Elimination.
 ** CSE: Common-Subexpression Elimination.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_fold_c
@@ -236,14 +236,10 @@ LJFOLDF(kfold_fpcall2)
   return NEXTFOLD;
 }
 
-LJFOLD(POW KNUM KINT)
 LJFOLD(POW KNUM KNUM)
 LJFOLDF(kfold_numpow)
 {
-  lua_Number a = knumleft;
-  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
-  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
-  return lj_ir_knum(J, y);
+  return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
 }
 
 /* Must not use kfold_kref for numbers (could be NaN). */
@@ -271,7 +267,7 @@ static int32_t kfold_intop(int32_t k1, i
   case IR_SUB: k1 -= k2; break;
   case IR_MUL: k1 *= k2; break;
   case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
-  case IR_NEG: k1 = -k1; break;
+  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
   case IR_BAND: k1 &= k2; break;
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
@@ -381,10 +377,10 @@ static uint64_t kfold_int64arith(jit_Sta
   case IR_BOR: k1 |= k2; break;
   case IR_BXOR: k1 ^= k2; break;
   case IR_BSHL: k1 <<= (k2 & 63); break;
-  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
-  case IR_BSAR: k1 >>= (k2 & 63); break;
-  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
-  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
+  case IR_BSHR: k1 >>= (k2 & 63); break;
+  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
+  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
+  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
   default: lj_assertJ(0, "bad IR op %d", op); break;
   }
 #else
@@ -514,6 +510,7 @@ LJFOLDF(kfold_snew_kptr)
 }
 
 LJFOLD(SNEW any KINT)
+LJFOLD(XSNEW any KINT)
 LJFOLDF(kfold_snew_empty)
 {
   if (fright->i == 0)
@@ -577,22 +574,51 @@ LJFOLDF(kfold_strcmp)
 ** The compromise is to declare them as loads, emit them like stores and
 ** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
 ** fragments left over from CSE are eliminated by DCE.
+**
+** The string buffer methods emit a USE instead of a BUFSTR to keep the
+** chain alive.
 */
 
-/* BUFHDR is emitted like a store, see below. */
+LJFOLD(BUFHDR any any)
+LJFOLDF(bufhdr_merge)
+{  /* Rule for every BUFHDR: CSEFOLD for IRBUFHDR_WRITE mode, EMITFOLD otherwise. */
+  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
+}
 
-LJFOLD(BUFPUT BUFHDR BUFSTR)
-LJFOLDF(bufput_append)
+LJFOLD(BUFPUT any BUFSTR)
+LJFOLDF(bufput_bufstr)
 {
-  /* New buffer, no other buffer op inbetween and same buffer? */
-  if ((J->flags & JIT_F_OPT_FWD) &&
-      !(fleft->op2 & IRBUFHDR_APPEND) &&
-      fleft->prev == fright->op2 &&
-      fleft->op1 == IR(fright->op2)->op1) {
-    IRRef ref = fins->op1;
-    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
-    IR(ref)->op1 = fright->op1;
-    return ref;
+  if ((J->flags & JIT_F_OPT_FWD)) {
+    IRRef hdr = fright->op2;
+    /* New buffer, no other buffer op inbetween and same buffer? */
+    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
+	fleft->prev == hdr &&
+	fleft->op1 == IR(hdr)->op1 &&
+	!(irt_isphi(fright->t) && IR(hdr)->prev) &&
+	(!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
+      IRRef ref = fins->op1;
+      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
+      IR(ref)->op1 = fright->op1;
+      return ref;
+    }
+    /* Replay puts to global temporary buffer. */
+    if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
+      IRIns *ir = IR(fright->op1);
+      /* For now only handle single string.reverse .lower .upper .rep. */
+      if (ir->o == IR_CALLL &&
+	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
+	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
+	IRIns *carg1 = IR(ir->op1);
+	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
+	  IRIns *carg2 = IR(carg1->op1);
+	  if (carg2->op1 == hdr) {
+	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
+	  }
+	} else if (carg1->op1 == hdr) {
+	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
+	}
+      }
+    }
   }
   return EMITFOLD;  /* Always emit, CSE later. */
 }
@@ -626,14 +652,14 @@ LJFOLDF(bufstr_kfold_cse)
 	     "bad buffer constructor IR op %d", fleft->o);
   if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
     if (fleft->o == IR_BUFHDR) {  /* No put operations? */
-      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
+      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
 	return lj_ir_kstr(J, &J2G(J)->strempty);
       fins->op1 = fleft->op1;
       fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
       return CSEFOLD;
     } else if (fleft->o == IR_BUFPUT) {
       IRIns *irb = IR(fleft->op1);
-      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
+      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
 	return fleft->op2;  /* Shortcut for a single put operation. */
     }
   }
@@ -646,7 +672,7 @@ LJFOLDF(bufstr_kfold_cse)
 	lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
 		   ira->o == IR_CALLL || ira->o == IR_CARG,
 		   "bad buffer constructor IR op %d", ira->o);
-	if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
+	if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
 	  return ref;  /* CSE succeeded. */
 	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
 	  break;
@@ -1009,8 +1035,7 @@ LJFOLDF(simplify_numadd_xneg)
 LJFOLD(SUB any KNUM)
 LJFOLDF(simplify_numsub_k)
 {
-  lua_Number n = knumright;
-  if (n == 0.0)  /* x - (+-0) ==> x */
+  if (ir_knum(fright)->u64 == 0)  /* x - (+0) ==> x */
     return LEFTFOLD;
   return NEXTFOLD;
 }
@@ -1084,61 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
   return RETRYFOLD;
 }
 
-LJFOLD(POW any KINT)
-LJFOLDF(simplify_numpow_xkint)
+LJFOLD(POW any KNUM)
+LJFOLDF(simplify_numpow_k)
 {
-  int32_t k = fright->i;
-  TRef ref = fins->op1;
-  if (k == 0)  /* x ^ 0 ==> 1 */
+  if (knumright == 0.0)  /* x ^ 0 ==> 1 */
     return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
-  if (k == 1)  /* x ^ 1 ==> x */
+  else if (knumright == 1.0)  /* x ^ 1 ==> x */
     return LEFTFOLD;
-  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
+  else if (knumright == 2.0)  /* x ^ 2 ==> x * x */
+    return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
+  else
     return NEXTFOLD;
-  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
-    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
-    k = -k;
-  }
-  /* Unroll x^k for 1 <= k <= 65536. */
-  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
-    ref = emitir(IRTN(IR_MUL), ref, ref);
-  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
-    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
-    for (; k != 1; k >>= 1) {
-      if (k & 1)
-	ref = emitir(IRTN(IR_MUL), ref, tmp);
-      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
-    }
-    ref = emitir(IRTN(IR_MUL), ref, tmp);
-  }
-  return ref;
-}
-
-LJFOLD(POW any KNUM)
-LJFOLDF(simplify_numpow_xknum)
-{
-  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
-    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
-  return NEXTFOLD;
-}
-
-LJFOLD(POW KNUM any)
-LJFOLDF(simplify_numpow_kx)
-{
-  lua_Number n = knumleft;
-  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
-#if LJ_TARGET_X86ORX64
-    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
-    fins->o = IR_CONV;
-    fins->op1 = fins->op2;
-    fins->op2 = IRCONV_NUM_INT;
-    fins->op2 = (IRRef1)lj_opt_fold(J);
-#endif
-    fins->op1 = (IRRef1)lj_ir_knum_one(J);
-    fins->o = IR_LDEXP;
-    return RETRYFOLD;
-  }
-  return NEXTFOLD;
 }
 
 /* -- Simplify conversions ------------------------------------------------ */
@@ -1297,6 +1278,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
 LJFOLD(CONV MUL IRCONV_U32_U64)
 LJFOLDF(simplify_conv_narrow)
 {
+#if LJ_64
+  UNUSED(J);
+  return NEXTFOLD;
+#else
   IROp op = (IROp)fleft->o;
   IRType t = irt_type(fins->t);
   IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1307,6 +1292,7 @@ LJFOLDF(simplify_conv_narrow)
   fins->op1 = op1;
   fins->op2 = op2;
   return RETRYFOLD;
+#endif
 }
 
 /* Special CSE rule for CONV. */
@@ -1380,7 +1366,7 @@ LJFOLDF(simplify_intsub_k)
   if (fright->i == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
+  fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u));  /* Overflow for -2^31 ok. */
   return RETRYFOLD;
 }
 
@@ -1411,7 +1397,7 @@ LJFOLDF(simplify_intsub_k64)
   if (k == 0)  /* i - 0 ==> i */
     return LEFTFOLD;
   fins->o = IR_ADD;  /* i - k ==> i + (-k) */
-  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
+  fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u);
   return RETRYFOLD;
 }
 
@@ -1926,14 +1912,15 @@ LJFOLDF(abc_fwd)
 LJFOLD(ABC any KINT)
 LJFOLDF(abc_k)
 {
+  PHIBARRIER(fleft);
   if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
     IRRef ref = J->chain[IR_ABC];
     IRRef asize = fins->op1;
     while (ref > asize) {
       IRIns *ir = IR(ref);
       if (ir->op1 == asize && irref_isk(ir->op2)) {
-	int32_t k = IR(ir->op2)->i;
-	if (fright->i > k)
+	uint32_t k = (uint32_t)IR(ir->op2)->i;
+	if ((uint32_t)fright->i > k)
 	  ir->op2 = fins->op2;
 	return DROPFOLD;
       }
@@ -1985,7 +1972,10 @@ LJFOLD(NE any any)
 LJFOLDF(comm_equal)
 {
   /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
-  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
+  if (fins->op1 == fins->op2 &&
+      (!irt_isnum(fins->t) ||
+       (fleft->o == IR_CONV &&  /* Converted integers cannot be NaN. */
+	(uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))))
     return CONDFOLD(fins->o == IR_EQ);
   return fold_comm_swap(J);
 }
@@ -2144,8 +2134,26 @@ LJFOLDX(lj_opt_fwd_uload)
 LJFOLD(ALEN any any)
 LJFOLDX(lj_opt_fwd_alen)
 
+/* Try to merge UREFO/UREFC into referenced instruction ir (located at ref). */
+static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
+{
+  if (ir->o == IR_UREFO && irt_isguard(ir->t)) {  /* Only a guarded UREFO needs care. */
+    /* Might be pointing to some other coroutine's stack.
+    ** And GC might shrink said stack, thereby repointing the upvalue.
+    ** GC might even collect said coroutine, thereby closing the upvalue.
+    */
+    if (gcstep_barrier(J, ref))
+      return EMITFOLD;  /* So cannot merge. */
+    /* Current fins wants a check, but ir doesn't have one. */
+    if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
+	irt_type(ir->t) == IRT_IGC)
+      ir->t.irt += IRT_PGC-IRT_IGC;  /* So install a check (retype IGC as PGC). */
+  }
+  return ref;  /* Not a TRef, but the caller doesn't care. */
+}
+
 /* Upvalue refs are really loads, but there are no corresponding stores.
-** So CSE is ok for them, except for UREFO across a GC step (see below).
+** So CSE is ok for them, except for guarded UREFO across a GC step.
 ** If the referenced function is const, its upvalue addresses are const, too.
 ** This can be used to improve CSE by looking for the same address,
 ** even if the upvalues originate from a different function.
@@ -2163,9 +2171,7 @@ LJFOLDF(cse_uref)
       if (irref_isk(ir->op1)) {
 	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
 	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
-	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
-	    break;
-	  return ref;
+	  return merge_uref(J, ref, ir);
 	}
       }
       ref = ir->prev;
@@ -2174,6 +2180,24 @@ LJFOLDF(cse_uref)
   return EMITFOLD;
 }
 
+/* Custom CSE for UREFO: search the UREFO chain, merge matches via merge_uref(). */
+LJFOLD(UREFO any any)
+LJFOLDF(cse_urefo)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+    IRRef ref = J->chain[IR_UREFO];  /* Head of the UREFO chain. */
+    IRRef lim = fins->op1;  /* Search stops at the first operand. */
+    IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);  /* Both operands packed for one compare. */
+    while (ref > lim) {
+      IRIns *ir = IR(ref);
+      if (ir->op12 == op12)  /* Same op1 and op2 as fins. */
+	return merge_uref(J, ref, ir);
+      ref = ir->prev;
+    }
+  }
+  return EMITFOLD;  /* CSE disabled or no match found. */
+}
+
 LJFOLD(HREFK any any)
 LJFOLDX(lj_opt_fwd_hrefk)
 
@@ -2275,6 +2299,27 @@ LJFOLDF(fload_str_len_tostr)
   return NEXTFOLD;
 }
 
+LJFOLD(FLOAD any IRFL_SBUF_W)
+LJFOLD(FLOAD any IRFL_SBUF_E)
+LJFOLD(FLOAD any IRFL_SBUF_B)
+LJFOLD(FLOAD any IRFL_SBUF_L)
+LJFOLD(FLOAD any IRFL_SBUF_REF)
+LJFOLD(FLOAD any IRFL_SBUF_R)
+LJFOLDF(fload_sbuf)
+{
+  TRef tr = lj_opt_fwd_fload(J);  /* Try regular field load forwarding first. */
+  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;  /* Keep tr only if lj_opt_fwd_sbuf() reports no conflict after it. */
+}
+
+/* The fast function ID of function objects is immutable. */
+LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
+LJFOLDF(fload_func_ffid_kgc)
+{
+  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
+    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);  /* Fold to the constant function's ffid. */
+  return NEXTFOLD;  /* JIT_F_OPT_FOLD flag not set. */
+}
+
 /* The C type ID of cdata objects is immutable. */
 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
 LJFOLDF(fload_cdata_typeid_kgc)
@@ -2358,18 +2403,24 @@ LJFOLDF(xload_kptr)
 LJFOLD(XLOAD any any)
 LJFOLDX(lj_opt_fwd_xload)
 
+/* -- Frame handling ------------------------------------------------------ */
+
+/* Prevent CSE of a REF_BASE operand across IR_RETF. */
+LJFOLD(SUB any BASE)
+LJFOLD(SUB BASE any)
+LJFOLD(EQ any BASE)
+LJFOLDF(fold_base)
+{
+  return lj_opt_cselim(J, J->chain[IR_RETF]);  /* Head of the RETF chain is passed, presumably as the CSE search limit. */
+}
+
 /* -- Write barriers ------------------------------------------------------ */
 
 /* Write barriers are amenable to CSE, but not across any incremental
 ** GC steps.
-**
-** The same logic applies to open upvalue references, because a stack
-** may be resized during a GC step (not the current stack, but maybe that
-** of a coroutine).
 */
 LJFOLD(TBAR any)
 LJFOLD(OBAR any any)
-LJFOLD(UREFO any any)
 LJFOLDF(barrier_tab)
 {
   TRef tr = lj_opt_cse(J);
@@ -2421,6 +2472,7 @@ LJFOLD(XSTORE any any)
 LJFOLDX(lj_opt_dse_xstore)
 
 LJFOLD(NEWREF any any)  /* Treated like a store. */
+LJFOLD(TMPREF any any)
 LJFOLD(CALLA any any)
 LJFOLD(CALLL any any)  /* Safeguard fallback. */
 LJFOLD(CALLS any any)
@@ -2431,7 +2483,6 @@ LJFOLD(TNEW any any)
 LJFOLD(TDUP any)
 LJFOLD(CNEW any any)
 LJFOLD(XSNEW any any)
-LJFOLD(BUFHDR any any)
 LJFOLDX(lj_ir_emit)
 
 /* ------------------------------------------------------------------------ */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_loop.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_loop.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_loop.c
@@ -1,6 +1,6 @@
 /*
 ** LOOP: Loop Optimizations.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_loop_c
@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J
   /* Setup new snapshot. */
   snap->mapofs = (uint32_t)nmapofs;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = nslots;
   snap->topslot = osnap->topslot;
   snap->count = 0;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_mem.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_mem.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_mem.c
@@ -3,7 +3,7 @@
 ** AA: Alias Analysis using high-level semantic disambiguation.
 ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
 ** DSE: Dead-Store Elimination.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_mem_c
@@ -72,6 +72,34 @@ static AliasRet aa_table(jit_State *J, I
   return aa_escape(J, taba, tabb);
 }
 
+/* Check whether there's no aliasing table.clear for table ta after lim. */
+static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
+{
+  IRRef ref = J->chain[IR_CALLS];  /* Head of the CALLS chain. */
+  while (ref > lim) {  /* Follow prev links until lim is reached. */
+    IRIns *calls = IR(ref);
+    if (calls->op2 == IRCALL_lj_tab_clear &&
+	(ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
+      return 0;  /* Conflict: clears ta itself or a table not proven distinct. */
+    ref = calls->prev;
+  }
+  return 1;  /* No conflict. Can safely FOLD/CSE. */
+}
+
+/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
+int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
+{
+  IRRef ta = fins->op1;  /* Table operand of the instruction being folded. */
+  IRRef ref = J->chain[IR_NEWREF];  /* Head of the NEWREF chain. */
+  while (ref > lim) {  /* Follow prev links until lim is reached. */
+    IRIns *newref = IR(ref);
+    if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
+      return 0;  /* Conflict. */
+    ref = newref->prev;
+  }
+  return fwd_aa_tab_clear(J, lim, ta);  /* No NEWREF conflict: result depends on table.clear check. */
+}
+
 /* Alias analysis for array and hash access using key-based disambiguation. */
 static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
 {
@@ -154,9 +182,11 @@ static TRef fwd_ahload(jit_State *J, IRR
     IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
     IRRef tab = ir->op1;
     ir = IR(tab);
-    if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
+    if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) &&
+	fwd_aa_tab_clear(J, tab, tab)) {
       /* A NEWREF with a number key may end up pointing to the array part.
       ** But it's referenced from HSTORE and not found in the ASTORE chain.
+      ** Or a NEWREF may rehash the table and move unrelated number keys.
       ** For now simply consider this a conflict without forwarding anything.
       */
       if (xr->o == IR_AREF) {
@@ -167,6 +197,11 @@ static TRef fwd_ahload(jit_State *J, IRR
 	    goto cselim;
 	  ref2 = newref->prev;
 	}
+      } else {
+	IRIns *key = IR(xr->op2);
+	if (key->o == IR_KSLOT) key = IR(key->op1);
+	if (irt_isnum(key->t) && J->chain[IR_NEWREF] > tab)
+	  goto cselim;
       }
       /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
       ** But the above search for conflicting stores was limited by xref.
@@ -194,8 +229,8 @@ static TRef fwd_ahload(jit_State *J, IRR
 	if (key->o == IR_KSLOT) key = IR(key->op1);
 	lj_ir_kvalue(J->L, &keyv, key);
 	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
-	lj_assertJ(itype2irt(tv) == irt_type(fins->t),
-		   "mismatched type in constant table");
+	if (itype2irt(tv) != irt_type(fins->t))
+	  return 0;  /* Type instability in loop-carried dependency. */
 	if (irt_isnum(fins->t))
 	  return lj_ir_knum_u64(J, tv->u64);
 	else if (LJ_DUALNUM && irt_isint(fins->t))
@@ -269,7 +304,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_St
   while (ref > tab) {
     IRIns *newref = IR(ref);
     if (tab == newref->op1) {
-      if (fright->op1 == newref->op2)
+      if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab))
 	return ref;  /* Forward from NEWREF. */
       else
 	goto docse;
@@ -279,7 +314,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_St
     ref = newref->prev;
   }
   /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */
-  if (IR(tab)->o == IR_TDUP)
+  if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab))
     fins->t.irt &= ~IRT_GUARD;  /* Drop HREFK guard. */
 docse:
   return CSEFOLD;
@@ -313,34 +348,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(ji
   return 1;  /* No conflict. Can fold to niltv. */
 }
 
-/* Check whether there's no aliasing table.clear. */
-static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
-{
-  IRRef ref = J->chain[IR_CALLS];
-  while (ref > lim) {
-    IRIns *calls = IR(ref);
-    if (calls->op2 == IRCALL_lj_tab_clear &&
-	(ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
-      return 0;  /* Conflict. */
-    ref = calls->prev;
-  }
-  return 1;  /* No conflict. Can safely FOLD/CSE. */
-}
-
-/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
-int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
-{
-  IRRef ta = fins->op1;
-  IRRef ref = J->chain[IR_NEWREF];
-  while (ref > lim) {
-    IRIns *newref = IR(ref);
-    if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
-      return 0;  /* Conflict. */
-    ref = newref->prev;
-  }
-  return fwd_aa_tab_clear(J, lim, ta);
-}
-
 /* ASTORE/HSTORE elimination. */
 TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
 {
@@ -364,7 +371,10 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_
       /* Different value: try to eliminate the redundant store. */
       if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
 	IRIns *ir;
-	/* Check for any intervening guards (includes conflicting loads). */
+	/* Check for any intervening guards (includes conflicting loads).
+	** Note that lj_tab_keyindex and lj_vm_next don't need guards,
+	** since they are followed by at least one guarded VLOAD.
+	*/
 	for (ir = IR(J->cur.nins-1); ir > store; ir--)
 	  if (irt_isguard(ir->t) || ir->o == IR_ALEN)
 	    goto doemit;  /* No elimination possible. */
@@ -428,7 +438,7 @@ TRef LJ_FASTCALL lj_opt_fwd_alen(jit_Sta
 	    fins->op2 = aref->op2;  /* Set ALEN hint. */
 	  }
 	  goto doemit;  /* Conflicting store, possibly giving a hint. */
-	} else if (aa_table(J, tab, fref->op1) == ALIAS_NO) {
+	} else if (aa_table(J, tab, fref->op1) != ALIAS_NO) {
 	  goto doemit;  /* Conflicting store. */
 	}
 	sref = store->prev;
@@ -454,18 +464,23 @@ doemit:
 */
 static AliasRet aa_uref(IRIns *refa, IRIns *refb)
 {
-  if (refa->o != refb->o)
-    return ALIAS_NO;  /* Different UREFx type. */
   if (refa->op1 == refb->op1) {  /* Same function. */
     if (refa->op2 == refb->op2)
       return ALIAS_MUST;  /* Same function, same upvalue idx. */
     else
       return ALIAS_NO;  /* Same function, different upvalue idx. */
   } else {  /* Different functions, check disambiguation hash values. */
-    if (((refa->op2 ^ refb->op2) & 0xff))
+    if (((refa->op2 ^ refb->op2) & 0xff)) {
       return ALIAS_NO;  /* Upvalues with different hash values cannot alias. */
-    else
-      return ALIAS_MAY;  /* No conclusion can be drawn for same hash value. */
+    } else if (refa->o != refb->o) {
+      /* Different UREFx type, but need to confirm the UREFO really is open. */
+      if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
+      else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
+      return ALIAS_NO;
+    } else {
+      /* No conclusion can be drawn for same hash value and same UREFx type. */
+      return ALIAS_MAY;
+    }
   }
 }
 
@@ -620,8 +635,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_S
 	goto doemit;
       break;  /* Otherwise continue searching. */
     case ALIAS_MUST:
-      if (store->op2 == val)  /* Same value: drop the new store. */
-	return DROPFOLD;
+      if (store->op2 == val &&
+	  !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
+	return DROPFOLD;  /* Same value: drop the new store. */
       /* Different value: try to eliminate the redundant store. */
       if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
 	IRIns *ir;
@@ -642,6 +658,29 @@ doemit:
   return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
 }
 
+/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. Returns 1 if none is found after lim, else 0. */
+int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
+{
+  IRRef ref;
+  if (J->chain[IR_BUFPUT] > lim)
+    return 0;  /* Conflict: a BUFPUT exists after lim. */
+  ref = J->chain[IR_CALLS];  /* First pass: the CALLS chain. */
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict: call ID within [lj_strfmt_putint, lj_buf_tostr). */
+    ref = ir->prev;
+  }
+  ref = J->chain[IR_CALLL];  /* Second pass: same range check on the CALLL chain. */
+  while (ref > lim) {
+    IRIns *ir = IR(ref);
+    if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+      return 0;  /* Conflict. */
+    ref = ir->prev;
+  }
+  return 1;  /* No conflict. Can safely FOLD/CSE. */
+}
+
 /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
 
 /* Find cdata allocation for a reference (if any). */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_narrow.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_narrow.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_narrow.c
@@ -1,7 +1,7 @@
 /*
 ** NARROW: Narrowing of numbers to integers (double to int32_t).
 ** STRIPOV: Stripping of overflow checks.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_narrow_c
@@ -584,36 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRe
   return emitir(IRTN(IR_SUB), rb, tmp);
 }
 
-/* Narrowing of power operator or math.pow. */
-TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
-{
-  rb = conv_str_tonum(J, rb, vb);
-  rb = lj_ir_tonum(J, rb);  /* Left arg is always treated as an FP number. */
-  rc = conv_str_tonum(J, rc, vc);
-  /* Narrowing must be unconditional to preserve (-x)^i semantics. */
-  if (tvisint(vc) || numisint(numV(vc))) {
-    int checkrange = 0;
-    /* pow() is faster for bigger exponents. But do this only for (+k)^i. */
-    if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
-      int32_t k = numberVint(vc);
-      if (!(k >= -65536 && k <= 65536)) goto force_pow_num;
-      checkrange = 1;
-    }
-    if (!tref_isinteger(rc)) {
-      /* Guarded conversion to integer! */
-      rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
-    }
-    if (checkrange && !tref_isk(rc)) {  /* Range guard: -65536 <= i <= 65536 */
-      TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
-      emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
-    }
-  } else {
-force_pow_num:
-    rc = lj_ir_tonum(J, rc);  /* Want POW(num, num), not POW(num, int). */
-  }
-  return emitir(IRTN(IR_POW), rb, rc);
-}
-
 /* -- Predictive narrowing of induction variables ------------------------- */
 
 /* Narrow a single runtime value. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_sink.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_sink.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_sink.c
@@ -1,6 +1,6 @@
 /*
 ** SINK: Allocation Sinking and Store Sinking.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_sink_c
@@ -36,12 +36,14 @@ static IRIns *sink_checkalloc(jit_State
 }
 
 /* Recursively check whether a value depends on a PHI. */
-static int sink_phidep(jit_State *J, IRRef ref)
+static int sink_phidep(jit_State *J, IRRef ref, int *workp)
 {
   IRIns *ir = IR(ref);
+  if (!*workp) return 1;  /* Give up and pretend it does. */
+  (*workp)--;
   if (irt_isphi(ir->t)) return 1;
-  if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1;
-  if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1;
+  if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1, workp)) return 1;
+  if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2, workp)) return 1;
   return 0;
 }
 
@@ -56,7 +58,13 @@ static int sink_checkphi(jit_State *J, I
       return 1;  /* Sinkable PHI. */
     }
     /* Otherwise the value must be loop-invariant. */
-    return ref < J->loopref && !sink_phidep(J, ref);
+    if (ref < J->loopref) {
+      /* Check for PHI dependencies, but give up after reasonable effort. */
+      int work = 64;
+      return !sink_phidep(J, ref, &work);
+    } else {
+      return 0;  /* Loop-variant. */
+    }
   }
   return 1;  /* Constant (non-PHI). */
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_split.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_opt_split.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_opt_split.c
@@ -1,6 +1,6 @@
 /*
 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_opt_split_c
@@ -400,7 +400,7 @@ static void split_ir(jit_State *J)
 	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
 	break;
       case IR_POW:
-	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
 	break;
       case IR_FPMATH:
 	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
@@ -645,7 +645,7 @@ static void split_ir(jit_State *J)
       tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
 #endif
       ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
-    } else if (ir->o == IR_TOSTR) {
+    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
       if (hisubst[ir->op1]) {
 	if (irref_isk(ir->op1))
 	  nir->op1 = ir->op1;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_parse.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_parse.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_parse.c
@@ -1,6 +1,6 @@
 /*
 ** Lua parser (source code -> bytecode).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -964,22 +964,22 @@ static void bcemit_unop(FuncState *fs, B
 #if LJ_HASFFI
       if (e->k == VKCDATA) {  /* Fold in-place since cdata is not interned. */
 	GCcdata *cd = cdataV(&e->u.nval);
-	int64_t *p = (int64_t *)cdataptr(cd);
+	uint64_t *p = (uint64_t *)cdataptr(cd);
 	if (cd->ctypeid == CTID_COMPLEX_DOUBLE)
-	  p[1] ^= (int64_t)U64x(80000000,00000000);
+	  p[1] ^= U64x(80000000,00000000);
 	else
-	  *p = -*p;
+	  *p = ~*p+1u;
 	return;
       } else
 #endif
       if (expr_isnumk(e) && !expr_numiszero(e)) {  /* Avoid folding to -0. */
 	TValue *o = expr_numtv(e);
 	if (tvisint(o)) {
-	  int32_t k = intV(o);
-	  if (k == -k)
+	  int32_t k = intV(o), negk = (int32_t)(~(uint32_t)k+1u);
+	  if (k == negk)
 	    setnumV(o, -(lua_Number)k);
 	  else
-	    setintV(o, -k);
+	    setintV(o, negk);
 	  return;
 	} else {
 	  o->u64 ^= U64x(80000000,00000000);
@@ -1465,7 +1465,7 @@ static size_t fs_prep_var(LexState *ls,
     MSize len = s->len+1;
     char *p = lj_buf_more(&ls->sb, len);
     p = lj_buf_wmem(p, strdata(s), len);
-    setsbufP(&ls->sb, p);
+    ls->sb.w = p;
   }
   *ofsvar = sbuflen(&ls->sb);
   lastpc = 0;
@@ -1486,7 +1486,7 @@ static size_t fs_prep_var(LexState *ls,
       startpc = vs->startpc;
       p = lj_strfmt_wuleb128(p, startpc-lastpc);
       p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
-      setsbufP(&ls->sb, p);
+      ls->sb.w = p;
       lastpc = startpc;
     }
   }
@@ -1499,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, G
 {
   setmref(pt->uvinfo, p);
   setmref(pt->varinfo, (char *)p + ofsvar);
-  memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb));  /* Copy from temp. buffer. */
+  memcpy(p, ls->sb.b, sbuflen(&ls->sb));  /* Copy from temp. buffer. */
 }
 #else
 
@@ -1554,7 +1554,7 @@ static void fs_fixup_ret(FuncState *fs)
 	/* Replace with UCLO plus branch. */
 	fs->bcbase[pc].ins = BCINS_AD(BC_UCLO, 0, offset);
 	break;
-      case BC_UCLO:
+      case BC_FNEW:
 	return;  /* We're done. */
       default:
 	break;
@@ -2513,11 +2513,14 @@ static void parse_for_num(LexState *ls,
 */
 static int predict_next(LexState *ls, FuncState *fs, BCPos pc)
 {
-  BCIns ins = fs->bcbase[pc].ins;
+  BCIns ins;
   GCstr *name;
   cTValue *o;
+  if (pc >= fs->bclim) return 0;
+  ins = fs->bcbase[pc].ins;
   switch (bc_op(ins)) {
   case BC_MOV:
+    if (bc_d(ins) >= fs->nactvar) return 0;
     name = gco2str(gcref(var_get(ls, fs, bc_d(ins)).name));
     break;
   case BC_UGET:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_parse.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_parse.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_parse.h
@@ -1,6 +1,6 @@
 /*
 ** Lua parser (source code -> bytecode).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_PARSE_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_prng.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_prng.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_prng.c
@@ -1,6 +1,6 @@
 /*
 ** Pseudo-random number generation.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_prng_c
@@ -83,10 +83,14 @@ extern int XNetRandom(void *buf, unsigne
 
 extern int sys_get_random_number(void *buf, uint64_t len);
 
-#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
 
 extern int sceRandomGetRandomNumber(void *buf, size_t len);
 
+#elif LJ_TARGET_NX
+
+#include <unistd.h>
+
 #elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
 
 #define WIN32_LEAN_AND_MEAN
@@ -109,18 +113,24 @@ static PRGR libfunc_rgr;
 #include <sys/syscall.h>
 #else
 
-#if LJ_TARGET_OSX
+#if LJ_TARGET_OSX && !LJ_TARGET_IOS
+/*
+** In their infinite wisdom Apple decided to disallow getentropy() in the
+** iOS App Store. Even though the call is common to all BSD-ish OS, it's
+** recommended by Apple in their own security-related docs, and, to top
+** off the foolery, /dev/urandom is handled by the same kernel code,
+** yet accessing it is actually permitted (but less efficient).
+*/
 #include <Availability.h>
-#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 || \
-    __IPHONE_OS_VERSION_MIN_REQUIRED >= 100000
+#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
 #define LJ_TARGET_HAS_GETENTROPY	1
 #endif
-#elif LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
+#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX
 #define LJ_TARGET_HAS_GETENTROPY	1
 #endif
 
 #if LJ_TARGET_HAS_GETENTROPY
-extern int getentropy(void *buf, size_t len);
+extern int getentropy(void *buf, size_t len)
 #ifdef __ELF__
   __attribute__((weak))
 #endif
@@ -165,9 +175,14 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNG
   if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0)
     goto ok;
 
-#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
+
+  if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
+    goto ok;
+
+#elif LJ_TARGET_NX
 
-  if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u) == 0)
+  if (getentropy(rs->u, sizeof(rs->u)) == 0)
     goto ok;
 
 #elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_prng.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_prng.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_prng.h
@@ -1,6 +1,6 @@
 /*
 ** Pseudo-random number generation.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_PRNG_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_profile.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_profile.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_profile.c
@@ -1,6 +1,6 @@
 /*
 ** Low-overhead profiling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_profile_c
@@ -185,7 +185,11 @@ static void profile_timer_start(ProfileS
   tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
   tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
   setitimer(ITIMER_PROF, &tm, NULL);
+#if LJ_TARGET_QNX
+  sa.sa_flags = 0;
+#else
   sa.sa_flags = SA_RESTART;
+#endif
   sa.sa_handler = profile_signal;
   sigemptyset(&sa.sa_mask);
   sigaction(SIGPROF, &sa, &ps->oldsa);
@@ -346,8 +350,7 @@ LUA_API void luaJIT_profile_stop(lua_Sta
     lj_trace_flushall(L);
 #endif
     lj_buf_free(g, &ps->sb);
-    setmref(ps->sb.b, NULL);
-    setmref(ps->sb.e, NULL);
+    ps->sb.w = ps->sb.e = NULL;
     ps->g = NULL;
   }
 }
@@ -362,7 +365,7 @@ LUA_API const char *luaJIT_profile_dumps
   lj_buf_reset(sb);
   lj_debug_dumpstack(L, sb, fmt, depth);
   *len = (size_t)sbuflen(sb);
-  return sbufB(sb);
+  return sb->b;
 }
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_profile.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_profile.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_profile.h
@@ -1,6 +1,6 @@
 /*
 ** Low-overhead profiling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_PROFILE_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_record.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_record.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_record.c
@@ -1,6 +1,6 @@
 /*
 ** Trace recorder (bytecode -> SSA IR).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_record_c
@@ -116,6 +116,7 @@ static void rec_check_slots(jit_State *J
       cTValue *tv = &base[s];
       IRRef ref = tref_ref(tr);
       IRIns *ir = NULL;  /* Silence compiler. */
+      lj_assertJ(tv < J->L->top, "slot %d above top of Lua stack", s);
       if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
 	lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
 		   "slot %d ref %04d out of range", s, ref - REF_BIAS);
@@ -156,6 +157,9 @@ static void rec_check_slots(jit_State *J
 	lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
 		   "cont slot %d not followed by frame", s);
 	depth++;
+      } else if ((tr & TREF_KEYINDEX)) {
+	lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d",
+				   s, tref_type(tr));
       } else {
 	/* Number repr. may differ, but other types must be the same. */
 	lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
@@ -259,6 +263,14 @@ TRef lj_record_constify(jit_State *J, cT
     return 0;  /* Can't represent lightuserdata (pointless). */
 }
 
+/* Emit a VLOAD of type t (as a guard) from ref at index idx. */
+TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t)
+{
+  TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx);
+  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+  return tr;
+}
+
 /* -- Record loop ops ----------------------------------------------------- */
 
 /* Loop event. */
@@ -275,9 +287,9 @@ static void canonicalize_slots(jit_State
   if (LJ_DUALNUM) return;
   for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
     TRef tr = J->slot[s];
-    if (tref_isinteger(tr)) {
+    if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) {
       IRIns *ir = IR(tref_ref(tr));
-      if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
+      if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY))))
 	J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
     }
   }
@@ -598,6 +610,7 @@ static void rec_loop_interp(jit_State *J
 {
   if (J->parent == 0 && J->exitno == 0) {
     if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
+      if (bc_op(J->cur.startins) == BC_ITERN) return;  /* See rec_itern(). */
       /* Same loop? */
       if (ev == LOOPEV_LEAVE)  /* Must loop back to form a root trace. */
 	lj_trace_err(J, LJ_TRERR_LLEAVE);
@@ -638,6 +651,77 @@ static void rec_loop_jit(jit_State *J, T
   }  /* Side trace continues across a loop that's left or not entered. */
 }
 
+/* Record ITERN (table traversal step). Returns the resulting loop event. */
+static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
+{
+#if LJ_BE
+  /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+  ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+  */
+  UNUSED(ra); UNUSED(rb);
+  setintV(&J->errinfo, (int32_t)BC_ITERN);
+  lj_trace_err_info(J, LJ_TRERR_NYIBC);
+#else
+  RecordIndex ix;
+  /* Since ITERN is recorded at the start, we need our own loop detection. */
+  if (J->pc == J->startpc &&
+      J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
+    IRRef ref = REF_FIRST + LJ_HASPROFILE;
+#ifdef LUAJIT_ENABLE_CHECKHOOK
+    ref += 3;
+#endif
+    if (J->cur.nins > ref ||
+       (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) {
+      J->instunroll = 0;  /* Cannot continue unrolling across an ITERN. */
+      lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping trace. */
+      return LOOPEV_ENTER;
+    }
+  }
+  J->maxslot = ra;
+  lj_snap_add(J);  /* Required to make JLOOP the first ins in a side-trace. */
+  ix.tab = getslot(J, ra-2);
+  ix.key = J->base[ra-1] ? J->base[ra-1] :
+	   sloadt(J, (int32_t)(ra-1), IRT_GUARD|IRT_INT,
+		  IRSLOAD_TYPECHECK|IRSLOAD_KEYINDEX);
+  copyTV(J->L, &ix.tabv, &J->L->base[ra-2]);
+  copyTV(J->L, &ix.keyv, &J->L->base[ra-1]);
+  ix.idxchain = (rb < 3);  /* Omit value type check, if unused. */
+  ix.mobj = 1;  /* We need the next index, too. */
+  J->maxslot = ra + lj_record_next(J, &ix);
+  J->needsnap = 1;
+  if (!tref_isnil(ix.key)) {  /* Looping back? */
+    J->base[ra-1] = ix.mobj | TREF_KEYINDEX;  /* Control var has next index. */
+    J->base[ra] = ix.key;
+    J->base[ra+1] = ix.val;
+    J->pc += bc_j(J->pc[1])+2;  /* Follow the branch of the ins at pc[1]. */
+    return LOOPEV_ENTER;
+  } else {
+    J->maxslot = ra-3;
+    J->pc += 2;  /* Skip past ITERN and the ins after it. */
+    return LOOPEV_LEAVE;
+  }
+#endif
+}
+
+/* Record ISNEXT: specialize to next() on a table with a nil start key. */
+static void rec_isnext(jit_State *J, BCReg ra)
+{
+  cTValue *b = &J->L->base[ra-3];
+  if (tvisfunc(b) && funcV(b)->c.ffid == FF_next &&
+      tvistab(b+1) && tvisnil(b+2)) {
+    /* These checks are folded away for a compiled pairs(). */
+    TRef func = getslot(J, ra-3);
+    TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID);
+    emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next));
+    (void)getslot(J, ra-2); /* Type check for table. */
+    (void)getslot(J, ra-1); /* Type check for nil key. */
+    J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX;
+    J->maxslot = ra;
+  } else {  /* Abort trace. Interpreter will despecialize bytecode. */
+    lj_trace_err(J, LJ_TRERR_RECERR);
+  }
+}
+
 /* -- Record profiler hook checks ----------------------------------------- */
 
 #if LJ_HASPROFILE
@@ -708,7 +792,7 @@ static TRef rec_call_specialize(jit_Stat
       /* NYI: io_file_iter doesn't have an ffid, yet. */
       {  /* Specialize to the ffid. */
 	TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
-	emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
+	emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid));
       }
       return tr;
     default:
@@ -832,6 +916,7 @@ void lj_record_ret(jit_State *J, BCReg r
     J->base -= cbase;
     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
     frame = frame_prevd(frame);
+    J->needsnap = 1;  /* Stop catching on-trace errors. */
   }
   /* Return to lower frame via interpreter for unhandled cases. */
   if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
@@ -891,6 +976,7 @@ void lj_record_ret(jit_State *J, BCReg r
       emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
       J->retdepth++;
       J->needsnap = 1;
+      J->scev.idx = REF_NIL;
       lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
       /* Shift result slots up and clear the slots of the new frame below. */
       memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
@@ -918,6 +1004,9 @@ void lj_record_ret(jit_State *J, BCReg r
       TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
       if (bslot != J->maxslot) {  /* Concatenate the remainder. */
 	TValue *b = J->L->base, save;  /* Simulate lower frame and result. */
+	/* Can't handle MM_concat + CALLT + fast func side-effects. */
+	if (J->postproc != LJ_POST_NONE)
+	  lj_trace_err(J, LJ_TRERR_NYIRETL);
 	J->base[J->maxslot] = tr;
 	copyTV(J->L, &save, b-(2<<LJ_FR2));
 	if (gotresults)
@@ -1366,16 +1455,16 @@ static TRef rec_idx_key(jit_State *J, Re
     key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
   if (tref_isk(key)) {
     /* Optimize lookup of constant hash keys. */
-    MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
-    if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
-	hslot <= 65535*(MSize)sizeof(Node)) {
+    GCSize hslot = (GCSize)((char *)ix->oldv-(char *)&noderef(t->node)[0].val);
+    if (hslot <= t->hmask*(GCSize)sizeof(Node) &&
+	hslot <= 65535*(GCSize)sizeof(Node)) {
       TRef node, kslot, hm;
       *rbref = J->cur.nins;  /* Mark possible rollback point. */
       *rbguard = J->guardemit;
       hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
       emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
       node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
-      kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
+      kslot = lj_ir_kslot(J, key, (IRRef)(hslot / sizeof(Node)));
       return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
     }
   }
@@ -1433,6 +1522,16 @@ TRef lj_record_idx(jit_State *J, RecordI
 	return 0;  /* No result yet. */
       }
     }
+#if LJ_HASBUFFER
+    /* The index table of buffer objects is treated as immutable. */
+    if (ix->mt == TREF_NIL && !ix->val &&
+	tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
+	tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
+      cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
+      TRef tr = lj_record_constify(J, val);
+      if (tr) return tr;  /* Specialize to the value, i.e. a method. */
+    }
+#endif
     /* Otherwise retry lookup with metaobject. */
     ix->tab = ix->mobj;
     copyTV(J->L, &ix->tabv, &ix->mobjv);
@@ -1501,8 +1600,16 @@ TRef lj_record_idx(jit_State *J, RecordI
       lj_assertJ(!hasmm, "inconsistent metamethod handling");
       if (oldv == niltvg(J2G(J))) {  /* Need to insert a new key. */
 	TRef key = ix->key;
-	if (tref_isinteger(key))  /* NEWREF needs a TValue as a key. */
+	if (tref_isinteger(key)) {  /* NEWREF needs a TValue as a key. */
 	  key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
+	} else if (tref_isnum(key)) {
+	  if (tref_isk(key)) {
+	    if (tvismzero(&ix->keyv))
+	      key = lj_ir_knum_zero(J);  /* Canonicalize -0.0 to +0.0. */
+	  } else {
+	    emitir(IRTG(IR_EQ, IRT_NUM), key, key);  /* Check for !NaN. */
+	  }
+	}
 	xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
 	keybarrier = 0;  /* NEWREF already takes care of the key barrier. */
 #ifdef LUAJIT_ENABLE_TABLE_BUMP
@@ -1543,6 +1650,47 @@ TRef lj_record_idx(jit_State *J, RecordI
   }
 }
 
+/* Get types of next non-nil entry from idx: key type + (value type << 8). */
+static IRType rec_next_types(GCtab *t, uint32_t idx)
+{
+  for (; idx < t->asize; idx++) {
+    cTValue *a = arrayslot(t, idx);
+    if (LJ_LIKELY(!tvisnil(a)))
+      return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8);
+  }
+  idx -= t->asize;  /* Continue with an index into the hash nodes. */
+  for (; idx <= t->hmask; idx++) {
+    Node *n = &noderef(t->node)[idx];
+    if (!tvisnil(&n->val))
+      return itype2irt(&n->key) + (itype2irt(&n->val) << 8);
+  }
+  return IRT_NIL + (IRT_NIL << 8);  /* No non-nil entry found. */
+}
+
+/* Record a table traversal step aka next(). Returns 1 (key) or 2 (key+val). */
+int lj_record_next(jit_State *J, RecordIndex *ix)
+{
+  IRType t, tkey, tval;
+  TRef trvk;
+  t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo);
+  tkey = (t & 0xff); tval = (t >> 8);  /* Unpack key and value type. */
+  trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key);
+  if (ix->mobj || tkey == IRT_NIL) {
+    TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk);
+    /* Always check for invalid key from next() for nil result. */
+    if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1));
+    ix->mobj = idx;
+  }
+  ix->key = lj_record_vload(J, trvk, 1, tkey);
+  if (tkey == IRT_NIL || ix->idxchain) {  /* Omit value type check. */
+    ix->val = TREF_NIL;
+    return 1;
+  } else {  /* Need value. */
+    ix->val = lj_record_vload(J, trvk, 0, tval);
+    return 2;
+  }
+}
+
 static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
 {
   RecordIndex ix;
@@ -1625,16 +1773,16 @@ noconstify:
   /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
   uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
   if (!uvp->closed) {
-    uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
     /* In current stack? */
     if (uvval(uvp) >= tvref(J->L->stack) &&
 	uvval(uvp) < tvref(J->L->maxstack)) {
       int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
       if (slot >= 0) {  /* Aliases an SSA slot? */
+	uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
 	emitir(IRTG(IR_EQ, IRT_PGC),
 	       REF_BASE,
 	       emitir(IRT(IR_ADD, IRT_PGC), uref,
-		      lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
+		      lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8)));
 	slot -= (int32_t)J->baseslot;  /* Note: slot number may be negative! */
 	if (val == 0) {
 	  return getslot(J, slot);
@@ -1645,12 +1793,21 @@ noconstify:
 	}
       }
     }
+    /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
+    ** Always marked as a guard, since it might get promoted to IRT_PGC later.
+    */
+    uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
+    uref = tref_ref(uref);
     emitir(IRTG(IR_UGT, IRT_PGC),
 	   emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
-	   lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
+	   lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
   } else {
+    /* If fn is constant, then so is the GCupval*, and the upvalue cannot
+    ** transition back to open, so no guard is required in this case.
+    */
+    IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
+    uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
     needbarrier = 1;
-    uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
   }
   if (val == 0) {  /* Upvalue load */
     IRType t = itype2irt(uvval(uvp));
@@ -1801,12 +1958,14 @@ static void rec_varg(jit_State *J, BCReg
   if (J->framedepth > 0) {  /* Simple case: varargs defined on-trace. */
     ptrdiff_t i;
     if (nvararg < 0) nvararg = 0;
-    if (nresults == -1) {
-      nresults = nvararg;
-      J->maxslot = dst + (BCReg)nvararg;
-    } else if (dst + nresults > J->maxslot) {
+    if (nresults != 1) {
+      if (nresults == -1) nresults = nvararg;
       J->maxslot = dst + (BCReg)nresults;
+    } else if (dst >= J->maxslot) {
+      J->maxslot = dst + 1;
     }
+    if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
+      lj_trace_err(J, LJ_TRERR_STACKOV);
     for (i = 0; i < nresults; i++)
       J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
   } else {  /* Unknown number of varargs passed to trace. */
@@ -1823,14 +1982,11 @@ static void rec_varg(jit_State *J, BCReg
 	  emitir(IRTGI(IR_EQ), fr,
 		 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
 	vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
-	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
+	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
+		       lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2)));
 	for (i = 0; i < nload; i++) {
 	  IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
-	  TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
-			     vbase, lj_ir_kint(J, (int32_t)i));
-	  TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	  if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
-	  J->base[dst+i] = tr;
+	  J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t);
 	}
       } else {
 	emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1838,15 +1994,19 @@ static void rec_varg(jit_State *J, BCReg
       }
       for (i = nvararg; i < nresults; i++)
 	J->base[dst+i] = TREF_NIL;
-      if (dst + (BCReg)nresults > J->maxslot)
+      if (nresults != 1 || dst >= J->maxslot) {
 	J->maxslot = dst + (BCReg)nresults;
+      }
     } else if (select_detect(J)) {  /* y = select(x, ...) */
       TRef tridx = J->base[dst-1];
       TRef tr = TREF_NIL;
       ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
       if (idx < 0) goto nyivarg;
-      if (idx != 0 && !tref_isinteger(tridx))
+      if (idx != 0 && !tref_isinteger(tridx)) {
+	if (tref_isstr(tridx))
+	  tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0);
 	tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
+      }
       if (idx != 0 && tref_isk(tridx)) {
 	emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
 	       fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
@@ -1874,11 +2034,10 @@ static void rec_varg(jit_State *J, BCReg
 	IRType t;
 	TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
 	vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
-		       lj_ir_kint(J, frofs-(8<<LJ_FR2)));
+		       lj_ir_kintpgc(J, frofs-(8<<LJ_FR2)));
 	t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
 	aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
-	tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
-	if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
+	tr = lj_record_vload(J, aref, 0, t);
       }
       J->base[dst-2-LJ_FR2] = tr;
       J->maxslot = dst-1-LJ_FR2;
@@ -1889,8 +2048,6 @@ static void rec_varg(jit_State *J, BCReg
       lj_trace_err_info(J, LJ_TRERR_NYIBC);
     }
   }
-  if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
-    lj_trace_err(J, LJ_TRERR_STACKOV);
 }
 
 /* -- Record allocations -------------------------------------------------- */
@@ -1915,7 +2072,7 @@ static TRef rec_tnew(jit_State *J, uint3
 static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
 {
   TRef *top = &J->base[topslot];
-  TValue savetv[5];
+  TValue savetv[5+LJ_FR2];
   BCReg s;
   RecordIndex ix;
   lj_assertJ(baseslot < topslot, "bad CAT arg");
@@ -1935,9 +2092,9 @@ static TRef rec_cat(jit_State *J, BCReg
     tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
 		      lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
     do {
-      tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
+      tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
     } while (trp <= top);
-    tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
+    tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
     J->maxslot = (BCReg)(xbase - J->base);
     if (xbase == base) return tr;  /* Return simple concatenation result. */
     /* Pass partial result. */
@@ -2050,7 +2207,7 @@ void lj_record_ins(jit_State *J)
   /* Need snapshot before recording next bytecode (e.g. after a store). */
   if (J->needsnap) {
     J->needsnap = 0;
-    lj_snap_purge(J);
+    if (J->pt) lj_snap_purge(J);
     lj_snap_add(J);
     J->mergesnap = 1;
   }
@@ -2105,6 +2262,7 @@ void lj_record_ins(jit_State *J)
   case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
   case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
     copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
+    tv->u32.hi == LJ_KEYINDEX ? (lj_ir_kint(J, 0) | TREF_KEYINDEX) :
     lj_ir_knumint(J, numV(tv)); } break;
   case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
     setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
@@ -2267,7 +2425,7 @@ void lj_record_ins(jit_State *J)
 
   case BC_POW:
     if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
-      rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
+      rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
     else
       rc = rec_mm_arith(J, &ix, MM_pow);
     break;
@@ -2341,6 +2499,7 @@ void lj_record_ins(jit_State *J)
 
   case BC_TSETM:
     rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
+    J->maxslot = ra;  /* The table slot at ra-1 is the highest used slot. */
     break;
 
   case BC_TNEW:
@@ -2423,6 +2582,9 @@ void lj_record_ins(jit_State *J)
   case BC_ITERL:
     rec_loop_interp(J, pc, rec_iterl(J, *pc));
     break;
+  case BC_ITERN:
+    rec_loop_interp(J, pc, rec_itern(J, ra, rb));
+    break;
   case BC_LOOP:
     rec_loop_interp(J, pc, rec_loop(J, ra, 1));
     break;
@@ -2435,7 +2597,8 @@ void lj_record_ins(jit_State *J)
     break;
   case BC_JLOOP:
     rec_loop_jit(J, rc, rec_loop(J, ra,
-				 !bc_isret(bc_op(traceref(J, rc)->startins))));
+				 !bc_isret(bc_op(traceref(J, rc)->startins)) &&
+				 bc_op(traceref(J, rc)->startins) != BC_ITERN));
     break;
 
   case BC_IFORL:
@@ -2451,6 +2614,10 @@ void lj_record_ins(jit_State *J)
       J->maxslot = ra;  /* Shrink used slots. */
     break;
 
+  case BC_ISNEXT:
+    rec_isnext(J, ra);
+    break;
+
   /* -- Function headers -------------------------------------------------- */
 
   case BC_FUNCF:
@@ -2480,8 +2647,6 @@ void lj_record_ins(jit_State *J)
       break;
     }
     /* fallthrough */
-  case BC_ITERN:
-  case BC_ISNEXT:
   case BC_UCLO:
   case BC_FNEW:
     setintV(&J->errinfo, (int32_t)op);
@@ -2526,6 +2691,8 @@ static const BCIns *rec_setup_root(jit_S
     J->bc_min = pc;
     break;
   case BC_ITERL:
+    if (bc_op(pc[-1]) == BC_JLOOP)
+      lj_trace_err(J, LJ_TRERR_LINNER);
     lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL");
     J->maxslot = ra + bc_b(pc[-1]) - 1;
     J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
@@ -2533,6 +2700,13 @@ static const BCIns *rec_setup_root(jit_S
     lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
     J->bc_min = pc;
     break;
+  case BC_ITERN:
+    lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN");
+    J->maxslot = ra;
+    J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns);
+    J->bc_min = pc+2 + bc_j(pc[1]);
+    J->state = LJ_TRACE_RECORD_1ST;  /* Record the first ITERN, too. */
+    break;
   case BC_LOOP:
     /* Only check BC range for real loops, but not for "repeat until true". */
     pcj = pc + bc_j(ins);
@@ -2629,9 +2803,14 @@ void lj_record_setup(jit_State *J)
     }
     lj_snap_replay(J, T);
   sidecheck:
-    if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
-	T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
-				    J->param[JIT_P_tryside]) {
+    if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
+	 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
+				     J->param[JIT_P_tryside])) {
+      if (bc_op(*J->pc) == BC_JLOOP) {
+	BCIns startins = traceref(J, bc_d(*J->pc))->startins;
+	if (bc_op(startins) == BC_ITERN)
+	  rec_itern(J, bc_a(startins), bc_b(startins));
+      }
       lj_record_stop(J, LJ_TRLINK_INTERP, 0);
     }
   } else {  /* Root trace. */
@@ -2640,6 +2819,7 @@ void lj_record_setup(jit_State *J)
     J->pc = rec_setup_root(J);
     /* Note: the loop instruction itself is recorded at the end and not
     ** at the start! So snapshot #0 needs to point to the *next* instruction.
+    ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST.
     */
     lj_snap_add(J);
     if (bc_op(J->cur.startins) == BC_FORL)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_record.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_record.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_record.h
@@ -1,6 +1,6 @@
 /*
 ** Trace recorder (bytecode -> SSA IR).
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_RECORD_H
@@ -30,6 +30,7 @@ LJ_FUNC int lj_record_objcmp(jit_State *
 			     cTValue *av, cTValue *bv);
 LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
 LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
+LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t);
 
 LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
 LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
@@ -37,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J,
 
 LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm);
 LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix);
+LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix);
 
 LJ_FUNC void lj_record_ins(jit_State *J);
 LJ_FUNC void lj_record_setup(jit_State *J);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_serialize.c
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_serialize.c
@@ -0,0 +1,539 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_serialize_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#endif
+#if LJ_HASJIT
+#include "lj_ir.h"
+#endif
+#include "lj_serialize.h"
+
+/* Tags for internal serialization format. A tag is read back as a U124. */
+enum {
+  SER_TAG_NIL,		/* 0x00 */
+  SER_TAG_FALSE,
+  SER_TAG_TRUE,
+  SER_TAG_NULL,		/* Lightuserdata with a zero pointer. No payload. */
+  SER_TAG_LIGHTUD32,	/* Lightuserdata, 4 byte payload. */
+  SER_TAG_LIGHTUD64,	/* Lightuserdata, 8 byte payload. */
+  SER_TAG_INT,		/* 4 byte payload. */
+  SER_TAG_NUM,		/* 8 byte payload. */
+  SER_TAG_TAB,		/* 0x08 +1: hash, +2: array[0..], +4: array[1..] */
+  SER_TAG_DICT_MT = SER_TAG_TAB+6,  /* Metatable dict index, table follows. */
+  SER_TAG_DICT_STR,	/* String dict index follows. */
+  SER_TAG_INT64,	/* 0x10 */
+  SER_TAG_UINT64,
+  SER_TAG_COMPLEX,	/* Two doubles, 16 byte payload. */
+  SER_TAG_0x13,		/* 0x13..0x1f: never written, rejected on decode. */
+  SER_TAG_0x14,
+  SER_TAG_0x15,
+  SER_TAG_0x16,
+  SER_TAG_0x17,
+  SER_TAG_0x18,		/* 0x18 */
+  SER_TAG_0x19,
+  SER_TAG_0x1a,
+  SER_TAG_0x1b,
+  SER_TAG_0x1c,
+  SER_TAG_0x1d,
+  SER_TAG_0x1e,
+  SER_TAG_0x1f,
+  SER_TAG_STR,		/* 0x20 + str->len */
+};
+LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);  /* Low bits hold table flags. */
+
+/* -- Helper functions ---------------------------------------------------- */
+
+static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
+{
+  /* Make sure sz bytes fit between w and sbx->e. Returns write pointer. */
+  MSize avail = (MSize)(sbx->e - w);
+  if (LJ_LIKELY(sz <= avail)) return w;
+  sbx->w = w;  /* Record write position, then request more space. */
+  return lj_buf_more2((SBuf *)sbx, sz);
+}
+
+/* Slow path of U124 write: 2 bytes below 0x1fe0, else 0xff + 32 bit LE. */
+static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
+{
+  if (v >= 0x1fe0) {  /* 5-byte form: marker plus little-endian value. */
+#if LJ_BE
+    v = lj_bswap(v);
+#endif
+    w[0] = (char)0xff;
+    memcpy(w+1, &v, 4);
+    return w+5;
+  }
+  v -= 0xe0;  /* 2-byte form: remove bias, high bits go into first byte. */
+  w[0] = (char)(0xe0 | (v >> 8)); w[1] = (char)v;
+  return w+2;
+}
+
+static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
+{
+  /* Write v in U124 encoding at w and return the advanced pointer.
+  ** Values below 0xe0 take a single byte, the rest use the slow path.
+  */
+  if (LJ_UNLIKELY(v >= 0xe0)) return serialize_wu124_(w, v);
+  *w = (char)v;
+  return w+1;
+}
+
+static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
+{
+  /* Slow path of U124 read. *pv holds the first byte (>= 0xe0) on entry.
+  ** Returns the advanced read pointer or NULL if the input is truncated.
+  */
+  uint32_t lead = *pv;
+  if (lead == 0xff) {  /* 5-byte form: a 32 bit value follows. */
+    if (r + 4 > w) return NULL;
+    lead = lj_getu32(r);
+    *pv = LJ_BE ? lj_bswap(lead) : lead;
+    return r + 4;
+  }
+  if (r >= w) return NULL;  /* 2-byte form needs one more byte. */
+  *pv = ((lead & 0x1f) << 8) + *(uint8_t *)r + 0xe0;
+  return r + 1;
+}
+
+static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
+{
+  /* Read a U124 value from [r, w) into *pv. Returns the advanced read
+  ** pointer, or NULL when the input ends prematurely.
+  */
+  uint32_t first;
+  if (LJ_UNLIKELY(r >= w)) return NULL;
+  first = *(uint8_t *)r++;
+  *pv = first;
+  if (LJ_LIKELY(first < 0xe0)) return r;  /* Single-byte form. */
+  return serialize_ru124_(r, w, pv);
+}
+
+/* Prepare string dictionary for use (once). */
+void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
+{
+  MSize idx, n;
+  if (dict->hmask) return;  /* A hash part means it's prepared already. */
+  n = lj_tab_len(dict);
+  if (n == 0) return;
+  lj_tab_resize(L, dict, dict->asize, hsize2hbits(n));
+  for (idx = 1; idx <= n && idx < dict->asize; idx++) {
+    cTValue *slot = arrayslot(dict, idx);
+    if (tvisstr(slot)) {
+      /* Map string to its zero-based index. The first occurrence wins. */
+      if (lj_tab_getstr(dict, strV(slot)) == NULL)
+	lj_tab_newkey(L, dict, slot)->u64 = (uint64_t)(idx-1);
+    } else if (!tvisfalse(slot)) {
+      lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+    }
+  }
+}
+
+/* Prepare metatable dictionary for use (once). */
+void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
+{
+  MSize idx, n;
+  if (dict->hmask) return;  /* A hash part means it's prepared already. */
+  n = lj_tab_len(dict);
+  if (n == 0) return;
+  lj_tab_resize(L, dict, dict->asize, hsize2hbits(n));
+  for (idx = 1; idx <= n && idx < dict->asize; idx++) {
+    cTValue *slot = arrayslot(dict, idx);
+    if (tvistab(slot)) {
+      /* Map table to its zero-based index. The first occurrence wins. */
+      if (tvisnil(lj_tab_get(L, dict, slot)))
+	lj_tab_newkey(L, dict, slot)->u64 = (uint64_t)(idx-1);
+    } else if (!tvisfalse(slot)) {
+      lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+    }
+  }
+}
+
+/* -- Internal serializer ------------------------------------------------- */
+
+/* Put serialized object o at w. Returns the new write pointer. Recursive. */
+static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
+{
+  if (LJ_LIKELY(tvisstr(o))) {
+    const GCstr *str = strV(o);
+    MSize len = str->len;
+    w = serialize_more(w, sbx, 5+len);  /* Max. U124 size plus data. */
+    w = serialize_wu124(w, SER_TAG_STR + len);
+    w = lj_buf_wmem(w, strdata(str), len);
+  } else if (tvisint(o)) {
+    uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
+    w = serialize_more(w, sbx, 1+4);
+    *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
+  } else if (tvisnum(o)) {
+    uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
+    w = serialize_more(w, sbx, 1+sizeof(lua_Number));
+    *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
+  } else if (tvispri(o)) {
+    w = serialize_more(w, sbx, 1);
+    *w++ = (char)(SER_TAG_NIL + ~itype(o));  /* Decoder inverts with ~tp. */
+  } else if (tvistab(o)) {
+    const GCtab *t = tabV(o);
+    uint32_t narray = 0, nhash = 0, one = 2;  /* one: array flag (2 or 4). */
+    if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+    sbx->depth--;  /* Restored after the table has been written. */
+    if (t->asize > 0) {  /* Determine max. length of array part. */
+      ptrdiff_t i;
+      TValue *array = tvref(t->array);
+      for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
+	if (!tvisnil(&array[i]))
+	  break;
+      narray = (uint32_t)(i+1);
+      if (narray && tvisnil(&array[0])) one = 4;  /* Skip empty array[0]. */
+    }
+    if (t->hmask > 0) {  /* Count number of used hash slots. */
+      uint32_t i, hmask = t->hmask;
+      Node *node = noderef(t->node);
+      for (i = 0; i <= hmask; i++)
+	nhash += !tvisnil(&node[i].val);
+    }
+    /* Write metatable index. */
+    if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
+      TValue mto;
+      Node *n;
+      settabV(sbufL(sbx), &mto, tabref(t->metatable));
+      n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
+      do {
+	if (n->key.u64 == mto.u64) {
+	  uint32_t idx = n->val.u32.lo;
+	  w = serialize_more(w, sbx, 1+5);
+	  *w++ = SER_TAG_DICT_MT;
+	  w = serialize_wu124(w, idx);
+	  break;
+	}
+      } while ((n = nextnode(n)));  /* Not found: nothing is written. */
+    }
+    /* Write number of array slots and hash slots. */
+    w = serialize_more(w, sbx, 1+2*5);
+    *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
+    if (narray) w = serialize_wu124(w, narray);
+    if (nhash) w = serialize_wu124(w, nhash);
+    if (narray) {  /* Write array entries. */
+      cTValue *oa = tvref(t->array) + (one >> 2);  /* Start at [0] or [1]. */
+      cTValue *oe = tvref(t->array) + narray;
+      while (oa < oe) w = serialize_put(w, sbx, oa++);
+    }
+    if (nhash) {  /* Write hash entries. */
+      const Node *node = noderef(t->node) + t->hmask;  /* Walk backwards. */
+      GCtab *dict_str = tabref(sbx->dict_str);
+      if (LJ_UNLIKELY(dict_str)) {
+	for (;; node--)
+	  if (!tvisnil(&node->val)) {
+	    if (LJ_LIKELY(tvisstr(&node->key))) {
+	      /* Inlined lj_tab_getstr is 30% faster. */
+	      const GCstr *str = strV(&node->key);
+	      Node *n = hashstr(dict_str, str);
+	      do {
+		if (tvisstr(&n->key) && strV(&n->key) == str) {
+		  uint32_t idx = n->val.u32.lo;
+		  w = serialize_more(w, sbx, 1+5);
+		  *w++ = SER_TAG_DICT_STR;
+		  w = serialize_wu124(w, idx);
+		  break;
+		}
+		n = nextnode(n);
+		if (!n) {  /* Not in dictionary: write a plain string. */
+		  MSize len = str->len;
+		  w = serialize_more(w, sbx, 5+len);
+		  w = serialize_wu124(w, SER_TAG_STR + len);
+		  w = lj_buf_wmem(w, strdata(str), len);
+		  break;
+		}
+	      } while (1);
+	    } else {
+	      w = serialize_put(w, sbx, &node->key);
+	    }
+	    w = serialize_put(w, sbx, &node->val);
+	    if (--nhash == 0) break;
+	  }
+      } else {
+	for (;; node--)
+	  if (!tvisnil(&node->val)) {
+	    w = serialize_put(w, sbx, &node->key);
+	    w = serialize_put(w, sbx, &node->val);
+	    if (--nhash == 0) break;
+	  }
+      }
+    }
+    sbx->depth++;
+#if LJ_HASFFI
+  } else if (tviscdata(o)) {
+    CTState *cts = ctype_cts(sbufL(sbx));
+    CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
+    uint8_t *sp = cdataptr(cdataV(o));
+    if (ctype_isinteger(s->info) && s->size == 8) {
+      w = serialize_more(w, sbx, 1+8);
+      *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
+#if LJ_BE
+      { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
+#else
+      memcpy(w, sp, 8);
+#endif
+      w += 8;
+    } else if (ctype_iscomplex(s->info) && s->size == 16) {
+      w = serialize_more(w, sbx, 1+16);
+      *w++ = SER_TAG_COMPLEX;
+#if LJ_BE
+      {  /* Only swap the doubles. The re/im order stays the same. */
+	uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
+	u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
+      }
+#else
+      memcpy(w, sp, 16);
+#endif
+      w += 16;
+    } else {
+      goto badenc;  /* NYI other cdata */
+    }
+#endif
+  } else if (tvislightud(o)) {
+    uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
+    w = serialize_more(w, sbx, 1+sizeof(ud));
+    if (ud == 0) {
+      *w++ = SER_TAG_NULL;
+    } else if (LJ_32 || checku32(ud)) {
+#if LJ_BE && LJ_64
+      ud = lj_bswap64(ud);
+#elif LJ_BE
+      ud = lj_bswap(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
+#if LJ_64
+    } else {
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+      *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
+#endif
+    }
+  } else {
+    /* NYI userdata */
+#if LJ_HASFFI
+  badenc:
+#endif
+    lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
+  }
+  return w;
+}
+
+/* Get serialized object at r into o. Returns new read pointer. Recursive. */
+static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
+{
+  char *w = sbx->w;  /* End of readable data for all bounds checks. */
+  uint32_t tp;
+  r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+  if (LJ_LIKELY(tp >= SER_TAG_STR)) {
+    uint32_t len = tp - SER_TAG_STR;
+    if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
+    setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
+    r += len;
+  } else if (tp == SER_TAG_INT) {
+    if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+    setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
+    r += 4;
+  } else if (tp == SER_TAG_NUM) {
+    if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+    memcpy(o, r, 8); r += 8;
+#if LJ_BE
+    o->u64 = lj_bswap64(o->u64);
+#endif
+    if (!tvisnum(o)) setnanV(o);  /* Fix non-canonical NaNs. */
+  } else if (tp <= SER_TAG_TRUE) {
+    setpriV(o, ~tp);
+  } else if (tp == SER_TAG_DICT_STR) {
+    GCtab *dict_str;
+    uint32_t idx;
+    r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+    idx++;  /* Stored index is zero-based, dictionary slots start at 1. */
+    dict_str = tabref(sbx->dict_str);
+    if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
+      copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
+    else
+      lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+  } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
+    uint32_t narray = 0, nhash = 0;
+    GCtab *t, *mt = NULL;
+    if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+    sbx->depth--;  /* Restored after the table has been read. */
+    if (tp == SER_TAG_DICT_MT) {
+      GCtab *dict_mt;
+      uint32_t idx;
+      r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+      idx++;  /* Stored index is zero-based, dictionary slots start at 1. */
+      dict_mt = tabref(sbx->dict_mt);
+      if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
+	mt = tabV(arrayslot(dict_mt, idx));
+      else
+	lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+      r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+      if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
+    }
+    if (tp >= SER_TAG_TAB+2) {  /* Array flag set: array size follows. */
+      r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    if ((tp & 1)) {  /* Hash flag set: number of hash entries follows. */
+      r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
+    }
+    t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
+    /* NOBARRIER: The table is new (marked white). */
+    setgcref(t->metatable, obj2gco(mt));
+    settabV(sbufL(sbx), o, t);
+    if (narray) {
+      TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);  /* [0] or [1]. */
+      TValue *oe = tvref(t->array) + narray;
+      while (oa < oe) r = serialize_get(r, sbx, oa++);
+    }
+    if (nhash) {
+      do {
+	TValue k, *v;
+	r = serialize_get(r, sbx, &k);
+	v = lj_tab_set(sbufL(sbx), t, &k);
+	if (LJ_UNLIKELY(!tvisnil(v)))
+	  lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
+	r = serialize_get(r, sbx, v);
+      } while (--nhash);
+    }
+    sbx->depth++;
+#if LJ_HASFFI
+  } else if (tp >= SER_TAG_INT64 &&  tp <= SER_TAG_COMPLEX) {
+    uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
+    GCcdata *cd;
+    if (LJ_UNLIKELY(r + sz > w)) goto eob;
+    if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag;
+    cd = lj_cdata_new_(sbufL(sbx),
+	   tp == SER_TAG_INT64 ? CTID_INT64 :
+	   tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
+	   sz);
+    memcpy(cdataptr(cd), r, sz); r += sz;
+#if LJ_BE
+    *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
+    if (sz == 16)
+      ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
+#endif
+    if (sz == 16) {  /* Fix non-canonical NaNs. */
+      TValue *cdo = (TValue *)cdataptr(cd);
+      if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
+      if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
+    }
+    setcdataV(sbufL(sbx), o, cd);
+#endif
+  } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
+    uintptr_t ud = 0;  /* Stays zero for SER_TAG_NULL. */
+    if (tp == SER_TAG_LIGHTUD32) {
+      if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+      ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
+      r += 4;
+    }
+#if LJ_64
+    else if (tp == SER_TAG_LIGHTUD64) {
+      if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+      memcpy(&ud, r, 8); r += 8;
+#if LJ_BE
+      ud = lj_bswap64(ud);
+#endif
+    }
+    setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
+#else
+    setrawlightudV(o, (void *)ud);
+#endif
+  } else {
+badtag:
+    lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
+  }
+  return r;
+eob:
+  lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
+  return NULL;
+}
+
+/* -- External serialization API ------------------------------------------ */
+
+/* Encode o to buffer at sbx->w with a fresh depth limit. Returns sbx. */
+SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
+{
+  sbx->depth = LJ_SERIALIZE_DEPTH;
+  sbx->w = serialize_put(sbx->w, sbx, o);  /* Store advanced write pointer. */
+  return sbx;
+}
+
+/* Decode from buffer at sbx->r into o. Returns the new read pointer. */
+char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
+{
+  sbx->depth = LJ_SERIALIZE_DEPTH;
+  return serialize_get(sbx->r, sbx, o);  /* sbx->r is not updated here. */
+}
+
+/* Stand-alone encoding, borrowing from global temporary buffer. */
+GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
+{
+  SBufExt sb;
+  char *end;
+  memset(&sb, 0, sizeof(sb));
+  lj_bufx_set_borrow(L, &sb, &G(L)->tmpbuf);
+  sb.depth = LJ_SERIALIZE_DEPTH;
+  end = serialize_put(sb.w, &sb, o);  /* End of the encoded data. */
+  return lj_str_new(L, sb.b, (size_t)(end - sb.b));  /* Result is [b, end). */
+}
+
+/* Stand-alone decoding, copy-on-write from string. */
+void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
+{
+  SBufExt sb;
+  char *end;
+  memset(&sb, 0, sizeof(sb));
+  /* No need to set sb.cowref here. */
+  lj_bufx_set_cow(L, &sb, strdata(str), str->len);
+  sb.depth = LJ_SERIALIZE_DEPTH;
+  end = serialize_get(sb.r, &sb, o);
+  if (end != sb.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);  /* Leftovers. */
+}
+
+#if LJ_HASJIT
+/* Peek into buffer to find the result IRType for specialization purposes. */
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
+{
+  uint32_t tp;
+  if (serialize_ru124(sbx->r, sbx->w, &tp)) {  /* sbx->r is not advanced. */
+    /* This must match the handling of all tags in the decoder above. */
+    switch (tp) {
+    case SER_TAG_NIL: return IRT_NIL;
+    case SER_TAG_FALSE: return IRT_FALSE;
+    case SER_TAG_TRUE: return IRT_TRUE;
+    case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
+      return IRT_LIGHTUD;
+    case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
+    case SER_TAG_NUM: return IRT_NUM;
+    case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
+    case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
+    case SER_TAG_DICT_MT:
+      return IRT_TAB;
+    case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
+      return IRT_CDATA;
+    case SER_TAG_DICT_STR:
+    default:  /* Tags >= SER_TAG_STR, but also any unassigned tag. */
+      return IRT_STR;
+    }
+  }
+  return IRT_NIL;  /* Will fail on actual decode. */
+}
+#endif
+
+#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_serialize.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_serialize.h
@@ -0,0 +1,28 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_SERIALIZE_H
+#define _LJ_SERIALIZE_H
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+
+#if LJ_HASBUFFER
+
+#define LJ_SERIALIZE_DEPTH	100	/* Default max. table nesting depth. */
+
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
+LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
+LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
+LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
+LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
+#if LJ_HASJIT
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
+#endif
+
+#endif
+
+#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_snap.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_snap.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_snap.c
@@ -1,6 +1,6 @@
 /*
 ** Snapshot handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_snap_c
@@ -171,6 +171,7 @@ static void snapshot_stack(jit_State *J,
   nent += snapshot_framelinks(J, p + nent, &snap->topslot);
   snap->mapofs = (uint32_t)nsnapmap;
   snap->ref = (IRRef1)J->cur.nins;
+  snap->mcofs = 0;
   snap->nslots = (uint8_t)nslots;
   snap->count = 0;
   J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
@@ -251,7 +252,12 @@ static BCReg snap_usedef(jit_State *J, u
       BCReg minslot = bc_a(ins);
       if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
       else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
-      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
+      else if (op == BC_UCLO) {
+	ptrdiff_t delta = bc_j(ins);
+	if (delta < 0) return maxslot;  /* Prevent loop. */
+	pc += delta;
+	break;
+      }
       for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
       return minslot < maxslot ? minslot : maxslot;
       }
@@ -275,7 +281,7 @@ static BCReg snap_usedef(jit_State *J, u
        if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
        break;
     case BCMbase:
-      if (op >= BC_CALLM && op <= BC_VARG) {
+      if (op >= BC_CALLM && op <= BC_ITERN) {
 	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
 		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
 	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
@@ -286,6 +292,8 @@ static BCReg snap_usedef(jit_State *J, u
 	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
 	  return 0;
 	}
+      } else if (op == BC_VARG) {
+	return maxslot;  /* NYI: punt. */
       } else if (op == BC_KNIL) {
 	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
       } else if (op == BC_TSETM) {
@@ -304,15 +312,45 @@ static BCReg snap_usedef(jit_State *J, u
   return 0;  /* unreachable */
 }
 
+/* Mark slots used by upvalues of child prototypes as used. */
+static void snap_useuv(GCproto *pt, uint8_t *udf)
+{
+  /* Coarse by design: correlating the lifetime of slots and closures is
+  ** difficult. False positives are rare and merely keep a slot from being
+  ** purged, which is just a missed optimization.
+  */
+  ptrdiff_t k, nkgc;
+  GCRef *kgc;
+  if (!(pt->flags & PROTO_CHILD)) return;
+  nkgc = pt->sizekgc;
+  kgc = mref(pt->k, GCRef) - 1;  /* Walk GC constants downwards from k[-1]. */
+  for (k = 0; k < nkgc; k++, kgc--) {
+    GCobj *o = gcref(*kgc);
+    GCproto *child;
+    ptrdiff_t u;
+    if (o->gch.gct != ~LJ_TPROTO) continue;  /* Only prototypes matter. */
+    child = gco2pt(o);
+    for (u = 0; u < child->sizeuv; u++) {
+      uint32_t uv = proto_uv(child)[u];
+      if ((uv & PROTO_UV_LOCAL)) udf[(uv & 0xff)] = 0;  /* Slot is used. */
+    }
+  }
+}
+
 /* Purge dead slots before the next snapshot. */
 void lj_snap_purge(jit_State *J)
 {
   uint8_t udf[SNAP_USEDEF_SLOTS];
-  BCReg maxslot = J->maxslot;
-  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
-  for (; s < maxslot; s++)
-    if (udf[s] != 0)
-      J->base[s] = 0;  /* Purge dead slots. */
+  BCReg s, maxslot = J->maxslot;
+  if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams)
+    maxslot = J->pt->numparams;
+  s = snap_usedef(J, udf, J->pc, maxslot);
+  if (s < maxslot) {
+    snap_useuv(J->pt, udf);
+    for (; s < maxslot; s++)
+      if (udf[s] != 0)
+	J->base[s] = 0;  /* Purge dead slots. */
+  }
 }
 
 /* Shrink last snapshot. */
@@ -325,6 +363,7 @@ void lj_snap_shrink(jit_State *J)
   BCReg maxslot = J->maxslot;
   BCReg baseslot = J->baseslot;
   BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
+  if (minslot < maxslot) snap_useuv(J->pt, udf);
   maxslot += baseslot;
   minslot += baseslot;
   snap->nslots = (uint8_t)maxslot;
@@ -424,7 +463,7 @@ static TRef snap_dedup(jit_State *J, Sna
   MSize j;
   for (j = 0; j < nmax; j++)
     if (snap_ref(map[j]) == ref)
-      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
+      return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
   return 0;
 }
 
@@ -499,10 +538,12 @@ void lj_snap_replay(jit_State *J, GCtrac
       uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
       if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
       if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
+      if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
       tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
     }
   setslot:
-    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
+    /* Same as TREF_* flags. */
+    J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
     J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
     if ((sn & SNAP_FRAME))
       J->baseslot = s+1;
@@ -839,11 +880,19 @@ static void snap_unsink(jit_State *J, GC
 		   irs->o == IR_FSTORE,
 		   "sunk store with bad op %d", irs->o);
 	if (irk->o == IR_FREF) {
-	  lj_assertJ(irk->op2 == IRFL_TAB_META,
-		     "sunk store with bad field %d", irk->op2);
-	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
-	  /* NOBARRIER: The table is new (marked white). */
-	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
+	  switch (irk->op2) {
+	  case IRFL_TAB_META:
+	    snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
+	    /* NOBARRIER: The table is new (marked white). */
+	    setgcref(t->metatable, obj2gco(tabV(&tmp)));
+	    break;
+	  case IRFL_TAB_NOMM:
+	    /* Negative metamethod cache invalidated by lj_tab_set() below. */
+	    break;
+	  default:
+	    lj_assertJ(0, "sunk store with bad field %d", irk->op2);
+	    break;
+	  }
 	} else {
 	  irk = &T->ir[irk->op2];
 	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
@@ -922,6 +971,10 @@ const BCIns *lj_snap_restore(jit_State *
 	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
 	L->base = o+1;
 #endif
+      } else if ((sn & SNAP_KEYINDEX)) {
+	/* An IRT_INT key index slot is restored as a number. Undo this. */
+	o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
+	o->u32.hi = LJ_KEYINDEX;
       }
     }
   }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_snap.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_snap.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_snap.h
@@ -1,6 +1,6 @@
 /*
 ** Snapshot handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_SNAP_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_state.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_state.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_state.c
@@ -1,6 +1,6 @@
 /*
 ** State and stack handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -103,8 +103,17 @@ void lj_state_shrinkstack(lua_State *L,
 void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
 {
   MSize n;
-  if (L->stacksize > LJ_STACK_MAXEX)  /* Overflow while handling overflow? */
-    lj_err_throw(L, LUA_ERRERR);
+  if (L->stacksize >= LJ_STACK_MAXEX) {
+    /* 4. Throw 'error in error handling' when we are _over_ the limit. */
+    if (L->stacksize > LJ_STACK_MAXEX)
+      lj_err_throw(L, LUA_ERRERR);  /* Does not invoke an error handler. */
+    /* 1. We are _at_ the limit after the last growth. */
+    if (L->status < LUA_ERRRUN) {  /* 2. Throw 'stack overflow'. */
+      L->status = LUA_ERRRUN;  /* Prevent ending here again for pushed msg. */
+      lj_err_msg(L, LJ_ERR_STKOV);  /* May invoke an error handler. */
+    }
+    /* 3. Add space (over the limit) for pushed message and error handler. */
+  }
   n = L->stacksize + need;
   if (n > LJ_STACK_MAX) {
     n += 2*LUA_MINSTACK;
@@ -114,8 +123,6 @@ void LJ_FASTCALL lj_state_growstack(lua_
       n = LJ_STACK_MAX;
   }
   resizestack(L, n);
-  if (L->stacksize > LJ_STACK_MAXEX)
-    lj_err_msg(L, LJ_ERR_STKOV);
 }
 
 void LJ_FASTCALL lj_state_growstack1(lua_State *L)
@@ -123,6 +130,18 @@ void LJ_FASTCALL lj_state_growstack1(lua
   lj_state_growstack(L, 1);
 }
 
+static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud)
+{
+  UNUSED(dummy);
+  lj_state_growstack(co, *(MSize *)ud);  /* ud points to the needed MSize. */
+  return NULL;
+}
+
+int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need)
+{  /* Runs lj_state_growstack() via lj_vm_cpcall() and returns its result. */
+  return lj_vm_cpcall(L, NULL, &need, cpgrowstack);
+}
+
 /* Allocate basic stack for new state. */
 static void stack_init(lua_State *L1, lua_State *L)
 {
@@ -156,6 +175,7 @@ static TValue *cpluaopen(lua_State *L, l
   fixstring(lj_err_str(L, LJ_ERR_ERRMEM));  /* Preallocate memory error msg. */
   g->gc.threshold = 4*g->gc.total;
   lj_trace_initstate(g);
+  lj_err_verify();
   return NULL;
 }
 
@@ -326,8 +346,11 @@ void LJ_FASTCALL lj_state_free(global_St
   lj_assertG(L != mainthread(g), "free of main thread");
   if (obj2gco(L) == gcref(g->cur_L))
     setgcrefnull(g->cur_L);
-  lj_func_closeuv(L, tvref(L->stack));
-  lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+  if (gcref(L->openupval) != NULL) {
+    lj_func_closeuv(L, tvref(L->stack));
+    lj_trace_abort(g);  /* For aa_uref soundness. */
+    lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+  }
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
   lj_mem_freet(g, L);
 }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_state.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_state.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_state.h
@@ -1,6 +1,6 @@
 /*
 ** State and stack handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_STATE_H
@@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_S
 LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
 LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
 LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
+LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need);
 
 static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
 {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_str.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_str.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_str.c
@@ -1,6 +1,6 @@
 /*
 ** String handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_str_c
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_str.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_str.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_str.h
@@ -1,6 +1,6 @@
 /*
 ** String handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_STR_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_strfmt.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt.c
@@ -1,6 +1,6 @@
 /*
 ** String formatting.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <stdio.h>
@@ -9,11 +9,17 @@
 #define LUA_CORE
 
 #include "lj_obj.h"
+#include "lj_err.h"
 #include "lj_buf.h"
 #include "lj_str.h"
+#include "lj_meta.h"
 #include "lj_state.h"
 #include "lj_char.h"
 #include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
+#include "lj_lib.h"
 
 /* -- Format parser ------------------------------------------------------- */
 
@@ -96,7 +102,7 @@ retlit:
 char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
 {
   uint32_t u = (uint32_t)k;
-  if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
+  if (k < 0) { u = ~u+1u; *p++ = '-'; }
   if (u < 10000) {
     if (u < 10) goto dig1;
     if (u < 100) goto dig2;
@@ -161,6 +167,10 @@ const char *lj_strfmt_wstrnum(lua_State
   if (tvisstr(o)) {
     *lenp = strV(o)->len;
     return strVdata(o);
+  } else if (tvisbuf(o)) {
+    SBufExt *sbx = bufV(o);
+    *lenp = sbufxlen(sbx);
+    return sbx->r;
   } else if (tvisint(o)) {
     sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
   } else if (tvisnum(o)) {
@@ -169,7 +179,7 @@ const char *lj_strfmt_wstrnum(lua_State
     return NULL;
   }
   *lenp = sbuflen(sb);
-  return sbufB(sb);
+  return sb->b;
 }
 
 /* -- Unformatted conversions to buffer ----------------------------------- */
@@ -177,7 +187,7 @@ const char *lj_strfmt_wstrnum(lua_State
 /* Add integer to buffer. */
 SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
 {
-  setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
+  sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
   return sb;
 }
 
@@ -191,80 +201,93 @@ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf
 
 SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
 {
-  setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
+  sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
   return sb;
 }
 
 /* Add quoted string to buffer. */
-SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
 {
-  const char *s = strdata(str);
-  MSize len = str->len;
   lj_buf_putb(sb, '"');
   while (len--) {
     uint32_t c = (uint32_t)(uint8_t)*s++;
-    char *p = lj_buf_more(sb, 4);
+    char *w = lj_buf_more(sb, 4);
     if (c == '"' || c == '\\' || c == '\n') {
-      *p++ = '\\';
+      *w++ = '\\';
     } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
       uint32_t d;
-      *p++ = '\\';
+      *w++ = '\\';
       if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
-	*p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+	*w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
 	goto tens;
       } else if (c >= 10) {
       tens:
-	d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
+	d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
       }
       c += '0';
     }
-    *p++ = (char)c;
-    setsbufP(sb, p);
+    *w++ = (char)c;
+    sb->w = w;
   }
   lj_buf_putb(sb, '"');
   return sb;
 }
 
+#if LJ_HASJIT  /* GCstr entry point; only compiled when LJ_HASJIT is set. */
+SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+{
+  return strfmt_putquotedlen(sb, strdata(str), str->len);  /* Unpack data/len. */
+}
+#endif
+
 /* -- Formatted conversions to buffer ------------------------------------- */
 
 /* Add formatted char to buffer. */
 SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
 {
   MSize width = STRFMT_WIDTH(sf);
-  char *p = lj_buf_more(sb, width > 1 ? width : 1);
-  if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
-  while (width-- > 1) *p++ = ' ';
-  if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
-  setsbufP(sb, p);
+  char *w = lj_buf_more(sb, width > 1 ? width : 1);
+  if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
+  while (width-- > 1) *w++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
+  sb->w = w;
   return sb;
 }
 
 /* Add formatted string to buffer. */
-SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
 {
-  MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
   MSize width = STRFMT_WIDTH(sf);
-  char *p = lj_buf_more(sb, width > len ? width : len);
-  if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
-  while (width-- > len) *p++ = ' ';
-  if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
-  setsbufP(sb, p);
+  char *w;
+  if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
+  w = lj_buf_more(sb, width > len ? width : len);
+  if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+  while (width-- > len) *w++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+  sb->w = w;
   return sb;
 }
 
+#if LJ_HASJIT  /* GCstr entry point; only compiled when LJ_HASJIT is set. */
+SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+{
+  return strfmt_putfstrlen(sb, sf, strdata(str), str->len);  /* Unpack data/len. */
+}
+#endif
+
 /* Add formatted signed/unsigned integer to buffer. */
 SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
 {
-  char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
+  char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
 #ifdef LUA_USE_ASSERT
-  char *ps;
+  char *ws;
 #endif
   MSize prefix = 0, len, prec, pprec, width, need;
 
   /* Figure out signed prefixes. */
   if (STRFMT_TYPE(sf) == STRFMT_INT) {
     if ((int64_t)k < 0) {
-      k = (uint64_t)-(int64_t)k;
+      k = ~k+1u;
       prefix = 256 + '-';
     } else if ((sf & STRFMT_F_PLUS)) {
       prefix = 256 + '+';
@@ -301,27 +324,27 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SForm
   width = STRFMT_WIDTH(sf);
   pprec = prec + (prefix >> 8);
   need = width > pprec ? width : pprec;
-  p = lj_buf_more(sb, need);
+  w = lj_buf_more(sb, need);
 #ifdef LUA_USE_ASSERT
-  ps = p;
+  ws = w;
 #endif
 
   /* Format number with leading/trailing whitespace and zeros. */
   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
-    while (width-- > pprec) *p++ = ' ';
+    while (width-- > pprec) *w++ = ' ';
   if (prefix) {
-    if ((char)prefix >= 'X') *p++ = '0';
-    *p++ = (char)prefix;
+    if ((char)prefix >= 'X') *w++ = '0';
+    *w++ = (char)prefix;
   }
   if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
-    while (width-- > pprec) *p++ = '0';
-  while (prec-- > len) *p++ = '0';
-  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
+    while (width-- > pprec) *w++ = '0';
+  while (prec-- > len) *w++ = '0';
+  while (q < buf + sizeof(buf)) *w++ = *q++;  /* Add number itself. */
   if ((sf & STRFMT_F_LEFT))
-    while (width-- > pprec) *p++ = ' ';
+    while (width-- > pprec) *w++ = ' ';
 
-  lj_assertX(need == (MSize)(p - ps), "miscalculated format size");
-  setsbufP(sb, p);
+  lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
+  sb->w = w;
   return sb;
 }
 
@@ -346,6 +369,117 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, S
   return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
 }
 
+/* Format stack arguments to buffer. Returns retry (set to 1 by a __tostring call if < 2). */
+int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
+{
+  int narg = (int)(L->top - L->base);  /* Number of stack slots from base to top. */
+  GCstr *fmt = lj_lib_checkstr(L, arg);  /* The format string is argument 'arg'. */
+  FormatState fs;
+  SFormat sf;
+  lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+  while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+    if (sf == STRFMT_LIT) {
+      lj_buf_putmem(sb, fs.str, fs.len);
+    } else if (sf == STRFMT_ERR) {
+      lj_err_callerv(L, LJ_ERR_STRFMT,
+		     strdata(lj_str_new(L, fs.str, fs.len)));
+    } else {
+      TValue *o = &L->base[arg++];  /* Next value; arg is bumped before the checks below. */
+      if (arg > narg)  /* This conversion has no matching stack argument. */
+	lj_err_arg(L, arg, LJ_ERR_NOVAL);
+      switch (STRFMT_TYPE(sf)) {
+      case STRFMT_INT:
+	if (tvisint(o)) {
+	  int32_t k = intV(o);
+	  if (sf == STRFMT_INT)
+	    lj_strfmt_putint(sb, k);  /* Shortcut for plain %d. */
+	  else
+	    lj_strfmt_putfxint(sb, sf, k);
+	  break;
+	}
+#if LJ_HASFFI
+	if (tviscdata(o)) {
+	  GCcdata *cd = cdataV(o);
+	  if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+	    lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+	    break;
+	  }
+	}
+#endif
+	lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));  /* Fallback: any number. */
+	break;
+      case STRFMT_UINT:
+	if (tvisint(o)) {
+	  lj_strfmt_putfxint(sb, sf, intV(o));
+	  break;
+	}
+#if LJ_HASFFI
+	if (tviscdata(o)) {
+	  GCcdata *cd = cdataV(o);
+	  if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+	    lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+	    break;
+	  }
+	}
+#endif
+	lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));  /* Fallback: any number. */
+	break;
+      case STRFMT_NUM:
+	lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
+	break;
+      case STRFMT_STR: {
+	MSize len;
+	const char *s;
+	cTValue *mo;
+	if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&  /* retry < 0: no __tostring. */
+	    !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+	  /* Call __tostring metamethod once. */
+	  copyTV(L, L->top++, mo);
+	  copyTV(L, L->top++, o);
+	  lua_call(L, 1, 1);
+	  o = &L->base[arg-1];  /* Stack may have been reallocated. */
+	  copyTV(L, o, --L->top);  /* Replace inline for retry. */
+	  if (retry < 2) {  /* Global buffer may have been overwritten. */
+	    retry = 1;
+	    break;  /* Leaves the switch only: nothing is emitted for this argument now. */
+	  }
+	}
+	if (LJ_LIKELY(tvisstr(o))) {
+	  len = strV(o)->len;
+	  s = strVdata(o);
+#if LJ_HASBUFFER
+	} else if (tvisbuf(o)) {
+	  SBufExt *sbx = bufV(o);
+	  if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
+	  len = sbufxlen(sbx);
+	  s = sbx->r;
+#endif
+	} else {
+	  GCstr *str = lj_strfmt_obj(L, o);
+	  len = str->len;
+	  s = strdata(str);
+	}
+	if ((sf & STRFMT_T_QUOTED))
+	  strfmt_putquotedlen(sb, s, len);  /* No formatting. */
+	else
+	  strfmt_putfstrlen(sb, sf, s, len);
+	break;
+	}
+      case STRFMT_CHAR:
+	lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+	break;
+      case STRFMT_PTR:  /* No formatting. */
+	lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
+	break;
+      default:
+	lj_assertL(0, "bad string format type");
+	break;
+      }
+    }
+  }
+  return retry;  /* Input value, or 1 if set above; NOTE(review): caller presumably reruns on 1. */
+}
+
 /* -- Conversions to strings ---------------------------------------------- */
 
 /* Convert integer to string. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_strfmt.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt.h
@@ -1,6 +1,6 @@
 /*
 ** String formatting.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_STRFMT_H
@@ -95,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_put
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
 #endif
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
+#if LJ_HASJIT
 LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
+#endif
 
 /* Formatted conversions to buffer. */
 LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
@@ -103,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf
 LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
 LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
 LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
+#if LJ_HASJIT
 LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
+#endif
+LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
 
 /* Conversions to strings. */
 LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt_num.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_strfmt_num.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strfmt_num.c
@@ -1,6 +1,6 @@
 /*
 ** String formatting for floating-point numbers.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 ** Contributed by Peter Cawley.
 */
 
@@ -576,7 +576,7 @@ static char *lj_strfmt_wfnum(SBuf *sb, S
 /* Add formatted floating-point number to buffer. */
 SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
 {
-  setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL));
+  sb->w = lj_strfmt_wfnum(sb, sf, n, NULL);
   return sb;
 }
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strscan.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_strscan.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strscan.c
@@ -1,6 +1,6 @@
 /*
 ** String scanning.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <math.h>
@@ -63,6 +63,7 @@
 #define STRSCAN_MAXDIG	800		/* 772 + extra are sufficient. */
 #define STRSCAN_DDIG	(STRSCAN_DIG/2)
 #define STRSCAN_DMASK	(STRSCAN_DDIG-1)
+#define STRSCAN_MAXEXP	(1 << 20)
 
 /* Helpers for circular buffer. */
 #define DNEXT(a)	(((a)+1) & STRSCAN_DMASK)
@@ -121,20 +122,21 @@ static StrScanFmt strscan_hex(const uint
   /* Format-specific handling. */
   switch (fmt) {
   case STRSCAN_INT:
-    if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+    if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg &&
+	!(x == 0 && neg)) {
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 8) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
     if (dig > 16) return STRSCAN_ERROR;
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -166,12 +168,12 @@ static StrScanFmt strscan_oct(const uint
     /* fallthrough */
   case STRSCAN_U32:
     if ((x >> 32)) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
     break;
   default:
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     break;
   }
   return fmt;
@@ -227,18 +229,18 @@ static StrScanFmt strscan_dec(const uint
       switch (fmt) {
       case STRSCAN_INT:
 	if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-	  o->i = neg ? -(int32_t)x : (int32_t)x;
+	  o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	  return STRSCAN_INT;  /* Fast path for 32 bit integers. */
 	}
 	if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; }
 	/* fallthrough */
       case STRSCAN_U32:
 	if ((x >> 32) != 0) return STRSCAN_ERROR;
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_U32;
       case STRSCAN_I64:
       case STRSCAN_U64:
-	o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+	o->u64 = neg ? ~x+1u : x;
 	return fmt;
       default:
       plainnumber:  /* Fast path for plain numbers < 2^63. */
@@ -346,18 +348,18 @@ static StrScanFmt strscan_bin(const uint
   switch (fmt) {
   case STRSCAN_INT:
     if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
-      o->i = neg ? -(int32_t)x : (int32_t)x;
+      o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
       return STRSCAN_INT;  /* Fast path for 32 bit integers. */
     }
     if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
     /* fallthrough */
   case STRSCAN_U32:
     if (dig > 32) return STRSCAN_ERROR;
-    o->i = neg ? -(int32_t)x : (int32_t)x;
+    o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
     return STRSCAN_U32;
   case STRSCAN_I64:
   case STRSCAN_U64:
-    o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+    o->u64 = neg ? ~x+1u : x;
     return fmt;
   default:
     break;
@@ -448,6 +450,7 @@ StrScanFmt lj_strscan_scan(const uint8_t
       if (dig) {
 	ex = (int32_t)(dp-(p-1)); dp = p-1;
 	while (ex < 0 && *dp-- == '0') ex++, dig--;  /* Skip trailing zeros. */
+	if (ex <= -STRSCAN_MAXEXP) return STRSCAN_ERROR;
 	if (base == 16) ex *= 4;
       }
     }
@@ -461,10 +464,11 @@ StrScanFmt lj_strscan_scan(const uint8_t
       if (!lj_char_isdigit(*p)) return STRSCAN_ERROR;
       xx = (*p++ & 15);
       while (lj_char_isdigit(*p)) {
-	if (xx < 65536) xx = xx * 10 + (*p & 15);
+	xx = xx * 10 + (*p & 15);
+	if (xx >= STRSCAN_MAXEXP) return STRSCAN_ERROR;
 	p++;
       }
-      ex += negx ? -(int32_t)xx : (int32_t)xx;
+      ex += negx ? (int32_t)(~xx+1u) : (int32_t)xx;
     }
 
     /* Parse suffix. */
@@ -499,8 +503,11 @@ StrScanFmt lj_strscan_scan(const uint8_t
       if ((opt & STRSCAN_OPT_TONUM)) {
 	o->n = neg ? -(double)x : (double)x;
 	return STRSCAN_NUM;
+      } else if (x == 0 && neg) {
+	o->n = -0.0;
+	return STRSCAN_NUM;
       } else {
-	o->i = neg ? -(int32_t)x : (int32_t)x;
+	o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
 	return STRSCAN_INT;
       }
     }
@@ -516,7 +523,7 @@ StrScanFmt lj_strscan_scan(const uint8_t
       fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
 
     /* Try to convert number to integer, if requested. */
-    if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
+    if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) {
       double n = o->n;
       int32_t i = lj_num2int(n);
       if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strscan.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_strscan.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_strscan.h
@@ -1,6 +1,6 @@
 /*
 ** String scanning.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_STRSCAN_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_tab.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_tab.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_tab.c
@@ -1,6 +1,6 @@
 /*
 ** Table handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -16,25 +16,6 @@
 
 /* -- Object hashing ------------------------------------------------------ */
 
-/* Hash values are masked with the table hash mask and used as an index. */
-static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
-{
-  Node *n = noderef(t->node);
-  return &n[hash & t->hmask];
-}
-
-/* String IDs are generated when a string is interned. */
-#define hashstr(t, s)		hashmask(t, (s)->sid)
-
-#define hashlohi(t, lo, hi)	hashmask((t), hashrot((lo), (hi)))
-#define hashnum(t, o)		hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
-#if LJ_GC64
-#define hashgcref(t, r) \
-  hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
-#else
-#define hashgcref(t, r)		hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
-#endif
-
 /* Hash an arbitrary key and return its anchor position in the hash table. */
 static Node *hashkey(const GCtab *t, cTValue *key)
 {
@@ -413,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCt
   return NULL;
 }
 
-cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
+cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
 {
   Node *n = hashstr(t, key);
   do {
@@ -546,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCt
   return lj_tab_newkey(L, t, &k);
 }
 
-TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
+TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
 {
   TValue k;
   Node *n = hashstr(t, key);
@@ -587,56 +568,66 @@ TValue *lj_tab_set(lua_State *L, GCtab *
 
 /* -- Table traversal ----------------------------------------------------- */
 
-/* Get the traversal index of a key. */
-static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
+/* Table traversal indexes:
+**
+** Array key index: [0 .. t->asize-1]
+** Hash key index:  [t->asize .. t->asize+t->hmask]
+** Invalid key:     ~0
+*/
+
+/* Get the successor traversal index of a key: 0 for nil, ~0u if invalid. */
+uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
 {
   TValue tmp;
   if (tvisint(key)) {
     int32_t k = intV(key);
     if ((uint32_t)k < t->asize)
-      return (uint32_t)k;  /* Array key indexes: [0..t->asize-1] */
+      return (uint32_t)k + 1;
     setnumV(&tmp, (lua_Number)k);
     key = &tmp;
   } else if (tvisnum(key)) {
     lua_Number nk = numV(key);
     int32_t k = lj_num2int(nk);
     if ((uint32_t)k < t->asize && nk == (lua_Number)k)
-      return (uint32_t)k;  /* Array key indexes: [0..t->asize-1] */
+      return (uint32_t)k + 1;
   }
   if (!tvisnil(key)) {
     Node *n = hashkey(t, key);
     do {
       if (lj_obj_equal(&n->key, key))
-	return t->asize + (uint32_t)(n - noderef(t->node));
-	/* Hash key indexes: [t->asize..t->asize+t->nmask] */
+	return t->asize + (uint32_t)((n+1) - noderef(t->node));
     } while ((n = nextnode(n)));
-    if (key->u32.hi == 0xfffe7fff)  /* ITERN was despecialized while running. */
-      return key->u32.lo - 1;
-    lj_err_msg(L, LJ_ERR_NEXTIDX);
-    return 0;  /* unreachable */
-  }
-  return ~0u;  /* A nil key starts the traversal. */
-}
-
-/* Advance to the next step in a table traversal. */
-int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
-{
-  uint32_t i = keyindex(L, t, key);  /* Find predecessor key index. */
-  for (i++; i < t->asize; i++)  /* First traverse the array keys. */
-    if (!tvisnil(arrayslot(t, i))) {
-      setintV(key, i);
-      copyTV(L, key+1, arrayslot(t, i));
+    if (key->u32.hi == LJ_KEYINDEX)  /* Despecialized ITERN while running. */
+      return key->u32.lo;
+    return ~0u;  /* Invalid key to next. */
+  }
+  return 0;  /* A nil key starts the traversal. */
+}
+
+/* Get next key/value pair into o[0]/o[1]. Returns 1, 0 (end) or -1 (bad key). */
+int lj_tab_next(GCtab *t, cTValue *key, TValue *o)
+{
+  uint32_t idx = lj_tab_keyindex(t, key);  /* Find successor index of key. */
+  /* First traverse the array part. */
+  for (; idx < t->asize; idx++) {
+    cTValue *a = arrayslot(t, idx);
+    if (LJ_LIKELY(!tvisnil(a))) {
+      setintV(o, idx);
+      o[1] = *a;
       return 1;
     }
-  for (i -= t->asize; i <= t->hmask; i++) {  /* Then traverse the hash keys. */
-    Node *n = &noderef(t->node)[i];
+  }
+  idx -= t->asize;
+  /* Then traverse the hash part. */
+  for (; idx <= t->hmask; idx++) {
+    Node *n = &noderef(t->node)[idx];
     if (!tvisnil(&n->val)) {
-      copyTV(L, key, &n->key);
-      copyTV(L, key+1, &n->val);
+      o[0] = n->key;
+      o[1] = n->val;
       return 1;
     }
   }
-  return 0;  /* End of traversal. */
+  return (int32_t)idx < 0 ? -1 : 0;  /* Invalid key or end of traversal. */
 }
 
 /* -- Table length calculation -------------------------------------------- */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_tab.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_tab.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_tab.h
@@ -1,6 +1,6 @@
 /*
 ** Table handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TAB_H
@@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint3
   return hi;
 }
 
+/* Hash values are masked with the table hash mask and used as an index. */
+static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
+{
+  Node *n = noderef(t->node);
+  return &n[hash & t->hmask];  /* Anchor node: entry (hash & hmask) of the node array. */
+}
+
+/* String IDs are generated when a string is interned. */
+#define hashstr(t, s)		hashmask(t, (s)->sid)
+
+#define hashlohi(t, lo, hi)	hashmask((t), hashrot((lo), (hi)))  /* Mix two 32 bit words. */
+#define hashnum(t, o)		hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))  /* NOTE(review): << 1 presumably drops the sign bit -- confirm. */
+#if LJ_GC64  /* Reference is split into its low and high 32 bit halves. */
+#define hashgcref(t, r) \
+  hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
+#else  /* Same reference word used twice, the second time offset by HASH_BIAS. */
+#define hashgcref(t, r)		hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
+#endif
+
 #define hsize2hbits(s)	((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
 
 LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
@@ -50,14 +69,14 @@ LJ_FUNCA void lj_tab_reasize(lua_State *
 /* Caveat: all getters except lj_tab_get() can return NULL! */
 
 LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
-LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
+LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
 LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
 
 /* Caveat: all setters require a write barrier for the stored value. */
 
 LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
 LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
-LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
+LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
 LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
 
 #define inarray(t, key)		((MSize)(key) < (MSize)(t)->asize)
@@ -67,7 +86,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L,
 #define lj_tab_setint(L, t, key) \
   (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
 
-LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
+LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key);
+LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o);
 LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
 #if LJ_HASJIT
 LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for target CPU.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_H
@@ -55,10 +55,16 @@ typedef uint32_t RegSP;
 /* Bitset for registers. 32 registers suffice for most architectures.
 ** Note that one set holds bits for both GPRs and FPRs.
 */
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64
 typedef uint64_t RegSet;
+#define RSET_BITS		6
+#define rset_picktop_(rs)	((Reg)lj_fls64(rs))
+#define rset_pickbot_(rs)	((Reg)lj_ffs64(rs))
 #else
 typedef uint32_t RegSet;
+#define RSET_BITS		5
+#define rset_picktop_(rs)	((Reg)lj_fls(rs))
+#define rset_pickbot_(rs)	((Reg)lj_ffs(rs))
 #endif
 
 #define RID2RSET(r)		(((RegSet)1) << (r))
@@ -69,13 +75,6 @@ typedef uint32_t RegSet;
 #define rset_set(rs, r)		(rs |= RID2RSET(r))
 #define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
 #define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
-#define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
-#define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
-#else
-#define rset_picktop(rs)	((Reg)lj_fls(rs))
-#define rset_pickbot(rs)	((Reg)lj_ffs(rs))
-#endif
 
 /* -- Register allocation cost -------------------------------------------- */
 
@@ -144,6 +143,8 @@ typedef uint32_t RegCost;
 #include "lj_target_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_target_mips.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_target_riscv.h"
 #else
 #error "Missing include for target CPU"
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_arm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target_arm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_arm.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for ARM CPUs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_ARM_H
@@ -211,6 +211,7 @@ typedef enum ARMIns {
   /* ARMv6T2 */
   ARMI_MOVW = 0xe3000000,
   ARMI_MOVT = 0xe3400000,
+  ARMI_BFI = 0xe7c00010,
 
   /* VFP */
   ARMI_VMOV_D = 0xeeb00b40,
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_arm64.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target_arm64.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_arm64.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for ARM64 CPUs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_ARM64_H
@@ -31,6 +31,8 @@ enum {
 
   /* Calling conventions. */
   RID_RET = RID_X0,
+  RID_RETLO = RID_X0,
+  RID_RETHI = RID_X1,
   RID_FPRET = RID_D0,
 
   /* These definitions must match with the *.dasc file(s): */
@@ -210,6 +212,8 @@ typedef enum A64Ins {
 
   A64I_EXTRw = 0x13800000,
   A64I_EXTRx = 0x93c00000,
+  A64I_BFMw = 0x33000000,
+  A64I_BFMx = 0xb3400000,
   A64I_SBFMw = 0x13000000,
   A64I_SBFMx = 0x93400000,
   A64I_SXTBw = 0x13001c00,
@@ -230,6 +234,8 @@ typedef enum A64Ins {
   A64I_MOVZx = 0xd2800000,
   A64I_MOVNw = 0x12800000,
   A64I_MOVNx = 0x92800000,
+  A64I_ADR = 0x10000000,
+  A64I_ADRP = 0x90000000,
 
   A64I_LDRB = 0x39400000,
   A64I_LDRH = 0x79400000,
@@ -256,6 +262,9 @@ typedef enum A64Ins {
   A64I_CBZ = 0x34000000,
   A64I_CBNZ = 0x35000000,
 
+  A64I_BRAAZ = 0xd61f081f,
+  A64I_BLRAAZ = 0xd63f081f,
+
   A64I_NOP = 0xd503201f,
 
   /* FP */
@@ -313,6 +322,9 @@ typedef enum A64Ins {
   A64I_FMOV_DI = 0x1e601000,
 } A64Ins;
 
+#define A64I_BR_AUTH	(LJ_ABI_PAUTH ? A64I_BRAAZ : A64I_BR)
+#define A64I_BLR_AUTH	(LJ_ABI_PAUTH ? A64I_BLRAAZ : A64I_BLR)
+
 typedef enum A64Shift {
   A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
 } A64Shift;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_mips.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target_mips.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_mips.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for MIPS CPUs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_MIPS_H
@@ -256,6 +256,8 @@ typedef enum MIPSIns {
   MIPSI_ROTRV = 0x00000046,	/* MIPSXXR2 */
   MIPSI_DROTRV = 0x00000056,
 
+  MIPSI_INS = 0x7c000004,	/* MIPSXXR2 */
+
   MIPSI_SEB = 0x7c000420,	/* MIPSXXR2 */
   MIPSI_SEH = 0x7c000620,	/* MIPSXXR2 */
   MIPSI_WSBH = 0x7c0000a0,	/* MIPSXXR2 */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_ppc.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target_ppc.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_ppc.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for PPC CPUs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_PPC_H
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_riscv.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_riscv.h
@@ -0,0 +1,513 @@
+/*
+** Definitions for RISC-V CPUs.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_RISCV_H
+#define _LJ_TARGET_RISCV_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#if LJ_ARCH_EMBEDDED
+#define GPRDEF(_) \
+  _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
+  _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15)
+#else
+#define GPRDEF(_) \
+  _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \
+  _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
+  _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
+  _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31)
+#endif
+#if LJ_SOFTFP
+#define FPRDEF(_)
+#else
+#define FPRDEF(_) \
+  _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
+  _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
+  _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
+  _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
+#endif
+#define VRIDDEF(_)
+
+#define RIDENUM(name)	RID_##name,
+
+enum {
+  GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
+  FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
+  RID_MAX,
+  RID_ZERO = RID_X0,
+  RID_TMP = RID_RA,
+  RID_GP = RID_X3,
+  RID_TP = RID_X4,
+
+  /* Calling conventions. */
+  RID_RET = RID_X10,
+#if LJ_LE
+  RID_RETHI = RID_X11,
+  RID_RETLO = RID_X10,
+#else
+  RID_RETHI = RID_X10,
+  RID_RETLO = RID_X11,
+#endif
+#if LJ_SOFTFP
+  RID_FPRET = RID_X10,
+#else
+  RID_FPRET = RID_F10,
+#endif
+  RID_CFUNCADDR = RID_X5,
+
+  /* These definitions must match with the *.dasc file(s): */
+  RID_BASE = RID_X18,		/* Interpreter BASE. */
+  RID_LPC = RID_X20,		/* Interpreter PC. */
+  RID_GL = RID_X21,		/* Interpreter GL. */
+  RID_LREG = RID_X23,		/* Interpreter L. */
+
+  /* Register ranges [min, max) and number of registers. */
+  RID_MIN_GPR = RID_X0,
+  RID_MAX_GPR = RID_X31+1,
+  RID_MIN_FPR = RID_MAX_GPR,
+#if LJ_SOFTFP
+  RID_MAX_FPR = RID_MIN_FPR,
+#else
+  RID_MAX_FPR = RID_F31+1,
+#endif
+  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR	/* Only even regs are used. */
+};
+
+#define RID_NUM_KREF		RID_NUM_GPR
+#define RID_MIN_KREF		RID_X0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except ZERO, TMP, SP, GP, TP and GL. */
+#define RSET_FIXED \
+  (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
+   RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL))
+#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#if LJ_SOFTFP
+#define RSET_FPR	0
+#else
+#define RSET_FPR	RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#endif
+
+#define RSET_ALL	(RSET_GPR|RSET_FPR)
+#define RSET_INIT	RSET_ALL
+
+#define RSET_SCRATCH_GPR \
+  (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\
+   RSET_RANGE(RID_X10, RID_X17+1))
+
+#if LJ_SOFTFP
+#define RSET_SCRATCH_FPR	0
+#else
+#define RSET_SCRATCH_FPR \
+  (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\
+   RSET_RANGE(RID_F28, RID_F31+1))
+#endif
+#define RSET_SCRATCH		(RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+
+#define REGARG_FIRSTGPR		RID_X10
+#define REGARG_LASTGPR		RID_X17
+#define REGARG_NUMGPR		8
+
+#if LJ_ABI_SOFTFP
+#define REGARG_FIRSTFPR		0
+#define REGARG_LASTFPR		0
+#define REGARG_NUMFPR		0
+#else
+#define REGARG_FIRSTFPR		RID_F10
+#define REGARG_LASTFPR		RID_F17
+#define REGARG_NUMFPR		8
+#endif
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use.
+*/
+#if LJ_32
+#define SPS_FIXED	5
+#else
+#define SPS_FIXED	4
+#endif
+#define SPS_FIRST	4
+
+#define SPOFS_TMP	0
+
+#define sps_scale(slot)		(4 * (int32_t)(slot))
+#define sps_align(slot)		(((slot) - SPS_FIXED + 3) & ~3)
+
+/* -- Exit state ---------------------------------------------------------- */
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+#if !LJ_SOFTFP
+  lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
+#endif
+  intptr_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
+  int32_t spill[256];		/* Spill slots. */
+} ExitState;
+
+/* Highest exit + 1 indicates stack check. */
+#define EXITSTATE_CHECKEXIT	1
+
+/* Return the address of a per-trace exit stub. */
+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
+{
+  while (*p == 0x00000013) p++;  /* Skip RISCVI_NOP. */
+  return p + 4 + exitno;
+}
+/* Avoid dependence on lj_jit.h if only including lj_target.h. */
+#define exitstub_trace_addr(T, exitno) \
+  exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* Instruction fields. */
+#define RISCVF_D(d)	(((d)&31) << 7)
+#define RISCVF_S1(r)	(((r)&31) << 15)
+#define RISCVF_S2(r)	(((r)&31) << 20)
+#define RISCVF_S3(r)	(((r)&31) << 27)
+#define RISCVF_FUNCT2(f)	(((f)&3) << 25)
+#define RISCVF_FUNCT3(f)	(((f)&7) << 12)
+#define RISCVF_FUNCT7(f)	(((f)&127) << 25)
+#define RISCVF_SHAMT(s)	((s) << 20)
+#define RISCVF_RM(m)	(((m)&7) << 12)
+#define RISCVF_IMMI(i)	((i) << 20)
+#define RISCVF_IMMS(i)	(((i)&0xfe0) << 20 | ((i)&0x1f) << 7)
+#define RISCVF_IMMB(i)	(((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7)
+#define RISCVF_IMMU(i)	(((i)&0xfffff) << 12)
+#define RISCVF_IMMJ(i)	(((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20)
+
+/* Encode helpers. */
+#define RISCVF_W_HI(w)  ((w) - ((((w)&0xfff)^0x800) - 0x800))
+#define RISCVF_W_LO(w)  ((w)&0xfff)
+#define RISCVF_HI(i)	((RISCVF_W_HI(i) >> 12) & 0xfffff)
+#define RISCVF_LO(i)	RISCVF_W_LO(i)
+
+/* Check for valid field range. */
+#define RISCVF_SIMM_OK(x, b)	((((x) + (1 << (b-1))) >> (b)) == 0)
+#define checki12(i)		RISCVF_SIMM_OK(i, 12)
+#define checki13(i)		RISCVF_SIMM_OK(i, 13)
+#define checki20(i)		RISCVF_SIMM_OK(i, 20)
+#define checki21(i)		RISCVF_SIMM_OK(i, 21)
+
+typedef enum RISCVIns {
+
+  /* --- RVI --- */
+  RISCVI_LUI = 0x00000037,
+  RISCVI_AUIPC = 0x00000017,
+
+  RISCVI_JAL = 0x0000006f,
+  RISCVI_JALR = 0x00000067,
+
+  RISCVI_ADDI = 0x00000013,
+  RISCVI_SLTI = 0x00002013,
+  RISCVI_SLTIU = 0x00003013,
+  RISCVI_XORI = 0x00004013,
+  RISCVI_ORI = 0x00006013,
+  RISCVI_ANDI = 0x00007013,
+
+  RISCVI_SLLI = 0x00001013,
+  RISCVI_SRLI = 0x00005013,
+  RISCVI_SRAI = 0x40005013,
+
+  RISCVI_ADD = 0x00000033,
+  RISCVI_SUB = 0x40000033,
+  RISCVI_SLL = 0x00001033,
+  RISCVI_SLT = 0x00002033,
+  RISCVI_SLTU = 0x00003033,
+  RISCVI_XOR = 0x00004033,
+  RISCVI_SRL = 0x00005033,
+  RISCVI_SRA = 0x40005033,
+  RISCVI_OR = 0x00006033,
+  RISCVI_AND = 0x00007033,
+
+  RISCVI_LB = 0x00000003,
+  RISCVI_LH = 0x00001003,
+  RISCVI_LW = 0x00002003,
+  RISCVI_LBU = 0x00004003,
+  RISCVI_LHU = 0x00005003,
+  RISCVI_SB = 0x00000023,
+  RISCVI_SH = 0x00001023,
+  RISCVI_SW = 0x00002023,
+
+  RISCVI_BEQ = 0x00000063,
+  RISCVI_BNE = 0x00001063,
+  RISCVI_BLT = 0x00004063,
+  RISCVI_BGE = 0x00005063,
+  RISCVI_BLTU = 0x00006063,
+  RISCVI_BGEU = 0x00007063,
+
+  RISCVI_ECALL = 0x00000073,
+  RISCVI_EBREAK = 0x00100073,
+
+  RISCVI_NOP = 0x00000013,
+  RISCVI_MV = 0x00000013,
+  RISCVI_NOT = 0xfff04013,
+  RISCVI_NEG = 0x40000033,
+  RISCVI_RET = 0x00008067,
+  RISCVI_ZEXT_B = 0x0ff07013,
+
+#if LJ_TARGET_RISCV64	/* RV64I additions to the base integer set. */
+  RISCVI_LWU = 0x00006003,	/* LOAD opcode 0x03, funct3=110. */
+  RISCVI_LD = 0x00003003,
+  RISCVI_SD = 0x00003023,
+
+  RISCVI_ADDIW = 0x0000001b,
+
+  RISCVI_SLLIW = 0x0000101b,
+  RISCVI_SRLIW = 0x0000501b,
+  RISCVI_SRAIW = 0x4000501b,
+
+  RISCVI_ADDW = 0x0000003b,
+  RISCVI_SUBW = 0x4000003b,
+  RISCVI_SLLW = 0x0000103b,
+  RISCVI_SRLW = 0x0000503b,
+  RISCVI_SRAW = 0x4000503b,
+
+  RISCVI_NEGW = 0x4000003b,	/* Same base encoding as SUBW. */
+  RISCVI_SEXT_W = 0x0000001b,	/* Same base encoding as ADDIW. */
+#endif
+
+  /* --- RVM --- */
+  RISCVI_MUL = 0x02000033,
+  RISCVI_MULH = 0x02001033,
+  RISCVI_MULHSU = 0x02002033,
+  RISCVI_MULHU = 0x02003033,
+  RISCVI_DIV = 0x02004033,
+  RISCVI_DIVU = 0x02005033,
+  RISCVI_REM = 0x02006033,
+  RISCVI_REMU = 0x02007033,
+#if LJ_TARGET_RISCV64
+  RISCVI_MULW = 0x0200003b,
+  RISCVI_DIVW = 0x0200403b,
+  RISCVI_DIVUW = 0x0200503b,
+  RISCVI_REMW = 0x0200603b,
+  RISCVI_REMUW = 0x0200703b,
+#endif
+
+  /* --- RVF --- */
+  RISCVI_FLW = 0x00002007,
+  RISCVI_FSW = 0x00002027,
+
+  RISCVI_FMADD_S = 0x00000043,
+  RISCVI_FMSUB_S = 0x00000047,
+  RISCVI_FNMSUB_S = 0x0000004b,
+  RISCVI_FNMADD_S = 0x0000004f,
+
+  RISCVI_FADD_S = 0x00000053,
+  RISCVI_FSUB_S = 0x08000053,
+  RISCVI_FMUL_S = 0x10000053,
+  RISCVI_FDIV_S = 0x18000053,
+  RISCVI_FSQRT_S = 0x58000053,
+
+  RISCVI_FSGNJ_S = 0x20000053,
+  RISCVI_FSGNJN_S = 0x20001053,
+  RISCVI_FSGNJX_S = 0x20002053,
+
+  RISCVI_FMIN_S = 0x28000053,
+  RISCVI_FMAX_S = 0x28001053,
+
+  RISCVI_FCVT_W_S = 0xc0000053,
+  RISCVI_FCVT_WU_S = 0xc0100053,
+
+  RISCVI_FMV_X_W = 0xe0000053,
+
+  RISCVI_FEQ_S = 0xa0002053,
+  RISCVI_FLT_S = 0xa0001053,
+  RISCVI_FLE_S = 0xa0000053,
+
+  RISCVI_FCLASS_S = 0xe0001053,
+
+  RISCVI_FCVT_S_W = 0xd0000053,
+  RISCVI_FCVT_S_WU = 0xd0100053,
+  RISCVI_FMV_W_X = 0xf0000053,	/* OP-FP opcode 0x53, funct7=1111000. */
+
+  RISCVI_FMV_S = 0x20000053,	/* Same base encoding as FSGNJ_S. */
+  RISCVI_FNEG_S = 0x20001053,	/* Same base encoding as FSGNJN_S. */
+  RISCVI_FABS_S = 0x20002053,	/* Same base encoding as FSGNJX_S. */
+#if LJ_TARGET_RISCV64
+  RISCVI_FCVT_L_S = 0xc0200053,
+  RISCVI_FCVT_LU_S = 0xc0300053,
+  RISCVI_FCVT_S_L = 0xd0200053,
+  RISCVI_FCVT_S_LU = 0xd0300053,
+#endif
+
+  /* --- RVD --- */
+  RISCVI_FLD = 0x00003007,
+  RISCVI_FSD = 0x00003027,
+
+  RISCVI_FMADD_D = 0x02000043,
+  RISCVI_FMSUB_D = 0x02000047,
+  RISCVI_FNMSUB_D = 0x0200004b,
+  RISCVI_FNMADD_D = 0x0200004f,
+
+  RISCVI_FADD_D = 0x02000053,
+  RISCVI_FSUB_D = 0x0a000053,
+  RISCVI_FMUL_D = 0x12000053,
+  RISCVI_FDIV_D = 0x1a000053,
+  RISCVI_FSQRT_D = 0x5a000053,
+
+  RISCVI_FSGNJ_D = 0x22000053,
+  RISCVI_FSGNJN_D = 0x22001053,
+  RISCVI_FSGNJX_D = 0x22002053,
+
+  RISCVI_FMIN_D = 0x2a000053,
+  RISCVI_FMAX_D = 0x2a001053,
+
+  RISCVI_FCVT_S_D = 0x40100053,
+  RISCVI_FCVT_D_S = 0x42000053,
+
+  RISCVI_FEQ_D = 0xa2002053,
+  RISCVI_FLT_D = 0xa2001053,
+  RISCVI_FLE_D = 0xa2000053,
+
+  RISCVI_FCLASS_D = 0xe2001053,
+
+  RISCVI_FCVT_W_D = 0xc2000053,
+  RISCVI_FCVT_WU_D = 0xc2100053,
+  RISCVI_FCVT_D_W = 0xd2000053,
+  RISCVI_FCVT_D_WU = 0xd2100053,
+
+  RISCVI_FMV_D = 0x22000053,
+  RISCVI_FNEG_D = 0x22001053,
+  RISCVI_FABS_D = 0x22002053,
+#if LJ_TARGET_RISCV64
+  RISCVI_FCVT_L_D = 0xc2200053,
+  RISCVI_FCVT_LU_D = 0xc2300053,
+  RISCVI_FMV_X_D = 0xe2000053,
+  RISCVI_FCVT_D_L = 0xd2200053,
+  RISCVI_FCVT_D_LU = 0xd2300053,
+  RISCVI_FMV_D_X = 0xf2000053,
+#endif
+
+  /* --- Zifencei --- */
+  RISCVI_FENCE = 0x0000000f,
+  RISCVI_FENCE_I = 0x0000100f,
+
+  /* --- Zicsr --- */
+  RISCVI_CSRRW = 0x00001073,
+  RISCVI_CSRRS = 0x00002073,
+  RISCVI_CSRRC = 0x00003073,
+  RISCVI_CSRRWI = 0x00005073,
+  RISCVI_CSRRSI = 0x00006073,
+  RISCVI_CSRRCI = 0x00007073,
+
+  /* --- RVB --- */
+  /* Zba */
+  RISCVI_SH1ADD = 0x20002033,
+  RISCVI_SH2ADD = 0x20004033,
+  RISCVI_SH3ADD = 0x20006033,
+#if LJ_TARGET_RISCV64
+  RISCVI_ADD_UW = 0x0800003b,
+
+  RISCVI_SH1ADD_UW = 0x2000203b,
+  RISCVI_SH2ADD_UW = 0x2000403b,
+  RISCVI_SH3ADD_UW = 0x2000603b,
+
+  RISCVI_SLLI_UW = 0x0800101b,
+
+  RISCVI_ZEXT_W = 0x0800003b,
+#endif
+  /* Zbb */
+  RISCVI_ANDN = 0x40007033,
+  RISCVI_ORN = 0x40006033,
+  RISCVI_XNOR = 0x40004033,
+
+  RISCVI_CLZ = 0x60001013,
+  RISCVI_CTZ = 0x60101013,
+
+  RISCVI_CPOP = 0x60201013,
+
+  RISCVI_MAX = 0x0a006033,
+  RISCVI_MAXU = 0x0a007033,
+  RISCVI_MIN = 0x0a004033,
+  RISCVI_MINU = 0x0a005033,
+
+  RISCVI_SEXT_B = 0x60401013,
+  RISCVI_SEXT_H = 0x60501013,
+#if LJ_TARGET_RISCV32
+  RISCVI_ZEXT_H = 0x08004033,
+#elif LJ_TARGET_RISCV64
+  RISCVI_ZEXT_H = 0x0800403b,
+#endif
+
+  RISCVI_ROL = 0x60001033,
+  RISCVI_ROR = 0x60005033,
+  RISCVI_RORI = 0x60005013,
+
+  RISCVI_ORC_B = 0x28705013,
+
+#if LJ_TARGET_RISCV32
+  RISCVI_REV8 = 0x69805013,
+#elif LJ_TARGET_RISCV64
+  RISCVI_REV8 = 0x6b805013,
+
+  RISCVI_CLZW = 0x6000101b,
+  RISCVI_CTZW = 0x6010101b,
+
+  RISCVI_CPOPW = 0x6020101b,
+
+  RISCVI_ROLW = 0x6000103b,
+  RISCVI_RORIW = 0x6000501b,
+  RISCVI_RORW = 0x6000503b,
+#endif
+  /* NYI: Zbc, Zbs */
+
+  /* TBD: RVV?, RVP?, RVJ? */
+
+  /* --- XThead* --- */
+  /* XTHeadBa */
+  RISCVI_TH_ADDSL = 0x0000100b,
+
+  /* XTHeadBb */
+  RISCVI_TH_SRRI = 0x1000100b,
+#if LJ_TARGET_RISCV64
+  RISCVI_TH_SRRIW = 0x1400100b,
+#endif
+  RISCVI_TH_EXT = 0x0000200b,
+  RISCVI_TH_EXTU = 0x0000300b,
+  RISCVI_TH_FF0 = 0x8400100b,
+  RISCVI_TH_FF1 = 0x8600100b,
+  RISCVI_TH_REV = 0x8200100b,
+#if LJ_TARGET_RISCV64
+  RISCVI_TH_REVW = 0x9000100b,
+#endif
+  RISCVI_TH_TSTNBZ = 0x8000100b,
+
+  /* XTHeadBs */
+  RISCVI_TH_TST = 0x8800100b,
+
+  /* XTHeadCondMov */
+  RISCVI_TH_MVEQZ = 0x4000100b,
+  RISCVI_TH_MVNEZ = 0x4200100b,
+
+  /* XTHeadMac */
+  RISCVI_TH_MULA = 0x2000100b,
+  RISCVI_TH_MULAH = 0x2800100b,
+#if LJ_TARGET_RISCV64
+  RISCVI_TH_MULAW = 0x2400100b,
+#endif
+  RISCVI_TH_MULS = 0x2200100b,
+  RISCVI_TH_MULSH = 0x2a00100b,
+  RISCVI_TH_MULSW = 0x2600100b,
+
+  /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */
+} RISCVIns;
+
+typedef enum RISCVRM {
+  RISCVRM_RNE = 0,
+  RISCVRM_RTZ = 1,
+  RISCVRM_RDN = 2,
+  RISCVRM_RUP = 3,
+  RISCVRM_RMM = 4,
+  RISCVRM_DYN = 7,
+} RISCVRM;
+
+#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_x86.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_target_x86.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_target_x86.h
@@ -1,6 +1,6 @@
 /*
 ** Definitions for x86 and x64 CPUs.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TARGET_X86_H
@@ -38,10 +38,9 @@ enum {
   RID_RET = RID_EAX,
 #if LJ_64
   RID_FPRET = RID_XMM0,
-#else
+#endif
   RID_RETLO = RID_EAX,
   RID_RETHI = RID_EDX,
-#endif
 
   /* These definitions must match with the *.dasc file(s): */
   RID_BASE = RID_EDX,		/* Interpreter BASE. */
@@ -117,8 +116,8 @@ enum {
 
 #if LJ_64
 /* Prefer the low 8 regs of each type to reduce REX prefixes. */
-#undef rset_picktop
-#define rset_picktop(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
+#undef rset_picktop_
+#define rset_picktop_(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
 #endif
 
 /* -- Spill slots --------------------------------------------------------- */
@@ -165,6 +164,8 @@ typedef struct {
 #define EXITSTUB_SPACING	(2+2)
 #define EXITSTUBS_PER_GROUP	32
 
+#define EXITTRACE_VMSTATE	1	/* g->vmstate has traceno on exit. */
+
 /* -- x86 ModRM operand encoding ------------------------------------------ */
 
 typedef enum {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_trace.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_trace.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_trace.c
@@ -1,6 +1,6 @@
 /*
 ** Trace management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_trace_c
@@ -153,6 +153,9 @@ static void trace_save(jit_State *J, GCt
   newwhite(J2G(J), T);
   T->gct = ~LJ_TTRACE;
   T->ir = (IRIns *)p - J->cur.nk;  /* The IR has already been copied above. */
+#if LJ_ABI_PAUTH
+  T->mcauth = lj_ptr_sign((ASMFunction)T->mcode, T);
+#endif
   p += szins;
   TRACE_APPENDVEC(snap, nsnap, SnapShot)
   TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
@@ -215,8 +218,8 @@ static void trace_unpatch(jit_State *J,
     break;
   case BC_JITERL:
   case BC_JLOOP:
-    lj_assertJ(op == BC_ITERL || op == BC_LOOP || bc_isret(op),
-	       "bad original bytecode %d", op);
+    lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP ||
+	       bc_isret(op), "bad original bytecode %d", op);
     *pc = T->startins;
     break;
   case BC_JMP:
@@ -373,8 +376,13 @@ void lj_trace_freestate(global_State *g)
 /* Blacklist a bytecode instruction. */
 static void blacklist_pc(GCproto *pt, BCIns *pc)
 {
-  setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
-  pt->flags |= PROTO_ILOOP;
+  if (bc_op(*pc) == BC_ITERN) {
+    setbc_op(pc, BC_ITERC);
+    setbc_op(pc+1+bc_j(pc[1]), BC_JMP);
+  } else {
+    setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
+    pt->flags |= PROTO_ILOOP;
+  }
 }
 
 /* Penalize a bytecode instruction. */
@@ -411,7 +419,7 @@ static void trace_start(jit_State *J)
   TraceNo traceno;
 
   if ((J->pt->flags & PROTO_NOJIT)) {  /* JIT disabled for this proto? */
-    if (J->parent == 0 && J->exitno == 0) {
+    if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) {
       /* Lazy bytecode patching to disable hotcount events. */
       lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
 		 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF,
@@ -423,6 +431,12 @@ static void trace_start(jit_State *J)
     return;
   }
 
+  /* Ensuring forward progress for BC_ITERN can trigger hotcount again. */
+  if (!J->parent && bc_op(*J->pc) == BC_JLOOP) {  /* Already compiled. */
+    J->state = LJ_TRACE_IDLE;  /* Silently ignored. */
+    return;
+  }
+
   /* Get a new trace number. */
   traceno = trace_findfree(J);
   if (LJ_UNLIKELY(traceno == 0)) {  /* No free trace? */
@@ -496,6 +510,7 @@ static void trace_stop(jit_State *J)
     J->cur.nextroot = pt->trace;
     pt->trace = (TraceNo1)traceno;
     break;
+  case BC_ITERN:
   case BC_RET:
   case BC_RET0:
   case BC_RET1:
@@ -506,7 +521,11 @@ static void trace_stop(jit_State *J)
     lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace");
     lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
     /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
-    traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE;
+    {
+      SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
+      snap->count = SNAPCOUNT_DONE;
+      if (J->cur.topslot > snap->topslot) snap->topslot = J->cur.topslot;
+    }
     /* Add to side trace chain in root trace. */
     {
       GCtrace *root = traceref(J, J->cur.root);
@@ -594,21 +613,27 @@ static int trace_abort(jit_State *J)
     J->cur.link = 0;
     J->cur.linktype = LJ_TRLINK_NONE;
     lj_vmevent_send(L, TRACE,
-      TValue *frame;
+      cTValue *bot = tvref(L->stack)+LJ_FR2;
+      cTValue *frame;
       const BCIns *pc;
-      GCfunc *fn;
+      BCPos pos = 0;
       setstrV(L, L->top++, lj_str_newlit(L, "abort"));
       setintV(L->top++, traceno);
       /* Find original Lua function call to generate a better error message. */
-      frame = J->L->base-1;
-      pc = J->pc;
-      while (!isluafunc(frame_func(frame))) {
-	pc = (frame_iscont(frame) ? frame_contpc(frame) : frame_pc(frame)) - 1;
-	frame = frame_prev(frame);
+      for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
+	if (isluafunc(frame_func(frame))) {
+	  pos = proto_bcpos(funcproto(frame_func(frame)), pc);
+	  break;
+	} else if (frame_prev(frame) <= bot) {
+	  break;
+	} else if (frame_iscont(frame)) {
+	  pc = frame_contpc(frame) - 1;
+	} else {
+	  pc = frame_pc(frame) - 1;
+	}
       }
-      fn = frame_func(frame);
-      setfuncV(L, L->top++, fn);
-      setintV(L->top++, proto_bcpos(funcproto(fn), pc));
+      setfuncV(L, L->top++, frame_func(frame));
+      setintV(L->top++, pos);
       copyTV(L, L->top++, restorestack(L, errobj));
       copyTV(L, L->top++, &J->errinfo);
     );
@@ -651,15 +676,22 @@ static TValue *trace_state(lua_State *L,
       J->state = LJ_TRACE_RECORD;  /* trace_start() may change state. */
       trace_start(J);
       lj_dispatch_update(J2G(J));
-      break;
+      if (J->state != LJ_TRACE_RECORD_1ST)
+	break;
+      /* fallthrough */
 
+    case LJ_TRACE_RECORD_1ST:
+      J->state = LJ_TRACE_RECORD;
+      /* fallthrough */
     case LJ_TRACE_RECORD:
       trace_pendpatch(J, 0);
       setvmstate(J2G(J), RECORD);
       lj_vmevent_send_(L, RECORD,
-	/* Save/restore tmptv state for trace recorder. */
+	/* Save/restore state for trace recorder. */
 	TValue savetv = J2G(J)->tmptv;
 	TValue savetv2 = J2G(J)->tmptv2;
+	TraceNo parent = J->parent;
+	ExitNo exitno = J->exitno;
 	setintV(L->top++, J->cur.traceno);
 	setfuncV(L, L->top++, J->fn);
 	setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1);
@@ -667,6 +699,8 @@ static TValue *trace_state(lua_State *L,
       ,
 	J2G(J)->tmptv = savetv;
 	J2G(J)->tmptv2 = savetv2;
+	J->parent = parent;
+	J->exitno = exitno;
       );
       lj_record_ins(J);
       break;
@@ -821,7 +855,7 @@ static void trace_exit_regs(lua_State *L
 }
 #endif
 
-#ifdef EXITSTATE_PCREG
+#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
 /* Determine trace number from pc of exit instruction. */
 static TraceNo trace_exit_find(jit_State *J, MCode *pc)
 {
@@ -843,10 +877,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State
   lua_State *L = J->L;
   ExitState *ex = (ExitState *)exptr;
   ExitDataCP exd;
-  int errcode;
-  const BCIns *pc;
+  int errcode, exitcode = J->exitcode;
+  TValue exiterr;
+  const BCIns *pc, *retpc;
   void *cf;
   GCtrace *T;
+
+  setnilV(&exiterr);
+  if (exitcode) {  /* Trace unwound with error code. */
+    J->exitcode = 0;
+    copyTV(L, &exiterr, L->top-1);
+  }
+
 #ifdef EXITSTATE_PCREG
   J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
 #endif
@@ -866,6 +908,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State
   if (errcode)
     return -errcode;  /* Return negated error code. */
 
+  if (exitcode) copyTV(L, L->top++, &exiterr);  /* Anchor the error object. */
+
   if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
     lj_vmevent_send(L, TEXIT,
       lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
@@ -877,7 +921,9 @@ int LJ_FASTCALL lj_trace_exit(jit_State
   pc = exd.pc;
   cf = cframe_raw(L->cframe);
   setcframe_pc(cf, pc);
-  if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
+  if (exitcode) {
+    return -exitcode;
+  } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
     /* Just exit to interpreter. */
   } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
     if (!(G(L)->hookmask & HOOK_GC))
@@ -885,21 +931,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State
   } else {
     trace_hotside(J, pc);
   }
-  if (bc_op(*pc) == BC_JLOOP) {
-    BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
-    if (bc_isret(bc_op(*retpc))) {
-      if (J->state == LJ_TRACE_RECORD) {
-	J->patchins = *pc;
-	J->patchpc = (BCIns *)pc;
-	*J->patchpc = *retpc;
-	J->bcskip = 1;
-      } else {
-	pc = retpc;
-	setcframe_pc(cf, pc);
-      }
-    }
-  }
-  /* Return MULTRES or 0. */
+  /* Return MULTRES or 0 or -17. */
   ERRNO_RESTORE
   switch (bc_op(*pc)) {
   case BC_CALLM: case BC_CALLMT:
@@ -908,6 +940,18 @@ int LJ_FASTCALL lj_trace_exit(jit_State
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
   case BC_TSETM:
     return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc));
+  case BC_JLOOP:
+    retpc = &traceref(J, bc_d(*pc))->startins;
+    if (bc_isret(bc_op(*retpc)) || bc_op(*retpc) == BC_ITERN) {
+      /* Dispatch to original ins to ensure forward progress. */
+      if (J->state != LJ_TRACE_RECORD) return -17;
+      /* Unpatch bytecode when recording. */
+      J->patchins = *pc;
+      J->patchpc = (BCIns *)pc;
+      *J->patchpc = *retpc;
+      J->bcskip = 1;
+    }
+    return 0;
   default:
     if (bc_op(*pc) >= BC_FUNCF)
       return (int)((BCReg)(L->top - L->base) + 1);
@@ -915,4 +959,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State
   }
 }
 
+#if LJ_UNWIND_JIT
+/* Given an mcode address determine trace exit address for unwinding. Stores the exit number in *ep. */
+uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
+{
+#if EXITTRACE_VMSTATE
+  TraceNo traceno = J2G(J)->vmstate;  /* Trace number is taken from g->vmstate. */
+#else
+  TraceNo traceno = trace_exit_find(J, (MCode *)addr);  /* Look up trace by mcode address. */
+#endif
+  GCtrace *T = traceref(J, traceno);
+  if (T
+#if EXITTRACE_VMSTATE
+      && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode
+#endif
+     ) {
+    SnapShot *snap = T->snap;
+    SnapNo lo = 0, exitno = T->nsnap;
+    uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode);  /* MCode units! */
+    /* Rightmost binary search for mcode offset to determine exit number. */
+    do {
+      SnapNo mid = (lo+exitno) >> 1;
+      if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
+    } while (lo < exitno);
+    exitno--;  /* Last snapshot with mcofs <= ofs. NOTE(review): assumes snap[0].mcofs <= ofs. */
+    *ep = exitno;  /* Report the exit number to the caller. */
+#ifdef EXITSTUBS_PER_GROUP
+    return (uintptr_t)exitstub_addr(J, exitno);
+#else
+    return (uintptr_t)exitstub_trace_addr(T, exitno);
+#endif
+  }
+  /* Cannot correlate addr with trace/exit. This will be fatal. */
+  lj_assertJ(0, "bad exit pc");
+  return 0;  /* Only reached when the assertion above does not stop execution. */
+}
+#endif
+
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_trace.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_trace.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_trace.h
@@ -1,6 +1,6 @@
 /*
 ** Trace management.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_TRACE_H
@@ -37,6 +37,9 @@ LJ_FUNC void lj_trace_ins(jit_State *J,
 LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
 LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
+#if LJ_UNWIND_EXT
+LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
+#endif
 
 /* Signal asynchronous abort of trace or end of trace. */
 #define lj_trace_abort(g)	(G2J(g)->state &= ~LJ_TRACE_ACTIVE)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_traceerr.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_traceerr.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_traceerr.h
@@ -1,6 +1,6 @@
 /*
 ** Trace compiler error messages.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 /* This file may be included multiple times with different TREDEF macros. */
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_udata.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_udata.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_udata.c
@@ -1,6 +1,6 @@
 /*
 ** Userdata handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_udata_c
@@ -8,6 +8,7 @@
 
 #include "lj_obj.h"
 #include "lj_gc.h"
+#include "lj_err.h"
 #include "lj_udata.h"
 
 GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_St
   lj_mem_free(g, ud, sizeudata(ud));
 }
 
+#if LJ_64
+void *lj_lightud_intern(lua_State *L, void *p)  /* Returns (segment index << LJ_LIGHTUD_BITS_LO) | lightudlo(p). */
+{
+  global_State *g = G(L);
+  uint64_t u = (uint64_t)p;
+  uint32_t up = lightudup(u);  /* Value matched against the segment map entries. */
+  uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
+  MSize segnum = g->gc.lightudnum;  /* Index of the last used segment map entry. */
+  if (segmap) {
+    MSize seg;
+    for (seg = 0; seg <= segnum; seg++)  /* Inclusive: segnum is an index, not a count. */
+      if (segmap[seg] == up)  /* Fast path. */
+	return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
+    segnum++;  /* Not found: take the next segment index. */
+    /* Leave last segment unused to avoid clash with ITERN key. */
+    if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU);
+  }
+  if (!((segnum-1) & segnum) && segnum != 1) {  /* Grow when segnum is 0 or a power of two >= 2. */
+    lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
+    setmref(g->gc.lightudseg, segmap);
+  }
+  g->gc.lightudnum = segnum;
+  segmap[segnum] = up;  /* Record the new segment. */
+  return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
+}
+#endif
+
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_udata.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_udata.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_udata.h
@@ -1,6 +1,6 @@
 /*
 ** Userdata handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_UDATA_H
@@ -10,5 +10,8 @@
 
 LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
 LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
+#if LJ_64
+LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p);
+#endif
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vm.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_vm.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vm.h
@@ -1,6 +1,6 @@
 /*
 ** Assembler VM interface definitions.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_VM_H
@@ -26,6 +26,9 @@ LJ_ASMF void lj_vm_unwind_ff_eh(void);
 #if LJ_TARGET_X86ORX64
 LJ_ASMF void lj_vm_unwind_rethrow(void);
 #endif
+#if LJ_TARGET_MIPS
+LJ_ASMF void lj_vm_unwind_stub(void);
+#endif
 
 /* Miscellaneous functions. */
 #if LJ_TARGET_X86ORX64
@@ -48,10 +51,11 @@ LJ_ASMF void lj_vm_inshook(void);
 LJ_ASMF void lj_vm_rethook(void);
 LJ_ASMF void lj_vm_callhook(void);
 LJ_ASMF void lj_vm_profhook(void);
+LJ_ASMF void lj_vm_IITERN(void);
 
 /* Trace exit handling. */
-LJ_ASMF void lj_vm_exit_handler(void);
-LJ_ASMF void lj_vm_exit_interp(void);
+LJ_ASMF char lj_vm_exit_handler[];
+LJ_ASMF char lj_vm_exit_interp[];
 
 /* Internal math helper functions. */
 #if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
@@ -79,10 +83,6 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(i
 LJ_ASMF void lj_vm_floor_sse(void);
 LJ_ASMF void lj_vm_ceil_sse(void);
 LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_powi_sse(void);
-#define lj_vm_powi	NULL
-#else
-LJ_ASMF double lj_vm_powi(double, int32_t);
 #endif
 #if LJ_TARGET_PPC || LJ_TARGET_ARM64
 #define lj_vm_trunc	trunc
@@ -95,6 +95,7 @@ LJ_ASMF double lj_vm_trunc_sf(double);
 #if LJ_HASFFI
 LJ_ASMF int lj_vm_errno(void);
 #endif
+LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
 #endif
 
 /* Continuations for metamethods. */
@@ -110,6 +111,6 @@ LJ_ASMF void lj_cont_stitch(void);  /* T
 LJ_ASMF char lj_vm_asm_begin[];
 
 /* Bytecode offsets are relative to lj_vm_asm_begin. */
-#define makeasmfunc(ofs)	((ASMFunction)(lj_vm_asm_begin + (ofs)))
+#define makeasmfunc(ofs) lj_ptr_sign((ASMFunction)(lj_vm_asm_begin + (ofs)), 0)
 
 #endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmevent.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_vmevent.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmevent.c
@@ -1,6 +1,6 @@
 /*
 ** VM event handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #include <stdio.h>
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmevent.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_vmevent.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmevent.h
@@ -1,6 +1,6 @@
 /*
 ** VM event handling.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LJ_VMEVENT_H
@@ -24,9 +24,10 @@
 /* VM event IDs. */
 typedef enum {
   VMEVENT_DEF(BC,	0x00003883),
-  VMEVENT_DEF(TRACE,	0xb2d91467),
-  VMEVENT_DEF(RECORD,	0x9284bf4f),
-  VMEVENT_DEF(TEXIT,	0xb29df2b0),
+  VMEVENT_DEF(TRACE,	0x12d91467),
+  VMEVENT_DEF(RECORD,	0x1284bf4f),
+  VMEVENT_DEF(TEXIT,	0x129df2b0),
+  VMEVENT_DEF(ERRFIN,	0x12d93888),
   LJ_VMEVENT__MAX
 } VMEvent;
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmmath.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lj_vmmath.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lj_vmmath.c
@@ -1,6 +1,6 @@
 /*
 ** Math helper functions for assembler VM.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define lj_vmmath_c
@@ -34,7 +34,18 @@ LJ_FUNCA double lj_wrap_pow(double x, do
 LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 #endif
 
-/* -- Helper functions for generated machine code ------------------------- */
+/* -- Helper functions ---------------------------------------------------- */
+
+/* Required to prevent the C compiler from applying FMA optimizations.
+**
+** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory.
+** But the current state of C compilers is a mess in this regard.
+** Also, this function is not performance sensitive at all.
+*/
+LJ_NOINLINE static double lj_vm_floormul(double x, double y)
+{
+  return lj_vm_floor(x / y) * y;
+}
 
 double lj_vm_foldarith(double x, double y, int op)
 {
@@ -43,7 +54,7 @@ double lj_vm_foldarith(double x, double
   case IR_SUB - IR_ADD: return x-y; break;
   case IR_MUL - IR_ADD: return x*y; break;
   case IR_DIV - IR_ADD: return x/y; break;
-  case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
+  case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break;
   case IR_POW - IR_ADD: return pow(x, y); break;
   case IR_NEG - IR_ADD: return -x; break;
   case IR_ABS - IR_ADD: return fabs(x); break;
@@ -56,17 +67,20 @@ double lj_vm_foldarith(double x, double
   }
 }
 
-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
+/* -- Helper functions for generated machine code ------------------------- */
+
+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \
+ || LJ_TARGET_RISCV64
 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
 {
   uint32_t y, ua, ub;
   /* This must be checked before using this function. */
   lj_assertX(b != 0, "modulo with zero divisor");
-  ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
-  ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
+  ua = a < 0 ? ~(uint32_t)a+1u : (uint32_t)a;
+  ub = b < 0 ? ~(uint32_t)b+1u : (uint32_t)b;
   y = ua % ub;
   if (y != 0 && (a^b) < 0) y = y - ub;
-  if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
+  if (((int32_t)y^b) < 0) y = ~y+1u;
   return (int32_t)y;
 }
 #endif
@@ -80,40 +94,6 @@ double lj_vm_log2(double a)
 }
 #endif
 
-#if !LJ_TARGET_X86ORX64
-/* Unsigned x^k. */
-static double lj_vm_powui(double x, uint32_t k)
-{
-  double y;
-  lj_assertX(k != 0, "pow with zero exponent");
-  for (; (k & 1) == 0; k >>= 1) x *= x;
-  y = x;
-  if ((k >>= 1) != 0) {
-    for (;;) {
-      x *= x;
-      if (k == 1) break;
-      if (k & 1) y *= x;
-      k >>= 1;
-    }
-    y *= x;
-  }
-  return y;
-}
-
-/* Signed x^k. */
-double lj_vm_powi(double x, int32_t k)
-{
-  if (k > 1)
-    return lj_vm_powui(x, (uint32_t)k);
-  else if (k == 1)
-    return x;
-  else if (k == 0)
-    return 1.0;
-  else
-    return 1.0 / lj_vm_powui(x, (uint32_t)-k);
-}
-#endif
-
 /* Computes fpm(x) for extended math functions. */
 double lj_vm_foldfpm(double x, int fpm)
 {
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/ljamalg.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/ljamalg.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/ljamalg.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT core and libraries amalgamation.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #define ljamalg_c
@@ -39,6 +39,7 @@
 #include "lj_strscan.c"
 #include "lj_strfmt.c"
 #include "lj_strfmt_num.c"
+#include "lj_serialize.c"
 #include "lj_api.c"
 #include "lj_profile.c"
 #include "lj_lex.c"
@@ -85,5 +86,6 @@
 #include "lib_bit.c"
 #include "lib_jit.c"
 #include "lib_ffi.c"
+#include "lib_buffer.c"
 #include "lib_init.c"
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/luaconf.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/luaconf.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/luaconf.h
@@ -1,6 +1,6 @@
 /*
 ** Configuration header.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef luaconf_h
@@ -37,7 +37,7 @@
 #endif
 #define LUA_LROOT	"/usr/local"
 #define LUA_LUADIR	"/lua/5.1/"
-#define LUA_LJDIR	"/luajit-2.1.0-beta3/"
+#define LUA_LJDIR	"/luajit-2.1/"
 
 #ifdef LUA_ROOT
 #define LUA_JROOT	LUA_ROOT
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/luajit.c
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/luajit.c
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/luajit.c
@@ -1,6 +1,6 @@
 /*
 ** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 **
 ** Major portions taken verbatim or adapted from the Lua interpreter.
 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
@@ -39,6 +39,7 @@
 
 static lua_State *globalL = NULL;
 static const char *progname = LUA_PROGNAME;
+static char *empty_argv[2] = { NULL, NULL };
 
 #if !LJ_TARGET_CONSOLE
 static void lstop(lua_State *L, lua_Debug *ar)
@@ -78,9 +79,9 @@ static void print_usage(void)
   fflush(stderr);
 }
 
-static void l_message(const char *pname, const char *msg)
+static void l_message(const char *msg)
 {
-  if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
+  if (progname) { fputs(progname, stderr); fputc(':', stderr); fputc(' ', stderr); }
   fputs(msg, stderr); fputc('\n', stderr);
   fflush(stderr);
 }
@@ -90,7 +91,7 @@ static int report(lua_State *L, int stat
   if (status && !lua_isnil(L, -1)) {
     const char *msg = lua_tostring(L, -1);
     if (msg == NULL) msg = "(error object is not a string)";
-    l_message(progname, msg);
+    l_message(msg);
     lua_pop(L, 1);
   }
   return status;
@@ -256,9 +257,8 @@ static void dotty(lua_State *L)
       lua_getglobal(L, "print");
       lua_insert(L, 1);
       if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
-	l_message(progname,
-	  lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)",
-			      lua_tostring(L, -1)));
+	l_message(lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)",
+				  lua_tostring(L, -1)));
     }
   }
   lua_settop(L, 0);  /* clear stack */
@@ -310,8 +310,7 @@ static int loadjitmodule(lua_State *L)
   lua_getfield(L, -1, "start");
   if (lua_isnil(L, -1)) {
   nomodule:
-    l_message(progname,
-	      "unknown luaJIT command or jit.* modules not installed");
+    l_message("unknown luaJIT command or jit.* modules not installed");
     return 1;
   }
   lua_remove(L, -2);  /* Drop module table. */
@@ -516,8 +515,6 @@ static int pmain(lua_State *L)
   int argn;
   int flags = 0;
   globalL = L;
-  if (argv[0] && argv[0][0]) progname = argv[0];
-
   LUAJIT_VERSION_SYM();  /* Linker-enforced version check. */
 
   argn = collectargs(argv, &flags);
@@ -572,9 +569,11 @@ static int pmain(lua_State *L)
 int main(int argc, char **argv)
 {
   int status;
-  lua_State *L = lua_open();
+  lua_State *L;
+  if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0];
+  L = lua_open();
   if (L == NULL) {
-    l_message(argv[0], "cannot create state: not enough memory");
+    l_message("cannot create state: not enough memory");
     return EXIT_FAILURE;
   }
   smain.argc = argc;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/luajit.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/luajit.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-** LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
-**
-** Copyright (C) 2005-2021 Mike Pall. All rights reserved.
-**
-** Permission is hereby granted, free of charge, to any person obtaining
-** a copy of this software and associated documentation files (the
-** "Software"), to deal in the Software without restriction, including
-** without limitation the rights to use, copy, modify, merge, publish,
-** distribute, sublicense, and/or sell copies of the Software, and to
-** permit persons to whom the Software is furnished to do so, subject to
-** the following conditions:
-**
-** The above copyright notice and this permission notice shall be
-** included in all copies or substantial portions of the Software.
-**
-** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-**
-** [ MIT license: https://www.opensource.org/licenses/mit-license.php ]
-*/
-
-#ifndef _LUAJIT_H
-#define _LUAJIT_H
-
-#include "lua.h"
-
-#define LUAJIT_VERSION		"LuaJIT 2.1.0-beta3"
-#define LUAJIT_VERSION_NUM	20100  /* Version 2.1.0 = 02.01.00. */
-#define LUAJIT_VERSION_SYM	luaJIT_version_2_1_0_beta3
-#define LUAJIT_COPYRIGHT	"Copyright (C) 2005-2021 Mike Pall"
-#define LUAJIT_URL		"https://luajit.org/"
-
-/* Modes for luaJIT_setmode. */
-#define LUAJIT_MODE_MASK	0x00ff
-
-enum {
-  LUAJIT_MODE_ENGINE,		/* Set mode for whole JIT engine. */
-  LUAJIT_MODE_DEBUG,		/* Set debug mode (idx = level). */
-
-  LUAJIT_MODE_FUNC,		/* Change mode for a function. */
-  LUAJIT_MODE_ALLFUNC,		/* Recurse into subroutine protos. */
-  LUAJIT_MODE_ALLSUBFUNC,	/* Change only the subroutines. */
-
-  LUAJIT_MODE_TRACE,		/* Flush a compiled trace. */
-
-  LUAJIT_MODE_WRAPCFUNC = 0x10,	/* Set wrapper mode for C function calls. */
-
-  LUAJIT_MODE_MAX
-};
-
-/* Flags or'ed in to the mode. */
-#define LUAJIT_MODE_OFF		0x0000	/* Turn feature off. */
-#define LUAJIT_MODE_ON		0x0100	/* Turn feature on. */
-#define LUAJIT_MODE_FLUSH	0x0200	/* Flush JIT-compiled code. */
-
-/* LuaJIT public C API. */
-
-/* Control the JIT engine. */
-LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
-
-/* Low-overhead profiling API. */
-typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
-					int samples, int vmstate);
-LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
-				  luaJIT_profile_callback cb, void *data);
-LUA_API void luaJIT_profile_stop(lua_State *L);
-LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
-					     int depth, size_t *len);
-
-/* Enforce (dynamic) linker error for version mismatches. Call from main. */
-LUA_API void LUAJIT_VERSION_SYM(void);
-
-#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/luajit_rolling.h
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/luajit_rolling.h
@@ -0,0 +1,79 @@
+/*
+** LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/
+**
+** Copyright (C) 2005-2023 Mike Pall. All rights reserved.
+**
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: https://www.opensource.org/licenses/mit-license.php ]
+*/
+
+#ifndef _LUAJIT_H
+#define _LUAJIT_H
+
+#include "lua.h"
+
+#define LUAJIT_VERSION		"LuaJIT 2.1.ROLLING"
+#define LUAJIT_VERSION_NUM	20199  /* Deprecated. */
+#define LUAJIT_VERSION_SYM	luaJIT_version_2_1_ROLLING
+#define LUAJIT_COPYRIGHT	"Copyright (C) 2005-2023 Mike Pall"
+#define LUAJIT_URL		"https://luajit.org/"
+
+/* Modes for luaJIT_setmode. */
+#define LUAJIT_MODE_MASK	0x00ff
+
+enum {
+  LUAJIT_MODE_ENGINE,		/* Set mode for whole JIT engine. */
+  LUAJIT_MODE_DEBUG,		/* Set debug mode (idx = level). */
+
+  LUAJIT_MODE_FUNC,		/* Change mode for a function. */
+  LUAJIT_MODE_ALLFUNC,		/* Recurse into subroutine protos. */
+  LUAJIT_MODE_ALLSUBFUNC,	/* Change only the subroutines. */
+
+  LUAJIT_MODE_TRACE,		/* Flush a compiled trace. */
+
+  LUAJIT_MODE_WRAPCFUNC = 0x10,	/* Set wrapper mode for C function calls. */
+
+  LUAJIT_MODE_MAX
+};
+
+/* Flags or'ed in to the mode. */
+#define LUAJIT_MODE_OFF		0x0000	/* Turn feature off. */
+#define LUAJIT_MODE_ON		0x0100	/* Turn feature on. */
+#define LUAJIT_MODE_FLUSH	0x0200	/* Flush JIT-compiled code. */
+
+/* LuaJIT public C API. */
+
+/* Control the JIT engine. */
+LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
+
+/* Low-overhead profiling API. */
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+					int samples, int vmstate);
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+				  luaJIT_profile_callback cb, void *data);
+LUA_API void luaJIT_profile_stop(lua_State *L);
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+					     int depth, size_t *len);
+
+/* Enforce (dynamic) linker error for version mismatches. Call from main. */
+LUA_API void LUAJIT_VERSION_SYM(void);
+
+#endif
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/lualib.h
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/lualib.h
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/lualib.h
@@ -1,6 +1,6 @@
 /*
 ** Standard library header.
-** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 */
 
 #ifndef _LUALIB_H
@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *
 LUALIB_API int luaopen_bit(lua_State *L);
 LUALIB_API int luaopen_jit(lua_State *L);
 LUALIB_API int luaopen_ffi(lua_State *L);
+LUALIB_API int luaopen_string_buffer(lua_State *L);
 
 LUALIB_API void luaL_openlibs(lua_State *L);
 
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/msvcbuild.bat
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/msvcbuild.bat
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/msvcbuild.bat
@@ -1,5 +1,5 @@
 @rem Script to build LuaJIT with MSVC.
-@rem Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+@rem Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 @rem
 @rem Open a "Visual Studio Command Prompt" (either x86 or x64).
 @rem Then cd to this directory and run this script. Use the following
@@ -25,38 +25,54 @@
 @set LJDLLNAME=lua51.dll
 @set LJLIBNAME=lua51.lib
 @set BUILDTYPE=release
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 
+@setlocal
+@call :SETHOSTVARS
 %LJCOMPILE% host\minilua.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:minilua.exe minilua.obj
 @if errorlevel 1 goto :BAD
 if exist minilua.exe.manifest^
   %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+@endlocal
 
-@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64
+@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
 @set LJARCH=x64
 @minilua
-@if errorlevel 8 goto :X64
+@if errorlevel 8 goto :NO32
 @set DASC=vm_x86.dasc
-@set DASMFLAGS=-D WIN -D JIT -D FFI
+@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU
 @set LJARCH=x86
 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2
+@goto :DA
+:NO32
+@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
+@set DASC=vm_arm64.dasc
+@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
+@set LJARCH=arm64
+@goto :DA
 :X64
-@if "%1" neq "nogc64" goto :GC64
+@if "%1" neq "nogc64" goto :DA
 @shift
 @set DASC=vm_x86.dasc
 @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
-:GC64
+:DA
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
 @if errorlevel 1 goto :BAD
 
-%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
+@setlocal
+@call :SETHOSTVARS
+%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
 @if errorlevel 1 goto :BAD
 if exist buildvm.exe.manifest^
   %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+@endlocal
 
 buildvm -m peobj -o lj_vm.obj
 @if errorlevel 1 goto :BAD
@@ -116,6 +132,12 @@ if exist luajit.exe.manifest^
 @echo === Successfully built LuaJIT for Windows/%LJARCH% ===
 
 @goto :END
+:SETHOSTVARS
+@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" (
+  call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo
+  echo on
+)
+@goto :END
 :BAD
 @echo.
 @echo *******************************************************
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/nxbuild.bat
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/nxbuild.bat
@@ -0,0 +1,164 @@
+@rem Script to build LuaJIT with NintendoSDK + NX Addon.
+@rem Donated to the public domain by Swyter.
+@rem
+@rem To run this script you must open a "Native Tools Command Prompt for VS".
+@rem
+@rem Either the x86 version for NX32, or x64 for the NX64 target.
+@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@rem must match the cross-compiled target (32 or 64 bits).
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem nxbuild            # release build, amalgamated
+@rem nxbuild debug      # debug build, amalgamated
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem noamalg            # (after debug) non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined NINTENDO_SDK_ROOT goto :FAIL
+@if not defined PLATFORM goto :FAIL
+
+@rem Must run before the platform dispatch, or the variables set below leak into the calling shell.
+@setlocal
+@if "%platform%" == "x86" goto :DO_NX32
+@if "%platform%" == "x64" goto :DO_NX64
+
+@echo Error: Current host platform is %platform%!
+@echo.
+@goto :FAIL
+
+:DO_NX32
+@set DASC=vm_arm.dasc
+@set DASMFLAGS= -D HFABI -D FPU
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
+@set HOST_PTR_SIZE=4
+goto :BEGIN
+
+:DO_NX64
+@set DASC=vm_arm64.dasc
+@set DASMFLAGS= -D ENDIAN_LE
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
+@set HOST_PTR_SIZE=8
+
+:BEGIN
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+  %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check for the right 32/64 bit host compiler: the exit code of minilua must equal the pointer size expected for the target.
+@minilua
+@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
+
+@echo The pointer size of the host tools in bytes (%errorlevel%) does not match the expected value (%HOST_PTR_SIZE%).
+@echo Check that the script is being run under the correct x86/x64 VS prompt.
+@goto :BAD
+
+:PASSED_PTR_CHECK
+@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
+%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+  %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I"%NINTENDO_SDK_ROOT%\Include" %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc
+@set TARGETLIB_SUFFIX=nx64
+@rem All SDK tool paths are quoted so that an SDK root containing spaces still works.
+"%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as" -o lj_vm.o lj_vm.s
+goto :DEBUGCHECK
+
+:NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I"%NINTENDO_SDK_ROOT%\Include" %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc
+@set TARGETLIB_SUFFIX=nx32
+@rem All SDK tool paths are quoted so that an SDK root containing spaces still works.
+"%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as" -o lj_vm.o lj_vm.s
+:DEBUGCHECK
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
+@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
+@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
+:BUILD
+del %TARGETLIB%
+@set LJCOMPILE=%LJCOMPILE% -fPIC
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+  %LJCOMPILE% %%f
+  @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Native Tools Command Prompt for VS".
+@echo.
+@echo Either the x86 version for NX32, or x64 for the NX64 target.
+@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@echo must match the cross-compiled target (32 or 64 bits).
+@echo.
+@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
+:END
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/ps4build.bat
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/ps4build.bat
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/ps4build.bat
@@ -26,7 +26,7 @@
 @set LJMT=mt /nologo
 @set DASMDIR=..\dynasm
 @set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 @set GC64=
 @set DASC=vm_x64.dasc
 
@@ -51,7 +51,11 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
 @if errorlevel 1 goto :BAD
 
-%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -DLUAJIT_NO_UNWIND host\buildvm*.c
+
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
 @if errorlevel 1 goto :BAD
@@ -78,7 +82,7 @@ buildvm -m folddef -o lj_folddef.h lj_op
 @set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
 @set INCLUDE=""
 
-orbis-as -o lj_vm.o lj_vm.s
+"%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-as" -o lj_vm.o lj_vm.s
 
 @if "%1" neq "debug" goto :NODEBUG
 @shift
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/ps5build.bat
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/ps5build.bat
@@ -0,0 +1,126 @@
+@rem Script to build LuaJIT with the PS5 SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS20xx x64 Native Tools Command Prompt".
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps5build        release build, amalgamated, 64-bit GC
+@rem ps5build debug    debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug)    32-bit GC
+@rem noamalg (after debug)  non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined SCE_PROSPERO_SDK_DIR goto :FAIL
+
+@setlocal
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+@set GC64=
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=-DLUAJIT_DISABLE_GC64
+@set DASC=vm_x86.dasc
+:NOGC32
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+  %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check for 64 bit host compiler.
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D P64 -D NO_UNWIND
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+  %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC %GC64%
+@set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus
+@set INCLUDE=""
+
+"%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -o lj_vm.o lj_vm.s
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -g -O0
+@set TARGETLIB=libluajitD_ps5.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -O2
+@set TARGETLIB=libluajit_ps5.a
+:BUILD
+del %TARGETLIB%
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+  %LJCOMPILE% %%f
+  @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for PS5 ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The PS5 Prospero SDK must be installed, too.
+:END
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/psvitabuild.bat
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/psvitabuild.bat
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/psvitabuild.bat
@@ -14,7 +14,7 @@
 @set LJMT=mt /nologo
 @set DASMDIR=..\dynasm
 @set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 
 %LJCOMPILE% host\minilua.c
 @if errorlevel 1 goto :BAD
@@ -31,6 +31,9 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_arm.dasc
 @if errorlevel 1 goto :BAD
 
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
 %LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_ARM -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLJ_TARGET_PSVITA=1 host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_arm.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_arm.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_arm.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for ARM CPUs.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |
 |.arch arm
 |.section code_op, code_sub
@@ -539,13 +539,13 @@ static void build_subroutines(BuildCtx *
   |    cmp CARG1, #1
   |.endif
   |   ldr PC, [CARG4, #-12]		// Restore PC from [cont|PC].
-  |  ldr CARG3, LFUNC:CARG3->field_pc
   |    mvn INS, #~LJ_TNIL
   |    add CARG2, RA, RC
   |    str INS, [CARG2, #-4]		// Ensure one valid arg.
   |.if FFI
   |    bls >1
   |.endif
+  |  ldr CARG3, LFUNC:CARG3->field_pc
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  // BASE = base, RA = resultptr, CARG4 = meta base
   |    bx CARG1
@@ -1111,24 +1111,18 @@ static void build_subroutines(BuildCtx *
   |  checktab CARG2, ->fff_fallback
   |   strd CARG34, [BASE, NARGS8:RC]	// Set missing 2nd arg to nil.
   |   ldr PC, [BASE, FRAME_PC]
-  |  mov CARG2, CARG1
-  |    str BASE, L->base		// Add frame since C call can throw.
-  |  mov CARG1, L
-  |    str BASE, L->top			// Dummy frame length is ok.
-  |  add CARG3, BASE, #8
-  |   str PC, SAVE_PC
-  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Returns 0 at end of traversal.
+  |  add CARG2, BASE, #8
+  |  sub CARG3, BASE, #8
+  |  bl extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |  // Returns 1=found, 0=end, -1=error.
   |  .IOS ldr BASE, L->base
   |  cmp CRET1, #0
-  |  mvneq CRET2, #~LJ_TNIL
-  |  beq ->fff_restv			// End of traversal: return nil.
-  |  ldrd CARG12, [BASE, #8]		// Copy key and value to results.
-  |   ldrd CARG34, [BASE, #16]
-  |    mov RC, #(2+1)*8
-  |  strd CARG12, [BASE, #-8]
-  |   strd CARG34, [BASE]
-  |  b ->fff_res
+  |   mov RC, #(2+1)*8
+  |  bgt ->fff_res			// Found key/value.
+  |  bmi ->fff_fallback			// Invalid key.
+  |  // End of traversal: return nil.
+  |  mvn CRET2, #~LJ_TNIL
+  |  b ->fff_restv
   |
   |.ffunc_1 pairs
   |  checktab CARG2, ->fff_fallback
@@ -1810,7 +1804,7 @@ static void build_subroutines(BuildCtx *
   |   str BASE, L->base
   |   str PC, SAVE_PC
   |   str L, SBUF:CARG1->L
-  |  str CARG4, SBUF:CARG1->p
+  |  str CARG4, SBUF:CARG1->w
   |  bl extern lj_buf_putstr_ .. name
   |  bl extern lj_buf_tostr
   |  b ->fff_resstr
@@ -2202,8 +2196,8 @@ static void build_subroutines(BuildCtx *
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1, #0
-  |  blt >9				// Check for error from exit.
+  |  cmn CARG1, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |   lsl RC, CARG1, #3
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
   |   str RC, SAVE_MULTRES
@@ -2219,6 +2213,8 @@ static void build_subroutines(BuildCtx *
   |   ldr INS, [PC], #4
   |     lsl MASKR8, MASKR8, #3		// MASKR8 = 255*8.
   |    st_vmstate CARG4
+  |  cmn CARG1, #17			// Static dispatch?
+  |  beq >5
   |  cmp OP, #BC_FUNCC+2		// Fast function?
   |  bhs >4
   |2:
@@ -2244,9 +2240,21 @@ static void build_subroutines(BuildCtx *
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:CARG1, [CARG1, RC, lsl #2]
+  |  ldr INS, TRACE:CARG1->startins
+  |  decode_OP OP, INS
+  |   decode_RA8 RA, INS
+  |  add OP, DISPATCH, OP, lsl #2
+  |   decode_RD RC, INS
+  |  ldr pc, [OP, #GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
+  |  rsb CARG2, CARG1, #0
   |  mov CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -2429,6 +2437,64 @@ static void build_subroutines(BuildCtx *
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
   |
+  |.define NEXT_TAB,		TAB:CARG1	// In: table pointer.
+  |.define NEXT_RES,		CARG1	// Out: result pointer (same register as NEXT_TAB).
+  |.define NEXT_IDX,		CARG2	// In/out: traversal index, advanced in place.
+  |.define NEXT_TMP0,		CARG3
+  |.define NEXT_TMP1,		CARG4
+  |.define NEXT_LIM,		r12
+  |.define NEXT_RES_PTR,	sp
+  |.define NEXT_RES_VAL,	[sp]
+  |.define NEXT_RES_KEY_I,	[sp, #8]
+  |.define NEXT_RES_KEY_IT,	[sp, #12]
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in CRET2.
+  |->vm_next:
+  |.if JIT
+  |  ldr NEXT_TMP0, NEXT_TAB->array
+  |   ldr NEXT_LIM, NEXT_TAB->asize
+  |  add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3	// TMP0 = array + idx*8.
+  |1:  // Traverse array part.
+  |   subs NEXT_TMP1, NEXT_IDX, NEXT_LIM	// TMP1 = idx - asize (reused at 5:).
+  |   bhs >5				// idx >= asize: continue in hash part.
+  |  ldr NEXT_TMP1, [NEXT_TMP0, #4]	// Word at slot+4, checked against LJ_TNIL below.
+  |   str NEXT_IDX, NEXT_RES_KEY_I	// Pre-store idx as the integer key.
+  |   add NEXT_TMP0, NEXT_TMP0, #8	// Advance to the next array slot.
+  |   add NEXT_IDX, NEXT_IDX, #1
+  |  checktp NEXT_TMP1, LJ_TNIL
+  |  beq <1				// Skip holes in array part.
+  |  ldr NEXT_TMP0, [NEXT_TMP0, #-8]	// Word at offset 0 of the slot just passed.
+  |   mov NEXT_RES, NEXT_RES_PTR
+  |  strd NEXT_TMP0, NEXT_RES_VAL	// Stores NEXT_TMP1, too.
+  |  mvn NEXT_TMP0, #~LJ_TISNUM
+  |  str NEXT_TMP0, NEXT_RES_KEY_IT	// Key type word = LJ_TISNUM.
+  |  bx lr
+  |
+  |5:  // Traverse hash part.
+  |  ldr NEXT_TMP0, NEXT_TAB->hmask
+  |   ldr NODE:NEXT_RES, NEXT_TAB->node	// Overwrites the table pointer (same register).
+  |   add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1	// TMP1 = (idx - asize) * 3.
+  |  add NEXT_LIM, NEXT_LIM, NEXT_TMP0	// LIM = asize + hmask.
+  |   add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3	// node += (idx - asize) * 24.
+  |6:
+  |  cmp NEXT_IDX, NEXT_LIM
+  |  bhi >9				// idx > asize + hmask: end of iteration.
+  |  ldr NEXT_TMP1, NODE:NEXT_RES->val.it
+  |  checktp NEXT_TMP1, LJ_TNIL
+  |   add NEXT_IDX, NEXT_IDX, #1
+  |  bxne lr				// Non-nil value: return the node pointer.
+  |  // Skip holes in hash part.
+  |  add NEXT_RES, NEXT_RES, #sizeof(Node)
+  |  b <6
+  |
+  |9:  // End of iteration. Set the key to nil (not the value).
+  |  mvn NEXT_TMP0, #0
+  |   mov NEXT_RES, NEXT_RES_PTR
+  |  str NEXT_TMP0, NEXT_RES_KEY_IT
+  |  bx lr
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2505,16 +2571,16 @@ static void build_subroutines(BuildCtx *
   |.endif
   |  mov r11, sp
   |  sub sp, sp, CARG1			// Readjust stack.
-  |   subs CARG2, CARG2, #1
+  |   subs CARG2, CARG2, #4
   |.if HFABI
   |  vldm RB, {d0-d7}
   |.endif
   |    ldr RB, CCSTATE->func
   |   bmi >2
   |1:  // Copy stack slots.
-  |  ldr CARG4, [CARG3, CARG2, lsl #2]
-  |  str CARG4, [sp, CARG2, lsl #2]
-  |  subs CARG2, CARG2, #1
+  |  ldr CARG4, [CARG3, CARG2]
+  |  str CARG4, [sp, CARG2]
+  |  subs CARG2, CARG2, #4
   |  bpl <1
   |2:
   |  ldrd CARG12, CCSTATE->gpr[0]
@@ -3919,10 +3985,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
     |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |  hotloop
     |.endif
+    |->vm_IITERN:
+    |  // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
     |  add RA, BASE, RA
     |  ldr TAB:RB, [RA, #-16]
     |  ldr CARG1, [RA, #-8]		// Get index from control var.
@@ -3988,7 +4055,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   ins_next1
     |   ins_next2
     |  mov CARG1, #0
-    |  mvn CARG2, #0x00018000
+    |  mvn CARG2, #~LJ_KEYINDEX
     |  strd CARG1, [RA, #-8]		// Initialize control var.
     |1:
     |   ins_next3
@@ -3997,9 +4064,25 @@ static void build_ins(BuildCtx *ctx, BCO
     |   mov OP, #BC_ITERC
     |  strb CARG1, [PC, #-4]
     |   sub PC, RC, #0x20000
+    |.if JIT
+    |   ldrb CARG1, [PC]
+    |   cmp CARG1, #BC_ITERN
+    |   bne >6
+    |.endif
     |   strb OP, [PC]			// Subsumes ins_next1.
     |   ins_next2
     |  b <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+    |  ldrh CARG2, [PC, #2]
+    |  ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
+    |  // Subsumes ins_next1 and ins_next2.
+    |  ldr INS, TRACE:CARG1->startins
+    |  bfi INS, OP, #0, #8
+    |  str INS, [PC], #4
+    |  b <1
+    |.endif
     break;
 
   case BC_VARG:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_arm64.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_arm64.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_arm64.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for ARM64 CPUs.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |
 |.arch arm64
 |.section code_op, code_sub
@@ -77,51 +77,94 @@
 |.define CRET1,		x0
 |.define CRET1w,	w0
 |
+|//-----------------------------------------------------------------------
+|
+|// ARM64e pointer authentication codes (PAC).
+|.if PAUTH
+|.macro sp_auth; pacibsp; .endmacro	// Sign lr (key B, sp as modifier).
+|.macro br_auth, reg; braaz reg; .endmacro	// Authenticated branch (key A, zero modifier).
+|.macro blr_auth, reg; blraaz reg; .endmacro	// Authenticated call (key A, zero modifier).
+|.macro ret_auth; retab; .endmacro	// Authenticated return (key B, sp as modifier).
+|.else
+|.macro sp_auth; .endmacro	// Expands to nothing.
+|.macro br_auth, reg; br reg; .endmacro
+|.macro blr_auth, reg; blr reg; .endmacro
+|.macro ret_auth; ret; .endmacro
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
 |// Stack layout while in interpreter. Must match with lj_frame.h.
 |
 |.define CFRAME_SPACE,	208
 |//----- 16 byte aligned, <-- sp entering interpreter
-|// Unused		[sp, #204]	// 32 bit values
-|.define SAVE_NRES,	[sp, #200]
-|.define SAVE_ERRF,	[sp, #196]
-|.define SAVE_MULTRES,	[sp, #192]
-|.define TMPD,		[sp, #184]	// 64 bit values
-|.define SAVE_L,	[sp, #176]
-|.define SAVE_PC,	[sp, #168]
-|.define SAVE_CFRAME,	[sp, #160]
-|.define SAVE_FPR_,	96		// 96+8*8: 64 bit FPR saves
-|.define SAVE_GPR_,	16		// 16+10*8: 64 bit GPR saves
-|.define SAVE_LR,	[sp, #8]
-|.define SAVE_FP,	[sp]
+|.define SAVE_FP_LR_,	192
+|.define SAVE_GPR_,	112		// 112+10*8: 64 bit GPR saves
+|.define SAVE_FPR_,	48		// 48+8*8: 64 bit FPR saves
+|// Unused		[sp, #44]	// 32 bit values
+|.define SAVE_NRES,	[sp, #40]
+|.define SAVE_ERRF,	[sp, #36]
+|.define SAVE_MULTRES,	[sp, #32]
+|.define TMPD,		[sp, #24]	// 64 bit values
+|.define SAVE_L,	[sp, #16]
+|.define SAVE_PC,	[sp, #8]
+|.define SAVE_CFRAME,	[sp, #0]
 |//----- 16 byte aligned, <-- sp while in interpreter.
 |
-|.define TMPDofs,	#184
+|.define TMPDofs,	#24
+|
+|.if WIN
+|// Windows unwind data is suited to r1 stored first.
+|.macro stp_unwind, r1, r2, where
+|  stp r1, r2, where	// r1 goes to the lower address.
+|.endmacro
+|.macro ldp_unwind, r1, r2, where
+|  ldp r1, r2, where
+|.endmacro
+|.macro ldp_unwind, r1, r2, where, post_index
+|  ldp r1, r2, where, post_index	// Variant taking an extra post-index operand.
+|.endmacro
+|.else
+|// Otherwise store r2 first for compact unwind info (OSX).
+|.macro stp_unwind, r1, r2, where
+|  stp r2, r1, where	// Operands swapped: r2 goes to the lower address.
+|.endmacro
+|.macro ldp_unwind, r1, r2, where
+|  ldp r2, r1, where	// Must mirror the operand order of stp_unwind.
+|.endmacro
+|.macro ldp_unwind, r1, r2, where, post_index
+|  ldp r2, r1, where, post_index	// Variant taking an extra post-index operand.
+|.endmacro
+|.endif
 |
 |.macro save_, gpr1, gpr2, fpr1, fpr2
-|  stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |.macro rest_, gpr1, gpr2, fpr1, fpr2
-|  ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
-|  ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
+|  ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
+|  ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
 |.endmacro
 |
 |.macro saveregs
-|  stp fp, lr, [sp, #-CFRAME_SPACE]!
-|  add fp, sp, #0
-|  stp x19, x20, [sp, # SAVE_GPR_]
+|  sp_auth
+|  sub sp, sp, # CFRAME_SPACE
+|  stp fp, lr, [sp, # SAVE_FP_LR_]
+|  add fp, sp, # SAVE_FP_LR_
+|  stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
 |  save_ 21, 22, 8, 9
 |  save_ 23, 24, 10, 11
 |  save_ 25, 26, 12, 13
 |  save_ 27, 28, 14, 15
 |.endmacro
 |.macro restoreregs
-|  ldp x19, x20, [sp, # SAVE_GPR_]
+|  ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
 |  rest_ 21, 22, 8, 9
 |  rest_ 23, 24, 10, 11
 |  rest_ 25, 26, 12, 13
 |  rest_ 27, 28, 14, 15
-|  ldp fp, lr, [sp], # CFRAME_SPACE
+|  ldp fp, lr, [sp, # SAVE_FP_LR_]
+|  add sp, sp, # CFRAME_SPACE
 |.endmacro
 |
 |// Type definitions. Some of these are only used for documentation.
@@ -179,7 +222,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   decode_RD RC, INS
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |// Instruction footer.
@@ -208,7 +251,7 @@
 |   decode_RA RA, INS
 |  ldr TMP0, [TMP1, #GG_G2DISP]
 |   add RA, BASE, RA, lsl #3
-|  br TMP0
+|  br_auth TMP0
 |.endmacro
 |
 |.macro ins_call
@@ -248,8 +291,17 @@
 |  blo target
 |.endmacro
 |
+|.macro init_constants
+|  movn TISNIL, #0	// TISNIL = ~0 (all bits set).
+|  movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48	// Constant placed in bits 48-63, rest zero.
+|  movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16	// Same 16 bit constant in bits 16-31.
+|.endmacro
+|
 |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
 |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|.macro mov_nil, reg; mov reg, TISNIL; .endmacro	// reg = TISNIL.
+|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro	// Compare reg with TISNIL.
+|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro	// dst = src + TISNUM.
 |
 #define GL_J(field)	(GG_G2J + (int)offsetof(jit_State, field))
 |
@@ -355,7 +407,7 @@ static void build_subroutines(BuildCtx *
   |
   |->vm_leave_unw:
   |  restoreregs
-  |  ret
+  |  ret_auth
   |
   |6:
   |  bgt >7				// Less results wanted?
@@ -387,26 +439,26 @@ static void build_subroutines(BuildCtx *
   |
   |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
   |  // (void *cframe, int errcode)
+  |  add fp, CARG1, # SAVE_FP_LR_
   |  mov sp, CARG1
   |  mov CRET1, CARG2
-  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
   |  ldr L, SAVE_L
-  |   mv_vmstate TMP0w, C
   |  ldr GL, L->glref
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |   mv_vmstate TMP0w, C
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
   |
   |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
   |  // (void *cframe)
-  |  and sp, CARG1, #CFRAME_RAWMASK
-  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  add fp, CARG1, # SAVE_FP_LR_
+  |  mov sp, CARG1
   |  ldr L, SAVE_L
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |    movn TISNIL, #0
+  |    init_constants
+  |   ldr GL, L->glref			// Setup pointer to global state.
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
   |    mov RC, #16			// 2 results: false + error message.
   |  ldr BASE, L->base
-  |   ldr GL, L->glref			// Setup pointer to global state.
   |    mov_false TMP0
   |  sub RA, BASE, #8			// Results start at BASE-8.
   |  ldr PC, [BASE, FRAME_PC]		// Fetch PC of previous frame.
@@ -467,11 +519,9 @@ static void build_subroutines(BuildCtx *
   |  str L, GL->cur_L
   |  mov RA, BASE
   |   ldp BASE, CARG1, L->base
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |    init_constants
   |  ldr PC, [BASE, FRAME_PC]
   |     strb wzr, L->status
-  |    movn TISNIL, #0
   |   sub RC, CARG1, BASE
   |  ands CARG1, PC, #FRAME_TYPE
   |   add RC, RC, #8
@@ -500,16 +550,15 @@ static void build_subroutines(BuildCtx *
   |    ldr GL, L->glref			// Setup pointer to global state.
   |     mov BASE, CARG2
   |   str CARG1, SAVE_PC		// Any value outside of bytecode is ok.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |
   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
   |  str L, GL->cur_L
   |  ldp RB, CARG1, L->base		// RB = old base (for vmeta_call).
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
   |  add PC, PC, BASE
-  |    movn TISNIL, #0
+  |    init_constants
   |  sub PC, PC, RB			// PC = frame delta + frame type
   |   sub NARGS8:RC, CARG1, BASE
   |    st_vmstate ST_INTERP
@@ -536,10 +585,11 @@ static void build_subroutines(BuildCtx *
   |   sub RA, RA, RB			// Compute -savestack(L, L->top).
   |   str RAw, SAVE_NRES		// Neg. delta means cframe w/o frame.
   |  str wzr, SAVE_ERRF			// No error function.
-  |  str RC, SAVE_CFRAME
-  |  str fp, L->cframe			// Add our C frame to cframe chain.
+  |  add TMP0, sp, #0
+  |   str RC, SAVE_CFRAME
+  |  str TMP0, L->cframe		// Add our C frame to cframe chain.
   |    str L, GL->cur_L
-  |  blr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  blr_auth CARG4		// (lua_State *L, lua_CFunction func, void *ud)
   |  mov BASE, CRET1
   |   mov PC, #FRAME_CP
   |  cbnz BASE, <3			// Else continue with the call.
@@ -562,15 +612,15 @@ static void build_subroutines(BuildCtx *
   |    cmp CARG1, #1
   |.endif
   |   ldr PC, [CARG4, #-24]		// Restore PC from [cont|PC].
-  |  ldr CARG3, LFUNC:CARG3->pc
   |    add TMP0, RA, RC
   |    str TISNIL, [TMP0, #-8]		// Ensure one valid arg.
   |.if FFI
   |    bls >1
   |.endif
+  |  ldr CARG3, LFUNC:CARG3->pc
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  // BASE = base, RA = resultptr, CARG4 = meta base
-  |    br CARG1
+  |    br_auth CARG1
   |
   |.if FFI
   |1:
@@ -617,7 +667,7 @@ static void build_subroutines(BuildCtx *
   |  b >1
   |
   |->vmeta_tgetb:			// RB = table, RC = index
-  |  add RC, RC, TISNUM
+  |  add_TISNUM RC, RC
   |   add CARG2, BASE, RB, lsl #3
   |   add CARG3, sp, TMPDofs
   |  str RC, TMPD
@@ -652,7 +702,7 @@ static void build_subroutines(BuildCtx *
   |  sxtw CARG2, TMP1w
   |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
   |  // Returns cTValue * or NULL.
-  |  mov TMP0, TISNIL
+  |  mov_nil TMP0
   |  cbz CRET1, ->BC_TGETR_Z
   |  ldr TMP0, [CRET1]
   |  b ->BC_TGETR_Z
@@ -675,7 +725,7 @@ static void build_subroutines(BuildCtx *
   |  b >1
   |
   |->vmeta_tsetb:			// RB = table, RC = index
-  |  add RC, RC, TISNUM
+  |  add_TISNUM RC, RC
   |   add CARG2, BASE, RB, lsl #3
   |   add CARG3, sp, TMPDofs
   |  str RC, TMPD
@@ -989,7 +1039,7 @@ static void build_subroutines(BuildCtx *
   |1:  // Field metatable must be at same offset for GCtab and GCudata!
   |  ldr TAB:RB, TAB:CARG1->metatable
   |2:
-  |   mov CARG1, TISNIL
+  |   mov_nil CARG1
   |   ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
   |  cbz TAB:RB, ->fff_restv
   |  ldr TMP1w, TAB:RB->hmask
@@ -1011,7 +1061,7 @@ static void build_subroutines(BuildCtx *
   |  movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
   |  b ->fff_restv
   |5:
-  |  cmp TMP0, TISNIL
+  |  cmp_nil TMP0
   |  bne ->fff_restv
   |  b <4
   |
@@ -1086,21 +1136,19 @@ static void build_subroutines(BuildCtx *
   |//-- Base library: iterators -------------------------------------------
   |
   |.ffunc_1 next
-  |  checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback
+  |  checktp CARG1, LJ_TTAB, ->fff_fallback
   |  str TISNIL, [BASE, NARGS8:RC]	// Set missing 2nd arg to nil.
   |  ldr PC, [BASE, FRAME_PC]
-  |   stp BASE, BASE, L->base		// Add frame since C call can throw.
-  |  mov CARG1, L
-  |  add CARG3, BASE, #8
-  |   str PC, SAVE_PC
-  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Returns 0 at end of traversal.
+  |  add CARG2, BASE, #8
+  |  sub CARG3, BASE, #16
+  |  bl extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |  // Returns 1=found, 0=end, -1=error.
+  |   mov RC, #(2+1)*8
+  |  tbnz CRET1w, #31, ->fff_fallback	// Invalid key.
+  |  cbnz CRET1, ->fff_res		// Found key/value.
+  |  // End of traversal: return nil.
   |  str TISNIL, [BASE, #-16]
-  |  cbz CRET1, ->fff_res1		// End of traversal: return nil.
-  |  ldp CARG1, CARG2, [BASE, #8]	// Copy key and value to results.
-  |    mov RC, #(2+1)*8
-  |  stp CARG1, CARG2, [BASE, #-16]
-  |  b ->fff_res
+  |  b ->fff_res1
   |
   |.ffunc_1 pairs
   |  checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
@@ -1113,8 +1161,8 @@ static void build_subroutines(BuildCtx *
   |  cbnz TAB:CARG2, ->fff_fallback
 #endif
   |  mov RC, #(3+1)*8
-  |  stp CARG1, TISNIL, [BASE, #-8]
-  |   str CFUNC:CARG4, [BASE, #-16]
+  |  stp CFUNC:CARG4, CARG1, [BASE, #-16]
+  |   str TISNIL, [BASE]
   |  b ->fff_res
   |
   |.ffunc_2 ipairs_aux
@@ -1126,14 +1174,14 @@ static void build_subroutines(BuildCtx *
   |  add CARG2w, CARG2w, #1
   |  cmp CARG2w, TMP1w
   |    ldr PC, [BASE, FRAME_PC]
-  |     add TMP2, CARG2, TISNUM
+  |     add_TISNUM TMP2, CARG2
   |   mov RC, #(0+1)*8
   |     str TMP2, [BASE, #-16]
   |  bhs >2				// Not in array part?
   |  ldr TMP0, [CARG3, CARG2, lsl #3]
   |1:
   |   mov TMP1, #(2+1)*8
-  |   cmp TMP0, TISNIL
+  |   cmp_nil TMP0
   |  str TMP0, [BASE, #-8]
   |   csel RC, RC, TMP1, eq
   |  b ->fff_res
@@ -1156,16 +1204,17 @@ static void build_subroutines(BuildCtx *
   |  cbnz TAB:CARG2, ->fff_fallback
 #endif
   |  mov RC, #(3+1)*8
-  |  stp CARG1, TISNUM, [BASE, #-8]
-  |   str CFUNC:CARG4, [BASE, #-16]
+  |  stp CFUNC:CARG4, CARG1, [BASE, #-16]
+  |   str TISNUM, [BASE]
   |  b ->fff_res
   |
   |//-- Base library: catch errors ----------------------------------------
   |
   |.ffunc pcall
+  |   cmp NARGS8:RC, #8
   |  ldrb TMP0w, GL->hookmask
-  |   subs NARGS8:RC, NARGS8:RC, #8
   |   blo ->fff_fallback
+  |   sub NARGS8:RC, NARGS8:RC, #8
   |    mov RB, BASE
   |    add BASE, BASE, #16
   |  ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
@@ -1346,7 +1395,7 @@ static void build_subroutines(BuildCtx *
   |  eor CARG2w, CARG1w, CARG1w, asr #31
   |   movz CARG3, #0x41e0, lsl #48	// 2^31.
   |  subs CARG1w, CARG2w, CARG1w, asr #31
-  |   add CARG1, CARG1, TISNUM
+  |   add_TISNUM CARG1, CARG1
   |  csel CARG1, CARG1, CARG3, pl
   |  // Fallthrough.
   |
@@ -1437,7 +1486,7 @@ static void build_subroutines(BuildCtx *
   |    ldr PC, [BASE, FRAME_PC]
   |  str d0, [BASE, #-16]
   |    mov RC, #(2+1)*8
-  |   add CARG2, CARG2, TISNUM
+  |   add_TISNUM CARG2, CARG2
   |   str CARG2, [BASE, #-8]
   |  b ->fff_res
   |
@@ -1503,7 +1552,7 @@ static void build_subroutines(BuildCtx *
   |  bne ->fff_fallback
   |  ldrb TMP0w, STR:CARG1[1]		// Access is always ok (NUL at end).
   |   ldr CARG3w, STR:CARG1->len
-  |  add TMP0, TMP0, TISNUM
+  |  add_TISNUM TMP0, TMP0
   |  str TMP0, [BASE, #-16]
   |  mov RC, #(0+1)*8
   |   cbz CARG3, ->fff_res
@@ -1589,7 +1638,7 @@ static void build_subroutines(BuildCtx *
   |   str BASE, L->base
   |   str PC, SAVE_PC
   |   str L, GL->tmpbuf.L
-  |  str TMP0, GL->tmpbuf.p
+  |  str TMP0, GL->tmpbuf.w
   |  bl extern lj_buf_putstr_ .. name
   |  bl extern lj_buf_tostr
   |  b ->fff_resstr
@@ -1649,17 +1698,17 @@ static void build_subroutines(BuildCtx *
   |.ffunc_bit tobit
   |  mov TMP0w, CARG1w
   |9:  // Label reused by .ffunc_bit_op users.
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.ffunc_bit bswap
   |  rev TMP0w, CARG1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.ffunc_bit bnot
   |  mvn TMP0w, CARG1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |
   |.macro .ffunc_bit_sh, name, ins, shmod
@@ -1680,7 +1729,7 @@ static void build_subroutines(BuildCtx *
   |  checkint CARG1, ->vm_tobit_fb
   |2:
   |  ins TMP0w, CARG1w, TMP1w
-  |  add CARG1, TMP0, TISNUM
+  |  add_TISNUM CARG1, TMP0
   |  b ->fff_restv
   |.endmacro
   |
@@ -1705,7 +1754,7 @@ static void build_subroutines(BuildCtx *
   |  cmp TMP1, TMP2
   |   mov CARG1, L
   |  bhi >5				// Need to grow stack.
-  |   blr CARG3				// (lua_State *L)
+  |   blr_auth CARG3			// (lua_State *L)
   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   |   ldr BASE, L->base
   |  cmp CRET1w, #0
@@ -1741,6 +1790,7 @@ static void build_subroutines(BuildCtx *
   |
   |->fff_gcstep:			// Call GC step function.
   |  // BASE = new base, RC = nargs*8
+  |  sp_auth
   |   add CARG2, BASE, NARGS8:RC	// Calculate L->top.
   |  mov RA, lr
   |   stp BASE, CARG2, L->base
@@ -1752,7 +1802,7 @@ static void build_subroutines(BuildCtx *
   |  mov lr, RA				// Help return address predictor.
   |  sub NARGS8:RC, CARG2, BASE		// Calculate nargs*8.
   |   and CFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  ret
+  |  ret_auth
   |
   |//-----------------------------------------------------------------------
   |//-- Special dispatch targets -------------------------------------------
@@ -1779,7 +1829,7 @@ static void build_subroutines(BuildCtx *
   |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
   |5:  // Re-dispatch to static ins.
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->vm_inshook:			// Dispatch target for instr/line hooks.
   |  ldrb TMP2w, GL->hookmask
@@ -1805,7 +1855,7 @@ static void build_subroutines(BuildCtx *
   |   decode_RA RA, INS
   |  ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
   |   decode_RD RC, INS
-  |  br TMP0
+  |  br_auth TMP0
   |
   |->cont_hook:				// Continue from hook yield.
   |  ldr CARG1, [CARG4, #-40]
@@ -1855,7 +1905,7 @@ static void build_subroutines(BuildCtx *
   |  sub NARGS8:RC, TMP1, BASE
   |   ldr INSw, [PC, #-4]
   |  and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |  br CRET1
+  |  br_auth CRET1
   |
   |->cont_stitch:			// Trace stitching.
   |.if JIT
@@ -1868,8 +1918,7 @@ static void build_subroutines(BuildCtx *
   |    and CARG3, CARG3, #LJ_GCVMASK
   |   beq >2
   |1:  // Move results down.
-  |  ldr CARG1, [RA]
-  |    add RA, RA, #8
+  |  ldr CARG1, [RA], #8
   |   subs RB, RB, #8
   |  str CARG1, [BASE, RC, lsl #3]
   |    add RC, RC, #1
@@ -1984,13 +2033,11 @@ static void build_subroutines(BuildCtx *
   |.if JIT
   |  ldr L, SAVE_L
   |1:
-  |  cmp CARG1w, #0
-  |  blt >9				// Check for error from exit.
-  |   lsl RC, CARG1, #3
+  |   init_constants
+  |  cmn CARG1w, #LUA_ERRERR
+  |  bhs >9				// Check for error from exit.
   |  ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
-  |    movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |    movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |    movn TISNIL, #0
+  |   lsl RC, CARG1, #3
   |  and LFUNC:CARG2, CARG2, #LJ_GCVMASK
   |   str RCw, SAVE_MULTRES
   |   str BASE, L->base
@@ -2002,6 +2049,8 @@ static void build_subroutines(BuildCtx *
   |  ldrb RBw, [PC, # OFS_OP]
   |   ldr INSw, [PC], #4
   |    st_vmstate CARG4w
+  |  cmn CARG1w, #17			// Static dispatch?
+  |  beq >5
   |  cmp RBw, #BC_FUNCC+2		// Fast function?
   |   add TMP1, GL, INS, uxtb #3
   |  bhs >4
@@ -2012,13 +2061,13 @@ static void build_subroutines(BuildCtx *
   |   decode_RA RA, INS
   |   lsr TMP0, INS, #16
   |   csel RC, TMP0, RC, lo
-  |   blo >5
+  |   blo >3
   |   ldr CARG3, [BASE, FRAME_FUNC]
   |   sub RC, RC, #8
   |   add RA, BASE, RA, lsl #3	// Yes: RA = BASE+framesize*8, RC = nargs*8
   |   and LFUNC:CARG3, CARG3, #LJ_GCVMASK
-  |5:
-  |  br RB
+  |3:
+  |  br_auth RB
   |
   |4:  // Check frame below fast function.
   |  ldr CARG1, [BASE, FRAME_PC]
@@ -2034,9 +2083,21 @@ static void build_subroutines(BuildCtx *
   |  ldr KBASE, [CARG3, #PC2PROTO(k)]
   |  b <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ldr RA, [GL, #GL_J(trace)]
+  |  decode_RD RC, INS
+  |  ldr TRACE:RA, [RA, RC, lsl #3]
+  |  ldr INSw, TRACE:RA->startins
+  |  add TMP0, GL, INS, uxtb #3
+  |   decode_RA RA, INS
+  |  ldr RB, [TMP0, #GG_G2DISP+GG_DISP2STATIC]
+  |   decode_RD RC, INS
+  |  br_auth RB
+  |
   |9:  // Rethrow error from the right C frame.
+  |  neg CARG2w, CARG1w
   |  mov CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -2065,12 +2126,69 @@ static void build_subroutines(BuildCtx *
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
   |
+  |.define NEXT_TAB,		TAB:CARG1	// In: table pointer.
+  |.define NEXT_RES,		CARG1	// Out: result pointer (same register as NEXT_TAB).
+  |.define NEXT_IDX,		CARG2w	// In/out: traversal index, advanced in place.
+  |.define NEXT_LIM,		CARG3w
+  |.define NEXT_TMP0,		TMP0
+  |.define NEXT_TMP0w,		TMP0w
+  |.define NEXT_TMP1,		TMP1
+  |.define NEXT_TMP1w,		TMP1w
+  |.define NEXT_RES_PTR,	sp
+  |.define NEXT_RES_VAL,	[sp]
+  |.define NEXT_RES_KEY,	[sp, #8]
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in CRET2w.
+  |->vm_next:
+  |.if JIT
+  |  ldr NEXT_LIM, NEXT_TAB->asize
+  |   ldr NEXT_TMP1, NEXT_TAB->array
+  |1:  // Traverse array part.
+  |  subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM	// TMP0w = idx - asize (reused at 5:).
+  |  bhs >5				// Index points after array part?
+  |  ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3]	// TMP0 = array[idx] (8 byte slots).
+  |  cmn NEXT_TMP0, #-LJ_TNIL
+  |   cinc NEXT_IDX, NEXT_IDX, eq	// Advance idx here only when skipping a hole.
+  |  beq <1				// Skip holes in array part.
+  |  str NEXT_TMP0, NEXT_RES_VAL
+  |   movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16
+  |   stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY	// Key = 32 bit idx followed by 32 bit tag word.
+  |  add NEXT_IDX, NEXT_IDX, #1
+  |  mov NEXT_RES, NEXT_RES_PTR
+  |4:
+  |  ret
+  |
+  |5:  // Traverse hash part.
+  |  ldr NEXT_TMP1w, NEXT_TAB->hmask
+  |   ldr NODE:NEXT_RES, NEXT_TAB->node	// Overwrites the table pointer (same register).
+  |   add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1	// TMP0w = (idx - asize) * 3.
+  |  add NEXT_LIM, NEXT_LIM, NEXT_TMP1w	// LIM = asize + hmask.
+  |   add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3	// node += (idx - asize) * 24.
+  |6:
+  |  cmp NEXT_IDX, NEXT_LIM
+  |  bhi >9				// idx > asize + hmask: end of iteration.
+  |  ldr NEXT_TMP0, NODE:NEXT_RES->val
+  |  cmn NEXT_TMP0, #-LJ_TNIL
+  |   add NEXT_IDX, NEXT_IDX, #1
+  |  bne <4				// Non-nil value: return the node pointer.
+  |  // Skip holes in hash part.
+  |  add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node)
+  |  b <6
+  |
+  |9:  // End of iteration. Set the key to nil (not the value).
+  |  movn NEXT_TMP0, #0
+  |  str NEXT_TMP0, NEXT_RES_KEY
+  |  mov NEXT_RES, NEXT_RES_PTR
+  |  ret
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
   |
   |// Handler for callback functions.
-  |// Saveregs already performed. Callback slot number in [sp], g in r12.
+  |// Saveregs already performed. Callback slot number in w9, g in x10.
   |->vm_ffi_callback:
   |.if FFI
   |.type CTSTATE, CTState, PC
@@ -2094,9 +2212,7 @@ static void build_subroutines(BuildCtx *
   |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
   |  // Returns lua_State *.
   |  ldp BASE, RC, L:CRET1->base
-  |   movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
-  |   movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
-  |   movn TISNIL, #0
+  |   init_constants
   |   mov L, CRET1
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
   |  sub RC, RC, BASE
@@ -2122,21 +2238,22 @@ static void build_subroutines(BuildCtx *
   |  // Caveat: needs special frame unwinding, see below.
   |.if FFI
   |  .type CCSTATE, CCallState, x19
-  |  stp fp, lr, [sp, #-32]!
-  |  add fp, sp, #0
-  |  str CCSTATE, [sp, #16]
+  |  sp_auth
+  |  stp_unwind CCSTATE, x20, [sp, #-32]!
+  |  stp fp, lr, [sp, #16]
+  |  add fp, sp, #16
   |  mov CCSTATE, x0
   |  ldr TMP0w, CCSTATE:x0->spadj
   |   ldrb TMP1w, CCSTATE->nsp
   |    add TMP2, CCSTATE, #offsetof(CCallState, stack)
-  |   subs TMP1, TMP1, #1
+  |   subs TMP1, TMP1, #8
   |    ldr TMP3, CCSTATE->func
-  |  sub sp, fp, TMP0
+  |  sub sp, sp, TMP0
   |   bmi >2
   |1:  // Copy stack slots
-  |  ldr TMP0, [TMP2, TMP1, lsl #3]
-  |  str TMP0, [sp, TMP1, lsl #3]
-  |  subs TMP1, TMP1, #1
+  |  ldr TMP0, [TMP2, TMP1]
+  |  str TMP0, [sp, TMP1]
+  |  subs TMP1, TMP1, #8
   |  bpl <1
   |2:
   |  ldp x0, x1, CCSTATE->gpr[0]
@@ -2148,14 +2265,14 @@ static void build_subroutines(BuildCtx *
   |  ldp x6, x7, CCSTATE->gpr[6]
   |   ldp d6, d7, CCSTATE->fpr[6]
   |  ldr x8, CCSTATE->retp
-  |  blr TMP3
-  |  mov sp, fp
+  |  blr_auth TMP3
+  |  sub sp, fp, #16
   |  stp x0, x1, CCSTATE->gpr[0]
   |   stp d0, d1, CCSTATE->fpr[0]
   |   stp d2, d3, CCSTATE->fpr[2]
-  |  ldr CCSTATE, [sp, #16]
-  |  ldp fp, lr, [sp], #32
-  |  ret
+  |  ldp fp, lr, [sp, #16]
+  |  ldp_unwind CCSTATE, x20, [sp], #32
+  |  ret_auth
   |.endif
   |// Note: vm_ffi_call must be the last function in this object file!
   |
@@ -2474,7 +2591,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  bne >5
     |  negs TMP0w, TMP0w
     |   movz CARG3, #0x41e0, lsl #48	// 2^31.
-    |   add TMP0, TMP0, TISNUM
+    |   add_TISNUM TMP0, TMP0
     |  csel TMP0, TMP0, CARG3, vc
     |5:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -2489,7 +2606,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  bne >2
     |  ldr CARG1w, STR:CARG1->len
     |1:
-    |  add CARG1, CARG1, TISNUM
+    |  add_TISNUM CARG1, CARG1
     |  str CARG1, [BASE, RA, lsl #3]
     |  ins_next
     |
@@ -2576,7 +2693,9 @@ static void build_ins(BuildCtx *ctx, BCO
     |.macro ins_arithmod, res, reg1, reg2
     |  fdiv d2, reg1, reg2
     |  frintm d2, d2
-    |  fmsub res, d2, reg2, reg1
+    |  // Cannot use fmsub, because FMA is not enabled by default.
+    |  fmul d2, d2, reg2
+    |  fsub res, reg1, d2
     |.endmacro
     |
     |.macro ins_arithdn, intins, fpins
@@ -2595,7 +2714,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  intins CARG1w, CARG1w, CARG2w
     |  ins_arithfallback bvs
     |.endif
-    |  add CARG1, CARG1, TISNUM
+    |  add_TISNUM CARG1, CARG1
     |  str CARG1, [BASE, RA, lsl #3]
     |4:
     |  ins_next
@@ -2688,7 +2807,7 @@ static void build_ins(BuildCtx *ctx, BCO
   case BC_KSHORT:
     |  // RA = dst, RC = int16_literal
     |  sxth RCw, RCw
-    |  add TMP0, RC, TISNUM
+    |  add_TISNUM TMP0, RC
     |  str TMP0, [BASE, RA, lsl #3]
     |  ins_next
     break;
@@ -2884,7 +3003,7 @@ static void build_ins(BuildCtx *ctx, BCO
   case BC_GGET:
     |  // RA = dst, RC = str_const (~)
   case BC_GSET:
-    |  // RA = dst, RC = str_const (~)
+    |  // RA = src, RC = str_const (~)
     |  ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
     |   mvn RC, RC
     |  and LFUNC:CARG1, CARG1, #LJ_GCVMASK
@@ -2911,7 +3030,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   cmp TMP1w, CARG1w		// In array part?
     |   bhs ->vmeta_tgetv
     |  ldr TMP0, [CARG3]
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |1:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -2954,7 +3073,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   ldr NODE:CARG3, NODE:CARG3->next
     |  cmp CARG1, CARG4
     |  bne >4
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |3:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -2963,7 +3082,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |4:  // Follow hash chain.
     |  cbnz NODE:CARG3, <1
     |  // End of hash chain: key not found, nil result.
-    |   mov TMP0, TISNIL
+    |   mov_nil TMP0
     |
     |5:  // Check for __index if table value is nil.
     |  ldr TAB:CARG1, TAB:CARG2->metatable
@@ -2984,7 +3103,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   cmp RCw, CARG1w			// In array part?
     |   bhs ->vmeta_tgetb
     |  ldr TMP0, [CARG3]
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |  beq >5
     |1:
     |  str TMP0, [BASE, RA, lsl #3]
@@ -3031,7 +3150,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  ldr TMP1, [CARG3]
     |   ldr TMP0, [BASE, RA, lsl #3]
     |    ldrb TMP2w, TAB:CARG2->marked
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >5
     |1:
     |   str TMP0, [CARG3]
@@ -3083,7 +3202,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  cmp CARG1, CARG4
     |  bne >5
     |   ldr TMP0, [BASE, RA, lsl #3]
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >4
     |2:
     |   str TMP0, NODE:CARG3->val
@@ -3142,7 +3261,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  ldr TMP1, [CARG3]
     |   ldr TMP0, [BASE, RA, lsl #3]
     |    ldrb TMP2w, TAB:CARG2->marked
-    |  cmp TMP1, TISNIL			// Previous value is nil?
+    |  cmp_nil TMP1			// Previous value is nil?
     |  beq >5
     |1:
     |   str TMP0, [CARG3]
@@ -3241,9 +3360,8 @@ static void build_ins(BuildCtx *ctx, BCO
     |->BC_CALL_Z:
     |  mov RB, BASE			// Save old BASE for vmeta_call.
     |  add BASE, BASE, RA, lsl #3
-    |  ldr CARG3, [BASE]
+    |  ldr CARG3, [BASE], #16
     |   sub NARGS8:RC, NARGS8:RC, #8
-    |   add BASE, BASE, #16
     |  checkfunc CARG3, ->vmeta_call
     |  ins_call
     break;
@@ -3259,9 +3377,8 @@ static void build_ins(BuildCtx *ctx, BCO
     |  // RA = base, (RB = 0,) RC = (nargs+1)*8
     |->BC_CALLT1_Z:
     |  add RA, BASE, RA, lsl #3
-    |  ldr TMP1, [RA]
+    |  ldr TMP1, [RA], #16
     |   sub NARGS8:RC, NARGS8:RC, #8
-    |   add RA, RA, #16
     |  checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
     |  ldr PC, [BASE, FRAME_PC]
     |->BC_CALLT2_Z:
@@ -3321,10 +3438,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |  hotloop
     |.endif
+    |->vm_IITERN:
+    |  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |  add RA, BASE, RA, lsl #3
     |  ldr TAB:RB, [RA, #-16]
     |    ldrh TMP3w, [PC, # OFS_RD]
@@ -3340,10 +3458,10 @@ static void build_ins(BuildCtx *ctx, BCO
     |   add CARG3, CARG2, CARG1, lsl #3
     |  bhs >5				// Index points after array part?
     |   ldr TMP0, [CARG3]
-    |   cmp TMP0, TISNIL
+    |   cmp_nil TMP0
     |   cinc CARG1, CARG1, eq		// Skip holes in array part.
     |   beq <1
-    |   add CARG1, CARG1, TISNUM
+    |   add_TISNUM CARG1, CARG1
     |   stp CARG1, TMP0, [RA]
     |    add CARG1, CARG1, #1
     |3:
@@ -3361,7 +3479,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   add NODE:CARG3, NODE:RB, CARG1, lsl #3  // node = tab->node + idx*3*8
     |  bhi <4
     |  ldp TMP0, CARG1, NODE:CARG3->val
-    |  cmp TMP0, TISNIL
+    |  cmp_nil TMP0
     |   add RC, RC, #1
     |  beq <6				// Skip holes in hash part.
     |  stp CARG1, TMP0, [RA]
@@ -3379,11 +3497,11 @@ static void build_ins(BuildCtx *ctx, BCO
     |  checkfunc CFUNC:CARG1, >5
     |   asr TMP0, TAB:CARG3, #47
     |  ldrb TMP1w, CFUNC:CARG1->ffid
-    |   cmn TMP0, #-LJ_TTAB
-    |   ccmp CARG4, TISNIL, #0, eq
+    |   cmp_nil CARG4
+    |   ccmn TMP0, #-LJ_TTAB, #0, eq
     |  ccmp TMP1w, #FF_next_N, #0, eq
     |  bne >5
-    |  mov TMP0w, #0xfffe7fff
+    |  mov TMP0w, #0xfffe7fff		// LJ_KEYINDEX
     |  lsl TMP0, TMP0, #32
     |  str TMP0, [RA, #-8]		// Initialize control var.
     |1:
@@ -3391,11 +3509,28 @@ static void build_ins(BuildCtx *ctx, BCO
     |  ins_next
     |
     |5:  // Despecialize bytecode if any of the checks fail.
+    |.if JIT
+    |  ldrb TMP2w, [RC, # OFS_OP]
+    |.endif
     |  mov TMP0, #BC_JMP
     |   mov TMP1, #BC_ITERC
     |  strb TMP0w, [PC, #-4+OFS_OP]
+    |.if JIT
+    |  cmp TMP2w, #BC_ITERN
+    |  bne >6
+    |.endif
     |   strb TMP1w, [RC, # OFS_OP]
     |  b <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  ldr RA, [GL, #GL_J(trace)]
+    |  ldrh TMP2w, [RC, # OFS_RD]
+    |  ldr TRACE:RA, [RA, TMP2, lsl #3]
+    |  ldr TMP2w, TRACE:RA->startins
+    |  bfxil TMP2w, TMP1w, #0, #8
+    |  str TMP2w, [RC]
+    |  b <1
+    |.endif
     break;
 
   case BC_VARG:
@@ -3403,51 +3538,51 @@ static void build_ins(BuildCtx *ctx, BCO
     |   and RC, RC, #255
     |  // RA = base, RB = (nresults+1), RC = numparams
     |  ldr TMP1, [BASE, FRAME_PC]
-    |  add RC, BASE, RC, lsl #3
-    |   add RA, BASE, RA, lsl #3
-    |  add RC, RC, #FRAME_VARG
-    |   add TMP2, RA, RB, lsl #3
-    |  sub RC, RC, TMP1			// RC = vbase
-    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
+    |  add TMP0, BASE, RC, lsl #3
+    |   add RC, BASE, RA, lsl #3	// RC = destination
+    |  add TMP0, TMP0, #FRAME_VARG
+    |   add TMP2, RC, RB, lsl #3
+    |  sub RA, TMP0, TMP1		// RA = vbase
+    |  // Note: RA may now be even _above_ BASE if nargs was < numparams.
     |   sub TMP3, BASE, #16		// TMP3 = vtop
     |  cbz RB, >5
     |   sub TMP2, TMP2, #16
     |1:  // Copy vararg slots to destination slots.
-    |  cmp RC, TMP3
-    |  ldr TMP0, [RC], #8
-    |  csel TMP0, TMP0, TISNIL, lo
-    |   cmp RA, TMP2
-    |  str TMP0, [RA], #8
+    |  cmp RA, TMP3
+    |  ldr TMP0, [RA], #8
+    |  csinv TMP0, TMP0, xzr, lo	// TISNIL = ~xzr
+    |   cmp RC, TMP2
+    |  str TMP0, [RC], #8
     |   blo <1
     |2:
     |  ins_next
     |
     |5:  // Copy all varargs.
     |  ldr TMP0, L->maxstack
-    |   subs TMP2, TMP3, RC
+    |   subs TMP2, TMP3, RA
     |   csel RB, xzr, TMP2, le		// MULTRES = (max(vtop-vbase,0)+1)*8
     |   add RB, RB, #8
-    |  add TMP1, RA, TMP2
+    |  add TMP1, RC, TMP2
     |   str RBw, SAVE_MULTRES
     |   ble <2				// Nothing to copy.
     |  cmp TMP1, TMP0
     |  bhi >7
     |6:
-    |  ldr TMP0, [RC], #8
-    |  str TMP0, [RA], #8
-    |  cmp RC, TMP3
+    |  ldr TMP0, [RA], #8
+    |  str TMP0, [RC], #8
+    |  cmp RA, TMP3
     |  blo <6
     |  b <2
     |
     |7:  // Grow stack for varargs.
     |  lsr CARG2, TMP2, #3
-    |   stp BASE, RA, L->base
+    |   stp BASE, RC, L->base
     |  mov CARG1, L
-    |  sub RC, RC, BASE			// Need delta, because BASE may change.
+    |  sub RA, RA, BASE			// Need delta, because BASE may change.
     |   str PC, SAVE_PC
     |  bl extern lj_state_growstack	// (lua_State *L, int n)
-    |  ldp BASE, RA, L->base
-    |  add RC, BASE, RC
+    |  ldp BASE, RC, L->base
+    |  add RA, BASE, RA
     |  sub TMP3, BASE, #16
     |  b <6
     break;
@@ -3591,7 +3726,7 @@ static void build_ins(BuildCtx *ctx, BCO
     } else {
       |  adds CARG1w, CARG1w, CARG3w
       |  bvs >2
-      |   add TMP0, CARG1, TISNUM
+      |   add_TISNUM TMP0, CARG1
       |  tbnz CARG3w, #31, >4
       |  cmp CARG1w, CARG2w
     }
@@ -3670,7 +3805,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  // RA = base, RC = target
     |  ldr CARG1, [BASE, RA, lsl #3]
     |   add TMP1, BASE, RA, lsl #3
-    |  cmp CARG1, TISNIL
+    |  cmp_nil CARG1
     |  beq >1				// Stop if iterator returned nil.
     if (op == BC_JITERL) {
       |  str CARG1, [TMP1, #-8]
@@ -3703,15 +3838,22 @@ static void build_ins(BuildCtx *ctx, BCO
     |.if JIT
     |  // RA = base (ignored), RC = traceno
     |  ldr CARG1, [GL, #GL_J(trace)]
-    |   mov CARG2w, #0  // Traces on ARM64 don't store the trace #, so use 0.
+    |   st_vmstate wzr  // Traces on ARM64 don't store the trace #, so use 0.
     |  ldr TRACE:RC, [CARG1, RC, lsl #3]
-    |   st_vmstate CARG2w
+    |.if PAUTH
+    |  ldr RA, TRACE:RC->mcauth
+    |.else
     |  ldr RA, TRACE:RC->mcode
+    |.endif
     |   str BASE, GL->jit_base
     |   str L, GL->tmpbuf.L
     |  sub sp, sp, #16	// See SPS_FIXED. Avoids sp adjust in every root trace.
+    |.if PAUTH
+    |  braa RA, RC
+    |.else
     |  br RA
     |.endif
+    |.endif
     break;
 
   case BC_JMP:
@@ -3772,6 +3914,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   add TMP2, BASE, RC
     |   add LFUNC:CARG3, CARG3, TMP0, lsl #47
     |  add RA, RA, RC
+    |  sub CARG1, CARG1, #8
     |   add TMP0, RC, #16+FRAME_VARG
     |   str LFUNC:CARG3, [TMP2], #8	// Store (tagged) copy of LFUNC.
     |    ldr KBASE, [PC, #-4+PC2PROTO(k)]
@@ -3821,7 +3964,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |  mov CARG1, L
     |   bhi ->vm_growstack_c		// Need to grow stack.
     |    st_vmstate TMP0w
-    |  blr CARG4			// (lua_State *L [, lua_CFunction f])
+    |  blr_auth CARG4			// (lua_State *L [, lua_CFunction f])
     |  // Returns nresults.
     |  ldp BASE, TMP1, L->base
     |    str L, GL->cur_L
@@ -3860,7 +4003,7 @@ static int build_backend(BuildCtx *ctx)
 static void emit_asm_debug(BuildCtx *ctx)
 {
   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
-  int i, cf = CFRAME_SIZE >> 3;
+  int i;
   switch (ctx->mode) {
   case BUILD_elfasm:
     fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
@@ -3874,7 +4017,7 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.uleb128 0x1\n"
 	"\t.sleb128 -8\n"
 	"\t.byte 30\n"				/* Return address is in lr. */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE0:\n\n");
     fprintf(ctx->fp,
@@ -3884,15 +4027,14 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.long .Lframe0\n"
 	"\t.quad .Lbegin\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE0:\n\n");
@@ -3904,13 +4046,14 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.long .Lframe0\n"
 	"\t.quad lj_vm_ffi_call\n"
 	"\t.quad %d\n"
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
 	"\t.align 3\n"
 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
 #endif
+#if !LJ_NO_UNWIND
     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
     fprintf(ctx->fp,
 	".Lframe1:\n"
@@ -3926,7 +4069,7 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
 	"\t.long lj_err_unwind_dwarf-.\n"
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE1:\n\n");
     fprintf(ctx->fp,
@@ -3937,15 +4080,14 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.long .Lbegin-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 %d\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 %d\n",	/* offset lr */
-	fcofs, CFRAME_SIZE, cf, cf-1);
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	fcofs);
     for (i = 19; i <= 28; i++)  /* offset x19-x28 */
-      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
+      fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
     for (i = 8; i <= 15; i++)  /* offset d8-d15 */
       fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
-	      64+i, cf-i-4);
+	      64+i, i+(3+(28-19+1)-8));
     fprintf(ctx->fp,
 	"\t.align 3\n"
 	".LEFDE2:\n\n");
@@ -3962,7 +4104,7 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.byte 30\n"				/* Return address is in lr. */
 	"\t.uleb128 1\n"			/* augmentation length */
 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
-	"\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n"	/* def_cfa sp */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
 	"\t.align 3\n"
 	".LECIE2:\n\n");
     fprintf(ctx->fp,
@@ -3973,14 +4115,107 @@ static void emit_asm_debug(BuildCtx *ctx
 	"\t.long lj_vm_ffi_call-.\n"
 	"\t.long %d\n"
 	"\t.uleb128 0\n"			/* augmentation length */
-	"\t.byte 0xe\n\t.uleb128 32\n"		/* def_cfa_offset */
-	"\t.byte 0x9d\n\t.uleb128 4\n"		/* offset fp */
-	"\t.byte 0x9e\n\t.uleb128 3\n"		/* offset lr */
-	"\t.byte 0x93\n\t.uleb128 2\n"		/* offset x19 */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
+#endif
     break;
+#if !LJ_NO_UNWIND
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;
+#endif
+    int j;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"
+	"LSCIEX:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zPR\\0\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf@GOT-.\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (j = 0; j < ctx->nsym; j++) {
+      const char *name = ctx->sym[j].name;
+      int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
+      if (size == 0) continue;
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+      fprintf(ctx->fp,
+	"LSFDE%d:\n"
+	"\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	"\t.long L$set$%d\n"
+	"LASFDE%d:\n"
+	"\t.long LASFDE%d-EH_frame1\n"
+	"\t.long %s-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n",		/* offset fp */
+	j, j, j, j, j, j, j, name, size);
+      for (i = 19; i <= 28; i++)  /* offset x19-x28 */
+	fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+      for (i = 8; i <= 15; i++)  /* offset d8-d15 */
+	fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+		64+i, i+(3+(28-19+1)-8));
+      fprintf(ctx->fp,
+	"\t.align 3\n"
+	"LEFDE%d:\n\n", j);
+    }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	"EH_frame2:\n"
+	"\t.set L$set$y,LECIEY-LSCIEY\n"
+	"\t.long L$set$y\n"
+	"LSCIEY:\n"
+	"\t.long 0\n"
+	"\t.byte 0x1\n"
+	"\t.ascii \"zR\\0\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -8\n"
+	"\t.byte 30\n"				/* Return address is in lr. */
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n"	/* def_cfa fp 16 */
+	"\t.align 3\n"
+	"LECIEY:\n\n");
+      fprintf(ctx->fp,
+	"LSFDEY:\n"
+	"\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	"\t.long L$set$yy\n"
+	"LASFDEY:\n"
+	"\t.long LASFDEY-EH_frame2\n"
+	"\t.long _lj_vm_ffi_call-.\n"
+	"\t.long %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x9e\n\t.uleb128 1\n"		/* offset lr */
+	"\t.byte 0x9d\n\t.uleb128 2\n"		/* offset fp */
+	"\t.byte 0x93\n\t.uleb128 3\n"		/* offset x19 */
+	"\t.byte 0x94\n\t.uleb128 4\n"		/* offset x20 */
+	"\t.align 3\n"
+	"LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
   default:
     break;
   }
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_mips.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_mips.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_mips.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for MIPS CPUs.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |//
 |// MIPS soft-float support contributed by Djordje Kovacevic and
 |// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
@@ -190,7 +190,7 @@
 |//-----------------------------------------------------------------------
 |
 |// Trap for not-yet-implemented parts.
-|.macro NYI; .long 0xf0f0f0f0; .endmacro
+|.macro NYI; .long 0xec1cf0f0; .endmacro
 |
 |// Macros to mark delay slots.
 |.macro ., a; a; .endmacro
@@ -501,6 +501,10 @@ static void build_subroutines(BuildCtx *
   |  b ->vm_returnc
   |.  li RD, 16				// 2 results: false + error message.
   |
+  |->vm_unwind_stub:			// Jump to exit stub from unwinder.
+  |  jr CARG1
+  |.  move ra, CARG2
+  |
   |//-----------------------------------------------------------------------
   |//-- Grow stack for calls -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -669,11 +673,11 @@ static void build_subroutines(BuildCtx *
   |.endif
   |     lw PC, -16+HI(RB)		// Restore PC from [cont|PC].
   |   addu TMP2, RA, RD
-  |    lw TMP1, LFUNC:TMP1->pc
   |.if FFI
   |  bnez AT, >1
   |.endif
   |.  sw TISNIL, -8+HI(TMP2)		// Ensure one valid arg.
+  |    lw TMP1, LFUNC:TMP1->pc
   |  // BASE = base, RA = resultptr, RB = meta base
   |  jr TMP0				// Jump to continuation.
   |.  lw KBASE, PC2PROTO(k)(TMP1)
@@ -1258,35 +1262,27 @@ static void build_subroutines(BuildCtx *
   |//-- Base library: iterators -------------------------------------------
   |
   |.ffunc next
-  |  lw CARG1, HI(BASE)
-  |   lw TAB:CARG2, LO(BASE)
+  |  lw CARG2, HI(BASE)
+  |   lw TAB:CARG1, LO(BASE)
   |  beqz NARGS8:RC, ->fff_fallback
   |.  addu TMP2, BASE, NARGS8:RC
   |  li AT, LJ_TTAB
   |   sw TISNIL, HI(TMP2)		// Set missing 2nd arg to nil.
-  |  bne CARG1, AT, ->fff_fallback
+  |  bne CARG2, AT, ->fff_fallback
   |.  lw PC, FRAME_PC(BASE)
   |  load_got lj_tab_next
-  |   sw BASE, L->base			// Add frame since C call can throw.
-  |   sw BASE, L->top			// Dummy frame length is ok.
-  |  addiu CARG3, BASE, 8
-  |   sw PC, SAVE_PC
-  |  call_intern lj_tab_next		// (lua_State *L, GCtab *t, TValue *key)
-  |.  move CARG1, L
-  |  // Returns 0 at end of traversal.
+  |  addiu CARG2, BASE, 8
+  |  call_intern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |.  addiu CARG3, BASE, -8
+  |  // Returns 1=found, 0=end, -1=error.
+  |   addiu RA, BASE, -8
+  |  bgtz CRET1, ->fff_res		// Found key/value.
+  |.  li RD, (2+1)*8
   |  beqz CRET1, ->fff_restv		// End of traversal: return nil.
   |.  li SFARG1HI, LJ_TNIL
-  |  lw TMP0, 8+HI(BASE)
-  |   lw TMP1, 8+LO(BASE)
-  |    addiu RA, BASE, -8
-  |  lw TMP2, 16+HI(BASE)
-  |   lw TMP3, 16+LO(BASE)
-  |  sw TMP0, HI(RA)
-  |   sw TMP1, LO(RA)
-  |  sw TMP2, 8+HI(RA)
-  |   sw TMP3, 8+LO(RA)
-  |  b ->fff_res
-  |.  li RD, (2+1)*8
+  |   lw CFUNC:RB, FRAME_FUNC(BASE)
+  |  b ->fff_fallback			// Invalid key.
+  |.  li RC, 2*8
   |
   |.ffunc_1 pairs
   |  li AT, LJ_TTAB
@@ -1967,7 +1963,7 @@ static void build_subroutines(BuildCtx *
   |  lw TMP0, SBUF:CARG1->b
   |   sw L, SBUF:CARG1->L
   |   sw BASE, L->base
-  |  sw TMP0, SBUF:CARG1->p
+  |  sw TMP0, SBUF:CARG1->w
   |  call_intern extern lj_buf_putstr_ .. name
   |.  sw PC, SAVE_PC
   |  load_got lj_buf_tostr
@@ -2470,7 +2466,8 @@ static void build_subroutines(BuildCtx *
   |   addiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sw BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  lw LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
   |  sll MULTRES, CRET1, 3
@@ -2484,14 +2481,16 @@ static void build_subroutines(BuildCtx *
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   addiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  addiu PC, PC, 4
   |  decode_OP4a TMP1, INS
   |  decode_OP4b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  addu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*4
   |  lw AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2519,9 +2518,26 @@ static void build_subroutines(BuildCtx *
   |  jr AT
   |.  addu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lw TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4b RD
+  |  addu TMP0, TMP0, RD
+  |  lw TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP4a TMP1, INS
+  |  decode_OP4b TMP1
+  |  addu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  lw AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
-  |  load_got lj_err_run
-  |  call_intern lj_err_run		// (lua_State *L)
+  |  load_got lj_err_trace
+  |  sub CARG2, r0, CRET1
+  |  call_intern lj_err_trace		// (lua_State *L, int errcode)
   |.  move CARG1, L
   |.endif
   |
@@ -2801,6 +2817,73 @@ static void build_subroutines(BuildCtx *
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
   |
+  |.define NEXT_TAB,		TAB:CARG1
+  |.define NEXT_IDX,		CARG2
+  |.define NEXT_ASIZE,		CARG3
+  |.define NEXT_NIL,		CARG4
+  |.define NEXT_TMP0,		r12
+  |.define NEXT_TMP1,		r13
+  |.define NEXT_TMP2,		r14
+  |.define NEXT_RES_VK,		CRET1
+  |.define NEXT_RES_IDX,	CRET2
+  |.define NEXT_RES_PTR,	sp
+  |.define NEXT_RES_VAL_I,	0(sp)
+  |.define NEXT_RES_VAL_IT,	4(sp)
+  |.define NEXT_RES_KEY_I,	8(sp)
+  |.define NEXT_RES_KEY_IT,	12(sp)
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in CRET2.
+  |->vm_next:
+  |.if JIT and ENDIAN_LE
+  |   lw NEXT_ASIZE, NEXT_TAB->asize
+  |  lw NEXT_TMP0, NEXT_TAB->array
+  |    li NEXT_NIL, LJ_TNIL
+  |1:  // Traverse array part.
+  |   sltu AT, NEXT_IDX, NEXT_ASIZE
+  |    sll NEXT_TMP1, NEXT_IDX, 3
+  |   beqz AT, >5
+  |.   addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
+  |  lw NEXT_TMP2, 4(NEXT_TMP1)
+  |   sw NEXT_IDX, NEXT_RES_KEY_I
+  |  beq NEXT_TMP2, NEXT_NIL, <1
+  |.  addiu NEXT_IDX, NEXT_IDX, 1
+  |    lw NEXT_TMP0, 0(NEXT_TMP1)
+  |   li AT, LJ_TISNUM
+  |  sw NEXT_TMP2, NEXT_RES_VAL_IT
+  |   sw AT, NEXT_RES_KEY_IT
+  |    sw NEXT_TMP0, NEXT_RES_VAL_I
+  |  move NEXT_RES_VK, NEXT_RES_PTR
+  |  jr ra
+  |.  move NEXT_RES_IDX, NEXT_IDX
+  |
+  |5:  // Traverse hash part.
+  |  subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+  |   lw NODE:NEXT_RES_VK, NEXT_TAB->node
+  |    sll NEXT_TMP2, NEXT_RES_IDX, 5
+  |  lw NEXT_TMP0, NEXT_TAB->hmask
+  |    sll AT, NEXT_RES_IDX, 3
+  |    subu AT, NEXT_TMP2, AT
+  |   addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
+  |6:
+  |  sltu AT, NEXT_TMP0, NEXT_RES_IDX
+  |  bnez AT, >8
+  |.  nop
+  |  lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it
+  |  bne NEXT_TMP2, NEXT_NIL, >9
+  |.  addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
+  |  // Skip holes in hash part.
+  |  b <6
+  |.  addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+  |
+  |8:  // End of iteration. Set the key to nil (not the value).
+  |  sw NEXT_NIL, NEXT_RES_KEY_IT
+  |  move NEXT_RES_VK, NEXT_RES_PTR
+  |9:
+  |  jr ra
+  |.  addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2868,7 +2951,6 @@ static void build_subroutines(BuildCtx *
   |  move TMP2, sp
   |  subu sp, sp, TMP1
   |  sw ra, -4(TMP2)
-  |   sll CARG2, CARG2, 2
   |  sw r16, -8(TMP2)
   |  sw CCSTATE, -12(TMP2)
   |  move r16, TMP2
@@ -4524,10 +4606,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
-    |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |.if JIT and ENDIAN_LE
+    |  hotloop
     |.endif
+    |->vm_IITERN:
+    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
     |  addu RA, BASE, RA
     |  lw TAB:RB, -16+LO(RA)
     |  lw RC, -8+LO(RA)			// Get index from control var.
@@ -4606,9 +4689,9 @@ static void build_ins(BuildCtx *ctx, BCO
     |  addiu CARG2, CARG2, -FF_next_N
     |  or CARG2, CARG2, CARG3
     |  bnez CARG2, >5
-    |.  lui TMP1, 0xfffe
+    |.  lui TMP1, (LJ_KEYINDEX >> 16)
     |  addu PC, TMP0, TMP2
-    |  ori TMP1, TMP1, 0x7fff
+    |  ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
     |  sw r0, -8+LO(RA)			// Initialize control var.
     |  sw TMP1, -8+HI(RA)
     |1:
@@ -4617,9 +4700,28 @@ static void build_ins(BuildCtx *ctx, BCO
     |  li TMP3, BC_JMP
     |   li TMP1, BC_ITERC
     |  sb TMP3, -4+OFS_OP(PC)
-    |    addu PC, TMP0, TMP2
+    |  addu PC, TMP0, TMP2
+    |.if JIT
+    |  lb TMP0, OFS_OP(PC)
+    |  li AT, BC_ITERN
+    |  bne TMP0, AT, >6
+    |.  lhu TMP2, OFS_RD(PC)
+    |.endif
     |  b <1
     |.  sb TMP1, OFS_OP(PC)
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  lw TMP0, DISPATCH_J(trace)(DISPATCH)
+    |   sll TMP2, TMP2, 2
+    |  addu TMP0, TMP0, TMP2
+    |  lw TRACE:TMP2, 0(TMP0)
+    |  lw TMP0, TRACE:TMP2->startins
+    |   li AT, -256
+    |  and TMP0, TMP0, AT
+    |  or TMP0, TMP0, TMP1
+    |  b <1
+    |.  sw TMP0, 0(PC)
+    |.endif
     break;
 
   case BC_VARG:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_mips64.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_mips64.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_mips64.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for MIPS64 CPUs.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |//
 |// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
 |// Sponsored by Cisco Systems, Inc.
@@ -193,7 +193,7 @@
 |//-----------------------------------------------------------------------
 |
 |// Trap for not-yet-implemented parts.
-|.macro NYI; .long 0xf0f0f0f0; .endmacro
+|.macro NYI; .long 0xec1cf0f0; .endmacro
 |
 |// Macros to mark delay slots.
 |.macro ., a; a; .endmacro
@@ -556,6 +556,10 @@ static void build_subroutines(BuildCtx *
   |  b ->vm_returnc
   |.  li RD, 16				// 2 results: false + error message.
   |
+  |->vm_unwind_stub:			// Jump to exit stub from unwinder.
+  |  jr CARG1
+  |.  move ra, CARG2
+  |
   |//-----------------------------------------------------------------------
   |//-- Grow stack for calls -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -724,11 +728,11 @@ static void build_subroutines(BuildCtx *
   |     ld PC, -24(RB)			// Restore PC from [cont|PC].
   |    cleartp LFUNC:TMP1
   |   daddu TMP2, RA, RD
-  |    ld TMP1, LFUNC:TMP1->pc
   |.if FFI
   |  bnez AT, >1
   |.endif
   |.  sd TISNIL, -8(TMP2)		// Ensure one valid arg.
+  |    ld TMP1, LFUNC:TMP1->pc
   |  // BASE = base, RA = resultptr, RB = meta base
   |  jr TMP0				// Jump to continuation.
   |.  ld KBASE, PC2PROTO(k)(TMP1)
@@ -1318,27 +1322,24 @@ static void build_subroutines(BuildCtx *
   |//-- Base library: iterators -------------------------------------------
   |
   |.ffunc_1 next
-  |  checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback
+  |  checktp CARG1, -LJ_TTAB, ->fff_fallback
   |  daddu TMP2, BASE, NARGS8:RC
   |  sd TISNIL, 0(TMP2)			// Set missing 2nd arg to nil.
-  |  ld PC, FRAME_PC(BASE)
   |  load_got lj_tab_next
-  |   sd BASE, L->base			// Add frame since C call can throw.
-  |   sd BASE, L->top			// Dummy frame length is ok.
-  |  daddiu CARG3, BASE, 8
-  |   sd PC, SAVE_PC
-  |  call_intern lj_tab_next		// (lua_State *L, GCtab *t, TValue *key)
-  |.  move CARG1, L
-  |  // Returns 0 at end of traversal.
+  |  ld PC, FRAME_PC(BASE)
+  |  daddiu CARG2, BASE, 8
+  |  call_intern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |.  daddiu CARG3, BASE, -16
+  |  // Returns 1=found, 0=end, -1=error.
+  |   daddiu RA, BASE, -16
+  |  bgtz CRET1, ->fff_res		// Found key/value.
+  |.  li RD, (2+1)*8
   |  beqz CRET1, ->fff_restv		// End of traversal: return nil.
   |.  move CARG1, TISNIL
-  |  ld TMP0, 8(BASE)
-  |    daddiu RA, BASE, -16
-  |  ld TMP2, 16(BASE)
-  |  sd TMP0, 0(RA)
-  |  sd TMP2, 8(RA)
-  |  b ->fff_res
-  |.  li RD, (2+1)*8
+  |   ld CFUNC:RB, FRAME_FUNC(BASE)
+  |   cleartp CFUNC:RB
+  |  b ->fff_fallback			// Invalid key.
+  |.  li RC, 2*8
   |
   |.ffunc_1 pairs
   |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
@@ -2037,7 +2038,7 @@ static void build_subroutines(BuildCtx *
   |  ld TMP0, SBUF:CARG1->b
   |   sd L, SBUF:CARG1->L
   |   sd BASE, L->base
-  |  sd TMP0, SBUF:CARG1->p
+  |  sd TMP0, SBUF:CARG1->w
   |  call_intern extern lj_buf_putstr_ .. name
   |.  sd PC, SAVE_PC
   |  load_got lj_buf_tostr
@@ -2570,7 +2571,8 @@ static void build_subroutines(BuildCtx *
   |   daddiu DISPATCH, JGL, -GG_DISP2G-32768
   |  sd BASE, L->base
   |1:
-  |  bltz CRET1, >9			// Check for error from exit.
+  |  sltiu TMP0, CRET1, -LUA_ERRERR	// Check for error from exit.
+  |  beqz TMP0, >9
   |.  ld LFUNC:RB, FRAME_FUNC(BASE)
   |    .FPU lui TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   |  dsll MULTRES, CRET1, 3
@@ -2585,14 +2587,16 @@ static void build_subroutines(BuildCtx *
   |    .FPU cvt.d.s TOBIT, TOBIT
   |  // Modified copy of ins_next which handles function header dispatch, too.
   |  lw INS, 0(PC)
-  |   daddiu PC, PC, 4
+  |  addiu CRET1, CRET1, 17		// Static dispatch?
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1
   |    sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |.  daddiu PC, PC, 4
   |  decode_OP8a TMP1, INS
   |  decode_OP8b TMP1
-  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  daddu TMP0, DISPATCH, TMP1
-  |   decode_RD8a RD, INS
+  |    sltiu TMP2, TMP1, BC_FUNCF*8
   |  ld AT, 0(TMP0)
   |   decode_RA8a RA, INS
   |    beqz TMP2, >2
@@ -2621,9 +2625,26 @@ static void build_subroutines(BuildCtx *
   |  jr AT
   |.  daddu RA, RA, BASE
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ld TMP0, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD8b RD
+  |  daddu TMP0, TMP0, RD
+  |  ld TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP8a TMP1, INS
+  |  decode_OP8b TMP1
+  |  daddu TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld AT, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |  jr AT
+  |.  decode_RA8b RA
+  |
   |9:  // Rethrow error from the right C frame.
-  |  load_got lj_err_run
-  |  call_intern lj_err_run		// (lua_State *L)
+  |  load_got lj_err_trace
+  |  sub CARG2, r0, CRET1
+  |  call_intern lj_err_trace		// (lua_State *L, int errcode)
   |.  move CARG1, L
   |.endif
   |
@@ -2902,6 +2923,70 @@ static void build_subroutines(BuildCtx *
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
   |
+  |.define NEXT_TAB,		TAB:CARG1
+  |.define NEXT_IDX,		CARG2
+  |.define NEXT_ASIZE,		CARG3
+  |.define NEXT_NIL,		CARG4
+  |.define NEXT_TMP0,		r12
+  |.define NEXT_TMP1,		r13
+  |.define NEXT_TMP2,		r14
+  |.define NEXT_RES_VK,		CRET1
+  |.define NEXT_RES_IDX,	CRET2
+  |.define NEXT_RES_PTR,	sp
+  |.define NEXT_RES_VAL,	0(sp)
+  |.define NEXT_RES_KEY,	8(sp)
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in CRET2.
+  |->vm_next:
+  |.if JIT and ENDIAN_LE
+  |   lw NEXT_ASIZE, NEXT_TAB->asize
+  |  ld NEXT_TMP0, NEXT_TAB->array
+  |    li NEXT_NIL, LJ_TNIL
+  |1:  // Traverse array part.
+  |   sltu AT, NEXT_IDX, NEXT_ASIZE
+  |    sll NEXT_TMP1, NEXT_IDX, 3
+  |   beqz AT, >5
+  |.   daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
+  |   li AT, LJ_TISNUM
+  |  ld NEXT_TMP2, 0(NEXT_TMP1)
+  |   dsll AT, AT, 47
+  |   or NEXT_TMP1, NEXT_IDX, AT
+  |  beq NEXT_TMP2, NEXT_NIL, <1
+  |.  addiu NEXT_IDX, NEXT_IDX, 1
+  |  sd NEXT_TMP2, NEXT_RES_VAL
+  |   sd NEXT_TMP1, NEXT_RES_KEY
+  |  move NEXT_RES_VK, NEXT_RES_PTR
+  |  jr ra
+  |.  move NEXT_RES_IDX, NEXT_IDX
+  |
+  |5:  // Traverse hash part.
+  |  subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+  |   ld NODE:NEXT_RES_VK, NEXT_TAB->node
+  |    sll NEXT_TMP2, NEXT_RES_IDX, 5
+  |  lw NEXT_TMP0, NEXT_TAB->hmask
+  |    sll AT, NEXT_RES_IDX, 3
+  |    subu AT, NEXT_TMP2, AT
+  |   daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
+  |6:
+  |  sltu AT, NEXT_TMP0, NEXT_RES_IDX
+  |  bnez AT, >8
+  |.  nop
+  |  ld NEXT_TMP2, NODE:NEXT_RES_VK->val
+  |  bne NEXT_TMP2, NEXT_NIL, >9
+  |.  addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
+  |  // Skip holes in hash part.
+  |  b <6
+  |.  daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+  |
+  |8:  // End of iteration. Set the key to nil (not the value).
+  |  sd NEXT_NIL, NEXT_RES_KEY
+  |  move NEXT_RES_VK, NEXT_RES_PTR
+  |9:
+  |  jr ra
+  |.  addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2980,7 +3065,6 @@ static void build_subroutines(BuildCtx *
   |  move TMP2, sp
   |  dsubu sp, sp, TMP1
   |  sd ra, -8(TMP2)
-  |   sll CARG2, CARG2, 3
   |  sd r16, -16(TMP2)
   |  sd CCSTATE, -24(TMP2)
   |  move r16, TMP2
@@ -4698,10 +4782,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
-    |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |.if JIT and ENDIAN_LE
+    |  hotloop
     |.endif
+    |->vm_IITERN:
+    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
     |  daddu RA, BASE, RA
     |  ld TAB:RB, -16(RA)
     |   lw RC, -8+LO(RA)		// Get index from control var.
@@ -4722,11 +4807,10 @@ static void build_ins(BuildCtx *ctx, BCO
     |.  addiu RC, RC, 1
     |   sd TMP2, 0(RA)
     |  sd CARG1, 8(RA)
-    |   or TMP0, RC, CARG3
     |     lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
     |     decode_RD4b RD
     |     daddu RD, RD, TMP3
-    |   sw TMP0, -8+LO(RA)		// Update control var.
+    |   sw RC, -8+LO(RA)		// Update control var.
     |     daddu PC, PC, RD
     |3:
     |  ins_next
@@ -4776,9 +4860,9 @@ static void build_ins(BuildCtx *ctx, BCO
     |  daddiu TMP1, TMP1, -FF_next_N
     |  or AT, AT, TMP1
     |  bnez AT, >5
-    |.  lui TMP1, 0xfffe
+    |.  lui TMP1, (LJ_KEYINDEX >> 16)
     |  daddu PC, TMP0, TMP2
-    |  ori TMP1, TMP1, 0x7fff
+    |  ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
     |  dsll TMP1, TMP1, 32
     |  sd TMP1, -8(RA)
     |1:
@@ -4788,8 +4872,27 @@ static void build_ins(BuildCtx *ctx, BCO
     |   li TMP1, BC_ITERC
     |  sb TMP3, -4+OFS_OP(PC)
     |   daddu PC, TMP0, TMP2
+    |.if JIT
+    |  lb TMP0, OFS_OP(PC)
+    |  li AT, BC_ITERN
+    |  bne TMP0, AT, >6
+    |.  lhu TMP2, OFS_RD(PC)
+    |.endif
     |  b <1
     |.  sb TMP1, OFS_OP(PC)
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  ld TMP0, DISPATCH_J(trace)(DISPATCH)
+    |   sll TMP2, TMP2, 3
+    |  daddu TMP0, TMP0, TMP2
+    |  ld TRACE:TMP2, 0(TMP0)
+    |  lw TMP0, TRACE:TMP2->startins
+    |   li AT, -256
+    |  and TMP0, TMP0, AT
+    |  or TMP0, TMP0, TMP1
+    |  b <1
+    |.  sw TMP0, 0(PC)
+    |.endif
     break;
 
   case BC_VARG:
@@ -5293,6 +5396,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   settp LFUNC:RB, TMP0
     |  daddu TMP0, RA, RC
     |   sd LFUNC:RB, 0(TMP1)		// Store (tagged) copy of LFUNC.
+    |  daddiu TMP2, TMP2, -8
     |   daddiu TMP3, RC, 16+FRAME_VARG
     |  sltu AT, TMP0, TMP2
     |    ld KBASE, -4+PC2PROTO(k)(PC)
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_ppc.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_ppc.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_ppc.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |
 |.arch ppc
 |.section code_op, code_sub
@@ -859,11 +859,11 @@ static void build_subroutines(BuildCtx *
   |.endif
   |     lwz PC, -16(RB)			// Restore PC from [cont|PC].
   |   subi TMP2, RD, 8
-  |    lwz TMP1, LFUNC:TMP1->pc
   |   stwx TISNIL, RA, TMP2		// Ensure one valid arg.
   |.if FFI
   |  ble >1
   |.endif
+  |    lwz TMP1, LFUNC:TMP1->pc
   |    lwz KBASE, PC2PROTO(k)(TMP1)
   |  // BASE = base, RA = resultptr, RB = meta base
   |  mtctr TMP0
@@ -1559,43 +1559,24 @@ static void build_subroutines(BuildCtx *
   |
   |//-- Base library: iterators -------------------------------------------
   |
-  |.ffunc next
-  |  cmplwi NARGS8:RC, 8
-  |   lwz CARG1, 0(BASE)
-  |    lwz TAB:CARG2, 4(BASE)
-  |  blt ->fff_fallback
+  |.ffunc_1 next
   |   stwx TISNIL, BASE, NARGS8:RC	// Set missing 2nd arg to nil.
-  |  checktab CARG1
+  |  checktab CARG3
   |   lwz PC, FRAME_PC(BASE)
   |  bne ->fff_fallback
-  |   stp BASE, L->base			// Add frame since C call can throw.
-  |  mr CARG1, L
-  |   stp BASE, L->top			// Dummy frame length is ok.
-  |  la CARG3, 8(BASE)
-  |   stw PC, SAVE_PC
-  |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Returns 0 at end of traversal.
-  |  cmplwi CRET1, 0
-  |   li CARG3, LJ_TNIL
-  |  beq ->fff_restv			// End of traversal: return nil.
+  |  la CARG2, 8(BASE)
+  |  la CARG3, -8(BASE)
+  |  bl extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |  // Returns 1=found, 0=end, -1=error.
+  |  cmpwi CRET1, 0
   |   la RA, -8(BASE)
-  |.if FPU
-  |  lfd f0, 8(BASE)			// Copy key and value to results.
-  |  lfd f1, 16(BASE)
-  |  stfd f0, 0(RA)
-  |  stfd f1, 8(RA)
-  |.else
-  |  lwz CARG1, 8(BASE)
-  |  lwz CARG2, 12(BASE)
-  |  lwz CARG3, 16(BASE)
-  |  lwz CARG4, 20(BASE)
-  |  stw CARG1, 0(RA)
-  |  stw CARG2, 4(RA)
-  |  stw CARG3, 8(RA)
-  |  stw CARG4, 12(RA)
-  |.endif
   |   li RD, (2+1)*8
-  |  b ->fff_res
+  |  bgt ->fff_res			// Found key/value.
+  |   li CARG3, LJ_TNIL
+  |  beq ->fff_restv			// End of traversal: return nil.
+  |   lwz CFUNC:RB, FRAME_FUNC(BASE)
+  |   li NARGS8:RC, 2*8
+  |  b ->fff_fallback			// Invalid key.
   |
   |.ffunc_1 pairs
   |  checktab CARG3
@@ -2516,7 +2497,7 @@ static void build_subroutines(BuildCtx *
   |  stw L, SBUF:CARG1->L
   |  stp BASE, L->base
   |  stw PC, SAVE_PC
-  |   stw TMP0, SBUF:CARG1->p
+  |   stw TMP0, SBUF:CARG1->w
   |  bl extern lj_buf_putstr_ .. name
   |  bl extern lj_buf_tostr
   |  b ->fff_resstr
@@ -3034,8 +3015,9 @@ static void build_subroutines(BuildCtx *
   |  addi DISPATCH, JGL, -GG_DISP2G-32768
   |  stp BASE, L->base
   |1:
-  |  cmpwi CARG1, 0
-  |  blt >9				// Check for error from exit.
+  |  li TMP2, -LUA_ERRERR
+  |  cmplw CARG1, TMP2
+  |  bge >9				// Check for error from exit.
   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   |   slwi MULTRES, CARG1, 3
   |    li TMP2, 0
@@ -3060,6 +3042,8 @@ static void build_subroutines(BuildCtx *
   |   addi PC, PC, 4
   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
   |    stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+  |  cmpwi CARG1, -17			// Static dispatch?
+  |  beq >5
   |  decode_OPP TMP1, INS
   |   decode_RA8 RA, INS
   |  lpx TMP0, DISPATCH, TMP1
@@ -3089,9 +3073,25 @@ static void build_subroutines(BuildCtx *
   |   add RA, RA, BASE
   |  bctr
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
+  |  decode_RD4 RD, INS
+  |  lwzx TRACE:TMP1, TMP1, RD
+  |  lwz INS, TRACE:TMP1->startins
+  |  decode_OPP TMP1, INS
+  |  addi TMP1, TMP1, GG_DISP2STATIC
+  |  lpx TMP0, DISPATCH, TMP1
+  |  mtctr TMP0
+  |   decode_RB8 RB, INS
+  |   decode_RD8 RD, INS
+  |   decode_RA8 RA, INS
+  |   decode_RC8 RC, INS
+  |  bctr
+  |
   |9:  // Rethrow error from the right C frame.
+  |  neg CARG2, CARG1
   |  mr CARG1, L
-  |  bl extern lj_err_run		// (lua_State *L)
+  |  bl extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -3181,6 +3181,11 @@ static void build_subroutines(BuildCtx *
   |  blr
   |.endif
   |
+  |->vm_next:
+  |.if JIT
+  |  NYI  // On big-endian.
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- FFI helper functions -----------------------------------------------
   |//-----------------------------------------------------------------------
@@ -3264,14 +3269,13 @@ static void build_subroutines(BuildCtx *
   |    stw TMP0, 4(sp)
   |   cmpwi cr1, CARG3, 0
   |  mr TMP2, sp
-  |   addic. CARG2, CARG2, -1
+  |   addic. CARG2, CARG2, -4
   |  stwux sp, sp, TMP1
   |   crnot 4*cr1+eq, 4*cr1+eq		// For vararg calls.
   |  stw r14, -4(TMP2)
   |  stw CCSTATE, -8(TMP2)
   |  mr r14, TMP2
   |  la TMP1, CCSTATE->stack
-  |   slwi CARG2, CARG2, 2
   |   blty >2
   |  la TMP2, 8(sp)
   |1:
@@ -4100,8 +4104,8 @@ static void build_ins(BuildCtx *ctx, BCO
     |.macro addo32., y, a, b
     |  // Need to check overflow for (a<<32) + (b<<32).
     |  rldicr TMP0, a, 32, 31
-    |  rldicr TMP3, b, 32, 31
-    |  addo. TMP0, TMP0, TMP3
+    |  rldicr TMP1, b, 32, 31
+    |  addo. TMP0, TMP0, TMP1
     |  add y, a, b
     |.endmacro
     |  ins_arith addo32., fadd, __adddf3
@@ -4114,8 +4118,8 @@ static void build_ins(BuildCtx *ctx, BCO
     |.macro subo32., y, a, b
     |  // Need to check overflow for (a<<32) - (b<<32).
     |  rldicr TMP0, a, 32, 31
-    |  rldicr TMP3, b, 32, 31
-    |  subo. TMP0, TMP0, TMP3
+    |  rldicr TMP1, b, 32, 31
+    |  subo. TMP0, TMP0, TMP1
     |  sub y, a, b
     |.endmacro
     |  ins_arith subo32., fsub, __subdf3
@@ -5130,8 +5134,9 @@ static void build_ins(BuildCtx *ctx, BCO
   case BC_ITERN:
     |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
     |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |  // NYI on big-endian
     |.endif
+    |->vm_IITERN:
     |  add RA, BASE, RA
     |  lwz TAB:RB, -12(RA)
     |  lwz RC, -4(RA)			// Get index from control var.
@@ -5250,8 +5255,8 @@ static void build_ins(BuildCtx *ctx, BCO
     |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
     |    add TMP3, PC, TMP0
     |  bne cr0, >5
-    |  lus TMP1, 0xfffe
-    |  ori TMP1, TMP1, 0x7fff
+    |  lus TMP1, (LJ_KEYINDEX >> 16)
+    |  ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
     |  stw ZERO, -4(RA)			// Initialize control var.
     |  stw TMP1, -8(RA)
     |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)
@@ -5262,6 +5267,7 @@ static void build_ins(BuildCtx *ctx, BCO
     |   li TMP1, BC_ITERC
     |  stb TMP0, -1(PC)
     |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)
+    |  // NYI on big-endian: unpatch JLOOP.
     |   stb TMP1, 3(PC)
     |  b <1
     break;
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_riscv64.dasc
===================================================================
--- /dev/null
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_riscv64.dasc
@@ -0,0 +1,4853 @@
+|// Low-level VM code for RISC-V 64 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
+|//
+|// Contributed by gns from PLCT Lab, ISCAS.
+|// Sponsored by PLCT Lab, ISCAS.
+|
+|.arch riscv64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|//       The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter.
+|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp
+|
+|
+|// The following must be C callee-save (but BASE is often refetched).
+|.define BASE,		x18	// Base of current Lua stack frame.
+|.define KBASE,		x19	// Constants of current Lua function.
+|.define PC,		x20	// Next PC.
+|.define GLREG,		x21	// Global state.
+|.define DISPATCH,	x22	// Opcode dispatch table.
+|.define LREG,		x23	// Register holding lua_State (also in SAVE_L).
+|.define MULTRES,	x24	// Size of multi-result: (nresults+1)*8.
+|
+|// Constants for type-comparisons, stores and conversions. C callee-save.
+|.define TISNIL,	x8
+|.define TISNUM,	x25
+|.define TOBIT,		f27	// 2^52 + 2^51.
+|
+|// The following temporaries are not saved across C calls, except for RA.
+|.define RA,		x9	// Callee-save.
+|.define RB,		x14
+|.define RC,		x15
+|.define RD,		x16
+|.define INS,		x17
+|
+|.define TMP0,		x6
+|.define TMP1,		x7
+|.define TMP2,		x28
+|.define TMP3,		x29
+|.define TMP4,		x30
+|
+|// RISC-V lp64d calling convention.
+|.define CFUNCADDR,	x5
+|.define CARG1,		x10
+|.define CARG2,		x11
+|.define CARG3,		x12
+|.define CARG4,		x13
+|.define CARG5,		x14
+|.define CARG6,		x15
+|.define CARG7,		x16
+|.define CARG8,		x17
+|
+|.define CRET1,		x10
+|.define CRET2,		x11
+|
+|.define FARG1,		f10
+|.define FARG2,		f11
+|.define FARG3,		f12
+|.define FARG4,		f13
+|.define FARG5,		f14
+|.define FARG6,		f15
+|.define FARG7,		f16
+|.define FARG8,		f17
+|
+|.define FRET1,		f10
+|.define FRET2,		f11
+|
+|.define FTMP0,		f0
+|.define FTMP1,		f1
+|.define FTMP2,		f2
+|.define FTMP3,		f3
+|.define FTMP4,		f4
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|// RISC-V 64 lp64d.
+|
+|.define CFRAME_SPACE,	256	// Delta for sp.
+|
+|//----- 16 byte aligned, <-- sp entering interpreter
+|.define SAVE_ERRF,	252	// 32 bit values.
+|.define SAVE_NRES,	248
+|.define SAVE_CFRAME,	240	// 64 bit values.
+|.define SAVE_L,	232
+|.define SAVE_PC,	224
+|//----- 16 byte aligned
+|// Padding		216
+|.define SAVE_GPR_,	112	// .. 112+13*8: 64 bit GPR saves.
+|.define SAVE_FPR_,	16	// .. 16+12*8: 64 bit FPR saves.
+|
+|
+|.define TMPD,		0
+|//----- 16 byte aligned
+|
+|.define TMPD_OFS,	0
+|
+|//-----------------------------------------------------------------------
+|
+|.macro saveregs  // Open a CFRAME_SPACE-byte frame and spill f8-f9, f18-f27, ra, x8-x9, x18-x27.
+|  addi sp, sp, -CFRAME_SPACE  // Allocate first: every slot below is addressed off the new sp.
+|  fsd f27, SAVE_FPR_+11*8(sp)
+|  fsd f26, SAVE_FPR_+10*8(sp)
+|  fsd f25, SAVE_FPR_+9*8(sp)
+|  fsd f24, SAVE_FPR_+8*8(sp)
+|  fsd f23, SAVE_FPR_+7*8(sp)
+|  fsd f22, SAVE_FPR_+6*8(sp)
+|  fsd f21, SAVE_FPR_+5*8(sp)
+|  fsd f20, SAVE_FPR_+4*8(sp)
+|  fsd f19, SAVE_FPR_+3*8(sp)
+|  fsd f18, SAVE_FPR_+2*8(sp)
+|  fsd f9,  SAVE_FPR_+1*8(sp)
+|  fsd f8,  SAVE_FPR_+0*8(sp)
+|  sd ra,  SAVE_GPR_+12*8(sp)  // GPR slots 0..12 must mirror restoreregs_ret.
+|  sd x27, SAVE_GPR_+11*8(sp)
+|  sd x26, SAVE_GPR_+10*8(sp)
+|  sd x25, SAVE_GPR_+9*8(sp)
+|  sd x24, SAVE_GPR_+8*8(sp)
+|  sd x23, SAVE_GPR_+7*8(sp)
+|  sd x22, SAVE_GPR_+6*8(sp)
+|  sd x21, SAVE_GPR_+5*8(sp)
+|  sd x20, SAVE_GPR_+4*8(sp)
+|  sd x19, SAVE_GPR_+3*8(sp)
+|  sd x18, SAVE_GPR_+2*8(sp)
+|  sd x9,  SAVE_GPR_+1*8(sp)
+|  sd x8,  SAVE_GPR_+0*8(sp)
+|.endmacro
+|
+|.macro restoreregs_ret  // Reload everything saveregs spilled, pop the frame and return.
+|  ld ra,  SAVE_GPR_+12*8(sp)  // Same slot layout as saveregs.
+|  ld x27, SAVE_GPR_+11*8(sp)
+|  ld x26, SAVE_GPR_+10*8(sp)
+|  ld x25, SAVE_GPR_+9*8(sp)
+|  ld x24, SAVE_GPR_+8*8(sp)
+|  ld x23, SAVE_GPR_+7*8(sp)
+|  ld x22, SAVE_GPR_+6*8(sp)
+|  ld x21, SAVE_GPR_+5*8(sp)
+|  ld x20, SAVE_GPR_+4*8(sp)
+|  ld x19, SAVE_GPR_+3*8(sp)
+|  ld x18, SAVE_GPR_+2*8(sp)
+|  ld x9,  SAVE_GPR_+1*8(sp)
+|  ld x8,  SAVE_GPR_+0*8(sp)
+|  fld f27, SAVE_FPR_+11*8(sp)
+|  fld f26, SAVE_FPR_+10*8(sp)
+|  fld f25, SAVE_FPR_+9*8(sp)
+|  fld f24, SAVE_FPR_+8*8(sp)
+|  fld f23, SAVE_FPR_+7*8(sp)
+|  fld f22, SAVE_FPR_+6*8(sp)
+|  fld f21, SAVE_FPR_+5*8(sp)
+|  fld f20, SAVE_FPR_+4*8(sp)
+|  fld f19, SAVE_FPR_+3*8(sp)
+|  fld f18, SAVE_FPR_+2*8(sp)
+|  fld f9,  SAVE_FPR_+1*8(sp)
+|  fld f8,  SAVE_FPR_+0*8(sp)
+|  addi sp, sp, CFRAME_SPACE  // Release the frame only after all sp-relative loads are done.
+|  ret  // Returns through the ra reloaded above. The value in CRET1 is left untouched.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Pseudo-instruction macros
+|// Be cautious with local label 9, since these macros use it!
+|.macro bxeq, a, b, tgt  // Jump to tgt if a == b.
+|  bne a, b, >9  // Inverted test hops over the unconditional jump (presumably for branch range).
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxne, a, b, tgt  // Jump to tgt if a != b.
+|  beq a, b, >9
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxlt, a, b, tgt  // Jump to tgt if a < b (signed).
+|  bge a, b, >9
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxge, a, b, tgt  // Jump to tgt if a >= b (signed).
+|  blt a, b, >9
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxgt, a, b, tgt  // Jump to tgt if a > b (signed).
+|  bge b, a, >9  // Operands swapped: skip when b >= a.
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxle, a, b, tgt  // Jump to tgt if a <= b (signed).
+|  blt b, a, >9  // Operands swapped: skip when b < a.
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxltu, a, b, tgt  // Jump to tgt if a < b (unsigned).
+|  bgeu a, b, >9
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxgeu, a, b, tgt  // Jump to tgt if a >= b (unsigned).
+|  bltu a, b, >9
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxgtu, a, b, tgt  // Jump to tgt if a > b (unsigned).
+|  bgeu b, a, >9  // Operands swapped: skip when b >= a.
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxleu, a, b, tgt  // Jump to tgt if a <= b (unsigned).
+|  bltu b, a, >9  // Operands swapped: skip when b < a.
+|  j tgt
+|9:
+|.endmacro
+|
+|.macro bxeqz, a, tgt  // Jump to tgt if a == 0.
+|  bxeq a, x0, tgt
+|.endmacro
+|
+|.macro bxnez, a, tgt  // Jump to tgt if a != 0.
+|  bxne a, x0, tgt
+|.endmacro
+|
+|.macro bxlez, a, tgt  // Jump to tgt if a <= 0 (signed).
+|  bxge x0, a, tgt  // Written as 0 >= a.
+|.endmacro
+|
+|.macro bxgez, a, tgt  // Jump to tgt if a >= 0 (signed).
+|  bxge a, x0, tgt
+|.endmacro
+|
+|.macro bxltz, a, tgt  // Jump to tgt if a < 0 (signed).
+|  bxlt a, x0, tgt
+|.endmacro
+|
+|.macro bxgtz, a, tgt  // Jump to tgt if a > 0 (signed).
+|  bxlt x0, a, tgt  // Written as 0 < a.
+|.endmacro
+|
+|.macro lxi, a, b  // Load the low 20 bits of constant b into a, sign-extended.
+|  lui a, (b)&0xfffff  // a = b << 12.
+|  srai a, a, 12  // Arithmetic shift back down propagates bit 19 as the sign.
+|.endmacro
+|
+|.macro lzi, a, b  // Load the low 20 bits of constant b into a, zero-extended.
+|  lui a, (b)&0xfffff  // a = b << 12. On RV64 lui sign-extends its 32-bit result.
+|  srliw a, a, 12  // 32-bit shift discards the sign copies that a 64-bit srli left in bits 20..51.
+|.endmacro
+|
+|.macro addxi, a, b, c  // a = b + (low 20 bits of constant c, sign-extended). Clobbers x31.
+|  lui x31, (c)&0xfffff  // Same lui/srai sequence as lxi, built in scratch register x31.
+|  srai x31, x31, 12
+|  add a, x31, b
+|.endmacro
+|
+|.macro sext.b, a, b  // a = low 8 bits of b, sign-extended to 64 bits.
+|  slli a, b, 56
+|  srai a, a, 56
+|.endmacro
+|
+|.macro sext.h, a, b  // a = low 16 bits of b, sign-extended to 64 bits.
+|  slli a, b, 48
+|  srai a, a, 48
+|.endmacro
+|
+|.macro zext.h, a, b  // a = low 16 bits of b, zero-extended.
+|  slli a, b, 48
+|  srli a, a, 48
+|.endmacro
+|
+|.macro zext.w, a, b  // a = low 32 bits of b, zero-extended.
+|  slli a, b, 32
+|  srli a, a, 32
+|.endmacro
+|
+|.macro bfextri, a, b, c, d  // a = bits d..c of b (c = top bit, d = bottom bit), zero-extended.
+|  slli a, b, (63-c)  // Move bit c up to bit 63, discarding everything above it.
+|  srli a, a, (d+63-c)  // Bring bit d down to bit 0.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L,		lua_State,	LREG
+|.type GL,		global_State,	GLREG
+|.type TVALUE,		TValue
+|.type GCOBJ,		GCobj
+|.type STR,		GCstr
+|.type TAB,		GCtab
+|.type LFUNC,		GCfuncL
+|.type CFUNC,		GCfuncC
+|.type PROTO,		GCproto
+|.type UPVAL,		GCupval
+|.type NODE,		Node
+|.type NARGS8,		int
+|.type TRACE,		GCtrace
+|.type SBUF,		SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; .long 0x00100073; .endmacro  // Emits one raw word (presumably the ebreak encoding; confirm against the ISA manual).
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_PC,	-8
+|.define FRAME_FUNC,	-16
+|
+|//-----------------------------------------------------------------------
+|
+|// Endian-specific defines. RISC-V only has little endian ABI for now.
+|.define OFS_RD,	2
+|.define OFS_RA,	1
+|.define OFS_OP,	0
+|
+|// Instruction decode.
+|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro  // dst = low byte of ins.
+|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro  // dst *= 4 (32-bit).
+|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro  // dst *= 8 (32-bit).
+|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro  // Keep an 8-bit field already positioned at bits 3..10.
+|
+|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro
+|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro
+|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro  // With RA8b: ((ins >> 8) & 0xff) * 8.
+|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro
+|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro  // With RB8b: ((ins >> 24) & 0xff) * 8.
+|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro
+|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro  // With RC8b: ((ins >> 16) & 0xff) * 8.
+|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro
+|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro  // dst = upper 16 bits of the 32-bit ins.
+|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro
+|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro
+|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro  // Same 0x7f8 mask as decode_RX8b, applied to src.
+|
+|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro  // dst = (ins & 0xff) * 8.
+|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro
+|.macro decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro
+|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro
+|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro  // dst = (ins >> 16) * 8.
+|
+|// Instruction fetch.
+|.macro ins_NEXT1  // Load the 32-bit instruction at PC into INS and advance PC by 4.
+|  lw INS, 0(PC)
+|   addi PC, PC, 4
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT2  // Decode INS into RA/RD and jump through the DISPATCH table. Clobbers TMP0, TMP1, TMP4.
+|  decode_OP8 TMP1, INS  // TMP1 = opcode * 8 = byte offset of the handler slot.
+|  add TMP0, DISPATCH, TMP1
+|   decode_RD8a RD, INS
+|  ld TMP4, 0(TMP0)  // TMP4 = handler address.
+|   decode_RA8a RA, INS
+|   decode_RD8b RD  // RD = D operand * 8.
+|   decode_RA8b RA  // RA = A operand * 8.
+|  jr TMP4
+|.endmacro
+|.macro ins_NEXT  // Full fetch + decode + dispatch.
+|  ins_NEXT1
+|  ins_NEXT2
+|.endmacro
+|
+|// Instruction footer.
+|.if 1  // Constant condition: replicated dispatch is always selected, the .else arm is never assembled.
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|  .define ins_next1, ins_NEXT1
+|  .define ins_next2, ins_NEXT2
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  .macro ins_next
+|    j ->ins_next
+|  .endmacro
+|  .macro ins_next1  // Intentionally empty: the fetch happens at the shared ->ins_next.
+|  .endmacro
+|  .macro ins_next2
+|    j ->ins_next
+|  .endmacro
+|  .macro ins_next_  // The single place that defines the shared ->ins_next label.
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt  // Enter the function in RB: fetch its first instruction and dispatch on it.
+|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+|  ld PC, LFUNC:RB->pc
+|  lw INS, 0(PC)
+|   addi PC, PC, 4
+|  decode_OP8 TMP1, INS
+|   decode_RA8 RA, INS
+|  add TMP0, DISPATCH, TMP1
+|  ld TMP0, 0(TMP0)  // TMP0 = handler address for the first opcode.
+|   add RA, RA, BASE  // Unlike ins_NEXT2, RA is made absolute and RD is not decoded.
+|  jr TMP0
+|.endmacro
+|
+|.macro ins_call  // As ins_callt, but first stores the caller PC in the new frame.
+|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+|  sd PC, FRAME_PC(BASE)
+|  ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|.macro branch_RD  // PC += (RD >> 1) - ((BCBIAS_J*4 >> 12) << 12). Clobbers TMP0, TMP4.
+|  srliw TMP0, RD, 1  // Presumably RD holds D*8 (see decode_RD8), making this D*4 bytes; confirm at call sites.
+|  lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff  // Negated bias, loaded into the upper 20 bits.
+|  addw TMP0, TMP0, TMP4  // 32-bit add, so the offset is sign-extended before it is applied.
+|  add PC, PC, TMP0
+|.endmacro
+|
+|// Assumes J is relative to GL. Some J members might be out of range though.
+#define GL_J(field)	(GG_G2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro call_intern, curfunc, func  // PC-relative call of func via CFUNCADDR. Also defines a global label.
+|->curfunc .. _pcrel_ .. func:  // Label marks the auipc. Each (curfunc, func) pair presumably may appear only once; confirm.
+|  auipc CFUNCADDR, extern %pcrel_hi(func)
+|  jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func)  // Names the label above with an lj_ prefix.
+|.endmacro
+|.macro call_extern, func  // Call the external symbol func.
+|  call extern func
+|  empty  // NOTE(review): `empty` is not defined in this chunk; confirm what it assembles to.
+|.endmacro
+|
+|// Set current VM state. Uses TMP0.
+|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro  // TMP0 = ~LJ_VMST_<st>. Pair with st_vmstate.
+|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro  // Store the value prepared in TMP0 as a 32-bit word.
+|
+|.macro hotcheck, delta, target  // Decrement the 16-bit counter selected by PC. Jump to target if it goes negative.
+|  srli TMP1, PC, 1
+|  andi TMP1, TMP1, 126  // Even byte offset in 0..126, taken from PC bits 2..7.
+|  add TMP1, TMP1, DISPATCH
+|  lhu TMP2, GG_DISP2HOT(TMP1)  // Zero-extended load, so only the subtraction can make it negative.
+|  addiw TMP2, TMP2, -delta
+|  sh TMP2, GG_DISP2HOT(TMP1)
+|  bxltz TMP2, target  // Expands through bxlt, so it uses local label 9. Clobbers TMP1, TMP2.
+|.endmacro
+|
+|.macro hotloop  // hotcheck with HOTCOUNT_LOOP, branching to ->vm_hotloop.
+|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall  // hotcheck with HOTCOUNT_CALL, branching to ->vm_hotcall.
+|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
+|.endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp, target  // Push tab onto the front of GL->gc.grayagain, then jump to target.
+|  ld tmp, GL->gc.grayagain  // tmp = old list head.
+|  andi mark, mark, ~LJ_GC_BLACK & 255		// black2gray(tab)
+|  sd tab, GL->gc.grayagain  // tab becomes the new head.
+|  sb mark, tab->marked  // Caller must have loaded tab->marked into mark beforehand.
+|  sd tmp, tab->gclist  // Link the old head behind tab.
+|  j target
+|.endmacro
+|
+|// Clear type tag. Isolate lowest 64-17=47 bits of reg.
+|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro  // In place: zero the top 17 bits.
+|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro  // Two-operand form: reg is left intact.
+|
+|// Set type tag: Merge 17 type bits into bits [47, 63] of dst.
+|.macro settp_a, dst; cleartp dst; .endmacro  // Step a: clear the old tag bits.
+|.macro settp_a, dst, src; cleartp dst, src; .endmacro
+|.macro settp_b, dst, tp;  // Step b: OR in tp << 47. Clobbers x31.
+|  slli x31, tp, 47
+|  or dst, dst, x31
+|.endmacro
+|.macro settp_b, dst, src, tp;  // Three-operand form: dst = src | (tp << 47). Clobbers x31.
+|  slli x31, tp, 47
+|  or dst, src, x31
+|.endmacro
+|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro  // Clobbers x31 via settp_b.
+|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro
+|
+|// Extract (negative) type tag.
+|.macro gettp, dst, src; srai dst, src, 47; .endmacro  // Arithmetic shift keeps the tag sign-extended.
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target  // Branch to target unless tag(reg) + tp == 0. Clobbers TMP4.
+|  gettp TMP4, reg
+|  addi TMP4, TMP4, tp  // Callers pass the negated tag (e.g. -LJ_TTAB), so a match sums to zero.
+|  cleartp reg  // Runs before the branch: reg is stripped of its tag on the failure path too.
+|  bxnez TMP4, target  // Expands through bxne, so it uses local label 9.
+|.endmacro
+|.macro checktp, dst, reg, tp, target  // As above, but writes the untagged value to dst and leaves reg intact.
+|  gettp TMP4, reg
+|  addi TMP4, TMP4, tp
+|  cleartp dst, reg
+|  bxnez TMP4, target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target  // Branch to target unless tag(reg) == TISNUM. reg is not modified. Clobbers TMP4.
+|  gettp TMP4, reg
+|  bxne TMP4, TISNUM, target
+|.endmacro
+|.macro checknum, reg, target  // Branch to target unless tag(reg) < LJ_TISNUM (unsigned). Clobbers TMP4.
+|  gettp TMP4, reg
+|  sltiu TMP4, TMP4, LJ_TISNUM  // TMP4 = 1 when the unsigned compare holds.
+|  bxeqz TMP4, target
+|.endmacro
+|
+|.macro mov_false, reg  // reg = ~(1 << 47).
+|  li reg, 0x001
+|  slli reg, reg, 47
+|  not reg, reg
+|.endmacro
+|.macro mov_true, reg  // reg = ~(1 << 48).
+|  li reg, 0x001
+|  slli reg, reg, 48
+|  not reg, reg
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:
+  |  // See vm_return. Also: TMP2 = previous base.
+  |  andi TMP0, PC, FRAME_P
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  mov_true TMP1
+  |  bxeqz TMP0, ->cont_dispatch
+  |  ld PC, FRAME_PC(TMP2)		// Fetch PC of previous frame.
+  |  mv BASE, TMP2			// Restore caller base.
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |  sd TMP1, -8(RA)			// Prepend true to results.
+  |  addi RA, RA, -8
+  |
+  |->vm_returnc:
+  |  addiw RD, RD, 8			// RD = (nresults+1)*8.
+  |  andi TMP0, PC, FRAME_TYPE
+  |  li CRET1, LUA_YIELD
+  |  bxeqz RD, ->vm_unwind_c_eh
+  |  mv MULTRES, RD
+  |  bxeqz TMP0, ->BC_RET_Z		// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
+  |  // TMP0 = PC & FRAME_TYPE
+  |  andi TMP2, PC, ~FRAME_TYPEP
+  |  xori TMP0, TMP0, FRAME_C
+  |  sub TMP2, BASE, TMP2		// TMP2 = previous base.
+  |  bxnez TMP0, ->vm_returnp
+  |
+  |  addiw TMP1, RD, -8
+  |  sd TMP2, L->base
+  |  li_vmstate C
+  |  lw TMP2, SAVE_NRES(sp)
+  |  addi BASE, BASE, -16
+  |  st_vmstate
+  |  slliw TMP2, TMP2, 3
+  |  beqz TMP1, >2
+  |1:
+  |  addiw TMP1, TMP1, -8
+  |  ld CRET1, 0(RA)
+  |  addi RA, RA, 8
+  |  sd CRET1, 0(BASE)
+  |  addi BASE, BASE, 8
+  |  bnez TMP1, <1
+  |
+  |2:
+  |  bne TMP2, RD, >6
+  |3:
+  |  sd BASE, L->top			// Store new top.
+  |
+  |->vm_leave_cp:
+  |  ld TMP0, SAVE_CFRAME(sp)		// Restore previous C frame.
+  |  mv CRET1, x0			// Ok return status for vm_pcall.
+  |  sd TMP0, L->cframe
+  |
+  |->vm_leave_unw:
+  |  restoreregs_ret
+  |
+  |6:
+  |  ld TMP1, L->maxstack
+  |  blt TMP2, RD, >7
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  bge BASE, TMP1, >9
+  |  sd TISNIL, 0(BASE)
+  |  addiw RD, RD, 8
+  |  addi BASE, BASE, 8
+  |  j <2
+  |
+  |7:  // Less results wanted.
+  |  subw TMP0, RD, TMP2
+  |  sub TMP0, BASE, TMP0		// Either keep top or shrink it.
+  |  beqz TMP2, >8
+  |  mv BASE, TMP0 	// LUA_MULTRET+1 case
+  |8:
+  |  j <3
+  |
+  |9:  // Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |
+  |  sd BASE, L->top			// Save current top held in BASE (yes).
+  |   mv MULTRES, RD
+  |  srliw CARG2, TMP2, 3
+  |  mv CARG1, L
+  |  call_intern vm_leave_unw, lj_state_growstack		// (lua_State *L, int n)
+  |    lw TMP2, SAVE_NRES(sp)
+  |  ld BASE, L->top			// Need the (realloced) L->top in BASE.
+  |   mv RD, MULTRES
+  |   slliw TMP2, TMP2, 3
+  |  j <2
+  |
+  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  mv sp, CARG1
+  |  mv CRET1, CARG2
+  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  ld L, SAVE_L(sp)
+  |   li TMP0, ~LJ_VMST_C
+  |  ld GL, L->glref
+  |  sw TMP0, GL->vmstate
+  |  j ->vm_leave_unw
+  |
+  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  andi sp, CARG1, CFRAME_RAWMASK
+  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  ld L, SAVE_L(sp)
+  |  lui TMP3, 0x43380		// TOBIT = Hiword of 2^52 + 2^51 (double).
+  |  li TISNIL, LJ_TNIL
+  |  li TISNUM, LJ_TISNUM
+  |  ld BASE, L->base
+  |  ld GL, L->glref			// Setup pointer to global state.
+  |  slli TMP3, TMP3, 32
+  |  mov_false TMP1
+  |    li_vmstate INTERP
+  |  ld PC, FRAME_PC(BASE)		// Fetch PC of previous frame.
+  |    fmv.d.x TOBIT, TMP3
+  |  addi RA, BASE, -8		// Results start at BASE-8.
+  |  addxi DISPATCH, GL, GG_G2DISP
+  |  sd TMP1, 0(RA)			// Prepend false to error message.
+  |    st_vmstate
+  |  li RD, 16			// 2 results: false + error message.
+  |  j ->vm_returnc
+  |
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:			// Grow stack for C function.
+  |  li CARG2, LUA_MINSTACK		// n = LUA_MINSTACK.
+  |  j >2
+  |
+  |->vm_growstack_l:			// Grow stack for Lua function.
+  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+  |  add RC, BASE, RC			// RC = top = BASE + nargs*8.
+  |   sub RA, RA, BASE			// RA = framesize*8.
+  |  sd BASE, L->base
+  |   addi PC, PC, 4			// Must point after first instruction.
+  |  sd RC, L->top
+  |   srliw CARG2, RA, 3		// n = framesize.
+  |2:
+  |  // L->base = new base, L->top = top
+  |  sd PC, SAVE_PC(sp)
+  |  mv CARG1, L
+  |  call_intern vm_growstack_l, lj_state_growstack	// (lua_State *L, int n)
+  |  ld BASE, L->base			// Reload base and top after the C call.
+  |  ld RC, L->top
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  sub RC, RC, BASE			// RC = nargs*8 again.
+  |  cleartp LFUNC:RB
+  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+  |  ins_callt				// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:				// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  saveregs
+  |  mv L, CARG1
+  |    ld GL, L->glref		// Setup pointer to global state.
+  |  mv BASE, CARG2
+  |    lbu TMP1, L->status
+  |   sd L, SAVE_L(sp)
+  |  li PC, FRAME_CP
+  |  addi TMP0, sp, CFRAME_RESUME
+  |    addxi DISPATCH, GL, GG_G2DISP
+  |   sw x0, SAVE_NRES(sp)
+  |   sw x0, SAVE_ERRF(sp)
+  |   sd CARG1, SAVE_PC(sp)			// Any value outside of bytecode is ok.
+  |   sd x0, SAVE_CFRAME(sp)
+  |   sd TMP0, L->cframe
+  |    beqz TMP1, >3
+  |
+  |  // Resume after yield (like a return).
+  |  sd L, GL->cur_L
+  |  mv RA, BASE
+  |   ld BASE, L->base
+  |   ld TMP1, L->top
+  |  ld PC, FRAME_PC(BASE)
+  |     lui TMP3, 0x43380		// TOBIT = Hiword of 2^52 + 2^51 (double).
+  |   sub RD, TMP1, BASE
+  |     slli TMP3, TMP3, 32
+  |    sb x0, L->status
+  |     fmv.d.x TOBIT, TMP3
+  |    li_vmstate INTERP
+  |   addi RD, RD, 8
+  |    st_vmstate
+  |   mv MULTRES, RD
+  |  andi TMP0, PC, FRAME_TYPE
+  |   li TISNIL, LJ_TNIL
+  |   li TISNUM, LJ_TISNUM
+  |  bxeqz TMP0, ->BC_RET_Z
+  |  j ->vm_return
+  |
+  |->vm_pcall:				// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  sw CARG4, SAVE_ERRF(sp)
+  |  li PC, FRAME_CP
+  |  j >1
+  |
+  |->vm_call:				// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  li PC, FRAME_C
+  |
+  |1:  // Entry point for vm_pcall above (PC = ftype).
+  |  ld TMP1, L:CARG1->cframe
+  |    mv L, CARG1
+  |   sw CARG3, SAVE_NRES(sp)
+  |    ld GL, L->glref		// Setup pointer to global state.
+  |   sd CARG1, SAVE_L(sp)
+  |     mv BASE, CARG2
+  |    addxi DISPATCH, GL, GG_G2DISP
+  |   sd CARG1, SAVE_PC(sp)		// Any value outside of bytecode is ok.
+  |  sd TMP1, SAVE_CFRAME(sp)
+  |  sd sp, L->cframe			// Add our C frame to cframe chain.
+  |
+  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  sd L, GL->cur_L
+  |  ld TMP2, L->base			// TMP2 = old base (used in vmeta_call).
+  |     lui TMP3, 0x43380		// TOBIT = Hiword of 2^52 + 2^51 (double).
+  |   ld TMP1, L->top
+  |     slli TMP3, TMP3, 32
+  |  add PC, PC, BASE
+  |   sub NARGS8:RC, TMP1, BASE
+  |     li TISNUM, LJ_TISNUM
+  |  sub PC, PC, TMP2			// PC = frame delta + frame type
+  |     fmv.d.x TOBIT, TMP3
+  |    li_vmstate INTERP
+  |     li TISNIL, LJ_TNIL
+  |    st_vmstate
+  |
+  |->vm_call_dispatch:
+  |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  checkfunc LFUNC:RB, ->vmeta_call
+  |
+  |->vm_call_dispatch_f:
+  |  ins_call
+  |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
+  |
+  |->vm_cpcall:				// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  saveregs
+  |  mv L, CARG1
+  |   ld TMP0, L:CARG1->stack
+  |  sd CARG1, SAVE_L(sp)
+  |   ld TMP1, L->top
+  |     ld GL, L->glref		// Setup pointer to global state.
+  |  sd CARG1, SAVE_PC(sp)		// Any value outside of bytecode is ok.
+  |   sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
+  |    ld TMP1, L->cframe
+  |     addxi DISPATCH, GL, GG_G2DISP
+  |   sw TMP0, SAVE_NRES(sp)		// Neg. delta means cframe w/o frame.
+  |  sw x0, SAVE_ERRF(sp)		// No error function.
+  |    sd TMP1, SAVE_CFRAME(sp)
+  |    sd sp, L->cframe			// Add our C frame to cframe chain.
+  |      sd L, GL->cur_L
+  |  jalr CARG4			// (lua_State *L, lua_CFunction func, void *ud)
+  |  mv BASE, CRET1
+  |  li PC, FRAME_CP
+  |  bnez CRET1, <3			// Else continue with the call.
+  |  j ->vm_leave_cp			// No base? Just remove C frame.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
+  |  ld TMP0, -32(BASE)		// Continuation.
+  |   mv RB, BASE
+  |   mv BASE, TMP2			// Restore caller BASE.
+  |    ld LFUNC:TMP1, FRAME_FUNC(TMP2)
+  |     ld PC, -24(RB)			// Restore PC from [cont|PC].
+  |.if FFI
+  |  sltiu TMP3, TMP0, 2
+  |.endif
+  |    cleartp LFUNC:TMP1
+  |   add TMP2, RA, RD
+  |  ld TMP1, LFUNC:TMP1->pc
+  |  sd TISNIL, -8(TMP2)               // Ensure one valid arg.
+  |.if FFI
+  |  bnez TMP3, >1
+  |.endif
+  |  // BASE = base, RA = resultptr, RB = meta base
+  |  ld KBASE, PC2PROTO(k)(TMP1)
+  |  jr TMP0				// Jump to continuation.
+  |
+  |.if FFI
+  |1:
+  |  addi TMP1, RB, -32
+  |  bxnez TMP0, ->cont_ffi_callback	// cont = 1: return from FFI callback.
+  |  // cont = 0: tailcall from C function.
+  |  sub RC, TMP1, BASE
+  |  j ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:				// RA = resultptr, RB = meta base
+  |  lw INS, -4(PC)
+  |   addi CARG2, RB, -32
+  |  ld TMP0, 0(RA)
+  |  decode_RB8 MULTRES, INS
+  |   decode_RA8 RA, INS
+  |  add TMP1, BASE, MULTRES
+  |   sd BASE, L->base
+  |   sub CARG3, CARG2, TMP1
+  |  sd TMP0, 0(CARG2)
+  |  bxne TMP1, CARG2, ->BC_CAT_Z
+  |  add RA, BASE, RA
+  |  sd TMP0, 0(RA)
+  |  j ->cont_nop
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |->vmeta_tgets1:			// CARG2 (o) is not set here; presumably set by the caller -- confirm.
+  |  addi CARG3, GL, offsetof(global_State, tmptv)	// k = &g->tmptv.
+  |  li TMP0, LJ_TSTR
+  |  settp STR:RC, TMP0			// Tag the string key.
+  |  sd STR:RC, 0(CARG3)		// Store tagged key in g->tmptv.
+  |  j >1				// Continue at 1: in vmeta_tgetv.
+  |
+  |->vmeta_tgets:
+  |  addi CARG2, GL, offsetof(global_State, tmptv)
+  |   addi CARG3, GL, offsetof(global_State, tmptv2)
+  |  li TMP0, LJ_TTAB
+  |   li TMP1, LJ_TSTR
+  |  settp TAB:RB, TMP0
+  |   settp STR:RC, TMP1
+  |  sd TAB:RB, 0(CARG2)
+  |   sd STR:RC, 0(CARG3)
+  |  j >1
+  |
+  |->vmeta_tgetb:			// TMP0 = index
+  |  addi CARG3, GL, offsetof(global_State, tmptv)
+  |  settp TMP0, TISNUM
+  |  sd TMP0, 0(CARG3)
+  |
+  |->vmeta_tgetv:
+  |1:
+  |  sd BASE, L->base
+  |  mv CARG1, L
+  |  sd PC, SAVE_PC(sp)
+  |  // (lua_State *L, TValue *o, TValue *k)
+  |  call_intern vmeta_tgetv, lj_meta_tget
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  beqz CRET1, >3
+  |  ld TMP0, 0(CRET1)
+  |  ins_next1
+  |  sd TMP0, 0(RA)
+  |  ins_next2
+  |
+  |3:  // Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |  addi TMP1, BASE, -FRAME_CONT
+  |  li NARGS8:RC, 16		// 2 args for func(t, k).
+  |  ld BASE, L->top
+  |  sd PC, -24(BASE)			// [cont|PC]
+  |   sub PC, BASE, TMP1
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
+  |  cleartp LFUNC:RB
+  |  j ->vm_call_dispatch_f
+  |
+  |->vmeta_tgetr:
+  |  call_intern vmeta_tgetr, lj_tab_getinth	// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  mv TMP1, TISNIL			// Default result: nil.
+  |  bxeqz CRET1, ->BC_TGETR_Z		// NULL: keep nil.
+  |  ld TMP1, 0(CRET1)			// Else load the value found.
+  |  j ->BC_TGETR_Z			// TMP1 = result (presumably consumed by BC_TGETR_Z -- confirm).
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->vmeta_tsets1:			// CARG2 (o) is not set here; presumably set by the caller -- confirm.
+  |  addi CARG3, GL, offsetof(global_State, tmptv)	// k = &g->tmptv.
+  |  li TMP0, LJ_TSTR
+  |  settp STR:RC, TMP0			// Tag the string key.
+  |  sd STR:RC, 0(CARG3)		// Store tagged key in g->tmptv.
+  |  j >1				// Continue at 1: in vmeta_tsetv.
+  |
+  |->vmeta_tsets:
+  |  addi CARG2, GL, offsetof(global_State, tmptv)
+  |   addi CARG3, GL, offsetof(global_State, tmptv2)
+  |  li TMP0, LJ_TTAB
+  |   li TMP1, LJ_TSTR
+  |  settp TAB:RB, TMP0
+  |   settp STR:RC, TMP1
+  |  sd TAB:RB, 0(CARG2)
+  |   sd STR:RC, 0(CARG3)
+  |  j >1
+  |
+  |->vmeta_tsetb:			// TMP0 = index
+  |  addi CARG3, GL, offsetof(global_State, tmptv)
+  |  settp TMP0, TISNUM
+  |  sd TMP0, 0(CARG3)
+  |
+  |->vmeta_tsetv:
+  |1:
+  |  sd BASE, L->base
+  |  mv CARG1, L
+  |  sd PC, SAVE_PC(sp)
+  |  // (lua_State *L, TValue *o, TValue *k)
+  |  call_intern vmeta_tsetv, lj_meta_tset
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  ld TMP2, 0(RA)
+  |  beqz CRET1, >3
+  |  ins_next1
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |  sd TMP2, 0(CRET1)
+  |  ins_next2
+  |
+  |3:  // Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |  addi TMP1, BASE, -FRAME_CONT
+  |  ld BASE, L->top
+  |  sd PC, -24(BASE)			// [cont|PC]
+  |   sub PC, BASE, TMP1
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
+  |  li NARGS8:RC, 24		// 3 args for func(t, k, v)
+  |  cleartp LFUNC:RB
+  |  sd TMP2, 16(BASE)		// Copy value to third argument.
+  |  j ->vm_call_dispatch_f
+  |
+  |->vmeta_tsetr:			// CARG2/CARG3 (t, key) are not set here; presumably set by the caller -- confirm.
+  |  sd BASE, L->base			// Publish base before the C call.
+  |  mv CARG1, L
+  |  sd PC, SAVE_PC(sp)		// Publish PC before the C call.
+  |  // (lua_State *L, GCtab *t, int32_t key)
+  |  call_intern vmeta_tsetr, lj_tab_setinth
+  |  // Returns TValue *.
+  |  j ->BC_TSETR_Z			// CRET1 = slot pointer returned by the call.
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:
+  |  // RA/RD point to o1/o2.
+  |  mv CARG2, RA
+  |  mv CARG3, RD
+  |  addi PC, PC, -4
+  |  sd BASE, L->base
+  |  mv CARG1, L
+  |  decode_OP1 CARG4, INS
+  |  sd PC, SAVE_PC(sp)
+  |  // (lua_State *L, TValue *o1, *o2, int op)
+  |  call_intern vmeta_comp, lj_meta_comp
+  |  // Returns 0/1 or TValue * (metamethod).
+  |3:
+  |  sltiu TMP1, CRET1, 2
+  |  bxeqz TMP1, ->vmeta_binop
+  |   negw TMP2, CRET1
+  |4:
+  |  lhu RD, OFS_RD(PC)
+  |   addi PC, PC, 4
+  |   lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff
+  |  slliw RD, RD, 2
+  |  addw RD, RD, TMP1
+  |  and RD, RD, TMP2
+  |  add PC, PC, RD
+  |->cont_nop:
+  |  ins_next
+  |
+  |->cont_ra:				// RA = resultptr
+  |  lbu TMP1, -4+OFS_RA(PC)		// A operand byte of the instruction at PC-4.
+  |   ld TMP2, 0(RA)			// Load the result.
+  |  slliw TMP1, TMP1, 3		// A*8 = slot offset.
+  |  add TMP1, BASE, TMP1		// &BASE[A].
+  |   sd TMP2, 0(TMP1)			// Copy result to BASE[A].
+  |  j ->cont_nop			// Dispatch the next instruction.
+  |
+  |->cont_condt:			// RA = resultptr
+  |  ld TMP0, 0(RA)
+  |  gettp TMP0, TMP0			// Extract the type tag of the result.
+  |  sltiu TMP1, TMP0, LJ_TISTRUECOND	// TMP1 = 1 if tag < LJ_TISTRUECOND (unsigned).
+  |  negw TMP2, TMP1		// Branch if result is true.
+  |  j <4				// TMP2 = 0 or -1: mask ANDed with the jump offset at 4:.
+  |
+  |->cont_condf:			// RA = resultptr
+  |  ld TMP0, 0(RA)
+  |  gettp TMP0, TMP0			// Extract the type tag of the result.
+  |  sltiu TMP1, TMP0, LJ_TISTRUECOND	// TMP1 = 1 if tag < LJ_TISTRUECOND (unsigned).
+  |  addiw TMP2, TMP1, -1		// Branch if result is false.
+  |  j <4				// TMP2 = -1 or 0: mask ANDed with the jump offset at 4:.
+  |
+  |->vmeta_equal:
+  |  // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
+  |   cleartp LFUNC:CARG3, CARG2
+  |  cleartp LFUNC:CARG2, CARG1
+  |    mv CARG4, TMP0
+  |  addi PC, PC, -4
+  |   sd BASE, L->base
+  |   mv CARG1, L
+  |   sd PC, SAVE_PC(sp)
+  |  // (lua_State *L, GCobj *o1, *o2, int ne)
+  |  call_intern vmeta_equal, lj_meta_equal
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  j <3
+  |
+  |->vmeta_equal_cd:
+  |.if FFI
+  |  addi PC, PC, -4
+  |  mv CARG1, L
+  |  mv CARG2, INS
+  |  sd BASE, L->base
+  |  sd PC, SAVE_PC(sp)
+  |  call_intern vmeta_equal_cd, lj_meta_equal_cd	// (lua_State *L, BCIns op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  j <3
+  |.endif
+  |
+  |->vmeta_istype:
+  |  addi PC, PC, -4			// Back up PC by one instruction.
+  |   sd BASE, L->base
+  |   mv CARG1, L 
+  |   srliw CARG2, RA, 3		// ra = RA/8 (a slot index, not a pointer).
+  |   srliw CARG3, RD, 3		// tp = RD/8.
+  |  sd PC, SAVE_PC(sp)
+  |  // (lua_State *L, BCReg ra, BCReg tp)
+  |  call_intern vmeta_istype, lj_meta_istype
+  |  j ->cont_nop
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |->vmeta_unm:
+  |  mv RC, RB
+  |
+  |->vmeta_arith:
+  |  mv CARG1, L
+  |   sd BASE, L->base
+  |  mv CARG2, RA
+  |   sd PC, SAVE_PC(sp)
+  |  mv CARG3, RB
+  |  mv CARG4, RC
+  |  decode_OP1 CARG5, INS
+  |  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  call_intern vmeta_arith, lj_meta_arith
+  |  // Returns NULL (finished) or TValue * (metamethod).
+  |  bxeqz CRET1, ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+  |  sub TMP1, CRET1, BASE
+  |   sd PC, -24(CRET1)			// [cont|PC]
+  |   mv TMP2, BASE
+  |  addi PC, TMP1, FRAME_CONT
+  |   mv BASE, CRET1
+  |  li NARGS8:RC, 16                  // 2 args for func(o1, o2).
+  |  j ->vm_call_dispatch
+  |
+  |->vmeta_len:
+  |  // CARG2 already set by BC_LEN.
+#if LJ_52
+  |  mv MULTRES, CARG1
+#endif
+  |   sd BASE, L->base
+  |   mv CARG1, L
+  |   sd PC, SAVE_PC(sp)
+  |  call_intern vmeta_len, lj_meta_len	// (lua_State *L, TValue *o)
+  |  // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+  |  bxnez CRET1, ->vmeta_binop		// Binop call for compatibility.
+  |  mv CARG1, MULTRES
+  |  j ->BC_LEN_Z
+#else
+  |  j ->vmeta_binop			// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call:			// Resolve and call __call metamethod.
+  |  // TMP2 = old base, BASE = new base, RC = nargs*8
+  |  mv CARG1, L
+  |   sd TMP2, L->base			// This is the callers base!
+  |  addi CARG2, BASE, -16
+  |   sd PC, SAVE_PC(sp)
+  |  add CARG3, BASE, RC
+  |   mv MULTRES, NARGS8:RC
+  |  // (lua_State *L, TValue *func, TValue *top)
+  |  call_intern vmeta_call, lj_meta_call
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
+  |   addi NARGS8:RC, MULTRES, 8	// Got one more argument now.
+  |  cleartp LFUNC:RB
+  |  ins_call
+  |
+  |->vmeta_callt:			// Resolve __call for BC_CALLT.
+  |  // BASE = old base, RA = new base, RC = nargs*8
+  |  mv CARG1, L
+  |   sd BASE, L->base
+  |  addi CARG2, RA, -16
+  |   sd PC, SAVE_PC(sp)
+  |  add CARG3, RA, RC
+  |   mv MULTRES, NARGS8:RC
+  |  // (lua_State *L, TValue *func, TValue *top)
+  |  call_intern vmeta_callt, lj_meta_call
+  |   ld RB, FRAME_FUNC(RA)		// Guaranteed to be a function here.
+  |  ld TMP1, FRAME_PC(BASE)
+  |  addi NARGS8:RC, MULTRES, 8	// Got one more argument now.
+  |  cleartp LFUNC:CARG3, RB
+  |  j ->BC_CALLT_Z
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:
+  |  mv CARG1, L
+  |   sd BASE, L->base
+  |  mv CARG2, RA
+  |   sd PC, SAVE_PC(sp)
+  |  mv MULTRES, INS
+  |  call_intern vmeta_for, lj_meta_for	// (lua_State *L, TValue *base)
+  |.if JIT
+  |  decode_OP1 TMP0, MULTRES
+  |  li TMP1, BC_JFORI
+  |.endif
+  |  decode_RA8 RA, MULTRES
+  |   decode_RD8 RD, MULTRES
+  |.if JIT
+  |  bxeq TMP0, TMP1, =>BC_JFORI
+  |.endif
+  |  j =>BC_FORI
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro .ffunc, name
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name
+  |->ff_ .. name:
+  |  ld CARG1, 0(BASE)
+  |  bxeqz NARGS8:RC, ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_2, name
+  |->ff_ .. name:
+  |  sltiu TMP0, NARGS8:RC, 16
+  |  ld CARG1, 0(BASE)
+  |  ld CARG2, 8(BASE)
+  |  bxnez TMP0, ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_n, name
+  |->ff_ .. name:
+  |  ld CARG1, 0(BASE)
+  |  fld FARG1, 0(BASE)
+  |  bxeqz NARGS8:RC, ->fff_fallback
+  |  checknum CARG1, ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name
+  |->ff_ .. name:
+  |  ld CARG1, 0(BASE)
+  |    sltiu TMP0, NARGS8:RC, 16
+  |   ld CARG2, 8(BASE)
+  |  bxnez TMP0, ->fff_fallback
+  |  gettp TMP1, CARG1
+  |   gettp TMP2, CARG2
+  |  sltiu TMP1, TMP1, LJ_TISNUM
+  |   sltiu TMP2, TMP2, LJ_TISNUM
+  |  fld FARG1, 0(BASE)
+  |  and TMP1, TMP1, TMP2
+  |   fld FARG2, 8(BASE)
+  |  bxeqz TMP1, ->fff_fallback
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Clobbers TMP0 and TMP1.
+  |.macro ffgccheck
+  |   ld TMP0, GL->gc.total
+  |   ld TMP1, GL->gc.threshold
+  |  bltu TMP0, TMP1, >1		// total < threshold: skip the GC step.
+  |  jal ->fff_gcstep			// Else jump-and-link to fff_gcstep.
+  |1:
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |.ffunc_1 assert
+  |  gettp TMP1, CARG1
+  |  sltiu TMP1, TMP1, LJ_TISTRUECOND
+  |  addi RA, BASE, -16
+  |  bxeqz TMP1, ->fff_fallback
+  |  ld PC, FRAME_PC(BASE)
+  |  addiw RD, NARGS8:RC, 8		// Compute (nresults+1)*8.
+  |  addi TMP1, BASE, 8
+  |  add TMP2, RA, RD
+  |  sd CARG1, 0(RA)
+  |  bne BASE, TMP2, >1
+  |  j ->fff_res		// Done if exactly 1 argument.
+  |1:
+  |  ld TMP0, 0(TMP1)
+  |  sd TMP0, -16(TMP1)
+  |  mv TMP3, TMP1
+  |  addi TMP1, TMP1, 8
+  |  bne TMP3, TMP2, <1
+  |  j ->fff_res
+  |
+  |.ffunc_1 type
+  |  gettp TMP0, CARG1
+  |  not TMP3, TMP0
+  |  bltu TISNUM, TMP0, >1
+  |  li TMP3, ~LJ_TISNUM
+  |1:
+  |  slli TMP3, TMP3, 3
+  |  add TMP3, CFUNC:RB, TMP3
+  |  ld CARG1, CFUNC:TMP3->upvalue
+  |  j ->fff_restv
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |.ffunc_1 getmetatable
+  |  gettp TMP2, CARG1
+  |  addi TMP0, TMP2, -LJ_TTAB
+  |  addi TMP1, TMP2, -LJ_TUDATA
+  |  snez TMP0, TMP0
+  |  neg TMP0, TMP0
+  |  and TMP0, TMP0, TMP1
+  |  cleartp TAB:CARG1
+  |  bnez TMP0, >6
+  |1:  // Field metatable must be at same offset for GCtab and GCudata!
+  |  ld TAB:RB, TAB:CARG1->metatable
+  |2:
+  |   ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
+  |  li CARG1, LJ_TNIL
+  |  bxeqz TAB:RB, ->fff_restv
+  |  lw TMP0, TAB:RB->hmask
+  |   lw TMP1, STR:RC->sid
+  |    ld NODE:TMP2, TAB:RB->node
+  |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
+  |  slli TMP0, TMP1, 5
+  |  slli TMP1, TMP1, 3
+  |  sub TMP1, TMP0, TMP1
+  |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
+  |  li CARG4, LJ_TSTR
+  |  settp STR:RC, CARG4		// Tagged key to look for.
+  |3:  // Rearranged logic, because we expect _not_ to find the key.
+  |  ld TMP0, NODE:TMP2->key
+  |   ld CARG1, NODE:TMP2->val
+  |    ld NODE:TMP2, NODE:TMP2->next
+  |  li TMP3, LJ_TTAB
+  |  beq RC, TMP0, >5
+  |  bnez NODE:TMP2, <3
+  |4:
+  |  settp CARG1, RB, TMP3
+  |  j ->fff_restv			// Not found, keep default result.
+  |5:
+  |  bxne CARG1, TISNIL, ->fff_restv
+  |  j <4				// Ditto for nil value.
+  |
+  |6:
+  |  sltiu TMP3, TMP2, LJ_TISNUM
+  |  neg TMP3, TMP3
+  |  and TMP0, TISNUM, TMP3
+  |  not TMP3, TMP3
+  |  and TMP2, TMP2, TMP3
+  |  or TMP2, TMP2, TMP0
+  |  slli TMP2, TMP2, 3
+  |   sub TMP0, GL, TMP2
+  |   ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0)
+  |  j <2
+  |
+  |.ffunc_2 setmetatable
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+  |  gettp TMP3, CARG2
+  |   ld TAB:TMP0, TAB:TMP1->metatable
+  |   lbu TMP2, TAB:TMP1->marked
+  |  addi TMP3, TMP3, -LJ_TTAB
+  |   cleartp TAB:CARG2
+  |  or TMP3, TMP3, TAB:TMP0
+  |  bxnez TMP3, ->fff_fallback
+  |  andi TMP3, TMP2, LJ_GC_BLACK		// isblack(table)
+  |  sd TAB:CARG2, TAB:TMP1->metatable
+  |  bxeqz TMP3, ->fff_restv
+  |  barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
+  |
+  |.ffunc rawget
+  |  ld CARG2, 0(BASE)
+  |  sltiu TMP0, NARGS8:RC, 16
+  |  gettp TMP1, CARG2
+  |   cleartp CARG2
+  |  addi TMP1, TMP1, -LJ_TTAB
+  |  or TMP0, TMP0, TMP1
+  |  addi CARG3, BASE, 8
+  |  bxnez TMP0, ->fff_fallback
+  |  mv CARG1, L
+  |  call_intern ff_rawget, lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
+  |  // Returns cTValue *.
+  |  ld CARG1, 0(CRET1)
+  |  j ->fff_restv
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber
+  |  // Only handles the number case inline (without a base argument).
+  |  ld CARG1, 0(BASE)
+  |  xori TMP0, NARGS8:RC, 8		// Exactly one number argument.
+  |  gettp TMP1, CARG1
+  |  sltu TMP1, TISNUM, TMP1		// TMP1 = 1 if tag > LJ_TISNUM (unsigned).
+  |  or TMP0, TMP0, TMP1
+  |  bxnez TMP0, ->fff_fallback		// nargs != 1 or CARG1 is not a number.
+  |  j ->fff_restv			// Return the argument unchanged.
+  |
+  |.ffunc_1 tostring
+  |  // Only handles the string or number case inline.
+  |  gettp TMP0, CARG1
+  |  addi TMP1, TMP0, -LJ_TSTR
+  |  // A __tostring method in the string base metatable is ignored.
+  |  bxeqz TMP1, ->fff_restv	// String key?
+  |  // Handle numbers inline, unless a number base metatable is present.
+  |   ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
+  |  sltu TMP0, TISNUM, TMP0
+  |  sd BASE, L->base			// Add frame since C call can throw.
+  |  or TMP0, TMP0, TMP1
+  |  bxnez TMP0, ->fff_fallback
+  |  sd PC, SAVE_PC(sp)		// Redundant (but a defined value).
+  |  ffgccheck
+  |  mv CARG1, L
+  |  mv CARG2, BASE
+  |  call_intern ff_tostring, lj_strfmt_number	// (lua_State *L, cTValue *o)
+  |  // Returns GCstr *.
+  |  li TMP1, LJ_TSTR
+  |//  ld BASE, L->base
+  |  settp CARG1, TMP1
+  |  j ->fff_restv
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |.ffunc_1 next
+  |  checktp CARG1, -LJ_TTAB, ->fff_fallback
+  |  add TMP0, BASE, NARGS8:RC
+  |  ld PC, FRAME_PC(BASE)
+  |  sd TISNIL, 0(TMP0)		// Set missing 2nd arg to nil.
+  |  addi CARG2, BASE, 8
+  |  addi CARG3, BASE, -16
+  |  call_intern ff_next, lj_tab_next	// (GCtab *t, cTValue *key, TValue *o)
+  |  // Returns 1=found, 0=end, -1=error.
+  |//  addi RA, BASE, -16
+  |  li RD, (2+1)*8
+  |  bxgtz CRET1, ->fff_res		// Found key/value.
+  |  mv TMP1, CRET1
+  |  mv CARG1, TISNIL
+  |  bxeqz TMP1, ->fff_restv		// End of traversal: return nil.
+  |   ld CFUNC:RB, FRAME_FUNC(BASE)
+  |  li RC, 2*8
+  |   cleartp CFUNC:RB
+  |  j ->fff_fallback			// Invalid key.
+  |
+  |.ffunc_1 pairs
+  |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+  |  ld PC, FRAME_PC(BASE)
+#if LJ_52
+  |  ld TAB:TMP2, TAB:TMP1->metatable
+  |  ld TMP0, CFUNC:RB->upvalue[0]
+  |  addi RA, BASE, -16
+  |  bxnez TAB:TMP2, ->fff_fallback
+#else
+  |  ld TMP0, CFUNC:RB->upvalue[0]
+  |  addi RA, BASE, -16
+#endif
+  |  sd TISNIL, 0(BASE)
+  |   sd CARG1, -8(BASE)
+  |    sd TMP0, 0(RA)
+  |  li RD, (3+1)*8
+  |  j ->fff_res
+  |
+  |.ffunc_2 ipairs_aux
+  |  checktab CARG1, ->fff_fallback
+  |   checkint CARG2, ->fff_fallback
+  |  lw TMP0, TAB:CARG1->asize
+  |   ld TMP1, TAB:CARG1->array
+  |    ld PC, FRAME_PC(BASE)
+  |  sext.w TMP2, CARG2
+  |  addiw TMP2, TMP2, 1
+  |  sltu TMP3, TMP2, TMP0
+  |    addi RA, BASE, -16
+  |   zext.w TMP0, TMP2
+  |   settp_b TMP0, TISNUM
+  |  sd TMP0, 0(RA)
+  |  beqz TMP3, >2			// Not in array part?
+  |  slli TMP3, TMP2, 3
+  |  add TMP3, TMP1, TMP3
+  |  ld TMP1, 0(TMP3)
+  |1:
+  |  li RD, (0+1)*8
+  |  bxeq TMP1, TISNIL, ->fff_res	// End of iteration, return 0 results.
+  |  sd TMP1, -8(BASE)
+  |  li RD, (2+1)*8
+  |  j ->fff_res
+  |2:  // Check for empty hash part first. Otherwise call C function.
+  |  lw TMP0, TAB:CARG1->hmask
+  |  li RD, (0+1)*8
+  |  bxeqz TMP0, ->fff_res
+  |  mv CARG2, TMP2
+  |  call_intern ff_ipairs_aux, lj_tab_getinth	// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  li RD, (0+1)*8
+  |  bxeqz CRET1, ->fff_res
+  |  ld TMP1, 0(CRET1)
+  |  j <1
+  |
+  |.ffunc_1 ipairs
+  |  checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+  |  ld PC, FRAME_PC(BASE)
+#if LJ_52
+  |  ld TAB:TMP2, TAB:TMP1->metatable
+#endif
+  |  ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
+  |  addi RA, BASE, -16
+#if LJ_52
+  |  bxnez TAB:TMP2, ->fff_fallback
+#endif
+  |  slli TMP1, TISNUM, 47
+  |  sd CARG1, -8(BASE)
+  |   sd TMP1, 0(BASE)
+  |    sd CFUNC:TMP0, 0(RA)
+  |  li RD, (3+1)*8
+  |  j ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |.ffunc pcall
+  |  addi NARGS8:RC, NARGS8:RC, -8	// Don't count the called function as an argument.
+  |   lbu TMP3, GL->hookmask
+  |   mv TMP2, BASE			// TMP2 = old base (expected by vm_call_dispatch).
+  |  bxltz NARGS8:RC, ->fff_fallback	// No argument at all?
+  |   addi BASE, BASE, 16		// New base.
+  |  // Remember active hook before pcall.
+  |  srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
+  |  andi TMP3, TMP3, 1			// TMP3 = hook-active bit (0/1).
+  |  addi PC, TMP3, 16+FRAME_PCALL	// PC = frame delta + frame type (+ hook bit).
+  |  bxeqz NARGS8:RC, ->vm_call_dispatch	// No args to move.
+  |1:					// Also entered from xpcall.
+  |   add TMP0, BASE, NARGS8:RC
+  |2:					// Move args up by one slot, top down.
+  |  ld TMP1, -16(TMP0)
+  |  sd TMP1, -8(TMP0)
+  |  addi TMP0, TMP0, -8
+  |  bne TMP0, BASE, <2
+  |  j ->vm_call_dispatch
+  |
+  |.ffunc xpcall
+  |  addi NARGS8:TMP0, NARGS8:RC, -16	// Don't count function and traceback as arguments.
+  |  ld CARG1, 0(BASE)
+  |   ld CARG2, 8(BASE)
+  |     lbu TMP3, GL->hookmask		// Must be TMP3: consumed by srliw/andi below.
+  |    bxltz NARGS8:TMP0, ->fff_fallback	// Fewer than 2 arguments?
+  |  gettp TMP2, CARG2
+  |  addi TMP2, TMP2, -LJ_TFUNC
+  |  bxnez TMP2, ->fff_fallback		// Traceback must be a function.
+  |   mv TMP2, BASE			// TMP2 = old base (expected by vm_call_dispatch).
+  |  mv NARGS8:RC, NARGS8:TMP0
+  |   addi BASE, BASE, 24		// New base.
+  |  // Remember active hook before pcall.
+  |  srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT
+  |   sd CARG2, 0(TMP2)			// Swap function and traceback.
+  |  andi TMP3, TMP3, 1			// TMP3 = hook-active bit (0/1).
+  |   sd CARG1, 8(TMP2)
+  |  addi PC, TMP3, 24+FRAME_PCALL	// PC = frame delta + frame type (+ hook bit).
+  |  bnez NARGS8:RC, <1			// Move args using the loop at 1: in pcall.
+  |  j ->vm_call_dispatch
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |.macro coroutine_resume_wrap, resume
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  ld L:CARG1, CFUNC:RB->upvalue[0].gcr
+  |  cleartp L:CARG1
+  |.endif
+  |  lbu TMP0, L:CARG1->status
+  |   ld TMP1, L:CARG1->cframe
+  |    ld CARG2, L:CARG1->top
+  |    ld TMP2, L:CARG1->base
+  |  addiw CARG4, TMP0, -LUA_YIELD
+  |    add CARG3, CARG2, TMP0
+  |   addi TMP3, CARG2, 8
+  |  seqz TMP4, CARG4
+  |  neg TMP4, TMP4
+  |  and CARG2, CARG2, TMP4
+  |  not TMP4, TMP4
+  |  and TMP3, TMP3, TMP4
+  |   or CARG2, CARG2, TMP3
+  |  bxgtz CARG4, ->fff_fallback		// st > LUA_YIELD?
+  |   xor TMP2, TMP2, CARG3
+  |   or CARG4, TMP2, TMP0
+  |  bxnez TMP1, ->fff_fallback		// cframe != 0?
+  |  ld TMP0, L:CARG1->maxstack
+  |   ld PC, FRAME_PC(BASE)
+  |  bxeqz CARG4, ->fff_fallback		// base == top && st == 0?
+  |  add TMP2, CARG2, NARGS8:RC
+  |  sd BASE, L->base
+  |  sd PC, SAVE_PC(sp)
+  |  bxltu TMP0, TMP2, ->fff_fallback		// Stack overflow?
+  |1:
+  |.if resume
+  |  addi BASE, BASE, 8		// Keep resumed thread in stack for GC.
+  |  addi NARGS8:RC, NARGS8:RC, -8
+  |  addi TMP2, TMP2, -8
+  |.endif
+  |  sd TMP2, L:CARG1->top
+  |  sd BASE, L->top
+  |  add TMP1, BASE, NARGS8:RC
+  |  mv CARG3, CARG2
+  |2:  // Move args to coroutine.
+  |   ld TMP0, 0(BASE)
+  |  sltu TMP3, BASE, TMP1
+  |   addi BASE, BASE, 8
+  |  beqz TMP3, >3
+  |   sd TMP0, 0(CARG3)
+  |   addi CARG3, CARG3, 8
+  |  j <2
+  |3:
+  |   mv L:RA, L:CARG1
+  |  jal ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
+  |  // Returns thread status.
+  |4:
+  |  ld TMP2, L:RA->base
+  |   sltiu TMP1, CRET1, LUA_YIELD+1
+  |  ld TMP3, L:RA->top
+  |    li_vmstate INTERP
+  |  ld BASE, L->base
+  |     sd L, GL->cur_L
+  |    st_vmstate
+  |  sub RD, TMP3, TMP2
+  |   beqz TMP1, >8
+  |  ld TMP0, L->maxstack
+  |   add TMP1, BASE, RD
+  |  beqz RD, >6			// No results?
+  |   add TMP3, TMP2, RD
+  |  bltu TMP0, TMP1, >9		// Need to grow stack?
+  |  sd TMP2, L:RA->top		// Clear coroutine stack.
+  |  mv TMP1, BASE
+  |5:  // Move results from coroutine.
+  |  ld TMP0, 0(TMP2)
+  |  addi TMP2, TMP2, 8
+  |  sd TMP0, 0(TMP1)
+  |  addi TMP1, TMP1, 8
+  |  bltu TMP2, TMP3, <5
+  |6:
+  |.if resume
+  |  mov_true TMP1
+  |  addi RD, RD, 16
+  |7:
+  |  sd TMP1, -8(BASE)	// Prepend true/false to results.
+  |   addi RA, BASE, -8
+  |.else
+  |  mv RA, BASE
+  |  addi RD, RD, 8
+  |.endif
+  |  andi TMP0, PC, FRAME_TYPE
+  |  sd PC, SAVE_PC(sp)
+  |   mv MULTRES, RD
+  |//  bxeqz TMP0, ->BC_RET_Z	// Local label 9 in use
+  |  bnez TMP0, >6
+  |  j ->BC_RET_Z
+  |6:
+  |  j ->vm_return
+  |
+  |8:  // Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  addi TMP3, TMP3, -8
+  |   mov_false TMP1
+  |   li RD, (2+1)*8
+  |   ld TMP0, 0(TMP3)
+  |  sd TMP3, L:RA->top		// Remove error from coroutine stack.
+  |  sd TMP0, 0(BASE)			// Copy error message.
+  |  j <7
+  |.else
+  |  mv CARG1, L
+  |  mv CARG2, L:RA
+  |  // (lua_State *L, lua_State *co)
+  |  call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err
+  |.endif
+  |
+  |9:  // Handle stack expansion on return from yield.
+  |  mv CARG1, L
+  |  srliw CARG2, RD, 3
+  |  // (lua_State *L, int n)
+  |.if resume
+  |  call_intern ff_coroutine_resume, lj_state_growstack
+  |.else
+  |  call_intern ff_coroutine_wrap_aux, lj_state_growstack
+  |.endif
+  |  mv CRET1, x0
+  |  j <4
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1		// coroutine.resume
+  |  coroutine_resume_wrap 0		// coroutine.wrap
+  |
+  |.ffunc coroutine_yield
+  |  ld TMP0, L->cframe
+  |   add TMP1, BASE, NARGS8:RC		// top = BASE + nargs*8.
+  |    li CRET1, LUA_YIELD		// Return status.
+  |   sd BASE, L->base
+  |  andi TMP0, TMP0, CFRAME_RESUME	// Test the CFRAME_RESUME flag (set by vm_resume).
+  |   sd TMP1, L->top
+  |  bxeqz TMP0, ->fff_fallback		// Flag not set: take the fallback.
+  |  sd x0, L->cframe			// Clear L->cframe.
+  |    sb CRET1, L->status		// L->status = LUA_YIELD.
+  |  j ->vm_leave_unw			// Restore saved registers and return.
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |.macro math_round, func
+  |->ff_math_ .. func:
+  |  ld CARG1, 0(BASE)
+  |   gettp TMP0, CARG1
+  |  bxeqz NARGS8:RC, ->fff_fallback
+  |  bxeq TMP0, TISNUM, ->fff_restv
+  |   fld FARG1, 0(BASE)
+  |  bxgeu TMP0, TISNUM, ->fff_fallback
+  |  jal ->vm_ .. func
+  |  j ->fff_resn
+  |.endmacro
+  |
+  |  math_round floor
+  |  math_round ceil
+  |
+  |.ffunc_1 math_abs
+  |  gettp CARG2, CARG1
+  |  addi TMP2, CARG2, -LJ_TISNUM
+  |   sext.w TMP1, CARG1
+  |  bnez TMP2, >1			// Tag != LJ_TISNUM: not an integer.
+  |  sraiw TMP0, TMP1, 31			// Integer case: extract sign mask (0 or -1).
+  |  xor TMP1, TMP1, TMP0
+  |  sub CARG1, TMP1, TMP0		// abs = (x ^ sign) - sign.
+  |  slli TMP3, CARG1, 32		// Move bit 31 of the result into the sign bit.
+  |   settp CARG1, TISNUM
+  |  bxgez TMP3, ->fff_restv		// Bit 31 clear: result fits in an int.
+  |  lui CARG1, 0x41e00		// 2^31 as a double.
+  |  slli CARG1, CARG1, 32
+  |  j ->fff_restv
+  |1:
+  |  sltiu TMP2, CARG2, LJ_TISNUM	// TMP2 = 1 if tag < LJ_TISNUM (unsigned).
+  |  slli CARG1, CARG1, 1		// Clear bit 63 (sign bit of the double).
+  |  srli CARG1, CARG1, 1
+  |  bxeqz TMP2, ->fff_fallback		// Tag >= LJ_TISNUM: not a number.
+  |// fallthrough
+  |
+  |->fff_restv:			// Return one TValue result from a fast function.
+  |  // CARG1 = TValue result.
+  |  ld PC, FRAME_PC(BASE)
+  |  sd CARG1, -16(BASE)		// Store result at BASE-16.
+  |->fff_res1:
+  |  // Result stored at BASE-16 (RA is set below), PC = return.
+  |  li RD, (1+1)*8
+  |->fff_res:
+  |  // Results start at BASE-16, RD = (nresults+1)*8, PC = return.
+  |  andi TMP0, PC, FRAME_TYPE
+  |   mv MULTRES, RD
+  |  addi RA, BASE, -16
+  |  bxnez TMP0, ->vm_return		// FRAME_TYPE bits set in PC: use vm_return.
+  |  lw INS, -4(PC)			// Instruction preceding the return PC.
+  |  decode_RB8 RB, INS
+  |5:
+  |  bltu RD, RB, >6			// More results expected?
+  |  decode_RA8a TMP0, INS
+  |  ins_next1
+  |  decode_RA8b TMP0
+  |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
+  |  sub BASE, RA, TMP0
+  |  ins_next2
+  |
+  |6:  // Fill up results with nil.
+  |  add TMP1, RA, RD
+  |   addi RD, RD, 8
+  |   sd TISNIL, -8(TMP1)
+  |  j <5				// Re-check after adding one nil.
+  |
+  |.macro math_extern, func	// One-number fast function calling an extern.
+  |  .ffunc_n math_ .. func
+  |  call_extern func
+  |  j ->fff_resn			// Return FRET1 as single result.
+  |.endmacro
+  |
+  |.macro math_extern2, func	// Two-number fast function calling an extern.
+  |  .ffunc_nn math_ .. func
+  |  call_extern func
+  |  j ->fff_resn			// Return FRET1 as single result.
+  |.endmacro
+  |
+  |.ffunc_n math_sqrt
+  |  fsqrt.d FRET1, FARG1
+  |->fff_resn:			// Return the number in FRET1 as single result.
+  |  ld PC, FRAME_PC(BASE)
+  |  fsd FRET1, -16(BASE)		// Store result at BASE-16.
+  |  j ->fff_res1
+  |
+  |.ffunc math_log
+  |  li TMP1, 8			// 8 = one argument (nargs*8).
+  |   ld CARG1, 0(BASE)
+  |   fld FARG1, 0(BASE)
+  |  bxne NARGS8:RC, TMP1, ->fff_fallback		// Need exactly 1 argument.
+  |  checknum CARG1, ->fff_fallback
+  |  call_extern log
+  |  j ->fff_resn			// Return FRET1 as single result.
+  |
+  |  math_extern log10
+  |  math_extern exp
+  |  math_extern sin
+  |  math_extern cos
+  |  math_extern tan
+  |  math_extern asin
+  |  math_extern acos
+  |  math_extern atan
+  |  math_extern sinh
+  |  math_extern cosh
+  |  math_extern tanh
+  |  math_extern2 pow
+  |  math_extern2 atan2
+  |  math_extern2 fmod
+  |
+  |.ffunc_2 math_ldexp
+  |  checknum CARG1, ->fff_fallback
+  |  checkint CARG2, ->fff_fallback
+  |   fld FARG1, 0(BASE)		// x = first argument.
+  |   lw CARG1, 8(BASE)		// exp = low 32 bits of second argument.
+  |  call_extern ldexp			// (double x, int exp)
+  |  j ->fff_resn
+  |
+  |.ffunc_n math_frexp
+  |   ld PC, FRAME_PC(BASE)
+  |  addi CARG1, GL, offsetof(global_State, tmptv)	// Pointer argument = &g->tmptv.
+  |  call_extern frexp
+  |    lw TMP1, GL->tmptv		// Reload the 32-bit value written to tmptv.
+  |   fcvt.d.w FARG2, TMP1		// Convert it to a double.
+  |  fsd FRET1, -16(BASE)		// First result.
+  |   fsd FARG2, -8(BASE)		// Second result.
+  |  li RD, (2+1)*8			// Two results.
+  |  j ->fff_res
+  |
+  |.ffunc_n math_modf
+  |   addi CARG1, BASE, -16		// Pointer argument = first result slot.
+  |   ld PC, FRAME_PC(BASE)
+  |  call_extern modf
+  |  fsd FRET1, -8(BASE)		// Return value goes to second result slot.
+  |   li RD, (2+1)*8			// Two results.
+  |  j ->fff_res
+  |
+  |.macro math_minmax, name, ismax, fpins	// Shared body of math.min/math.max.
+  |  .ffunc_1 name
+  |  add RB, BASE, NARGS8:RC		// RB = end of arguments.
+  |   addi RA, BASE, 8			// RA = second argument.
+  |  checkint CARG1, >4
+  |1:  // Handle integers.
+  |   ld CARG2, 0(RA)
+  |  bxeq RA, RB, ->fff_restv		// All arguments consumed: return CARG1.
+  |   sext.w CARG1, CARG1
+  |  checkint CARG2, >3
+  |   sext.w CARG2, CARG2
+  |   slt TMP0, CARG1, CARG2		// TMP0 = (CARG1 < CARG2).
+  |.if ismax
+  |   addi TMP1, TMP0, -1		// Mask = -1 if CARG1 >= CARG2, else 0.
+  |.else
+  |   neg TMP1, TMP0			// Mask = -1 if CARG1 < CARG2, else 0.
+  |.endif
+  | and CARG1, CARG1, TMP1		// Keep CARG1 where mask is set.
+  |  not TMP1, TMP1
+  |  and CARG2, CARG2, TMP1		// Keep CARG2 where mask is clear.
+  |   or CARG1, CARG1, CARG2		// Selected value.
+  |  addi RA, RA, 8
+  |   zext.w CARG1, CARG1
+  |   settp_b CARG1, TISNUM		// Re-tag as integer.
+  |  j <1
+  |3:  // Convert intermediate result to number and continue below.
+  |   fcvt.d.w FARG1, CARG1
+  |  checknum CARG2, ->fff_fallback
+  |   fld FARG2, 0(RA)
+  |  j >6
+  |
+  |4:  // First argument is not an integer.
+  |  fld FARG1, 0(BASE)
+  |  checknum CARG1, ->fff_fallback
+  |5:  // Handle numbers.
+  |  ld CARG2, 0(RA)
+  |  fld FARG2, 0(RA)
+  |   bxgeu RA, RB, ->fff_resn		// All arguments consumed: return number.
+  |  checknum CARG2, >7
+  |6:
+  |  fpins FARG1, FARG1, FARG2
+  |   addi RA, RA, 8
+  |  j <5
+  |7:  // Convert integer to number and continue above.
+  |  checkint CARG2, ->fff_fallback
+  |   fcvt.d.w FARG2, CARG2
+  |  j <6
+  |.endmacro
+  |
+  |  math_minmax math_min, 0, fmin.d
+  |  math_minmax math_max, 1, fmax.d
+  |
+  |//-- String library -----------------------------------------------------
+  |
+  |.ffunc string_byte			// Only handle the 1-arg case here.
+  |  ld CARG1, 0(BASE)
+  |  gettp TMP0, CARG1
+  |  xori TMP1, NARGS8:RC, 8		// Zero iff exactly 1 argument.
+  |  addi TMP0, TMP0, -LJ_TSTR		// Zero iff tag is LJ_TSTR.
+  |  or TMP1, TMP1, TMP0
+  |   cleartp STR:CARG1
+  |  bxnez TMP1, ->fff_fallback		// Need exactly 1 string argument.
+  |  lw TMP0, STR:CARG1->len
+  |    ld PC, FRAME_PC(BASE)
+  |  snez RD, TMP0
+  |   lbu TMP2, STR:CARG1[1]		// Access is always ok (NUL at end).
+  |  addiw RD, RD, 1
+  |  slliw RD, RD, 3			// RD = ((str->len != 0)+1)*8
+  |  settp_b TMP2, TISNUM		// Tag the byte as an integer.
+  |   sd TMP2, -16(BASE)		// Store at BASE-16 (result slot).
+  |  j ->fff_res
+  |
+  |.ffunc string_char			// Only handle the 1-arg case here.
+  |  ffgccheck
+  |  ld CARG1, 0(BASE)
+  |  gettp TMP0, CARG1
+  |  xori TMP1, NARGS8:RC, 8		// Need exactly 1 argument.
+  |  addi TMP0, TMP0, -LJ_TISNUM	// Integer.
+  |  li TMP2, 255
+  |   sext.w CARG1, CARG1
+  |  or TMP1, TMP1, TMP0
+  |   sltu TMP2, TMP2, CARG1		// !(255 < n).
+  |   or TMP1, TMP1, TMP2
+  |   li CARG3, 1			// Length = 1.
+  |  bxnez TMP1, ->fff_fallback	// Any check failed: fallback.
+  |  addi CARG2, sp, TMPD_OFS		// str = address of the TMPD slot.
+  |  sb CARG1, TMPD(sp)		// Store the character there.
+  |->fff_newstr:			// Create string from CARG2/CARG3 and return it.
+  |  sd BASE, L->base
+  |  sd PC, SAVE_PC(sp)
+  |  mv CARG1, L
+  |  // (lua_State *L, const char *str, size_t l)
+  |  call_intern fff_newstr, lj_str_new
+  |  // Returns GCstr *.
+  |  ld BASE, L->base
+  |->fff_resstr:			// Tag CRET1 as string and return it.
+  |  li TMP1, LJ_TSTR
+  |  settp CRET1, TMP1
+  |  j ->fff_restv
+  |
+  |.ffunc string_sub
+  |  ffgccheck
+  |  ld CARG1, 0(BASE)
+  |  ld CARG2, 8(BASE)
+  |  ld CARG3, 16(BASE)
+  |  addi TMP0, NARGS8:RC, -16
+  |   gettp TMP1, CARG1
+  |  bxltz TMP0, ->fff_fallback	// Fewer than 2 arguments: fallback.
+  |  cleartp STR:CARG1, CARG1
+  |   li CARG4, -1			// Default end = -1.
+  |  beqz TMP0, >1			// Exactly 2 arguments: keep default end.
+  |   sext.w CARG4, CARG3
+  |  checkint CARG3, ->fff_fallback
+  |1:
+  |  checkint CARG2, ->fff_fallback
+  |  addi TMP0, TMP1, -LJ_TSTR
+  |   sext.w CARG3, CARG2
+  |  bxnez TMP0, ->fff_fallback	// First argument is not a string.
+  |  lw CARG2, STR:CARG1->len
+  |  // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
+  |  addiw TMP0, CARG2, 1		// TMP0 = len+1
+  |  bgez CARG4, >2
+  |  addw CARG4, CARG4, TMP0		// if (end < 0) end += len+1
+  |2:
+  |  bgez CARG3, >3
+  |  addw CARG3, CARG3, TMP0		// if (start < 0) start += len+1
+  |3:
+  |  bgez CARG4, >4
+  |  mv CARG4, x0			// if (end < 0) end = 0
+  |4:
+  |  bgtz CARG3, >5
+  |   li CARG3, 1		// if (start < 1) start = 1
+  |5:
+  |  ble CARG4, CARG2, >6
+  |  mv CARG4, CARG2		// if (end > len) end = len
+  |6:
+  |   add CARG2, STR:CARG1, CARG3
+  |  sub CARG3, CARG4, CARG3		// len = end - start
+  |   addi CARG2, CARG2, sizeof(GCstr)-1	// CARG2 = str + sizeof(GCstr) + start-1
+  |   addiw CARG3, CARG3, 1             // len += 1
+  |  bxgez CARG3, ->fff_newstr		// len >= 0: create the substring.
+  |->fff_emptystr:  // Return empty string.
+  |  li TMP1, LJ_TSTR
+  |  addi STR:CARG1, GL, offsetof(global_State, strempty)
+  |   settp CARG1, TMP1
+  |  j ->fff_restv
+  |
+  |.macro ffstring_op, name	// string.reverse/lower/upper via the temp buffer.
+  |  .ffunc string_ .. name
+  |  ffgccheck
+  |   ld CARG2, 0(BASE)
+  |  bxeqz NARGS8:RC, ->fff_fallback	// No arguments: fallback.
+  |  checkstr STR:CARG2, ->fff_fallback
+  |  addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf)	// CARG1 = &g->tmpbuf
+  |  ld TMP0, SBUF:CARG1->b
+  |   sd L, SBUF:CARG1->L
+  |   sd BASE, L->base
+  |  sd TMP0, SBUF:CARG1->w		// Set w = b.
+  |   sd PC, SAVE_PC(sp)
+  |  call_intern ff_string_ .. name, lj_buf_putstr_ .. name
+  |//  mv SBUF:CARG1, SBUF:CRET1
+  |  call_intern ff_string_ .. name, lj_buf_tostr
+  |   ld BASE, L->base
+  |  j ->fff_resstr			// Tag CRET1 as string and return it.
+  |.endmacro
+  |
+  |ffstring_op reverse
+  |ffstring_op lower
+  |ffstring_op upper
+  |
+  |//-- Bit library --------------------------------------------------------
+  |
+  |->vm_tobit_fb:			// Convert number at 0(BASE) to 32 bits in CRET1.
+  |  fld FARG1, 0(BASE)
+  |  bxeqz TMP1, ->fff_fallback	// TMP1 == 0 (set by the caller): fallback.
+  |   fadd.d FARG1, FARG1, TOBIT	// Add the TOBIT constant.
+  |  fmv.x.w CRET1, FARG1		// Take the low 32 bits of the sum.
+  |  zext.w CRET1, CRET1
+  |  ret
+  |
+  |.macro .ffunc_bit, name	// Prologue: first argument as 32 bits in CRET1.
+  |  .ffunc_1 bit_..name
+  |  gettp TMP0, CARG1
+  |   zext.w CRET1, CARG1
+  |  beq TMP0, TISNUM, >1		// Integer: use it directly.
+  |   sltiu TMP1, TMP0, LJ_TISNUM	// TMP1 = 1 if tag is below LJ_TISNUM.
+  |  jal ->vm_tobit_fb		// Convert, or fall back if TMP1 == 0.
+  |1:
+  |.endmacro
+  |
+  |.macro .ffunc_bit_op, name, bins	// Fold bins over all arguments.
+  |  .ffunc_bit name
+  |  addi TMP2, BASE, 8		// TMP2 = second argument.
+  |  add TMP3, BASE, NARGS8:RC	// TMP3 = end of arguments.
+  |1:
+  |   ld TMP1, 0(TMP2)
+  |  bxeq TMP2, TMP3, ->fff_resi	// No more arguments: return CRET1.
+  |  gettp TMP0, TMP1
+  |   addi TMP2, TMP2, 8
+  |  bne TMP0, TISNUM, >2		// Not an integer: convert below.
+  |  zext.w TMP1, TMP1
+  |   bins CRET1, CRET1, TMP1
+  |  j <1
+  |2:
+  |   fld FARG1, -8(TMP2)		// Reload the argument as a double.
+  |  sltiu TMP0, TMP0, LJ_TISNUM
+  |   fadd.d FARG1, FARG1, TOBIT	// Add the TOBIT constant.
+  |  bxeqz TMP0, ->fff_fallback	// Tag not below LJ_TISNUM: fallback.
+  |  fmv.x.w TMP1, FARG1		// Take the low 32 bits of the sum.
+  |  zext.w TMP1, TMP1
+  |   bins CRET1, CRET1, TMP1
+  |  j <1
+  |.endmacro
+  |
+  |.ffunc_bit_op band, and
+  |.ffunc_bit_op bor, or
+  |.ffunc_bit_op bxor, xor
+  |
+  |.ffunc_bit bswap
+  |  srliw CARG2, CARG1, 8
+  |   lui CARG3, 16
+  |   addiw CARG3, CARG3, -256		// CARG3 = 0x0000ff00
+  |  and CARG2, CARG2, CARG3		// Byte 2 -> byte 1.
+  |   srliw CARG3, CARG1, 24		// Byte 3 -> byte 0.
+  |  or CARG2, CARG2, CARG3
+  |   slli CARG3, CARG1, 8
+  |    lui CARG4, 0x00ff0		// CARG4 = 0x00ff0000
+  |   and CARG3, CARG3, CARG4		// Byte 1 -> byte 2.
+  |  slli CARG1, CARG1, 24		// Byte 0 -> byte 3.
+  |  or CARG1, CARG1, CARG3
+  |  or CARG1, CARG1, CARG2
+  |  slli CARG1, CARG1, 32
+  |  srli CARG1, CARG1, 32		// Clear the upper 32 bits.
+  |  j ->fff_resi
+  |
+  |.ffunc_bit tobit
+  |->fff_resi:			// Return the 32-bit value in CRET1 tagged with TISNUM.
+  |  settp CARG1, TISNUM	// CARG1 = CRET1
+  |  j ->fff_restv
+  |
+  |.ffunc_bit bnot
+  |  not CRET1, CRET1
+  |  zext.w CRET1, CRET1		// Keep only the low 32 bits.
+  |  j ->fff_resi
+  |
+  |.macro .ffunc_bit_sh, name, shins	// bit.lshift/rshift/arshift.
+  |  .ffunc_2 bit_..name
+  |  gettp TMP0, CARG1
+  |  beq TMP0, TISNUM, >1		// Integer: use it directly.
+  |   sltiu TMP1, TMP0, LJ_TISNUM	// TMP1 = 1 if tag is below LJ_TISNUM.
+  |  jal ->vm_tobit_fb		// Convert, or fall back if TMP1 == 0.
+  |//  mv CARG1, CRET1		// CARG1 = CRET1
+  |1:
+  |  gettp TMP0, CARG2
+  |   zext.w CARG2, CARG2
+  |  bxne TMP0, TISNUM, ->fff_fallback	// Second argument must be an integer.
+  |  sext.w CARG1, CARG1
+  |  shins CRET1, CARG1, CARG2
+  |   zext.w CRET1, CRET1		// Keep only the low 32 bits.
+  |  j ->fff_resi
+  |.endmacro
+  |
+  |.ffunc_bit_sh lshift, sllw
+  |.ffunc_bit_sh rshift, srlw
+  |.ffunc_bit_sh arshift, sraw
+  |
+  |.macro .ffunc_bit_rot, name, rotinsa, rotinsb	// bit.rol/bit.ror.
+  |  .ffunc_2 bit_..name
+  |  gettp TMP0, CARG1
+  |  beq TMP0, TISNUM, >1		// Integer: use it directly.
+  |   sltiu TMP1, TMP0, LJ_TISNUM	// TMP1 = 1 if tag is below LJ_TISNUM.
+  |  jal ->vm_tobit_fb		// Convert, or fall back if TMP1 == 0.
+  |//  mv CARG1, CRET1		// CARG1 = CRET1
+  |1:
+  |  gettp TMP0, CARG2
+  |   zext.w CARG2, CARG2
+  |  bxne TMP0, TISNUM, ->fff_fallback	// Second argument must be an integer.
+  |  sext.w CARG1, CARG1
+  |  neg TMP2, CARG2			// TMP2 = -n for the opposite shift.
+  |  rotinsa TMP1, CARG1, CARG2
+  |  rotinsb TMP0, CARG1, TMP2
+  |  or CRET1, TMP0, TMP1		// Combine both shifted parts.
+  |   zext.w CRET1, CRET1		// Keep only the low 32 bits.
+  |  j ->fff_resi
+  |.endmacro
+  |
+  |.ffunc_bit_rot rol, sllw, srlw
+  |.ffunc_bit_rot ror, srlw, sllw
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->fff_fallback:			// Call fast function fallback handler.
+  |  // BASE = new base, RB = CFUNC, RC = nargs*8
+  |   ld PC, FRAME_PC(BASE)		// Fallback may overwrite PC.
+  |  ld CARG3, CFUNC:RB->f
+  |    add TMP1, BASE, NARGS8:RC
+  |  sd BASE, L->base
+  |    addi TMP0, TMP1, 8*LUA_MINSTACK
+  |     ld TMP2, L->maxstack
+  |   sd PC, SAVE_PC(sp)			// Redundant (but a defined value).
+  |    sd TMP1, L->top
+  |   mv CARG1, L
+  |  bltu TMP2, TMP0, >5			// Need to grow stack.
+  |  jalr CARG3				// (lua_State *L)
+  |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+  |  ld BASE, L->base
+  |   slliw RD, CRET1, 3		// RD = (nresults+1)*8 if CRET1 > 0.
+  |  bxgtz CRET1, ->fff_res		// Returned nresults+1?
+  |1:  // Returned 0 or -1: retry fast path.
+  |   ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  ld TMP0, L->top
+  |   sub NARGS8:RC, TMP0, BASE
+  |   cleartp LFUNC:RB
+  |  bxnez CRET1, ->vm_call_tail		// Returned -1?
+  |  ins_callt				// Returned 0: retry fast path.
+  |
+  |// Reconstruct previous base for vmeta_call during tailcall.
+  |->vm_call_tail:
+  |  andi TMP0, PC, FRAME_TYPE
+  |   andi TMP1, PC, ~FRAME_TYPEP	// TODO
+  |  bnez TMP0, >3			// FRAME_TYPE bits set: use masked PC as delta.
+  |  lbu TMP1, OFS_RA(PC)
+  |  slliw TMP1, TMP1, 3
+  |  addiw TMP1, TMP1, 16		// Delta = RA*8 + 16.
+  |3:
+  |   sub TMP2, BASE, TMP1
+  |  j ->vm_call_dispatch		// Resolve again for tailcall.
+  |
+  |5:  // Grow stack for fallback handler.
+  |  li CARG2, LUA_MINSTACK
+  |   mv CARG1, L
+  |  call_intern vm_call_tail, lj_state_growstack	// (lua_State *L, int n)
+  |  ld BASE, L->base
+  |   mv CRET1, x0		// Clear CRET1 (as if 0 was returned) to force retry.
+  |  j <1
+  |
+  |->fff_gcstep:			// Call GC step function.
+  |  // BASE = new base, RC = nargs*8
+  |  mv MULTRES, ra			// Keep the return address in MULTRES.
+  |  add TMP0, BASE, NARGS8:RC	// Calculate L->top.
+  |   sd BASE, L->base
+  |   sd PC, SAVE_PC(sp)		// Redundant (but a defined value).
+  |   mv CARG1, L
+  |  sd TMP0, L->top
+  |  call_intern fff_gc_step, lj_gc_step	// (lua_State *L). NOTE(review): label arg is fff_gc_step, enclosing label is fff_gcstep -- confirm.
+  |   ld BASE, L->base
+  |  mv ra, MULTRES			// Help return address predictor.
+  |    ld TMP0, L->top
+  |  ld CFUNC:RB, FRAME_FUNC(BASE)
+  |  cleartp CFUNC:RB
+  |   sub NARGS8:RC, TMP0, BASE	// Recompute nargs*8 from L->top.
+  |  ret
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Special dispatch targets -------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_record:				// Dispatch target for recording phase.
+  |.if JIT
+  |  lbu TMP3, GL->hookmask
+  |  andi TMP1, TMP3, HOOK_VMEVENT	// No recording while in vmevent.
+  |  bnez TMP1, >5
+  |  // Decrement the hookcount for consistency, but always do the call.
+  |  lw TMP2, GL->hookcount
+  |  andi TMP1, TMP3, HOOK_ACTIVE
+  |  bnez TMP1, >1
+  |  addiw TMP2, TMP2, -1
+  |  andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+  |  beqz TMP1, >1
+  |  sw TMP2, GL->hookcount
+  |  j >1
+  |.endif
+  |
+  |->vm_rethook:			// Dispatch target for return hooks.
+  |   lbu TMP3, GL->hookmask
+  |  andi TMP1, TMP3, HOOK_ACTIVE		// Hook already active?
+  |  beqz TMP1, >1
+  |5:  // Re-dispatch to static ins.
+  |   ld TMP1, GG_DISP2STATIC(TMP0)	// Assumes TMP0 holds DISPATCH+OP*8.
+  |  jr TMP1
+  |
+  |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  lbu TMP3, GL->hookmask
+  |  lw TMP2, GL->hookcount
+  |  andi TMP1, TMP3, HOOK_ACTIVE		// Hook already active?
+  |  bnez TMP1, <5
+  |   andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+  |   addiw TMP2, TMP2, -1
+  |  beqz TMP1, <5			// Neither line nor count hook set.
+  |   sw TMP2, GL->hookcount
+  |  beqz TMP2, >1			// Count reached zero: call the hook.
+  |  andi TMP1, TMP3, LUA_MASKLINE
+  |  beqz TMP1, <5			// No line hook: re-dispatch.
+  |1:
+  |   sw MULTRES, TMPD(sp)		// Store MULTRES in the TMPD slot.
+  |  mv CARG2, PC
+  |   sd BASE, L->base
+  |   mv CARG1, L
+  |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+  |  call_intern vm_inshook, lj_dispatch_ins	// (lua_State *L, const BCIns *pc)
+  |3:
+  |  ld BASE, L->base
+  |4:  // Re-dispatch to static ins.
+  |  lw INS, -4(PC)
+  |  decode_OP8 TMP1, INS
+  |  add TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld TMP1, GG_DISP2STATIC(TMP0)
+  |   decode_RA8 RA, INS
+  |   decode_RD8b RD
+  |  jr TMP1
+  |
+  |->cont_hook:				// Continue from hook yield.
+  |  addi PC, PC, 4
+  |   lw MULTRES, -24(RB)		// Restore MULTRES for *M ins.
+  |  j <4
+  |
+  |->vm_hotloop:			// Hot loop counter underflow.
+  |.if JIT
+  |  ld LFUNC:TMP1, FRAME_FUNC(BASE)
+  |  addi CARG1, GL, GG_G2J		// CARG1 = jit_State pointer.
+  |  cleartp LFUNC:TMP1
+  |  sd PC, SAVE_PC(sp)
+  |  ld TMP1, LFUNC:TMP1->pc
+  |  mv CARG2, PC
+  |  sd L, (offsetof(jit_State, L))(CARG1)
+  |  lbu TMP1, PC2PROTO(framesize)(TMP1)
+  |  sd BASE, L->base
+  |  slli TMP1, TMP1, 3
+  |  add TMP1, BASE, TMP1		// L->top = BASE + framesize*8.
+  |  sd TMP1, L->top
+  |  call_intern vm_hotloop, lj_trace_hot	// (jit_State *J, const BCIns *pc)
+  |  j <3				// Reload BASE and re-dispatch (label 3 above).
+  |.endif
+  |
+  |
+  |->vm_callhook:			// Dispatch target for call hooks.
+  |  mv CARG2, PC
+  |.if JIT
+  |  j >1
+  |.endif
+  |
+  |->vm_hotcall:			// Hot call counter underflow.
+  |.if JIT
+  |  ori CARG2, PC, 1			// Pass PC with bit 0 set.
+  |1:
+  |.endif
+  |  add TMP0, BASE, RC
+  |  sd PC, SAVE_PC(sp)
+  |  sd BASE, L->base
+  |  sub RA, RA, BASE			// Keep RA as an offset from BASE across the call.
+  |  sd TMP0, L->top
+  |  mv CARG1, L
+  |  call_intern vm_hotcall, lj_dispatch_call	// (lua_State *L, const BCIns *pc)
+  |  // Returns ASMFunction.
+  |  ld BASE, L->base
+  |  ld TMP0, L->top
+  |  sd x0, SAVE_PC(sp)		// Invalidate for subsequent line hook.
+  |  add RA, BASE, RA			// Rebase RA on the reloaded BASE.
+  |  sub NARGS8:RC, TMP0, BASE
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  cleartp LFUNC:RB
+  |  lw INS, -4(PC)
+  |  jr CRET1				// Jump to the returned ASMFunction.
+  |
+  |->cont_stitch:			// Trace stitching.
+  |.if JIT
+  |  // RA = resultptr, RB = meta base
+  |  lw INS, -4(PC)
+  |  ld TRACE:TMP2, -40(RB)		// Save previous trace.
+  |  decode_RA8 RC, INS
+  |  addi TMP1, MULTRES, -8		// TMP1 = nresults*8.
+  |  cleartp TRACE:TMP2
+  |  add RC, BASE, RC			// Call base.
+  |  beqz TMP1, >2			// No results to move.
+  |1:  // Move results down.
+  |  ld CARG1, 0(RA)
+  |  addi TMP1, TMP1, -8
+  |  addi RA, RA, 8
+  |  sd CARG1, 0(RC)
+  |  addi RC, RC, 8
+  |  bnez TMP1, <1
+  |2:
+  |  decode_RA8 RA, INS
+  |  decode_RB8 RB, INS
+  |  add RA, RA, RB
+  |  add RA, BASE, RA			// RA = BASE + decoded RA + decoded RB.
+  |3:
+  |  bltu RC, RA, >8			// More results wanted?
+  |
+  |  lhu TMP3, TRACE:TMP2->traceno
+  |  lhu RD, TRACE:TMP2->link
+  |  bxeq RD, TMP3, ->cont_nop		// Blacklisted.
+  |  slliw RD, RD, 3
+  |  bxnez RD, =>BC_JLOOP		// Jump to stitched trace.
+  |
+  |  // Stitch a new trace to the previous trace.
+  |  addi CARG1, GL, GG_G2J
+  |  // addi CARG2, CARG1, 1		// We don't care what's on the verge.
+  |  addi CARG2, CARG1, 2047		// jit_State too large.
+  |  sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2)
+  |  sd L, (offsetof(jit_State, L)-2047)(CARG2)
+  |  sd BASE, L->base
+  |  mv CARG2, PC
+  |  // (jit_State *J, const BCIns *pc)
+  |  call_intern cont_stitch, lj_dispatch_stitch
+  |  ld BASE, L->base
+  |  j ->cont_nop
+  |
+  |8:  // Fill one missing result slot with nil.
+  |  sd TISNIL, 0(RC)
+  |  addi RC, RC, 8
+  |  j <3
+  |.endif
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |   mv CARG1, L
+  |  mv CARG2, PC
+  |   sd BASE, L->base
+  |   sw MULTRES, TMPD(sp)		// Store MULTRES in the TMPD slot.
+  |  // (lua_State *L, const BCIns *pc)
+  |  call_intern vm_profhook, lj_dispatch_profile
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  addi PC, PC, -4			// Step PC back by one instruction.
+  |   ld BASE, L->base
+  |  j ->cont_nop
+#endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Trace exit handler -------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro savex_, a, b		// Save FPRs/GPRs number a and b to the exit state.
+  |  fsd f..a, a*8(sp)		// FPR slots start at 0(sp).
+  |  fsd f..b, b*8(sp)
+  |  sd x..a, 32*8+a*8(sp)		// GPR slots start at 32*8(sp).
+  |  sd x..b, 32*8+b*8(sp)
+  |.endmacro
+  |
+  |->vm_exit_handler:
+  |.if JIT
+  |  addi sp, sp, -(32*8+32*8)		// Room for 32 FPR + 32 GPR slots.
+  |  savex_ 0, 5
+  |  savex_ 6, 7
+  |  savex_ 8, 9
+  |  savex_ 10, 11
+  |  savex_ 12, 13
+  |  savex_ 14, 15
+  |  savex_ 16, 17
+  |  savex_ 18, 19
+  |  savex_ 20, 21
+  |  savex_ 22, 23
+  |  savex_ 24, 25
+  |  savex_ 26, 27
+  |  savex_ 28, 29
+  |  savex_ 30, 31
+  |  fsd f1, 1*8(sp)			// f1-f4 are not covered by savex_ above.
+  |  fsd f2, 2*8(sp)
+  |  fsd f3, 3*8(sp)
+  |  fsd f4, 4*8(sp)
+  |  sd x0, 32*8+1*8(sp)		// Clear RID_TMP.
+  |  ld TMP1, 32*8+32*8(sp)			// Load exit pc.
+  |   addi TMP2, sp, 32*8+32*8		// Recompute original value of sp.
+  |  addxi DISPATCH, GL, GG_G2DISP
+  |   sd TMP2, 32*8+2*8(sp)		// Store sp in RID_SP
+  |  addi CARG1, GL, GG_G2J
+  |    li_vmstate EXIT
+  |  // addi CARG2, CARG1, 1		// We don't care what's on the verge.
+  |  addi CARG2, CARG1, 2047		// jit_State too large.
+  |  sub TMP1, TMP1, ra
+  |   lw TMP2, 0(ra)			// Load trace number.
+  |    st_vmstate
+  |  srli TMP1, TMP1, 2
+  |  ld L, GL->cur_L
+  |  ld BASE, GL->jit_base
+  |  srli TMP2, TMP2, 12
+  |  addi TMP1, TMP1, -2
+  |  sd L, (offsetof(jit_State, L)-2047)(CARG2)
+  |  sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2)	// Store trace number.
+  |  sd BASE, L->base
+  |  sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2)	// Store exit number.
+  |  sd x0, GL->jit_base
+  |  mv CARG2, sp
+  |  call_intern vm_exit_handler, lj_trace_exit	// (jit_State *J, ExitState *ex)
+  |  // Returns MULTRES (unscaled) or negated error code.
+  |  ld TMP1, L->cframe
+  |  ld BASE, L->base
+  |  andi sp, TMP1, CFRAME_RAWMASK
+  |  ld PC, SAVE_PC(sp)		// Get SAVE_PC.
+  |  sd L, SAVE_L(sp)			// Set SAVE_L (on-trace resume/yield).
+  |  j >1
+  |.endif
+  |
+  |->vm_exit_interp:
+  |.if JIT
+  |  // CRET1 = MULTRES or negated error code, BASE, PC and GL set.
+  |  ld L, SAVE_L(sp)
+  |  addxi DISPATCH, GL, GG_G2DISP
+  |  sd BASE, L->base
+  |1:
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  sltiu TMP0, CRET1, -LUA_ERRERR  // Check for error from exit.
+  |  beqz TMP0, >9
+  |  lui TMP3, 0x43380		// TOBIT = Hiword of 2^52 + 2^51 (double).
+  |  slli MULTRES, CRET1, 3		// Scale the returned MULTRES by 8.
+  |  cleartp LFUNC:RB
+  |  sw MULTRES, TMPD(sp)
+  |  li TISNIL, LJ_TNIL
+  |  li TISNUM, LJ_TISNUM		// Setup type comparison constants.
+  |  slli TMP3, TMP3, 32
+  |  ld TMP1, LFUNC:RB->pc
+  |  sd x0, GL->jit_base
+  |  ld KBASE, PC2PROTO(k)(TMP1)
+  |  fmv.d.x TOBIT, TMP3
+  |  // Modified copy of ins_next which handles function header dispatch, too.
+  |  lw INS, 0(PC)
+  |   addi PC, PC, 4
+  |  addiw CRET1, CRET1, 17		// Static dispatch?
+  |  // Assumes TISNIL == ~LJ_VMST_INTERP == -1
+  |  sw TISNIL, GL->vmstate
+  |   decode_RD8a RD, INS
+  |  beqz CRET1, >5
+  |  decode_OP8 TMP1, INS
+  |  add TMP0, DISPATCH, TMP1
+  |    sltiu TMP2, TMP1, BC_FUNCF*8	// TMP2 = 1 if opcode is below BC_FUNCF.
+  |  ld TMP3, 0(TMP0)
+  |   decode_RA8 RA, INS
+  |    beqz TMP2, >2
+  |   decode_RD8b RD
+  |  jr TMP3
+  |2:
+  |  sltiu TMP2, TMP1, (BC_FUNCC+2)*8	// Fast function?
+  |  ld TMP1, FRAME_PC(BASE)
+  |  bnez TMP2, >3
+  |  // Check frame below fast function.
+  |  andi TMP0, TMP1, FRAME_TYPE
+  |  bnez TMP0, >3			// Trace stitching continuation?
+  |  // Otherwise set KBASE for Lua function below fast function.
+  |  lw TMP2, -4(TMP1)
+  |  decode_RA8 TMP0, TMP2
+  |  sub TMP1, BASE, TMP0
+  |  ld LFUNC:TMP2, -32(TMP1)
+  |  cleartp LFUNC:TMP2
+  |  ld TMP1, LFUNC:TMP2->pc
+  |  ld KBASE, PC2PROTO(k)(TMP1)
+  |3:
+  |  addi RC, MULTRES, -8
+  |  add RA, RA, BASE
+  |  jr TMP3
+  |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  ld TMP0, GL_J(trace)(GL)
+  |  decode_RD8b RD
+  |  add TMP0, TMP0, RD
+  |  ld TRACE:TMP2, 0(TMP0)
+  |  lw INS, TRACE:TMP2->startins
+  |  decode_OP8 TMP1, INS
+  |  add TMP0, DISPATCH, TMP1
+  |   decode_RD8a RD, INS
+  |  ld TMP3, GG_DISP2STATIC(TMP0)
+  |   decode_RA8a RA, INS
+  |   decode_RD8b RD
+  |   decode_RA8b RA
+  |  jr TMP3
+  |
+  |9:  // Rethrow error from the right C frame.
+  |  negw CARG2, CRET1			// errcode = -CRET1.
+  |  mv CARG1, L
+  |  call_intern vm_exit_interp, lj_err_trace	// (lua_State *L, int errcode)
+  |.endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Math helper functions ----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |
+  |// Hard-float round to integer. Input in FARG1; func is floor, ceil or trunc.
+  |// Modifies TMP0, TMP1, FARG1, FARG5, FTMP1, FTMP3, FTMP4
+  |.macro vm_round_hf, func
+  |  lui TMP0, 0x43300		// Hiword of 2^52 (double).
+  |  slli TMP0, TMP0, 32
+  |  fmv.d.x FARG5, TMP0
+  |  fabs.d FTMP4, FARG1		// |x|
+  |   fmv.x.d TMP1, FARG1		// Raw bits of x (for the sign test below).
+  |  flt.d TMP0, FTMP4, FARG5
+  |  fadd.d FTMP3, FTMP4, FARG5		// (|x| + 2^52) - 2^52
+  |  fsub.d FTMP3, FTMP3, FARG5
+  |  beqz TMP0, >5			// Truncate only if |x| < 2^52.
+  |  sltz TMP1, TMP1			// TMP1 = 1 if the sign bit of x is set.
+  |.if "func" == "ceil"
+  |  lui TMP0, 0xbff00	// Hiword of -1 (double).
+  |.else
+  |  lui TMP0, 0x3ff00	// Hiword of +1 (double).
+  |.endif
+  |.if "func" == "trunc"
+  |  slli TMP0, TMP0, 32
+  |  fmv.d.x FARG5, TMP0
+  |  flt.d TMP0, FTMP4, FRET1	// |x| < result? NOTE(review): FRET1 is not written earlier in this macro; the rounded value is in FTMP3 -- confirm operand.
+  |  fsub.d FTMP4, FTMP3, FARG5
+  |  beqz TMP0, >1
+  |  fmv.d FTMP1, FTMP4
+  |  j >2
+  |1:
+  |  fmv.d FTMP1, FTMP3
+  |2:
+  |  fneg.d FTMP4, FTMP1
+  |  beqz TMP1, >3			// x not negative: keep the positive result.
+  |  fmv.d FTMP3, FTMP4
+  |  j >4
+  |3:
+  |  fmv.d FTMP3, FTMP1
+  |4:
+  |  ret				// NOTE(review): this branch leaves its result in FTMP3, not FRET1 -- confirm.
+  |.else
+  |  fneg.d FTMP4, FTMP3
+  |  slli TMP0, TMP0, 32
+  |  fmv.d.x FARG5, TMP0
+  |  beqz TMP1, >1			// x not negative: keep the positive value.
+  |  fmv.d FTMP1, FTMP4
+  |  j >2
+  |1:
+  |  fmv.d FTMP1, FTMP3
+  |2:
+  |.if "func" == "ceil"
+  |  flt.d TMP0, FTMP1, FARG1	// x > result?
+  |.else
+  |  flt.d TMP0, FARG1, FTMP1	// x < result?
+  |.endif
+  |  beqz TMP0, >3
+  |  fsub.d FTMP4, FTMP1, FARG5		// If yes, subtract +-1.
+  |  fmv.d FRET1, FTMP4
+  |  j >4
+  |3:
+  |  fmv.d FRET1, FTMP1
+  |4:
+  |  ret
+  |.endif
+  |5:
+  |  fmv.d FTMP3, FARG1		// |x| >= 2^52 (or NaN): copy x unchanged.
+  |  ret
+  |.endmacro
+  |
+  |
+  |->vm_floor:
+  |  vm_round_hf floor
+  |->vm_ceil:
+  |  vm_round_hf ceil
+  |->vm_trunc:
+  |.if JIT
+  |  vm_round_hf trunc
+  |.endif
+  |
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Miscellaneous functions --------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.define NEXT_TAB,            TAB:CARG1
+  |.define NEXT_IDX,            CARG2
+  |.define NEXT_ASIZE,          CARG3
+  |.define NEXT_NIL,            CARG4
+  |.define NEXT_TMP0,           TMP0
+  |.define NEXT_TMP1,           TMP1
+  |.define NEXT_TMP2,           TMP2
+  |.define NEXT_RES_VK,         CRET1
+  |.define NEXT_RES_IDX,        CRET2
+  |.define NEXT_RES_PTR,        sp
+  |.define NEXT_RES_VAL,        0(sp)
+  |.define NEXT_RES_KEY,        8(sp)
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in CRET2.
+  |->vm_next:
+  |.if JIT
+  |  lw NEXT_ASIZE, NEXT_TAB->asize
+  |  ld NEXT_TMP0, NEXT_TAB->array
+  |  li NEXT_NIL, LJ_TNIL
+  |1:  // Traverse array part.
+  |  bgeu NEXT_IDX, NEXT_ASIZE, >5	// Past the array part: go to hash part.
+  |  slliw NEXT_TMP1, NEXT_IDX, 3
+  |  add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1	// Address of array[idx].
+  |  li TMP3, LJ_TISNUM
+  |  ld NEXT_TMP2, 0(NEXT_TMP1)
+  |  slli TMP3, TMP3, 47
+  |  or NEXT_TMP1, NEXT_IDX, TMP3	// Key = idx with LJ_TISNUM shifted to bit 47.
+  |  addiw NEXT_IDX, NEXT_IDX, 1
+  |  beq NEXT_TMP2, NEXT_NIL, <1	// Skip nil array slots.
+  |  sd NEXT_TMP2, NEXT_RES_VAL
+  |  sd NEXT_TMP1, NEXT_RES_KEY
+  |  mv NEXT_RES_VK, NEXT_RES_PTR	// Return pointer to the val/key pair at sp.
+  |  mv NEXT_RES_IDX, NEXT_IDX
+  |  ret
+  |
+  |5:  // Traverse hash part.
+  |  subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+  |  lw NEXT_TMP0, NEXT_TAB->hmask
+  |  ld NODE:NEXT_RES_VK, NEXT_TAB->node
+  |  slliw NEXT_TMP2, NEXT_RES_IDX, 5
+  |  slliw TMP3, NEXT_RES_IDX, 3
+  |  subw TMP3, NEXT_TMP2, TMP3		// idx*32 - idx*8 = idx*24 (presumably sizeof(Node) -- confirm).
+  |  add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3
+  |6:
+  |  bltu NEXT_TMP0, NEXT_RES_IDX, >8	// idx > hmask: end of iteration.
+  |  ld NEXT_TMP2, NODE:NEXT_RES_VK->val
+  |  addiw NEXT_RES_IDX, NEXT_RES_IDX, 1
+  |  bne NEXT_TMP2, NEXT_NIL, >9	// Non-nil value found: return this node.
+  |  // Skip holes in hash part.
+  |  addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+  |  j <6
+  |
+  |8:  // End of iteration. Set the key to nil (not the value).
+  |  sd NEXT_NIL, NEXT_RES_KEY
+  |  mv NEXT_RES_VK, NEXT_RES_PTR
+  |9:
+  |  addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE	// Convert back to a combined index.
+  |  ret
+  |.endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- FFI helper functions -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |// Handler for callback functions. Callback slot number in x5, g in x7.
+  |->vm_ffi_callback:
+  |.if FFI
+  |.type CTSTATE, CTState, PC
+  |  saveregs
+  |  ld CTSTATE, GL:x7->ctype_state
+  |  mv GL, x7
+  |  addxi DISPATCH, x7, GG_G2DISP
+  |  srli x5, x5, 12
+  |  sw x5, CTSTATE->cb.slot
+  |  sd CARG1, CTSTATE->cb.gpr[0]	// Save all 8 integer and 8 FP argument registers.
+  |  fsd FARG1, CTSTATE->cb.fpr[0]
+  |  sd CARG2, CTSTATE->cb.gpr[1]
+  |  fsd FARG2, CTSTATE->cb.fpr[1]
+  |  sd CARG3, CTSTATE->cb.gpr[2]
+  |  fsd FARG3, CTSTATE->cb.fpr[2]
+  |  sd CARG4, CTSTATE->cb.gpr[3]
+  |  fsd FARG4, CTSTATE->cb.fpr[3]
+  |  sd CARG5, CTSTATE->cb.gpr[4]
+  |  fsd FARG5, CTSTATE->cb.fpr[4]
+  |  sd CARG6, CTSTATE->cb.gpr[5]
+  |  fsd FARG6, CTSTATE->cb.fpr[5]
+  |  sd CARG7, CTSTATE->cb.gpr[6]
+  |  fsd FARG7, CTSTATE->cb.fpr[6]
+  |  sd CARG8, CTSTATE->cb.gpr[7]
+  |  fsd FARG8, CTSTATE->cb.fpr[7]
+  |  addi TMP0, sp, CFRAME_SPACE
+  |  sd TMP0, CTSTATE->cb.stack		// cb.stack = sp + CFRAME_SPACE.
+  |  sd x0, SAVE_PC(sp)			// Any value outside of bytecode is ok.
+  |  mv CARG1, CTSTATE
+  |  mv CARG2, sp
+  |  call_intern vm_ffi_callback, lj_ccallback_enter	// (CTState *cts, void *cf)
+  |  // Returns lua_State *.
+  |  ld BASE, L:CRET1->base
+  |  ld RC, L:CRET1->top
+  |  mv L, CRET1
+  |  lui TMP3, 0x43380			// TOBIT = Hiword of 2^52 + 2^51 (double).
+  |  ld LFUNC:RB, FRAME_FUNC(BASE)
+  |  li TISNIL, LJ_TNIL
+  |  li TISNUM, LJ_TISNUM
+  |  slli TMP3, TMP3, 32
+  |  li_vmstate INTERP
+  |  subw RC, RC, BASE			// RC = L->top - L->base.
+  |  cleartp LFUNC:RB
+  |  st_vmstate
+  |  fmv.d.x TOBIT, TMP3
+  |  ins_callt
+  |.endif
+  |
+  |->cont_ffi_callback:				// Return from FFI callback.
+  |.if FFI
+  |  ld CTSTATE, GL->ctype_state
+  |  sd BASE, L->base
+  |  sd RB, L->top
+  |  sd L, CTSTATE->L
+  |  mv CARG1, CTSTATE
+  |  mv CARG2, RA
+  |  // (CTState *cts, TValue *o)
+  |  call_intern cont_ffi_callback, lj_ccallback_leave
+  |  fld FRET1, CTSTATE->cb.fpr[0]	// Load return registers from the cb state.
+  |  ld CRET1, CTSTATE->cb.gpr[0]
+  |  fld FRET2, CTSTATE->cb.fpr[1]
+  |  ld CRET2, CTSTATE->cb.gpr[1]
+  |  j ->vm_leave_unw
+  |.endif
+  |
+  |->vm_ffi_call:			// Call C function via FFI.
+  |  // Caveat: needs special frame unwinding, see below.
+  |.if FFI
+  |  .type CCSTATE, CCallState, CARG1
+  |  lw TMP1, CCSTATE->spadj
+  |  lbu CARG2, CCSTATE->nsp
+  |  lbu CARG3, CCSTATE->nfpr
+  |  mv TMP2, sp			// TMP2 = sp on entry.
+  |  sub sp, sp, TMP1			// Lower sp by spadj.
+  |  sd ra, -8(TMP2)			// Save ra, x18 and CCSTATE below the entry sp.
+  |  sd x18, -16(TMP2)
+  |  sd CCSTATE, -24(TMP2)
+  |  mv x18, TMP2			// x18 holds the entry sp across the call.
+  |  addi TMP1, CCSTATE, offsetof(CCallState, stack)
+  |  mv TMP2, sp
+  |  add TMP3, TMP1, CARG2		// End of source = stack + nsp.
+  |  beqz CARG2, >2			// nsp == 0: nothing to copy.
+  |1:  // Copy CCSTATE->stack to the new stack area, 8 bytes at a time.
+  |  ld TMP0, 0(TMP1)
+  |  addi TMP1, TMP1, 8
+  |  sd TMP0, 0(TMP2)
+  |  addi TMP2, TMP2, 8
+  |  bltu TMP1, TMP3, <1
+  |2:
+  |  beqz CARG3, >3			// nfpr == 0: skip loading FP registers.
+  |  fld FARG1, CCSTATE->fpr[0]
+  |  fld FARG2, CCSTATE->fpr[1]
+  |  fld FARG3, CCSTATE->fpr[2]
+  |  fld FARG4, CCSTATE->fpr[3]
+  |  fld FARG5, CCSTATE->fpr[4]
+  |  fld FARG6, CCSTATE->fpr[5]
+  |  fld FARG7, CCSTATE->fpr[6]
+  |  fld FARG8, CCSTATE->fpr[7]
+  |3:
+  |  ld CFUNCADDR, CCSTATE->func
+  |  ld CARG2, CCSTATE->gpr[1]
+  |  ld CARG3, CCSTATE->gpr[2]
+  |  ld CARG4, CCSTATE->gpr[3]
+  |  ld CARG5, CCSTATE->gpr[4]
+  |  ld CARG6, CCSTATE->gpr[5]
+  |  ld CARG7, CCSTATE->gpr[6]
+  |  ld CARG8, CCSTATE->gpr[7]
+  |  ld CARG1, CCSTATE->gpr[0]		// Do this last, since CCSTATE is CARG1.
+  |  jalr CFUNCADDR
+  |  ld CCSTATE:TMP1, -24(x18)		// Reload CCSTATE, saved x18 and ra.
+  |  ld TMP0, -16(x18)
+  |  ld ra, -8(x18)
+  |  sd CRET1, CCSTATE:TMP1->gpr[0]	// Store the return registers back.
+  |  sd CRET2, CCSTATE:TMP1->gpr[1]
+  |  fsd FRET1, CCSTATE:TMP1->fpr[0]
+  |  fsd FRET2, CCSTATE:TMP1->fpr[1]
+  |  mv sp, x18			// Restore the entry sp.
+  |  mv x18, TMP0			// Restore the saved x18.
+  |  ret
+  |.endif
+  |// Note: vm_ffi_call must be the last function in this object file!
+  |
+  |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+  int vk = 0;
+  |=>defop:
+
+  switch (op) {
+
+  /* -- Comparison ops ---------------------------------------------------- */
+
+  /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+    |  // RA = src1*8, RD = src2*8, JMP with RD = target
+    |  add RA, BASE, RA
+    |  add RD, BASE, RD
+    if (op == BC_ISLT || op == BC_ISGE) {
+      |  ld CARG1, 0(RA)
+      |   ld CARG2, 0(RD)
+      |  gettp CARG3, CARG1
+      |   gettp CARG4, CARG2
+    } else {
+      |  ld CARG2, 0(RA)
+      |   ld CARG1, 0(RD)
+      |  gettp CARG3, CARG2
+      |   gettp CARG4, CARG1
+    }
+    |  lhu TMP2, OFS_RD(PC)		// TMP2=jump
+    |   addi PC, PC, 4
+    |  bne CARG3, TISNUM, >2
+    |  decode_BC4b TMP2
+    |   bne CARG4, TISNUM, >5
+    |  sext.w CARG1, CARG1
+    |  sext.w CARG2, CARG2
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  slt TMP1, CARG1, CARG2
+    |  addw TMP2, TMP2, TMP3		// TMP2=(jump-0x8000)<<2
+    if (op == BC_ISLT || op == BC_ISGT) {
+      |  neg TMP1, TMP1
+    } else {
+      |  addi TMP1, TMP1, -1
+    }
+    |  and TMP2, TMP2, TMP1
+    |1:
+    |  add PC, PC, TMP2
+    |  ins_next
+    |
+    |2:  // RA is not an integer.
+    |  sltiu TMP1, CARG3, LJ_TISNUM
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  bxeqz TMP1, ->vmeta_comp
+    |  sltiu TMP1, CARG4, LJ_TISNUM
+    |  decode_BC4b TMP2
+    |  beqz TMP1, >4
+    |  fmv.d.x FTMP0, CARG1
+    |  fmv.d.x FTMP2, CARG2
+    |3:  // RA and RD are both numbers.
+    |  addw TMP2, TMP2, TMP3
+    if (op == BC_ISLT) {
+      |  flt.d TMP3, FTMP0, FTMP2
+      |  neg TMP3, TMP3
+    } else if (op == BC_ISGE) {
+      |  flt.d TMP3, FTMP0, FTMP2
+      |  addi TMP3, TMP3, -1
+    } else if (op == BC_ISLE) {
+      |  fle.d TMP3, FTMP2, FTMP0
+      |  neg TMP3, TMP3
+    } else if (op == BC_ISGT) {
+      |  fle.d TMP3, FTMP2, FTMP0
+      |  addi TMP3, TMP3, -1
+    }
+    |  and TMP2, TMP2, TMP3
+    |  j <1
+    |
+    |4:  // RA is a number, RD is not a number.
+    |  // RA is a number, RD is an integer. Convert RD to a number.
+    |  bxne CARG4, TISNUM, ->vmeta_comp
+    if (op == BC_ISLT || op == BC_ISGE) {
+      |  fcvt.d.w FTMP2, CARG2
+      |  fmv.d.x FTMP0, CARG1
+    } else {
+      |  fcvt.d.w FTMP0, CARG1
+      |  fmv.d.x FTMP2, CARG2
+    }
+    |  j <3
+    |
+    |5:  // RA is an integer, RD is not an integer
+    |  sltiu TMP1, CARG4, LJ_TISNUM
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  bxeqz TMP1, ->vmeta_comp
+    |  // RA is an integer, RD is a number. Convert RA to a number.
+    if (op == BC_ISLT || op == BC_ISGE) {
+      |  fcvt.d.w FTMP0, CARG1
+      |  fmv.d.x FTMP2, CARG2
+    } else {
+      |  fcvt.d.w FTMP2, CARG2
+      |  fmv.d.x FTMP0, CARG1
+    }
+    |  j <3
+    break;
+
+  case BC_ISEQV: case BC_ISNEV:  /* Slot ==/~= slot; the outcome decides whether the following JMP is taken. */
+    vk = op == BC_ISEQV;
+    |  // RA = src1*8, RD = src2*8, JMP with RD = target
+    |  add RA, BASE, RA
+    |   add RD, BASE, RD
+    |    addi PC, PC, 4
+    |  ld CARG1, 0(RA)
+    |   ld CARG2, 0(RD)
+    |    lhu TMP2, -4+OFS_RD(PC)		// TMP2=jump (-4: PC was already advanced)
+    |  gettp CARG3, CARG1
+    |   gettp CARG4, CARG2
+    |  sltu TMP0, TISNUM, CARG3		// TMP0=1: tag of RA is above TISNUM
+    |   sltu TMP1, TISNUM, CARG4		// TMP1=1: tag of RD is above TISNUM
+    |  or TMP0, TMP0, TMP1
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    if (vk) {  /* TMP0=0: both operands are numbers, continue in the numeric compare code. */
+      |  beqz TMP0, ->BC_ISEQN_Z
+    } else {
+      |  beqz TMP0, ->BC_ISNEN_Z
+    }
+    |// Either or both types are not numbers.
+    |.if FFI
+    |  // Check if RA or RD is a cdata.
+    |  xori TMP0, CARG3, LJ_TCDATA
+    |  xori TMP1, CARG4, LJ_TCDATA
+    |  and TMP0, TMP0, TMP1
+    |  bxeqz TMP0, ->vmeta_equal_cd
+    |.endif
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4. NOTE(review): TMP3 looks unchanged since the lui above -- confirm before dropping.
+    |  decode_BC4b TMP2
+    |  addw TMP2, TMP2, TMP3		// (jump-0x8000)<<2
+    |  bne CARG1, CARG2, >2		// Compare the full 64 bit TValues.
+    |  // Tag and value are equal.
+    if (vk) {  /* Only ISEQV jumps on equality; ISNEV falls through to the next instruction. */
+      |->BC_ISEQV_Z:
+      |  add PC, PC, TMP2
+    }
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if the tags are the same and it's a table or userdata.
+    |  xor TMP3, CARG3, CARG4			// Same type?
+    |  sltiu TMP0, CARG3, LJ_TISTABUD+1		// Table or userdata? TMP0=1
+    |  beqz TMP3, >3
+    |  mv TMP0, x0		// Different types: force TMP0=0.
+    |3:  // TMP0=1 only for same type and table/userdata.
+    |  cleartp TAB:TMP1, CARG1
+    if (vk) {
+      |  beqz TMP0, <1
+    } else {
+      |  beqz TMP0, ->BC_ISEQV_Z  // Reuse code from opposite instruction.
+    }
+    |  // Different tables or userdatas. Need to check __eq metamethod.
+    |  // Field metatable must be at same offset for GCtab and GCudata!
+    |  ld TAB:TMP3, TAB:TMP1->metatable
+    if (vk) {
+      |  beqz TAB:TMP3, <1		// No metatable?
+      |  lbu TMP3, TAB:TMP3->nomm
+      |  andi TMP3, TMP3, 1<<MM_eq
+      |  li TMP0, 0		// ne = 0
+      |  bnez TMP3, <1			// Or 'no __eq' flag set?
+    } else {
+      |  beqz TAB:TMP3,->BC_ISEQV_Z	// No metatable?
+      |  lbu TMP3, TAB:TMP3->nomm
+      |  andi TMP3, TMP3, 1<<MM_eq
+      |  li TMP0, 1		// ne = 1
+      |  bnez TMP3, ->BC_ISEQV_Z	// Or 'no __eq' flag set?
+    }
+    |  j ->vmeta_equal			// Handle __eq metamethod.
+    break;
+
+  case BC_ISEQS: case BC_ISNES:  /* Slot ==/~= string constant; the outcome decides whether the following JMP is taken. */
+    vk = op == BC_ISEQS;
+    |  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
+    |  add RA, BASE, RA
+    |   addi PC, PC, 4
+    |  ld CARG1, 0(RA)
+    |   sub RD, KBASE, RD
+    |    lhu TMP2, -4+OFS_RD(PC)		// TMP2=jump (-4: PC was already advanced)
+    |   ld CARG2, -8(RD)		// KBASE-8-str_const*8
+    |.if FFI
+    |  gettp CARG3, CARG1
+    |  li TMP1, LJ_TCDATA
+    |.endif
+    |  li TMP0, LJ_TSTR
+    |   decode_BC4b TMP2
+    |   settp CARG2, TMP0		// Tag the constant so one 64 bit compare suffices.
+    |   lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |.if FFI
+    |  bxeq CARG3, TMP1, ->vmeta_equal_cd
+    |.endif
+    |  xor TMP0, CARG1, CARG2		// TMP0=0: A==D; TMP0!=0: A!=D
+    |   addw TMP2, TMP2, TMP3		// TMP2=(jump-0x8000)<<2
+    if (vk) {
+      |  seqz TMP4, TMP0		// TMP4=1: take the jump
+    } else {
+      |  snez TMP4, TMP0		// TMP4=1: take the jump
+    }
+    |  neg TMP4, TMP4		// 0 or all-ones mask
+    |  and TMP2, TMP2, TMP4		// Jump offset, or 0 if not taken.
+    |  add PC, PC, TMP2
+    |  ins_next
+    break;
+
+  case BC_ISEQN: case BC_ISNEN:  /* Slot ==/~= number constant; the outcome decides whether the following JMP is taken. */
+    vk = op == BC_ISEQN;
+    |  // RA = src*8, RD = num_const*8, JMP with RD = target
+    |  add RA, BASE, RA
+    |   add RD, KBASE, RD
+    |  ld CARG1, 0(RA)
+    |   ld CARG2, 0(RD)
+    |    lhu TMP2, OFS_RD(PC)		// TMP2=jump
+    |  gettp CARG3, CARG1
+    |   gettp CARG4, CARG2
+    |    addi PC, PC, 4
+    |    lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    if (vk) {  /* Entry from ISEQV/ISNEV: CARG1-4, TMP2 (raw jump) and TMP3 are already set up, PC advanced. */
+      |->BC_ISEQN_Z:
+    } else {
+      |->BC_ISNEN_Z:
+    }
+    |  decode_BC4b TMP2
+    |  bne CARG3, TISNUM, >4
+    |  addw TMP2, TMP2, TMP3		// TMP2=(jump-0x8000)<<2
+    |  bne CARG4, TISNUM, >6
+    |  xor TMP0, CARG1, CARG2		// TMP0=0: A==D; TMP0!=0: A!=D
+    |1:
+    if (vk) {
+      |  seqz TMP4, TMP0
+      |  neg TMP4, TMP4		// 0 or all-ones mask
+      |  and TMP2, TMP2, TMP4
+      |  add PC, PC, TMP2
+      |2:  // Non-number operand: ISEQN does not jump.
+    } else {
+      |  snez TMP4, TMP0
+      |  neg TMP4, TMP4		// 0 or all-ones mask
+      |  and TMP2, TMP2, TMP4
+      |2:  // Non-number operand: ISNEN jumps with the unmasked offset.
+      |  add PC, PC, TMP2
+    }
+    |3:
+    |  ins_next
+    |
+    |4:  // RA is not an integer.
+    |    addw TMP2, TMP2, TMP3		// Branched here before the bias was applied.
+    |.if FFI
+    |  bgeu CARG3, TISNUM, >7
+    |.else
+    |  bgeu CARG3, TISNUM, <2
+    |.endif
+    |  fmv.d.x FTMP0, CARG1
+    |   fmv.d.x FTMP2, CARG2
+    |  bne CARG4, TISNUM, >5
+    |// RA is a number, RD is an integer.
+    |  fcvt.d.w FTMP2, CARG2
+    |
+    |5:  // RA and RD are both numbers.
+    |  feq.d TMP0, FTMP0, FTMP2
+    |  seqz TMP0, TMP0		// TMP0=0: A==D; TMP0=1: A!=D
+    |  j <1
+    |
+    |6: // RA is an integer, RD is not an integer.
+    |.if FFI
+    |  bgeu CARG4, TISNUM, >8
+    |.else
+    |  bgeu CARG4, TISNUM, <2
+    |.endif
+    |  fcvt.d.w FTMP0, CARG1		// RD is a number. Convert RA to a number.
+    |   fmv.d.x FTMP2, CARG2
+    |  j <5
+    |
+    |.if FFI
+    |7:	// RA not int, not number
+    |  li TMP0, LJ_TCDATA
+    |  bne CARG3, TMP0, <2
+    |  j ->vmeta_equal_cd
+    |
+    |8:	// RD not int, not number
+    |  li TMP0, LJ_TCDATA
+    |  bne CARG4, TMP0, <2
+    |  j ->vmeta_equal_cd
+    |.endif
+    break;
+
+  case BC_ISEQP: case BC_ISNEP:  /* Slot ==/~= primitive type; the outcome decides whether the following JMP is taken. */
+    vk = op == BC_ISEQP;
+    |  // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
+    |  add RA, BASE, RA
+    |   srliw TMP0, RD, 3		// Undo the *8 scaling of the operand.
+    |  ld TMP1, 0(RA)
+    |   not TMP0, TMP0		// ~TMP0: ~0 ~1 ~2
+    |    lhu TMP2, OFS_RD(PC)		// TMP2: RD in next INS, branch target
+    |  gettp TMP1, TMP1
+    |    addi PC, PC, 4
+    |   xor TMP0, TMP1, TMP0		// TMP0=0 A=D; TMP0!=0 A!=D
+    |.if FFI
+    |  li TMP3, LJ_TCDATA
+    |  bxeq TMP1, TMP3, ->vmeta_equal_cd
+    |.endif
+    |  decode_BC4b TMP2
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  addw TMP2, TMP2, TMP3		// TMP2=(jump-0x8000)<<2
+    if (vk) {
+      |  seqz TMP4, TMP0		// TMP4=1: take the jump
+    } else {
+      |  snez TMP4, TMP0		// TMP4=1: take the jump
+    }
+    |  neg TMP4, TMP4		// 0 or all-ones mask
+    |  and TMP2, TMP2, TMP4		// Jump offset, or 0 if not taken.
+    |  add PC, PC, TMP2
+    |  ins_next
+    break;
+
+  /* -- Unary test and copy ops ------------------------------------------- */
+
+  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:  /* Truthiness test of a slot; the *C variants also copy the slot to RA when jumping. */
+    |  // RA = dst*8 or unused, RD = src*8, JMP with RD = target
+    |  add RD, BASE, RD
+    |   lhu TMP2, OFS_RD(PC)		// TMP2=jump
+    |  ld TMP0, 0(RD)
+    |   addi PC, PC, 4
+    |  gettp TMP0, TMP0
+    |  add RA, BASE, RA
+    |  sltiu TMP0, TMP0, LJ_TISTRUECOND		// TMP0=1 true; TMP0=0 false
+    |  decode_BC4b TMP2
+    |  lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  ld CRET1, 0(RD)		// Reload the full value; TMP0 now holds only the test result.
+    |  addw TMP2, TMP2, TMP3		// (jump-0x8000)<<2
+    if (op == BC_IST || op == BC_ISTC) {
+      |  beqz TMP0, >1		// False: skip copy and jump.
+      if (op == BC_ISTC) {
+        |  sd CRET1, 0(RA)
+      }
+    } else {
+      |  bnez TMP0, >1		// True: skip copy and jump.
+      if (op == BC_ISFC) {
+	|  sd CRET1, 0(RA)
+      }
+    }
+    |  add PC, PC, TMP2
+    |1:
+    |  ins_next
+    break;
+
+  case BC_ISTYPE:  /* Type assertion on a slot; mismatch is handled by vmeta_istype. */
+    |  // RA = src*8, RD = -type*8
+    |  add TMP0, BASE, RA
+    |  srliw TMP1, RD, 3		// TMP1 = -type (undo the *8 scaling)
+    |  ld TMP0, 0(TMP0)
+    |  gettp TMP0, TMP0
+    |  add TMP0, TMP0, TMP1		// if itype of RA == type, then TMP0=0
+    |  bxnez TMP0, ->vmeta_istype
+    |  ins_next
+    break;
+  case BC_ISNUM:  /* Number assertion on a slot; failure is handled by vmeta_istype. */
+    |  // RA = src*8, RD = -(TISNUM-1)*8
+    |  add TMP0, BASE, RA
+    |  ld TMP0, 0(TMP0)
+    |  checknum TMP0, ->vmeta_istype		// RD is not needed here.
+    |  ins_next
+    break;
+
+  /* -- Unary ops --------------------------------------------------------- */
+
+  case BC_MOV:  /* Copy the 64 bit value of slot RD to slot RA. */
+    |  // RA = dst*8, RD = src*8
+    |  add RD, BASE, RD
+    |   add RA, BASE, RA
+    |  ld TMP0, 0(RD)
+    |  ins_next1
+    |  sd TMP0, 0(RA)
+    |  ins_next2
+    break;
+  case BC_NOT:  /* Store the boolean negation of slot RD's truthiness in slot RA. */
+    |  // RA = dst*8, RD = src*8
+    |  add RD, BASE, RD
+    |   add RA, BASE, RA
+    |  ld TMP0, 0(RD)
+    |   li TMP1, LJ_TTRUE
+    |  ins_next1
+    |  gettp TMP0, TMP0
+    |  sltu TMP0, TMP1, TMP0		// TMP0=1: tag is above LJ_TTRUE (unsigned)
+    |  addiw TMP0, TMP0, 1		// TMP0 = 1 or 2
+    |  slli TMP0, TMP0, 47
+    |  not TMP0, TMP0		// Result is ~(1<<47) or ~(2<<47).
+    |   sd TMP0, 0(RA)
+    |  ins_next2
+    break;
+  case BC_UNM:  /* Unary minus: integers are negated as 32 bit values, numbers get their sign bit flipped. */
+    |  // RA = dst*8, RD = src*8
+    |  add RB, BASE, RD
+    |  add RA, BASE, RA
+    |  ld TMP0, 0(RB)
+    |  lui TMP1, 0x80000		// TMP1 = 0x80000000, sign-extended
+    |  gettp CARG3, TMP0
+    |  bne CARG3, TISNUM, >1
+    |  negw TMP0, TMP0
+    |  bxeq TMP0, TMP1, ->vmeta_unm      // Meta handler deals with -2^31.
+    |  zext.w TMP0, TMP0
+    |  settp_b TMP0, TISNUM
+    |  j >2
+    |1:  // Not an integer.
+    |  sltiu TMP3, CARG3, LJ_TISNUM
+    |   slli TMP1, TMP1, 32		// TMP1 = bit 63 only
+    |  bxeqz TMP3, ->vmeta_unm
+    |   xor TMP0, TMP0, TMP1     // sign => ~sign
+    |2:
+    |   sd TMP0, 0(RA)
+    |  ins_next
+    break;
+  case BC_LEN:  /* Length operator: inline for strings, lj_tab_len for tables, vmeta_len otherwise. */
+    |  // RA = dst*8, RD = src*8
+    |  add CARG2, BASE, RD
+    |  ld TMP0, 0(CARG2)
+    |   add RA, BASE, RA
+    |  gettp TMP1, TMP0
+    |  addi TMP2, TMP1, -LJ_TSTR		// TMP2=0: string
+    |   cleartp STR:CARG1, TMP0		// Untagged pointer, also used as the table at 2:.
+    |  bnez TMP2, >2
+    |   lwu CARG1, STR:CARG1->len
+    |1:  // CARG1 = length as an unsigned 32 bit value.
+    |  settp_b CARG1, TISNUM
+    |  sd CARG1, 0(RA)
+    |  ins_next
+    |2:  // Not a string.
+    |  addi TMP2, TMP1, -LJ_TTAB		// TMP2=0: table
+    |  bxnez TMP2, ->vmeta_len
+#if LJ_52
+    |  ld TAB:TMP2, TAB:CARG1->metatable
+    |  bnez TAB:TMP2, >9
+    |3:
+#endif
+    |->BC_LEN_Z:
+    |  call_intern BC_LEN, lj_tab_len		// (GCtab *t)
+    |  // Returns uint32_t (but less than 2^31).
+    |  j <1
+#if LJ_52
+    |9:  // Table has a metatable: check for __len.
+    |  lbu TMP0, TAB:TMP2->nomm
+    |  andi TMP0, TMP0, 1<<MM_len
+    |  bnez TMP0, <3			// 'no __len' flag set: done.
+    |  j ->vmeta_len
+#endif
+    break;
+
+  /* -- Binary ops -------------------------------------------------------- */
+
+    |.macro fpmod, a, b, c		// a = b - floor(b/c)*c. Uses FARG1/FRET1 and calls ->vm_floor.
+    |  fdiv.d FARG1, b, c
+    |  jal ->vm_floor		// floor(b/c)
+    |  fmul.d a, FRET1, c		// b and c are read again after the call.
+    |  fsub.d a, b, a		// b - floor(b/c)*c
+    |.endmacro
+    |
+    |.macro ins_arithpre		// Decode operands: leaves RB/RC as addresses of the two source values.
+    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);		// 0: VN, 1: NV, else VV/CAT (see switch below)
+    |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+    ||if (vk == 1) {
+    |   // RA = dst*8, RB = num_const*8, RC = src1*8
+    |   decode_RB8 RC, INS
+    |   decode_RDtoRC8 RB, RD
+    ||} else {
+    |   // RA = dst*8, RB = src1*8, RC = num_const*8
+    |   decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    ||}
+    ||switch (vk) {
+    ||case 0:			// suffix is VN
+    |   add RB, BASE, RB
+    |   add RC, KBASE, RC
+    ||  break;
+    ||case 1:			// suffix is NV
+    |   add RC, BASE, RC
+    |   add RB, KBASE, RB
+    ||  break;
+    ||default:			// CAT or suffix is VV
+    |   add RB, BASE, RB
+    |   add RC, BASE, RC
+    ||  break;
+    ||}
+    |.endmacro
+    |
+    |.macro ins_arithfp, fpins, itype1, itype2		// FP path: both tags must be below TISNUM, else ->vmeta_arith.
+    |  fld FTMP0, 0(RB)
+    |  sltu itype1, itype1, TISNUM		// itype1=1: tag is below TISNUM
+    |  sltu itype2, itype2, TISNUM		// itype2=1: tag is below TISNUM
+    |  fld FTMP2, 0(RC)
+    |  and itype1, itype1, itype2
+    |  add RA, BASE, RA
+    |  bxeqz itype1, ->vmeta_arith
+    |  fpins FRET1, FTMP0, FTMP2
+    |  ins_next1
+    |  fsd FRET1, 0(RA)
+    |  ins_next2
+    |.endmacro
+    |
+    |.macro ins_arithead, itype1, itype2, tval1, tval2 
+    |  ld tval1, 0(RB)		// Load both operands from the addresses in RB/RC.
+    |  ld tval2, 0(RC)
+    |  // Check for two integers.
+    |  gettp itype1, tval1
+    |  gettp itype2, tval2
+    |.endmacro
+    |
+    |.macro ins_arithdn, intins, fpins		// Integer fast path with overflow check, else FP path.
+    |  ins_arithpre
+    |  ins_arithead TMP0, TMP1, CARG1, CARG2
+    |  bne TMP0, TISNUM, >1
+    |  bne TMP1, TISNUM, >1
+    |  sext.w CARG3, CARG1
+    |  sext.w CARG4, CARG2
+    |.if "intins" == "addw"
+    |  intins CRET1, CARG3, CARG4
+    |  xor TMP1, CRET1, CARG3		// ((y^a) & (y^b)) < 0: overflow.
+    |  xor TMP2, CRET1, CARG4
+    |  and TMP1, TMP1, TMP2
+    |  add RA, BASE, RA
+    |  bxltz TMP1, ->vmeta_arith
+    |.elif "intins" == "subw"
+    |  intins CRET1, CARG3, CARG4
+    |  xor TMP1, CRET1, CARG3		// ((y^a) & (a^b)) < 0: overflow.
+    |  xor TMP2, CARG3, CARG4
+    |  and TMP1, TMP1, TMP2
+    |  add RA, BASE, RA
+    |  bxltz TMP1, ->vmeta_arith
+    |.elif "intins" == "mulw"
+    |  mul TMP2, CARG3, CARG4		// Full 64 bit product of the sign-extended operands.
+    |  add RA, BASE, RA
+    |  sext.w CRET1, TMP2
+    |  bxne CRET1, TMP2, ->vmeta_arith		// 63-32bit not all 0 or 1: overflow.
+    |.endif
+    |  zext.w CRET1, CRET1
+    |  settp_b CRET1, TISNUM
+    |  sd CRET1, 0(RA)
+    |  ins_next
+    |1:  // Check for two numbers.
+    |  ins_arithfp, fpins, TMP0, TMP1
+    |.endmacro
+    |
+    |.macro ins_arithdiv, fpins		// No integer fast path: always uses the FP path.
+    |  ins_arithpre
+    |  ins_arithead TMP0, TMP1, CARG1, CARG2
+    |  ins_arithfp, fpins, TMP0, TMP1
+    |.endmacro
+    |
+    |.macro ins_arithmod, fpins, BC		// Integer modulo via lj_vm_modi, else FP path.
+    |  ins_arithpre
+    |  ins_arithead TMP0, TMP1, CARG1, CARG2
+    |  bne TMP0, TISNUM, >1
+    |  bne TMP1, TISNUM, >1
+    |  sext.w CARG1, CARG1
+    |  sext.w CARG2, CARG2
+    |  add RA, BASE, RA
+    |  bxeqz CARG2, ->vmeta_arith		// Integer divisor is zero.
+    |  call_intern BC, lj_vm_modi
+    |  zext.w CRET1, CRET1
+    |  settp_b CRET1, TISNUM
+    |  sd CRET1, 0(RA)
+    |  ins_next
+    |1:  // Check for two numbers.
+    |  ins_arithfp, fpins, TMP0, TMP1
+    |.endmacro
+    
+  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:  /* The VN/NV/VV operand form is selected inside ins_arithpre. */
+    |  ins_arithdn addw, fadd.d
+    break;
+  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+    |  ins_arithdn subw, fsub.d
+    break;
+  case BC_MULVN: case BC_MULNV: case BC_MULVV:
+    |  ins_arithdn mulw, fmul.d
+    break;
+  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+    |  ins_arithdiv fdiv.d
+    break;
+  case BC_MODVN:  /* The MOD cases are separate because each passes its own opcode to ins_arithmod. */
+    |  ins_arithmod fpmod, BC_MODVN
+    break;
+  case BC_MODNV:
+    |  ins_arithmod fpmod, BC_MODNV
+    break;
+  case BC_MODVV:
+    |  ins_arithmod fpmod, BC_MODVV
+    break;
+  case BC_POW:  /* Power operator: calls pow() if both tags are below LJ_TISNUM, else vmeta_arith. */
+    |  ins_arithpre
+    |  ld CARG1, 0(RB)
+    |   ld CARG2, 0(RC)
+    |  gettp TMP0, CARG1
+    |   gettp TMP1, CARG2
+    |  sltiu TMP0, TMP0, LJ_TISNUM
+    |   sltiu TMP1, TMP1, LJ_TISNUM
+    |  and TMP0, TMP0, TMP1		// TMP0=1: both tags are below LJ_TISNUM
+    |   add RA, BASE, RA
+    |  bxeqz TMP0, ->vmeta_arith
+    |  fld FARG1, 0(RB)
+    |  fld FARG2, 0(RC)
+    |  call_extern pow
+    |  ins_next1
+    |  fsd FRET1, 0(RA)
+    |  ins_next2
+    break;
+
+  case BC_CAT:  /* Concatenate slots RB..RC via lj_meta_cat and copy the result to RA. */
+    |  // RA = dst*8, RB = src_start*8, RC = src_end*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  sub CARG3, RC, RB
+    |   sd BASE, L->base
+    |  add CARG2, BASE, RC
+    |  mv MULTRES, RB		// Keep src_start*8 for after the call.
+    |->BC_CAT_Z:
+    |  srliw CARG3, CARG3, 3		// left = (src_end - src_start), unscaled
+    |   sd PC, SAVE_PC(sp)
+    |   mv CARG1, L
+    |  call_intern BC_CAT, lj_meta_cat	// (lua_State *L, TValue *top, int left)
+    |  // Returns NULL (finished) or TValue * (metamethod).
+    |   ld BASE, L->base
+    |  bxnez CRET1, ->vmeta_binop
+    |  add RB, BASE, MULTRES		// Result is in the first source slot.
+    |  ld TMP0, 0(RB)
+    |   add RA, BASE, RA
+    |  sd TMP0, 0(RA)
+    |  ins_next
+    break;
+
+  /* -- Constant ops ------------------------------------------------------ */
+
+  case BC_KSTR:  /* Store a string constant, tagged with LJ_TSTR, in slot RA. */
+    |  // RA = dst*8, RD = str_const*8 (~)
+    |  sub TMP1, KBASE, RD
+    |   li TMP2, LJ_TSTR
+    |  ld TMP0, -8(TMP1)		// KBASE-8-str_const*8
+    |  add RA, BASE, RA
+    |   settp TMP0, TMP2
+    |  sd TMP0, 0(RA)
+    |  ins_next
+    break;
+  case BC_KCDATA:  /* Store a cdata constant, tagged with LJ_TCDATA, in slot RA. Emits nothing without FFI. */
+    |.if FFI
+    |  // RA = dst*8, RD = cdata_const*8 (~)
+    |  sub TMP1, KBASE, RD
+    |  ld TMP0, -8(TMP1)		// KBASE-8-cdata_const*8
+    |   li TMP2, LJ_TCDATA
+    |  add RA, BASE, RA
+    |   settp TMP0, TMP2
+    |  sd TMP0, 0(RA)
+    |  ins_next
+    |.endif
+    break;
+  case BC_KSHORT:  /* Store a signed 16 bit literal as an integer in slot RA. */
+    |  // RA = dst*8, RD = int16_literal*8
+    |   sraiw RD, INS, 16		// Sign-extended upper 16 bits of INS; the pre-scaled RD is not used.
+    |  add RA, BASE, RA
+    |   zext.w RD, RD
+    |  ins_next1
+    |   settp_b RD, TISNUM
+    |   sd RD, 0(RA)
+    |  ins_next2
+    break;
+  case BC_KNUM:  /* Copy a number constant from KBASE+RD to slot RA. */
+    |  // RA = dst*8, RD = num_const*8
+    |  add RD, KBASE, RD
+    |   add RA, BASE, RA
+    |  ld TMP0, 0(RD)
+    |  ins_next1
+    |  sd TMP0, 0(RA)
+    |  ins_next2
+    break;
+  case BC_KPRI:  /* Store a primitive value, built from its type index, in slot RA. */
+    |  // RA = dst*8, RD = primitive_type*8 (~)
+    |   add RA, BASE, RA
+    |  slli TMP0, RD, 44	// RD is already type*8, so <<44 yields type<<47.
+    |  not TMP0, TMP0		// ~(type<<47)
+    |  ins_next1
+    |   sd TMP0, 0(RA)
+    |  ins_next2
+    break;
+  case BC_KNIL:  /* Store TISNIL to every slot from base through end (inclusive). */
+    |  // RA = base*8, RD = end*8
+    |  add RA, BASE, RA
+    |  sd TISNIL, 0(RA)		// First slot is stored outside the loop.
+    |   addi RA, RA, 8
+    |  add RD, BASE, RD
+    |1:
+    |  sd TISNIL, 0(RA)
+    |  slt TMP0, RA, RD		// More slots after this one?
+    |   addi RA, RA, 8
+    |  bnez TMP0, <1
+    |  ins_next
+    break;
+
+  /* -- Upvalue and function ops ------------------------------------------ */
+
+  case BC_UGET:  /* Load the value of an upvalue of the current function into slot RA. */
+    |  // RA = dst*8, RD = uvnum*8
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   add RA, BASE, RA
+    |  cleartp LFUNC:TMP0
+    |  add RD, RD, LFUNC:TMP0		// func + uvnum*8: indexes the uvptr array.
+    |  ld UPVAL:TMP0, LFUNC:RD->uvptr
+    |  ld TMP1, UPVAL:TMP0->v
+    |  ld TMP2, 0(TMP1)
+    |  ins_next1
+    |   sd TMP2, 0(RA)
+    |  ins_next2
+    break;
+  case BC_USETV:  /* Store slot RD into an upvalue, with a GC write barrier if needed. */
+    |  // RA = uvnum*8, RD = src*8
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   add RD, BASE, RD
+    |  cleartp LFUNC:TMP0
+    |  add RA, RA, LFUNC:TMP0		// func + uvnum*8: indexes the uvptr array.
+    |  ld UPVAL:TMP0, LFUNC:RA->uvptr
+    |   ld CRET1, 0(RD)
+    |  lbu TMP3, UPVAL:TMP0->marked
+    |   ld CARG2, UPVAL:TMP0->v
+    |  andi TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
+    |  lbu TMP0, UPVAL:TMP0->closed
+    |   gettp TMP2, CRET1
+    |   sd CRET1, 0(CARG2)		// The store itself happens unconditionally.
+    |  or TMP3, TMP3, TMP0
+    |  li TMP0, LJ_GC_BLACK|1
+    |   addi TMP2, TMP2, -(LJ_TNUMX+1)
+    |  beq TMP3, TMP0, >2			// Upvalue is closed and black?
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if new value is collectable.
+    |  sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
+    |   cleartp GCOBJ:CRET1, CRET1
+    |  beqz TMP0, <1			// tvisgcv(v)
+    |  lbu TMP3, GCOBJ:CRET1->gch.marked
+    |  andi TMP3, TMP3, LJ_GC_WHITES	// iswhite(v)
+    |  beqz TMP3, <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  mv CARG1, GL		// CARG2 still holds uv->v.
+    |  call_intern BC_USETV, lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  j <1
+    break;
+  case BC_USETS:  /* Store a string constant into an upvalue, with a GC write barrier if needed. */
+    |  // RA = uvnum*8, RD = str_const*8 (~)
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   sub TMP1, KBASE, RD
+    |  cleartp LFUNC:TMP0
+    |  add RA, RA, LFUNC:TMP0		// func + uvnum*8: indexes the uvptr array.
+    |  ld UPVAL:TMP0, LFUNC:RA->uvptr
+    |   ld STR:TMP1, -8(TMP1)		// KBASE-8-str_const*8
+    |  lbu TMP2, UPVAL:TMP0->marked
+    |   ld CARG2, UPVAL:TMP0->v
+    |   lbu TMP3, STR:TMP1->marked
+    |  andi TMP4, TMP2, LJ_GC_BLACK	// isblack(uv)
+    |   lbu TMP2, UPVAL:TMP0->closed
+    |   li TMP0, LJ_TSTR
+    |   settp TMP1, TMP0
+    |  sd TMP1, 0(CARG2)		// The store itself happens unconditionally.
+    |   bnez TMP4, >2
+    |1:
+    |  ins_next
+    |
+    |2:  // Check if string is white and ensure upvalue is closed.
+    |  beqz TMP2, <1
+    |   andi TMP0, TMP3, LJ_GC_WHITES     // iswhite(str)
+    |  beqz TMP0, <1
+    |  // Crossed a write barrier. Move the barrier forward.
+    |  mv CARG1, GL		// CARG2 still holds uv->v.
+    |  call_intern BC_USETS, lj_gc_barrieruv	// (global_State *g, TValue *tv)
+    |  j <1
+    break;
+  case BC_USETN:  /* Store a number constant into an upvalue. No barrier code is emitted here. */
+    |  // RA = uvnum*8, RD = num_const*8
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   add RD, KBASE, RD
+    |  cleartp LFUNC:TMP0
+    |  add TMP0, RA, LFUNC:TMP0		// func + uvnum*8: indexes the uvptr array.
+    |  ld UPVAL:TMP0, LFUNC:TMP0->uvptr
+    |   ld TMP1, 0(RD)
+    |  ld TMP0, UPVAL:TMP0->v
+    |   sd TMP1, 0(TMP0)
+    |  ins_next
+    break;
+  case BC_USETP:  /* Store a primitive value into an upvalue. No barrier code is emitted here. */
+    |  // RA = uvnum*8, RD = primitive_type*8 (~)
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   slli TMP2, RD, 44		// RD is already type*8, so <<44 yields type<<47.
+    |  cleartp LFUNC:TMP0
+    |  add TMP0, RA, LFUNC:TMP0		// func + uvnum*8: indexes the uvptr array.
+    |   not TMP2, TMP2		// ~(type<<47)
+    |  ld UPVAL:TMP0, LFUNC:TMP0->uvptr
+    |  ld TMP1, UPVAL:TMP0->v
+    |   sd TMP2, 0(TMP1)
+    |  ins_next
+    break;
+
+  case BC_UCLO:  /* Branch to the target; close upvalues at or above the level if any are open. */
+    |  // RA = level*8, RD = target
+    |  ld TMP2, L->openupval
+    |  branch_RD			// Do this first since RD is not saved.
+    |   sd BASE, L->base
+    |   mv CARG1, L
+    |  beqz TMP2, >1		// No open upvalues: nothing to close.
+    |   add CARG2, BASE, RA
+    |  call_intern BC_UCLO, lj_func_closeuv	// (lua_State *L, TValue *level)
+    |  ld BASE, L->base
+    |1:
+    |  ins_next
+    break;
+
+  case BC_FNEW:  /* Create a closure from a prototype constant and store it in slot RA. */
+    |  // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
+    |  sub TMP1, KBASE, RD
+    |  ld CARG3, FRAME_FUNC(BASE)
+    |   ld CARG2, -8(TMP1)		// KBASE-8-proto_const*8
+    |    sd BASE, L->base
+    |    sd PC, SAVE_PC(sp)
+    |  cleartp CARG3		// Untagged parent function.
+    |   mv CARG1, L
+    |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
+    |  call_intern BC_FNEW, lj_func_newL_gc
+    |  // Returns GCfuncL *.
+    |   li TMP0, LJ_TFUNC
+    |  ld BASE, L->base
+    |   settp CRET1, TMP0
+    |  add RA, BASE, RA
+    |   sd CRET1, 0(RA)
+    |  ins_next
+    break;
+
+  /* -- Table ops --------------------------------------------------------- */
+
+  case BC_TNEW:  /* Create a new table (TNEW) or duplicate a template table (TDUP) into slot RA. */
+  case BC_TDUP:
+    |  // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
+    |  ld TMP0, GL->gc.total
+    |  ld TMP1, GL->gc.threshold
+    |   sd BASE, L->base
+    |   sd PC, SAVE_PC(sp)
+    |  bgeu TMP0, TMP1, >5		// GC threshold reached: run a GC step first.
+    |1:
+    if (op == BC_TNEW) {
+      |  srliw CARG2, RD, 3
+      |  andi CARG2, CARG2, 0x7ff		// asize = low 11 bits of the operand
+      |  lzi TMP0, 0x801		// NOTE(review): lzi is defined outside this view; presumably loads 0x801.
+      |  addiw TMP2, CARG2, -0x7ff
+      |   srliw CARG3, RD, 14		// hbits = remaining upper bits of the operand
+      |  seqz TMP4, TMP2		// Branch-free select: asize = (asize == 0x7ff) ? TMP0 : asize
+      |  neg TMP4, TMP4
+      |  and TMP0, TMP0, TMP4
+      |  not TMP4, TMP4
+      |  and CARG2, CARG2, TMP4
+      |  or CARG2, CARG2, TMP0
+      |   mv CARG1, L
+      |  // (lua_State *L, int32_t asize, uint32_t hbits)
+      |  call_intern BC_TNEW, lj_tab_new
+      |  // Returns Table *.
+    } else {
+      |  sub TMP1, KBASE, RD
+      |  mv CARG1, L
+      |   ld CARG2, -8(TMP1)		// KBASE-8-tab_const*8
+      |  call_intern BC_TDUP, lj_tab_dup		// (lua_State *L, Table *kt)
+      |  // Returns Table *.
+    }
+    |   li TMP0, LJ_TTAB
+    |  ld BASE, L->base
+    |  ins_next1
+    |   settp CRET1, TMP0
+    |  add RA, BASE, RA
+    |   sd CRET1, 0(RA)
+    |  ins_next2
+    |5:
+    |  mv MULTRES, RD		// Keep RD in MULTRES across the call.
+    |   mv CARG1, L
+    if (op == BC_TNEW) {
+      |  call_intern BC_TNEW, lj_gc_step_fixtop	// (lua_State *L)
+    } else {
+      |  call_intern BC_TDUP, lj_gc_step_fixtop	// (lua_State *L)
+    }
+    |   mv RD, MULTRES
+    |  j <1
+    break;
+
+  case BC_GGET:  /* Global get/set: load the function's env table and continue in the TGETS/TSETS code. */
+    |  // RA = dst*8, RD = str_const*8 (~)
+  case BC_GSET:
+    |  // RA = src*8, RD = str_const*8 (~)
+    |  ld LFUNC:TMP0, FRAME_FUNC(BASE)
+    |   sub TMP1, KBASE, RD
+    |   ld STR:RC, -8(TMP1)	// KBASE-8-str_const*8
+    |  cleartp LFUNC:TMP0
+    |  ld TAB:RB, LFUNC:TMP0->env
+    |  add RA, BASE, RA
+    if (op == BC_GGET) {
+      |  j ->BC_TGETS_Z		// With TAB:RB = env, STR:RC = key, RA = slot address.
+    } else {
+      |  j ->BC_TSETS_Z		// With TAB:RB = env, STR:RC = key, RA = slot address.
+    }
+    break;
+
+  case BC_TGETV:  /* Table get with a key from a slot: inline array lookup for integer keys. */
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG2, BASE, RB
+    |   add CARG3, BASE, RC
+    |  ld TAB:RB, 0(CARG2)
+    |   ld TMP2, 0(CARG3)
+    |   add RA, BASE, RA
+    |  checktab TAB:RB, ->vmeta_tgetv
+    |   gettp TMP3, TMP2
+    |   lw TMP0, TAB:RB->asize
+    |  bne TMP3, TISNUM, >5		// Integer key?
+    |  sext.w TMP2, TMP2
+    |   ld TMP1, TAB:RB->array
+    |  bxgeu TMP2, TMP0, ->vmeta_tgetv	// Integer key and in array part?
+    |   slliw TMP2, TMP2, 3
+    |   add TMP2, TMP1, TMP2
+    |   ld CRET1, 0(TMP2)
+    |  beq CRET1, TISNIL, >2
+    |1:
+    |   sd CRET1, 0(RA)
+    |  ins_next
+    |
+    |2:  // Check for __index if table value is nil.
+    |  ld TAB:TMP2, TAB:RB->metatable
+    |  beqz TAB:TMP2, <1		// No metatable: done.
+    |  lbu TMP0, TAB:TMP2->nomm
+    |  andi TMP0, TMP0, 1<<MM_index
+    |  bnez TMP0, <1			// 'no __index' flag set: done.
+    |  j ->vmeta_tgetv
+    |
+    |5:  // Not an integer key.
+    |  li TMP0, LJ_TSTR
+    |   cleartp RC, TMP2		// Untagged key, as expected at ->BC_TGETS_Z.
+    |  bxne TMP3, TMP0, ->vmeta_tgetv	// String key?
+    |  j ->BC_TGETS_Z
+    break;
+  case BC_TGETS:  /* Table get with a string constant key: inline hash chain lookup. */
+    |  // RA = dst*8, RB = table*8, RC = str_const*8 (~)
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG2, BASE, RB
+    |   sub CARG3, KBASE, RC
+    |  ld TAB:RB, 0(CARG2)
+    |  add RA, BASE, RA
+    |   ld STR:RC, -8(CARG3)		// KBASE-8-str_const*8
+    |  checktab TAB:RB, ->vmeta_tgets1
+    |->BC_TGETS_Z:
+    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+    |  lw TMP0, TAB:RB->hmask
+    |   lw TMP1, STR:RC->sid
+    |    ld NODE:TMP2, TAB:RB->node
+    |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
+    |  slliw TMP0, TMP1, 5
+    |  slliw TMP1, TMP1, 3
+    |  subw TMP1, TMP0, TMP1		// idx*32-idx*8 = idx*24
+    |   li TMP3, LJ_TSTR
+    |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
+    |   settp STR:RC, TMP3		// Tagged key to look for.
+    |1:
+    |  ld CARG1, NODE:TMP2->key
+    |   ld CARG2, NODE:TMP2->val
+    |    ld NODE:TMP1, NODE:TMP2->next
+    |   ld TAB:TMP3, TAB:RB->metatable
+    |  bne CARG1, RC, >4
+    |  beq CARG2, TISNIL, >5		// Key found, but nil value?
+    |3:
+    |   sd CARG2, 0(RA)
+    |  ins_next
+    |
+    |4:  // Follow hash chain.
+    |   mv NODE:TMP2, NODE:TMP1
+    |  bnez NODE:TMP1, <1
+    |  // End of hash chain: key not found, nil result.
+    |
+    |5:  // Check for __index if table value is nil.
+    |   mv CARG2, TISNIL		// Result is nil unless __index is checked.
+    |  beqz TAB:TMP3, <3		// No metatable: done.
+    |  lbu TMP0, TAB:TMP3->nomm
+    |  andi TMP0, TMP0, 1<<MM_index
+    |  bnez TMP0, <3			// 'no __index' flag set: done.
+    |  j ->vmeta_tgets
+    break;
+  case BC_TGETB:  /* Table get with an index literal: inline array lookup. */
+    |  // RA = dst*8, RB = table*8, RC = index*8
+    |  decode_RB8 RB, INS
+    |  add CARG2, BASE, RB
+    |   decode_RDtoRC8 RC, RD
+    |  ld TAB:RB, 0(CARG2)
+    |   add RA, BASE, RA
+    |  srliw TMP0, RC, 3		// TMP0 = unscaled index
+    |  checktab TAB:RB, ->vmeta_tgetb
+    |  lw TMP1, TAB:RB->asize
+    |   ld TMP2, TAB:RB->array
+    |  bxgeu TMP0, TMP1, ->vmeta_tgetb		// Index outside the array part.
+    |   add RC, TMP2, RC		// RC is already index*8.
+    |   ld CRET1, 0(RC)
+    |  beq CRET1, TISNIL, >5
+    |1:
+    |   sd CRET1, 0(RA)
+    |  ins_next
+    |
+    |5:  // Check for __index if table value is nil.
+    |  ld TAB:TMP2, TAB:RB->metatable
+    |  beqz TAB:TMP2, <1		// No metatable: done.
+    |  lbu TMP1, TAB:TMP2->nomm
+    |  andi TMP1, TMP1, 1<<MM_index
+    |  bnez TMP1, <1			// 'no __index' flag set: done.
+    |  j ->vmeta_tgetb			// Caveat: preserve TMP0 and CARG2!
+    break;
+  case BC_TGETR:  /* Table get from the array part; no type or metatable checks are emitted here. */
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add RB, BASE, RB
+    |   add RC, BASE, RC
+    |  ld TAB:CARG1, 0(RB)
+    |   lw CARG2, 0(RC)		// Only the low 32 bits of the key slot are read.
+    |    add RA, BASE, RA
+    |  cleartp TAB:CARG1
+    |  lw TMP0, TAB:CARG1->asize
+    |   ld TMP1, TAB:CARG1->array
+    |  bxgeu CARG2, TMP0, ->vmeta_tgetr		// In array part?
+    |   slliw TMP2, CARG2, 3
+    |   add TMP3, TMP1, TMP2
+    |   ld TMP1, 0(TMP3)
+    |->BC_TGETR_Z:
+    |  ins_next1
+    |   sd TMP1, 0(RA)
+    |  ins_next2
+    break;
+
+  case BC_TSETV:  /* Table set with a key from a slot: inline array store for integer keys. */
+    |  // RA = src*8, RB = table*8, RC = key*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG2, BASE, RB
+    |   add CARG3, BASE, RC
+    |  ld TAB:RB, 0(CARG2)
+    |   ld TMP2, 0(CARG3)
+    |  add RA, BASE, RA
+    |  checktab TAB:RB, ->vmeta_tsetv
+    |   sext.w RC, TMP2
+    |  checkint TMP2, >5
+    |  lw TMP0, TAB:RB->asize
+    |   ld TMP1, TAB:RB->array
+    |  bxgeu RC, TMP0, ->vmeta_tsetv		// Integer key and in array part?
+    |   slliw TMP2, RC, 3
+    |  add TMP1, TMP1, TMP2
+    |   lbu TMP3, TAB:RB->marked
+    |  ld TMP0, 0(TMP1)
+    |   ld CRET1, 0(RA)
+    |  beq TMP0, TISNIL, >3
+    |1:
+    |   andi TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
+    |   sd CRET1, 0(TMP1)
+    |  bnez TMP2, >7
+    |2:
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  ld TAB:TMP2, TAB:RB->metatable
+    |  beqz TAB:TMP2, <1		// No metatable: done.
+    |  lbu TMP2, TAB:TMP2->nomm
+    |  andi TMP2, TMP2, 1<<MM_newindex
+    |  bnez TMP2, <1			// 'no __newindex' flag set: done.
+    |  j ->vmeta_tsetv
+    |5:  // Not an integer key.
+    |  gettp TMP0, TMP2
+    |  addi TMP0, TMP0, -LJ_TSTR
+    |  bxnez TMP0, ->vmeta_tsetv		// String key?
+    |  cleartp STR:RC, TMP2
+    |  j ->BC_TSETS_Z			// Yes: continue in the string key code.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMP3, TMP0, <2
+    break;
+  case BC_TSETS:  /* Table set with a string constant key: inline hash chain lookup, new key via lj_tab_newkey. */
+    |  // RA = src*8, RB = table*8, RC = str_const*8 (~)
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG2, BASE, RB
+    |   sub CARG3, KBASE, RC
+    |    ld TAB:RB, 0(CARG2)
+    |   ld RC, -8(CARG3)		// KBASE-8-str_const*8
+    |  add RA, BASE, RA
+    |   cleartp STR:RC
+    |  checktab TAB:RB, ->vmeta_tsets1
+    |->BC_TSETS_Z:
+    |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+    |  lw TMP0, TAB:RB->hmask
+    |   lw TMP1, STR:RC->sid
+    |    ld NODE:TMP2, TAB:RB->node
+    |   sb x0, TAB:RB->nomm		// Clear metamethod cache.
+    |  and TMP1, TMP1, TMP0		// idx = str->sid & tab->hmask
+    |  slliw TMP0, TMP1, 5
+    |  slliw TMP1, TMP1, 3
+    |  subw TMP1, TMP0, TMP1		// idx*32-idx*8 = idx*24
+    |   li TMP3, LJ_TSTR
+    |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
+    |   settp STR:RC, TMP3		// Tagged key to look for.
+    |  fld FTMP0, 0(RA)		// FTMP0 = value to store (written at 2: or after lj_tab_newkey).
+    |1:
+    |  ld TMP0, NODE:TMP2->key
+    |   ld CARG2, NODE:TMP2->val
+    |    ld NODE:TMP1, NODE:TMP2->next
+    |     lbu TMP3, TAB:RB->marked
+    |  bne TMP0, RC, >5
+    |    ld TAB:TMP0, TAB:RB->metatable
+    |   beq CARG2, TISNIL, >4		// Key found, but nil value?
+    |2:
+    |  andi TMP3, TMP3, LJ_GC_BLACK	// isblack(table)
+    |   fsd FTMP0, NODE:TMP2->val
+    |  bnez TMP3, >7
+    |3:
+    |  ins_next
+    |
+    |4:  // Check for __newindex if previous value is nil.
+    |  beqz TAB:TMP0, <2		// No metatable: done.
+    |  lbu TMP0, TAB:TMP0->nomm
+    |  andi TMP0, TMP0, 1<<MM_newindex
+    |  bnez TMP0, <2			// 'no __newindex' flag set: done.
+    |  j ->vmeta_tsets
+    |
+    |5:  // Follow hash chain.
+    |   mv NODE:TMP2, NODE:TMP1
+    |  bnez NODE:TMP1, <1
+    |  // End of hash chain: key not found, add a new one
+    |
+    |  // But check for __newindex first.
+    |  ld TAB:TMP2, TAB:RB->metatable
+    |   addi CARG3, GL, offsetof(global_State, tmptv)		// CARG3 = &g->tmptv, receives the tagged key at 6:.
+    |  beqz TAB:TMP2, >6		// No metatable: continue.
+    |  lbu TMP0, TAB:TMP2->nomm
+    |  andi TMP0, TMP0, 1<<MM_newindex
+    |  bxeqz TMP0, ->vmeta_tsets		// 'no __newindex' flag NOT set: check.
+    |6:
+    |  sd RC, 0(CARG3)
+    |   sd BASE, L->base
+    |  mv CARG2, TAB:RB
+    |   sd PC, SAVE_PC(sp)
+    |   mv CARG1, L
+    |  // (lua_State *L, GCtab *t, TValue *k)
+    |  call_intern BC_TSETS, lj_tab_newkey
+    |  // Returns TValue *.
+    |  ld BASE, L->base
+    |   fsd FTMP0, 0(CRET1)
+    |  j <3				// No 2nd write barrier needed.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMP3, TMP0, <3
+    break;
+  case BC_TSETB:  /* Table set with an index literal: inline array store. */
+    |  // RA = src*8, RB = table*8, RC = index*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG2, BASE, RB
+    |   add RA, BASE, RA
+    |  ld TAB:RB, 0(CARG2)
+    |  srliw TMP0, RC, 3		// TMP0 = unscaled index
+    |  checktab RB, ->vmeta_tsetb
+    |  lw TMP1, TAB:RB->asize
+    |   ld TMP2, TAB:RB->array
+    |  bxgeu TMP0, TMP1, ->vmeta_tsetb		// Index outside the array part.
+    |   add RC, TMP2, RC		// RC is already index*8.
+    |  ld TMP1, 0(RC)
+    |   lbu TMP3, TAB:RB->marked
+    |  beq TMP1, TISNIL, >5
+    |1:
+    |   ld CRET1, 0(RA)
+    |  andi TMP1, TMP3, LJ_GC_BLACK	// isblack(table)
+    |    sd CRET1, 0(RC)
+    |  bnez TMP1, >7
+    |2:
+    |  ins_next
+    |
+    |5:  // Check for __newindex if previous value is nil.
+    |  ld TAB:TMP2, TAB:RB->metatable
+    |  beqz TAB:TMP2, <1		// No metatable: done.
+    |  lbu TMP1, TAB:TMP2->nomm
+    |  andi TMP1, TMP1, 1<<MM_newindex
+    |  bnez TMP1, <1			// 'no __newindex' flag set: done.
+    |  j ->vmeta_tsetb	// Caveat: preserve TMP0 and CARG2!
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMP3, TMP0, <2
+    break;
+  case BC_TSETR:
+    |  // RA = dst*8, RB = table*8, RC = key*8
+    |  decode_RB8 RB, INS
+    |   decode_RDtoRC8 RC, RD
+    |  add CARG1, BASE, RB
+    |   add CARG3, BASE, RC
+    |  ld TAB:CARG2, 0(CARG1)
+    |   lw CARG3, 0(CARG3)
+    |  cleartp TAB:CARG2
+    |  lbu TMP3, TAB:CARG2->marked
+    |   lw TMP0, TAB:CARG2->asize
+    |    ld TMP1, TAB:CARG2->array
+    |  andi TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
+    |   add RA, BASE, RA
+    |  bnez TMP2, >7
+    |2:
+    |  bxgeu CARG3, TMP0, ->vmeta_tsetr		// In array part?
+    |   slliw TMP2, CARG3, 3
+    |   add CRET1, TMP1, TMP2
+    |->BC_TSETR_Z:
+    |  ld TMP1, 0(RA)
+    |  ins_next1
+    |  sd TMP1, 0(CRET1)
+    |  ins_next2
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP3, CRET1, <2
+    break;
+
+  case BC_TSETM:
+    |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
+    |  add RA, BASE, RA
+    |1:
+    |   add TMP3, KBASE, RD
+    |  ld TAB:CARG2, -8(RA)		// Guaranteed to be a table.
+    |    addiw TMP0, MULTRES, -8
+    |   lw TMP3, 0(TMP3)		// Integer constant is in lo-word.
+    |   srliw CARG3, TMP0, 3
+    |    beqz TMP0, >4			// Nothing to copy?
+    |  cleartp TAB:CARG2
+    |  addw CARG3, CARG3, TMP3
+    |  lw TMP2, TAB:CARG2->asize
+    |   slliw TMP1, TMP3, 3
+    |    lbu TMP3, TAB:CARG2->marked
+    |   ld CARG1, TAB:CARG2->array
+    |  bltu TMP2, CARG3, >5
+    |   add TMP2, RA, TMP0
+    |   add TMP1, TMP1, CARG1
+    |  andi TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
+    |3:  // Copy result slots to table.
+    |   ld CRET1, 0(RA)
+    |    addi RA, RA, 8
+    |   sd CRET1, 0(TMP1)
+    |    addi TMP1, TMP1, 8
+    |  bltu RA, TMP2, <3
+    |  bnez TMP0, >7
+    |4:
+    |  ins_next
+    |
+    |5:  // Need to resize array part.
+    |   sd BASE, L->base
+    |   sd PC, SAVE_PC(sp)
+    |  mv BASE, RD
+    |   mv CARG1, L
+    |  // (lua_State *L, GCtab *t, int nasize)
+    |  call_intern BC_TSETM, lj_tab_reasize
+    |  // Must not reallocate the stack.
+    |  mv RD, BASE
+    |   ld BASE, L->base        // Reload BASE for lack of a saved register.
+    |  j <1
+    |
+    |7:  // Possible table write barrier for any value. Skip valiswhite check.
+    |  barrierback TAB:CARG2, TMP3, TMP0, <4
+    break;
+
+  /* -- Calls and vararg handling ----------------------------------------- */
+
+  case BC_CALLM:
+    |  // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
+    |  decode_RDtoRC8 NARGS8:RC, RD
+    |   addw NARGS8:RC, NARGS8:RC, MULTRES
+    |  j ->BC_CALL_Z
+    break;
+  case BC_CALL:
+    |  // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
+    |  decode_RDtoRC8 NARGS8:RC, RD
+    |->BC_CALL_Z:
+    |  mv TMP2, BASE
+    |  add BASE, BASE, RA
+    |   ld LFUNC:RB, 0(BASE)
+    |   addi BASE, BASE, 16
+    |  addiw NARGS8:RC, NARGS8:RC, -8
+    |  checkfunc RB, ->vmeta_call
+    |  ins_call
+    break;
+
+  case BC_CALLMT:
+    |  // RA = base*8, (RB = 0,) RC = extra_nargs*8
+    |  addw NARGS8:RD, NARGS8:RD, MULTRES
+    |  j ->BC_CALLT_Z1
+    break;
+  case BC_CALLT:
+    |  // RA = base*8, (RB = 0,) RC = (nargs+1)*8
+    |->BC_CALLT_Z1:
+    |  add RA, BASE, RA
+    |  ld LFUNC:RB, 0(RA)
+    |   mv NARGS8:RC, RD
+    |    ld TMP1, FRAME_PC(BASE)
+    |   addi RA, RA, 16
+    |  addiw NARGS8:RC, NARGS8:RC, -8
+    |  checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt
+    |->BC_CALLT_Z:
+    |  andi TMP0, TMP1, FRAME_TYPE	// Caveat: preserve TMP0 until the 'or'.
+    |   lbu TMP3, LFUNC:CARG3->ffid
+    |   xori TMP2, TMP1, FRAME_VARG
+    |  bnez TMP0, >7
+    |1:
+    |  sd LFUNC:RB, FRAME_FUNC(BASE)		// Copy function down, but keep PC.
+    |  sltiu CARG4, TMP3, 2		// (> FF_C) Calling a fast function?
+    |  mv TMP2, BASE
+    |  mv RB, CARG3
+    |   mv TMP3, NARGS8:RC
+    |  beqz NARGS8:RC, >3
+    |2:
+    |   ld CRET1, 0(RA)
+    |    addi RA, RA, 8
+    |  addiw TMP3, TMP3, -8
+    |   sd CRET1, 0(TMP2)
+    |    addi TMP2, TMP2, 8
+    |  bnez TMP3, <2
+    |3:
+    |  or TMP0, TMP0, CARG4
+    |  beqz TMP0, >5
+    |4:
+    |  ins_callt
+    |
+    |5:  // Tailcall to a fast function with a Lua frame below.
+    |  lw INS, -4(TMP1)
+    |  decode_RA8 RA, INS
+    |  sub TMP1, BASE, RA
+    |  ld TMP1, -32(TMP1)
+    |  cleartp LFUNC:TMP1
+    |  ld TMP1, LFUNC:TMP1->pc
+    |   ld KBASE, PC2PROTO(k)(TMP1)     // Need to prepare KBASE.
+    |  j <4
+    |
+    |7:  // Tailcall from a vararg function.
+    |  andi CARG4, TMP2, FRAME_TYPEP
+    |   sub TMP2, BASE, TMP2          // Relocate BASE down.
+    |  bnez CARG4, <1			// Vararg frame below?
+    |  mv BASE, TMP2
+    |  ld TMP1, FRAME_PC(TMP2)
+    |   andi TMP0, TMP1, FRAME_TYPE
+    |  j <1
+    break;
+
+  case BC_ITERC:
+    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
+    |  mv TMP2, BASE			// Save old BASE for vmeta_call.
+    |  add BASE, BASE, RA
+    |  ld RB, -24(BASE)		//A, A+1, A+2 = A-3, A-2, A-1.
+    |   ld CARG1, -16(BASE)
+    |    ld CARG2, -8(BASE)
+    |  li NARGS8:RC, 16		// Iterators get 2 arguments.
+    |  sd RB, 0(BASE)			// Copy callable.
+    |   sd CARG1, 16(BASE)		// Copy state.
+    |    sd CARG2, 24(BASE)		// Copy control var.
+    |   addi BASE, BASE, 16
+    |  checkfunc RB, ->vmeta_call
+    |  ins_call
+    break;
+
+  case BC_ITERN:
+    |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+    |.if JIT
+    |  hotloop
+    |.endif
+    |->vm_IITERN:
+    |  add RA, BASE, RA
+    |  ld TAB:RB, -16(RA)
+    |   lw RC, -8(RA)		// Get index from control var.
+    |  cleartp TAB:RB
+    |   addi PC, PC, 4
+    |  lw TMP0, TAB:RB->asize
+    |   ld TMP1, TAB:RB->array
+    |  slli CARG3, TISNUM, 47
+    |1:  // Traverse array part.
+    |  bleu TMP0, RC, >5			// Index points after array part?
+    |   slliw TMP3, RC, 3
+    |  add TMP3, TMP1, TMP3
+    |  ld CARG1, 0(TMP3)
+    |     lhu RD, -4+OFS_RD(PC)		// ITERL RD
+    |   or TMP2, RC, CARG3
+    |   addiw RC, RC, 1
+    |  beq CARG1, TISNIL, <1		// Skip holes in array part.
+    |   sd TMP2, 0(RA)
+    |  sd CARG1, 8(RA)
+    |     lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |     decode_BC4b RD
+    |     add RD, RD, TMP3
+    |   sw RC, -8(RA)		// Update control var.
+    |     add PC, PC, RD
+    |3:
+    |  ins_next
+    |
+    |5:  // Traverse hash part.
+    |  lw TMP1, TAB:RB->hmask
+    |  subw RC, RC, TMP0
+    |   ld TMP2, TAB:RB->node
+    |6:
+    |  bltu TMP1, RC, <3		// End of iteration? Branch to ITERL+1.
+    |   slliw TMP3, RC, 5
+    |   slliw RB, RC, 3
+    |   subw TMP3, TMP3, RB
+    |  add NODE:TMP3, TMP3, TMP2	// node = tab->node + (idx*32-idx*8)
+    |  ld CARG1, 0(NODE:TMP3)
+    |     lhu RD, -4+OFS_RD(PC)		// ITERL RD
+    |   addiw RC, RC, 1
+    |  beq CARG1, TISNIL, <6		// Skip holes in hash part.
+    |  ld CARG2, NODE:TMP3->key
+    |     lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  sd CARG1, 8(RA)
+    |    addw RC, RC, TMP0
+    |     decode_BC4b RD
+    |     addw RD, RD, TMP3
+    |  sd CARG2, 0(RA)
+    |     add PC, PC, RD
+    |   sw RC, -8(RA)                // Update control var.
+    |  j <3
+    break;
+
+  case BC_ISNEXT:
+    |  // RA = base*8, RD = target (points to ITERN)
+    |  add RA, BASE, RA
+    |    srliw TMP0, RD, 1
+    |  ld CFUNC:CARG1, -24(RA)
+    |    add TMP0, PC, TMP0
+    |   ld CARG2, -16(RA)
+    |   ld CARG3, -8(RA)
+    |    lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff		// -BCBIAS_J*4
+    |  checkfunc CFUNC:CARG1, >5
+    |  gettp CARG2, CARG2
+    |  addi CARG2, CARG2, -LJ_TTAB
+    |  lbu TMP1, CFUNC:CARG1->ffid
+    |  addi CARG3, CARG3, -LJ_TNIL
+    |  or TMP3, CARG2, CARG3
+    |  addi TMP1, TMP1, -FF_next_N
+    |  or TMP3, TMP3, TMP1
+    |   lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff
+    |  bnez TMP3, >5
+    |  add PC, TMP0, TMP2
+    |  addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)
+    |  slli TMP1, TMP1, 32
+    |  sd TMP1, -8(RA)
+    |1:
+    |  ins_next
+    |5:  // Despecialize bytecode if any of the checks fail.
+    |  li TMP3, BC_JMP
+    |   li TMP1, BC_ITERC
+    |  sb TMP3, -4+OFS_OP(PC)
+    |   add PC, TMP0, TMP2
+    |.if JIT
+    |  lb TMP0, OFS_OP(PC)
+    |  li TMP3, BC_ITERN
+    |  lhu TMP2, OFS_RD(PC)
+    |  bne TMP0, TMP3, >6
+    |.endif
+    |  sb TMP1, OFS_OP(PC)
+    |  j <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  ld TMP0, GL_J(trace)(GL)	// Assumes J.trace in-reach relative to GL.
+    |  slliw TMP2, TMP2, 3
+    |  add TMP0, TMP0, TMP2
+    |  ld TRACE:TMP2, 0(TMP0)
+    |  lw TMP0, TRACE:TMP2->startins
+    |  andi TMP0, TMP0, -256
+    |  or TMP0, TMP0, TMP1
+    |  sw TMP0, 0(PC)
+    |  j <1
+    |.endif
+    break;
+
+  case BC_VARG:
+    |  // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
+    |  ld TMP0, FRAME_PC(BASE)
+    |  decode_RDtoRC8 RC, RD
+    |   decode_RB8 RB, INS
+    |  add RC, BASE, RC
+    |   add RA, BASE, RA
+    |  addi RC, RC, FRAME_VARG
+    |   add TMP2, RA, RB
+    |  addi TMP3, BASE, -16		// TMP3 = vtop
+    |  sub RC, RC, TMP0		// RC = vbase
+    |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
+    |   sub TMP1, TMP3, RC
+    |  beqz RB, >5			// Copy all varargs?
+    |  addi TMP2, TMP2, -16
+    |1:  // Copy vararg slots to destination slots.
+    |  ld CARG1, 0(RC)
+    |  sltu TMP0, RC, TMP3
+    |    addi RC, RC, 8
+    |  bnez TMP0, >2
+    |  mv CARG1, TISNIL
+    |2:
+    |  sd CARG1, 0(RA)
+    |  sltu TMP0, RA, TMP2
+    |  addi RA, RA, 8
+    |  bnez TMP0, <1
+    |3:
+    |  ins_next
+    |
+    |5:  // Copy all varargs.
+    |  ld TMP0, L->maxstack
+    |   li MULTRES, 8		// MULTRES = (0+1)*8
+    |  blez TMP1, <3			// No vararg slots?
+    |  add TMP2, RA, TMP1
+    |   addi MULTRES, TMP1, 8
+    |  bltu TMP0, TMP2, >7
+    |6:
+    |  ld CRET1, 0(RC)
+    |   addi RC, RC, 8
+    |  sd CRET1, 0(RA)
+    |   addi RA, RA, 8
+    |  bltu RC, TMP3, <6			// More vararg slots?
+    |  j <3
+    |
+    |7:  // Grow stack for varargs.
+    |   sd RA, L->top
+    |  sub RA, RA, BASE
+    |   sd BASE, L->base
+    |  sub BASE, RC, BASE		// Need delta, because BASE may change.
+    |   sd PC, SAVE_PC(sp)
+    |  srliw CARG2, TMP1, 3
+    |   mv CARG1, L
+    |  call_intern BC_VARG, lj_state_growstack	// (lua_State *L, int n)
+    |  mv RC, BASE
+    |  ld BASE, L->base
+    |  add RA, BASE, RA
+    |  add RC, BASE, RC
+    |  addi TMP3, BASE, -16
+    |  j <6
+    break;
+
+  /* -- Returns ----------------------------------------------------------- */
+
+  case BC_RETM:
+    |  // RA = results*8, RD = extra_nresults*8
+    |  addw RD, RD, MULTRES
+    |  j ->BC_RET_Z1
+    break;
+
+  case BC_RET:
+    |  // RA = results*8, RD = (nresults+1)*8
+    |->BC_RET_Z1:
+    |  ld PC, FRAME_PC(BASE)
+    |   add RA, BASE, RA
+    |    mv MULTRES, RD
+    |1:
+    |  andi TMP0, PC, FRAME_TYPE
+    |   xori TMP1, PC, FRAME_VARG
+    |  bnez TMP0, ->BC_RETV_Z
+    |
+    |->BC_RET_Z:
+    |  // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
+    |   lw INS, -4(PC)
+    |    addi TMP2, BASE, -16
+    |    addi RC, RD, -8
+    |  decode_RA8 TMP0, INS
+    |   decode_RB8 RB, INS
+    |   sub BASE, TMP2, TMP0
+    |   add TMP3, TMP2, RB
+    |  beqz RC, >3
+    |2:
+    |   ld CRET1, 0(RA)
+    |    addi RA, RA, 8
+    |  addi RC, RC, -8
+    |   sd CRET1, 0(TMP2)
+    |    addi TMP2, TMP2, 8
+    |  bnez RC, <2
+    |3:
+    |  addi TMP3, TMP3, -8
+    |5:
+    |  bltu TMP2, TMP3, >6
+    |   ld LFUNC:TMP1, FRAME_FUNC(BASE)
+    |  cleartp LFUNC:TMP1
+    |  ld TMP1, LFUNC:TMP1->pc
+    |  ld KBASE, PC2PROTO(k)(TMP1)
+    |  ins_next
+    |
+    |6:  // Fill up results with nil.
+    |  sd TISNIL, 0(TMP2)
+    |   addi TMP2, TMP2, 8
+    |  j <5
+    |
+    |->BC_RETV_Z:  // Non-standard return case.
+    |  andi TMP2, TMP1, FRAME_TYPEP
+    |  bxnez TMP2, ->vm_return
+    |  // Return from vararg function: relocate BASE down.
+    |  sub BASE, BASE, TMP1
+    |   ld PC, FRAME_PC(BASE)
+    |  j <1
+    break;
+
+  case BC_RET0: case BC_RET1:
+    |  // RA = results*8, RD = (nresults+1)*8
+    |  ld PC, FRAME_PC(BASE)
+    |   add RA, BASE, RA
+    |    mv MULTRES, RD
+    |  andi TMP0, PC, FRAME_TYPE
+    |   xori TMP1, PC, FRAME_VARG
+    |  bnez TMP0, ->BC_RETV_Z
+    |  lw INS, -4(PC)
+    |   addi TMP2, BASE, -16
+    if (op == BC_RET1) {
+      |  ld CRET1, 0(RA)
+    }
+    |  decode_RB8 RB, INS
+    |   decode_RA8 RA, INS
+    |   sub BASE, TMP2, RA
+    if (op == BC_RET1) {
+      |  sd CRET1, 0(TMP2)
+    }
+    |5:
+    |  bltu RD, RB, >6
+    |   ld TMP1, FRAME_FUNC(BASE)
+    |  cleartp LFUNC:TMP1
+    |  ld TMP1, LFUNC:TMP1->pc
+    |  ins_next1
+    |  ld KBASE, PC2PROTO(k)(TMP1)
+    |  ins_next2
+    |
+    |6:  // Fill up results with nil.
+    |  addi TMP2, TMP2, 8
+    |  addi RD, RD, 8
+    if (op == BC_RET1) {
+      |  sd TISNIL, 0(TMP2)
+    } else {
+      |  sd TISNIL, -8(TMP2)
+    }
+    |  j <5
+    break;
+
+  /* -- Loops and branches ------------------------------------------------ */
+
+  case BC_FORL:
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_IFORL follows.
+    break;
+
+  case BC_JFORI:
+  case BC_JFORL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_FORI:
+  case BC_IFORL:
+    |  // RA = base*8, RD = target (after end of loop or start of loop)
+    vk = (op == BC_IFORL || op == BC_JFORL);
+    |  add RA, BASE, RA
+    |  ld CARG1, FORL_IDX*8(RA)		// CARG1 = IDX
+    |   ld CARG2, FORL_STEP*8(RA)		// CARG2 = STEP
+    |    ld CARG3, FORL_STOP*8(RA)		// CARG3 = STOP
+    |  gettp CARG4, CARG1
+    |   gettp CARG5, CARG2
+    |    gettp CARG6, CARG3
+    if (op != BC_JFORL) {
+      |  srliw RD, RD, 1
+      |  lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff	// -BCBIAS_J<<2
+      |  add TMP2, RD, TMP2
+    }
+    |  bne CARG4, TISNUM, >3
+    |   sext.w CARG4, CARG1		// start
+    |   sext.w CARG3, CARG3		// stop
+    if (!vk) {				// init
+      |  bxne CARG6, TISNUM, ->vmeta_for
+      |  bxne CARG5, TISNUM, ->vmeta_for
+      |   bfextri TMP0, CARG2, 31, 31	// sign
+      |  slt CARG2, CARG3, CARG4
+      |  slt TMP1, CARG4, CARG3
+      |  neg TMP4, TMP0
+      |  and TMP1, TMP1, TMP4
+      |  not TMP4, TMP4
+      |  and CARG2, CARG2, TMP4
+      |  or CARG2, CARG2, TMP1		// CARG2=0: +,start <= stop or -,start >= stop
+    } else {
+      |  sext.w CARG5, CARG2		// step
+      |  addw CARG1, CARG4, CARG5	// start + step
+      |  xor TMP3, CARG1, CARG4		// y^a
+      |  xor TMP1, CARG1, CARG5		// y^b
+      |  and TMP3, TMP3, TMP1
+      |  slt TMP1, CARG1, CARG3		// start+step < stop ?
+      |  slt CARG3, CARG3, CARG1	// stop < start+step ?
+      |  sltz TMP0, CARG5		// step < 0 ?
+      |   sltz TMP3, TMP3		// ((y^a) & (y^b)) < 0: overflow.
+      |  neg TMP4, TMP0
+      |  and TMP1, TMP1, TMP4
+      |  not TMP4, TMP4
+      |  and CARG3, CARG3, TMP4
+      |  or CARG3, CARG3, TMP1
+      |  or CARG2, CARG3, TMP3		// CARG2=1: overflow; CARG2=0: continue
+      |  zext.w CARG1, CARG1
+      |  settp_b CARG1, TISNUM
+      |  sd CARG1, FORL_IDX*8(RA)
+    }
+    |1:
+    if (op == BC_FORI) {
+      |  neg TMP4, CARG2	// CARG2!=0: jump out the loop; CARG2==0: next INS
+      |  and TMP2, TMP2, TMP4
+      |  add PC, PC, TMP2
+    } else if (op == BC_JFORI) {
+      |  add PC, PC, TMP2
+      |  lhu RD, -4+OFS_RD(PC)
+    } else if (op == BC_IFORL) {
+      |  addi TMP4, CARG2, -1	// CARG2!=0: next INS; CARG2==0: jump back
+      |  and TMP2, TMP2, TMP4
+      |  add PC, PC, TMP2
+    }
+    |  ins_next1
+    |  sd CARG1, FORL_EXT*8(RA)
+    |2:
+    if (op == BC_JFORI) {
+      |  decode_RD8b RD
+      |  beqz CARG2, =>BC_JLOOP		// CARG2 == 0: execute the loop
+    } else if (op == BC_JFORL) {
+      |  beqz CARG2, =>BC_JLOOP
+    }
+    |  ins_next2
+    |
+    |3:  // FP loop.
+    |  fld FTMP0, FORL_IDX*8(RA)	// start
+    |  fld FTMP1, FORL_STOP*8(RA)	// stop
+    |  ld TMP0, FORL_STEP*8(RA)	// step
+    |  sltz CARG2, TMP0		// step < 0 ?
+    |  neg CARG2, CARG2
+    if (!vk) {
+      |  sltiu TMP3, CARG4, LJ_TISNUM	// start is number ?
+      |  sltiu TMP0, CARG5, LJ_TISNUM	// step is number ?
+      |  sltiu TMP1, CARG6, LJ_TISNUM	// stop is number ?
+      |  and TMP3, TMP3, TMP1
+      |  and TMP0, TMP0, TMP3
+      |  bxeqz TMP0, ->vmeta_for		// if start or step or stop isn't number
+      |  flt.d TMP3, FTMP0, FTMP1		// start < stop ?
+      |  flt.d TMP4, FTMP1, FTMP0		// stop < start ?
+      |  and TMP3, TMP3, CARG2
+      |  not CARG2, CARG2
+      |  and TMP4, TMP4, CARG2
+      |  or CARG2, TMP3, TMP4	// CARG2=0:+,start<stop or -,start>stop
+      |  j <1
+    } else {
+      |  fld FTMP3, FORL_STEP*8(RA)
+      |  fadd.d FTMP0, FTMP0, FTMP3		// start + step
+      |  flt.d TMP3, FTMP0, FTMP1		// start + step < stop ?
+      |  flt.d TMP4, FTMP1, FTMP0
+      |  and TMP3, TMP3, CARG2
+      |  not CARG2, CARG2
+      |  and TMP4, TMP4, CARG2
+      |  or CARG2, TMP3, TMP4
+      if (op == BC_IFORL) {
+  |  addi TMP3, CARG2, -1
+  |  and TMP2, TMP2, TMP3
+	|  add PC, PC, TMP2
+      }
+      |  fsd FTMP0, FORL_IDX*8(RA)
+      |  ins_next1
+      |  fsd FTMP0, FORL_EXT*8(RA)
+      |  j <2
+    }
+    break;
+
+  case BC_ITERL:
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_IITERL follows.
+    break;
+
+  case BC_JITERL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IITERL:
+    |  // RA = base*8, RD = target
+    |  add RA, BASE, RA
+    |  ld TMP1, 0(RA)
+    |  beq TMP1, TISNIL, >1		// Stop if iterator returned nil.
+    if (op == BC_JITERL) {
+      |   sd TMP1,-8(RA)
+      |  j =>BC_JLOOP
+    } else {
+      |  branch_RD			// Otherwise save control var + branch.
+      |  sd TMP1, -8(RA)
+    }
+    |1:
+    |  ins_next
+    break;
+
+  case BC_LOOP:
+    |  // RA = base*8, RD = target (loop extent)
+    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
+    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
+    |.if JIT
+    |  hotloop
+    |.endif
+    |  // Fall through. Assumes BC_ILOOP follows.
+    break;
+
+  case BC_ILOOP:
+    |  // RA = base*8, RD = target (loop extent)
+    |  ins_next
+    break;
+
+  case BC_JLOOP:
+    |.if JIT
+    |  // RA = base*8 (ignored), RD = traceno*8
+    |  ld TMP0, GL_J(trace)(GL)	// Assumes J.trace in-reach relative to GL.
+    |  add TMP0, TMP0, RD
+    |  // Traces on RISC-V don't store the trace number, so use 0.
+    |  sd x0, GL->vmstate
+    |  ld TRACE:TMP1, 0(TMP0)
+    |  sd BASE, GL->jit_base	// store Current JIT code L->base
+    |  ld TMP1, TRACE:TMP1->mcode
+    |  sd L, GL->tmpbuf.L
+    |  jr TMP1
+    |.endif
+    break;
+
+  case BC_JMP:
+    |  // RA = base*8 (only used by trace recorder), RD = target
+    |  branch_RD		// PC + (jump - 0x8000)<<2
+    |  ins_next
+    break;
+
+  /* -- Function headers -------------------------------------------------- */
+
+  case BC_FUNCF:
+    |.if JIT
+    |  hotcall
+    |.endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+    break;
+
+  case BC_JFUNCF:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IFUNCF:
+    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+    |  ld TMP2, L->maxstack
+    |   lbu TMP1, -4+PC2PROTO(numparams)(PC)
+    |    ld KBASE, -4+PC2PROTO(k)(PC)
+    |  bxltu TMP2, RA, ->vm_growstack_l
+    |   slliw TMP1, TMP1, 3			// numparams*8
+    |2:
+    |  bltu NARGS8:RC, TMP1, >3		// Check for missing parameters.
+    if (op == BC_JFUNCF) {
+      |  decode_RD8 RD, INS
+      |  j =>BC_JLOOP
+    } else {
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  add TMP0, BASE, NARGS8:RC
+    |  sd TISNIL, 0(TMP0)
+    |   addiw NARGS8:RC, NARGS8:RC, 8
+    |  j <2
+    break;
+
+  case BC_JFUNCV:
+#if !LJ_HASJIT
+    break;
+#endif
+    |  NYI  // NYI: compiled vararg functions
+    break;  /* NYI: compiled vararg functions. */
+
+  case BC_IFUNCV:
+    |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+    |   li TMP0, LJ_TFUNC
+    |   add TMP1, BASE, RC
+    |  ld TMP2, L->maxstack
+    |   settp LFUNC:RB, TMP0
+    |  add TMP0, RA, RC
+    |   sd LFUNC:RB, 0(TMP1)		// Store (tagged) copy of LFUNC.
+    |  addi TMP2, TMP2, -8
+    |   addi TMP3, RC, 16+FRAME_VARG
+    |    ld KBASE, -4+PC2PROTO(k)(PC)
+    |   sd TMP3, 8(TMP1)                // Store delta + FRAME_VARG.
+    |  bxgeu TMP0, TMP2, ->vm_growstack_l
+    |  lbu TMP2, -4+PC2PROTO(numparams)(PC)
+    |   mv RA, BASE
+    |   mv RC, TMP1
+    |  ins_next1
+    |   addi BASE, TMP1, 16
+    |  beqz TMP2, >2
+    |1:
+    |  ld TMP0, 0(RA)
+    |  sltu CARG2, RA, RC			// Less args than parameters?
+    |  mv CARG1, TMP0
+    |    addi RA, RA, 8
+    |    addi TMP1, TMP1, 8
+    |    addiw TMP2, TMP2, -1
+    |  beqz CARG2, >3
+    |  neg TMP4, CARG2		// Clear old fixarg slot (help the GC).
+    |  and TMP3, TISNIL, TMP4
+    |  not TMP4, TMP4
+    |  and CARG1, CARG1, TMP4
+    |  or CARG1, CARG1, TMP3
+    |  sd CARG1, -8(RA)
+    |  sd TMP0, 8(TMP1)
+    |  bnez TMP2, <1
+    |2:
+    |  ins_next2
+    |3:
+    |  neg TMP4, CARG2		// Clear missing fixargs.
+    |  and TMP0, TMP0, TMP4
+    |  not TMP4, TMP4
+    |  and TMP3, TISNIL, TMP4
+    |  or TMP0, TMP0, TMP3
+    |  sd TMP0, 8(TMP1)
+    |  bnez TMP2, <1
+    |  j <2
+    break;
+
+  case BC_FUNCC:
+  case BC_FUNCCW:
+    |  // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
+    if (op == BC_FUNCC) {
+      |  ld CARG4, CFUNC:RB->f
+    } else {
+      |  ld CARG4, GL->wrapf
+    }
+    |  add TMP1, RA, NARGS8:RC
+    |  ld TMP2, L->maxstack
+    |   add RC, BASE, NARGS8:RC
+    |  sd BASE, L->base		// base of currently executing function
+    |   sd RC, L->top
+    |  bxgtu TMP1, TMP2, ->vm_growstack_c	// Need to grow stack.
+    |    li_vmstate C			// li TMP0, ~LJ_VMST_C
+    if (op == BC_FUNCCW) {
+      |  ld CARG2, CFUNC:RB->f
+    }
+    |   mv CARG1, L
+    |    st_vmstate			// sw TMP0, GL->vmstate
+    |  jalr CARG4		// (lua_State *L [, lua_CFunction f])
+    |  // Returns nresults.
+    |  ld BASE, L->base
+    |  ld TMP1, L->top
+    |  sd L, GL->cur_L
+    |   slliw RD, CRET1, 3
+    |    li_vmstate INTERP
+    |  ld PC, FRAME_PC(BASE)		// Fetch PC of caller.
+    |  sub RA, TMP1, RD		// RA = L->top - nresults*8
+    |    st_vmstate
+    |  j ->vm_returnc
+    break;
+
+  /* ---------------------------------------------------------------------- */
+
+  default:
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+}
+
+static int build_backend(BuildCtx *ctx)  /* Build all VM code; returns BC__MAX. */
+{
+  int op;
+  /* Grow the DynASM PC-label table to BC__MAX entries (used by =>BC_xxx labels). */
+  dasm_growpc(Dst, BC__MAX);
+  /* Emit the subroutines via build_subroutines() before any opcode handler. */
+  build_subroutines(ctx);
+  /* Switch to the code_op section, then call build_ins() once per opcode number. */
+  |.code_op
+  for (op = 0; op < BC__MAX; op++)
+    build_ins(ctx, (BCOp)op, op);
+
+  return BC__MAX;
+}
+
+/* Emit pseudo frame-info (.debug_frame, plus .eh_frame unless LJ_NO_UNWIND) for all assembler functions; only BUILD_elfasm mode emits anything. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);	/* Code offset of vm_ffi_call. */
+  int i;
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+    fprintf(ctx->fp,
+	".Lframe0:\n"
+	"\t.4byte .LECIE0-.LSCIE0\n"
+	".LSCIE0:\n"
+	"\t.4byte 0xffffffff\n"
+	"\t.byte 0x1\n"
+	"\t.string \"\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -4\n"
+	"\t.byte 1\n"				/* Return address is in ra. */
+	"\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"	/* def_cfa sp 0 */
+	"\t.align 3\n"
+	".LECIE0:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE0:\n"
+	"\t.4byte .LEFDE0-.LASFDE0\n"
+	".LASFDE0:\n"
+	"\t.4byte .Lframe0\n"
+	"\t.8byte .Lbegin\n"			/* initial_location (address-sized) */
+	"\t.8byte %d\n"				/* address_range: code up to vm_ffi_call */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset CFRAME_SIZE */
+	"\t.byte 0x81\n\t.uleb128 2*6\n"	/* offset ra */,
+	fcofs, CFRAME_SIZE);
+    for (i = 27; i >= 18; i--)	/* offset x27-x18 (s11-s2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
+    fprintf(ctx->fp,
+	"\t.byte 0x89\n\t.uleb128 2*17\n"	/* offset x9 (s1) */
+	"\t.byte 0x88\n\t.uleb128 2*18\n"	/* offset x8 (s0/fp) */);
+    for (i = 27; i >= 18; i--)	/* offset f27-f18 (fs11-fs2); FP regs are DWARF regs 32+n */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
+    fprintf(ctx->fp,
+	"\t.byte 0x89+32\n\t.uleb128 2*29\n"	/* offset f9 (fs1) */
+	"\t.byte 0x88+32\n\t.uleb128 2*30\n"	/* offset f8 (fs0) */
+	"\t.align 3\n"
+	".LEFDE0:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.4byte .LEFDE1-.LASFDE1\n"
+	".LASFDE1:\n"
+	"\t.4byte .Lframe0\n"
+	"\t.8byte lj_vm_ffi_call\n"		/* initial_location: address-sized (8 bytes), as in FDE0 */
+	"\t.8byte %d\n"				/* address_range: address-sized, too */
+	"\t.byte 0x81\n\t.uleb128 2*1\n"	/* offset ra */
+	"\t.byte 0x92\n\t.uleb128 2*2\n"	/* offset x18 */
+	"\t.byte 0xd\n\t.uleb128 0x12\n"	/* def_cfa_register x18 */
+	"\t.align 3\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+    fprintf(ctx->fp,
+	".Lframe1:\n"
+	"\t.4byte .LECIE1-.LSCIE1\n"
+	".LSCIE1:\n"
+	"\t.4byte 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zPR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -4\n"
+	"\t.byte 1\n"				/* Return address is in ra. */
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* personality encoding: pcrel|sdata4 */
+	"\t.4byte lj_err_unwind_dwarf-.\n"
+	"\t.byte 0x1b\n"			/* FDE pointer encoding: pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"	/* def_cfa sp 0 */
+	"\t.align 2\n"
+	".LECIE1:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE2:\n"
+	"\t.4byte .LEFDE2-.LASFDE2\n"
+	".LASFDE2:\n"
+	"\t.4byte .LASFDE2-.Lframe1\n"
+	"\t.4byte .Lbegin-.\n"			/* pcrel, 4 bytes per the CIE's 0x1b encoding */
+	"\t.4byte %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset CFRAME_SIZE */
+	"\t.byte 0x81\n\t.uleb128 2*6\n",	/* offset ra */
+	fcofs, CFRAME_SIZE);
+    for (i = 27; i >= 18; i--)	/* offset x27-x18 (s11-s2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
+    fprintf(ctx->fp,
+	"\t.byte 0x89\n\t.uleb128 2*17\n"	/* offset x9 (s1) */
+	"\t.byte 0x88\n\t.uleb128 2*18\n"	/* offset x8 (s0/fp) */);
+    for (i = 27; i >= 18; i--)	/* offset f27-f18 (fs11-fs2); FP regs are DWARF regs 32+n */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
+    fprintf(ctx->fp,
+	"\t.byte 0x89+32\n\t.uleb128 2*29\n"	/* offset f9 (fs1) */
+	"\t.byte 0x88+32\n\t.uleb128 2*30\n"	/* offset f8 (fs0) */
+	"\t.align 2\n"
+	".LEFDE2:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".Lframe2:\n"
+	"\t.4byte .LECIE2-.LSCIE2\n"
+	".LSCIE2:\n"
+	"\t.4byte 0\n"
+	"\t.byte 0x1\n"
+	"\t.string \"zR\"\n"
+	"\t.uleb128 0x1\n"
+	"\t.sleb128 -4\n"
+	"\t.byte 1\n"				/* Return address is in ra. */
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* FDE pointer encoding: pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"	/* def_cfa sp 0 */
+	"\t.align 2\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE3:\n"
+	"\t.4byte .LEFDE3-.LASFDE3\n"
+	".LASFDE3:\n"
+	"\t.4byte .LASFDE3- .Lframe2\n"
+	"\t.4byte lj_vm_ffi_call-.\n"		/* pcrel, 4 bytes per the CIE's 0x1b encoding */
+	"\t.4byte %d\n"
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0x81\n\t.uleb128 2*1\n"	/* offset ra */
+	"\t.byte 0x92\n\t.uleb128 2*2\n"	/* offset x18 */
+	"\t.byte 0xd\n\t.uleb128 0x12\n"	/* def_cfa_register x18 */
+	"\t.align 2\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+    break;
+  default:	/* Other build modes: no frame info emitted here. */
+    break;
+  }
+}
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_x64.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_x64.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_x64.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |
 |.arch x64
 |.section code_op, code_sub
@@ -359,9 +359,6 @@
 |.macro sseconst_1, reg, tmp		// Synthesize 1.0.
 |  sseconst_hi reg, tmp, 3ff00000
 |.endmacro
-|.macro sseconst_m1, reg, tmp		// Synthesize -1.0.
-|  sseconst_hi reg, tmp, bff00000
-|.endmacro
 |.macro sseconst_2p52, reg, tmp		// Synthesize 2^52.
 |  sseconst_hi reg, tmp, 43300000
 |.endmacro
@@ -1346,44 +1343,28 @@ static void build_subroutines(BuildCtx *
   |.ffunc_1 next
   |  je >2				// Missing 2nd arg?
   |1:
-  |.if X64WIN
-  |  mov RA, [BASE]
-  |  checktab RA, ->fff_fallback
-  |.else
-  |  mov CARG2, [BASE]
-  |  checktab CARG2, ->fff_fallback
-  |.endif
-  |  mov L:RB, SAVE_L
-  |  mov L:RB->base, BASE		// Add frame since C call can throw.
-  |  mov L:RB->top, BASE		// Dummy frame length is ok.
+  |  mov CARG1, [BASE]
   |  mov PC, [BASE-8]
+  |  checktab CARG1, ->fff_fallback
+  |  mov RB, BASE			// Save BASE.
   |.if X64WIN
-  |  lea CARG3, [BASE+8]
-  |  mov CARG2, RA			// Caveat: CARG2 == BASE.
-  |  mov CARG1, L:RB
+  |  lea CARG3, [BASE-16]
+  |  lea CARG2, [BASE+8]		// Caveat: CARG2 == BASE.
   |.else
-  |  lea CARG3, [BASE+8]		// Caveat: CARG3 == BASE.
-  |  mov CARG1, L:RB
+  |  lea CARG2, [BASE+8]
+  |  lea CARG3, [BASE-16]		// Caveat: CARG3 == BASE.
   |.endif
-  |  mov SAVE_PC, PC			// Needed for ITERN fallback.
-  |  call extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Flag returned in eax (RD).
-  |  mov BASE, L:RB->base
-  |  test RDd, RDd;  jz >3		// End of traversal?
-  |  // Copy key and value to results.
-  |  mov RB, [BASE+8]
-  |  mov RD, [BASE+16]
-  |  mov [BASE-16], RB
-  |  mov [BASE-8], RD
-  |->fff_res2:
-  |  mov RDd, 1+2
-  |  jmp ->fff_res
+  |  call extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |  // 1=found, 0=end, -1=error returned in eax (RD).
+  |  mov BASE, RB			// Restore BASE.
+  |  test RDd, RDd;  jg ->fff_res2	// Found key/value.
+  |  js ->fff_fallback_2		// Invalid key.
+  |  // End of traversal: return nil.
+  |  mov aword [BASE-16], LJ_TNIL
+  |  jmp ->fff_res1
   |2:  // Set missing 2nd arg to nil.
   |  mov aword [BASE+8], LJ_TNIL
   |  jmp <1
-  |3:  // End of traversal: return nil.
-  |  mov aword [BASE-16], LJ_TNIL
-  |  jmp ->fff_res1
   |
   |.ffunc_1 pairs
   |  mov TAB:RB, [BASE]
@@ -1432,7 +1413,9 @@ static void build_subroutines(BuildCtx *
   |  // Copy array slot.
   |  mov RB, [RD]
   |  mov [BASE-8], RB
-  |  jmp ->fff_res2
+  |->fff_res2:
+  |  mov RDd, 1+2
+  |  jmp ->fff_res
   |2:  // Check for empty hash part first. Otherwise call C function.
   |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
   |.if X64WIN
@@ -2011,7 +1994,7 @@ static void build_subroutines(BuildCtx *
   |.endif
   |   mov RC, SBUF:CARG1->b
   |   mov SBUF:CARG1->L, L:RB
-  |   mov SBUF:CARG1->p, RC
+  |   mov SBUF:CARG1->w, RC
   |  mov SAVE_PC, PC
   |  call extern lj_buf_putstr_ .. name
   |  mov CARG1, rax
@@ -2470,7 +2453,7 @@ static void build_subroutines(BuildCtx *
   |  mov r12, [RA]
   |  mov rsp, RA			// Reposition stack to C frame.
   |.endif
-  |  test RDd, RDd; js >9		// Check for error from exit.
+  |  cmp RDd, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RDd
   |  mov LFUNC:KBASE, [BASE-16]
@@ -2486,6 +2469,8 @@ static void build_subroutines(BuildCtx *
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RCd, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2508,9 +2493,20 @@ static void build_subroutines(BuildCtx *
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*8]
+  |  mov RCd, TRACE:RA->startins
+  |  movzx RAd, RCH
+  |  movzx OP, RCL
+  |  shr RCd, 16
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |
   |9:  // Rethrow error from the right C frame.
+  |  mov CARG2d, RDd
   |  mov CARG1, L:RB
-  |  call extern lj_err_run		// (lua_State *L)
+  |  neg CARG2d
+  |  call extern lj_err_trace		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -2542,15 +2538,17 @@ static void build_subroutines(BuildCtx *
   |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
   |  subsd xmm1, xmm3
   |  orpd xmm1, xmm2			// Merge sign bit back in.
+  |  sseconst_1 xmm3, RD
   |  .if mode == 1		// ceil(x)?
-  |    sseconst_m1 xmm2, RD		// Must subtract -1 to preserve -0.
   |    cmpsd xmm0, xmm1, 6		// x > result?
+  |    andpd xmm0, xmm3
+  |    addsd xmm1, xmm0			// If yes, add 1.
+  |    orpd xmm1, xmm2			// Merge sign bit back in (again).
   |  .else			// floor(x)?
-  |    sseconst_1 xmm2, RD
   |    cmpsd xmm0, xmm1, 1		// x < result?
+  |    andpd xmm0, xmm3
+  |    subsd xmm1, xmm0			// If yes, subtract 1.
   |  .endif
-  |  andpd xmm0, xmm2
-  |  subsd xmm1, xmm0			// If yes, subtract +-1.
   |.endif
   |  movaps xmm0, xmm1
   |1:
@@ -2591,41 +2589,6 @@ static void build_subroutines(BuildCtx *
   |  subsd xmm0, xmm1
   |  ret
   |
-  |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-  |->vm_powi_sse:
-  |  cmp eax, 1; jle >6			// i<=1?
-  |  // Now 1 < (unsigned)i <= 0x80000000.
-  |1:  // Handle leading zeros.
-  |  test eax, 1; jnz >2
-  |  mulsd xmm0, xmm0
-  |  shr eax, 1
-  |  jmp <1
-  |2:
-  |  shr eax, 1; jz >5
-  |  movaps xmm1, xmm0
-  |3:  // Handle trailing bits.
-  |  mulsd xmm0, xmm0
-  |  shr eax, 1; jz >4
-  |  jnc <3
-  |  mulsd xmm1, xmm0
-  |  jmp <3
-  |4:
-  |  mulsd xmm0, xmm1
-  |5:
-  |  ret
-  |6:
-  |  je <5				// x^1 ==> x
-  |  jb >7				// x^0 ==> 1
-  |  neg eax
-  |  call <1
-  |  sseconst_1 xmm1, RD
-  |  divsd xmm1, xmm0
-  |  movaps xmm0, xmm1
-  |  ret
-  |7:
-  |  sseconst_1 xmm0, RD
-  |  ret
-  |
   |//-----------------------------------------------------------------------
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2645,6 +2608,67 @@ static void build_subroutines(BuildCtx *
   |  .if X64WIN; pop rsi; .endif
   |  ret
   |
+  |.define NEXT_TAB,		TAB:CARG1
+  |.define NEXT_IDX,		CARG2d
+  |.define NEXT_IDXa,		CARG2
+  |.define NEXT_PTR,		RC
+  |.define NEXT_PTRd,		RCd
+  |.define NEXT_TMP,		CARG3
+  |.define NEXT_ASIZE,		CARG4d
+  |.macro NEXT_RES_IDXL, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
+  |.if X64WIN
+  |.define NEXT_RES_PTR,	[rsp+aword*5]
+  |.macro NEXT_RES_IDX, op2;	add NEXT_IDX, op2; .endmacro
+  |.else
+  |.define NEXT_RES_PTR,	[rsp+aword*1]
+  |.macro NEXT_RES_IDX, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
+  |.endif
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in edx.
+  |->vm_next:
+  |.if JIT
+  |  mov NEXT_ASIZE, NEXT_TAB->asize
+  |1:  // Traverse array part.
+  |  cmp NEXT_IDX, NEXT_ASIZE;  jae >5
+  |  mov NEXT_TMP, NEXT_TAB->array
+  |  mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]
+  |  cmp NEXT_TMP, LJ_TNIL;  je >2
+  |  lea NEXT_PTR, NEXT_RES_PTR
+  |  mov qword [NEXT_PTR], NEXT_TMP
+  |.if DUALNUM
+  |  setint NEXT_TMP, NEXT_IDXa
+  |  mov qword [NEXT_PTR+qword*1], NEXT_TMP
+  |.else
+  |  cvtsi2sd xmm0, NEXT_IDX
+  |  movsd qword [NEXT_PTR+qword*1], xmm0
+  |.endif
+  |  NEXT_RES_IDX 1
+  |  ret
+  |2:  // Skip holes in array part.
+  |  add NEXT_IDX, 1
+  |  jmp <1
+  |
+  |5:  // Traverse hash part.
+  |  sub NEXT_IDX, NEXT_ASIZE
+  |6:
+  |  cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
+  |  imul NEXT_PTRd, NEXT_IDX, #NODE
+  |  add NODE:NEXT_PTR, NEXT_TAB->node
+  |  cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
+  |  NEXT_RES_IDXL NEXT_ASIZE+1
+  |  ret
+  |7:  // Skip holes in hash part.
+  |  add NEXT_IDX, 1
+  |  jmp <6
+  |
+  |9:  // End of iteration. Set the key to nil (not the value).
+  |  NEXT_RES_IDX NEXT_ASIZE
+  |  lea NEXT_PTR, NEXT_RES_PTR
+  |  mov qword [NEXT_PTR+qword*1], LJ_TNIL
+  |  ret
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- Assertions ---------------------------------------------------------
   |//-----------------------------------------------------------------------
@@ -2731,12 +2755,12 @@ static void build_subroutines(BuildCtx *
   |
   |  // Copy stack slots.
   |  movzx ecx, byte CCSTATE->nsp
-  |  sub ecx, 1
+  |  sub ecx, 8
   |  js >2
   |1:
-  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
-  |  sub ecx, 1
+  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+  |  sub ecx, 8
   |  jns <1
   |2:
   |
@@ -4056,10 +4080,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |  hotloop RBd
     |.endif
+    |->vm_IITERN:
+    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |  mov TAB:RB, [BASE+RA*8-16]
     |  cleartp TAB:RB
     |  mov RCd, [BASE+RA*8-8]		// Get index from control var.
@@ -4123,15 +4148,29 @@ static void build_ins(BuildCtx *ctx, BCO
     |  cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
     |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
     |  branchPC RD
-    |  mov64 TMPR, U64x(fffe7fff, 00000000)
+    |  mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)
     |  mov [BASE+RA*8-8], TMPR		// Initialize control var.
     |1:
     |  ins_next
     |5:  // Despecialize bytecode if any of the checks fail.
     |  mov PC_OP, BC_JMP
     |  branchPC RD
+    |.if JIT
+    |  cmp byte [PC], BC_ITERN
+    |  jne >6
+    |.endif
     |  mov byte [PC], BC_ITERC
     |  jmp <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+    |  movzx RCd, word [PC+2]
+    |  mov TRACE:RA, [RA+RC*8]
+    |  mov eax, TRACE:RA->startins
+    |  mov al, BC_ITERC
+    |  mov dword [PC], eax
+    |  jmp <1
+    |.endif
     break;
 
   case BC_VARG:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/vm_x86.dasc
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/vm_x86.dasc
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/vm_x86.dasc
@@ -1,6 +1,6 @@
 |// Low-level VM code for x86 CPUs.
 |// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
 |
 |.if P64
 |.arch x64
@@ -464,9 +464,6 @@
 |.macro sseconst_1, reg, tmp		// Synthesize 1.0.
 |  sseconst_hi reg, tmp, 3ff00000
 |.endmacro
-|.macro sseconst_m1, reg, tmp		// Synthesize -1.0.
-|  sseconst_hi reg, tmp, bff00000
-|.endmacro
 |.macro sseconst_2p52, reg, tmp		// Synthesize 2^52.
 |  sseconst_hi reg, tmp, 43300000
 |.endmacro
@@ -1372,7 +1369,7 @@ static void build_subroutines(BuildCtx *
   |  mov LFUNC:RB, [RA-8]
   |  add NARGS:RD, 1
   |  // This is fragile. L->base must not move, KBASE must always be defined.
-  |.if x64
+  |.if X64
   |  cmp KBASEa, rdx			// Continue with CALLT if flag set.
   |.else
   |  cmp KBASE, BASE			// Continue with CALLT if flag set.
@@ -1673,55 +1670,35 @@ static void build_subroutines(BuildCtx *
   |  je >2				// Missing 2nd arg?
   |1:
   |  cmp dword [BASE+4], LJ_TTAB;  jne ->fff_fallback
-  |  mov L:RB, SAVE_L
-  |  mov L:RB->base, BASE		// Add frame since C call can throw.
-  |  mov L:RB->top, BASE		// Dummy frame length is ok.
   |  mov PC, [BASE-4]
+  |  mov RB, BASE			// Save BASE.
   |.if X64WIN
-  |  lea CARG3d, [BASE+8]
-  |  mov CARG2d, [BASE]			// Caveat: CARG2d == BASE.
-  |  mov CARG1d, L:RB
+  |  mov CARG1d, [BASE]
+  |  lea CARG3d, [BASE-8]
+  |  lea CARG2d, [BASE+8]		// Caveat: CARG2d == BASE.
   |.elif X64
-  |  mov CARG2d, [BASE]
-  |  lea CARG3d, [BASE+8]		// Caveat: CARG3d == BASE.
-  |  mov CARG1d, L:RB
+  |  mov CARG1d, [BASE]
+  |  lea CARG2d, [BASE+8]
+  |  lea CARG3d, [BASE-8]		// Caveat: CARG3d == BASE.
   |.else
   |  mov TAB:RD, [BASE]
-  |  mov ARG2, TAB:RD
-  |  mov ARG1, L:RB
+  |  mov ARG1, TAB:RD
   |  add BASE, 8
+  |  mov ARG2, BASE
+  |  sub BASE, 8+8
   |  mov ARG3, BASE
   |.endif
-  |  mov SAVE_PC, PC			// Needed for ITERN fallback.
-  |  call extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
-  |  // Flag returned in eax (RD).
-  |  mov BASE, L:RB->base
-  |  test RD, RD;  jz >3		// End of traversal?
-  |  // Copy key and value to results.
-  |.if X64
-  |  mov RBa, [BASE+8]
-  |  mov RDa, [BASE+16]
-  |  mov [BASE-8], RBa
-  |  mov [BASE], RDa
-  |.else
-  |  mov RB, [BASE+8]
-  |  mov RD, [BASE+12]
-  |  mov [BASE-8], RB
-  |  mov [BASE-4], RD
-  |  mov RB, [BASE+16]
-  |  mov RD, [BASE+20]
-  |  mov [BASE], RB
-  |  mov [BASE+4], RD
-  |.endif
-  |->fff_res2:
-  |  mov RD, 1+2
-  |  jmp ->fff_res
+  |  call extern lj_tab_next		// (GCtab *t, cTValue *key, TValue *o)
+  |  // 1=found, 0=end, -1=error returned in eax (RD).
+  |  mov BASE, RB			// Restore BASE.
+  |  test RD, RD;  jg ->fff_res2	// Found key/value.
+  |  js ->fff_fallback_2		// Invalid key.
+  |  // End of traversal: return nil.
+  |  mov dword [BASE-4], LJ_TNIL
+  |  jmp ->fff_res1
   |2:  // Set missing 2nd arg to nil.
   |  mov dword [BASE+12], LJ_TNIL
   |  jmp <1
-  |3:  // End of traversal: return nil.
-  |  mov dword [BASE-4], LJ_TNIL
-  |  jmp ->fff_res1
   |
   |.ffunc_1 pairs
   |  mov TAB:RB, [BASE]
@@ -1775,7 +1752,9 @@ static void build_subroutines(BuildCtx *
   |  mov [BASE], RB
   |  mov [BASE+4], RD
   |.endif
-  |  jmp ->fff_res2
+  |->fff_res2:
+  |  mov RD, 1+2
+  |  jmp ->fff_res
   |2:  // Check for empty hash part first. Otherwise call C function.
   |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
   |  mov FCARG1, TAB:RB
@@ -2423,9 +2402,9 @@ static void build_subroutines(BuildCtx *
   |   lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
   |  mov L:RB->base, BASE
   |  mov STR:FCARG2, [BASE]		// Caveat: FCARG2 == BASE
-  |   mov RC, SBUF:FCARG1->b
+  |   mov RCa, SBUF:FCARG1->b
   |   mov SBUF:FCARG1->L, L:RB
-  |   mov SBUF:FCARG1->p, RC
+  |   mov SBUF:FCARG1->w, RCa
   |  mov SAVE_PC, PC
   |  call extern lj_buf_putstr_ .. name .. @8
   |  mov FCARG1, eax
@@ -2923,7 +2902,7 @@ static void build_subroutines(BuildCtx *
   |  mov r13, TMPa
   |  mov r12, TMPQ
   |.endif
-  |  test RD, RD; js >9			// Check for error from exit.
+  |  cmp RD, -LUA_ERRERR; jae >9	// Check for error from exit.
   |  mov L:RB, SAVE_L
   |  mov MULTRES, RD
   |  mov LFUNC:KBASE, [BASE-8]
@@ -2938,6 +2917,8 @@ static void build_subroutines(BuildCtx *
   |  movzx OP, RCL
   |  add PC, 4
   |  shr RC, 16
+  |  cmp MULTRES, -17			// Static dispatch?
+  |  je >5
   |  cmp OP, BC_FUNCF			// Function header?
   |  jb >3
   |  cmp OP, BC_FUNCC+2			// Fast function?
@@ -2963,9 +2944,24 @@ static void build_subroutines(BuildCtx *
   |  mov KBASE, [KBASE+PC2PROTO(k)]
   |  jmp <2
   |
+  |5:  // Dispatch to static entry of original ins replaced by BC_JLOOP.
+  |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+  |  mov TRACE:RA, [RA+RD*4]
+  |  mov RC, TRACE:RA->startins
+  |  movzx RA, RCH
+  |  movzx OP, RCL
+  |  shr RC, 16
+  |.if X64
+  |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
+  |.else
+  |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
+  |.endif
+  |
   |9:  // Rethrow error from the right C frame.
+  |  mov FCARG2, RD
   |  mov FCARG1, L:RB
-  |  call extern lj_err_run@4		// (lua_State *L)
+  |  neg FCARG2
+  |  call extern lj_err_trace@8		// (lua_State *L, int errcode)
   |.endif
   |
   |//-----------------------------------------------------------------------
@@ -3005,15 +3001,17 @@ static void build_subroutines(BuildCtx *
   |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
   |  subsd xmm1, xmm3
   |  orpd xmm1, xmm2			// Merge sign bit back in.
+  |  sseconst_1 xmm3, RDa
   |  .if mode == 1		// ceil(x)?
-  |    sseconst_m1 xmm2, RDa		// Must subtract -1 to preserve -0.
   |    cmpsd xmm0, xmm1, 6		// x > result?
+  |    andpd xmm0, xmm3
+  |    addsd xmm1, xmm0			// If yes, add 1.
+  |    orpd xmm1, xmm2			// Merge sign bit back in (again).
   |  .else			// floor(x)?
-  |    sseconst_1 xmm2, RDa
   |    cmpsd xmm0, xmm1, 1		// x < result?
+  |    andpd xmm0, xmm3
+  |    subsd xmm1, xmm0			// If yes, subtract 1.
   |  .endif
-  |  andpd xmm0, xmm2
-  |  subsd xmm1, xmm0			// If yes, subtract +-1.
   |.endif
   |  movaps xmm0, xmm1
   |1:
@@ -3054,41 +3052,6 @@ static void build_subroutines(BuildCtx *
   |  subsd xmm0, xmm1
   |  ret
   |
-  |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
-  |->vm_powi_sse:
-  |  cmp eax, 1; jle >6			// i<=1?
-  |  // Now 1 < (unsigned)i <= 0x80000000.
-  |1:  // Handle leading zeros.
-  |  test eax, 1; jnz >2
-  |  mulsd xmm0, xmm0
-  |  shr eax, 1
-  |  jmp <1
-  |2:
-  |  shr eax, 1; jz >5
-  |  movaps xmm1, xmm0
-  |3:  // Handle trailing bits.
-  |  mulsd xmm0, xmm0
-  |  shr eax, 1; jz >4
-  |  jnc <3
-  |  mulsd xmm1, xmm0
-  |  jmp <3
-  |4:
-  |  mulsd xmm0, xmm1
-  |5:
-  |  ret
-  |6:
-  |  je <5				// x^1 ==> x
-  |  jb >7				// x^0 ==> 1
-  |  neg eax
-  |  call <1
-  |  sseconst_1 xmm1, RDa
-  |  divsd xmm1, xmm0
-  |  movaps xmm0, xmm1
-  |  ret
-  |7:
-  |  sseconst_1 xmm0, RDa
-  |  ret
-  |
   |//-----------------------------------------------------------------------
   |//-- Miscellaneous functions --------------------------------------------
   |//-----------------------------------------------------------------------
@@ -3136,6 +3099,86 @@ static void build_subroutines(BuildCtx *
   |  ret
   |.endif
   |
+  |.define NEXT_TAB,		TAB:FCARG1
+  |.define NEXT_IDX,		FCARG2
+  |.define NEXT_PTR,		RCa
+  |.define NEXT_PTRd,		RC
+  |.macro NEXT_RES_IDXL, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
+  |.if X64
+  |.define NEXT_TMP,		CARG3d
+  |.define NEXT_TMPq,		CARG3
+  |.define NEXT_ASIZE,		CARG4d
+  |.macro NEXT_ENTER;		.endmacro
+  |.macro NEXT_LEAVE;		ret; .endmacro
+  |.if X64WIN
+  |.define NEXT_RES_PTR,	[rsp+aword*5]
+  |.macro NEXT_RES_IDX, op2;	add NEXT_IDX, op2; .endmacro
+  |.else
+  |.define NEXT_RES_PTR,	[rsp+aword*1]
+  |.macro NEXT_RES_IDX, op2;	lea edx, [NEXT_IDX+op2]; .endmacro
+  |.endif
+  |.else
+  |.define NEXT_ASIZE,		esi
+  |.define NEXT_TMP,		edi
+  |.macro NEXT_ENTER;		push esi; push edi; .endmacro
+  |.macro NEXT_LEAVE;		pop edi; pop esi; ret; .endmacro
+  |.define NEXT_RES_PTR,	[esp+dword*3]
+  |.macro NEXT_RES_IDX, op2;	add NEXT_IDX, op2; .endmacro
+  |.endif
+  |
+  |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+  |// Next idx returned in edx.
+  |->vm_next:
+  |.if JIT
+  |  NEXT_ENTER
+  |  mov NEXT_ASIZE, NEXT_TAB->asize
+  |1:  // Traverse array part.
+  |  cmp NEXT_IDX, NEXT_ASIZE;  jae >5
+  |  mov NEXT_TMP, NEXT_TAB->array
+  |  cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL;  je >2
+  |  lea NEXT_PTR, NEXT_RES_PTR
+  |.if X64
+  |  mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8]
+  |  mov qword [NEXT_PTR], NEXT_TMPq
+  |.else
+  |  mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4]
+  |  mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
+  |  mov dword [NEXT_PTR+4], NEXT_ASIZE
+  |  mov dword [NEXT_PTR], NEXT_TMP
+  |.endif
+  |.if DUALNUM
+  |  mov dword [NEXT_PTR+dword*3], LJ_TISNUM
+  |  mov dword [NEXT_PTR+dword*2], NEXT_IDX
+  |.else
+  |  cvtsi2sd xmm0, NEXT_IDX
+  |  movsd qword [NEXT_PTR+dword*2], xmm0
+  |.endif
+  |  NEXT_RES_IDX 1
+  |  NEXT_LEAVE
+  |2:  // Skip holes in array part.
+  |  add NEXT_IDX, 1
+  |  jmp <1
+  |
+  |5:  // Traverse hash part.
+  |  sub NEXT_IDX, NEXT_ASIZE
+  |6:
+  |  cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
+  |  imul NEXT_PTRd, NEXT_IDX, #NODE
+  |  add NODE:NEXT_PTRd, dword NEXT_TAB->node
+  |  cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
+  |  NEXT_RES_IDXL NEXT_ASIZE+1
+  |  NEXT_LEAVE
+  |7:  // Skip holes in hash part.
+  |  add NEXT_IDX, 1
+  |  jmp <6
+  |
+  |9:  // End of iteration. Set the key to nil (not the value).
+  |  NEXT_RES_IDX NEXT_ASIZE
+  |  lea NEXT_PTR, NEXT_RES_PTR
+  |  mov dword [NEXT_PTR+dword*3], LJ_TNIL
+  |  NEXT_LEAVE
+  |.endif
+  |
   |//-----------------------------------------------------------------------
   |//-- Assertions ---------------------------------------------------------
   |//-----------------------------------------------------------------------
@@ -3271,19 +3314,25 @@ static void build_subroutines(BuildCtx *
   |
   |  // Copy stack slots.
   |  movzx ecx, byte CCSTATE->nsp
-  |  sub ecx, 1
+  |.if X64
+  |  sub ecx, 8
   |  js >2
   |1:
-  |.if X64
-  |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
-  |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+  |  mov rax, [CCSTATE+rcx+offsetof(CCallState, stack)]
+  |  mov [rsp+rcx+CCALL_SPS_EXTRA*8], rax
+  |  sub ecx, 8
+  |  jns <1
+  |2:
   |.else
-  |  mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
-  |  mov [esp+ecx*4], eax
-  |.endif
-  |  sub ecx, 1
+  |  sub ecx, 4
+  |  js >2
+  |1:
+  |  mov eax, [CCSTATE+ecx+offsetof(CCallState, stack)]
+  |  mov [esp+ecx], eax
+  |  sub ecx, 4
   |  jns <1
   |2:
+  |.endif
   |
   |.if X64
   |  movzx eax, byte CCSTATE->nfpr
@@ -4787,10 +4836,11 @@ static void build_ins(BuildCtx *ctx, BCO
     break;
 
   case BC_ITERN:
-    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |.if JIT
-    |  // NYI: add hotloop, record BC_ITERN.
+    |  hotloop RB
     |.endif
+    |->vm_IITERN:
+    |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
     |  mov TMP1, KBASE			// Need two more free registers.
     |  mov TMP2, DISPATCH
     |  mov TAB:RB, [BASE+RA*8-16]
@@ -4878,14 +4928,28 @@ static void build_ins(BuildCtx *ctx, BCO
     |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
     |  branchPC RD
     |  mov dword [BASE+RA*8-8], 0	// Initialize control var.
-    |  mov dword [BASE+RA*8-4], 0xfffe7fff
+    |  mov dword [BASE+RA*8-4], LJ_KEYINDEX
     |1:
     |  ins_next
     |5:  // Despecialize bytecode if any of the checks fail.
     |  mov PC_OP, BC_JMP
     |  branchPC RD
+    |.if JIT
+    |  cmp byte [PC], BC_ITERN
+    |  jne >6
+    |.endif
     |  mov byte [PC], BC_ITERC
     |  jmp <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+    |  movzx RC, word [PC+2]
+    |  mov TRACE:RA, [RA+RC*4]
+    |  mov eax, TRACE:RA->startins
+    |  mov al, BC_ITERC
+    |  mov dword [PC], eax
+    |  jmp <1
+    |.endif
     break;
 
   case BC_VARG:
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/xb1build.bat
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/xb1build.bat
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/xb1build.bat
@@ -14,7 +14,7 @@
 @set LJMT=mt /nologo
 @set DASMDIR=..\dynasm
 @set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 
 %LJCOMPILE% host\minilua.c
 @if errorlevel 1 goto :BAD
@@ -31,6 +31,9 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
 @if errorlevel 1 goto :BAD
 
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
 %LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj
Index: wrk-4.2.0/obj/LuaJIT-2.1/src/xedkbuild.bat
===================================================================
--- wrk-4.2.0.orig/obj/LuaJIT-2.1/src/xedkbuild.bat
+++ wrk-4.2.0/obj/LuaJIT-2.1/src/xedkbuild.bat
@@ -14,7 +14,7 @@
 @set LJMT=mt /nologo
 @set DASMDIR=..\dynasm
 @set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
 
 %LJCOMPILE% host\minilua.c
 @if errorlevel 1 goto :BAD
@@ -31,6 +31,9 @@ if exist minilua.exe.manifest^
 minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_ppc.dasc
 @if errorlevel 1 goto :BAD
 
+if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
+minilua host\genversion.lua
+
 %LJCOMPILE% /I "." /I %DASMDIR% /D_XBOX_VER=200 /DLUAJIT_TARGET=LUAJIT_ARCH_PPC  host\buildvm*.c
 @if errorlevel 1 goto :BAD
 %LJLINK% /out:buildvm.exe buildvm*.obj