diff -Naur a/dynasm/dasm_sw64.h b/dynasm/dasm_sw64.h
--- a/dynasm/dasm_sw64.h	1970-01-01 08:00:00.000000000 +0800
+++ b/dynasm/dasm_sw64.h	2024-11-05 13:30:00.007085745 +0800
@@ -0,0 +1,425 @@
+/*
+** DynASM SW64 encoding engine.
+** Copyright (C) 2023 Sheng Kai. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+
+#define DASM_ARCH "sw64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a, b, c, d) 0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec) << 24)
+#define DASM_POS2SEC(pos) ((pos) >> 24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;       /* Biased buffer pointer (negative section bias). */
+  int *buf;        /* True buffer pointer. */
+  size_t bsize;    /* Buffer size in bytes. */
+  int pos;         /* Biased buffer position. */
+  int epos;        /* End of biased buffer position - max single put. */
+  int ofs;         /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;             /* Allocated size of this structure. */
+  dasm_ActList actionlist;  /* Current actionlist pointer. */
+  int *lglabels;            /* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;            /* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;           /* Array of globals (bias -10). */
+  dasm_Section *section;    /* Pointer to active section. */
+  size_t codesize;          /* Total size of all code sections. */
+  int maxsection;           /* 0 <= sectionidx < maxsection. */
+  int status;               /* Status code. */
+  dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State) + (ms - 1) * sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+  dasm_State *D;
+  size_t psz = 0;
+  int i;
+  Dst_REF = NULL;
+  DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+  D = Dst_REF;
+  D->psize = psz;
+  D->lglabels = NULL;
+  D->lgsize = 0;
+  D->pclabels = NULL;
+  D->pcsize = 0;
+  D->globals = NULL;
+  D->maxsection = maxsection;
+  for (i = 0; i < maxsection; i++) {
+    D->sections[i].buf = NULL;  /* Need this for pass3.
*/ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. 
*/ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: case DASM_IMMS: +#ifdef DASM_CHECKS + CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I); +#endif + n >>= ((ins >> 10) & 31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc * sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16) - 0xff00; + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); +// if (!(n >= 0)) { +// D->status = DASM_S_UNDEF_LG|(p-D->actionlist-1); +// printf("ZHJ222: secnum is %d, status: 0x%x\n", secnum, ctx->D->status); +// return; +// } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n); + if (ins & 2048) + n = n - (int)((char *)cp - base); + else + n = (n + (int)(size_t)base) & 0x0fffffff; + patchrel: + CK((n & 3) == 0 && ((n + ((ins & 2048) ? 0x00020000 : 0)) >> + ((ins & 2048) ? 18 : 28)) == 0, + RANGE_REL); + cp[-1] |= ((n >> 2) & ((ins & 2048) ? 0x001fffff : 0x03ffffff)); + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ + case DASM_IMM: + cp[-1] |= (n & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc * sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC | (D->section - D->sections); + return D->status; +} +#endif + diff -Naur a/dynasm/dasm_sw64.lua b/dynasm/dasm_sw64.lua --- a/dynasm/dasm_sw64.lua 1970-01-01 08:00:00.000000000 +0800 +++ b/dynasm/dasm_sw64.lua 2024-11-05 13:31:27.497019233 +0800 @@ -0,0 +1,767 @@ +------------------------------------------------------------------------------ +-- DynASM SW64 module. +-- +-- Copyright (C) 2023 Sheng Kai. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "sw64", + description = "DynASM SW64 module", + version = "1.4.0", + vernum = 10400, + release = "2023-02-03", + author = "Sheng Kai", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +-- Inherited tables and callbacks. 
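+-- g_arch and g_opt are filled in by _M.setup(); wline, werror, wfatal and
+-- wwarn are the writer/error callbacks handed over by the core in _M.passcb().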
+local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMS", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(0xff000000 + w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n >= 0xff000000 then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. 
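+-- Note: label numbers 1..19 are reserved for the local labels 1..9
+-- (fwd/bkwd, see parse_label() below), so global label numbering starts at 20.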
+local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { zero="r31", sp="r30", ra="r26", pv="r27", fzero="f31" } -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "r30" then return "sp" + elseif s == "r26" then return "ra" + elseif s == "r31" then return "zero" + elseif s == "f31" then return "fzero" + elseif s == "r27" then return "pv" end + return s +end + +------------------------------------------------------------------------------ + +-- Template strings for SW64 instructions. 
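+--
+-- Each template is an 8-digit hex base opcode word followed by one letter per
+-- operand, decoded by the ".template__" handler below:
+--   A/B/C/D  GPR at bit 21/16/5/0        F/G/H/I  FPR at bit 21/16/5/0
+--   i/j      signed/unsigned 8-bit immediate at bit 13
+--   o/p      16-bit/12-bit displacement operand "imm(reg)"
+--   b        PC-relative branch target (label)
+-- E.g. addl_3 = "40000100ABD" encodes "addl Ra, Rb, Rd" on base word 0x40000100.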
+local map_op = { + ldi_2 = "f8000000Ao", --0x3e + ldih_2 = "fc000000Ao", --0x3f + + ldl_2 = "8c000000Ao", --0x23 + ldw_2 = "88000000Ao", --0x22 + ldhu_2 = "84000000Ao", --0x21 + ldbu_2 = "80000000Ao", --0x20 + + fstd_2 = "bc000000Fo", --0x2f + fldd_2 = "9c000000Fo", --0x27 + flds_2 = "98000000Fo", --0x26 + fsts_2 = "b8000000Fo", --0x2e + ifmovd_2= "601f0820AI", + ifmovs_2= "601f0800AI", + fimovd_2= "401f0f00FD", + fcvtds_2= "63e00420GI", + fcvtsd_2= "63e00400GI", + fcvtld_2= "63e005e0GI", + fcvtls_2= "63e005a0GI", + fcvtdl_2= "63e004e0GI", + fcvtdln_2 = "63e004a0GI", + fcvtdlp_2 = "63e00460GI", + fcvtdlz_2 = "63e00480GI", + fcvtwl_2 = "63e00500GI", + fcvtlw_2 = "63e00520GI", + fcpys_3 = "60000600FGI", + fcpysn_3 = "60000640FGI", + + faddd_3 = "60000020FGI", + fsubd_3 = "60000060FGI", + fmuld_3 = "600000a0FGI", + fdivd_3 = "600000e0FGI", + + fcmpeq_3 = "60000200FGI", + fcmple_3 = "60000220FGI", + fcmplt_3 = "60000240FGI", + fcmpun_3 = "60000260FGI", + + stl_2 = "ac000000Ao", --0x2b + stw_2 = "a8000000Ao", --0x2a + sth_2 = "a4000000Ao", --0x29 + stb_2 = "a0000000Ao", --0x28 + + addli_3 = "48000100AjD", --0x12.08 + subli_3 = "48000120AjD", --0x12.09 + mulli_3 = "48000300AjD", --0x12.18 + mull_3 = "40000300ABD", --0x10.18 + mulw_3 = "40000200ABD", --0x10.10 + addl_3 = "40000100ABD", --0x10.08 + subl_3 = "40000120ABD", --0x10.09 + subw_3 = "40000020ABD", --0x10.01 + subwi_3 = "48000020AjD", --0x12.01 + s8addl_3 = "40000180ABD", --0x10.0c + s8addli_3 = "48000180AjD", --0x12.0c + s8addw_3 = "40000080ABD", --0x10.04 + s8addwi_3 = "48000080AjD", --0x12.04 + s4addl_3 = "40000140ABD", --0x10.0a + s4addli_3 = "48000140AjD", --0x12.0a + s4addw_3 = "40000040ABD", --0x10.02 + s4addwi_3 = "48000040AjD", --0x12.02 + addw_3 = "40000000ABD", --0x10.00 + addwi_3 = "48000000AjD", --0x12.00 + divw_3 = "40000220ABD", --0x10.11 + udivw_3 = "40000240ABD", --0x10.12 + remw_3 = "40000260ABD", --0x10.13 + uremw_3 = "40000280ABD", --0x10.14 + divl_3 = "40000340ABD", --0x10.1a + udivl_3 = "40000360ABD", --0x10.1b + reml_3 = "40000380ABD", --0x10.1c + ureml_3 = "400003a0ABD", --0x10.1d + + andi_3 = "48000700AjD", + and_3 = "40000700ABD", + ornoti_3 ="48000760AjD", + ornot_3 = "40000760ABD", + bis_3 = "40000740ABD", + bisi_3 = "48000740AjD", + bic_3 = "40000720ABD", + bici_3 = "48000720AjD", + xori_3 = "48000780AjD", + xor_3 = "40000780ABD", + slli_3 = "48000900AjD", + sll_3 = "40000900ABD", + srli_3 = "48000920AjD", + srl_3 = "40000920ABD", + srai_3 = "48000940AjD", + sra_3 = "40000940ABD", + roll_3 = "40000960ABD", + rolli_3 = "48000960AjD", + sllw_3 = "40000980ABD", + sllwi_3 = "48000980AjD", + srlw_3 = "400009a0ABD", + srlwi_3 = "480009a0AjD", + sraw_3 = "400009c0ABD", + srawi_3 = "480009c0AjD", + rolw_3 = "400009e0ABD", + rolwi_3 = "480009e0AjD", + + beq_2 = "c0000000Ab", --0x30 + bne_2 = "c4000000Ab", --0x31 + blt_2 = "c8000000Ab", --0x32 + ble_2 = "cc000000Ab", --0x33 + bgt_2 = "d0000000Ab", --0x34 + bge_2 = "d4000000Ab", --0x35 + + fbeq_2 = "e0000000Fb", + fbge_2 = "f4000000Fb", + fbgt_2 = "f0000000Fb", + fble_2 = "ec000000Fb", + fblt_2 = "e8000000Fb", + fbne_2 = "e4000000Fb", + + call_2 = "04000000Ao", --0x1 + ret_2 = "08000000Ao", --0x2 + jmp_2 = "0C000000Ao", --0x3 + br_2 = "10000000Ab", --0x4 + getpc_1 = "10000000A", --br Rn, 0 + + + cmpeq_3 = "40000500ABD", + cmplt_3 = "40000520ABD", + cmplti_3 = "48000520AjD", + cmple_3 = "40000540ABD", + cmpult_3 = "40000560ABD", + cmpulti_3 = "48000560AjD", + cmpule_3 = "40000580ABD", + sbt_3 = "400005a0ABD", + sbti_3 = "480005a0AjD", + cbt_3 = "400005c0ABD", + cbti_3 = 
"480005c0AjD", + + + maskhw_3 = "40000cc0ABD", + maskhwi_3 = "48000cc0AjD", + maskhl_3 = "40000ce0ABD", + maskhli_3 = "48000ce0AjD", + maskll_3 = "40000c60ABD", + masklli_3 = "48000c60AjD", + + zap_3 = "40000d00ABD", + zapi_3 = "48000d00AjD", + + extlb_3 = "48000a00AjD", + extlh_3 = "48000a20AjD", + extlw_3 = "48000a40AjD", + extll_3 = "48000a60AjD", + exthb_3 = "48000a80AjD", + exthh_3 = "48000aa0AjD", + exthw_3 = "48000ac0AjD", + exthl_3 = "48000ae0AjD", + + inslb_3 = "48000800AjD", + + maskhw_3 = "48000cc0AjD", + + sexth_2 = "43e00d60BD", + sexthi_2 = "4be00d60iD", + sextb_2 = "43e00d40BD", + sextbi_2 = "4be00d40iD", + + selle_4 = "44000c00ABCD", + sellei_4 = "4c000c00AiCD", + sellt_4 = "44001000ABCD", + sellti_4 = "4c001000AiCD", + selgt_4 = "44000800ABCD", + selgti_4 = "4c000800AiCD", + selge_4 = "44000400ABCD", + selgei_4 = "4c000400AiCD", + selne_4 = "44001400ABCD", + selnei_4 = "4c001400AiCD", + seleq_4 = "44000000ABCD", + seleqi_4 = "4c000000AiCD", + + fselne_4 = "64004400FGHI", + fseleq_4 = "64004000FGHI", + + fsqrtd_2 = "63e00120GI", + + setfpec1_0 = "60000aa0", + setfpec3_0 = "60000ae0", + + syscall_0 = "00000083", --0x0.83 + bpt_0 = "00000080", --0x0.80 + + ldw_dec_2 = "20004000Ap", + ldl_dec_2 = "20005000Ap", +} + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + + +local function parse_imm(imm, bits, shift, scale, signed) + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[rf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_disp(disp, width) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = shl(parse_gpr(reg), 16) + local extname = match(imm, "^extern%s+(%S+)$") + if extname then + waction("REL_EXT", map_extern[extname], nil, 1) + return r + else + return r + parse_imm(imm, width, 0, 0, true) + end + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if tp then + waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) + return shl(r, 16) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] 
(local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions (ins/ext). + if secpos+2 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "A" then + op = op + shl(parse_gpr(params[n]), 21); n = n + 1 + elseif p == "B" then + op = op + shl(parse_gpr(params[n]), 16); n = n + 1 + elseif p == "C" then + op = op + shl(parse_gpr(params[n]), 5); n = n + 1 + elseif p == "D" then + op = op + shl(parse_gpr(params[n]), 0); n = n + 1 + elseif p == "F" then -- float version A + op = op + shl(parse_fpr(params[n]), 21); n = n + 1 + elseif p == "G" then -- float version B + op = op + shl(parse_fpr(params[n]), 16); n = n + 1 + elseif p == "H" then -- float version C + op = op + shl(parse_fpr(params[n]), 5); n = n + 1 + elseif p == "I" then -- float version D + op = op + shl(parse_fpr(params[n]), 0); n = n + 1 + + elseif p == "i" then + op = op + parse_imm(params[n], 8, 13, 0, true); n = n + 1 + elseif p == "j" then + op = op + parse_imm(params[n], 8, 13, 0, false); n = n + 1 + elseif p == "o" then + op = op + parse_disp(params[n], 16); n = n + 1 + elseif p == "p" then + op = op + parse_disp(params[n], 12); n = n + 1 + elseif p == "b" then + local mode, m, s = parse_label(params[n], false) + if p == "b" then m = m + 2048 end + waction("REL_"..mode, m, s, 1) + n = n + 1 + else + assert(false) + end + end + wputpos(pos, op) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. 
+ wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +map_op[".str100_1"] = function(params) + function empty(s) + str = "" + i=0 + repeat + str = str .. "\0" + i = i+1 + until i >= s + return str + end + str = string.format("%s\n", params[1]) + if #str > 100 then + wfatal(".str100 only support string size below 100") + end + str = str..empty(100-#str) + i=0 + while i ~= #str do + wputxw(shl(string.byte(str, i+4), 24) + + shl(string.byte(str, i+3), 16) + + shl(string.byte(str, i+2), 8) + + shl(string.byte(str, i+1), 0)) + i = i + 4 + end +end + + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. 
+function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ diff -Naur a/Makefile b/Makefile --- a/Makefile 2024-11-05 09:58:44.429962466 +0800 +++ b/Makefile 2024-11-05 14:07:25.739965053 +0800 @@ -99,7 +99,7 @@ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua \ - dis_mips64r6.lua dis_mips64r6el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua dis_sw64.lua\ vmdef.lua ifeq (,$(findstring Windows,$(OS))) diff -Naur a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c --- a/src/host/buildvm_asm.c 2024-11-05 09:58:44.401962793 +0800 +++ b/src/host/buildvm_asm.c 2024-11-05 11:04:43.382923361 +0800 @@ -151,7 +151,7 @@ ins, sym); exit(1); } -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS || LJ_TARGET_SW64 fprintf(stderr, "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); diff -Naur a/src/host/buildvm.c b/src/host/buildvm.c --- a/src/host/buildvm.c 2024-11-05 09:58:44.397962840 +0800 +++ b/src/host/buildvm.c 2024-11-05 11:03:43.422758962 +0800 @@ -67,6 +67,8 @@ #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" +#elif LJ_TARGET_SW64 +#include "../dynasm/dasm_sw64.h" #else #error "No support for this architecture (yet)" #endif diff -Naur a/src/jit/bcsave.lua b/src/jit/bcsave.lua --- a/src/jit/bcsave.lua 2024-11-05 09:58:44.401962793 +0800 +++ b/src/jit/bcsave.lua 2024-11-05 11:05:26.331041117 +0800 @@ -101,6 +101,7 @@ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + sw64 = { e = "le", b = 64, m = 39190, p = 0x9916, }, } local map_os = { diff -Naur a/src/jit/dis_sw64.lua b/src/jit/dis_sw64.lua --- a/src/jit/dis_sw64.lua 1970-01-01 08:00:00.000000000 +0800 +++ b/src/jit/dis_sw64.lua 2024-11-05 13:32:17.092605371 +0800 @@ -0,0 +1,649 @@ +---------------------------------------------------------------------------- +-- LuaJIT SW64 disassembler module. +-- +-- Copyright (C) 2019 deepin inc. All rights reserved. +-- Released under the MIT/X license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles all standard SW64 instructions. 
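+-- Decoding is driven by the primary opcode in bits 26..31; the arithmetic
+-- classes additionally select the mnemonic via a function field whose shift
+-- and mask are given by class_tabs below.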
+------------------------------------------------------------------------------ +local type = type +local byte, format = string.byte, string.format +local match, gmatch = string.match, string.gmatch +local concat = table.concat +local bit = require("bit") +local band, bor, tohex = bit.band, bit.bor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift + +------------------------------------------------------------------------------ +-- Primary and extended opcode maps +------------------------------------------------------------------------------ + +local OPC_SYSCALL = "" +local OPC_MISI_MEMORY, OPC_FUNC_MEMORY = "", "" +local OPC_MEMORY_F = "FBo" +local OPC_MEMORY = "ABo" +local OPC_CONTROL = "Ab" +local OPC_CONTROL_F = "Fb" +local OPC_ARITHMETIC = { + shift = 5, + mask = 0xff, + pat = "ABD", +} +local OPC_ARITHMETIC_F = { + shift = 5, + mask = 0xff, + pat = "FGI", +} +local OPC_ARITHMETIC_I = { + shift = 5, + mask = 0xff, + pat = "AjD", +} +local OPC_COMPLEX_ARITHMETIC = { + shift = 10, + mask = 0x7, + pat = "ABCD", +} +local OPC_COMPLEX_ARITHMETIC_F = { + shift = 10, + mask = 0x1f, + pat = "FGHI", +} + +local ignores_tabs = { + F = { "FCVT", "IFMOV", "FCPY" }, +} + +function should_ignore(name, field) + pat = ignores_tabs[field] or {} + for _, p in ipairs(pat) do + if match(name, p) then + return true + end + end + return false +end + +local class_tabs = { + [0x00] = OPC_SYSCALL, + [0x01] = OPC_MEMORY, + [0x02] = OPC_MEMORY, + [0x03] = OPC_MEMORY, + [0x04] = OPC_CONTROL, + [0x05] = OPC_CONTROL, + [0x06] = OPC_MISI_MEMORY, + [0x08] = OPC_FUNC_MEMORY, + [0x10] = OPC_ARITHMETIC, + [0x11] = OPC_COMPLEX_ARITHMETIC, + [0x12] = OPC_ARITHMETIC_I, + [0x13] = OPC_ARITHMETIC_I, + [0x18] = OPC_ARITHMETIC_F, + [0x19] = OPC_COMPLEX_ARITHMETIC_F, + [0x20] = OPC_MEMORY, + [0x21] = OPC_MEMORY, + [0x22] = OPC_MEMORY, + [0x23] = OPC_MEMORY, + [0x24] = OPC_MEMORY, + [0x25] = OPC_MEMORY, + [0x26] = OPC_MEMORY_F, + [0x27] = OPC_MEMORY_F, + [0x28] = OPC_MEMORY, + [0x29] = OPC_MEMORY, + [0x2A] = OPC_MEMORY, + [0x2B] = OPC_MEMORY, + [0x2C] = OPC_MEMORY, + [0x2D] = OPC_MEMORY, + [0x2E] = OPC_MEMORY_F, + [0x2F] = OPC_MEMORY_F, + [0x30] = OPC_CONTROL, + [0x31] = OPC_CONTROL, + [0x32] = OPC_CONTROL, + [0x33] = OPC_CONTROL, + [0x34] = OPC_CONTROL, + [0x35] = OPC_CONTROL, + [0x36] = OPC_CONTROL, + [0x37] = OPC_CONTROL, + [0x38] = OPC_CONTROL_F, + [0x39] = OPC_CONTROL_F, + [0x3A] = OPC_CONTROL_F, + [0x3B] = OPC_CONTROL_F, + [0x3C] = OPC_CONTROL_F, + [0x3D] = OPC_CONTROL_F, + [0x3e] = OPC_MEMORY, + [0x3f] = OPC_MEMORY, +} + +local map_pri = { + [0x00] = { + [0x0] = "SYSCALL/B", + [0x1] = "SYSCALL" + }, + [0x01]= {[0x0]= "CALL"}, + [0x02]= {[0x0]= "RET"}, + [0x03]= {[0x0]= "JMP"}, + [0x04]= {[0x0]= "BR"}, + [0x05]= {[0x0]= "BSR"}, + [0x06]= { + [0x0000] = "MEMB", + [0x0001] = "IMEMB", + [0x1000] = "RD_F", + [0x1020] = "WR_F", + }, + [0x08]= { + [0x0]= "LLDW", + [0x1]= "LLDL", + [0x8]= "LSTW", + [0x9]= "LSTL", + }, + [0x10]= { + [0x00]= "ADDW", + [0x01]= "SUBW", + [0x02]= "S4ADDW", + [0x03]= "S4SUBW", + [0x04]= "S8ADDW", + [0x05]= "S8SUBW", + [0x08]= "ADDL", + [0x09]= "SUBL", + [0x0a]= "S4ADDL", + [0x0b]= "S4SUBL", + [0x0c]= "S8ADDL", + [0x0d]= "S8SUBL", + [0x10]= "MULW", + [0x11]= "DIVW", + [0x12]= "UDIVW", + [0x13]= "REMW", + [0x14]= "UREMW", + [0x18]= "MULL", + [0x19]= "UMULH", + [0x1a]= "DIVL", + [0x1b]= "UDIVL", + [0x1c]= "REML", + [0x1d]= "UREML", + [0x1e]= "ADDPI", + [0x1f]= "ADDPIS", + [0x28]= "CMPEQ", + [0x29]= "CMPLT", + [0x2a]= "CMPLE", + [0x2b]= "CMPULT", + [0x2c]= "CMPULE", + 
[0x2d]= "SBT", + [0x2e]= "CBT", + [0x38]= "AND", + [0x39]= "BIC", + [0x3a]= "BIS", + [0x3b]= "ORNOT", + [0x3c]= "XOR", + [0x3d]= "EQV", + [0x40]= "INSLB", + [0x41]= "INSLH", + [0x42]= "INSLW", + [0x43]= "INSLL", + [0x44]= "INSHB", + [0x45]= "INSHH", + [0x46]= "INSHW", + [0x47]= "INSHL", + [0x48]= "SLL", + [0x49]= "SRL", + [0x4a]= "SRA", + [0x4b]= "ROLL", + [0x4c]= "SLLW", + [0x4d]= "SRLW", + [0x4e]= "SRAW", + [0x4f]= "ROLW", + [0x50]= "EXTLB", + [0x51]= "EXTLH", + [0x52]= "EXTLW", + [0x53]= "EXTLL", + [0x54]= "EXTHB", + [0x55]= "EXTHH", + [0x56]= "EXTHW", + [0x57]= "EXTHL", + [0x58]= "CTPOP", + [0x59]= "CTLZ", + [0x5a]= "CTTZ", + [0x60]= "MASKLB", + [0x61]= "MASKLH", + [0x62]= "MASKLW", + [0x63]= "MASKLL", + [0x64]= "MASKHB", + [0x65]= "MASKHH", + [0x66]= "MASKHW", + [0x67]= "MASKHL", + [0x68]= "ZAP", + [0x69]= "ZAPNOT", + [0x6a]= "SEXTB", + [0x6b]= "SEXTH", + [0x6c]= "CMPGEB", + [0x70]= "FIMOVS", + [0x78]= "FIMOVD", + }, + [0x11]= { + [0x0]= "SELEQ", + [0x1]= "SELGE", + [0x2]= "SELGT", + [0x3]= "SELLE", + [0x4]= "SELLT", + [0x5]= "SELNE", + [0x6]= "SELLBC", + [0x7]= "SELLBS", + }, + [0x12]= { + [0x00]= "ADDW", + [0x01]= "SUBW", + [0x02]= "S4ADDW", + [0x03]= "S4SUBW", + [0x04]= "S8ADDW", + [0x05]= "S8SUBW", + [0x08]= "ADDL", + [0x09]= "SUBL", + [0x0a]= "S4ADDL", + [0x0b]= "S4SUBL", + [0x0c]= "S8ADDL", + [0x0d]= "S8SUBL", + [0x10]= "MULW", + [0x18]= "MULL", + [0x19]= "UMULH", + [0x28]= "CMPEQ", + [0x29]= "CMPLT", + [0x2a]= "CMPLE", + [0x2b]= "CMPULT", + [0x2c]= "CMPULE", + [0X2d]= "SBT" + [0x2e]= "CBT", + [0x38]= "AND", + [0x39]= "BIC", + [0x3a]= "BIS", + [0x3b]= "ORNOT", + [0x3c]= "XOR", + [0x3d]= "EQV", + [0x40]= "INSLB", + [0x41]= "INSLH", + [0x42]= "INSLW", + [0x43]= "INSLL", + [0x44]= "INSHB", + [0x45]= "INSHH", + [0x46]= "INSHW", + [0x47]= "INSHL", + [0x48]= "SLL", + [0x49]= "SRL", + [0x4a]= "SRA", + [0x4b]= "ROLL", + [0x4c]= "SLLW", + [0x4d]= "SRLW", + [0x4e]= "SRAW", + [0x4f]= "ROLW" + [0x50]= "EXTLB", + [0x51]= "EXTLH", + [0x52]= "EXTLW", + [0x53]= "EXTLL", + [0x54]= "EXTHB", + [0x55]= "EXTHH", + [0x56]= "EXTHW", + [0x57]= "EXTHL", + [0x60]= "MASKLB", + [0x61]= "MASKLH", + [0x62]= "MASKLW", + [0x63]= "MASKLL", + [0x64]= "MASKHB", + [0x65]= "MASKHH", + [0x66]= "MASKHW", + [0x67]= "MASKHL", + [0x68]= "ZAP", + [0x69]= "ZAPNOT", + [0x6a]= "SEXTB", + [0x6b]= "SEXTH", + [0x6c]= "CMPGEB", + }, + [0x13]= { + [0x0]= "SELEQ", + [0x1]= "SELGE", + [0x2]= "SELGT", + [0x3]= "SELLE", + [0x4]= "SELLT", + [0x5]= "SELNE", + [0x6]= "SELLBC", + [0x7]= "SELLBS", + }, + [0x18]= { + [0x00]= "FADDS", + [0x01]= "FADDD", + [0x02]= "FSUBS", + [0x03]= "FSUBD", + [0x04]= "FMULS", + [0x05]= "FMULD", + [0x06]= "FDIVS", + [0x07]= "FDIVD", + [0x08]= "FSQRTS", + [0x09]= "FSQRTD", + [0x10]= "FCMPEQ", + [0x11]= "FCMPLE", + [0x12]= "FCMPLT", + [0x13]= "FCMPUN", + [0x20]= "FCVTSD", + [0x21]= "FCVTDS", + [0x22]= "FCVTDL_G", + [0x23]= "FCVTDL_P", + [0x24]= "FCVTDL_Z", + [0x25]= "FCVTDL_N", + [0x27]= "FCVTDL", + [0x28]= "FCVTWL", + [0x29]= "FCVTLW", + [0x2D]= "FCVTLS", + [0x2F]= "FCVTLD", + [0x30]= "FCPYS", + [0x31]= "FCPYSE", + [0x32]= "FCPYSN", + [0x40]= "IFMOVS", + [0x41]= "IFMOVD", + [0x50]= "RFPCR", + [0x51]= "WFPCR", + [0x54]= "SETFPEC0", + [0x55]= "SETFPEC1", + [0x56]= "SETFPEC2", + [0x57]= "SETFPEC3", + }, + [0x19]= { + [0x00]= "FMAS", + [0x01]= "FMAD", + [0x02]= "FMSS", + [0x03]= "FMSD", + [0x04]= "FNMAS", + [0x05]= "FNMAD", + [0x06]= "FNMSS", + [0x07]= "FNMSD", + + [0x10]= "FSELEQ", + [0x11]= "FSELNE", + [0x12]= "FSELLT", + [0x13]= "FSELLE", + [0x14]= "FSELGT", + [0x15]= "FSELGE", + }, + [0x1D]= {[0]= "LBR"}, + 
[0x20]= {[0]= "LDBU"}, + [0x21]= {[0]= "LDHU"}, + [0x22]= {[0]= "LDW"}, + [0x23]= {[0]= "LDL"}, + [0x24]= {[0]= "LDL_U"}, + [0x25]= {[0]= "PRI_LD"}, + [0x26]= {[0]= "FLDS"}, + [0x27]= {[0]= "FLDD"}, + [0x28]= {[0]= "STB"}, + [0x29]= {[0]= "STH"}, + [0x2A]= {[0]= "STW"}, + [0x2B]= {[0]= "STL"}, + [0x2C]= {[0]= "STL_U"}, + [0x2D]= {[0]= "PRI_ST"}, + [0x2E]= {[0]= "FSTS"}, + [0x2F]= {[0]= "FSTD"}, + [0x30]= {[0]= "BEQ"}, + [0x31]= {[0]= "BNE"}, + [0x32]= {[0]= "BLT"}, + [0x33]= {[0]= "BLE"}, + [0x34]= {[0]= "BGT"}, + [0x35]= {[0]= "BGE"}, + [0x36]= {[0]= "BLBC"}, + [0x37]= {[0]= "BLBS"}, + [0x38]= {[0]= "FBEQ"}, + [0x39]= {[0]= "FBNE"}, + [0x3A]= {[0]= "FBLT"}, + [0x3B]= {[0]= "FBLE"}, + [0x3C]= {[0]= "FBGT"}, + [0x3D]= {[0]= "FBGE"}, + [0x3e]= {[0]= "LDI"}, + [0x3f]= {[0]= "LDIH"}, +} + + +------------------------------------------------------------------------------ + +local map_gpr = { + [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "BASE", "r10", "r11", "r12", "r13", "r14", "JGL", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "JTMP", "r27", "at", "r29", "sp", "zero", +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then extra = "\t->"..sym end + end + if ctx.hexdump > 0 then + ctx.out(format("%08x %s %-7s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) + else + ctx.out(format("%08x %-7s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + ctx.pos = pos + 4 +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function get_le(ctx) + local pos = ctx.pos + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) +end + +-- Disassemble a single instruction. 
+local function disass_ins(ctx) + local op = ctx:get() + local operands = {} + local last = nil + ctx.op = op + ctx.rel = nil + + local opcode = band(rshift(op, 26), 0x3f) + local opat = map_pri[opcode] + + local fn = 0 + local pat = class_tabs[opcode] + local name = opat[0] + if pat.shift then + name = opat[band(rshift(op, pat.shift), pat.mask)] + pat = pat.pat + end + local isf = false + + if name == "FIMOVD" then + pat = "FD" + elseif name == "IFMOVD" then + pat = "AI" + end + + + for p in gmatch(pat, ".") do + local x = nil + if should_ignore(name, p) then + -- do nothing + elseif p == "A" then + x = map_gpr[band(rshift(op, 21), 31)] + elseif p == "B" then + x = map_gpr[band(rshift(op, 16), 31)] + elseif p == "C" then + x = map_gpr[band(rshift(op, 5), 31)] + elseif p == "D" then + x = map_gpr[band(rshift(op, 0), 31)] + elseif p == "F" then + isf = true + x = "f"..band(rshift(op, 21), 31) + elseif p == "G" then + isf = true + x = "f"..band(rshift(op, 16), 31) + elseif p == "H" then + isf = true + x = "f"..band(rshift(op, 5), 31) + elseif p == "I" then + isf = true + x = "f"..band(rshift(op, 0), 31) + elseif p == "o" then + local disp = arshift(lshift(op, 16), 16) + if name == "LDI" and disp == 0 then + name = "MOVE" + operands[#operands] = last + else + operands[#operands] = format("%d(%s)", disp, last) + end + elseif p == "p" then + local index = map_gpr[band(rshift(op, 16), 31)] + operands[#operands] = format("%s(%s)", index, last) + elseif p == "b" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 21), 21)*4 + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "i" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "j" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "j" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "1" then + if last == "ra" then + operands[#operands] = nil + end + else + assert(false) + end + if x then operands[#operands+1] = x; last = x end + end + return putop(ctx, name, operands) +end + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then ofs = 0 end + local stop = len and ofs+len or #ctx.code + stop = stop - stop % 4 + ctx.pos = ofs - ofs % 4 + ctx.rel = nil + while ctx.pos < stop do disass_ins(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = addr or 0 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 8 + ctx.get = get_le + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass(code, addr, out) + create(code, addr, out):disass() +end + +-- Return register name for RID. 
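+-- RIDs 0..31 are the GPRs; RIDs 32..63 map to f0..f31.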
+local function regname(r) + if r < 32 then return map_gpr[r] end + return "f"..(r-32) +end + + + +function wi_debug(__obj, op, addr) + if not addr then + addr = 0 + end + local operands = {} + local last = nil + + local opcode = band(rshift(op, 26), 0x3f) + local opat = map_pri[opcode] + + local fn = 0 + local pat = class_tabs[opcode] + local name = opat[0] + if pat.shift then + name = opat[band(rshift(op, pat.shift), pat.mask)] + pat = pat.pat + end + local isf = false + + if name == "FIMOVD" then + pat = "FD" + elseif name == "IFMOVD" then + pat = "AI" + end + + for p in gmatch(pat, ".") do + local x = nil + if should_ignore(name, p) then + -- do nothing + elseif p == "A" then + x = map_gpr[band(rshift(op, 21), 31)] + elseif p == "B" then + x = map_gpr[band(rshift(op, 16), 31)] + elseif p == "C" then + x = map_gpr[band(rshift(op, 5), 31)] + elseif p == "D" then + x = map_gpr[band(rshift(op, 0), 31)] + elseif p == "F" then + isf = true + x = "f"..band(rshift(op, 21), 31) + elseif p == "G" then + isf = true + x = "f"..band(rshift(op, 16), 31) + elseif p == "H" then + isf = true + x = "f"..band(rshift(op, 5), 31) + elseif p == "I" then + isf = true + x = "f"..band(rshift(op, 0), 31) + elseif p == "o" then + local disp = arshift(lshift(op, 16), 16) + if name == "LDI" and disp == 0 then + name = "MOVE" + operands[#operands] = last + else + operands[#operands] = format("%d(%s)", disp, last) + end + elseif p == "p" then + local index = map_gpr[band(rshift(op, 16), 31)] + operands[#operands] = format("%s(%s)", index, last) + elseif p == "b" then + x = addr + arshift(lshift(op, 21), 21)*4 + 4 + x = format("0x%08x", x) + elseif p == "i" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "j" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "j" then + x = band(rshift(op, 13), 0x000ff) + elseif p == "1" then + if last == "ra" then + operands[#operands] = nil + end + else + assert(false) + end + if x then operands[#operands+1] = x; last = x end + end + print(name, concat(operands, ", ")) +end + +-- Public module functions. +return { + create = create, + disass = disass, + regname = regname, + wi_debug = wi_debug, +} diff -Naur a/src/jit/dump.lua b/src/jit/dump.lua --- a/src/jit/dump.lua 2024-11-05 09:58:44.401962793 +0800 +++ b/src/jit/dump.lua 2024-11-05 11:06:09.087158346 +0800 @@ -618,7 +618,7 @@ ------------------------------------------------------------------------------ local gpr64 = jit.arch:match("64") -local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" +local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" or jit.arch == "sw64" -- Dump taken trace exits. local function dump_texit(tr, ex, ngpr, nfpr, ...) diff -Naur a/src/lib_jit.c b/src/lib_jit.c --- a/src/lib_jit.c 2024-11-05 09:58:44.401962793 +0800 +++ b/src/lib_jit.c 2024-11-05 11:08:11.795494788 +0800 @@ -701,7 +701,8 @@ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif - +#elif LJ_TARGET_SW64 + /* Nothing to do. */ #else #error "Missing CPU detection for this architecture" #endif diff -Naur a/src/lj_arch.h b/src/lj_arch.h --- a/src/lj_arch.h 2024-11-05 09:58:44.405962746 +0800 +++ b/src/lj_arch.h 2024-11-05 14:09:09.740423771 +0800 @@ -31,6 +31,8 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 #define LUAJIT_ARCH_mips64 7 +#define LUAJIT_ARCH_SW64 77 +#define LUAJIT_ARCH_sw64 77 /* Target OS. 
*/ #define LUAJIT_OS_OTHER 0 @@ -65,6 +67,8 @@ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 +#elif defined(__sw_64__) +#define LUAJIT_TARGET LUAJIT_ARCH_SW64 #else #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif @@ -439,6 +443,23 @@ #define LJ_ARCH_VERSION 10 #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_SW64 + +#define LJ_ARCH_NAME "sw64" +#define LJ_ARCH_ENDIAN LUAJIT_LE +#define LJ_ARCH_BITS 64 +#define LJ_TARGET_SW64 1 +#define LJ_TARGET_EHRETREG 4 //TODO +#define LJ_TARGET_EHRAREG 8 //??TODO +#define LJ_TARGET_GC64 1 +#define LJ_TARGET_JUMPRANGE 21 /* 2*2^21 = 4MB-aligned region */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#define LJ_ARCH_VERSION 10 // ?? +#define LJ_PAGESIZE 8192 +#define SW64_DEBUG_WI 0 + #else #error "No target architecture defined" #endif @@ -674,7 +695,7 @@ #endif #endif -#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 +#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_SW64 #define LJ_NO_UNWIND 1 #endif diff -Naur a/src/lj_asm.c b/src/lj_asm.c --- a/src/lj_asm.c 2024-11-05 09:58:44.405962746 +0800 +++ b/src/lj_asm.c 2024-11-05 11:14:40.200559720 +0800 @@ -227,6 +227,8 @@ #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" +#elif LJ_TARGET_SW64 +#include "lj_emit_sw64.h" #else #error "Missing instruction emitter for target CPU" #endif @@ -1708,6 +1710,8 @@ #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" +#elif LJ_TARGET_SW64 +#include "lj_asm_sw64.h" #else #error "Missing assembler for target CPU" #endif @@ -2598,7 +2602,9 @@ asm_snap_prev(as); break; /* Done. */ } - +#if SW64_DEBUG_WI + memset(as->mcbot, 0, sizeof(MCode)*(as->mctop - as->mcbot)); +#endif /* Otherwise try again with a bigger IR. */ lj_trace_free(J2G(J), J->curfinal); J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ diff -Naur a/src/lj_asm_sw64.h b/src/lj_asm_sw64.h --- a/src/lj_asm_sw64.h 1970-01-01 08:00:00.000000000 +0800 +++ b/src/lj_asm_sw64.h 2024-11-05 22:08:21.417108255 +0800 @@ -0,0 +1,2131 @@ +/* +** SW64 IR assembler (SSA IR -> machine code). +** Copyright (C) 2019 deepin inc. See Copyright Notice in luajit.h +*/ + +#include +#define TODO do {printf("\e[1;34mTODO IMPLEMENT %s\e[m\n", __FUNCTION__); asm("bpt;bpt");} while(0); + +#define EXIT_ROOM 6 + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate a register or RID_ZERO. */ +static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) + return RID_ZERO; + r = ra_allocref(as, ref, allow); + } else { + ra_noweak(as, r); + } + return r; +} + +/* Allocate two source registers for three-operand instructions. 
*/ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_alloc1z(as, ir->op2, allow); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_alloc1z(as, ir->op1, allow); + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki16(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (checki16(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; + intptr_t jgl = (intptr_t)J2G(as->J); + if ((uintptr_t)(ofs-jgl) < 65536) { + *ofsp = ofs-jgl-32768; + return RID_JGL; + } else { + *ofsp = (int16_t)ofs; + return ra_allock(as, ofs-(int16_t)ofs, allow); + } + } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); + return RID_JGL; + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ + +/* --- LOAD ADDRESS MACRO ------------------------------------------------ */ + +static int asm_lda(ASMState* as, MCode* mcp, Reg dest, uintptr_t addr) +{ + int count = 0; + int16_t hi, lo; + MCode mtmp[5] = {0}; + split64AddrHI32(addr, &hi, &lo); + if (hi != 0) { + // ldih dest, hi(zero) + mtmp[count++] = SW64I_LDIH | SW64F_A(dest) | SW64F_DISP(hi, RID_ZERO); + } + if (lo != 0) { + // ldi dest, lo(dest or zero) + mtmp[count++] = SW64I_LDI | SW64F_A(dest) | SW64F_DISP(lo, hi ? 
dest : RID_ZERO); + } + if (hi || lo) { + // slli dest, 32, dest + mtmp[count++] = SW64I_SLLI | SW64F_A(dest) | SW64F_j(32) | SW64F_D(dest); + } + + split64AddrLO32(addr, &hi, &lo); + mtmp[count] = SW64I_LDIH | SW64F_A(dest) | SW64F_DISP(hi, count > 1 ? dest : RID_ZERO); + count++; + mtmp[count++] = SW64I_LDI | SW64F_A(dest) | SW64F_DISP(lo, dest); + + for (int i=count-1; i>=0; i--) { + __WI(&mcp[i-count], mtmp[i]); + } + return count; +} + + +/* -- Guard handling ------------------------------------------------------ */ + +/* Need some spare long-range jump slots, for out-of-range branches. */ +#define SW64_SPAREJUMP 4 + +/* Setup spare long-range jump slots per mcarea. */ +static void asm_sparejump_setup(ASMState *as) +{ + MCode *mxp = as->mctop; + if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { + mxp -= SW64_SPAREJUMP*2; + lj_assertA(SW64I_NOP == 0x43ff075f, "bad NOP"); + memset(mxp, 0, SW64_SPAREJUMP*2*sizeof(MCode)); + as->mctop = mxp; + } +} + +static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) +{ + MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); + int slot = SW64_SPAREJUMP; + while (slot--) { + mxp -= 2; + if (*mxp == tjump) { + return mxp; + } else if (*mxp == SW64I_NOP) { + *mxp = tjump; + return mxp; + } + } + return NULL; +} + +/* Setup exit stub after the end of each trace. */ +static void asm_exitstub_setup(ASMState *as) +{ + MCode *mxp = as->mctop; + /* + stw TMP, 0(sp); //store exit number + + ldi TMP, traceno(zero); + lda at, lj_vm_exit_handler + call zero, (at); + */ + + __WI(--mxp, SW64I_CALL | SW64F_A(RID_ZERO) | SW64F_DISP(0, RID_R28)); + + mxp -= asm_lda(as, mxp, RID_R28, (uintptr_t)(void*)lj_vm_exit_handler); + + __WI(--mxp, SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISPI(as->T->traceno)); + +#if SW64_DEBUG_WI + __WI(--mxp, SW64I_STL | SW64F_A(RID_TMP) | SW64F_DISP(0, RID_SP)); +#else + __WI(--mxp, SW64I_STW | SW64F_A(RID_TMP) | SW64F_DISP(0, RID_SP)); +#endif + + as->mctop = mxp; +} + +/* Keep this in-sync with exitstub_trace_addr(). */ +#define asm_exitstub_addr(as) ((as)->mctop) + +/* Emit conditional branch to exit for guard. */ +static void asm_guard(ASMState *as, SW64Ins mi, Reg a) +{ + lua_assert(a != RID_TMP); + MCode *target = asm_exitstub_addr(as); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->invmcp = NULL; + as->loopinv = 1; + as->mcp = p+1; + mi = invert_cond(mi); + target = p; /* Patch target later in asm_loop_fixup. 
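+** The condition has been flipped here and the real branch target is patched
+** in later by asm_loop_fixup, once the loop head address is known.  In the
+** normal case the guard emits, roughly, in execution order:
+**   ldi  TMP, snapno(zero)    -- snapshot number for the exit handler
+**   bxx  a, exitstub          -- conditional branch to the per-trace exit stub
+** which is what the two emit calls below produce.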
*/ + } + lua_assert(as->snapno >= 0); + + // bxx a, target + emit_branch(as, mi, a, target); + emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); +} +static void asm_compare_guard(ASMState* as, SW64Ins cmp, + Reg a, Reg b, MCode *target) +{ + switch(SW64_OP(cmp)) { + case SW64_OP(0x60000000): + emit_branch(as, SW64I_FBNE, RID_F28, target); +#if SW64_DEBUG_WI + emit_Ao(as, SW64I_LDI, RID_TMP, RID_TMP, as->snapno); + emit_loadu64(as, RID_TMP, (((unsigned long)(void*)as->mcp) << 32)); +#else + // ldi RID_TMP, as->snapno(zero) + emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); +#endif + emit_FGI(as, cmp, a, b, RID_F28); + break; + case SW64_OP(0x40000000): + emit_branch(as, SW64I_BNE, RID_R28, target); +#if SW64_DEBUG_WI + emit_Ao(as, SW64I_LDI, RID_TMP, RID_TMP, as->snapno); + emit_loadu64(as, RID_TMP, (((unsigned long)(void*)as->mcp) << 32)); +#else + // ldi RID_TMP, as->snapno(zero) + emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); +#endif + emit_ABD(as, cmp, a, b, RID_R28); + break; + default: + lua_assert(!"NOT HRERE"); + } +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, SW64Ins mi, Reg rt, IRRef ref, + RegSet allow, int32_t ofs) +{ + IRIns *ir = IR(ref); + Reg base; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + if (ir->o == IR_ADD) { + intptr_t ofs2; + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), + checki16(ofs2))) { + ref = ir->op1; + ofs = (int32_t)ofs2; + } + } else if (ir->o == IR_STRREF) { + intptr_t ofs2 = 65536; + lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs2 = ofs + get_kval(as, ir->op2); + ref = ir->op1; + } else if (irref_isk(ir->op1)) { + ofs2 = ofs + get_kval(as, ir->op1); + ref = ir->op2; + } + if (!checki16(ofs2)) { + /* NYI: Fuse ADD with constant. */ + Reg right, left = ra_alloc2(as, ir, allow); + right = (left >> 8); left &= 255; + emit_Ao(as, mi, rt, RID_TMP, ofs); + emit_ABD(as, SW64I_ADDL, left, right, RID_TMP); + return; + } + ofs = ofs2; + } + } + base = ra_alloc1(as, ref, allow); + emit_Ao(as, mi, rt, base, ofs); +} + +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func, 1); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + if (ref) { + IRIns *ir = IR(ref); + if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && + !(ci->flags & CCI_VARARG)) { + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ + ra_leftov(as, fpr, ref); + fpr += 1; + gpr += 1; + } else { + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ + if (irt_isfp(ir->t)) { + RegSet of = as->freeset; + Reg r; + /* Workaround to protect argument GPRs from being used for remat. 
*/ + as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); + r = ra_alloc1(as, ref, RSET_FPR); + as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); + if (irt_isnum(ir->t)) { + emit_GI(as, SW64I_FCVTLD, r, r); + emit_AI(as, SW64I_IFMOVD, gpr, r); + gpr++; fpr++; + } else if (irt_isfloat(ir->t)) { + emit_GI(as, SW64I_FCVTLS, r, r); + emit_AI(as, SW64I_IFMOVS, gpr, r); + gpr++; fpr++; + } + } else { + ra_leftov(as, gpr, ref); + gpr++; fpr++; + } + } else { + Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + if (irt_isnum(ir->t)) { + emit_Ao(as, SW64I_FSTD, r, RID_SP, ofs); + } else if(irt_isfloat(ir->t)) { + emit_Ao(as, SW64I_FSTS, r, RID_SP, ofs); + } else { + emit_Ao(as, SW64I_STL, r, RID_SP, ofs); + } + ofs += 8; + } + } + } else { + fpr = REGARG_LASTFPR+1; + if (gpr <= REGARG_LASTGPR) { + gpr++; fpr++; + } else { + ofs += 8; + } + } + checkmclim(as); + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + WI_DEBUG_BEFORE(); + RegSet drop = RSET_SCRATCH; + if ((ci->flags & CCI_NOFPRCLOBBER)) + drop &= ~RSET_FPR; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + int32_t ofs = sps_scale(ir->s); + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + // This doesn't require FCVTLD, refer to the `lj_math_random_step` + emit_AI(as, SW64I_IFMOVD, RID_RET, dest); + } + if (ofs) { + emit_Ao(as, SW64I_STL, RID_RET, RID_SP, ofs); + } + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else { + ra_destreg(as, ir, RID_RET); + } + } + WI_DEBUG_END(); +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(void *)get_kval(as, func); + } else { /* Need specific register for indirect calls. */ + Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); + MCode *p = as->mcp; + + __WI(--p, SW64I_CALL | SW64F_A(RID_RA) | SW64F_B(r)); + if (r != RID_CFUNCADDR) + __WI(--p, SW64I_LDI | SW64F_A(RID_CFUNCADDR) | SW64F_DISP(0, r)); + + as->mcp = p; + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. 
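+** A rough equivalent of the sequence emitted below, in execution order
+** (frame_link is illustrative shorthand for the 64-bit load at base-8):
+**   if (frame_link(base) != pc) goto exit;
+**   base -= 8*delta;
+**   g->jit_base = base;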
*/ + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, RID_TMP, ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)), RID_R28); + + emit_Ao(as, SW64I_AL, RID_TMP, base, -8); +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_ABD(as, SW64I_BIS, RID_TMP, tmp, RID_TMP); + emit_AjD(as, SW64I_ANDI, tmp, SBUF_MASK_FLAG, tmp); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guard(as, SW64I_FBEQ, tmp); + emit_FGI(as, SW64I_FCMPEQ, tmp, left, tmp); + emit_GI(as, SW64I_FCVTLD, tmp, tmp); + emit_FD(as, SW64I_FIMOVD, tmp, dest); + emit_GI(as, SW64I_FCVTLW, tmp, tmp); + lua_assert(irt_isint(ir->t)); + emit_GI(as, SW64I_FCVTDL, left, tmp); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); + emit_FD(as, SW64I_FIMOVD, tmp, dest); + emit_FGI(as, SW64I_FADDD, left, right, tmp); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); + int sti8 = st == IRT_I8; + int stu8 = st == IRT_U8; + int sti16 = st == IRT_I16; + int stu16 = st == IRT_U16; + int stu32 = st == IRT_U32; + int stu64 = st == IRT_U64; + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + + IRRef lref = ir->op1; + + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_GI(as, st == IRT_NUM ? SW64I_FCVTDS : SW64I_FCVTSD, + ra_alloc1(as, lref, RSET_FPR), dest); + } else if (stu64) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + MCLabel l_end = emit_label(as); + if (irt_isfloat(ir->t)) { + TODO; + } else { + emit_FGI(as, SW64I_FADDD, dest, dest, dest); + emit_GI(as, SW64I_FCVTLD, dest, dest); + emit_AI(as, SW64I_IFMOVD, RID_R28, dest); + emit_ABD(as,SW64I_BIS, RID_R28, left, RID_R28); + emit_AjD(as,SW64I_ANDI, left, 1, left); + emit_AjD(as,SW64I_SRLI, left, 1, RID_R28); + } + emit_branch(as, SW64I_BGE, left, l_end); + emit_GI(as, SW64I_FCVTLD, dest, dest); + emit_AI(as, SW64I_IFMOVD, left, dest); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_GI(as, irt_isfloat(ir->t) ? SW64I_FCVTLS : SW64I_FCVTLD, dest, dest); + if (stu32) { + emit_AI(as, SW64I_IFMOVD, RID_R28, dest); + emit_AjD(as, SW64I_EXTLWI, left, 0, RID_R28); + } else { + emit_AI(as, SW64I_IFMOVD, left, dest); + } + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. 
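+** asm_tointg below performs, roughly:
+**   int32_t i = (int32_t)n;  if ((double)i != n) goto exit;
+** i.e. the number must survive a double -> int -> double round trip.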
*/ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ + emit_AjD(as, SW64I_EXTLWI, dest, 0, dest); + } + emit_FD(as, SW64I_FIMOVD, tmp, dest); + emit_GI(as, SW64I_FCVTDL, left, tmp); + } + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_isu32(ir->t)) { + emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); + } + + if (st64 && irt_isint(ir->t)) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_AjD(as, SW64I_EXTLWI, left, 0, dest); + } else if (irt_isu64(ir->t) && st == IRT_INT) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_AjD(as, SW64I_EXTLWI, left, 0, dest); + } else if (sti8) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if (!irt_is64(ir->t)) { + emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); + } + emit_ABD(as, SW64I_SEXTB, 0, left, dest); + } else if (stu8) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_ABD(as, SW64I_EXTLBI, left, 0, dest); + } else if (sti16) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if (!irt_is64(ir->t)) { + emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); + } + emit_ABD(as, SW64I_SEXTH, 0, left, dest); + } else if (stu16) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_ABD(as, SW64I_EXTLHI, left, 0, dest); + } else if (stu32) { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if (irt_isint(ir->t)) + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); + emit_AjD(as, SW64I_EXTLWI, left, 0, dest); + } else { + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = 0; + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ + ra_evictset(as, drop); + ofs = sps_scale(ir->s); + asm_guard(as, SW64I_BEQ, RID_RET); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), + RID_SP, ofs); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_Ao(as, SW64I_STL, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, + rset_exclude(allow, src)); + emit_Ao(as, SW64I_STL, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + emit_ABD(as, SW64I_ADDL, RID_TMP, type, RID_TMP); + emit_AjD(as, SW64I_EXTLWI, src, 0, RID_TMP); + } else { + emit_ABD(as, SW64I_ADDL, src, type, RID_TMP); + } + } +} +/* Get pointer to TValue. 
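+** Number constants are referenced in place, other numbers go through their
+** forced spill slot, and everything else is written as a tagged 64-bit value
+** into g->tmptv whose address is returned.  Roughly:
+**   if (isnum && isk)  dest = &ir_knum(ir);
+**   else if (isnum)    dest = sp + spillofs;
+**   else             { g->tmptv = tagged(value); dest = &g->tmptv; }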
*/ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) +{ + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + else /* Otherwise force a spill and use the spill slot. */ + emit_Ao(as, SW64I_LDI, dest, RID_SP, ra_spill(as, ir)); + } else { + /* Otherwise use g->tmptv to hold the TValue. */ + asm_tvstore64(as, dest, 0, ref); + emit_Ao(as, SW64I_LDI, dest, RID_JGL, + (int32_t)(offsetof(global_State, tmptv)-32768)); + } +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki16(ofs)) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_Ao(as, SW64I_LDI, dest, base, ofs); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_ABD(as, SW64I_S8ADDL, idx, base, dest); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + WI_DEBUG_BEFORE(); + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; + + Reg cmp64 = RID_NONE; + + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + + rset_clear(allow, tab); + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + + if ( irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else if (!irt_ispri(kt)) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + + if (!irt_isnum(kt)) { + /* Allocate cmp64 register used for 64-bit comparisons */ + if ( irt_isnum(kt)) { + cmp64 = key; + } else if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; + if (merge == IR_NE) { + asm_guard(as, SW64I_BEQ, RID_ZERO); + } else if (destused) { + emit_loada(as, dest, niltvg(J2G(as->J))); + } + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_move(as, dest, tmp1); + emit_Ao(as, SW64I_AL, tmp1, dest, (int32_t)offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) { /* Must match asm_guard(). 
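+** When the lookup feeds an EQ guard, the end-of-chain branch is pointed
+** straight at the exit stub, so the "ldi TMP, snapno(zero)" emitted a few
+** lines below has to match what asm_guard itself would have produced.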
*/ + l_end = asm_exitstub_addr(as); + } + if ( irt_isnum(kt)) { + emit_branch(as, SW64I_BEQ, RID_R28, l_end); + emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); + emit_FGI(as, SW64I_FCMPEQ, tmpnum, key, RID_R28); + Reg isnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); + emit_branch(as, SW64I_BEQ, tmp1, l_next); + emit_ABD(as, SW64I_CMPULT, tmp1, isnum, tmp1); + emit_AjD(as, SW64I_SRAI, tmp1, 47, tmp1); + emit_AI(as, SW64I_IFMOVD, tmp1, tmpnum); + } else { + emit_branch(as, SW64I_BNE, RID_R28, l_end); + emit_ABD(as, SW64I_CMPEQ, tmp1, cmp64, RID_R28); + emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); + } + emit_Ao(as, SW64I_LDL, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + *l_loop = SW64I_BNE | SW64F_A(tmp1) | ((as->mcp-l_loop-1) & 0x1fffff); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_ABD(as, SW64I_ADDL, key, type, tmp2); + rset_clear(allow, type); + } + + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_Ao(as, SW64I_AL, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; + if (isk) + tmphash = ra_allock(as, khash, allow); + + emit_ABD(as, SW64I_ADDL, dest, tmp1, dest); + lj_assertA(sizeof(Node) == 24, "bad Node size"); + emit_ABD(as, SW64I_SUBW, tmp2, tmp1, tmp1); + emit_AjD(as, SW64I_SLLI, tmp1, 3, tmp1); + emit_AjD(as, SW64I_SLLI, tmp1, 5, tmp2); + + emit_ABD(as, SW64I_AND, tmp2, tmphash, tmp1); //tmp1 <- hmask & tmphash + emit_Ao(as, SW64I_AL, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_Ao(as, SW64I_LDW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); + + if (isk) {//TODO + /* Nothing to do. */ + } else if (irt_isstr(kt)) { + emit_Ao(as, SW64I_LDW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. */ + //hi = tmp1, lo = tmp2 + Reg hi = tmp1; + Reg lo = tmp2; + + /* hi = hi - lj_rol(lo, HASH_ROT3); */ + emit_ABD(as, SW64I_SUBL, hi, dest, hi); + emit_rotl32(as, lo, (HASH_ROT3)&31, dest, RID_R28); + + /* hi = lo ^ lj_rol(hi, HASH_ROT1 + HASH_ROT2); */ + emit_ABD(as, SW64I_XOR, lo, dest, hi); + emit_rotl32(as, hi, (HASH_ROT2+HASH_ROT1)&31, dest, RID_R28); + + /* lo = lo - lj_rol(hi, HASH_ROT1); */ + emit_ABD(as, SW64I_SUBL, lo, dest, lo); + emit_rotl32(as, hi, HASH_ROT1&31, dest, RID_R28); + + /* lo = lo ^ hi; */ + emit_ABD(as, SW64I_XOR, lo, hi, lo); + + + if (irt_isnum(kt)) { + emit_ABD(as, SW64I_ADDL, hi, hi, hi); // hi << 1 + + emit_AjD(as, SW64I_MASKLLI, tmp2, 4, lo); //lo + emit_AjD(as, SW64I_SRAI, tmp2, 32, hi); //hi + + emit_FD(as, SW64I_FIMOVD, key, tmp2); + } else { + emit_ABD(as, SW64I_XOR, key, tmp1, tmp2); + emit_rotl32(as, tmp1, HASH_ROT1&31, dest, tmp2); + emit_ABD(as, SW64I_ADDL, key, ra_allock(as, HASH_BIAS, allow), tmp1); + } + } + } + WI_DEBUG_END(); +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + Reg dest = (ra_used(ir)||ofs > 32736) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; + Reg key = ra_scratch(as, allow); + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ofs > 32736) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_Ao(as, SW64I_LDI, dest, node, ofs); + } + if (irt_ispri(irkey->t)) { + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, key, ra_allock(as, k, allow), RID_R28); + emit_Ao(as, SW64I_LDL, key, idx, kofs); + if (ofs > 32736) + emit_ABD(as, SW64I_ADDL, node, ra_allock(as, ofs, allow), dest); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, SW64I_AL, dest, v, RSET_GPR); + } else { + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? SW64I_BEQ : SW64I_BNE, RID_R28); + if (ir->o == IR_UREFC) + emit_Ao(as, SW64I_LDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_Ao(as, SW64I_AL, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_Ao(as, SW64I_LDBU, RID_R28, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); + } else { + emit_Ao(as, SW64I_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { + emit_Ao(as, SW64I_LDI, dest, base, ofs+irr->i); + } else { + emit_Ao(as, SW64I_LDI, dest, dest, ofs); + emit_ABD(as, SW64I_ADDL, base, ra_alloc1(as, ir->op2, allow), dest); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ +static void fxloadins_end(ASMState*as, SW64Ins mi, Reg r) +{ + if (mi == SW64I_EXTLWI) { + emit_AjD(as, SW64I_EXTLWI, r, 0, r); + } else if (mi) + emit_ABD(as, mi, RID_ZERO, r, r); +} +static SW64Ins asm_fxloadins(IRIns *ir, SW64Ins* mi2) +{ + *mi2 = 0; + switch (irt_type(ir->t)) { + case IRT_I8: + *mi2 = SW64I_SEXTB; //fallthrough + case IRT_U8: + return SW64I_LDBU; + + case IRT_I16: + *mi2 = SW64I_SEXTH; //fallthrough + case IRT_U16: + return SW64I_LDHU; + + case IRT_U32: + *mi2 = SW64I_EXTLWI; //fallthrough + case IRT_INT: + return SW64I_LDW; + + case IRT_NUM: return SW64I_FLDD; + case IRT_FLOAT: return SW64I_FLDS; + default: return irt_is64(ir->t) ? 
SW64I_LDL : SW64I_LDW; + } +} + +static SW64Ins asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return SW64I_STB; + case IRT_I16: case IRT_U16: return SW64I_STH; + case IRT_NUM: return SW64I_FSTD; + case IRT_FLOAT: return SW64I_FSTS; +#if LJ_64 && !LJ_GC64 + case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ +#endif + default: return (irt_is64(ir->t)) ? SW64I_STL : SW64I_STW; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + SW64Ins mi2 = 0; + SW64Ins mi = asm_fxloadins(ir, &mi2); + Reg idx; + int32_t ofs; + if (ir->op1 == REF_NIL) { + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_Ao(as, SW64I_LDI, dest, idx, ofs); + return; + } + } + ofs = field_ofs[ir->op2]; + } + fxloadins_end(as, mi2, dest); + emit_Ao(as, mi, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + SW64Ins mi = asm_fxstoreins(ir); + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); + emit_Ao(as, mi, src, idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + SW64Ins mi2 = 0; + SW64Ins mi = asm_fxloadins(ir, &mi2); + Reg dest = ra_dest(as, ir, + irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + fxloadins_end(as, mi2, dest); + asm_fusexref(as, mi, dest, ir->op1, RSET_GPR, 0); +} + +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, + irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } +} + +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + +#if LJ_64 && !LJ_GC64 +static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) +{ + + if (ra_used(ir) || typecheck) { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (typecheck) { + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, + tmp, ra_allock(as, (int32_t)0x1fffe, rset_exclude(RSET_GPR, dest)), + RID_R28); + emit_AjD(as, SW64I_SRLI, dest, 47, tmp); + } + return dest; + } else { + return RID_NONE; + } +} +#endif + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; + IRType1 t = ir->t; + + type = ra_scratch(as, allow); + rset_clear(allow, type); + + if (ra_used(ir)) { + lj_assertA(irt_isnum(ir->t) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : allow); + rset_clear(allow, dest); + if (irt_isaddr(t)) + emit_DEXTM(as, dest, dest, 0, 47); + else if (irt_isint(t)) + emit_AjD(as, SW64I_ADDWI, dest, 0, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + rset_clear(allow, idx); + if (irt_isnum(t)) { + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPULT, type, ra_allock(as, (int32_t)LJ_TISNUM, allow), RID_R28); + } else { + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, type, ra_allock(as, (int32_t)irt_toitype(t), allow), RID_R28); + } + if (ra_hasreg(dest)) { + if (irt_isnum(t)){ + emit_Fo(as, SW64I_FLDD, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_AjD(as, SW64I_SRAI, dest, 47, type); + emit_Ao(as, SW64I_LDL, dest, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_Fo(as, SW64I_FSTD, src, idx, ofs); + } else { + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_Ao(as, SW64I_STL, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_ABD(as, SW64I_ADDL, tmp, type, tmp); + emit_AjD(as, SW64I_EXTLWI, src, 0, RID_TMP); + } else { + emit_ABD(as, SW64I_ADDL, src, type, tmp); + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; + IRType1 t = ir->t; + int32_t ofs = 8*((int32_t)ir->op1-2); + + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + lj_assertA(irt_isnum(ir->t) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); + if (ir->op2 & IRSLOAD_CONVERT) { + if (irt_isint(t)) { + Reg tmp = ra_scratch(as, RSET_FPR); + emit_FD(as, SW64I_FIMOVD, dest, tmp); + emit_GI(as, SW64I_FCVTDL_Z, tmp, tmp); + emit_GI(as, SW64I_FCVTLW, tmp, tmp); + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + Reg tmp = ra_scratch(as, RSET_GPR); + emit_GI(as, SW64I_FCVTLD, dest, dest); + emit_AI(as, SW64I_IFMOVD, tmp, dest); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } + else if (irt_isaddr(t)) { + /* Clear type from pointers. */ + emit_DEXTM(as, dest, dest, 0, 47); + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. 
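+** (addw/addwi with a zero operand is the 32-bit truncate-and-sign-extend
+** idiom used throughout this file, i.e. roughly dest = (int64_t)(int32_t)dest)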
*/ + emit_AjD(as, SW64I_ADDWI, dest, 0, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + Reg ktype = ra_allock(as, ~((int64_t)~irt_toitype(t) << 47), allow); + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, type, ktype, RID_R28); + } else { + if (irt_isnum(t)) { + Reg isnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPULT, RID_TMP, isnum, RID_R28); + if (ra_hasreg(dest)) + emit_Fo(as, SW64I_FLDD, dest, base, ofs); + } else { + Reg ktype2 = ra_allock(as, (int32_t)irt_toitype(t), allow); + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, RID_TMP, ktype2, RID_R28); + } + emit_AjD(as, SW64I_SRAI, type, 47, RID_TMP); + } + emit_Ao(as, SW64I_LDL, type, base, ofs); + } else if (ra_hasreg(dest)) { + if (irt_isnum(t)) + emit_Fo(as, SW64I_FLDD, dest, base, ofs); + else + emit_Ao(as, irt_isint(t) ? SW64I_LDW : SW64I_LDL, dest, base, + ofs ); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + if (ra_used(ir)) + ra_destreg(as, ir, RID_RET); /* GCcdata * */ + + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + emit_Ao(as, sz == 8 ? SW64I_STL : SW64I_STW, ra_alloc1(as, ir->op2, allow), + RID_RET, sizeof(GCcdata)); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + emit_Ao(as, SW64I_STB, RID_R28, RID_RET, offsetof(GCcdata, gct)); + emit_Ao(as, SW64I_LDI, RID_R28, RID_ZERO, ~LJ_TCDATA); + + emit_Ao(as, SW64I_STH, RID_R28, RID_RET, offsetof(GCcdata, ctypeid)); + emit_Ao(as, SW64I_LDI, RID_R28, RID_ZERO, id); /* Lower 16 bit used. Sign-ext ok. 
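+** (ldi sign-extends its 16-bit displacement, but the sth that consumes R28
+** stores only the low 16 bits of the ctypeid, so the sign extension is
+** harmless)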
*/ + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg link = RID_TMP; + MCLabel l_end = emit_label(as); + emit_Ao(as, SW64I_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_Ao(as, SW64I_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); + emit_getgl(as, link, gc.grayagain); + emit_branch(as, SW64I_BEQ, RID_TMP, l_end); + emit_ABD(as, SW64I_XOR, mark, RID_TMP, mark); /* Clear black bit. */ + emit_AjD(as, SW64I_ANDI, mark, LJ_GC_BLACK, RID_TMP); + emit_Ao(as, SW64I_LDBU, mark, tab, (int32_t)offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + + emit_AjD(as, SW64I_ANDI, tmp, LJ_GC_BLACK, tmp); + + emit_branch(as, SW64I_BEQ, RID_TMP, l_end); + emit_AjD(as, SW64I_ANDI, RID_TMP, LJ_GC_WHITES, RID_TMP); + + emit_branch(as, SW64I_BEQ, RID_TMP, l_end); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_Ao(as, SW64I_LDBU, tmp, obj, + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + emit_Ao(as, SW64I_LDBU, RID_TMP, val, (int32_t)offsetof(GChead, marked)); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, SW64Ins mi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_FGI(as, mi, left, right, dest); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, SW64Ins mi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_FGI(as, mi, RID_FZERO, left, dest); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); +} + +#define asm_fpadd(as, ir) asm_fparith(as, ir, SW64I_FADDD) +#define asm_fpsub(as, ir) asm_fparith(as, ir, SW64I_FSUBD) +#define asm_fpmul(as, ir) asm_fparith(as, ir, SW64I_FMULD) + +//TODO + +static void asm_add(ASMState *as, IRIns *ir) +{ + IRType1 t = ir->t; + if (irt_isnum(t)) { + asm_fpadd(as, ir); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checku8(k)) { + emit_AjD(as, (LJ_64 && irt_is64(t)) ? SW64I_ADDLI : SW64I_ADDWI, + left, k, dest); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ABD(as, (LJ_64 && irt_is64(t)) ? 
SW64I_ADDL : SW64I_ADDW, + left, right, dest); + } +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpsub(as, ir); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ABD(as, irt_is64(ir->t) ? SW64I_SUBL : SW64I_SUBW, + left, right, dest); + } +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpmul(as, ir); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ABD(as, irt_is64(ir->t) ? SW64I_MULL : SW64I_MULW, + left, right, dest); + } +} + +/*static void asm_mod(ASMState *as, IRIns *ir) +{ + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_pow(ASMState *as, IRIns *ir) +{ + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else + asm_callid(as, ir, IRCALL_lj_vm_powi); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else + asm_fparith(as, ir, SW64I_FDIVD); +} +*/ + +static void asm_fpdiv(ASMState *as, IRIns *ir) +{ + asm_fparith(as, ir, SW64I_FDIVD); +} + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_FGI(as, SW64I_FCPYSN, left, left, dest); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ABD(as, (LJ_64 && irt_is64(ir->t)) ? SW64I_SUBL : SW64I_SUBW, + RID_ZERO, left, dest); + } +} + +#define asm_abs(as, ir) asm_fpunary(as, ir, SW64I_FABS) + +#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) + +static void asm_arithov(ASMState *as, IRIns *ir) +{ + Reg right, left, dest = ra_dest(as, ir, RSET_GPR); + lj_assertA(!irt_is64(ir->t), "bad usage"); + if (irref_isk(ir->op2)) { + int k = IR(ir->op2)->i; + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); + if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ + left = ra_alloc1(as, ir->op1, RSET_GPR); + asm_guard(as, k >= 0 ? SW64I_BNE : SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPLT, dest, dest == left ? RID_TMP : left, RID_R28); + emit_Ao(as, SW64I_LDI, dest, left, k); + if (dest == left) emit_move(as, RID_TMP, left); + return; + } + } + left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + + asm_guard(as, SW64I_BLT, RID_R28); + + emit_ABD(as, SW64I_AND, RID_TMP, RID_R28, RID_R28); + if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ + emit_ABD(as, SW64I_XOR, dest, dest == right ? RID_TMP : right, RID_TMP); + } else { /* ((dest^left) & (dest^~right)) < 0 */ + emit_ABD(as, SW64I_XOR, RID_TMP, dest, RID_TMP); + emit_ABD(as, SW64I_EQV, dest == right ? RID_TMP : right, RID_ZERO, RID_TMP); + } + + emit_ABD(as, SW64I_XOR, dest, dest == left ? RID_TMP : left, RID_R28); + emit_ABD(as, ir->o == IR_ADDOV ? SW64I_ADDW : SW64I_SUBW, left, right, dest); + + if (dest == left || dest == right) + emit_move(as, RID_TMP, dest == left ? 
left : right); +} + +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + +static void asm_mulov(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + + asm_guard(as, SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, dest, RID_R28, RID_R28); + + emit_ABD(as, SW64I_MULW, left, right, dest); + emit_ABD(as, SW64I_MULL, left, right, RID_R28); +} + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + Reg left, right, dest = ra_dest(as, ir, RSET_GPR); + IRIns *irl = IR(ir->op1); + if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { + left = ra_alloc2(as, irl, RSET_GPR); + right = (left >> 8); left &= 255; + } else { + left = RID_ZERO; + right = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + } + emit_ABD(as, SW64I_ORNOT, left, right, dest); +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + int is64 = irt_is64(ir->t); + int bit = is64 ? 64 : 32; + + if (is64) { + /* 8. extlb left, 7 */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_EXTLBI, left, 7, RID_R28); + + /* 7. extlb left, 6 */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*7, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 6, RID_R28); + + /* 6. extlb left, 5 */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*6, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 5, RID_R28); + + /* 5. extlb left, 4 */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*5, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 4, RID_R28); + } + + /* 4. extlb left, 3, AT; addl AT, dest, dest */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + if (is64) emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*4, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 3, RID_R28); + + /* 3. extlb left, 2, AT; slli AT, 8, AT; addl AT, dest, dest */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*3, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 2, RID_R28); + + /* 2. extlb left, 1, AT; slli AT, 16, AT; addl AT, dest, dest */ + emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*2, RID_R28); + emit_AjD(as, SW64I_EXTLBI, left, 1, RID_R28); + + /* 1. 
extlb left, 0, AT; slli AT, 24, dest */ + emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*1, dest); + emit_AjD(as, SW64I_EXTLBI, left, 0, RID_R28); +} + +static void asm_bitop(ASMState *as, IRIns *ir, SW64Ins mi, SW64Ins mik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (!irt_is64(ir->t)) { + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); + } + + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checki8(k)) { + emit_AjD(as, mik, left, k, dest); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ABD(as, mi, left, right, dest); +} + +#define asm_band(as, ir) asm_bitop(as, ir, SW64I_AND, SW64I_ANDI) +#define asm_bor(as, ir) asm_bitop(as, ir, SW64I_BIS, SW64I_BISI) +#define asm_bxor(as, ir) asm_bitop(as, ir, SW64I_XOR, SW64I_XORI) + +static void asm_bitshift(ASMState *as, IRIns *ir, SW64Ins mi, SW64Ins mik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + int is64 = irt_is64(ir->t); + if (!is64) + emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); // truncated it to 32 bit + + if (irref_isk(ir->op2)) { /* Constant shifts. */ + uint32_t shift = (uint32_t)IR(ir->op2)->i; + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_AjD(as, mik, is64 ? left : RID_R28, (shift & 63), dest); + if (!is64) { + if (mi != SW64I_SRAI && mi != SW64I_SRA) + emit_AjD(as, SW64I_EXTLWI, RID_R28, 0, RID_R28); + emit_ABD(as, SW64I_ADDW, RID_ZERO, left, RID_R28); // truncated it to 32 bit + } + } else { + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ABD(as, mi, is64 ? left : RID_R28, right, dest); + if (!is64) { + if (mi != SW64I_SRAI && mi != SW64I_SRA) + emit_AjD(as, SW64I_EXTLWI, RID_R28, 0, RID_R28); + emit_ABD(as, SW64I_ADDW, RID_ZERO, left, RID_R28); // truncated it to 32 bit + } + } +} + +#define asm_bshl(as, ir) asm_bitshift(as, ir, SW64I_SLL, SW64I_SLLI) +#define asm_bshr(as, ir) asm_bitshift(as, ir, SW64I_SRL, SW64I_SRLI) +#define asm_bsar(as, ir) asm_bitshift(as, ir, SW64I_SRA, SW64I_SRAI) + +static void asm_brotx(ASMState *as, IRIns *ir, int mode) +{ + int is64 = irt_is64(ir->t); + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op2)) { /* Constant shifts. */ + uint32_t shift = (uint32_t)(IR(ir->op2)->i & 63); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (is64) { + emit_rotx(as, left, shift, dest, RID_R28, mode); + } else { + emit_rotx32(as, left, shift, dest, RID_R28, mode); + } + } else { + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + if (!is64) { + emit_ABD(as, SW64I_ADDW, dest, RID_ZERO, dest); + } + emit_ABD(as, SW64I_BIS, dest, RID_TMP, dest); + + if (mode == 1) { + emit_ABD(as, SW64I_SLL, is64 ? left: RID_R28, right, dest); + emit_ABD(as, SW64I_SRL, is64 ? left: RID_R28, RID_TMP, RID_TMP); + } else if (mode == 2){ + emit_ABD(as, SW64I_SRL, is64 ? left: RID_R28, right, dest); + emit_ABD(as, SW64I_SLL, is64 ? left: RID_R28, RID_TMP, RID_TMP); + } else { + lua_assert(0); + } + if (!is64) { + emit_AjD(as, SW64I_EXTLWI, left, 0, RID_R28); + } + emit_ABD(as, SW64I_SUBL, ra_allock(as, is64 ? 
64 : 32, RSET_GPR), right, RID_TMP); + } +} +#define asm_brol(as, ir) asm_brotx(as, ir, 1) +#define asm_bror(as, ir) asm_brotx(as, ir, 2) + +static void asm_min_max(ASMState *as, IRIns *ir, int ismax) +{ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_FGHI(as, SW64I_FSELEQ, RID_F28, left, right, dest); + emit_FGI(as, SW64I_FCMPLT, ismax ? left: right, ismax ? right : left, + RID_F28); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ABCD(as, SW64I_SELEQ, RID_R28, left, right, dest); + emit_ABD(as, SW64I_CMPLT, ismax ? left : right, ismax ? right : left, + RID_R28); + } +} + +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + +/* -- Comparisons --------------------------------------------------------- */ + +static void asm_comp(ASMState *as, IRIns *ir) +{ + /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ + IROp op = ir->o; + if (irt_isnum(ir->t)) { + MCLabel l_true; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + l_true = emit_label(as); + switch (op) { + case IR_LT: + case IR_ULT: + asm_guard(as, SW64I_FBEQ, RID_F28); + emit_FGI(as, SW64I_FCMPLT, left, right, RID_F28); + break; + case IR_GE: + case IR_UGE: + asm_guard(as, SW64I_FBEQ, RID_F28); + emit_FGI(as, SW64I_FCMPLE, right, left, RID_F28); + break; + case IR_LE: + case IR_ULE: + asm_guard(as, SW64I_FBEQ, RID_F28); + emit_FGI(as, SW64I_FCMPLE, left, right, RID_F28); + break; + case IR_GT: + case IR_UGT: + asm_guard(as, SW64I_FBEQ, RID_F28); + emit_FGI(as, SW64I_FCMPLT, right, left, RID_F28); + break; + default: + lua_assert(!"not here"); + } + + if (op & 4) { + emit_branch(as, SW64I_FBNE, RID_F28, l_true); + } else { + asm_guard(as, SW64I_FBNE, RID_F28); + } + emit_FGI(as, SW64I_FCMPUN, left, right, RID_F28); + + } else { + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; + + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + SW64Ins mi = (op&2) ? ((op&1) ? SW64I_BLE : SW64I_BGT) : + ((op&1) ? SW64I_BLT : SW64I_BGE); + asm_guard(as, mi, left); + } else { + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if ((op&2)) k++; + if (checki8(k)) { + asm_guard(as, (op&1) ? SW64I_BNE : SW64I_BEQ, RID_R28); + emit_AjD(as, (op&4) ? SW64I_CMPULTI : SW64I_CMPLTI, + left, k, RID_R28); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + asm_guard(as, ((op^(op>>1))&1) ? SW64I_BNE : SW64I_BEQ, RID_R28); + emit_ABD(as, (op&4) ? SW64I_CMPULT : SW64I_CMPLT, + (op&2) ? right : left, (op&2) ? left : right, RID_R28); + } + } +} + + +static void asm_equal(ASMState *as, IRIns *ir) +{ + Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); + int is_ne = ir->o & 1; + right = (left >> 8); left &= 255; + if (irt_isnum(ir->t)) { + MCLabel l_true = emit_label(as); + + if (irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + asm_guard(as, is_ne ? SW64I_FBEQ : SW64I_FBNE, left); + return; + } + asm_guard(as, is_ne ? 
SW64I_FBNE : SW64I_FBEQ, RID_F28); + emit_FGI(as, SW64I_FCMPEQ, left, right, RID_F28); + + if (is_ne) { + emit_branch(as, SW64I_FBNE, RID_F28, l_true); + } else { + asm_guard(as, SW64I_FBNE, RID_F28); + } + emit_FGI(as, SW64I_FCMPUN, left, right, RID_F28); + + } else { + if (irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + asm_guard(as, is_ne ? SW64I_BEQ: SW64I_BNE, left); + return; + } + asm_guard(as, is_ne ? SW64I_BNE : SW64I_BEQ, RID_R28); + emit_ABD(as, SW64I_CMPEQ, left, right, RID_R28); + } +} + +/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); lua_assert(0); +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guard(as, SW64I_BNE, RID_R28); + emit_AjD(as, SW64I_ANDI, RID_R28, HOOK_PROFILE, RID_R28); + emit_lsglptr(as, SW64I_LDBU, RID_R28, + (int32_t)offsetof(global_State, hookmask)); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ + Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; + ExitNo oldsnap = as->snapno; + rset_clear(allow, pbase); + tmp = allow ? rset_pickbot(allow) : RID_RET; + as->snapno = exitno; + asm_guard(as, SW64I_BLE, RID_R28); + as->snapno = oldsnap; + if (allow == RSET_EMPTY) /* Restore temp. register. */ + emit_Ao(as, SW64I_AL, tmp, RID_SP, 0); + else + ra_modified(as, tmp); + emit_Ao(as, SW64I_LDI, RID_R28, RID_TMP, -(8*topslot)); + emit_ABD(as, SW64I_SUBL, tmp, pbase, RID_TMP); + emit_Ao(as, SW64I_AL, tmp, tmp, offsetof(lua_State, maxstack)); + if (pbase == RID_TMP) + emit_getgl(as, RID_TMP, jit_base); + emit_getgl(as, tmp, cur_L); + if (allow == RSET_EMPTY) /* Spill temp. register. */ + emit_Ao(as, SW64I_AS, tmp, RID_SP, 0); +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + WI_DEBUG_BEFORE(); + SnapEntry *map = &as->T->snapmap[snap->mapofs]; + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_Fo(as, SW64I_FSTD, src, RID_BASE, ofs); + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + checkmclim(as); + } + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); + WI_DEBUG_END(); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Marker to prevent patching the GC check exit. */ +#define SW64_NOPATCH_GC_CHECK SW64I_BIS + +/* Check GC threshold and do one or more GC steps. 
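+** Rough equivalent of the emitted code, in execution order (steps is the
+** accumulated as->gcsteps value):
+**   if (g->gc.total < g->gc.threshold) goto l_end;
+**   if (lj_gc_step_jit(g, steps)) goto exit;  -- exit in GCSatomic/GCSfinalize
+**   l_end: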
*/ +static void asm_gc_check(ASMState *as) +{ + WI_DEBUG_BEFORE(); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + /* Assumes asm_snap_prep() already done. */ + asm_guard(as, SW64I_BNE, RID_RET); + + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + l_end[-3] = SW64_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ + emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + tmp = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_branch(as, SW64I_BNE, RID_R28, l_end); + emit_ABD(as, SW64I_CMPULT, RID_TMP, tmp, RID_R28); + + emit_getgl(as, tmp, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); + WI_DEBUG_END(); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + WI_DEBUG_BEFORE(); + MCode *p = as->mctop; + MCode *target = as->mcp; + for (int i=1; iloopinv) { /* Inverted loop branch? */ + /* asm_guard already inverted the cond branch. Only patch the target. */ + p[-EXIT_ROOM] &= 0xffe00000u; + p[-EXIT_ROOM] |= ((uint32_t)(target-(p-EXIT_ROOM)-1) & 0x001fffffu); + } else { + __WI(p-EXIT_ROOM, SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH(target - (p-EXIT_ROOM) - 1)); + } + WI_DEBUG_END(); +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + if (as->loopinv) as->mctop--; +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + WI_DEBUG_BEFORE(); + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_move(as, r, RID_BASE); + } + WI_DEBUG_END(); +} + +/* Coalesce BASE register for a side trace. */ +static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + WI_DEBUG_BEFORE(); + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } + } + WI_DEBUG_END(); + return RID_NONE; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + WI_DEBUG_BEFORE(); + MCode *p = as->mctop-1; + MCode *target; + int32_t spadj = as->T->spadjust; + + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; + + for (int i=0; imctop - EXIT_ROOM; /* Leave room for exit branch. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. 
*/ + as->invmcp = NULL; + } +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (ngpr > 0) ngpr--; else nslots += 2; + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + asm_sparejump_setup(as); + asm_exitstub_setup(as); +} + +/* -- Trace patching ------------------------------------------------------ */ + +int is_branch_op(MCode ins) +{ + switch(ins & 0xfc000000) { + case SW64I_BEQ: case SW64I_BNE: case SW64I_BLT: + case SW64I_BLE: case SW64I_BGT: case SW64I_BGE: + return 1; + case SW64I_FBEQ: + case SW64I_FBGE: + case SW64I_FBGT: + case SW64I_FBLE: + case SW64I_FBLT: + case SW64I_FBNE: + return 1; + } + return 0; +} + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + WI_DEBUG_BEFORE(); + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *px = exitstub_trace_addr(T, exitno); + MCode *cstart = NULL, *cstop = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); +#if SW64_DEBUG_WI + MCode exitload = SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISP(exitno, RID_TMP); + printf("try patching traceno:%d exitno:%d target:%p px:%p\n", + T->traceno, exitno, target, px); +#else + MCode exitload = SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISP(exitno, RID_TMP); +#endif + + for (p++; p < pe; p++) { + /* Look for load of exit number. */ + if (*p != exitload) { + continue; + } + + /* Look for exitstub branch. Yes, this covers all used branch variants. */ + if (is_branch_op(p[1]) + && (SW64F_BRANCH(p[1]) == SW64F_BRANCH((px - (p+1) - 1)))) { + ptrdiff_t delta = target - (p+1) - 1; + if (IS_SW64F_BRANCH_VALID(delta)) { /* Patch in-range branch. */ +#if SW64_DEBUG_WI + printf("p1atch at %p\n", p+2); +#endif + __WI_REPLACE(p+1, + (p[1] & (~0x1fffff)) | SW64F_BRANCH(delta), + p[1]); + patchbranch: + cstop = p+1; + if (!cstart) cstart = p; + } else { /* Branch out of range. Use spare jump slot in mcarea. */ + TODO; + } + } else if (p[2] == SW64I_NOP) { +#if SW64_DEBUG_WI + printf("p2atch at %p\n", p+2); +#endif + ptrdiff_t delta = target - (p+2) - 1; + __WI_REPLACE(p+2, SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH(delta), SW64I_NOP); + goto patchbranch; + } + } + if (cstart) lj_mcode_sync(cstart, cstop); + lj_mcode_patch(J, mcarea, 1); + WI_DEBUG_END(); +} + +#undef TODO diff -Naur a/src/lj_bcdef.h b/src/lj_bcdef.h --- a/src/lj_bcdef.h 2017-05-02 03:05:00.000000000 +0800 +++ b/src/lj_bcdef.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,226 +0,0 @@ -/* This is a generated file. DO NOT EDIT! 
*/ - -LJ_DATADEF const uint16_t lj_bc_ofs[] = { -0, -72, -144, -216, -288, -427, -569, -632, -695, -764, -833, -886, -938, -989, -1040, -1081, -1122, -1148, -1180, -1240, -1314, -1368, -1422, -1476, -1530, -1589, -1643, -1697, -1751, -1805, -1841, -1908, -1975, -2042, -2109, -2158, -2230, -2306, -2342, -2378, -2408, -2437, -2462, -2505, -2541, -2628, -2710, -2748, -2782, -2833, -2897, -3006, -3099, -3117, -3135, -3283, -3407, -3506, -3679, -3908, -4032, -4174, -4220, -4262, -4266, -4414, -4482, -4647, -4838, -4926, -4930, -5066, -5158, -5263, -5360, -5465, -5485, -5555, -5622, -5642, -5686, -5725, -5745, -5763, -5810, -5835, -5855, -5918, -5972, -5972, -6097, -6098, -6177, -7841, -7908, -8419, -8522, -8579, -8710, -7974, -8136, -8228, -8280, -8311, -8768, -8809, -9417, -8864, -9167, -9469, -9596, -9620, -9647, -9711, -9744, -9778, -9809, -9840, -9873, -9914, -9957, -9990, -10030, -10070, -10245, -10393, -10110, -10110, -9678, -10149, -10549, -10492, -10196, -10603, -10662, -11596, -11994, -11941, -12063, -12142, -12224, -12306, -12388, -11650, -11747, -11844, -10721, -10752, -10799, -10921, -11090, -11217, -11327, -11442, -11557 -}; - -LJ_DATADEF const uint16_t lj_bc_mode[] = { -BCDEF(BCMODE) -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF, -BCMODE_FF -}; - diff -Naur a/src/lj_ccallback.c b/src/lj_ccallback.c --- a/src/lj_ccallback.c 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_ccallback.c 2024-11-06 10:36:30.256946917 +0800 @@ -71,6 +71,10 @@ #define CALLBACK_MCODE_HEAD 52 +#elif LJ_TARGET_SW64 + +#define CALLBACK_MCODE_HEAD 4*13 + #else /* Missing support for this architecture. 
*/ @@ -238,6 +242,87 @@ } return p; } + +#elif LJ_TARGET_SW64 + +static inline void swSplit32Addr(uint32_t addr, int16_t* hi, int16_t* lo) +{ + *hi = (int16_t)(addr >> 16); + *lo = (int16_t)(addr & 0xffff); + if (*lo < 0) { + *hi = *hi + 1; + *lo = (int16_t)(addr - ((int32_t)(*hi) << 16)); + } +} +static inline void swSplit64AddrHI32(uint64_t addr, int16_t*hi, int16_t*lo) +{ + swSplit32Addr((uint32_t)(addr >> 32), hi, lo); +} +static inline void swSplit64AddrLO32(uint64_t addr, int16_t*hi, int16_t*lo) +{ + swSplit32Addr((uint32_t)(addr & 0xffffffff), hi, lo); +} + +static void *callback_mcode_init(global_State *g, uint32_t *page) +{ + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + int16_t hi, lo; + + MSize slot; + { + swSplit64AddrHI32((uint64_t)target, &hi, &lo); + // ldih PV, h32_hi(zero) + *p++ = SW64I_LDIH | SW64F_A(RID_CFUNCADDR)| SW64F_DISP(hi, RID_ZERO); + // ldi PV, h32_lo(PV) + *p++ = SW64I_LDI | SW64F_A(RID_CFUNCADDR)| SW64F_DISP(lo, RID_CFUNCADDR); + // slli PV, 32, PV + *p++ = SW64I_SLLI | SW64F_A(RID_CFUNCADDR)| SW64F_IMM(32) | SW64F_D(RID_CFUNCADDR); + + swSplit64AddrLO32((uint64_t)target, &hi, &lo); + // ldih at, lo32_hi(zero) + *p++ = SW64I_LDIH | SW64F_A(RID_R28)| SW64F_DISP(hi, RID_ZERO); + // ldi at, lo32_lo(at) + *p++ = SW64I_LDI | SW64F_A(RID_R28)| SW64F_DISP(lo, RID_R28); + // addl PV, at, PV + *p++ = SW64I_ADDL | SW64F_A(RID_CFUNCADDR)| SW64F_B(RID_R28) | SW64F_D(RID_CFUNCADDR); + } + + { + swSplit64AddrHI32((uint64_t)g, &hi, &lo); + // ldih r2, h32_hi(zero) + *p++ = SW64I_LDIH | SW64F_A(RID_R2) | SW64F_DISP(hi, RID_ZERO); + // ldi r2, h32_lo(r2) + *p++ = SW64I_LDI | SW64F_A(RID_R2) | SW64F_DISP(lo, RID_R2); + // slli r2, 32, r2 + *p++ = SW64I_SLLI | SW64F_A(RID_R2)| SW64F_IMM(32) | SW64F_D(RID_R2); + + swSplit64AddrLO32((uint64_t)g, &hi, &lo); + // ldih at, lo32_hi(zero) + *p++ = SW64I_LDIH | SW64F_A(RID_R28)| SW64F_DISP(hi, RID_ZERO); + // ldi at, lo32_lo(at) + *p++ = SW64I_LDI | SW64F_A(RID_R28)| SW64F_DISP(lo, RID_R28); + // addl r2, at, r2 + *p++ = SW64I_ADDL | SW64F_A(RID_R2)| SW64F_B(RID_R28) | SW64F_D(RID_R2); + } + + // call zero, 0(PV) + *p++ = SW64I_CALL | SW64F_A(RID_ZERO) | SW64F_DISP(0, RID_CFUNCADDR); + + //lua_assert((p - page) * 4 <= CALLBACK_MCODE_HEAD); + + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + // ldi r1, slot(zero) + *p++ = SW64I_LDI | SW64F_A(RID_R1) | SW64F_DISP(slot, RID_ZERO); + + // br zero, (page-p-2) + *p = SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH((page-p-2)+1); + p++; + } + + return p; +} + #else /* Missing support for this architecture. */ #define callback_mcode_init(g, p) (p) @@ -516,6 +601,33 @@ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ((float *)dp)[1] = *(float *)dp; +#elif LJ_TARGET_SW64 + +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + if (isfp) { \ + FPRCBArg *reg = &(cts->cb.fpr[ngpr]); \ + if (cta->size == 4) { \ + reg->f[0] = (float)reg->d; \ + }\ + sp = reg; \ + } else {\ + intptr_t *reg = &(cts->cb.gpr[ngpr]); \ + if (cta->size == 4) { \ + *reg = *(int*)reg; \ + } \ + sp = reg; \ + } \ + ngpr += n; \ + goto done; \ + } + +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + *(double *)dp = *(float *)dp; /* FPRs always hold doubles. 
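The float result is widened to a double in place, so the value handed back through the return FPR is already in the double format that register is read as. 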
*/ \ + if (ctype_isinteger(ctr->info) && ctr->size == 4) \ + *((int64_t *)dp) = *(int *)dp; + #else #error "Missing calling convention definitions for this architecture" #endif diff -Naur a/src/lj_ccall.c b/src/lj_ccall.c --- a/src/lj_ccall.c 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_ccall.c 2024-11-05 14:12:53.253205702 +0800 @@ -575,6 +575,67 @@ goto done; \ } +#elif LJ_TARGET_SW64 +/* -- SW64 calling conventions -------------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = 1; /* Return all structs by reference. */ \ + cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 2 FPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = (float)cc->fpr[0].d; \ + ((float *)dp)[1] = (float)cc->fpr[1].d; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_STRUCTARG \ + if (!(sz <= 8*6)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + if (ngpr < maxgpr) \ + sz *= 2; \ + } + +#define CCALL_HANDLE_REGARG \ + { /* Try to pass argument in GPRs. */ \ + cc->reg_is_word[ngpr] = (d->size == 4); \ + if (ctype_iscomplex(d->info)) { \ + cc->reg_is_word[ngpr] = d->size == 8; \ + cc->reg_is_word[ngpr+1] = d->size==8; \ + } \ + if (n > 1) { \ + /* int64_t or complex (float). */ \ + lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ + } \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += (ngpr + n - maxgpr) * CTSIZE_PTR; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_SIZE_STACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ + #else #error "Missing calling convention definitions for this architecture" #endif @@ -939,6 +1000,9 @@ /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); +#if LJ_TARGET_SW64 + memset(cc->reg_is_word, 0, sizeof(cc->reg_is_word)); +#endif #if CCALL_NUM_FPR memset(cc->fpr, 0, sizeof(cc->fpr)); #endif @@ -1077,7 +1141,7 @@ if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_SW64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1101,7 +1165,7 @@ cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } -#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) +#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) || LJ_TARGET_SW64 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { /* Split float HFA or complex float into separate registers. 
*/ CTSize i = (sz >> 2) - 1; diff -Naur a/src/lj_ccall.h b/src/lj_ccall.h --- a/src/lj_ccall.h 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_ccall.h 2024-11-05 11:22:17.266522101 +0800 @@ -129,6 +129,22 @@ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; +#elif LJ_TARGET_SW64 + +#define CCALL_NARG_GPR 6 +#define CCALL_NARG_FPR 0 /* FP args are positional and overlay the GPR array. */ +#define CCALL_NRET_GPR 1 +#define CCALL_NRET_FPR 2 + +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { float f; float g; }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -181,6 +197,9 @@ #if LJ_32 int32_t align1; #endif +#if LJ_TARGET_SW64 + uint8_t reg_is_word[CCALL_NUM_GPR]; +#endif #if CCALL_NUM_FPR FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ #endif diff -Naur a/src/lj_crecord.c b/src/lj_crecord.c --- a/src/lj_crecord.c 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_crecord.c 2024-11-05 11:26:33.379041521 +0800 @@ -131,7 +131,7 @@ /* Number of windowed registers used for optimized memory copy. */ #if LJ_TARGET_X86 #define CREC_COPY_REGWIN 2 -#elif LJ_TARGET_PPC || LJ_TARGET_MIPS +#elif LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_SW64 #define CREC_COPY_REGWIN 8 #else #define CREC_COPY_REGWIN 4 diff -Naur a/src/lj_def.h b/src/lj_def.h --- a/src/lj_def.h 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_def.h 2024-11-05 14:00:15.538193672 +0800 @@ -373,6 +373,14 @@ #define check_exp(c, e) (e) #endif +/* Runtime assertions. */ +#ifdef lua_assert //TODO +#define api_check(l, e) lua_assert(e) +#else +#define lua_assert(c) ((void)0) +#define api_check luai_apicheck +#endif + /* Static assertions. */ #define LJ_ASSERT_NAME2(name, line) name ## line #define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line) diff -Naur a/src/lj_dispatch.c b/src/lj_dispatch.c --- a/src/lj_dispatch.c 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_dispatch.c 2024-11-05 11:28:20.234072199 +0800 @@ -38,7 +38,7 @@ /* -- Dispatch table management ------------------------------------------- */ -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_SW64 #include LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); @@ -76,7 +76,7 @@ GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_SW64 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); #endif } diff -Naur a/src/lj_dispatch.h b/src/lj_dispatch.h --- a/src/lj_dispatch.h 2024-11-05 09:58:44.409962699 +0800 +++ b/src/lj_dispatch.h 2024-11-05 11:29:11.602127311 +0800 @@ -12,7 +12,7 @@ #include "lj_jit.h" #endif -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_SW64 /* Need our own global offset table for the dreaded MIPS calling conventions. */ #ifndef _LJ_VM_H @@ -93,7 +93,7 @@ /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ uint8_t align1[(16-sizeof(global_State))&15]; #endif -#if LJ_TARGET_MIPS +#if LJ_TARGET_MIPS || LJ_TARGET_SW64 ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ #endif #if LJ_HASJIT diff -Naur a/src/lj_emit_sw64.h b/src/lj_emit_sw64.h --- a/src/lj_emit_sw64.h 1970-01-01 08:00:00.000000000 +0800 +++ b/src/lj_emit_sw64.h 2024-11-05 13:33:06.872189975 +0800 @@ -0,0 +1,456 @@ +/* +** SW64 instruction emitter. +** Copyright (C) 2019 deepin inc. 
See Copyright Notice in luajit.h +*/ + +#include +#define TODO do {printf("\e[1;34mTODO IMPLEMENT %s\e[m\n", __FUNCTION__); asm("bpt;bpt");} while(0); + +#if SW64_DEBUG_WI +#include +const char* disass_ins(int ins, void* addr) { + static char cmd[1024]; + sprintf(cmd, "./luajit -e 'require(\"jit.dis_sw64\"):wi_debug(%d, %p)'", ins, addr); + FILE* out = popen(cmd, "r"); + memset(cmd, 0, sizeof(cmd)); + fread(cmd, sizeof(cmd), 1, out); + pclose(out); + for (int i=0; icurins; \ + if (*x != origin) { \ + printf("BUG: 1 overwrite[%p](0x%x) to 0x%lx on %s !\n", \ + x, (int)origin, (int)ins, __FUNCTION__); \ + asm("bpt;bpt"); \ + } else { \ + printf("\tIR' %d --> %p #%s\n", ir-REF_BASE, x, disass_ins(ins, x)); \ + } \ + *x = ins; \ + } while(0) + +#define __WI(addr, ins) \ + do { \ + MCode* x = addr; \ + IRRef ir = -1; \ + if (as!=0) ir = as->curins; \ + if (*x != 0 && *x != 0x43ff075f) { \ + printf("BUG: 2 overwrite[%p](0x%x)(%s) to 0x%lx(%s) on %s! IR:0x%x\n", \ + x, *x, disass_ins(*x, x), (unsigned long)ins, disass_ins(ins, x), __FUNCTION__, \ + ir); \ + asm("bpt;bpt"); \ + } else { \ + printf("\tIR %d --> %p #%s\n", ir-REF_BASE, x, disass_ins(ins, x)); \ + } \ + *x = ins; \ + } while(0) +#define WI_DEBUG_BEFORE() printf("before %s\n", __FUNCTION__); +#define WI_DEBUG_END() printf("end %s\n", __FUNCTION__); +#else +#define __WI(addr, ins) *(addr) = ins; +#define __WI_REPLACE(addr, ins, origin) *(addr) = ins; +#define WI_DEBUG_BEFORE() +#define WI_DEBUG_END() +#endif + +#if LJ_64 +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else if (ir->o == IR_KNUM) { + return (intptr_t)ir_knum(ir)->u64; + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. 
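IR_KINT and IR_KNULL keep their value in the 32-bit ir->i field; returning it through the intptr_t return type sign-extends it to 64 bits. 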
*/ + } +} +#endif + +#if LJ_64 +#define get_kval(as, ref) get_k64val(as, ref) +#else +#define get_kval(as, ref) (IR((ref))->i) +#endif + + +inline static void split32Addr(uint32_t addr, int16_t* hi, int16_t* lo) +{ + *hi = (int16_t)(addr >> 16); + *lo = (int16_t)(addr & 0xffff); + if (*lo < 0) { + *hi = *hi + 1; + *lo = (int16_t)(addr - ((int32_t)(*hi) << 16)); + } +} +inline static void split64AddrHI32(uint64_t addr, int16_t*hi, int16_t*lo) +{ + int32_t addr_hi = (int32_t)(addr >> 32); + int32_t addr_lo = (addr & 0xffffffff); + if (addr_lo < 0) { + addr_hi++; + } + split32Addr((uint32_t)addr_hi, hi, lo); +} +inline static void split64AddrLO32(uint64_t addr, int16_t*hi, int16_t*lo) +{ + int32_t addr_hi = addr >> 32; + int32_t addr_lo = addr & 0xffffffff; + if (addr_lo < 0) { + addr_hi++; + addr_lo = addr - ((int64_t)addr_hi << 32); + } + split32Addr((uint32_t)(addr_lo), hi, lo); +} + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_AjD(ASMState *as, SW64Ins mi, Reg a, uint8_t i, Reg d) +{ + lua_assert(a <=31 && a >= 0); + lua_assert(d <= 31 && d >= 0); + __WI(--as->mcp, mi | SW64F_A(a) | SW64F_j(i) | SW64F_D(d)); +} + +static void emit_Ao(ASMState *as, SW64Ins mi, Reg a, Reg b, int32_t disp) +{ + lua_assert(b <= 31 && b >= 0); + lua_assert(((int16_t)disp) == disp); + __WI(--as->mcp, mi | SW64F_A(a) | SW64F_DISP(disp, b)); +} + +static void emit_ABD(ASMState *as, SW64Ins mi, Reg a, Reg b, Reg d) +{ + lua_assert(a <=31 && a >= 0); + lua_assert(b <=31 && b >= 0); + lua_assert(d <=31 && d >= 0); + __WI(--as->mcp, mi | SW64F_A(a) | SW64F_B(b) | SW64F_D(d)); +} + +static void emit_ABCD(ASMState *as, SW64Ins mi, Reg a, Reg b, Reg c, Reg d) +{ + lua_assert(a <=31 && a >= 0); + lua_assert(b <=31 && b >= 0); + lua_assert(c <=31 && c >= 0); + lua_assert(d <=31 && d >= 0); + __WI(--as->mcp, mi | SW64F_A(a) | SW64F_B(b) | SW64F_C(c) | SW64F_D(d)); +} + +static void emit_FGI(ASMState *as, SW64Ins mi, Reg f, Reg g, Reg i) +{ + lua_assert(f >= 32 && f <= 63); + lua_assert(g >= 32 && g <= 63); + lua_assert(i >= 32 && i <= 63); + __WI(--as->mcp, mi | SW64F_F(f) | SW64F_G(g) | SW64F_I(i)); +} +static void emit_FGHI(ASMState *as, SW64Ins mi, Reg f, Reg g, Reg h, Reg i) +{ + lua_assert(f >= 32 && f <= 63); + lua_assert(g >= 32 && g <= 63); + lua_assert(h >= 32 && h <= 63); + lua_assert(i >= 32 && i <= 63); + __WI(--as->mcp, mi | SW64F_F(f) | SW64F_G(g) | SW64F_H(h) | SW64F_I(i)); +} +static void emit_Fo(ASMState *as, SW64Ins mi, Reg f, Reg b, int16_t disp) +{ + lua_assert(f >= 32 && f <= 63); + lua_assert(b <= 31 && b >= 0); + __WI(--as->mcp, mi | SW64F_F(f) | SW64F_DISP(disp, b)); +} +static void emit_FD(ASMState *as, SW64Ins mi, Reg f, Reg d) +{ + lua_assert(f >= 32 && f <= 63); + lua_assert(d >= 0 && d <= 31); + __WI(--as->mcp, mi | SW64F_F(f) | SW64F_D(d)); +} +static void emit_AI(ASMState *as, SW64Ins mi, Reg a, Reg i) +{ + lua_assert(a >= 0 && a <= 31); + lua_assert(i >= 32 && i <= 63); + __WI(--as->mcp, mi | SW64F_A(a) | SW64F_I(i)); +} + +static void emit_GI(ASMState *as, SW64Ins mi, Reg g, Reg i) +{ + lua_assert(g >= 32 && g <= 63); + lua_assert(i >= 32 && i <= 63); + __WI(--as->mcp, mi | SW64F_F(RID_F31) | SW64F_G(g) | SW64F_I(i)); +} + +static void emit_void(ASMState *as, SW64Ins mi) +{ + __WI(--as->mcp, mi); +} + +static void emit_DEXTM(ASMState *as, Reg rt, Reg rs, uint32_t pos, uint32_t size) +{ + emit_AjD(as, SW64I_SRLI, rt, 64-size, rt); + emit_AjD(as, SW64I_SLLI, rs, 64-pos-size, rt); +} + +/* -- Emit loads/stores 
--------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= REF_BASE) + +/* Load a 32 bit constant into a GPR. */ +static void emit_loadi(ASMState *as, Reg r, int32_t i) +{ + int16_t hi, lo; + split32Addr(i, &hi, &lo); + emit_ABD(as, SW64I_ADDW, RID_ZERO, r, r); + if (i == 0 && r != RID_ZERO) { + emit_Ao(as, SW64I_LDI, r, RID_ZERO, 0); + return; + } + lua_assert(r != RID_ZERO); + if (lo != 0) { + emit_Ao(as, SW64I_LDI, r, hi ? r : RID_ZERO, lo); + } + if (hi != 0) { + // ldih dest, hi(zero) + emit_Ao(as, SW64I_LDIH, r, RID_ZERO, hi); + } +} + +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + int16_t h_hi, h_lo; + int16_t l_hi, l_lo; + if (u64 == 0) { + emit_Ao(as, SW64I_LDI, r, RID_ZERO, 0); + return; + } + lua_assert(r != RID_ZERO); + + split64AddrLO32(u64, &l_hi, &l_lo); + split64AddrHI32(u64, &h_hi, &h_lo); + int has_high = h_hi || h_lo; + + if (l_lo) { + emit_Ao(as, SW64I_LDI, r, l_hi || has_high ? r : RID_ZERO, l_lo); + } + if (l_hi) { + emit_Ao(as, SW64I_LDIH, r, has_high ? r : RID_ZERO, l_hi); + } + + if (has_high) { + emit_AjD(as, SW64I_SLLI, r, 32, r); + } + if (h_lo) { + emit_Ao(as, SW64I_LDI, r, h_hi ? r : RID_ZERO, h_lo); + } + if (h_hi) { + emit_Ao(as, SW64I_LDIH, r, RID_ZERO, h_hi); + } +} + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, SW64Ins mi, Reg r, void *p, RegSet allow) +{ + intptr_t jgl = (intptr_t)(J2G(as->J)); + intptr_t i = (intptr_t)(p); + Reg base; + if ((uint32_t)(i-jgl-32768) < 0x1000) { + i = i-jgl-32768; + base = RID_JGL; + } else { + base = ra_allock(as, i-(int16_t)i, allow); + i = (int16_t)i; + } + emit_Ao(as, mi, r, base, i); +} + +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + emit_lsptr(as, SW64I_FLDD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR); + } else { + if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) + emit_lsptr(as, SW64I_LDL, r64, (void *)k, 0); + else + emit_loadu64(as, r64, *k); + } +} + +/* Get/set global_State fields. */ +static void emit_lsglptr(ASMState *as, SW64Ins mi, Reg r, int32_t ofs) +{ + emit_Ao(as, mi, r, RID_JGL, ofs-32768); +} + +#define emit_getgl(as, r, field) \ + emit_lsglptr(as, SW64I_AL, (r), (int32_t)offsetof(global_State, field)) +#define emit_setgl(as, r, field) \ + emit_lsglptr(as, SW64I_AS, (r), (int32_t)offsetof(global_State, field)) + +/* Trace number is determined from per-trace exit stubs. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. 
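Because instructions are emitted towards lower addresses, a label captured here ends up after (in execution order) any instructions emitted subsequently, which is how asm_gc_check obtains its l_end skip target. 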
*/ +#define emit_label(as) ((as)->mcp) + +static SW64Ins invert_cond(SW64Ins mi) +{ + switch (mi) { + case SW64I_BEQ: return SW64I_BNE; + case SW64I_BNE: return SW64I_BEQ; + case SW64I_FBEQ: return SW64I_FBNE; + case SW64I_FBNE: return SW64I_FBEQ; + case SW64I_BLT: return SW64I_BGE; + case SW64I_BGE: return SW64I_BLT; + case SW64I_BLE: return SW64I_BGT; + case SW64I_BGT: return SW64I_BLE; + default: + printf("TODO HANDLING INVERT_COND:%x\n", mi); + asm("bpt;bpt"); + return SW64I_NOP; + } +} + +static void emit_branch(ASMState *as, SW64Ins mi, Reg a, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = target - p; + lua_assert(IS_SW64F_BRANCH_VALID(delta)); + __WI(--p, mi | SW64F_A(a) | SW64F_BRANCH(delta)); + as->mcp = p; +} + +static void emit_jmp(ASMState *as, MCode *target) +{ + TODO + /* *--as->mcp = SW64I_NOP; */ + /* emit_branch(as, SW64I_B, RID_ZERO, RID_ZERO, (target)); */ +} + +static void emit_call(ASMState *as, void *target, int needcfa) +{ + MCode *p = as->mcp; + __WI(--p, SW64I_CALL | SW64F_A(RID_RA) | SW64F_B(RID_CFUNCADDR)); + needcfa = 1; + // TODO: use br if target in range + as->mcp = p; + if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); +} + +/* -- Emit generic operations --------------------------------------------- */ + +#define emit_move(as, dst, src) \ + emit_Ao(as, SW64I_LDI, (dst), (src), 0) + +#define emit_fmove(as, dst, src) \ + emit_FGI(as, SW64I_FADDD, (src), (RID_FZERO), (dst)) + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (dst < RID_MAX_GPR) + emit_move(as, dst, src); + else + emit_fmove(as, dst, src); +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r < RID_MAX_GPR) + emit_Ao(as, irt_is64(ir->t) ? SW64I_LDL : SW64I_LDW, r, base, ofs); + else + emit_Fo(as, irt_isnum(ir->t) ? SW64I_FLDD : SW64I_FLDS, + r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r < RID_MAX_GPR) + emit_Ao(as, irt_is64(ir->t) ? SW64I_STL : SW64I_STW, r, base, ofs); + else + emit_Fo(as, irt_isnum(ir->t) ? SW64I_FSTD : SW64I_FSTS, + r, base, ofs); +} + +/* Add offset to pointer. 
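Only a signed 16-bit displacement fits in a single LDI (hence the checki16 assertion); larger adjustments would need an additional LDIH and are not handled here. 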
*/ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) { + lj_assertA(checki16(ofs), "offset %d out of range", ofs); + emit_Ao(as, SW64I_LDI, r, r, ofs); + } +} + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + + + +static void emit_rotx32(ASMState *as, Reg src, uint32_t shift, Reg dest, Reg tmp, int mode) +{ + lua_assert(src != tmp); + lua_assert(dest != tmp); + + emit_ABD(as, SW64I_ADDW, dest, RID_ZERO, dest); + emit_ABD(as, SW64I_BIS, dest, tmp, dest); + if (mode == 1) { + emit_AjD(as, SW64I_SRLI, dest, (-shift) & 31, dest); + emit_AjD(as, SW64I_SLLI, dest, shift, tmp); + } else if (mode == 2) { + emit_AjD(as, SW64I_SLLI, dest, (-shift) & 31, dest); + emit_AjD(as, SW64I_SRLI, dest, shift, tmp); + } else { + lua_assert(0); + } + emit_AjD(as, SW64I_EXTLWI, src, 0, dest); +} +static void emit_rotx(ASMState *as, Reg src, uint32_t shift, Reg dest, Reg tmp, int mode) +{ + lua_assert(src != tmp); + lua_assert(dest != tmp); + + emit_ABD(as, SW64I_BIS, dest, tmp, dest); + if (mode == 1) { + emit_AjD(as, SW64I_SRLI, src, (-shift) & 63, dest); + emit_AjD(as, SW64I_SLLI, src, shift, tmp); + } else if (mode == 2) { + emit_AjD(as, SW64I_SLLI, src, (-shift) & 63, dest); + emit_AjD(as, SW64I_SRLI, src, shift, tmp); + } else { + lua_assert(0); + } +} +#define emit_rotl32(as, src, shift, dest, tmp) emit_rotx32(as, src, shift, dest, tmp, 1) +#define emit_rotr32(as, src, shift, dest, tmp) emit_rotx32(as, src, shift, dest, tmp, 2) +#define emit_rotl(as, src, shift, dest, tmp) emit_rotx(as, src, shift, dest, tmp, 1) +#define emit_rotr(as, src, shift, dest, tmp) emit_rotx(as, src, shift, dest, tmp, 2) + +#undef TODO diff -Naur a/src/lj_ffdef.h b/src/lj_ffdef.h --- a/src/lj_ffdef.h 2017-05-02 03:05:00.000000000 +0800 +++ b/src/lj_ffdef.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,210 +0,0 @@ -/* This is a generated file. DO NOT EDIT! 
*/ - -FFDEF(assert) -FFDEF(type) -FFDEF(next) -FFDEF(pairs) -FFDEF(ipairs_aux) -FFDEF(ipairs) -FFDEF(getmetatable) -FFDEF(setmetatable) -FFDEF(getfenv) -FFDEF(setfenv) -FFDEF(rawget) -FFDEF(rawset) -FFDEF(rawequal) -FFDEF(unpack) -FFDEF(select) -FFDEF(tonumber) -FFDEF(tostring) -FFDEF(error) -FFDEF(pcall) -FFDEF(xpcall) -FFDEF(loadfile) -FFDEF(load) -FFDEF(loadstring) -FFDEF(dofile) -FFDEF(gcinfo) -FFDEF(collectgarbage) -FFDEF(newproxy) -FFDEF(print) -FFDEF(coroutine_status) -FFDEF(coroutine_running) -FFDEF(coroutine_create) -FFDEF(coroutine_yield) -FFDEF(coroutine_resume) -FFDEF(coroutine_wrap_aux) -FFDEF(coroutine_wrap) -FFDEF(math_abs) -FFDEF(math_floor) -FFDEF(math_ceil) -FFDEF(math_sqrt) -FFDEF(math_log10) -FFDEF(math_exp) -FFDEF(math_sin) -FFDEF(math_cos) -FFDEF(math_tan) -FFDEF(math_asin) -FFDEF(math_acos) -FFDEF(math_atan) -FFDEF(math_sinh) -FFDEF(math_cosh) -FFDEF(math_tanh) -FFDEF(math_frexp) -FFDEF(math_modf) -FFDEF(math_deg) -FFDEF(math_rad) -FFDEF(math_log) -FFDEF(math_atan2) -FFDEF(math_pow) -FFDEF(math_fmod) -FFDEF(math_ldexp) -FFDEF(math_min) -FFDEF(math_max) -FFDEF(math_random) -FFDEF(math_randomseed) -FFDEF(bit_tobit) -FFDEF(bit_bnot) -FFDEF(bit_bswap) -FFDEF(bit_lshift) -FFDEF(bit_rshift) -FFDEF(bit_arshift) -FFDEF(bit_rol) -FFDEF(bit_ror) -FFDEF(bit_band) -FFDEF(bit_bor) -FFDEF(bit_bxor) -FFDEF(bit_tohex) -FFDEF(string_len) -FFDEF(string_byte) -FFDEF(string_char) -FFDEF(string_sub) -FFDEF(string_rep) -FFDEF(string_reverse) -FFDEF(string_lower) -FFDEF(string_upper) -FFDEF(string_dump) -FFDEF(string_find) -FFDEF(string_match) -FFDEF(string_gmatch_aux) -FFDEF(string_gmatch) -FFDEF(string_gsub) -FFDEF(string_format) -FFDEF(table_foreachi) -FFDEF(table_foreach) -FFDEF(table_getn) -FFDEF(table_maxn) -FFDEF(table_insert) -FFDEF(table_remove) -FFDEF(table_concat) -FFDEF(table_sort) -FFDEF(io_method_close) -FFDEF(io_method_read) -FFDEF(io_method_write) -FFDEF(io_method_flush) -FFDEF(io_method_seek) -FFDEF(io_method_setvbuf) -FFDEF(io_method_lines) -FFDEF(io_method___gc) -FFDEF(io_method___tostring) -FFDEF(io_open) -FFDEF(io_popen) -FFDEF(io_tmpfile) -FFDEF(io_close) -FFDEF(io_read) -FFDEF(io_write) -FFDEF(io_flush) -FFDEF(io_input) -FFDEF(io_output) -FFDEF(io_lines) -FFDEF(io_type) -FFDEF(os_execute) -FFDEF(os_remove) -FFDEF(os_rename) -FFDEF(os_tmpname) -FFDEF(os_getenv) -FFDEF(os_exit) -FFDEF(os_clock) -FFDEF(os_date) -FFDEF(os_time) -FFDEF(os_difftime) -FFDEF(os_setlocale) -FFDEF(debug_getregistry) -FFDEF(debug_getmetatable) -FFDEF(debug_setmetatable) -FFDEF(debug_getfenv) -FFDEF(debug_setfenv) -FFDEF(debug_getinfo) -FFDEF(debug_getlocal) -FFDEF(debug_setlocal) -FFDEF(debug_getupvalue) -FFDEF(debug_setupvalue) -FFDEF(debug_upvalueid) -FFDEF(debug_upvaluejoin) -FFDEF(debug_sethook) -FFDEF(debug_gethook) -FFDEF(debug_debug) -FFDEF(debug_traceback) -FFDEF(jit_on) -FFDEF(jit_off) -FFDEF(jit_flush) -FFDEF(jit_status) -FFDEF(jit_attach) -FFDEF(jit_util_funcinfo) -FFDEF(jit_util_funcbc) -FFDEF(jit_util_funck) -FFDEF(jit_util_funcuvname) -FFDEF(jit_util_traceinfo) -FFDEF(jit_util_traceir) -FFDEF(jit_util_tracek) -FFDEF(jit_util_tracesnap) -FFDEF(jit_util_tracemc) -FFDEF(jit_util_traceexitstub) -FFDEF(jit_util_ircalladdr) -FFDEF(jit_opt_start) -FFDEF(ffi_meta___index) -FFDEF(ffi_meta___newindex) -FFDEF(ffi_meta___eq) -FFDEF(ffi_meta___len) -FFDEF(ffi_meta___lt) -FFDEF(ffi_meta___le) -FFDEF(ffi_meta___concat) -FFDEF(ffi_meta___call) -FFDEF(ffi_meta___add) -FFDEF(ffi_meta___sub) -FFDEF(ffi_meta___mul) -FFDEF(ffi_meta___div) -FFDEF(ffi_meta___mod) -FFDEF(ffi_meta___pow) 
-FFDEF(ffi_meta___unm) -FFDEF(ffi_meta___tostring) -FFDEF(ffi_meta___pairs) -FFDEF(ffi_meta___ipairs) -FFDEF(ffi_clib___index) -FFDEF(ffi_clib___newindex) -FFDEF(ffi_clib___gc) -FFDEF(ffi_callback_free) -FFDEF(ffi_callback_set) -FFDEF(ffi_cdef) -FFDEF(ffi_new) -FFDEF(ffi_cast) -FFDEF(ffi_typeof) -FFDEF(ffi_istype) -FFDEF(ffi_sizeof) -FFDEF(ffi_alignof) -FFDEF(ffi_offsetof) -FFDEF(ffi_errno) -FFDEF(ffi_string) -FFDEF(ffi_copy) -FFDEF(ffi_fill) -FFDEF(ffi_abi) -FFDEF(ffi_metatype) -FFDEF(ffi_gc) -FFDEF(ffi_load) - -#undef FFDEF - -#ifndef FF_NUM_ASMFUNC -#define FF_NUM_ASMFUNC 62 -#endif - diff -Naur a/src/lj_folddef.h b/src/lj_folddef.h --- a/src/lj_folddef.h 2017-05-02 03:05:00.000000000 +0800 +++ b/src/lj_folddef.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,1068 +0,0 @@ -/* This is a generated file. DO NOT EDIT! */ - -static const FoldFunc fold_func[] = { - fold_kfold_numarith, - fold_kfold_ldexp, - fold_kfold_fpmath, - fold_kfold_numpow, - fold_kfold_numcomp, - fold_kfold_intarith, - fold_kfold_intovarith, - fold_kfold_bnot, - fold_kfold_bswap, - fold_kfold_intcomp, - fold_kfold_intcomp0, - fold_kfold_int64arith, - fold_kfold_int64arith2, - fold_kfold_int64shift, - fold_kfold_bnot64, - fold_kfold_bswap64, - fold_kfold_int64comp, - fold_kfold_int64comp0, - fold_kfold_snew_kptr, - fold_kfold_snew_empty, - fold_kfold_strref, - fold_kfold_strref_snew, - fold_kfold_strcmp, - fold_kfold_add_kgc, - fold_kfold_add_kptr, - fold_kfold_add_kright, - fold_kfold_tobit, - fold_kfold_conv_kint_num, - fold_kfold_conv_kintu32_num, - fold_kfold_conv_kint_ext, - fold_kfold_conv_kint_i64, - fold_kfold_conv_kint64_num_i64, - fold_kfold_conv_kint64_num_u64, - fold_kfold_conv_kint64_int_i64, - fold_kfold_conv_knum_int_num, - fold_kfold_conv_knum_u32_num, - fold_kfold_conv_knum_i64_num, - fold_kfold_conv_knum_u64_num, - fold_kfold_tostr_knum, - fold_kfold_tostr_kint, - fold_kfold_strto, - lj_opt_cse, - fold_kfold_kref, - fold_shortcut_round, - fold_shortcut_left, - fold_shortcut_dropleft, - fold_shortcut_leftleft, - fold_simplify_numadd_negx, - fold_simplify_numadd_xneg, - fold_simplify_numsub_k, - fold_simplify_numsub_negk, - fold_simplify_numsub_xneg, - fold_simplify_nummuldiv_k, - fold_simplify_nummuldiv_negk, - fold_simplify_nummuldiv_negneg, - fold_simplify_numpow_xk, - fold_simplify_numpow_kx, - fold_shortcut_conv_num_int, - fold_simplify_conv_int_num, - fold_simplify_conv_i64_num, - fold_simplify_conv_int_i64, - fold_simplify_conv_flt_num, - fold_simplify_tobit_conv, - fold_simplify_floor_conv, - fold_simplify_conv_sext, - fold_simplify_conv_narrow, - fold_cse_conv, - fold_narrow_convert, - fold_simplify_intadd_k, - fold_simplify_intmul_k, - fold_simplify_intsub_k, - fold_simplify_intsub_kleft, - fold_simplify_intadd_k64, - fold_simplify_intsub_k64, - fold_simplify_intmul_k32, - fold_simplify_intmul_k64, - fold_simplify_intmod_k, - fold_simplify_intmod_kleft, - fold_simplify_intsub, - fold_simplify_intsubadd_leftcancel, - fold_simplify_intsubsub_leftcancel, - fold_simplify_intsubsub_rightcancel, - fold_simplify_intsubadd_rightcancel, - fold_simplify_intsubaddadd_cancel, - fold_simplify_band_k, - fold_simplify_bor_k, - fold_simplify_bxor_k, - fold_simplify_shift_ik, - fold_simplify_shift_andk, - fold_simplify_shift1_ki, - fold_simplify_shift2_ki, - fold_simplify_shiftk_andk, - fold_simplify_andk_shiftk, - fold_reassoc_intarith_k, - fold_reassoc_intarith_k64, - fold_reassoc_dup, - fold_reassoc_bxor, - fold_reassoc_shift, - fold_reassoc_minmax_k, - fold_reassoc_minmax_left, - fold_reassoc_minmax_right, - 
fold_abc_fwd, - fold_abc_k, - fold_abc_invar, - fold_comm_swap, - fold_comm_equal, - fold_comm_comp, - fold_comm_dup, - fold_comm_bxor, - fold_merge_eqne_snew_kgc, - lj_opt_fwd_aload, - fold_kfold_hload_kkptr, - lj_opt_fwd_hload, - lj_opt_fwd_uload, - lj_opt_fwd_tab_len, - fold_cse_uref, - lj_opt_fwd_hrefk, - fold_fwd_href_tnew, - fold_fwd_href_tdup, - fold_fload_tab_tnew_asize, - fold_fload_tab_tnew_hmask, - fold_fload_tab_tdup_asize, - fold_fload_tab_tdup_hmask, - fold_fload_tab_ah, - fold_fload_str_len_kgc, - fold_fload_str_len_snew, - fold_fload_cdata_typeid_kgc, - fold_fload_cdata_int64_kgc, - fold_fload_cdata_typeid_cnew, - fold_fload_cdata_ptr_int64_cnew, - lj_opt_cse, - lj_opt_fwd_fload, - fold_fwd_sload, - fold_xload_kptr, - lj_opt_fwd_xload, - fold_barrier_tab, - fold_barrier_tnew_tdup, - lj_opt_dse_ahstore, - lj_opt_dse_ustore, - lj_opt_dse_fstore, - lj_opt_dse_xstore, - lj_ir_emit -}; - -static const uint32_t fold_hash[916] = { -0xffffffff, -0xffffffff, -0x5b4c8016, -0x0d4e7016, -0xffffffff, -0x1000701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x29110c1a, -0xffffffff, -0xffffffff, -0x5b488016, -0x0d4a7016, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7b87fc07, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x0d467016, -0xffffffff, -0x5a4c73ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5153fc29, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d408016, -0xffffffff, -0x594873ff, -0x8187440f, -0xffffffff, -0xffffffff, -0xffffffff, -0x8287fc0f, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6715ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a11fc1a, -0xffffffff, -0x1daa5a70, -0xffffffff, -0xffffffff, -0x0a0bfc16, -0x5c408c16, -0x6911ffff, -0x8db7ffff, -0xffffffff, -0xffffffff, -0x1caa59d4, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0dffff, -0x2b68d002, -0xffffffff, -0x3cab5695, -0xffffffff, -0x41aaa675, -0xffffffff, -0xffffffff, -0xffffffff, -0x27ae5800, -0xffffffff, -0x6a09ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7f865c0f, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a05ffff, -0x42abffff, -0x5e44881c, -0x5d50a016, -0x066c5816, -0x00646c1b, -0x75753bff, -0x1951fc18, -0x6264c81b, -0x1850641c, -0xffffffff, -0x6a01ffff, -0x87a7ffff, -0x4953fc1c, -0x8da80000, -0x4f52a3ff, -0x00606c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d428416, -0x88a53800, -0xffffffff, -0xffffffff, -0xffffffff, -0x05645816, -0xffffffff, -0x005c6c1b, -0x20aa71d6, -0xffffffff, -0xffffffff, -0xffffffff, -0x1399fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x157f33ff, -0xffffffff, -0xffffffff, -0x584dfc20, -0xffffffff, -0xffffffff, -0xffffffff, -0x8d9bffff, -0xffffffff, -0x055c5816, -0xffffffff, -0x00546c1b, -0xffffffff, -0xffffffff, -0x5849fc20, -0xffffffff, -0xffffffff, -0xffffffff, -0x8c97ffff, -0x5543fc1c, -0x05585816, -0xffffffff, -0x00506c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8a93ffff, -0x26ae6c00, -0x05545816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x76753c17, -0x41aaa695, -0xffffffff, -0x898fffff, -0xffffffff, -0x05505816, -0xffffffff, -0xffffffff, -0xffffffff, -0x858867ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x848bffff, -0xffffffff, -0x054c5816, -0x79873c06, -0x47525bff, -0xffffffff, -0x3f695401, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8387ffff, -0xffffffff, -0x05485816, -0xffffffff, -0x5a4e5bff, 
-0xffffffff, -0xffffffff, -0x6264c816, -0x43aaa26e, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x05445816, -0xffffffff, -0x5a4a5bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3455fc1b, -0x0c5a701c, -0x6366cbff, -0x0e3c7000, -0xffffffff, -0x05405816, -0xffffffff, -0x59465bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x41aaa276, -0x0c56701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x21aa7275, -0x0b52701c, -0x61489016, -0x6465fc33, -0x8d77ffff, -0xffffffff, -0x7b87fc05, -0xffffffff, -0xffffffff, -0x2a126bff, -0x385a6fff, -0xffffffff, -0x446dfc16, -0xffffffff, -0x7473ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7d873000, -0xffffffff, -0x5c409016, -0x686fffff, -0x8187440d, -0xffffffff, -0xffffffff, -0x3554b81b, -0x8287fc0d, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x686bffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8d9ffc00, -0x737a5fff, -0x41aaaa75, -0xffffffff, -0xffffffff, -0x5e40801c, -0x0b42701c, -0x6b67ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2b68d000, -0xffffffff, -0xffffffff, -0x6d133017, -0xffffffff, -0xffffffff, -0x4c59fc16, -0xffffffff, -0xffffffff, -0x110bfc1c, -0x3aab566e, -0xffffffff, -0x5052a7ff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6515fc28, -0x4a55fc16, -0x7f865c0d, -0x88a53c00, -0x41aaa296, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4451fc16, -0xffffffff, -0x60448bff, -0x21aa7295, -0xffffffff, -0x3cab5676, -0x04106c1b, -0xffffffff, -0x78873807, -0xffffffff, -0xffffffff, -0x574dfc16, -0xffffffff, -0x4e53ffff, -0xffffffff, -0x09145816, -0xffffffff, -0x040c6c1b, -0x8287fc00, -0x5e50a01c, -0x6467fc32, -0xffffffff, -0x5749fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a105816, -0x2e3e7c00, -0x04086c1b, -0x7083fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0x5645fc16, -0xffffffff, -0x22aa6e6e, -0x5e42841c, -0x614e9c16, -0x090c5816, -0x04046c1b, -0x1eaa5ab3, -0xffffffff, -0xffffffff, -0xffffffff, -0x5441fc16, -0x41aaaa95, -0xffffffff, -0x5352a028, -0x09085816, -0x17505c16, -0x04006c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6b43ffff, -0xffffffff, -0x09045816, -0xffffffff, -0x43aaa2ae, -0xffffffff, -0xffffffff, -0xffffffff, -0x083e5800, -0x7c865c00, -0xffffffff, -0x76753c15, -0x3051fc2e, -0x09005816, -0xffffffff, -0xffffffff, -0xffffffff, -0x3aab568e, -0xffffffff, -0x43aaa66e, -0xffffffff, -0x1daa5a71, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a12701c, -0x5f66cfff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3cab5696, -0xffffffff, -0x100e701c, -0x41aaa676, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a125c17, -0x3654b82e, -0x100a701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x1006701c, -0xffffffff, -0x1951fc19, -0xffffffff, -0xffffffff, -0xffffffff, -0x23aa6e8e, -0xffffffff, -0x5b4e8016, -0xffffffff, -0x1eaa5ad3, -0x1002701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x29130c1a, -0xffffffff, -0xffffffff, -0x0d4c7016, -0xffffffff, -0x475273ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, 
-0xffffffff, -0xffffffff, -0xffffffff, -0x5b468016, -0x0d487016, -0x5a4e73ff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5d54a816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x5a4a73ff, -0x6615fc16, -0x3bab56ae, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x594673ff, -0xffffffff, -0x61468c16, -0x8d17ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a13fc1a, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6913ffff, -0x40abfeb3, -0x8db9ffff, -0x41aaa696, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0fffff, -0x8db5ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7a873c07, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a0bffff, -0x3f695402, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x24aa6eae, -0xffffffff, -0xffffffff, -0x6a07ffff, -0xffffffff, -0xffffffff, -0x066e5816, -0xffffffff, -0x00666c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6a03ffff, -0xffffffff, -0x4b55fc1c, -0x066a5816, -0xffffffff, -0x00626c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4851fc1c, -0x05665816, -0x18506016, -0x005e6c1b, -0x12986416, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x8da1ffff, -0xffffffff, -0x3bab56ce, -0xffffffff, -0x43aaa6ae, -0xffffffff, -0xffffffff, -0x584ffc20, -0x7b87fc06, -0xffffffff, -0x5f4287ff, -0x8d9dffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x00566c1b, -0xffffffff, -0xffffffff, -0x584bfc20, -0x5253fc28, -0xffffffff, -0xffffffff, -0xffffffff, -0x5645fc1c, -0xffffffff, -0x40abfed3, -0x00526c1b, -0x8187440e, -0xffffffff, -0x5847fc20, -0x8287fc0e, -0xffffffff, -0xffffffff, -0x8b95ffff, -0x2e3c7800, -0x5441fc1c, -0xffffffff, -0xffffffff, -0xffffffff, -0x17505c1c, -0xffffffff, -0xffffffff, -0x41aaaa76, -0xffffffff, -0x614c9816, -0x8991ffff, -0x1daa5a6f, -0x05525816, -0x4d585bff, -0xffffffff, -0x8087400c, -0xffffffff, -0xffffffff, -0xffffffff, -0x1baa59d3, -0x828dffff, -0x25aa6ece, -0x054e5816, -0x76753c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0x2b68d001, -0xffffffff, -0xffffffff, -0xffffffff, -0x8689ffff, -0xffffffff, -0x054a5816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x43aca01b, -0x05465816, -0x7f865c0e, -0x5a4c5bff, -0x39ab55d3, -0x01626c16, -0x02686fff, -0x3457fc1b, -0xffffffff, -0xffffffff, -0x0f3e7000, -0x3dab55ae, -0x05425816, -0x1951fc17, -0x59485bff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3153fc1b, -0x0c58701c, -0x5f64cbff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x035a6c16, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b54701c, -0xffffffff, -0x8779ffff, -0x1faa71d5, -0xffffffff, -0xffffffff, -0x2d5eb81b, -0x72b5fc08, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b50701c, -0x456ffc16, -0x7b75ffff, -0xffffffff, -0xffffffff, -0x147e5c16, -0xffffffff, -0xffffffff, -0x2a106bff, -0xffffffff, -0x1eaa5ab4, -0x446bfc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x41aaaa96, -0xffffffff, -0x3556b81b, -0x87a5fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x4e6dffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x3252b81b, -0xffffffff, -0x5e54a81c, -0xffffffff, -0xffffffff, -0x0b44701c, -0x28b05c00, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x73785fff, -0xffffffff, -0xffffffff, -0xffffffff, -0x0b40701c, -0xffffffff, 
-0x6b65ffff, -0xffffffff, -0xffffffff, -0x1daa5a72, -0xffffffff, -0xffffffff, -0xffffffff, -0x6266cc1b, -0xffffffff, -0x375bfc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x3f695400, -0xffffffff, -0xffffffff, -0xffffffff, -0x6d113017, -0x3ead541b, -0xffffffff, -0x5d448816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x18506416, -0xffffffff, -0xffffffff, -0x16b37400, -0xffffffff, -0x4653fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x04126c1b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x574ffc16, -0xffffffff, -0x6855ffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x040e6c1b, -0x41aaa275, -0xffffffff, -0xffffffff, -0xffffffff, -0x574bfc16, -0x6f826400, -0x6851ffff, -0x1eaa5ad4, -0x2a125816, -0xffffffff, -0x040a6c1b, -0x7185fc00, -0xffffffff, -0xffffffff, -0xffffffff, -0x5747fc16, -0x7b87fc04, -0xffffffff, -0xffffffff, -0x090e5816, -0xffffffff, -0x04066c1b, -0x6e81fc00, -0x1aac6c1b, -0x1850601c, -0x2e5cbbff, -0x5543fc16, -0xffffffff, -0xffffffff, -0xffffffff, -0x090a5816, -0xffffffff, -0x04026c1b, -0xffffffff, -0xffffffff, -0x8087440c, -0xffffffff, -0xffffffff, -0xffffffff, -0x6c45ffff, -0x8287fc0c, -0x09065816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x6b41ffff, -0x3353fc2e, -0x09025816, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2f50bbff, -0x073c5800, -0x6266cc16, -0x5f4083ff, -0xffffffff, -0xffffffff, -0x43aca41b, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2a10701c, -0x6364cfff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x7e865c0c, -0xffffffff, -0xffffffff, -0x3656b82e, -0x41aaa295, -0x100c701c, -0x614a9416, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x2c5ebc1b, -0xffffffff, -0x2a105c17, -0xffffffff, -0x1008701c, -0x3cab5675, -0xffffffff, -0xffffffff, -0x77873806, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0x1004701c, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff, -0xffffffff -}; - -#define fold_hashkey(k) (lj_rol(lj_rol((k),17)-(k),16)%915) - diff -Naur a/src/lj_frame.h b/src/lj_frame.h --- a/src/lj_frame.h 2024-11-05 09:58:44.413962653 +0800 +++ b/src/lj_frame.h 2024-11-05 11:30:10.422190418 +0800 @@ -264,6 +264,18 @@ #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 + +#elif LJ_TARGET_SW64 +#define CFRAME_OFS_ERRF 172 +#define CFRAME_OFS_NRES 168 +#define CFRAME_OFS_PREV 160 +#define CFRAME_OFS_L 152 +#define CFRAME_OFS_PC 144 +#define CFRAME_SIZE 176 + +#define CFRAME_OFS_MULTRES 0 +#define CFRAME_SHIFT_MULTRES 3 + #else #error "Missing CFRAME_* definitions for this architecture" #endif diff -Naur a/src/lj_gdbjit.c b/src/lj_gdbjit.c --- a/src/lj_gdbjit.c 2024-11-05 09:58:44.413962653 +0800 +++ b/src/lj_gdbjit.c 2024-11-05 11:33:25.420046275 +0800 @@ -306,6 +306,9 @@ #elif LJ_TARGET_MIPS DW_REG_SP = 29, DW_REG_RA = 31, +#elif LJ_TARGET_SW64 + DW_REG_SP = 30, + DW_REG_RA = 26, #else #error "Unsupported target architecture" #endif @@ -383,6 +386,8 @@ .machine = 20, #elif LJ_TARGET_MIPS .machine = 8, +#elif LJ_TARGET_SW64 + .machine = 0x9916, #else #error "Unsupported target architecture" #endif @@ -591,6 +596,10 @@ for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } } +#elif LJ_TARGET_SW64 + { + /* 
TODO: setup saved register position */ + } #else #error "Unsupported target architecture" #endif diff -Naur a/src/lj_jit.h b/src/lj_jit.h --- a/src/lj_jit.h 2024-11-05 09:58:44.413962653 +0800 +++ b/src/lj_jit.h 2024-11-05 13:44:35.053442889 +0800 @@ -67,10 +67,13 @@ #endif #endif +#elif LJ_TARGET_SW64 +#define JIT_F_SW1621 0x00000010 +#define JIT_F_CPU_FIRST JIT_F_SW1621 +#define JIT_F_CPUSTRING "\4SW6A" #else - +#define JIT_F_CPU_FIRST 0 #define JIT_F_CPUSTRING "" - #endif /* Optimization flags. 12 bits. */ diff -Naur a/src/lj_libdef.h b/src/lj_libdef.h --- a/src/lj_libdef.h 2017-05-02 03:05:00.000000000 +0800 +++ b/src/lj_libdef.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,393 +0,0 @@ -/* This is a generated file. DO NOT EDIT! */ - -#ifdef LJLIB_MODULE_base -#undef LJLIB_MODULE_base -static const lua_CFunction lj_lib_cf_base[] = { - lj_ffh_assert, - lj_ffh_next, - lj_ffh_pairs, - lj_ffh_ipairs_aux, - lj_ffh_ipairs, - lj_ffh_setmetatable, - lj_cf_getfenv, - lj_cf_setfenv, - lj_ffh_rawget, - lj_cf_rawset, - lj_cf_rawequal, - lj_cf_unpack, - lj_cf_select, - lj_ffh_tonumber, - lj_ffh_tostring, - lj_cf_error, - lj_ffh_pcall, - lj_cf_loadfile, - lj_cf_load, - lj_cf_loadstring, - lj_cf_dofile, - lj_cf_gcinfo, - lj_cf_collectgarbage, - lj_cf_newproxy, - lj_cf_print -}; -static const uint8_t lj_lib_init_base[] = { -2,0,28,70,97,115,115,101,114,116,195,110,105,108,199,98,111,111,108,101,97, -110,252,1,200,117,115,101,114,100,97,116,97,198,115,116,114,105,110,103,197, -117,112,118,97,108,198,116,104,114,101,97,100,197,112,114,111,116,111,200,102, -117,110,99,116,105,111,110,197,116,114,97,99,101,197,99,100,97,116,97,197,116, -97,98,108,101,252,9,198,110,117,109,98,101,114,132,116,121,112,101,68,110,101, -120,116,253,69,112,97,105,114,115,64,253,70,105,112,97,105,114,115,140,103, -101,116,109,101,116,97,116,97,98,108,101,76,115,101,116,109,101,116,97,116, -97,98,108,101,7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,70, -114,97,119,103,101,116,6,114,97,119,115,101,116,8,114,97,119,101,113,117,97, -108,6,117,110,112,97,99,107,6,115,101,108,101,99,116,72,116,111,110,117,109, -98,101,114,195,110,105,108,197,102,97,108,115,101,196,116,114,117,101,72,116, -111,115,116,114,105,110,103,5,101,114,114,111,114,69,112,99,97,108,108,134, -120,112,99,97,108,108,8,108,111,97,100,102,105,108,101,4,108,111,97,100,10, -108,111,97,100,115,116,114,105,110,103,6,100,111,102,105,108,101,6,103,99,105, -110,102,111,14,99,111,108,108,101,99,116,103,97,114,98,97,103,101,252,2,8,110, -101,119,112,114,111,120,121,200,116,111,115,116,114,105,110,103,5,112,114,105, -110,116,252,3,200,95,86,69,82,83,73,79,78,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_coroutine -#undef LJLIB_MODULE_coroutine -static const lua_CFunction lj_lib_cf_coroutine[] = { - lj_cf_coroutine_status, - lj_cf_coroutine_running, - lj_cf_coroutine_create, - lj_ffh_coroutine_yield, - lj_ffh_coroutine_resume, - lj_cf_coroutine_wrap -}; -static const uint8_t lj_lib_init_coroutine[] = { -30,13,6,6,115,116,97,116,117,115,7,114,117,110,110,105,110,103,6,99,114,101, -97,116,101,69,121,105,101,108,100,70,114,101,115,117,109,101,254,4,119,114, -97,112,255 -}; -#endif - -#ifdef LJLIB_MODULE_math -#undef LJLIB_MODULE_math -static const lua_CFunction lj_lib_cf_math[] = { - lj_ffh_math_abs, - lj_ffh_math_sqrt, - lj_ffh_math_log, - lj_ffh_math_atan2, - lj_ffh_math_ldexp, - lj_ffh_math_min, - lj_cf_math_random, - lj_cf_math_randomseed -}; -static const uint8_t lj_lib_init_math[] = { -37,16,30,67,97,98,115,133,102,108,111,111,114,132,99,101,105,108,68,115,113, 
-114,116,133,108,111,103,49,48,131,101,120,112,131,115,105,110,131,99,111,115, -131,116,97,110,132,97,115,105,110,132,97,99,111,115,132,97,116,97,110,132,115, -105,110,104,132,99,111,115,104,132,116,97,110,104,133,102,114,101,120,112,132, -109,111,100,102,251,248,193,99,26,220,165,76,64,131,100,101,103,251,57,157, -82,162,70,223,145,63,131,114,97,100,67,108,111,103,69,97,116,97,110,50,131, -112,111,119,132,102,109,111,100,69,108,100,101,120,112,67,109,105,110,131,109, -97,120,251,24,45,68,84,251,33,9,64,194,112,105,250,251,0,0,0,0,0,0,240,127, -196,104,117,103,101,250,252,2,6,114,97,110,100,111,109,252,2,10,114,97,110, -100,111,109,115,101,101,100,255 -}; -#endif - -#ifdef LJLIB_MODULE_bit -#undef LJLIB_MODULE_bit -static const lua_CFunction lj_lib_cf_bit[] = { - lj_ffh_bit_tobit, - lj_ffh_bit_lshift, - lj_ffh_bit_band, - lj_cf_bit_tohex -}; -static const uint8_t lj_lib_init_bit[] = { -65,42,12,69,116,111,98,105,116,132,98,110,111,116,133,98,115,119,97,112,70, -108,115,104,105,102,116,134,114,115,104,105,102,116,135,97,114,115,104,105, -102,116,131,114,111,108,131,114,111,114,68,98,97,110,100,131,98,111,114,132, -98,120,111,114,5,116,111,104,101,120,255 -}; -#endif - -#ifdef LJLIB_MODULE_string -#undef LJLIB_MODULE_string -static const lua_CFunction lj_lib_cf_string[] = { - lj_ffh_string_len, - lj_ffh_string_byte, - lj_ffh_string_char, - lj_ffh_string_sub, - lj_ffh_string_rep, - lj_ffh_string_reverse, - lj_cf_string_dump, - lj_cf_string_find, - lj_cf_string_match, - lj_cf_string_gmatch, - lj_cf_string_gsub, - lj_cf_string_format -}; -static const uint8_t lj_lib_init_string[] = { -77,53,14,67,108,101,110,68,98,121,116,101,68,99,104,97,114,67,115,117,98,67, -114,101,112,71,114,101,118,101,114,115,101,133,108,111,119,101,114,133,117, -112,112,101,114,4,100,117,109,112,4,102,105,110,100,5,109,97,116,99,104,254, -6,103,109,97,116,99,104,4,103,115,117,98,6,102,111,114,109,97,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_table -#undef LJLIB_MODULE_table -static const lua_CFunction lj_lib_cf_table[] = { - lj_cf_table_foreachi, - lj_cf_table_foreach, - lj_ffh_table_getn, - lj_cf_table_maxn, - lj_cf_table_insert, - lj_cf_table_remove, - lj_cf_table_concat, - lj_cf_table_sort -}; -static const uint8_t lj_lib_init_table[] = { -92,61,8,8,102,111,114,101,97,99,104,105,7,102,111,114,101,97,99,104,68,103, -101,116,110,4,109,97,120,110,6,105,110,115,101,114,116,6,114,101,109,111,118, -101,6,99,111,110,99,97,116,4,115,111,114,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_io_method -#undef LJLIB_MODULE_io_method -static const lua_CFunction lj_lib_cf_io_method[] = { - lj_cf_io_method_close, - lj_cf_io_method_read, - lj_cf_io_method_write, - lj_cf_io_method_flush, - lj_cf_io_method_seek, - lj_cf_io_method_setvbuf, - lj_cf_io_method_lines, - lj_cf_io_method___gc, - lj_cf_io_method___tostring -}; -static const uint8_t lj_lib_init_io_method[] = { -100,62,10,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101,5,102, -108,117,115,104,4,115,101,101,107,7,115,101,116,118,98,117,102,5,108,105,110, -101,115,4,95,95,103,99,10,95,95,116,111,115,116,114,105,110,103,252,1,199,95, -95,105,110,100,101,120,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_io -#undef LJLIB_MODULE_io -static const lua_CFunction lj_lib_cf_io[] = { - lj_cf_io_open, - lj_cf_io_popen, - lj_cf_io_tmpfile, - lj_cf_io_close, - lj_cf_io_read, - lj_cf_io_write, - lj_cf_io_flush, - lj_cf_io_input, - lj_cf_io_output, - lj_cf_io_lines, - lj_cf_io_type -}; -static const uint8_t lj_lib_init_io[] = { 
-109,62,12,252,2,192,250,4,111,112,101,110,5,112,111,112,101,110,7,116,109,112, -102,105,108,101,5,99,108,111,115,101,4,114,101,97,100,5,119,114,105,116,101, -5,102,108,117,115,104,5,105,110,112,117,116,6,111,117,116,112,117,116,5,108, -105,110,101,115,4,116,121,112,101,255 -}; -#endif - -#ifdef LJLIB_MODULE_os -#undef LJLIB_MODULE_os -static const lua_CFunction lj_lib_cf_os[] = { - lj_cf_os_execute, - lj_cf_os_remove, - lj_cf_os_rename, - lj_cf_os_tmpname, - lj_cf_os_getenv, - lj_cf_os_exit, - lj_cf_os_clock, - lj_cf_os_date, - lj_cf_os_time, - lj_cf_os_difftime, - lj_cf_os_setlocale -}; -static const uint8_t lj_lib_init_os[] = { -120,62,11,7,101,120,101,99,117,116,101,6,114,101,109,111,118,101,6,114,101, -110,97,109,101,7,116,109,112,110,97,109,101,6,103,101,116,101,110,118,4,101, -120,105,116,5,99,108,111,99,107,4,100,97,116,101,4,116,105,109,101,8,100,105, -102,102,116,105,109,101,9,115,101,116,108,111,99,97,108,101,255 -}; -#endif - -#ifdef LJLIB_MODULE_debug -#undef LJLIB_MODULE_debug -static const lua_CFunction lj_lib_cf_debug[] = { - lj_cf_debug_getregistry, - lj_cf_debug_getmetatable, - lj_cf_debug_setmetatable, - lj_cf_debug_getfenv, - lj_cf_debug_setfenv, - lj_cf_debug_getinfo, - lj_cf_debug_getlocal, - lj_cf_debug_setlocal, - lj_cf_debug_getupvalue, - lj_cf_debug_setupvalue, - lj_cf_debug_upvalueid, - lj_cf_debug_upvaluejoin, - lj_cf_debug_sethook, - lj_cf_debug_gethook, - lj_cf_debug_debug, - lj_cf_debug_traceback -}; -static const uint8_t lj_lib_init_debug[] = { -131,62,16,11,103,101,116,114,101,103,105,115,116,114,121,12,103,101,116,109, -101,116,97,116,97,98,108,101,12,115,101,116,109,101,116,97,116,97,98,108,101, -7,103,101,116,102,101,110,118,7,115,101,116,102,101,110,118,7,103,101,116,105, -110,102,111,8,103,101,116,108,111,99,97,108,8,115,101,116,108,111,99,97,108, -10,103,101,116,117,112,118,97,108,117,101,10,115,101,116,117,112,118,97,108, -117,101,9,117,112,118,97,108,117,101,105,100,11,117,112,118,97,108,117,101, -106,111,105,110,7,115,101,116,104,111,111,107,7,103,101,116,104,111,111,107, -5,100,101,98,117,103,9,116,114,97,99,101,98,97,99,107,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit -#undef LJLIB_MODULE_jit -static const lua_CFunction lj_lib_cf_jit[] = { - lj_cf_jit_on, - lj_cf_jit_off, - lj_cf_jit_flush, - lj_cf_jit_status, - lj_cf_jit_attach -}; -static const uint8_t lj_lib_init_jit[] = { -147,62,9,2,111,110,3,111,102,102,5,102,108,117,115,104,6,115,116,97,116,117, -115,6,97,116,116,97,99,104,252,5,194,111,115,250,252,4,196,97,114,99,104,250, -252,3,203,118,101,114,115,105,111,110,95,110,117,109,250,252,2,199,118,101, -114,115,105,111,110,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit_util -#undef LJLIB_MODULE_jit_util -static const lua_CFunction lj_lib_cf_jit_util[] = { - lj_cf_jit_util_funcinfo, - lj_cf_jit_util_funcbc, - lj_cf_jit_util_funck, - lj_cf_jit_util_funcuvname, - lj_cf_jit_util_traceinfo, - lj_cf_jit_util_traceir, - lj_cf_jit_util_tracek, - lj_cf_jit_util_tracesnap, - lj_cf_jit_util_tracemc, - lj_cf_jit_util_traceexitstub, - lj_cf_jit_util_ircalladdr -}; -static const uint8_t lj_lib_init_jit_util[] = { -152,62,11,8,102,117,110,99,105,110,102,111,6,102,117,110,99,98,99,5,102,117, -110,99,107,10,102,117,110,99,117,118,110,97,109,101,9,116,114,97,99,101,105, -110,102,111,7,116,114,97,99,101,105,114,6,116,114,97,99,101,107,9,116,114,97, -99,101,115,110,97,112,7,116,114,97,99,101,109,99,13,116,114,97,99,101,101,120, -105,116,115,116,117,98,10,105,114,99,97,108,108,97,100,100,114,255 -}; -#endif - -#ifdef LJLIB_MODULE_jit_opt -#undef LJLIB_MODULE_jit_opt 
-static const lua_CFunction lj_lib_cf_jit_opt[] = { - lj_cf_jit_opt_start -}; -static const uint8_t lj_lib_init_jit_opt[] = { -163,62,1,5,115,116,97,114,116,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_meta -#undef LJLIB_MODULE_ffi_meta -static const lua_CFunction lj_lib_cf_ffi_meta[] = { - lj_cf_ffi_meta___index, - lj_cf_ffi_meta___newindex, - lj_cf_ffi_meta___eq, - lj_cf_ffi_meta___len, - lj_cf_ffi_meta___lt, - lj_cf_ffi_meta___le, - lj_cf_ffi_meta___concat, - lj_cf_ffi_meta___call, - lj_cf_ffi_meta___add, - lj_cf_ffi_meta___sub, - lj_cf_ffi_meta___mul, - lj_cf_ffi_meta___div, - lj_cf_ffi_meta___mod, - lj_cf_ffi_meta___pow, - lj_cf_ffi_meta___unm, - lj_cf_ffi_meta___tostring, - lj_cf_ffi_meta___pairs, - lj_cf_ffi_meta___ipairs -}; -static const uint8_t lj_lib_init_ffi_meta[] = { -164,62,19,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101, -120,4,95,95,101,113,5,95,95,108,101,110,4,95,95,108,116,4,95,95,108,101,8,95, -95,99,111,110,99,97,116,6,95,95,99,97,108,108,5,95,95,97,100,100,5,95,95,115, -117,98,5,95,95,109,117,108,5,95,95,100,105,118,5,95,95,109,111,100,5,95,95, -112,111,119,5,95,95,117,110,109,10,95,95,116,111,115,116,114,105,110,103,7, -95,95,112,97,105,114,115,8,95,95,105,112,97,105,114,115,195,102,102,105,203, -95,95,109,101,116,97,116,97,98,108,101,250,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_clib -#undef LJLIB_MODULE_ffi_clib -static const lua_CFunction lj_lib_cf_ffi_clib[] = { - lj_cf_ffi_clib___index, - lj_cf_ffi_clib___newindex, - lj_cf_ffi_clib___gc -}; -static const uint8_t lj_lib_init_ffi_clib[] = { -182,62,3,7,95,95,105,110,100,101,120,10,95,95,110,101,119,105,110,100,101,120, -4,95,95,103,99,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi_callback -#undef LJLIB_MODULE_ffi_callback -static const lua_CFunction lj_lib_cf_ffi_callback[] = { - lj_cf_ffi_callback_free, - lj_cf_ffi_callback_set -}; -static const uint8_t lj_lib_init_ffi_callback[] = { -185,62,3,4,102,114,101,101,3,115,101,116,252,1,199,95,95,105,110,100,101,120, -250,255 -}; -#endif - -#ifdef LJLIB_MODULE_ffi -#undef LJLIB_MODULE_ffi -static const lua_CFunction lj_lib_cf_ffi[] = { - lj_cf_ffi_cdef, - lj_cf_ffi_new, - lj_cf_ffi_cast, - lj_cf_ffi_typeof, - lj_cf_ffi_istype, - lj_cf_ffi_sizeof, - lj_cf_ffi_alignof, - lj_cf_ffi_offsetof, - lj_cf_ffi_errno, - lj_cf_ffi_string, - lj_cf_ffi_copy, - lj_cf_ffi_fill, - lj_cf_ffi_abi, - lj_cf_ffi_metatype, - lj_cf_ffi_gc, - lj_cf_ffi_load -}; -static const uint8_t lj_lib_init_ffi[] = { -187,62,22,4,99,100,101,102,3,110,101,119,4,99,97,115,116,6,116,121,112,101, -111,102,6,105,115,116,121,112,101,6,115,105,122,101,111,102,7,97,108,105,103, -110,111,102,8,111,102,102,115,101,116,111,102,5,101,114,114,110,111,6,115,116, -114,105,110,103,4,99,111,112,121,4,102,105,108,108,3,97,98,105,252,8,192,250, -8,109,101,116,97,116,121,112,101,252,7,192,250,2,103,99,252,5,192,250,4,108, -111,97,100,252,4,193,67,250,252,3,194,111,115,250,252,2,196,97,114,99,104,250, -255 -}; -#endif - diff -Naur a/src/lj_recdef.h b/src/lj_recdef.h --- a/src/lj_recdef.h 2017-05-02 03:05:00.000000000 +0800 +++ b/src/lj_recdef.h 1970-01-01 08:00:00.000000000 +0800 @@ -1,263 +0,0 @@ -/* This is a generated file. DO NOT EDIT! 
*/ - -static const uint16_t recff_idmap[] = { -0, -0x0100, -0x0200, -0x0300, -0, -0, -0x0400, -0x0500, -0x0600, -0x0700, -0, -0, -0x0800, -0x0900, -0x0a00, -0, -0x0b00, -0x0c00, -0x0d00, -0, -0x0e00, -0x0f00, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x1000, -0x1100+(IRFPM_FLOOR), -0x1100+(IRFPM_CEIL), -0x1200+(IRFPM_SQRT), -0x1200+(IRFPM_LOG10), -0x1200+(IRFPM_EXP), -0x1200+(IRFPM_SIN), -0x1200+(IRFPM_COS), -0x1200+(IRFPM_TAN), -0x1300+(FF_math_asin), -0x1300+(FF_math_acos), -0x1300+(FF_math_atan), -0x1400+(IRCALL_sinh), -0x1400+(IRCALL_cosh), -0x1400+(IRCALL_tanh), -0, -0x1500, -0x1600, -0x1600, -0x1700, -0x1800, -0x1900, -0, -0x1a00, -0x1b00+(IR_MIN), -0x1b00+(IR_MAX), -0x1c00, -0, -0x1d00+(IR_TOBIT), -0x1d00+(IR_BNOT), -0x1d00+(IR_BSWAP), -0x1e00+(IR_BSHL), -0x1e00+(IR_BSHR), -0x1e00+(IR_BSAR), -0x1e00+(IR_BROL), -0x1e00+(IR_BROR), -0x1f00+(IR_BAND), -0x1f00+(IR_BOR), -0x1f00+(IR_BXOR), -0, -0x2000, -0x2100+(0), -0, -0x2100+(1), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2200, -0, -0x2300, -0x2400, -0, -0, -0, -0, -0x2500+(0), -0x2600+(0), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2500+(GCROOT_IO_OUTPUT), -0x2600+(GCROOT_IO_OUTPUT), -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0x2700+(0), -0x2700+(1), -0x2800+(MM_eq), -0x2800+(MM_len), -0x2800+(MM_lt), -0x2800+(MM_le), -0x2800+(MM_concat), -0x2900, -0x2800+(MM_add), -0x2800+(MM_sub), -0x2800+(MM_mul), -0x2800+(MM_div), -0x2800+(MM_mod), -0x2800+(MM_pow), -0x2800+(MM_unm), -0, -0, -0, -0x2a00+(1), -0x2a00+(0), -0, -0, -0, -0, -0x2b00, -0x2b00, -0x2c00, -0x2d00, -0x2e00+(FF_ffi_sizeof), -0x2e00+(FF_ffi_alignof), -0x2e00+(FF_ffi_offsetof), -0x2f00, -0x3000, -0x3100, -0x3200, -0x3300, -0, -0x3400 -}; - -static const RecordFunc recff_func[] = { -recff_nyi, -recff_c, -recff_assert, -recff_type, -recff_ipairs_aux, -recff_ipairs, -recff_getmetatable, -recff_setmetatable, -recff_rawget, -recff_rawset, -recff_rawequal, -recff_select, -recff_tonumber, -recff_tostring, -recff_pcall, -recff_xpcall, -recff_math_abs, -recff_math_round, -recff_math_unary, -recff_math_atrig, -recff_math_htrig, -recff_math_modf, -recff_math_degrad, -recff_math_log, -recff_math_atan2, -recff_math_pow, -recff_math_ldexp, -recff_math_minmax, -recff_math_random, -recff_bit_unary, -recff_bit_shift, -recff_bit_nary, -recff_string_len, -recff_string_range, -recff_table_getn, -recff_table_insert, -recff_table_remove, -recff_io_write, -recff_io_flush, -recff_cdata_index, -recff_cdata_arith, -recff_cdata_call, -recff_clib_index, -recff_ffi_new, -recff_ffi_typeof, -recff_ffi_istype, -recff_ffi_xof, -recff_ffi_errno, -recff_ffi_string, -recff_ffi_copy, -recff_ffi_fill, -recff_ffi_abi, -recff_ffi_gc -}; - diff -Naur a/src/lj_snap.c b/src/lj_snap.c --- a/src/lj_snap.c 2024-11-05 09:58:44.417962606 +0800 +++ b/src/lj_snap.c 2024-11-05 11:35:45.895160951 +0800 @@ -805,7 +805,7 @@ #if !LJ_SOFTFP if (r >= RID_MAX_GPR) { src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; -#if LJ_TARGET_PPC +#if LJ_TARGET_PPC || LJ_TARGET_SW64 if (sz == 4) { /* PPC FPRs are always doubles. */ *(float *)dst = (float)*(double *)src; return; diff -Naur a/src/lj_target.h b/src/lj_target.h --- a/src/lj_target.h 2024-11-05 09:58:44.417962606 +0800 +++ b/src/lj_target.h 2024-11-05 13:21:35.409474173 +0800 @@ -55,7 +55,7 @@ /* Bitset for registers. 32 registers suffice for most architectures. 
** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_SW64 typedef uint64_t RegSet; #define RSET_BITS 6 #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) @@ -143,6 +143,8 @@ #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" +#elif LJ_TARGET_SW64 +#include "lj_target_sw64.h" #else #error "Missing include for target CPU" #endif diff -Naur a/src/lj_target_sw64.h b/src/lj_target_sw64.h --- a/src/lj_target_sw64.h 1970-01-01 08:00:00.000000000 +0800 +++ b/src/lj_target_sw64.h 2024-11-05 13:33:32.703974745 +0800 @@ -0,0 +1,304 @@ +/* +** Definitions for SW64 CPUs. +** Copyright (C) 2019-2019 deepin inc. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_SW64_H +#define _LJ_TARGET_SW64_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ + _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ + _(R24) _(R25) _(RA) _(R27) _(R28) _(R29) _(SP) _(R31) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ + _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ + _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) +#define VRIDDEF(_) + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_ZERO = RID_R31, + RID_FZERO = RID_F31, + RID_TMP = RID_RA, + RID_GP = RID_R29, + + /* Calling conventions. */ + RID_RET = RID_R0, + RID_RETHI = RID_R0, + RID_RETLO = RID_R5, + RID_FPRET = RID_F0, + RID_CFUNCADDR = RID_R27, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_R9, /* Interpreter BASE. */ + RID_LPC = RID_R11, /* Interpreter PC. */ + RID_DISPATCH = RID_R12, /* Interpreter DISPATCH table. */ + RID_LREG = RID_R13, /* Interpreter L. */ + RID_JGL = RID_R15, /* On-trace: global_State + 32768. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_R0, + RID_MAX_GPR = RID_R31+1, + RID_MIN_FPR = RID_F0, + RID_MAX_FPR = RID_F31+1, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ +}; + +#define RID_NUM_KREF RID_NUM_GPR +#define RID_MIN_KREF RID_R0 + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except ZERO, TMP, SP, JGL and GP. 
*/ +#define RSET_FIXED \ + (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ + RID2RSET(RID_JGL)|RID2RSET(RID_GP)|RID2RSET(RID_R28)) + +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) - RID2RSET(RID_FZERO)) +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL + + +#define RSET_SCRATCH_GPR \ + (RSET_RANGE(RID_R0, RID_R8+1)|RSET_RANGE(RID_R16, RID_R25+1)| \ + RID2RSET(RID_R27)) +#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F1+1)|RSET_RANGE(RID_F10,RID_F30+1)) +#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) + +#define REGARG_FIRSTGPR RID_R16 +#define REGARG_LASTGPR RID_R21 +#define REGARG_NUMGPR 6 +#define REGARG_FIRSTFPR RID_F16 +#define REGARG_LASTFPR RID_F21 +#define REGARG_NUMFPR 6 + + + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. +*/ +#if LJ_32 +#define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif + +#define SPS_FIRST 4 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) +{ + while (*p == 0x43ff075f) p++; /* Skip SW64_NOP. */ + return p; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) + +/* -- Instructions -------------------------------------------------------- */ + +/* Instruction fields. */ +#define SW64F_A(r) (((r) & 0x1f) << 21) +#define SW64F_B(r) (((r) & 0x1f) << 16) +#define SW64F_C(r) (((r) & 0x1f) << 5) +#define SW64F_D(r) (((r) & 0x1f) << 0) + +#define SW64F_F(r) (((r) & 0x1f) << 21) +#define SW64F_G(r) (((r) & 0x1f) << 16) +#define SW64F_H(r) (((r) & 0x1f) << 5) +#define SW64F_I(r) (((r) & 0x1f) << 0) + + +#define SW64F_IMM(i) (((i) & 0xff) << 13) +#define SW64F_DISP(d, r) (((d) & 0xffff) | SW64F_B(r)) +#define SW64F_DISPI(d) (((d) & 0xffff) | SW64F_B(RID_ZERO)) +#define SW64F_BRANCH(d) ((d) & 0x1fffff) +#define SW64F_j(i) SW64F_IMM(i) +#define IS_SW64F_BRANCH_VALID(d) (((int32_t)d) < 0x1fffff) + +#define SW64_OP(ins) ((ins) & 0x3f000000) + +typedef enum SW64Ins { + + /* Load/store instructions. 
*/ + SW64I_LDL = 0x8c000000, + SW64I_STL = 0xac000000, + SW64I_LDW = 0x88000000, + SW64I_STW = 0xa8000000, + SW64I_LDHU = 0x84000000, + SW64I_STH = 0xa4000000, + SW64I_LDBU = 0x80000000, + SW64I_STB = 0xa0000000, + SW64I_FLDD = 0x9c000000, + SW64I_FSTD = 0xbc000000, + SW64I_FLDS = 0x98000000, + SW64I_FSTS = 0xb8000000, + + SW64I_LDIH = 0xfc000000, + SW64I_LDI = 0xf8000000, + SW64I_CALL= 0x04000000, + SW64I_BR = 0x10000000, + SW64I_LBR = 0x74000000, + + SW64I_S4ADDL = 0x40000140, + SW64I_S4ADDLI = 0x48000140, + SW64I_S8ADDL = 0x40000180, + SW64I_S8ADDLI =0x48000180, + SW64I_ADDL = 0x40000100, + SW64I_ADDLI =0x48000100, + SW64I_ADDW = 0x40000000, + SW64I_ADDWI =0x48000000, + SW64I_SUBL = 0x40000120, + SW64I_SUBLI =0x48000120, + SW64I_SUBW = 0x40000020, + SW64I_MULL = 0x40000300, + SW64I_MULLI =0x48000300, + SW64I_MULW = 0x40000200, + SW64I_MULWI =0x48000200, + SW64I_UMULH = 0x40000320, + SW64I_UMULHI =0x48000320, + SW64I_DIVW = 0x40000220, + SW64I_UDIVW = 0x40000240, + SW64I_REMW = 0x40000260, + SW64I_UREMW = 0x40000280, + SW64I_DIVL = 0x40000340, + SW64I_UDIVL = 0x40000360, + SW64I_REML = 0x40000380, + SW64I_UREML = 0x400003a0, + + SW64I_FADDS = 0x60000000, + SW64I_FADDD = 0x60000020, + SW64I_FSUBD = 0x60000060, + SW64I_FMULD = 0x600000a0, + SW64I_FDIVD = 0x600000e0, + + SW64I_SLL = 0x40000900, + SW64I_SLLI = 0x48000900, + SW64I_SRLI = 0x48000920, + SW64I_SRL = 0x40000920, + SW64I_SRAI = 0x48000940, + SW64I_SRA = 0x40000940, + SW64I_ROLL = 0x40000960, + SW64I_ROLLI = 0x48000960, + SW64I_SLLW = 0x40000980, + SW64I_SLLWI = 0x48000980, + SW64I_SRLW = 0x400009a0, + SW64I_SRLWI = 0x480009a0, + SW64I_SRAW = 0x400009c0, + SW64I_SRAWI = 0x480009c0, + SW64I_ROLW = 0x400009e0, + SW64I_ROLWI = 0x480009e0, + + SW64I_AND = 0x40000700, + SW64I_ANDI = 0x48000700, + SW64I_XOR = 0x40000780, + SW64I_XORI = 0x48000780, + SW64I_BIS = 0x40000740, + SW64I_BISI = 0x48000740, + SW64I_ORNOT = 0x40000760, + SW64I_ORNOTI = 0x48000760, + SW64I_EQV = 0x400007a0, + SW64I_EQVI =0x480007a0, + + SW64I_BEQ = 0xc0000000, + SW64I_BNE = 0xc4000000, + SW64I_BLT = 0xc8000000, + SW64I_BLE = 0xcc000000, + SW64I_BGT = 0xd0000000, + SW64I_BGE = 0xd4000000, + + SW64I_CMPEQ = 0x40000500, + SW64I_CMPULE = 0x40000580, + SW64I_CMPLE = 0x40000540, + SW64I_CMPULT = 0x40000560, + SW64I_CMPULTI = 0x48000560, + SW64I_CMPLT = 0x40000520, + SW64I_CMPLTI = 0x48000520, + + SW64I_FCMPEQ = 0x60000200, + SW64I_FCMPLE = 0x60000220, + SW64I_FCMPLT = 0x60000240, + SW64I_FCMPUN = 0x60000260, + SW64I_FBEQ = 0xe0000000, + SW64I_FBGE = 0xf4000000, + SW64I_FBGT = 0xf0000000, + SW64I_FBLE = 0xec000000, + SW64I_FBLT = 0xe8000000, + SW64I_FBNE = 0xe4000000, + + SW64I_BPT = 0x00000080, + SW64I_NOP = 0x43ff075f, //excb, same as gcc's asm("nop") + + SW64I_FCVTLW = 0x63e00520, + SW64I_FCVTWL = 0x63e00500, + SW64I_FCVTLS = 0x63e005a0, + SW64I_FCVTLD = 0x63e005e0, + SW64I_FCVTDL = 0x63e004e0, + SW64I_FCVTDL_Z = 0x63e00480, + SW64I_FIMOVD = 0x401f0f00, + SW64I_IFMOVD = 0x601f0820, + SW64I_IFMOVS = 0x601f0800, + SW64I_FCVTDS = 0x63e00420, + SW64I_FCVTSD = 0x63e00400, + + SW64I_MASKLLI = 0x48000c60, + SW64I_MASKLL = 0x40000c60, + + SW64I_FABS = 0x63e00600, + SW64I_FCPYSN = 0x60000640, + + SW64I_SEXTB = 0x43e00d40, + SW64I_SEXTH = 0x43e00d60, + + SW64I_EXTLBI = 0x48000a00, + SW64I_EXTLHI = 0x48000a20, + SW64I_EXTLWI = 0x48000a40, + + SW64I_SETFPEC1 = 0x60000aa0, + SW64I_SETFPEC3 = 0x60000ae0, + + SW64I_SELEQ = 0x44000000, + SW64I_SELNE = 0x44001400, + SW64I_FSELEQ = 0x64004000, + SW64I_FSELNE = 0x64004400, + + SW64I_AL = SW64I_LDL, + SW64I_AS = SW64I_STL, + +} SW64Ins; 
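/* Illustrative sketch, not used by the port itself: how the SW64F_* field
** macros above compose with the SW64Ins opcodes into a 32-bit instruction
** word, assuming the Alpha-style memory format that SW64F_A/SW64F_DISP
** imply (Ra = data register, Rb = base register, 16-bit displacement).
*/
static LJ_AINLINE uint32_t sw64_enc_ldl(uint32_t ra, uint32_t rbase, int32_t ofs)
{
  return SW64I_LDL | SW64F_A(ra) | SW64F_DISP(ofs, rbase);  /* ldl ra, ofs(rbase) */
}
/* e.g. sw64_enc_ldl(RID_RA, RID_SP, 8) would encode "ldl ra, 8(sp)". */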
+
+#endif
diff -Naur a/src/lj_trace.c b/src/lj_trace.c
--- a/src/lj_trace.c	2024-11-05 09:58:44.421962559 +0800
+++ b/src/lj_trace.c	2024-11-05 13:25:40.383186199 +0800
@@ -870,8 +870,57 @@
 }
 #endif
+#if SW64_DEBUG_WI
+LUALIB_API const char *o_tostr(lua_State *L, TValue *o,
+                               const char *def, size_t *len)
+{
+  GCstr *s;
+  if (LJ_LIKELY(tvisstr(o))) {
+    s = strV(o);
+  } else if (tvisnil(o)) {
+    if (len != NULL) *len = def ? strlen(def) : 0;
+    return def;
+  } else if (tvisnumber(o)) {
+    lj_gc_check(L);
+    s = lj_strfmt_number(L, o);
+    setstrV(L, o, s);
+  } else {
+    return "Other";
+    //lj_err_argt(L, 0, LUA_TSTRING);
+  }
+  if (len != NULL)
+    *len = s->len;
+  return strdata(s);
+}
+void dump_base(char* msg, lua_State* L)
+{
+  return;
+  int n = L->top - L->base;
+  printf("%s N:%d\n", msg, n);
+
+  for (int i = 0; i < n; i++) {
+    TValue *o = L->base + i;
+    const char *t = lj_typename(o);
+
+    if (tvisnum(o)) {
+      double d = numberVnum(o);
+      printf("%d\t%s\t%f\n", i, t, d);
+    } else if (tvisint(o)) {
+      printf("%d\t%s\t%d\n", i, t, intV(o));
+    } else {
+      printf("%d\t%s\n", i, t);
+    }
+  }
+}
+#endif
+
 
 /* A trace exited. Restore interpreter state. */
+#if SW64_DEBUG_WI
+int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr, unsigned long exit_addr)
+#else
 int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
+#endif
 {
   ERRNO_SAVE
   lua_State *L = J->L;
@@ -902,6 +951,10 @@
   }
 #endif
   lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number");
+#if SW64_DEBUG_WI
+  printf("-----------%s exitno:%d nsnap:%d traceno:%d exit_addr:0x%lx\n", __FUNCTION__,
+         J->exitno, T->nsnap, T->traceno, exit_addr);
+#endif
   exd.J = J;
   exd.exptr = exptr;
   errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
diff -Naur a/src/lj_trace.h b/src/lj_trace.h
--- a/src/lj_trace.h	2024-11-05 09:58:44.421962559 +0800
+++ b/src/lj_trace.h	2024-11-05 13:27:58.232172910 +0800
@@ -36,7 +36,11 @@
 LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
 LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
+#if SW64_DEBUG_WI
+LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr, unsigned long exit_addr);
+#else
 LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
+#endif
 #if LJ_UNWIND_EXT
 LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
 #endif
diff -Naur a/src/lj_vmmath.c b/src/lj_vmmath.c
--- a/src/lj_vmmath.c	2024-11-05 09:58:44.421962559 +0800
+++ b/src/lj_vmmath.c	2024-11-05 13:28:57.781255159 +0800
@@ -69,7 +69,7 @@
 
 /* -- Helper functions for generated machine code ------------------------- */
 
-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS || LJ_TARGET_SW64
 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
 {
   uint32_t y, ua, ub;
diff -Naur a/src/luajit_rolling.h b/src/luajit_rolling.h
--- a/src/luajit_rolling.h	2024-11-05 09:58:44.421962559 +0800
+++ b/src/luajit_rolling.h	2024-11-05 14:37:08.052947283 +0800
@@ -30,11 +30,19 @@
 
 #include "lua.h"
 
+#if defined(__sw_64__)
+#define LUAJIT_VERSION		"LuaJIT 2.1.0_9 sw1.0.0"
+#define LUAJIT_VERSION_NUM	20199
+#define LUAJIT_VERSION_SYM	luaJIT_version_2_1_0_9_sw_1_0_0
+#define LUAJIT_COPYRIGHT	"Copyright (C) 2024 Sheng Kai"
+#define LUAJIT_URL		"https://luajit.org/"
+#else
 #define LUAJIT_VERSION		"LuaJIT 2.1.ROLLING"
 #define LUAJIT_VERSION_NUM	20199  /* Deprecated.
*/ #define LUAJIT_VERSION_SYM luaJIT_version_2_1_ROLLING #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2023 Mike Pall" #define LUAJIT_URL "https://luajit.org/" +#endif /* Modes for luaJIT_setmode. */ #define LUAJIT_MODE_MASK 0x00ff diff -Naur a/src/Makefile b/src/Makefile --- a/src/Makefile 2024-11-05 09:58:44.429962466 +0800 +++ b/src/Makefile 2024-11-05 13:40:30.527913542 +0800 @@ -52,6 +52,7 @@ CCOPT_arm64= CCOPT_ppc= CCOPT_mips= +CCOPT_sw64= -mieee # CCDEBUG= # Uncomment the next line to generate debug information: @@ -234,6 +235,9 @@ TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +ifneq (,$(findstring LJ_TARGET_SW64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= sw64 +else ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) TARGET_LJARCH= x64 else @@ -274,6 +278,7 @@ endif endif endif +endif ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 @@ -437,6 +442,9 @@ DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif +ifneq (,$(findstring SW64_DEBUG_WI 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SW64_DEBUG_WI +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff -Naur a/src/vm_sw64.dasc b/src/vm_sw64.dasc --- a/src/vm_sw64.dasc 1970-01-01 08:00:00.000000000 +0800 +++ b/src/vm_sw64.dasc 2024-11-05 13:33:50.971868037 +0800 @@ -0,0 +1,4938 @@ +|// Low-level VM code for SW64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2023 Sheng Kai. See Copyright Notice in luajit.h +| +|.arch sw64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +|// Fixed register assignments for the interpreter. +|// Don't use: r31 = 0, r29 = gp, r30 = sp, r26 = ra +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, r9 //s0 // Base of current Lua stack frame. +|.define KBASE, r10 //s1 // Constants of current Lua function. +|.define PC, r11 //s2 // Next PC. +|.define DISPATCH, r12 //s3 // Opcode dispatch table. +|.define LREG, r13 //s4 // Register holding lua_State (also in SAVE_L). +|.define MULTRES, r21 //a5 // Size of multi-result: (nresults+1). +| +|.define JGL, r15 //fp // On-trace: global_State + 32768. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, r15 //fp +|.define TISNUM, r8 //t7 +|.define TOBIT, f8 // 2^52 + 2^51. +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, r14 //mips:s7 sw64:s5 // Callee-save. +|.define RB, r22 //t8 +|.define RC, r23 //t9 +|.define RD, r24 //t10 +|.define INS, r25 //t11 +| +|.define AT, r28 //at // Assembler temporary. +|.define FAT, f28 //at // Assembler temporary. +|.define TMP0, r5 //t4 +|.define TMP1, r6 //t5 +|.define TMP2, r7 //t6 +|.define TMP3, r3 //t2 +|.define TMP4, r4 //t3 +| +|// Calling conventions. 
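|// The defines below pin the C ABI roles used by the interpreter: integer
|// arguments a0-a5 live in r16-r21, FP arguments in f16-f21, the integer
|// return value in r0 and the FP return value in f0, with r27 (pv) holding
|// the callee address for indirect calls.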
+|.define CFUNCADDR, r27 //t12/pv +|.define CARG1, r16 //a0 +|.define CARG2, r17 //a1 +|.define CARG3, r18 //a2 +|.define CARG4, r19 //a3 +|.define CARG5, r20 //a4 +|.define CARG6, r21 //a5 +| +|.define CRET1, r0 //v0 +|.define CRET2, r2 //t1 +| +|.define FCARG1, f16 +|.define FCARG2, f17 +|.define FCARG3, f18 +|.define FCARG4, f19 +|.define FCARG5, f20 +|.define FCARG6, f21 +| +|.define FCRET1, f0 +|.define FCRET2, f1 +| +|.define FTMP0, f10 +|.define FTMP1, f11 +|.define FTMP2, f12 +|.define FTMP3, f13 +|.define FTMP4, f14 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +| +|.define CFRAME_SPACE, 176 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 172 // 32 bit values. +|.define SAVE_NRES, 168 +|.define SAVE_CFRAME, 160 // 64 bit values. +|.define SAVE_L, 152 +|.define SAVE_PC, 144 +|//----- 16 byte aligned +|.define SAVE_GPR_, 80 // .. 80+8*8: 64 bit GPR saves. s0-s5 +|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. f2-f9 +| +|.define TMPX, 8 +|.define TMPD, 0 +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|.define SAVE_MULTRES, TMPD +| +|//----------------------------------------------------------------------- +| +|.macro saveregs +| ldi sp, -CFRAME_SPACE(sp) //TODO +| stl ra, SAVE_GPR_+7*8(sp) +| stl r15, SAVE_GPR_+6*8(sp) +| stl r14, SAVE_GPR_+5*8(sp) +| stl r13, SAVE_GPR_+4*8(sp) +| stl r12, SAVE_GPR_+3*8(sp) +| stl r11, SAVE_GPR_+2*8(sp) +| stl r10, SAVE_GPR_+1*8(sp) +| stl r9, SAVE_GPR_+0*8(sp) +| fstd f9, SAVE_FPR_+7*8(sp) +| fstd f8, SAVE_FPR_+6*8(sp) +| fstd f7, SAVE_FPR_+5*8(sp) +| fstd f6, SAVE_FPR_+4*8(sp) +| fstd f5, SAVE_FPR_+3*8(sp) +| fstd f4, SAVE_FPR_+2*8(sp) +| fstd f3, SAVE_FPR_+1*8(sp) +| fstd f2, SAVE_FPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret +| ldl ra, SAVE_GPR_+7*8(sp) +| ldl r15, SAVE_GPR_+6*8(sp) +| ldl r14, SAVE_GPR_+5*8(sp) +| ldl r13, SAVE_GPR_+4*8(sp) +| ldl r12, SAVE_GPR_+3*8(sp) +| ldl r11, SAVE_GPR_+2*8(sp) +| ldl r10, SAVE_GPR_+1*8(sp) +| ldl r9, SAVE_GPR_+0*8(sp) +| fldd f9, SAVE_FPR_+7*8(sp) +| fldd f8, SAVE_FPR_+6*8(sp) +| fldd f7, SAVE_FPR_+5*8(sp) +| fldd f6, SAVE_FPR_+4*8(sp) +| fldd f5, SAVE_FPR_+3*8(sp) +| fldd f4, SAVE_FPR_+2*8(sp) +| fldd f3, SAVE_FPR_+1*8(sp) +| fldd f2, SAVE_FPR_+0*8(sp) +| ldi sp, CFRAME_SPACE(sp) +| ret zero, 0(ra) +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; syscall ; .endmacro //TODO +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. SW64 is little endian. +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +| +|// Instruction decode. 
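|// Each 32-bit bytecode instruction holds OP in byte 0, A in byte 1 and
|// either C/B in bytes 2/3 or the 16-bit D operand in bytes 2-3
|// (little-endian). The macros below extract one field with extlb/extlh
|// and scale it by 4 or 8, since dispatch table entries and stack slots
|// are 8 bytes wide.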
+|.macro decode_BC4b, dst +|.if SW64_CORE4 +| sllwi dst, 2, dst +|.else +| slli dst, 2, dst +| addwi dst, 0, dst +|.endif +|.endmacro +|.macro decode_BC8b, dst +|.if SW64_CORE4 +| sllwi dst, 3, dst +|.else +| slli dst, 3, dst +| addwi dst, 0, dst +|.endif +|.endmacro +|.macro decode_OP, dst, ins; andi ins, 0xff, dst; .endmacro +|.macro decode_RA, dst, ins; extlb ins, 0x1, dst; decode_BC8b dst; .endmacro +|.macro decode_RB, dst, ins; extlb ins, 0x3, dst; decode_BC8b dst; .endmacro +|.macro decode_RC, dst, ins; extlb ins, 0x2, dst; decode_BC8b dst; .endmacro +|.macro decode_RD, dst, ins; extlh ins, 0x2, dst; decode_BC8b dst; .endmacro +|.macro decode_RDtoRC8, dst, src; ldi dst, 0x7f8(zero); and src, dst, dst; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| ldw INS, 0(PC) +| ldi PC, 4(PC) +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 +| decode_OP TMP1, INS +| decode_BC8b TMP1 +| addl TMP1, DISPATCH, TMP0 +| ldl TMP4, 0(TMP0) +| decode_RD RD, INS +| decode_RA RA, INS +| jmp zero, 0(TMP4) +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| br zero, ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| br zero, ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ldl PC, LFUNC:RB->pc +| ldw INS, 0(PC) +| ldi PC, 4(PC) +| decode_OP TMP1, INS +| decode_RA RA, INS +| decode_BC8b TMP1 +| addl TMP1, DISPATCH, TMP0 +| ldl TMP0, 0(TMP0) +| addl RA, BASE, RA +| jmp zero, 0(TMP0) +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| stl PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD +|.if SW64_CORE4 +| srlwi RD, 1, TMP0 +|.else +| zapi RD, 0xf0, RD +| srli RD, 1, TMP0 +|.endif +| ldih TMP4, -0x2(zero) // -BCBIAS_J*4 +| addw TMP0, TMP4, TMP0 // (jump - 0x8000)<<2 +| addl PC, TMP0, PC +|.endmacro +| +|// Assumes DISPATCH is relative to GL. 
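|// DISPATCH points into the combined GG_State, so global_State and
|// jit_State fields are reachable at fixed offsets (DISPATCH_GL and
|// DISPATCH_J below), and the addresses of C helpers are loaded from the
|// GOT stored alongside the dispatch table via DISPATCH_GOT.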
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro load_got, func +| ldl CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) +|.endmacro +| +|.macro call_intern, func +| stl MULTRES, TMPX(sp) +| call ra, 0(CFUNCADDR) +| ldl MULTRES, TMPX(sp) +|.endmacro +| +|.macro call_extern +| stl MULTRES, TMPX(sp) +| call ra, 0(CFUNCADDR) +| ldl MULTRES, TMPX(sp) +| .endmacro +| +|.macro hotcheck, delta, target +| srli PC, 1, TMP1 +| andi TMP1, 126, TMP1 +| addl TMP1, DISPATCH, TMP1 +| ldhu TMP2, GG_DISP2HOT(TMP1) +| subwi TMP2, delta, TMP2 +| sth TMP2, GG_DISP2HOT(TMP1) +| blt TMP2, target +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; ldi TMP0, ~LJ_VMST_..st(zero); .endmacro +|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target +| ldl tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) +| andi mark, ~LJ_GC_BLACK & 255, mark // black2gray(tab) +| stl tab, DISPATCH_GL(gc.grayagain)(DISPATCH) +| stb mark, tab->marked +| stl tmp, tab->gclist +| br zero, target +|.endmacro +| +|.macro .DEXTM, rt, rs, pos, size +| slli rs, 64-pos-size, rt +| srli rt, 64-size, rt +|.endmacro +| +|.macro .DINS, rt, rs, pos, size +| ldi CARG5, 1(zero); slli CARG5, size, CARG5; ldi CARG5, -1(CARG5); +| and rs, CARG5, TMP4; +| slli TMP4, pos, TMP4; +| slli CARG5, pos, CARG5; +| bic rt, CARG5, rt; +| bis rt, TMP4, rt; +|.endmacro +| +|// Clear type tag. Isolate lowest 47 bits of reg. +|.macro cleartp, reg; .DEXTM reg, reg, 0, 47; .endmacro +|.macro cleartp, dst, reg; .DEXTM dst, reg, 0, 47; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. +|.macro settp, dst, tp; .DINS dst, tp, 47, 17; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; srai src, 47, dst; .endmacro +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. 
+|.macro checktp, reg, tp, target +| gettp TMP4, reg +| ldi TMP4, tp(TMP4) +| cleartp reg +| bne TMP4, target +|.endmacro +|.macro checktp, dst, reg, tp, target +| gettp TMP4, reg +| ldi TMP4, tp(TMP4) +| cleartp dst, reg +| bne TMP4, target +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target +| gettp TMP4, reg +| ldi TISNUM, LJ_TISNUM(zero) +| cmpeq TMP4, TISNUM, AT +| beq AT, target +|.endmacro +|.macro checknum, reg, target +| gettp TMP4, reg +| ldi AT, LJ_TISNUM(zero) +| cmpult TMP4, AT, TMP4 +| beq TMP4, target +|.endmacro +| +|.macro mov_false, reg +| ldi reg, 0x0001(zero) +| slli reg, 47, reg +| ornot zero, reg, reg // ~reg +|.endmacro +|.macro mov_true, reg +| ldi reg, 0x0001(zero) +| slli reg, 48, reg +| ornot zero, reg, reg // ~reg +|.endmacro +| +|.macro fcmp, op, a, b, reg, tmp; +| fcmp..op a, b, tmp +| fcvtdl tmp, tmp +| fimovd tmp, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi PC, FRAME_P, TMP0 + | + | // Return from pcall or xpcall fast func. + | mov_true TMP1 + | beq TMP0, ->cont_dispatch + | ldl PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | bis TMP2, zero, BASE // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | stl TMP1, -8(RA) // Prepend true to results. + | ldi RA, -8(RA) + | + |->vm_returnc: + | addwi RD, 8, RD // RD = (nresults+1)*8. + | andi PC, FRAME_TYPE, TMP0 + | addwi zero, LUA_YIELD, CRET1 + | beq RD, ->vm_unwind_c_eh + | bis RD, zero, MULTRES + | beq TMP0, ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | subwi zero, 8, TMP2 // TMP2 = 0xfffffff8 + | xori TMP0, FRAME_C, TMP0 + | and TMP2, PC, TMP2 + | subl BASE, TMP2, TMP2 // TMP2 = previous base. + | bne TMP0, ->vm_returnp + | + | subwi RD, 8, TMP1 + | stl TMP2, L->base + | li_vmstate C + | ldw TMP2, SAVE_NRES(sp) + | ldi BASE, -16(BASE) + | st_vmstate + | s8addwi TMP2, 0, TMP2 + | beq TMP1, >2 + |1: + | subwi TMP1, 8, TMP1 + | ldl CRET1, 0(RA) + | addli RA, 8, RA + | stl CRET1, 0(BASE) + | addli BASE, 8, BASE + | bne TMP1, <1 + | + |2: + | cmpeq TMP2, RD, AT + | beq AT, >6 + |3: + | stl BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ldl TMP0, SAVE_CFRAME(sp) // Restore previous C frame. + | bis zero, zero, CRET1 // Ok return status for vm_pcall. + | stl TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ldl TMP1, L->maxstack + | cmplt TMP2, RD, TMP0 + | // More results wanted. Check stack size and fill up results with nil. + | cmplt BASE, TMP1, TMP1 + | bne TMP0, >7 + | beq TMP1, >8 + | stl TISNIL, 0(BASE) + | addwi RD, 8, RD + | addli BASE, 8, BASE + | br zero, <2 + | + |7: // Less results wanted. 
+ | subw RD, TMP2, TMP0 + | subl BASE, TMP0, TMP0 // Either keep top or shrink it. + | selne TMP2, TMP0, BASE, BASE // LUA_MULTRET+1 case? + | br zero, <3 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | load_got lj_state_growstack + | stl BASE, L->top + | bis RD, zero, MULTRES + |.if SW64_CORE4 + | srlwi TMP2, 3, CARG2 + |.else + | zapi TMP2, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + |.endif + | bis L, zero, CARG1 + | call_intern lj_state_growstack // (lua_State *L, int n) + | ldw TMP2, SAVE_NRES(sp) + | ldl BASE, L->top // Need the (realloced) L->top in BASE. + | bis MULTRES, zero, RD + | s8addwi TMP2, 0, TMP2 + | br zero, <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | bis CARG1, zero, sp + | bis CARG2, zero, CRET1 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ldl L, SAVE_L(sp) + | ldi TMP0, ~LJ_VMST_C(zero) + | addwi TMP0, 0, TMP0 + | ldl GL:TMP1, L->glref + | stw TMP0, GL:TMP1->vmstate + | br zero, ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | ldi AT, CFRAME_RAWMASK(zero) + | and CARG1, AT, sp + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ldl L, SAVE_L(sp) + | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). + | ldi TISNIL, LJ_TNIL(zero) + | ldi TISNUM, LJ_TISNUM(zero) + | ldl BASE, L->base + | ldl DISPATCH, L->glref // Setup pointer to dispatch table. + | ifmovs TMP3, TOBIT + | mov_false TMP1 + | li_vmstate INTERP + | ldl PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | fcvtsd TOBIT, TOBIT + | ldi RA, -8(BASE) // Results start at BASE-8. + | ldi DISPATCH, GG_G2DISP(DISPATCH) + | stl TMP1, 0(RA) // Prepend false to error message. + | st_vmstate + | ldi RD, 16(zero) // 2 results: false + error message. + | br zero, ->vm_returnc + | + |->vm_unwind_stub: // Jump to exit stub from unwinder. + | bis CARG2, zero, ra + | jmp zero, 0(CARG1) + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | ldi CARG2, LUA_MINSTACK(zero) + | br zero, >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs, PC = first PC + | addl BASE, RC, RC + | subl RA, BASE, RA + | stl BASE, L->base + | ldi PC, 4(PC) // Must point after first instruction. + | stl RC, L->top + |.if SW64_CORE4 + | srlwi RA, 3, CARG2 + |.else + | zapi RA, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + |.endif + |2: + | // L->base = new base, L->top = top + | load_got lj_state_growstack + | stl PC, SAVE_PC(sp) + | bis L, zero, CARG1 + | call_intern lj_state_growstack // (lua_State *L, int n) + | ldl BASE, L->base + | ldl RC, L->top + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | subl RC, BASE, RC + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. 
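  |// Note on the call helpers used above: "load_got f" fetches the C address
  |// GG(got)[LJ_GOT_f], i.e. the word at DISPATCH + GG_DISP2GOT +
  |// sizeof(void*)*LJ_GOT_f, and call_intern spills MULTRES to TMPX(sp)
  |// around the call, since MULTRES (r21/a5) is not callee-saved (this
  |// assumes the port reuses the MIPS/PPC-style GOT of C function
  |// addresses in GG_State).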
+ | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | bis CARG1, zero, L + | ldl DISPATCH, L->glref // Setup pointer to dispatch table. + | bis CARG2, zero, BASE + | ldbu TMP1, L->status + | stl L, SAVE_L(sp) + | ldi PC, FRAME_CP(zero) + | ldi TMP0, CFRAME_RESUME(sp) + | ldi DISPATCH, GG_G2DISP(DISPATCH) + | stw zero, SAVE_NRES(sp) + | stw zero, SAVE_ERRF(sp) + | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | stl zero, SAVE_CFRAME(sp) + | stl TMP0, L->cframe + | beq TMP1, >3 + | + | // Resume after yield (like a return). + | stl L, DISPATCH_GL(cur_L)(DISPATCH) + | bis BASE, zero, RA + | ldl BASE, L->base + | ldl TMP1, L->top + | ldl PC, FRAME_PC(BASE) + | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). + | subl TMP1, BASE, RD + | ifmovs TMP3, TOBIT + | stb zero, L->status + | fcvtsd TOBIT, TOBIT + | li_vmstate INTERP + | ldi RD, 8(RD) + | st_vmstate + | bis RD, zero, MULTRES + | andi PC, FRAME_TYPE, TMP0 + | ldi TISNIL, LJ_TNIL(zero) + | ldi TISNUM, LJ_TISNUM(zero) + | beq TMP0, ->BC_RET_Z + | br zero, ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | stw CARG4, SAVE_ERRF(sp) + | ldi PC, FRAME_CP(zero) + | br zero, >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | ldi PC, FRAME_C(zero) + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ldl TMP1, L:CARG1->cframe + | bis CARG1, zero, L + | stw CARG3, SAVE_NRES(sp) + | ldl DISPATCH, L->glref // Setup pointer to dispatch table. + | stl CARG1, SAVE_L(sp) + | bis CARG2, zero, BASE + | ldi DISPATCH, GG_G2DISP(DISPATCH) + | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | stl TMP1, SAVE_CFRAME(sp) + | stl sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | stl L, DISPATCH_GL(cur_L)(DISPATCH) + | ldl TMP2, L->base // TMP2 = old base (used in vmeta_call). + | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). + | ldl TMP1, L->top + | ifmovs TMP3, TOBIT + | addl PC, BASE, PC + | subl TMP1, BASE, NARGS8:RC + | ldi TISNUM, LJ_TISNUM(zero) + | subl PC, TMP2, PC // PC = frame delta + frame type + | fcvtsd TOBIT, TOBIT + | li_vmstate INTERP + | ldi TISNIL, LJ_TNIL(zero) + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | bis CARG1, zero, L + | ldl TMP0, L:CARG1->stack + | stl CARG1, SAVE_L(sp) + | ldl TMP1, L->top + | ldl DISPATCH, L->glref // Setup pointer to dispatch table. + | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | subl TMP0, TMP1, TMP0 // Compute -savestack(L, L->top). + | ldl TMP1, L->cframe + | ldi DISPATCH, GG_G2DISP(DISPATCH) + | stw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. 
+ | stw zero, SAVE_ERRF(sp) // No error function. + | stl TMP1, SAVE_CFRAME(sp) + | stl sp, L->cframe // Add our C frame to cframe chain. + | stl L, DISPATCH_GL(cur_L)(DISPATCH) + | ldi CFUNCADDR, 0(CARG4) + | call r26, 0(CFUNCADDR) // (lua_State *L, lua_CFunction func, void *ud) + | bis CRET1, zero, BASE + | ldi PC, FRAME_CP(zero) + | bne CRET1, <3 // Else continue with the call. + | br zero, ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the + |// stack, so BASE doesn't need to be reloaded across these calls. + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1) + | ldl TMP0, -32(BASE) // Continuation. + | bis BASE, zero, RB + | bis TMP2, zero, BASE // Restore caller BASE. + | ldl LFUNC:TMP1, FRAME_FUNC(TMP2) + |.if FFI + | cmpulti TMP0, 2, AT + |.endif + | ldl PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | addl RA, RD, TMP2 + | stl TISNIL, -8(TMP2) // Ensure one valid arg. + |.if FFI + | bne AT, >1 + |.endif + | ldl TMP1, LFUNC:TMP1->pc + | // BASE = base, RA = resultptr, RB = meta base + | ldl KBASE, PC2PROTO(k)(TMP1) + | jmp zero, 0(TMP0) // Jump to continuation. + | + |.if FFI + |1: + | ldi TMP1, -32(RB) + | bne TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | subl TMP1, BASE, RC + | br zero, ->vm_call_tail + |.endif + | + |->cont_cat: // RA = resultptr, RB = meta base + | ldw INS, -4(PC) + | ldi CARG2, -32(RB) + | ldl CRET1, 0(RA) + | decode_RB MULTRES, INS + | decode_RA RA, INS + | addl BASE, MULTRES, TMP1 + | stl BASE, L->base + | subl CARG2, TMP1, CARG3 + | stl CRET1, 0(CARG2) + | cmpeq TMP1, CARG2, AT + | beq AT, ->BC_CAT_Z + | addl RA, BASE, RA + | stl CRET1, 0(RA) + | br zero, ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TMP0, LJ_TSTR(zero) + | settp STR:RC, TMP0 + | stl STR:RC, 0(CARG3) + | br zero, >1 + | + |->vmeta_tgets: + | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TMP0, LJ_TTAB(zero) + | ldi TMP1, LJ_TSTR(zero) + | settp TAB:RB, TMP0 + | ldi CARG3, DISPATCH_GL(tmptv2)(DISPATCH) + | stl TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | stl STR:RC, 0(CARG3) + | br zero, >1 + | + |->vmeta_tgetb: + | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TISNUM, LJ_TISNUM(zero) + | settp TMP0, TISNUM + | stl TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | load_got lj_meta_tget + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | ldi TMP1, -FRAME_CONT(BASE) + | beq CRET1, >3 + | ldl TMP0, 0(CRET1) + | stl TMP0, 0(RA) + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | ldl BASE, L->top + | stl PC, -24(BASE) // [cont|PC] + | subl BASE, TMP1, PC + | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 
+ | cleartp LFUNC:RB + | ldi NARGS8:RC, 16(zero) + | br zero, ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | load_got lj_tab_getinth + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | bis TISNIL, zero, CARG2 + | beq CRET1, ->BC_TGETR_Z + | ldl CARG2, 0(CRET1) + | br zero, ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TMP0, LJ_TSTR(zero) + | settp STR:RC, TMP0 + | stl STR:RC, 0(CARG3) + | br zero, >1 + | + |->vmeta_tsets: + | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TMP0, LJ_TTAB(zero) + | ldi TMP1, LJ_TSTR(zero) + | settp TAB:RB, TMP0 + | ldi CARG3, DISPATCH_GL(tmptv2)(DISPATCH) + | stl TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | stl STR:RC, 0(CARG3) + | br zero, >1 + | + |->vmeta_tsetb: // TMP0 = index + | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) + | ldi TISNUM, LJ_TISNUM(zero) + | settp TMP0, TISNUM + | stl TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | load_got lj_meta_tset + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | ldl TMP2, 0(RA) + | beq CRET1, >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | stl TMP2, 0(CRET1) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | ldi TMP1, -FRAME_CONT(BASE) + | ldl BASE, L->top + | stl PC, -24(BASE) // [cont|PC] + | subl BASE, TMP1, PC + | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | ldi NARGS8:RC, 24(zero) // 3 args for func(t, k, v) + | cleartp LFUNC:RB + | stl TMP2, 16(BASE) // Copy value to third argument. + | br zero, ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | load_got lj_tab_setinth + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + | br zero, ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | bis RA, zero, CARG2 + | bis RD, zero, CARG3 + | load_got lj_meta_comp + | ldi PC, -4(PC) + | stl BASE, L->base + | bis L, zero, CARG1 + | decode_OP CARG4, INS + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // Returns 0/1 or TValue * (metamethod). + |3: + | cmpulti CRET1, 2, TMP1 + | beq TMP1, ->vmeta_binop + | subw zero, CRET1, TMP2 + |4: + | ldhu RD, OFS_RD(PC) + | ldi PC, 4(PC) + | ldih TMP1, -0x2(zero) // -BCBIAS_J*4 + | s4addwi RD, 0, RD + | addw RD, TMP1, RD + | and RD, TMP2, RD + | addl PC, RD, PC + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | ldbu TMP1, -4+OFS_RA(PC) + | ldl TMP2, 0(RA) + | s8addwi TMP1, 0, TMP1 + | addl TMP1, BASE, TMP1 + | stl TMP2, 0(TMP1) + | br zero, ->cont_nop + | + |->cont_condt: // RA = resultptr + | ldl TMP0, 0(RA) + | gettp TMP0, TMP0 + | // cmpulti TMP0, LJ_TISTRUECOND, TMP1 + | ldi TMP1, LJ_TISTRUECOND(zero) + | cmpult TMP0, TMP1, TMP1 + | subw zero, TMP1, TMP2 // Branch if result is true. + | br zero, <4 + | + |->cont_condf: // RA = resultptr + | ldl TMP0, 0(RA) + | gettp TMP0, TMP0 + | // cmpulti TMP0, LJ_TISTRUECOND, TMP1 + | ldi TMP1, LJ_TISTRUECOND(zero) + | cmpult TMP0, TMP1, TMP1 + | subwi TMP1, 1, TMP2 // Branch if result is false. 
+ | br zero, <4 + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | load_got lj_meta_equal + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | bis TMP0, zero, CARG4 + | ldi PC, -4(PC) + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // Returns 0/1 or TValue * (metamethod). + | br zero, <3 + | + |->vmeta_equal_cd: + |.if FFI + | load_got lj_meta_equal_cd + | bis INS, zero, CARG2 + | ldi PC, -4(PC) + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | br zero, <3 + |.endif + | + |->vmeta_istype: + | load_got lj_meta_istype + | ldi PC, -4(PC) + | stl BASE, L->base + | bis L, zero, CARG1 + |.if SW64_CORE4 + | srlwi RA, 3, CARG2 + | srlwi RD, 3, CARG3 + |.else + | zapi RA, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + | zapi RD, 0xf0, CARG3 + | srli CARG3, 3, CARG3 + |.endif + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | br zero, ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | bis RB, zero, RC + | + |->vmeta_arith: + | load_got lj_meta_arith + | stl BASE, L->base + | bis RA, zero, CARG2 + | stl PC, SAVE_PC(sp) + | bis RB, zero, CARG3 + | bis RC, zero, CARG4 + | decode_OP CARG5, INS + | bis L, zero, CARG1 + | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // Returns NULL (finished) or TValue * (metamethod). + | beq CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | subl CRET1, BASE, TMP1 + | stl PC, -24(CRET1) // [cont|PC] + | bis BASE, zero, TMP2 + | ldi PC, FRAME_CONT(TMP1) + | bis CRET1, zero, BASE + | ldi NARGS8:RC, 16(zero) // 2 args for func(o1, o2). + | br zero, ->vm_call_dispatch + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | bis CARG1, zero, MULTRES +#endif + | load_got lj_meta_len + | stl BASE, L->base + | bis L, zero, CARG1 + | stl PC, SAVE_PC(sp) + | call_intern lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bne CRET1, ->vmeta_binop // Binop call for compatibility. + | bis MULTRES, zero, CARG1 + | br zero, ->BC_LEN_Z +#else + | br zero, ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | load_got lj_meta_call + | stl TMP2, L->base // This is the callers base! + | ldi CARG2, -16(BASE) + | stl PC, SAVE_PC(sp) + | addl BASE, RC, CARG3 + | bis L, zero, CARG1 + | bis NARGS8:RC, zero, MULTRES + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | ldi NARGS8:RC, 8(MULTRES) // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. 
+ | // BASE = old base, RA = new base, RC = nargs*8 + | load_got lj_meta_call + | stl BASE, L->base + | subli RA, 16, CARG2 + | stl PC, SAVE_PC(sp) + | addl RA, RC, CARG3 + | bis L, zero, CARG1 + | bis NARGS8:RC, zero, MULTRES + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | ldl RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ldl TMP1, FRAME_PC(BASE) + | addli MULTRES, 8, NARGS8:RC // Got one more argument now. + | cleartp LFUNC:CARG3, RB + | br zero, ->BC_CALLT_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | load_got lj_meta_for + | stl BASE, L->base + | bis RA, zero, CARG2 + | stl PC, SAVE_PC(sp) + | bis INS, zero, MULTRES + | bis L, zero, CARG1 + | call_intern lj_meta_for // (lua_State *L, TValue *base) + |.if JIT + | decode_OP TMP0, MULTRES + | ldi TMP1, BC_JFORI(zero) + |.endif + | decode_RA RA, MULTRES + | decode_RD RD, MULTRES + |.if JIT + | cmpeq TMP0, TMP1, AT + | bne AT, =>BC_JFORI + |.endif + | br zero, =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ldl CARG1, 0(BASE) + | beq NARGS8:RC, ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | cmpulti NARGS8:RC, 16, TMP0 + | ldl CARG1, 0(BASE) + | ldl CARG2, 8(BASE) + | bne TMP0, ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + |->ff_ .. name: + | ldl CARG1, 0(BASE) + | fldd FCARG1, 0(BASE) + | beq NARGS8:RC, ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | ldl CARG1, 0(BASE) + | ldl CARG2, 8(BASE) + | cmpulti NARGS8:RC, 16, TMP0 + | gettp TMP1, CARG1 + | bne TMP0, ->fff_fallback + | gettp TMP2, CARG2 + | // cmpulti TMP1, LJ_TISNUM, TMP1 + | // cmpulti TMP2, LJ_TISNUM, TMP2 + | ldi AT, LJ_TISNUM(zero) + | cmpult TMP1, AT, TMP1 + | cmpult TMP2, AT, TMP2 + | fldd FCARG1, 0(BASE) + | and TMP1, TMP2, TMP1 + | fldd FCARG2, 8(BASE) + | beq TMP1, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. + |.macro ffgccheck + | ldl TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ldl TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | cmpult TMP0, TMP1, AT + | bne AT, >1 + | br ra, ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp TMP1, CARG1 + | // cmpulti TMP1, LJ_TISTRUECOND, TMP1 + | ldi AT, LJ_TISTRUECOND(zero) + | cmpult TMP1, AT, TMP1 + | ldi RA, -16(BASE) + | beq TMP1, ->fff_fallback + | ldl PC, FRAME_PC(BASE) + | addwi NARGS8:RC, 8, RD // Compute (nresults+1)*8. + | ldi TMP1, 8(BASE) + | addl RA, RD, TMP2 + | stl CARG1, 0(RA) + | cmpeq BASE, TMP2, AT + | bne AT, ->fff_res // Done if exactly 1 argument. 
+ |1: + | ldl TMP0, 0(TMP1) + | stl TMP0, -16(TMP1) + | bis TMP1, zero, AT + | ldi TMP1, 8(TMP1) + | cmpeq AT, TMP2, AT + | beq AT, <1 + | br zero, ->fff_res + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | ldi TMP1, ~LJ_TISNUM(zero) + | addwi TMP1, 0, TMP1 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TISNUM, TMP0, TMP2 + | ornot zero, TMP0, AT // ~TMP0 + | seleq TMP2, TMP1, AT, AT + | s8addl AT, CFUNC:RB, AT + | ldl CARG1, CFUNC:AT->upvalue + | br zero, ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | ldi TMP0, -LJ_TTAB(TMP2) + | ldi TMP1, -LJ_TUDATA(TMP2) + | seleq TMP0, zero, TMP1, TMP0 + | cleartp TAB:CARG1 + | bne TMP0, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ldl TAB:RB, TAB:CARG1->metatable + |2: + | ldl STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) + | ldi CARG1, LJ_TNIL(zero) + | beq TAB:RB, ->fff_restv + | ldw TMP0, TAB:RB->hmask + | ldw TMP1, STR:RC->sid + | ldl NODE:TMP2, TAB:RB->node + | and TMP1, TMP0, TMP1 // idx = str->sid & tab->hmask + | slli TMP1, 5, TMP0 + | slli TMP1, 3, TMP1 + | subl TMP0, TMP1, TMP1 + | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8) + | ldi CARG4, LJ_TSTR(zero) + | addwi CARG4, 0, CARG4 + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ldl TMP0, NODE:TMP2->key + | ldl CARG1, NODE:TMP2->val + | ldl NODE:TMP2, NODE:TMP2->next + | ldi TMP3, LJ_TTAB(zero) + | cmpeq RC, TMP0, AT + | bne AT, >5 + | bne NODE:TMP2, <3 + |4: + | bis RB, zero, CARG1 + | settp CARG1, TMP3 + | br zero, ->fff_restv // Not found, keep default result. + |5: + | cmpeq CARG1, TISNIL, AT + | beq AT, ->fff_restv + | br zero, <4 // Ditto for nil value. + | + |6: + | // cmpulti TMP2, LJ_TISNUM, AT + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TMP2, TISNUM, AT + | selne AT, TISNUM, TMP2, TMP2 + | slli TMP2, 3, TMP2 + | subl DISPATCH, TMP2, TMP0 + | ldl TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) + | br zero, <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ldl TAB:TMP0, TAB:TMP1->metatable + | ldbu TMP2, TAB:TMP1->marked + | ldi AT, -LJ_TTAB(TMP3) + | cleartp TAB:CARG2 + | bis AT, TAB:TMP0, AT + | bne AT, ->fff_fallback + | andi TMP2, LJ_GC_BLACK, AT // isblack(table) + | stl TAB:CARG2, TAB:TMP1->metatable + | beq AT, ->fff_restv + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ldl CARG2, 0(BASE) + | cmpulti NARGS8:RC, 16, TMP0 + | load_got lj_tab_get + | gettp TMP1, CARG2 + | cleartp CARG2 + | ldi TMP1, -LJ_TTAB(TMP1) + | bis TMP0, TMP1, TMP0 + | ldi CARG3, 8(BASE) + | bne TMP0, ->fff_fallback + | bis L, zero, CARG1 + | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ldl CARG1, 0(CRET1) + | br zero, ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ldl CARG1, 0(BASE) + | xori NARGS8:RC, 8, TMP0 // Exactly one number argument. + | gettp TMP1, CARG1 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TISNUM, TMP1, TMP1 + | bis TMP0, TMP1, TMP0 + | bne TMP0, ->fff_fallback // No args or CARG1 is not number + | br zero, ->fff_restv + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. 
+ | gettp TMP0, CARG1 + | ldi AT, -LJ_TSTR(TMP0) + | // A __tostring method in the string base metatable is ignored. + | ldl TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) + | beq AT, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TISNUM, TMP0, TMP0 + | stl BASE, L->base // Add frame since C call can throw. + | bis TMP0, TMP1, TMP0 + | bne TMP0, ->fff_fallback + | stl PC, SAVE_PC(sp) // Redundant (but a defined value). + | ffgccheck + | load_got lj_strfmt_number + | bis L, zero, CARG1 + | bis BASE, zero, CARG2 + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | ldi AT, LJ_TSTR(zero) + | settp CRET1, AT + | bis CRET1, zero, CARG1 + | br zero, ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, -LJ_TTAB, ->fff_fallback + | addl BASE, NARGS8:RC, TMP2 + | stl TISNIL, 0(TMP2) // Set missing 2nd arg to nil. + | load_got lj_tab_next + | ldl PC, FRAME_PC(BASE) + | ldi CARG2, 8(BASE) + | ldi CARG3, -16(BASE) + | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | ldi RA, -16(BASE) + | ldi RD, (2+1)*8(zero) + | bgt CRET1, ->fff_res + | bis TISNIL, zero, CARG1 + | beq CRET1, ->fff_restv // End of traversal: return nil. + | ldl CFUNC:RB, FRAME_FUNC(BASE) + | ldi RC, 2*8(zero) + | cleartp CFUNC:RB + | br zero, ->fff_fallback + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ldl PC, FRAME_PC(BASE) +#if LJ_52 + | ldl TAB:TMP2, TAB:TMP1->metatable + | ldl TMP0, CFUNC:RB->upvalue[0] + | ldi RA, -16(BASE) + | bne TAB:TMP2, ->fff_fallback +#else + | ldl TMP0, CFUNC:RB->upvalue[0] + | ldi RA, -16(BASE) +#endif + | stl TISNIL, 0(BASE) + | stl CARG1, -8(BASE) + | stl TMP0, 0(RA) + | ldi RD, (3+1)*8(zero) + | br zero, ->fff_res + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | ldw TMP0, TAB:CARG1->asize + | ldl TMP1, TAB:CARG1->array + | ldl PC, FRAME_PC(BASE) + | addwi CARG2, 0, TMP2 + | addwi TMP2, 1, TMP2 + | cmpult TMP2, TMP0, AT + | ldi RA, -16(BASE) + | zapi TMP2, 0xf0, TMP0 + | ldi TISNUM, LJ_TISNUM(zero) + | settp TMP0, TISNUM + | stl TMP0, 0(RA) + | beq AT, >2 // Not in array part? + | s8addl TMP2, TMP1, TMP3 + | ldl TMP1, 0(TMP3) + |1: + | ldi RD, (0+1)*8(zero) + | cmpeq TMP1, TISNIL, AT + | bne AT, ->fff_res // End of iteration, return 0 results. + | stl TMP1, -8(BASE) + | ldi RD, (2+1)*8(zero) + | br zero, ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | ldw TMP0, TAB:CARG1->hmask + | ldi RD, (0+1)*8(zero) + | load_got lj_tab_getinth + | beq TMP0, ->fff_res + | bis TMP2, zero, CARG2 + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. 
+ | ldi RD, (0+1)*8(zero) + | beq CRET1, ->fff_res + | ldl TMP1, 0(CRET1) + | br zero, <1 + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ldl PC, FRAME_PC(BASE) +#if LJ_52 + | ldl TAB:TMP2, TAB:TMP1->metatable +#endif + | ldl CFUNC:TMP0, CFUNC:RB->upvalue[0] + | ldi RA, -16(BASE) +#if LJ_52 + | bne TAB:TMP2, ->fff_fallback +#endif + | ldi TISNUM, LJ_TISNUM(zero) + | slli TISNUM, 47, TMP1 + | stl CARG1, -8(BASE) + | stl TMP1, 0(BASE) + | stl CFUNC:TMP0, 0(RA) + | ldi RD, (3+1)*8(zero) + | br zero, ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | ldl TMP1, L->maxstack + | addl BASE, NARGS8:RC, TMP2 + | cmpult TMP1, TMP2, AT + | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | bne AT, ->fff_fallback + | ldi NARGS8:RC, -8(NARGS8:RC) + | bis BASE, zero, TMP2 + | cmplt NARGS8:RC, zero, AT + | bne AT, ->fff_fallback + | ldi BASE, 16(BASE) + | // Remember active hook before pcall. + |.if SW64_CORE4 + | srlwi TMP3, HOOK_ACTIVE_SHIFT, TMP3 + |.else + | zapi TMP3, 0xf0, TMP3 + | srli TMP3, HOOK_ACTIVE_SHIFT, TMP3 + |.endif + | andi TMP3, 1, TMP3 + | ldi PC, 16+FRAME_PCALL(TMP3) + | beq NARGS8:RC, ->vm_call_dispatch + |1: + | addl BASE, NARGS8:RC, TMP0 + |2: + | ldl TMP1, -16(TMP0) + | stl TMP1, -8(TMP0) + | ldi TMP0, -8(TMP0) + | cmpeq TMP0, BASE, AT + | beq AT, <2 + | br zero, ->vm_call_dispatch + | + |.ffunc xpcall + | ldl TMP1, L->maxstack + | addl BASE, NARGS8:RC, TMP2 + | cmpult TMP1, TMP2, AT + | ldl CARG1, 0(BASE) + | bne AT, ->fff_fallback + | ldi NARGS8:TMP0, -16(NARGS8:RC) + | ldl CARG2, 8(BASE) + | ldbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | cmplt NARGS8:TMP0, zero, AT + | bne AT, ->fff_fallback + | gettp TMP2, CARG2 + | ldi TMP2, -LJ_TFUNC(TMP2) + | bne TMP2, ->fff_fallback // Traceback must be a function. + | bis BASE, zero, TMP2 + | bis NARGS8:TMP0, zero, NARGS8:RC + | ldi BASE, 24(BASE) + | // Remember active hook before pcall. + |.if SW64_CORE4 + | srlwi TMP3, HOOK_ACTIVE_SHIFT, TMP3 + |.else + | zapi TMP3, 0xf0, TMP3 + | srli TMP3, HOOK_ACTIVE_SHIFT, TMP3 + |.endif + | stl CARG2, 0(TMP2) // Swap function and traceback. + | andi TMP3, 1, TMP3 + | stl CARG1, 8(TMP2) + | ldi PC, 24+FRAME_PCALL(TMP3) + | beq NARGS8:RC, ->vm_call_dispatch + | br zero, <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ldl L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | ldbu TMP0, L:CARG1->status + | ldl TMP1, L:CARG1->cframe + | ldl CARG2, L:CARG1->top + | ldl TMP2, L:CARG1->base + | subwi TMP0, LUA_YIELD, CARG4 + | addl CARG2, TMP0, CARG3 + | ldi TMP3, 8(CARG2) + | seleq CARG4, CARG2, TMP3, CARG2 + | cmplt zero, CARG4, AT + | bne AT, ->fff_fallback // st > LUA_YIELD? + | xor TMP2, CARG3, TMP2 + | bis TMP2, TMP0, CARG4 + | bne TMP1, ->fff_fallback // cframe != 0? + | ldl TMP0, L:CARG1->maxstack + | ldl PC, FRAME_PC(BASE) + | beq CARG4, ->fff_fallback // base == top && st == 0? + | addl CARG2, NARGS8:RC, TMP2 + | cmpult TMP0, TMP2, CARG4 + | stl BASE, L->base + | stl PC, SAVE_PC(sp) + | bne CARG4, ->fff_fallback // Stack overflow? + |1: + |.if resume + | ldi BASE, 8(BASE) // Keep resumed thread in stack for GC. 
+ | ldi NARGS8:RC, -8(NARGS8:RC) + | ldi TMP2, -8(TMP2) + |.endif + | stl TMP2, L:CARG1->top + | stl BASE, L->top + | addl BASE, NARGS8:RC, TMP1 + | bis CARG2, zero, CARG3 + |2: // Move args to coroutine. + | ldl TMP0, 0(BASE) + | cmpult BASE, TMP1, TMP3 + | ldi BASE, 8(BASE) + | beq TMP3, >3 + | stl TMP0, 0(CARG3) + | ldi CARG3, 8(CARG3) + | br zero, <2 + |3: + | bis L:CARG1, zero, L:RA + | br ra, ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ldl TMP2, L:RA->base + | // cmpulti CRET1, LUA_YIELD+1, TMP1 + | ldi TMP1, LUA_YIELD+1(zero) + | cmpult CRET1, TMP1, TMP1 + | ldl TMP3, L:RA->top + | li_vmstate INTERP + | ldl BASE, L->base + | stl L, DISPATCH_GL(cur_L)(DISPATCH) + | st_vmstate + | subl TMP3, TMP2, RD + | beq TMP1, >8 + | ldl TMP0, L->maxstack + | addl BASE, RD, TMP1 + | beq RD, >6 // No results? + | addl TMP2, RD, TMP3 + | cmpult TMP0, TMP1, AT + | bne AT, >9 // Need to grow stack? + | stl TMP2, L:RA->top // Clear coroutine stack. + | bis BASE, zero, TMP1 + |5: // Move results from coroutine. + | ldl TMP0, 0(TMP2) + | ldi TMP2, 8(TMP2) + | stl TMP0, 0(TMP1) + | ldi TMP1, 8(TMP1) + | cmpult TMP2, TMP3, AT + | bne AT, <5 + |6: + |.if resume + | mov_true TMP1 + | ldi RD, 16(RD) + |7: + | stl TMP1, -8(BASE) // Prepend true/false to results. + | ldi RA, -8(BASE) + |.else + | bis BASE, zero, RA + | ldi RD, 8(RD) + |.endif + | andi PC, FRAME_TYPE, TMP0 + | stl PC, SAVE_PC(sp) + | bis RD, zero, MULTRES + | beq TMP0, ->BC_RET_Z + | br zero, ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | ldi TMP3, -8(TMP3) + | mov_false TMP1 + | addwi zero, (2+1)*8, RD + | ldl TMP0, 0(TMP3) + | stl TMP3, L:RA->top // Remove error from coroutine stack. + | stl TMP0, 0(BASE) // Copy error message. + | br zero, <7 + |.else + | load_got lj_ffh_coroutine_wrap_err + | bis L, zero, CARG1 + | bis L:RA, zero, CARG2 + | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + |.endif + | + |9: // Handle stack expansion on return from yield. + | load_got lj_state_growstack + | bis L, zero, CARG1 + |.if SW64_CORE4 + | srlwi RD, 3, CARG2 + |.else + | zapi RD, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + |.endif + | call_intern lj_state_growstack // (lua_State *L, int n) + | ldi CRET1, 0(zero) + | br zero, <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ldl TMP0, L->cframe + | addl BASE, NARGS8:RC, TMP1 + | addwi zero, LUA_YIELD, CRET1 + | stl BASE, L->base + | andi TMP0, CFRAME_RESUME, TMP0 + | stl TMP1, L->top + | beq TMP0, ->fff_fallback + | stl zero, L->cframe + | stb CRET1, L->status + | br zero, ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func + |->ff_math_ .. func: + | ldl CARG1, 0(BASE) + | gettp TMP0, CARG1 + | beq NARGS8:RC, ->fff_fallback + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | bne AT, ->fff_restv + | fldd FCARG1, 0(BASE) + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TMP0, TISNUM, AT + | beq AT, ->fff_fallback + | br ra, ->vm_ .. func + | br zero, ->fff_resn + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | ldi TMP2, -LJ_TISNUM(CARG2) + | addwi CARG1, 0, TMP1 + | bne TMP2, >1 + |.if SW64_CORE4 + | srawi TMP1, 31, TMP0 + |.else + | addwi TMP1, 0, TMP0 + | srai TMP0, 31, TMP0 // Extract sign. 
int + |.endif + | xor TMP1, TMP0, TMP1 + | subl TMP1, TMP0, CARG1 + | slli CARG1, 32, TMP3 + | ldi TISNUM, LJ_TISNUM(zero) + | settp CARG1, TISNUM + | cmplt TMP3, zero, AT + | beq AT, ->fff_restv + | ldi CARG1, 0x41e(zero) // 2^31 as a double. + |.if SW64_CORE4 + | sllwi CARG1, 4, CARG1 // 0x41e0 + |.else + | slli CARG1, 4, CARG1 // 0x41e0 + | addwi CARG1, 0, CARG1 + |.endif + | slli CARG1, 48, CARG1 + | br zero, ->fff_restv + |1: + | // cmpulti CARG2, LJ_TISNUM, TMP2 + | ldi TMP2, LJ_TISNUM(zero) + | cmpult CARG2, TMP2, TMP2 + | .DEXTM CARG1, CARG1, 0, 63 + | beq TMP2, ->fff_fallback // int + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result. + | ldl PC, FRAME_PC(BASE) + | ldi RA, -16(BASE) + | stl CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | ldi RD, (1+1)*8(zero) + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi PC, FRAME_TYPE, TMP0 + | bis RD, zero, MULTRES + | ldi RA, -16(BASE) + | bne TMP0, ->vm_return + | ldw INS, -4(PC) + | decode_RB RB, INS + |5: + | cmpult RD, RB, TMP2 + | decode_RA TMP0, INS + | bne TMP2, >6 // More results expected? + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | subl RA, TMP0, BASE + | ins_next + | + |6: // Fill up results with nil. + | addl RA, RD, TMP1 + | ldi RD, 8(RD) + | stl TISNIL, -8(TMP1) + | br zero, <5 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | load_got func + | call_extern + | br zero, ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | load_got func + | call_extern + | br zero, ->fff_resn + |.endmacro + | + |.ffunc_n math_sqrt + | fsqrtd FCARG1, FCRET1 + |->fff_resn: + | ldl PC, FRAME_PC(BASE) + | fstd FCRET1, -16(BASE) + | br zero, ->fff_res1 + | + |.ffunc math_log + | ldi TMP1, 8(zero) + | ldl CARG1, 0(BASE) + | fldd FCARG1, 0(BASE) + | cmpeq NARGS8:RC, TMP1, AT + | beq AT, ->fff_fallback // Need exactly 1 argument. + | checknum CARG1, ->fff_fallback + | load_got log + | call_extern + | br zero, ->fff_resn + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | load_got ldexp + | fldd FCARG1, 0(BASE) + | ldw CARG2, 8(BASE) // (double x, int exp) + | call_extern + | br zero, ->fff_resn + | + |.ffunc_n math_frexp + | load_got frexp + | ldl PC, FRAME_PC(BASE) + | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) + | call_extern + | ldw TMP1, DISPATCH_GL(tmptv)(DISPATCH) + | ldi RA, -16(BASE) + | ifmovs TMP1, FCARG2 + | fstd FCRET1, 0(RA) + | fcvtwl FCARG2, FCARG2 + | fcvtld FCARG2, FCARG2 + | fstd FCARG2, 8(RA) + | ldi RD, (2+1)*8(zero) + | br zero, ->fff_res + | + |.ffunc_n math_modf + | load_got modf + | ldl PC, FRAME_PC(BASE) + | ldi CARG2, -16(BASE) + | ldi RA, -16(BASE) + | call_extern + | fstd FCRET1, -8(BASE) + | ldi RD, (2+1)*8(zero) + | br zero, ->fff_res + | + | + |.macro math_minmax, name, intins, fpins + | .ffunc_1 name + | addl BASE, NARGS8:RC, TMP3 + | addli BASE, 8, TMP2 + | checkint CARG1, >5 + |1: // Handle integers. 
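+ |// Integer loop: the running result stays in CARG1 and 'intins' selects
+ |// between it and the next argument after a signed compare. The number loop
+ |// (at 6 below) takes over as soon as a non-integer argument is seen.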
+ | ldl CARG2, 0(TMP2) + | cmpeq TMP2, TMP3, AT + | bne AT, ->fff_restv + | addwi CARG1, 0, CARG1 + | checkint CARG2, >3 + | addwi CARG2, 0, CARG2 + | cmplt CARG1, CARG2, AT + | intins AT, CARG2, CARG1, CARG1 + | ldi TMP2, 8(TMP2) + | zapi CARG1, 0xf0, CARG1 + | ldi TISNUM, LJ_TISNUM(zero) + | settp CARG1, TISNUM + | br zero, <1 + | + |3: // Convert intermediate result to number and continue with number loop. + | ifmovs CARG1, FCRET1 + | checknum CARG2, ->fff_fallback + | fcvtwl FCRET1, FCRET1 + | fcvtld FCRET1, FCRET1 + | fldd FCARG1, 0(TMP2) + | br zero, >7 + | + |5: + | fldd FCRET1, 0(BASE) + | ldl CARG2, 0(TMP2) + | checknum CARG1, ->fff_fallback + |6: // Handle numbers. + | cmpeq TMP2, TMP3, AT + | bne AT, ->fff_resn + | fldd FCARG1, 0(TMP2) + | checknum CARG2, >8 + |7: + | fcmplt FCRET1, FCARG1, FAT + | fpins FAT, FCARG1, FCRET1, FCRET1 + | ldi TMP2, 8(TMP2) + | br zero, <6 + | + |8: // Convert integer to number and continue with number loop. + | flds FCARG1, 0(TMP2) + | checkint CARG2, ->fff_fallback + | fcvtwl FCARG1, FCARG1 + | fcvtld FCARG1, FCARG1 + | br zero, <7 + | + |.endmacro + | + | math_minmax math_min, seleq, fseleq + | math_minmax math_max, selne, fselne + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ldl CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori NARGS8:RC, 8, TMP1 + | ldi TMP0, -LJ_TSTR(TMP0) + | bis TMP1, TMP0, TMP1 + | cleartp STR:CARG1 + | bne TMP1, ->fff_fallback // Need exactly 1 string argument. + | ldw TMP0, STR:CARG1->len + | ldl PC, FRAME_PC(BASE) + | cmpult zero, TMP0, RD + | ldbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). + | addwi RD, 1, RD + | s8addwi RD, 0, RD // RD = ((str->len != 0)+1)*8 + | ldi TISNUM, LJ_TISNUM(zero) + | settp TMP2, TISNUM + | stl TMP2, -16(BASE) + | br zero, ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ldl CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori NARGS8:RC, 8, TMP1 // Need exactly 1 argument. + | ldi TMP0, -LJ_TISNUM(TMP0) // Integer. + | ldi TMP2, 255(zero) + | addwi CARG1, 0, CARG1 + | bis TMP1, TMP0, TMP1 + | cmpult TMP2, CARG1, TMP2 // !(255 < n). + | bis TMP1, TMP2, TMP1 + | ldi CARG3, 1(zero) + | bne TMP1, ->fff_fallback + | ldi CARG2, TMPD_OFS(sp) + | stb CARG1, TMPD(sp) + |->fff_newstr: + | load_got lj_str_new + | stl BASE, L->base + | stl PC, SAVE_PC(sp) + | bis L, zero, CARG1 + | call_intern lj_str_new // (lua_State *L, char *str, size_t l) + | // Returns GCstr *. 
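+ |// The returned GCstr * is tagged with LJ_TSTR below and handed back through
+ |// ->fff_restv.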
+ | ldl BASE, L->base + |->fff_resstr: + | ldi AT, LJ_TSTR(zero) + | settp CRET1, AT + | bis CRET1, zero, CARG1 + | br zero, ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ldl CARG1, 0(BASE) + | ldl CARG2, 8(BASE) + | ldl CARG3, 16(BASE) + | subwi NARGS8:RC, 16, TMP0 + | gettp TMP1, CARG1 + | cmplt TMP0, zero, AT + | bne AT, ->fff_fallback + | cleartp STR:CARG1, CARG1 + | subwi zero, 1, CARG4 + | beq TMP0, >1 + | addwi CARG3, 0, CARG4 + | checkint CARG3, ->fff_fallback + |1: + | checkint CARG2, ->fff_fallback + | ldi TMP0, -LJ_TSTR(TMP1) + | addwi CARG2, 0, CARG3 + | bne TMP0, ->fff_fallback + | ldw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | addwi CARG2, 1, TMP0 + | cmplt CARG4, zero, TMP3 + | addw CARG4, TMP0, TMP2 + | cmplt CARG3, zero, TMP1 + | selne TMP3, TMP2, CARG4, CARG4 // if (end < 0) end += len+1 + | addw CARG3, TMP0, TMP2 + | selne TMP1, TMP2, CARG3, CARG3 // if (start < 0) start += len+1 + | ldi TMP3, 1(zero) + | cmplt CARG4, zero, TMP2 + | cmplt zero, CARG3, TMP1 + | selne TMP2, zero, CARG4, CARG4 // if (end < 0) end = 0 + | selne TMP1, CARG3, TMP3, CARG3 // if (start < 1) start = 1 + | cmplt CARG2, CARG4, TMP2 + | seleq TMP2, CARG4, CARG2, CARG4 // if (end > len) end = len + | addl STR:CARG1, CARG3, CARG2 + | subl CARG4, CARG3, CARG3 // len = end - start + | ldi CARG2, sizeof(GCstr)-1(CARG2) + | addwi CARG3, 1, CARG3 // len += 1 + | cmplt CARG3, zero, AT + | beq AT, ->fff_newstr + |->fff_emptystr: // Return empty string. + | ldi TMP1, LJ_TSTR(zero) + | ldi STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) + | settp CARG1, TMP1 + | br zero, ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ldl CARG2, 0(BASE) + | beq NARGS8:RC, ->fff_fallback + | checkstr STR:CARG2, ->fff_fallback + | ldi SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) + | load_got lj_buf_putstr_ .. name + | ldl TMP0, SBUF:CARG1->b + | stl L, SBUF:CARG1->L + | stl BASE, L->base + | stl TMP0, SBUF:CARG1->w + | stl PC, SAVE_PC(sp) + | call_intern extern lj_buf_putstr_ .. 
name + |// or SBUF:CARG1, SBUF:CRET1, zero + | load_got lj_buf_tostr + | bis SBUF:CRET1, zero, SBUF:CARG1 + | call_intern lj_buf_tostr + | ldl BASE, L->base + | br zero, ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | fldd FCARG1, 0(BASE) + | beq TMP1, ->fff_fallback + | faddd FCARG1, TOBIT, FCARG1 + | fimovd FCARG1, CRET1 + | zapi CRET1, 0xf0, CRET1 + | ret zero, 0(ra) + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | zapi CARG1, 0xf0, CRET1 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | bne AT, >1 + | ldi TMP1, LJ_TISNUM(zero) + | cmpult TMP0, TMP1, TMP1 + | br ra, ->vm_tobit_fb + |1: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | ldi TMP2, 8(BASE) + | addl BASE, NARGS8:RC, TMP3 + |1: + | ldl TMP1, 0(TMP2) + | cmpeq TMP2, TMP3, AT + | bne AT, ->fff_resi + | gettp TMP0, TMP1 + | ldi TMP2, 8(TMP2) + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | beq AT, >2 + | zapi TMP1, 0xf0, TMP1 + | bins CRET1, TMP1, CRET1 + | br zero, <1 + |2: + | fldd FCARG1, -8(TMP2) + | // cmpulti TMP0, LJ_TISNUM, TMP0 + | ldi AT, LJ_TISNUM(zero) + | cmpult TMP0, AT, TMP0 + | faddd FCARG1, TOBIT, FCARG1 + | beq TMP0, ->fff_fallback + | fimovd FCARG1, TMP1 + | zapi TMP1, 0xf0, TMP1 + | bins CRET1, TMP1, CRET1 + | br zero, <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, bis + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | srli CRET1, 8, TMP0 + | srli CRET1, 24, TMP1 + | srli TMP0, 8, TMP2 + | andi TMP2, 0xff, TMP3 + | slli TMP3, 8, TMP3 + | .DINS TMP1, CRET1, 24, 8 + | .DINS TMP3, TMP0, 16, 8 + | bis TMP1, TMP3, CRET1 + | br zero, ->fff_resi + | + |.ffunc_bit tobit + |->fff_resi: + | ldl PC, FRAME_PC(BASE) + | ldi RA, -16(BASE) + | ldi TISNUM, LJ_TISNUM(zero) + | settp CRET1, TISNUM + | stl CRET1, -16(BASE) + | br zero, ->fff_res1 + | + |.ffunc_bit bnot + | ornot zero, CRET1, CRET1 // ~CRET1 + | zapi CRET1, 0xf0, CRET1 + | br zero, ->fff_resi + | + |.macro .ffunc_bit_sh, name, shins, shmod + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | bne AT, >1 + | // cmpulti TMP0, LJ_TISNUM, TMP1 + | ldi AT, LJ_TISNUM(zero) + | cmpult TMP0, AT, TMP1 + | br ra, ->vm_tobit_fb + | bis CRET1, zero, CARG1 + |1: + | gettp TMP0, CARG2 + | zapi CARG2, 0xf0, CARG2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | beq AT, ->fff_fallback + | addwi CARG1, 0, CARG1 + |.if shmod == 1 + | subw zero, CARG2, CARG2 + |.endif + | shins CRET1, CARG1, CARG2 + | zapi CRET1, 0xf0, CRET1 + | br zero, ->fff_resi + |.endmacro + | + |.macro .SLLW, rd, rs, rt + |.if SW64_CORE4 + | sllw rs, rt, rd + |.else + | andi rt, 0x1f, AT + | sll rs, AT, rd + | addwi rd, 0x0, rd + |.endif + |.endmacro + | + |.macro .SRLW, rd, rs, rt + |.if SW64_CORE4 + | srlw rs, rt, rd + |.else + | andi rt, 0x1f, AT + | zapi rs, 0xf0, rd + | srl rd, AT, rd + | addwi rd, 0, rd + |.endif + |.endmacro + | + |.macro .SRAW, rd, rs, rt + |.if SW64_CORE4 + | sraw rs, rt, rd + |.else + | andi rt, 0x1f, AT + | addwi rs, 0x0, rd + | sra rd, AT, rd + |.endif + |.endmacro + | + |.macro .RORW, rd, rs, rt //TODO CHECK + | andi rt, 0x1f, TMP0 + | ldi TMP1, 32(zero) + | subw TMP1, TMP0, TMP1 + | andi TMP0, 0x1f, TMP2 + | zapi rs, 0xf0, TMP0 + |.if SW64_CORE4 + | srlw TMP0, TMP2, TMP0 + |.else + | srl TMP0, TMP2, TMP0 + | addwi TMP0, 0, TMP0 + |.endif + | andi 
TMP1, 0x1f, TMP2 + |.if SW64_CORE4 + | sllw rs, TMP2, rd + |.else + | sll rs, TMP2, rd + | addwi rd, 0x0, rd + |.endif + | bis TMP0, rd, rd + | addwi rd, 0x0, rd + |.endmacro + | + |.ffunc_bit_sh lshift, .SLLW, 0 + |.ffunc_bit_sh rshift, .SRLW, 0 + |.ffunc_bit_sh arshift, .SRAW, 0 + |.ffunc_bit_sh rol, .RORW, 1 + |.ffunc_bit_sh ror, .RORW, 0 + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ldl PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | ldl CARG3, CFUNC:RB->f + | addl BASE, NARGS8:RC, TMP1 + | stl BASE, L->base + | ldi TMP0, 8*LUA_MINSTACK(TMP1) + | ldl TMP2, L->maxstack + | stl PC, SAVE_PC(sp) // Redundant (but a defined value). + | stl TMP1, L->top + | bis L, zero, CARG1 + | cmpult TMP2, TMP0, AT + | bne AT, >5 // Need to grow stack. + | ldi CFUNCADDR, 0(CARG3) + | call r26, 0(CFUNCADDR) // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ldl BASE, L->base + | s8addwi CRET1, 0, RD + | ldi RA, -16(BASE) + | cmplt zero, CRET1, AT + | bne AT, ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | ldl TMP0, L->top + | subl TMP0, BASE, NARGS8:RC + | cleartp LFUNC:RB + | bne CRET1, ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi PC, FRAME_TYPE, TMP0 + | ldi TMP2, ~FRAME_TYPEP(zero) + | and TMP2, PC, TMP1 + | bne TMP0, >3 + | ldbu TMP1, OFS_RA(PC) + | s8addwi TMP1, 16, TMP1 + |3: + | subl BASE, TMP1, TMP2 + | br zero, ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | load_got lj_state_growstack + | ldi CARG2, LUA_MINSTACK(zero) + | bis L, zero, CARG1 + | call_intern lj_state_growstack // (lua_State *L, int n) + | ldl BASE, L->base + | ldi CRET1, 0(zero) // Set zero-flag to force retry. + | br zero, <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | bis ra, zero, MULTRES + | load_got lj_gc_step + | addl BASE, NARGS8:RC, TMP0 // Calculate L->top. + | stl BASE, L->base + | stl PC, SAVE_PC(sp) // Redundant (but a defined value). + | bis L, zero, CARG1 + | stl TMP0, L->top + | call_intern lj_gc_step // (lua_State *L) + | ldl BASE, L->base + | ldl TMP0, L->top + | ldl CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | subl TMP0, BASE, NARGS8:RC + | jmp zero, 0(MULTRES) + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi TMP3, HOOK_VMEVENT, TMP1 // No recording while in vmevent. + | ldw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | bne TMP1, >5 + | // Decrement the hookcount for consistency, but always do the call. + | andi TMP3, HOOK_ACTIVE, TMP1 + | subwi TMP2, 1, TMP2 + | bne TMP1, >1 + | andi TMP3, LUA_MASKLINE|LUA_MASKCOUNT, TMP1 + | beq TMP1, >1 + | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | br zero, >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi TMP3, HOOK_ACTIVE, TMP1 // Hook already active? 
+ | beq TMP1, >1 + |5: // Re-dispatch to static ins. + | ldl TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jmp zero, 0(TMP1) + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | ldw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi TMP3, HOOK_ACTIVE, TMP1 // Hook already active? + | bne TMP1, <5 + | andi TMP3, LUA_MASKLINE|LUA_MASKCOUNT, TMP1 + | subwi TMP2, 1, TMP2 + | beq TMP1, <5 + | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | beq TMP2, >1 + | andi TMP3, LUA_MASKLINE, TMP1 + | load_got lj_dispatch_ins + | beq TMP1, <5 + |1: + | load_got lj_dispatch_ins + | stw MULTRES, TMPD(sp) + | bis PC, zero, CARG2 + | stl BASE, L->base + | bis L, zero, CARG1 + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ldl BASE, L->base + |4: // Re-dispatch to static ins. + | ldw INS, -4(PC) + | decode_OP TMP1, INS + | decode_BC8b TMP1 + | addl TMP1, DISPATCH, TMP0 + | decode_RD RD, INS + | ldl TMP1, GG_DISP2STATIC(TMP0) + | decode_RA RA, INS + | jmp zero, 0(TMP1) + | + |->cont_hook: // Continue from hook yield. + | ldi PC, 4(PC) + | ldw MULTRES, -24(RB) // Restore MULTRES for *M ins. + | br zero, <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ldl LFUNC:TMP1, FRAME_FUNC(BASE) + | ldi CARG1, GG_DISP2J(DISPATCH) + | cleartp LFUNC:TMP1 + | stl PC, SAVE_PC(sp) + | ldl TMP1, LFUNC:TMP1->pc + | bis PC, zero, CARG2 + | stl L, DISPATCH_J(L)(DISPATCH) + | ldbu TMP1, PC2PROTO(framesize)(TMP1) + | load_got lj_trace_hot + | stl BASE, L->base + | s8addl TMP1, BASE, TMP1 + | stl TMP1, L->top + | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) + | br zero, <3 + |.endif + | + | + |->vm_callhook: // Dispatch target for call hooks. + | bis PC, zero, CARG2 + |.if JIT + | br zero, >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | bisi PC, 1, CARG2 + |1: + |.endif + | load_got lj_dispatch_call + | addl BASE, RC, TMP0 + | stl PC, SAVE_PC(sp) + | stl BASE, L->base + | subl RA, BASE, RA + | stl TMP0, L->top + | bis L, zero, CARG1 + | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ldl BASE, L->base + | ldl TMP0, L->top + | stl zero, SAVE_PC(sp) // Invalidate for subsequent line hook. + | addl RA, BASE, RA + | subl TMP0, BASE, NARGS8:RC + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | ldw INS, -4(PC) + | jmp zero, 0(CRET1) + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | ldw INS, -4(PC) + | ldl TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA RC, INS + | ldi TMP1, -8(MULTRES) + | cleartp TRACE:TMP2 + | addl RC, BASE, RC // Call base. + | beq TMP1, >2 + |1: // Move results down. + | ldl CARG1, 0(RA) + | ldi TMP1, -8(TMP1) + | ldi RA, 8(RA) + | stl CARG1, 0(RC) + | ldi RC, 8(RC) + | bne TMP1, <1 + |2: + | decode_RA RA, INS + | decode_RB RB, INS + | addl RA, RB, RA + | addl RA, BASE, RA + |3: + | cmpult RC, RA, TMP1 + | bne TMP1, >9 // More results wanted? + | + | ldhu TMP3, TRACE:TMP2->traceno + | ldhu RD, TRACE:TMP2->link + | load_got lj_dispatch_stitch + | cmpeq RD, TMP3, AT + | bne AT, ->cont_nop // Blacklisted. + | s8addwi RD, 0, RD + | bne RD, =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. 
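+ |// Record the previous trace number as J->exitno and let lj_dispatch_stitch()
+ |// start recording a side trace at the current PC.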
+ | stw TMP3, DISPATCH_J(exitno)(DISPATCH) + | stl L, DISPATCH_J(L)(DISPATCH) + | stl BASE, L->base + | ldi CARG1, GG_DISP2J(DISPATCH) + | bis PC, zero, CARG2 + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldl BASE, L->base + | br zero, ->cont_nop + | + |9: + | stl TISNIL, 0(RC) + | ldi RC, 8(RC) + | br zero, <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | bis L, zero, CARG1 + | bis PC, zero, CARG2 + | stl BASE, L->base + | stw MULTRES, TMPD(sp) + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | ldi PC, -4(PC) + | ldl BASE, L->base + | br zero, ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + | fstd f..a, a*8(sp) + | fstd f..b, b*8(sp) + | stl r..a, 32*8+a*8(sp) + | stl r..b, 32*8+b*8(sp) + |.endmacro + | + |->vm_exit_handler: + |.if JIT + |//Save all registers except RA and SP.On SW64 is r30 and r26. + | ldi sp, -(32*8+32*8)(sp) + | savex_ 0, 1 + | savex_ 2, 3 + | savex_ 4, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 27, 28 + | savex_ 29, 31 + | fstd f26, 26*8(sp) + | fstd f30, 30*8(sp) + | stl zero, 32*8+26*8(sp) // Clear RID_TMP. + | ldi TMP2, 32*8+32*8(sp) // Recompute original value of sp. + | stl TMP2, 32*8+30*8(sp) // Store sp in RID_SP + | li_vmstate EXIT + | ldi DISPATCH, -GG_DISP2G-32768(JGL) + | ldw TMP1, 0(TMP2) // Load exit number. + | st_vmstate + | ldl L, DISPATCH_GL(cur_L)(DISPATCH) + | ldl BASE, DISPATCH_GL(jit_base)(DISPATCH) + | load_got lj_trace_exit + | stl L, DISPATCH_J(L)(DISPATCH) + | stw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. + | stl BASE, L->base + | stw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. + | ldi CARG1, GG_DISP2J(DISPATCH) + | stl zero, DISPATCH_GL(jit_base)(DISPATCH) + | bis sp, zero, CARG2 + | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldl TMP1, L->cframe + | ldi TMP2, -4(zero) + | ldl BASE, L->base + | and TMP1, TMP2, sp + | ldl PC, SAVE_PC(sp) // Get SAVE_PC. + | stl L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). + | br zero, >1 + |.endif + | + |->vm_exit_interp: + |.if JIT + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ldl L, SAVE_L(sp) + | ldi DISPATCH, -GG_DISP2G-32768(JGL) + | stl BASE, L->base + |1: + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | ldi TMP0, -LUA_ERRERR(zero) + | cmpult CRET1, TMP0, TMP0 + | beq TMP0, >9 // Check for error from exit. + | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). + | slli CRET1, 3, MULTRES + | cleartp LFUNC:RB + | stw MULTRES, TMPD(sp) + | ldi TISNIL, LJ_TNIL(zero) + | ldi TISNUM, LJ_TISNUM(zero) // Setup type comparison constants. + | ifmovs TMP3, TOBIT + | ldl TMP1, LFUNC:RB->pc + | stl zero, DISPATCH_GL(jit_base)(DISPATCH) + | ldl KBASE, PC2PROTO(k)(TMP1) + | fcvtsd TOBIT, TOBIT + | // Modified copy of ins_next which handles function header dispatch, too. 
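+ |// Plain bytecodes are dispatched directly. Ops >= BC_FUNCF additionally set
+ |// up RA/RC (and KBASE for a Lua frame below a fast function) at 2 and 3 below.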
+ | ldw INS, 0(PC) + | ldi CRET1, 17(CRET1) + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_RD RD, INS + | ldi PC, 4(PC) + | beq CRET1, >5 + | decode_OP TMP1, INS + | decode_BC8b TMP1 + | // cmpulti TMP1, BC_FUNCF*8, TMP2 + | addl DISPATCH, TMP1, TMP0 + | ldi TMP2, BC_FUNCF*8(zero) + | cmpult TMP1, TMP2, TMP2 + | ldl TMP3, 0(TMP0) + | decode_RA RA, INS + | beq TMP2, >2 + | jmp zero, 0(TMP3) + |2: + | // cmpulti TMP1, (BC_FUNCC+2)*8, TMP2 // Fast function? + | ldi TMP2, (BC_FUNCC+2)*8(zero) + | cmpult TMP1, TMP2, TMP2 + | ldl TMP1, FRAME_PC(BASE) + | bne TMP2, >3 + | // Check frame below fast function. + | andi TMP1, FRAME_TYPE, TMP0 + | bne TMP0, >3 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldw TMP2, -4(TMP1) + | decode_RA TMP0, TMP2 + | subl BASE, TMP0, TMP1 + | ldl LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ldl TMP1, LFUNC:TMP2->pc + | ldl KBASE, PC2PROTO(k)(TMP1) + |3: + | ldi RC, -8(MULTRES) + | addl RA, BASE, RA + | jmp zero, 0(TMP3) + | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ldl TMP0, DISPATCH_J(trace)(DISPATCH) + | decode_BC8b RD + | addl TMP0, RD, TMP0 + | ldl TRACE:TMP2, 0(TMP0) + | ldw INS, TRACE:TMP2->startins + | decode_OP TMP1, INS + | decode_BC8b TMP1 + | addl DISPATCH, TMP1, TMP0 + | decode_RD RD, INS + | ldl TMP3, GG_DISP2STATIC(TMP0) + | decode_RA RA, INS + | jmp zero, 0(TMP3) + | + |9: // Rethrow error from the right C frame. + | load_got lj_err_trace + | subw zero, CRET1, CARG2 //TODO LA: sub.w no trap + | bis L, zero, CARG1 + | call_intern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Modifies AT, TMP0, FCRET1, FCRET2, FCARG1. Keeps all others incl. f2. + |.macro vm_round, func + | // skip NaN && Inf + | // 0 * NaN == NaN + | // 0 * Inf == NaN + | // 0 * Other == 0 + | fmuld fzero, FCARG1, FAT + | fcmpun fzero, FAT, FAT + | fbeq FAT, >1 + | faddd fzero, FCARG1, FCRET1 + | ret zero, 0(ra) + |1: + |.if "func"=="floor" + | fcvtdln FCARG1, FAT; + |.endif + |.if "func"=="ceil" + | fcvtdlp FCARG1, FAT; + |.endif + |.if "func"=="trunc" + | fcvtdlz FCARG1, FAT; + |.endif + | fcvtld FAT, FCRET1 + | ret zero, 0(ra) + |.endmacro + | + | + |->vm_floor: + | vm_round floor + |->vm_ceil: + | vm_round ceil + |->vm_trunc: + |.if JIT + | vm_round trunc + |.endif + | + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP2, TMP2 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldw NEXT_ASIZE, NEXT_TAB->asize + | ldl NEXT_TMP0, NEXT_TAB->array + | ldi NEXT_NIL, LJ_TNIL(zero) + |1: // Traverse array part. 
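+ |// Roughly: while (idx < t->asize) { if (!nil(array[idx])) return value + int key; idx++; }
+ |// then walk the hash nodes starting at node[idx - asize], skipping nil values.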
+ | cmpult NEXT_IDX, NEXT_ASIZE, TMP3 + |.if SW64_CORE4 + | sllwi NEXT_IDX, 3, NEXT_TMP1 + |.else + | slli NEXT_IDX, 3, NEXT_TMP1 + | addwi NEXT_TMP1, 0, NEXT_TMP1 + |.endif + | addl NEXT_TMP0, NEXT_TMP1, NEXT_TMP1 + | beq TMP3, >5 + | ldi TMP3, LJ_TISNUM(zero) + | ldl NEXT_TMP2, 0(NEXT_TMP1) + | slli TMP3, 47, TMP3 + | bis NEXT_IDX, TMP3, NEXT_TMP1 + | addwi NEXT_IDX, 1, NEXT_IDX + | cmpeq NEXT_TMP2, NEXT_NIL, NEXT_TMP2 + | bne NEXT_TMP2, <1 + | stl NEXT_TMP2, NEXT_RES_VAL + | stl NEXT_TMP1, NEXT_RES_KEY + | bis NEXT_RES_PTR, zero, NEXT_RES_VK + | bis NEXT_IDX, zero, NEXT_RES_IDX + | ret zero, 0(ra) + | + |5: // Traverse hash part. + | subw NEXT_IDX, NEXT_ASIZE, NEXT_RES_IDX + | ldw NEXT_TMP0, NEXT_TAB->hmask + | ldl NODE:NEXT_RES_VK, NEXT_TAB->node + |.if SW64_CORE4 + | sllwi NEXT_RES_IDX, 5, NEXT_TMP2 + | sllwi NEXT_RES_IDX, 3, TMP3 + |.else + | slli NEXT_RES_IDX, 5, NEXT_TMP2 + | addwi NEXT_TMP2, 0, NEXT_TMP2 + | slli NEXT_RES_IDX, 3, TMP3 + | addwi TMP3, 0, TMP3 + |.endif + | subw NEXT_TMP2, TMP3, TMP3 + | addl NODE:NEXT_RES_VK, TMP3, NODE:NEXT_RES_VK + |6: + | cmpult NEXT_TMP0, NEXT_RES_IDX, TMP3 + | bne TMP3, >8 + | ldl NEXT_TMP2, NODE:NEXT_RES_VK->val + | addwi NEXT_RES_IDX, 1, NEXT_RES_IDX + | cmpeq NEXT_TMP2, NEXT_NIL, NEXT_TMP2 + | beq NEXT_TMP2, >9 + | // Skip holes in hash part. + | ldi NODE:NEXT_RES_VK, sizeof(Node)(NODE:NEXT_RES_VK) + | br zero, <6 + | + |8: // End of iteration. Set the key to nil (not the value). + | stl NEXT_NIL, NEXT_RES_KEY + | bis NEXT_RES_PTR, zero, NEXT_RES_VK + |9: + | addw NEXT_RES_IDX, NEXT_ASIZE, NEXT_RES_IDX + | ret zero, 0(ra) + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in r1, g in r2. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ldl CTSTATE, GL:r2->ctype_state + | ldi DISPATCH, GG_G2DISP(r2) + | load_got lj_ccallback_enter + | stw r1, CTSTATE->cb.slot + | stl CARG1, CTSTATE->cb.gpr[0] + | fstd FCARG1, CTSTATE->cb.fpr[0] + | stl CARG2, CTSTATE->cb.gpr[1] + | fstd FCARG2, CTSTATE->cb.fpr[1] + | stl CARG3, CTSTATE->cb.gpr[2] + | fstd FCARG3, CTSTATE->cb.fpr[2] + | stl CARG4, CTSTATE->cb.gpr[3] + | fstd FCARG4, CTSTATE->cb.fpr[3] + | stl CARG5, CTSTATE->cb.gpr[4] + | fstd FCARG5, CTSTATE->cb.fpr[4] + | stl CARG6, CTSTATE->cb.gpr[5] + | fstd FCARG6, CTSTATE->cb.fpr[5] + | ldi TMP0, CFRAME_SPACE(sp) + | stl TMP0, CTSTATE->cb.stack + | stl zero, SAVE_PC(sp) // Any value outside of bytecode is ok. + | bis CTSTATE, zero, CARG1 + | bis sp, zero, CARG2 + | call_intern lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ldl BASE, L:CRET1->base + | ldl RC, L:CRET1->top + | bis CRET1, zero, L + | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). + | ldl LFUNC:RB, FRAME_FUNC(BASE) + | ifmovs TMP3, TOBIT + | ldi TISNIL, LJ_TNIL(zero) + | ldi TISNUM, LJ_TISNUM(zero) + | li_vmstate INTERP + | subw RC, BASE, RC + | cleartp LFUNC:RB + | st_vmstate + | fcvtsd TOBIT, TOBIT + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. 
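+ |// lj_ccallback_leave() converts the Lua return values; the C return registers
+ |// are then reloaded from cb.gpr[]/cb.fpr[] before unwinding via ->vm_leave_unw.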
+ |.if FFI + | load_got lj_ccallback_leave + | ldl CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) + | stl BASE, L->base + | stl RB, L->top + | stl L, CTSTATE->L + | bis CTSTATE, zero, CARG1 + | bis RA, zero, CARG2 + | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) + | fldd FCRET1, CTSTATE->cb.fpr[0] + | ldl CRET1, CTSTATE->cb.gpr[0] + | fldd FCRET2, CTSTATE->cb.fpr[1] + | ldl CRET2, CTSTATE->cb.gpr[1] + | br zero, ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | ldw TMP1, CCSTATE->spadj + | ldbu CARG2, CCSTATE->nsp + | bis sp, zero, TMP2 + | subl sp, TMP1, sp + | stl ra, -8(TMP2) + |// s8addwi CARG2, 0, CARG2 + | stl r9, -16(TMP2) + | stl CCSTATE, -24(TMP2) + | bis TMP2, zero, r9 + | ldi TMP1, offsetof(CCallState, stack)(CCSTATE) + | bis sp, zero, TMP2 + | addl TMP1, CARG2, TMP3 + | beq CARG2, >2 + |1: + | ldl TMP0, 0(TMP1) + | ldi TMP1, 8(TMP1) + | cmpult TMP1, TMP3, TMP4 + | stl TMP0, 0(TMP2) + | ldi TMP2, 8(TMP2) + | bne TMP4, <1 + |2: + | ldl CFUNCADDR, CCSTATE->func + | fldd FCARG1, CCSTATE->gpr[0] + | fldd FCARG2, CCSTATE->gpr[1] + | fldd FCARG3, CCSTATE->gpr[2] + | fldd FCARG4, CCSTATE->gpr[3] + | fldd FCARG5, CCSTATE->gpr[4] + | fldd FCARG6, CCSTATE->gpr[5] + | ldl CARG2, CCSTATE->gpr[1] + | ldl CARG3, CCSTATE->gpr[2] + | ldl CARG4, CCSTATE->gpr[3] + | ldl CARG5, CCSTATE->gpr[4] + | ldl CARG6, CCSTATE->gpr[5] + | ldl CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | call r26, 0(CFUNCADDR) + | ldl CCSTATE:TMP1, -24(r9) + | ldl TMP2, -16(r9) + | ldl ra, -8(r9) + | stl CRET1, CCSTATE:TMP1->gpr[0] + | stl CRET2, CCSTATE:TMP1->gpr[1] + | fstd FCRET1, CCSTATE:TMP1->fpr[0] + | fstd FCRET2, CCSTATE:TMP1->fpr[1] + | bis r9, zero, sp + | bis TMP2, zero, r9 + | ret zero, 0(ra) + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +//TODO cmx +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + | addl RA, BASE, RA + | addl RD, BASE, RD + if (op == BC_ISLT || op == BC_ISGE) { + | ldl CARG1, 0(RA) + | ldl CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + } else { + | ldl CARG2, 0(RA) + | ldl CARG1, 0(RD) + | gettp CARG3, CARG2 + | gettp CARG4, CARG1 + } + | ldhu TMP2, OFS_RD(PC) // TMP2=jump + | ldi PC, 4(PC) + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG3, TISNUM, AT + | beq AT, >2 + | decode_BC4b TMP2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG4, TISNUM, AT + | beq AT, >5 + | addwi CARG1, 0, CARG1 + | addwi CARG2, 0, CARG2 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | cmplt CARG1, CARG2, TMP1 + | addw TMP2, TMP3, TMP2 // TMP2=(jump-0x8000)<<2 + if (op == BC_ISLT || op == BC_ISGT) { + | seleq TMP1, zero, TMP2, TMP2 + } else { + | selne TMP1, zero, TMP2,TMP2 + } + |1: + | addl PC, TMP2, PC + | ins_next + | + |2: // RA is not an integer. 
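+ |// Slow paths 2-5: two doubles compare directly in FP; for a mixed
+ |// integer/number pair the integer side is converted to a double first;
+ |// any non-number operand goes to ->vmeta_comp.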
+ | // cmpulti CARG3, LJ_TISNUM, TMP1 + | ldi TMP1, LJ_TISNUM(zero) + | cmpult CARG3, TMP1, TMP1 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | beq TMP1, ->vmeta_comp + | // cmpulti CARG4, LJ_TISNUM, TMP1 + | ldi TMP1, LJ_TISNUM(zero) + | cmpult CARG4, TMP1, TMP1 + | decode_BC4b TMP2 + | beq TMP1, >4 + | ifmovd CARG1, FTMP0 + | ifmovd CARG2, FTMP2 + |3: // RA and RD are both numbers. + if (op == BC_ISLT || op == BC_ISGE) { + | fcmplt FTMP0, FTMP2, FTMP3 + } else { + | fcmplt FTMP0, FTMP2, FTMP3 + | fcmpun FTMP0, FTMP2, FTMP4 //TODO FCC CHECK + | faddd FTMP3, FTMP4, FTMP3 + } + | addw TMP2, TMP3, TMP2 + | fimovd FTMP3, TMP3 //TODO CHECK + if (op == BC_ISLT || op == BC_ISGT) { + | seleq TMP3, zero, TMP2, TMP2 + } else { + | selne TMP3, zero, TMP2, TMP2 + } + | br zero, <1 + | + |4: // RA is a number, RD is not a number. + | // RA is a number, RD is an integer. Convert RD to a number. + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG4, TISNUM, AT + | beq AT, ->vmeta_comp + if (op == BC_ISLT || op == BC_ISGE) { + | ifmovs CARG2, FTMP2 + | ifmovd CARG1, FTMP0 + | fcvtwl FTMP2, FTMP2 + | fcvtld FTMP2, FTMP2 + } else { + | ifmovs CARG1, FTMP0 + | ifmovd CARG2, FTMP2 + | fcvtwl FTMP0, FTMP0 + | fcvtld FTMP0, FTMP0 + } + | br zero, <3 + | + |5: // RA is an integer, RD is not an integer + | // cmpulti CARG4, LJ_TISNUM, TMP1 + | ldi TMP1, LJ_TISNUM(zero) + | cmpult CARG4, TMP1, TMP1 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | beq TMP1, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + if (op == BC_ISLT || op == BC_ISGE) { + | ifmovs CARG1, FTMP0 + | ifmovd CARG2, FTMP2 + | fcvtwl FTMP0, FTMP0 + | fcvtld FTMP0, FTMP0 + } else { + | ifmovs CARG2, FTMP2 + | ifmovd CARG1, FTMP0 + | fcvtwl FTMP2, FTMP2 + | fcvtld FTMP2, FTMP2 + } + | br zero, <3 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | addl RA, BASE, RA + | addl RD, BASE, RD + | ldi PC, 4(PC) + | ldl CARG1, 0(RA) + | ldl CARG2, 0(RD) + | ldhu TMP2, -4+OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult TISNUM, CARG3, TMP0 + | cmpult TISNUM, CARG4, TMP1 + | bis TMP0, TMP1, TMP0 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + if (vk) { + | beq TMP0, ->BC_ISEQN_Z + } else { + | beq TMP0, ->BC_ISNEN_Z + } + |// Either or both types are not numbers. + |.if FFI + | // Check if RA or RD is a cdata. + | ldi TMP0, LJ_TCDATA(zero) + | addwi TMP0, 0, TMP0 + | cmpeq CARG3, TMP0, AT + | bne AT, ->vmeta_equal_cd + | cmpeq CARG4, TMP0, AT + | bne AT, ->vmeta_equal_cd + |.endif + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | decode_BC4b TMP2 + | addw TMP2, TMP3, TMP2 // (jump-0x8000)<<2 + | cmpeq CARG1, CARG2, AT + | beq AT, >2 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | addl PC, TMP2, PC + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor CARG3, CARG4, TMP3 // Same type? + | // cmpulti CARG3, LJ_TISTABUD+1, TMP0 // Table or userdata? TMP0=1 + | ldi TMP0, LJ_TISTABUD+1(zero) + | cmpult CARG3, TMP0, TMP0 + | selne TMP3, zero, TMP0, TMP0 // TMP0=0: not same type, or same type table/userdata + | cleartp TAB:TMP1, CARG1 + if (vk) { + | beq TMP0, <1 + } else { + | beq TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | ldl TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beq TAB:TMP3, <1 // No metatable? 
+ | ldbu TMP3, TAB:TMP3->nomm + | andi TMP3, 1<BC_ISEQV_Z // No metatable? + | ldbu TMP3, TAB:TMP3->nomm + | andi TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? + } + | br zero, ->vmeta_equal // Handle __eq metamethod. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | addl RA, BASE, RA + | ldi PC, 4(PC) + | ldl CARG1, 0(RA) + | subl KBASE, RD, RD + | ldhu TMP2, -4+OFS_RD(PC) + | ldl CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp CARG3, CARG1 + | ldi TMP1, LJ_TCDATA(zero) + | addwi TMP1, 0, TMP1 + |.endif + | ldi TMP0, LJ_TSTR(zero) + | addwi TMP0, 0, TMP0 + | decode_BC4b TMP2 + | settp CARG2, TMP0 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + |.if FFI + | cmpeq CARG3, TMP1, AT + | bne AT, ->vmeta_equal_cd + |.endif + | xor CARG1, CARG2, TMP0 // TMP2=0: A==D; TMP2!=0: A!=D + | addw TMP2, TMP3, TMP2 + if (vk) { + | selne TMP0, zero, TMP2, TMP2 + } else { + | seleq TMP0, zero, TMP2, TMP2 + } + | addl PC, TMP2, PC + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | addl RA, BASE, RA + | addl RD, KBASE, RD + | ldl CARG1, 0(RA) + | ldl CARG2, 0(RD) + | ldhu TMP2, OFS_RD(PC) + | ldi PC, 4(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | decode_BC4b TMP2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG3, TISNUM, AT + | beq AT, >4 + | addw TMP2, TMP3, TMP2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG4, TISNUM, AT + | beq AT, >6 + | xor CARG1, CARG2, TMP0 // TMP0=0: A==D; TMP0!=0: A!=D + if (vk) { + | selne TMP0, zero, TMP2, TMP2 + |1: + | addl PC, TMP2, PC + |2: + } else { + | seleq TMP0, zero, TMP2, TMP2 + |1: + |2: + | addl PC, TMP2, PC + } + |3: + | ins_next + | + |4: // RA is not an integer. + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult CARG3, TISNUM, TMP0 + | addw TMP2, TMP3, TMP2 + |.if FFI + | beq TMP0, >7 + |.else + | beq TMP0, <2 + |.endif + | ifmovd CARG1, FTMP0 + | ifmovd CARG2, FTMP2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG4, TISNUM, AT + | beq AT, >5 + |// RA is a number, RD is an integer. + | ldl TMP3, 0(RD) + | addw TMP3, zero, TMP3 //get [0:31] of RD + | ifmovd TMP3, FTMP2 + | fcvtld FTMP2, FTMP2 + | + |5: // RA and RD are both numbers. + | fcmpun FTMP0, FTMP2, FTMP3 + | fimovd FTMP3, TMP4 //tmp4=2:is NaN; tmp0=0:isnot NaN + | bne TMP4, >9 + | fcmpeq FTMP0, FTMP2, FTMP4 + | fimovd FTMP4, TMP1 //tmp1=0:is eq + if (vk) { + | seleq TMP1, zero, TMP2, TMP2 + } else { + | selne TMP1, zero, TMP2, TMP2 + } + | br zero, <1 + | + |6: // RA is an integer, RD is a number. 
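+ |// Convert the integer in RA to a double and continue the comparison at <5.
+ |// NaN operands are routed to >9 and always compare as not-equal.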
+ | ldi TISNUM, LJ_TISNUM(zero) + | cmpult CARG4, TISNUM, TMP0 + |.if FFI + | beq TMP0, >8 + |.else + | beq TMP0, <2 + |.endif + | ifmovs CARG1, FTMP0 + | ifmovd CARG2, FTMP2 + | fcvtwl FTMP0, FTMP0 + | fcvtld FTMP0, FTMP0 + | br zero, <5 + | + |.if FFI + |7: // RA not int, not number + | ldi TMP0, LJ_TCDATA(zero) + | addwi TMP0, 0, TMP0 + | cmpeq CARG3, TMP0, AT + | beq AT, <2 + | br zero, ->vmeta_equal_cd + | + |8: // RD not int, not number + | ldi TMP0, LJ_TCDATA(zero) + | addwi TMP0, 0, TMP0 + | cmpeq CARG4, TMP0, AT + | beq AT, <2 + | br zero, ->vmeta_equal_cd + |.endif + | + |9: //is NaN + if (vk) { + | selne TMP4, zero, TMP2, TMP2 + } else { + | seleq TMP4, zero, TMP2, TMP2 + } + | br zero, <1 + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | addl RA, BASE, RA + |.if SW64_CORE4 + | srlwi RD, 3, TMP0 + |.else + | zapi RD, 0xf0, TMP0 + | srli TMP0, 3, TMP0 + |.endif + | ldl TMP1, 0(RA) + | ornot zero, TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 + | ldhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target + | gettp TMP1, TMP1 + | ldi PC, 4(PC) + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + |.if FFI + | ldi TMP3, LJ_TCDATA(zero) + | addwi TMP3, 0, TMP3 + | cmpeq TMP1, TMP3, AT + | bne AT, ->vmeta_equal_cd + |.endif + | decode_BC4b TMP2 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | addw TMP2, TMP3, TMP2 // TMP2=(jump-0x8000)<<2 + if (vk) { + | selne TMP0, zero, TMP2, TMP2 + } else { + | seleq TMP0, zero, TMP2, TMP2 + } + | addl PC, TMP2, PC + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | addl RD, BASE, RD + | ldhu TMP2, OFS_RD(PC) + | ldl TMP0, 0(RD) + | ldi PC, 4(PC) + | gettp TMP0, TMP0 + | addl RA, BASE, RA + | // cmpulti TMP0, LJ_TISTRUECOND, TMP0 // TMP0=1 true; TMP0=0 false + | ldi AT, LJ_TISTRUECOND(zero) + | cmpult TMP0, AT, TMP0 + | decode_BC4b TMP2 + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | ldl CRET1, 0(RD) + | addw TMP2, TMP3, TMP2 // (jump-0x8000)<<2 + if (op == BC_IST || op == BC_ISTC) { + | beq TMP0, >1 + if (op == BC_ISTC) { + | stl CRET1, 0(RA) + } + } else { + | bne TMP0, >1 + if (op == BC_ISFC) { + | stl CRET1, 0(RA) + } + } + | addl PC, TMP2, PC + |1: + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | addl BASE, RA, TMP0 + |.if SW64_CORE4 + | srlwi RD, 3, TMP1 + |.else + | zapi RD, 0xf0, TMP1 + | srli TMP1, 3, TMP1 + |.endif + | ldl TMP0, 0(TMP0) + | gettp TMP0, TMP0 + | addl TMP0, TMP1, TMP0 // if itype of RA == type, then TMP0=0 + | bne TMP0, ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | addl BASE, RA, TMP0 + | ldl TMP0, 0(TMP0) + | checknum TMP0, ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | addl RD, BASE, RD + | addl RA, BASE, RA + | ldl TMP0, 0(RD) + | ins_next1 + | stl TMP0, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | addl RD, BASE, RD + | addl RA, BASE, RA + | ldl TMP0, 0(RD) + | ldi TMP1, LJ_TTRUE(zero) + | ins_next1 + | gettp TMP0, TMP0 + | cmpult TMP1, TMP0, TMP0 + | addwi TMP0, 1, TMP0 + | slli TMP0, 47, TMP0 + | ornot zero, TMP0, TMP0 // ~TMP0 + | stl TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | addl BASE, RD, RB + | addl BASE, 
RA, RA + | ldl TMP0, 0(RB) + | ldih TMP1, -32768(zero) + | gettp CARG3, TMP0 + | ldi TISNUM, LJ_TISNUM(zero); cmpeq CARG3, TISNUM, AT; beq AT, >1 + | subw zero, TMP0, TMP0 + | cmpeq TMP0, TMP1, AT; bne AT, ->vmeta_unm // Meta handler deals with -2^31. + | zapi TMP0, 0xf0, TMP0 + | ldi TISNUM, LJ_TISNUM(zero) + | settp TMP0, TISNUM + | br zero, >2 + |1: + | ldi AT, LJ_TISNUM(zero); cmpult CARG3, AT, TMP3 + | slli TMP1, 32, TMP1 + | beq TMP3, ->vmeta_unm + | xor TMP0, TMP1, TMP0 // sign => ~sign + |2: + | stl TMP0, 0(RA) + | ins_next + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | addl BASE, RD, CARG2 + | ldl TMP0, 0(CARG2) + | addl BASE, RA, RA + | gettp TMP1, TMP0 + | ldi TMP2, -LJ_TSTR(TMP1) + | cleartp STR:CARG1, TMP0 + | bne TMP2, >2 + | ldw CRET1, STR:CARG1->len + |1: + | ldi TISNUM, LJ_TISNUM(zero) + | settp CRET1, TISNUM + | stl CRET1, 0(RA) + | ins_next + |2: + | ldi TMP2, -LJ_TTAB(TMP1) + | bne TMP2, ->vmeta_len +#if LJ_52 + | ldl TAB:TMP2, TAB:CARG1->metatable + | bne TAB:TMP2, >9 + |3: +#endif + |->BC_LEN_Z: + | load_got lj_tab_len + | call_intern lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | br zero, <1 +#if LJ_52 + |9: + | ldbu TMP0, TAB:TMP2->nomm + | andi TMP0, 1<vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, b, c, a + | fdivd b, c, FCARG1 + | br ra, ->vm_floor // floor(b/c) + | fmuld FCRET1, c, a + | fsubd b, a, a // b - floor(b/c)*c + |.endmacro + | + |.macro ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||if (vk == 1) { + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | decode_RB RC, INS + | decode_RDtoRC8 RB, RD + ||} else { + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | decode_RB RB, INS + | decode_RDtoRC8 RC, RD + ||} + ||switch (vk) { + ||case 0: // suffix is VN + | addl RB, BASE, RB + | addl RC, KBASE, RC + || break; + ||case 1: // suffix is NV + | addl RC, BASE, RC + | addl RB, KBASE, RB + || break; + ||default: // CAT or suffix is VV + | addl RB, BASE, RB + | addl RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arithfp, fpins, itype1, itype2 + | fldd FTMP0, 0(RB) + | ldi TISNUM, LJ_TISNUM(zero) + | cmpult itype1, TISNUM, itype1 + | cmpult itype2, TISNUM, itype2 + | fldd FTMP2, 0(RC) + | and itype1, itype2, itype1 + | addl RA, BASE, RA + | beq itype1, ->vmeta_arith + | fpins FTMP0, FTMP2, FCRET1 + | ins_next1 + | fstd FCRET1, 0(RA) + | ins_next2 + |.endmacro + | + |.macro ins_arithead, itype1, itype2, tval1, tval2 + | ldl tval1, 0(RB) + | ldl tval2, 0(RC) + | // Check for two integers. + | gettp itype1, tval1 + | gettp itype2, tval2 + |.endmacro + | + |.macro ins_arithdn, intins, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | beq AT, >1 + | cmpeq TMP1, TISNUM, AT + | beq AT, >1 + | addwi CARG1, 0, CARG3 + | addwi CARG2, 0, CARG4 + |.if "intins" == "addw" + | intins CARG3, CARG4, CRET1 + | xor CRET1, CARG3, TMP1 // ((y^a) & (y^b)) < 0: overflow. + | xor CRET1, CARG4, TMP2 + | and TMP1, TMP2, TMP1 + | addl RA, BASE, RA + | cmplt TMP1, zero, AT + | bne AT, ->vmeta_arith + |.elif "intins" == "subw" + | intins CARG3, CARG4, CRET1 + | xor CRET1, CARG3, TMP1 // ((y^a) & (a^b)) < 0: overflow. 
+ | xor CARG3, CARG4, TMP2 + | and TMP1, TMP2, TMP1 + | addl RA, BASE, RA + | cmplt TMP1, zero, AT + | bne AT, ->vmeta_arith + |.elif "intins" == "mulw" //TODO CHECK + | mulw CARG3, CARG4, CRET1 + | mull CARG3, CARG4, TMP2 + | zapi TMP2, 0xf, TMP2 + |.if SW64_CORE4 + | srawi CRET1, 31, TMP1 // 63-32bit not all 0 or 1: overflow. + |.else + | addwi CRET1, 0, CRET1 + | srai CRET1, 31, TMP1 // 63-32bit not all 0 or 1: overflow. + |.endif + | addl RA, BASE, RA + | cmpeq TMP1, TMP2, AT + | beq AT, ->vmeta_arith + |.endif + | zapi CRET1, 0xf0, CRET1 + | ldi TISNUM, LJ_TISNUM(zero) + | settp CRET1, TISNUM + | stl CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithdiv, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithmod, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | load_got lj_vm_modi + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq TMP0, TISNUM, AT + | beq AT, >1 + | cmpeq TMP1, TISNUM, AT + | beq AT, >1 + | addwi CARG1, 0, CARG1 + | addwi CARG2, 0, CARG2 + | addl RA, BASE, RA + | beq CARG2, ->vmeta_arith + | call_intern lj_vm_modi + | zapi CRET1, 0xf0, CRET1 + | ldi TISNUM, LJ_TISNUM(zero) + | settp CRET1, TISNUM + | stl CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn addw, faddd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subw, fsubd + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn mulw, fmuld + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdivd + break; + case BC_MODVN: case BC_MODNV: case BC_MODVV: + | ins_arithmod fpmod + break; + case BC_POW: + | ins_arithpre + | ldl CARG1, 0(RB) + | ldl CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | // cmpulti TMP0, LJ_TISNUM, TMP0 + | // cmpulti TMP1, LJ_TISNUM, TMP1 + | ldi AT, LJ_TISNUM(zero) + | cmpult TMP0, AT, TMP0 + | cmpult TMP1, AT, TMP1 + | and TMP0, TMP1, TMP0 + | addl RA, BASE, RA + | load_got pow + | beq TMP0, ->vmeta_arith + | fldd FCARG1, 0(RB) + | fldd FCARG2, 0(RC) + | call_extern + | ins_next1 + | fstd FCRET1, 0(RA) + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB RB, INS + | decode_RDtoRC8 RC, RD + | subl RC, RB, CARG3 + | stl BASE, L->base + | addl BASE, RC, CARG2 + | bis RB, zero, MULTRES + |->BC_CAT_Z: + | load_got lj_meta_cat + |.if SW64_CORE4 + | srlwi CARG3, 3, CARG3 + |.else + | zapi CARG3, 0xf0, CARG3 + | srli CARG3, 3, CARG3 + |.endif + | stl PC, SAVE_PC(sp) + | bis L, zero, CARG1 + | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). 
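+ |// A non-NULL return means a metamethod has to be called (->vmeta_binop);
+ |// otherwise the concatenated result at BASE+MULTRES is copied into slot RA.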
+ | ldl BASE, L->base + | bne CRET1, ->vmeta_binop + | addl BASE, MULTRES, RB + | ldl TMP0, 0(RB) + | addl RA, BASE, RA + | stl TMP0, 0(RA) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | subl KBASE, RD, TMP1 + | ldi TMP2, LJ_TSTR(zero) + | addwi TMP2, 0, TMP2 + | ldl TMP0, -8(TMP1) // KBASE-8-str_const*8 + | addl RA, BASE, RA + | settp TMP0, TMP2 + | stl TMP0, 0(RA) + | ins_next + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | subl KBASE, RD, TMP1 + | ldl TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | ldi TMP2, LJ_TCDATA(zero) + | addwi TMP2, 0, TMP2 + | addl RA, BASE, RA + | settp TMP0, TMP2 + | stl TMP0, 0(RA) + | ins_next + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + |.if SW64_CORE4 + | srawi INS, 16, RD + |.else + | addwi INS, 0, RD + | srai RD, 16, RD + |.endif + | addl RA, BASE, RA + | zapi RD, 0xf0, RD + | ldi TISNUM, LJ_TISNUM(zero) + | settp RD, TISNUM + | stl RD, 0(RA) + | ins_next + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | addl RD, KBASE, RD + | addl RA, BASE, RA + | ldl TMP0, 0(RD) + | stl TMP0, 0(RA) + | ins_next + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | addl RA, BASE, RA + | slli RD, 44, TMP0 // 44+3 + | ornot zero, TMP0, TMP0 // ~TMP0 + | stl TMP0, 0(RA) + | ins_next + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | addl RA, BASE, RA + | stl TISNIL, 0(RA) + | ldi RA, 8(RA) + | addl RD, BASE, RD + |1: + | stl TISNIL, 0(RA) + | cmplt RA, RD, TMP0 + | ldi RA, 8(RA) + | bne TMP0, <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ldl LFUNC:TMP0, FRAME_FUNC(BASE) + | addl RA, BASE, RA + | cleartp LFUNC:TMP0 + | addl RD, LFUNC:TMP0, RD + | ldl UPVAL:TMP0, LFUNC:RD->uvptr + | ldl TMP1, UPVAL:TMP0->v + | ldl TMP2, 0(TMP1) + | ins_next1 + | stl TMP2, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ldl LFUNC:TMP0, FRAME_FUNC(BASE) + | addl RD, BASE, RD + | cleartp LFUNC:TMP0 + | addl RA, LFUNC:TMP0, RA + | ldl UPVAL:TMP0, LFUNC:RA->uvptr + | ldl CRET1, 0(RD) + | ldbu TMP3, UPVAL:TMP0->marked + | ldl CARG2, UPVAL:TMP0->v + | andi TMP3, LJ_GC_BLACK, TMP3 // isblack(uv) + | ldbu TMP0, UPVAL:TMP0->closed + | gettp TMP2, CRET1 + | stl CRET1, 0(CARG2) + | bis TMP3, TMP0, TMP3 + | ldi TMP0, LJ_GC_BLACK|1(zero) + | ldi TMP2, -(LJ_TNUMX+1)(TMP2) + | cmpeq TMP3, TMP0, AT + | bne AT, >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | // cmpulti TMP2, LJ_TISGCV - (LJ_TNUMX+1), TMP0 + | ldi TMP0, (LJ_TISGCV-(LJ_TNUMX+1))(zero) + | cmpult TMP2, TMP0, TMP0 + | cleartp GCOBJ:CRET1, CRET1 + | beq TMP0, <1 // tvisgcv(v) + | ldbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, LJ_GC_WHITES, TMP3 // iswhite(v) + | load_got lj_gc_barrieruv + | beq TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. 
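+ | // A black, closed upvalue must not point at a white object, so the barrier
+ | // is moved forward with lj_gc_barrieruv; CARG1 is the global_State pointer,
+ | // recovered from DISPATCH via the GG_DISP2G offset.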
+ | ldi CARG1, GG_DISP2G(DISPATCH) + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + | br zero, <1 + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ldl LFUNC:TMP0, FRAME_FUNC(BASE) + | subl KBASE, RD, TMP1 + | cleartp LFUNC:TMP0 + | addl RA, LFUNC:TMP0, RA + | ldl UPVAL:TMP0, LFUNC:RA->uvptr + | ldl STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | ldbu TMP2, UPVAL:TMP0->marked + | ldl CARG2, UPVAL:TMP0->v + | ldbu TMP3, STR:TMP1->marked + | andi TMP2, LJ_GC_BLACK, TMP4 // isblack(uv) + | ldbu TMP2, UPVAL:TMP0->closed + | ldi TMP0, LJ_TSTR(zero) + | settp TMP1, TMP0 + | stl TMP1, 0(CARG2) + | bne TMP4, >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | andi TMP3, LJ_GC_WHITES, TMP0 // iswhite(str) + | beq TMP2, <1 + | load_got lj_gc_barrieruv + | beq TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. + | ldi CARG1, GG_DISP2G(DISPATCH) + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + | br zero, <1 + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ldl LFUNC:TMP0, FRAME_FUNC(BASE) + | addl RD, KBASE, RD + | cleartp LFUNC:TMP0 + | addl RA, LFUNC:TMP0, TMP0 + | ldl UPVAL:TMP0, LFUNC:TMP0->uvptr + | ldl TMP1, 0(RD) + | ldl TMP0, UPVAL:TMP0->v + | stl TMP1, 0(TMP0) + | ins_next + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ldl LFUNC:TMP0, FRAME_FUNC(BASE) + | slli RD, 44, TMP2 + | cleartp LFUNC:TMP0 + | addl RA, LFUNC:TMP0, TMP0 + | ornot zero, TMP2, TMP2 // ~TMP2 + | ldl UPVAL:TMP0, LFUNC:TMP0->uvptr + | ldl TMP1, UPVAL:TMP0->v + | stl TMP2, 0(TMP1) + | ins_next + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ldl TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | load_got lj_func_closeuv + | stl BASE, L->base + | bis L, zero, CARG1 + | beq TMP2, >1 + | addl BASE, RA, CARG2 + | call_intern lj_func_closeuv // (lua_State *L, TValue *level) + | ldl BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | load_got lj_func_newL_gc + | subl KBASE, RD, TMP1 + | ldl CARG3, FRAME_FUNC(BASE) + | ldl CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | stl BASE, L->base + | stl PC, SAVE_PC(sp) + | cleartp CARG3 + | bis L, zero, CARG1 + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern lj_func_newL_gc + | // Returns GCfuncL *. + | ldi TMP0, LJ_TFUNC(zero) + | ldl BASE, L->base + | settp CRET1, TMP0 + | addl RA, BASE, RA + | stl CRET1, 0(RA) + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ldl TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ldl TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | stl BASE, L->base + | cmpult TMP0, TMP1, TMP2 + | stl PC, SAVE_PC(sp) + | beq TMP2, >5 + |1: + if (op == BC_TNEW) { + | load_got lj_tab_new + |.if SW64_CORE4 + | srlwi RD, 3, CARG2 + |.else + | zapi RD, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + |.endif + | ldi AT, 0x7ff(zero) + | and CARG2, AT, CARG2 + | ldi TMP0, 0x801(zero) + | subw CARG2, AT, TMP2 + |.if SW64_CORE4 + | srlwi RD, 14, CARG3 + |.else + | zapi RD, 0xf0, CARG3 + | srli CARG3, 14, CARG3 + |.endif + | seleq TMP2, TMP0, CARG2, CARG2 + | // (lua_State *L, int32_t asize, uint32_t hbits) + | bis L, zero, CARG1 + | call_intern lj_tab_new + | // Returns Table *. 
} else {
+ | load_got lj_tab_dup
+ | subl KBASE, RD, TMP1
+ | bis L, zero, CARG1
+ | ldl CARG2, -8(TMP1) // KBASE-8-str_const*8
+ | call_intern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns Table *.
+ }
+ | ldi TMP0, LJ_TTAB(zero)
+ | ldl BASE, L->base
+ | ins_next1
+ | settp CRET1, TMP0
+ | addl RA, BASE, RA
+ | stl CRET1, 0(RA)
+ | ins_next2
+ |5:
+ | load_got lj_gc_step_fixtop
+ | bis RD, zero, MULTRES
+ | bis L, zero, CARG1
+ | call_intern lj_gc_step_fixtop // (lua_State *L)
+ | bis MULTRES, zero, RD
+ | br zero, <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst*8, RD = str_const*8 (~)
+ case BC_GSET:
+ | // RA = src*8, RD = str_const*8 (~)
+ | ldl LFUNC:TMP0, FRAME_FUNC(BASE)
+ | subl KBASE, RD, TMP1
+ | ldl STR:RC, -8(TMP1) // KBASE-8-str_const*8
+ | cleartp LFUNC:TMP0
+ | ldl TAB:RB, LFUNC:TMP0->env
+ | addl RA, BASE, RA
+ if (op == BC_GGET) {
+ | br zero, ->BC_TGETS_Z
+ } else {
+ | br zero, ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl BASE, RB, CARG2
+ | addl BASE, RC, CARG3
+ | ldl TAB:RB, 0(CARG2)
+ | ldl TMP2, 0(CARG3)
+ | addl RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tgetv
+ | gettp TMP3, TMP2
+ | ldw TMP0, TAB:RB->asize
+ | ldi TISNUM, LJ_TISNUM(zero)
+ | cmpeq TMP3, TISNUM, AT
+ | beq AT, >5 // Integer key?
+ | addwi TMP2, 0, TMP2
+ | ldl TMP1, TAB:RB->array
+ | cmpult TMP2, TMP0, TMP3 // array part (keys = [0, asize-1])
+ | s8addwi TMP2, 0, TMP2
+ | beq TMP3, ->vmeta_tgetv // Integer key and in array part?
+ | addl TMP2, TMP1, TMP2
+ | ldl AT, 0(TMP2)
+ | ldl CRET1, 0(TMP2)
+ | cmpeq AT, TISNIL, AT
+ | bne AT, >2
+ |1:
+ | ins_next1
+ | stl CRET1, 0(RA)
+ | ins_next2
+ |
+ |2: // Check for __index if table value is nil.
+ | ldl TAB:TMP2, TAB:RB->metatable
+ | beq TAB:TMP2, <1 // No metatable: done.
+ | ldbu TMP0, TAB:TMP2->nomm
+ | andi TMP0, 1<<MM_index, TMP0
+ | bne TMP0, <1 // 'no __index' flag set: done.
+ | br zero, ->vmeta_tgetv
+ |
+ |5:
+ | ldi TMP0, LJ_TSTR(zero)
+ | cleartp RC, TMP2
+ | cmpeq TMP3, TMP0, AT
+ | beq AT, ->vmeta_tgetv // String key?
+ | br zero, ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD //TODO CHECK
+ | addl BASE, RB, CARG2
+ | subl KBASE, RC, CARG3
+ | ldl TAB:RB, 0(CARG2)
+ | addl RA, BASE, RA
+ | ldl STR:RC, -8(CARG3) // KBASE-8-str_const*8
+ | checktab TAB:RB, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldw TMP0, TAB:RB->hmask
+ | ldw TMP1, STR:RC->sid
+ | ldl NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP0, TMP1 // idx = str->sid & tab->hmask
+ |.if SW64_CORE4
+ | sllwi TMP1, 5, TMP0
+ | sllwi TMP1, 3, TMP1
+ |.else
+ | slli TMP1, 5, TMP0
+ | addwi TMP0, 0, TMP0
+ | s8addwi TMP1, 0, TMP1
+ |.endif
+ | subw TMP0, TMP1, TMP1
+ | ldi TMP3, LJ_TSTR(zero)
+ | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |1:
+ | ldl CARG1, NODE:TMP2->key
+ | ldl CRET1, NODE:TMP2->val
+ | ldl NODE:TMP1, NODE:TMP2->next
+ | ldl TAB:TMP3, TAB:RB->metatable
+ | cmpeq CARG1, RC, TMP4
+ | beq TMP4, >4
+ | cmpeq CRET1, TISNIL, TMP4
+ | bne TMP4, >5 // Key found, but nil value?
+ |3:
+ | ins_next1
+ | stl CRET1, 0(RA)
+ | ins_next2
+ |
+ |4: // Follow hash chain.
+ | bis NODE:TMP1, zero, NODE:TMP2
+ | bne NODE:TMP1, <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | bis TISNIL, zero, CRET1
+ | beq TAB:TMP3, <3 // No metatable: done.
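+ | // tab->nomm is a negative cache: a set 1<<MM_index bit means the table is
+ | // known to lack an __index metamethod, so the nil result can be returned
+ | // directly; it is cleared again when a string key is stored through
+ | // BC_TSETS_Z (the "Clear metamethod cache" store).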
| ldbu TMP0, TAB:TMP3->nomm
+ | andi TMP0, 1<<MM_index, TMP0
+ | bne TMP0, <3 // 'no __index' flag set: done.
+ | br zero, ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | // RA = dst*8, RB = table*8, RC = index*8
+ | decode_RB RB, INS
+ | addl BASE, RB, CARG2
+ | decode_RDtoRC8 RC, RD
+ | ldl TAB:RB, 0(CARG2)
+ | addl RA, BASE, RA
+ |.if SW64_CORE4
+ | srlwi RC, 3, TMP0
+ |.else
+ | zapi RC, 0xf0, TMP0
+ | srli TMP0, 3, TMP0
+ |.endif
+ | checktab TAB:RB, ->vmeta_tgetb
+ | ldw TMP1, TAB:RB->asize
+ | ldl TMP2, TAB:RB->array
+ | cmpult TMP0, TMP1, TMP1
+ | addl RC, TMP2, RC
+ | beq TMP1, ->vmeta_tgetb
+ | ldl CRET1, 0(RC)
+ | cmpeq CRET1, TISNIL, AT
+ | bne AT, >5
+ |1:
+ | ins_next1
+ | stl CRET1, 0(RA)
+ | ins_next2
+ |
+ |5: // Check for __index if table value is nil.
+ | ldl TAB:TMP2, TAB:RB->metatable
+ | beq TAB:TMP2, <1 // No metatable: done.
+ | ldbu TMP1, TAB:TMP2->nomm
+ | andi TMP1, 1<<MM_index, TMP1
+ | bne TMP1, <1 // 'no __index' flag set: done.
+ | br zero, ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
+ break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl RB, BASE, RB
+ | addl RC, BASE, RC
+ | ldl TAB:CARG1, 0(RB)
+ | ldw CARG2, 0(RC)
+ | addl RA, BASE, RA
+ | cleartp TAB:CARG1
+ | ldw TMP0, TAB:CARG1->asize
+ | ldl TMP1, TAB:CARG1->array
+ | cmpult CARG2, TMP0, TMP0
+ | s8addwi CARG2, 0, TMP2
+ | addl TMP1, TMP2, CRET1
+ | beq TMP0, ->vmeta_tgetr // In array part?
+ | ldl CARG2, 0(CRET1)
+ |->BC_TGETR_Z:
+ | ins_next1
+ | stl CARG2, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_TSETV:
+ | // RA = src*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl BASE, RB, CARG2
+ | addl BASE, RC, CARG3
+ | ldl RB, 0(CARG2)
+ | ldl TMP2, 0(CARG3)
+ | addl RA, BASE, RA
+ | checktab RB, ->vmeta_tsetv
+ | addwi TMP2, 0, RC
+ | checkint TMP2, >5
+ | ldw TMP0, TAB:RB->asize
+ | ldl TMP1, TAB:RB->array
+ | cmpult RC, TMP0, TMP0
+ | s8addwi RC, 0, TMP2
+ | beq TMP0, ->vmeta_tsetv // Integer key and in array part?
+ | addl TMP1, TMP2, TMP1
+ | ldbu TMP3, TAB:RB->marked
+ | ldl TMP0, 0(TMP1)
+ | ldl CRET1, 0(RA)
+ | cmpeq TMP0, TISNIL, AT
+ | bne AT, >3
+ |1:
+ | andi TMP3, LJ_GC_BLACK, TMP2 // isblack(table)
+ | stl CRET1, 0(TMP1)
+ | bne TMP2, >7
+ |2:
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | ldl TAB:TMP2, TAB:RB->metatable
+ | beq TAB:TMP2, <1 // No metatable: done.
+ | ldbu TMP2, TAB:TMP2->nomm
+ | andi TMP2, 1<<MM_newindex, TMP2
+ | bne TMP2, <1 // 'no __newindex' flag set: done.
+ | br zero, ->vmeta_tsetv
+ |5:
+ | gettp TMP0, TMP2
+ | ldi TMP0, -LJ_TSTR(TMP0)
+ | bne TMP0, ->vmeta_tsetv
+ | cleartp STR:RC, TMP2
+ | br zero, ->BC_TSETS_Z // String key?
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETS:
+ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl BASE, RB, CARG2
+ | subl KBASE, RC, CARG3
+ | ldl TAB:RB, 0(CARG2)
+ | ldl RC, -8(CARG3) // KBASE-8-str_const*8
+ | addl RA, BASE, RA
+ | cleartp STR:RC
+ | checktab TAB:RB, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+ | ldw TMP0, TAB:RB->hmask
+ | ldw TMP1, STR:RC->sid
+ | ldl NODE:TMP2, TAB:RB->node
+ | stb zero, TAB:RB->nomm // Clear metamethod cache.
+ | and TMP1, TMP0, TMP1 // idx = str->sid & tab->hmask
+ |.if SW64_CORE4
+ | sllwi TMP1, 5, TMP0
+ | sllwi TMP1, 3, TMP1
+ |.else
+ | slli TMP1, 5, TMP0
+ | addwi TMP0, 0, TMP0
+ | s8addwi TMP1, 0, TMP1
+ |.endif
+ | subw TMP0, TMP1, TMP1
+ | ldi TMP3, LJ_TSTR(zero)
+ | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
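+ | // Rough equivalent in C (assuming the usual 24-byte LJ_GC64 Node layout of
+ | // val, key, next):
+ | //   uint32_t idx = str->sid & t->hmask;
+ | //   Node *n = (Node *)((char *)t->node + idx*32 - idx*8); /* &t->node[idx] */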
| fldd f9, 0(RA)
+ |1:
+ | ldl TMP0, NODE:TMP2->key
+ | ldl CARG2, NODE:TMP2->val
+ | ldl NODE:TMP1, NODE:TMP2->next
+ | ldbu TMP3, TAB:RB->marked
+ | cmpeq TMP0, RC, AT
+ | beq AT, >5
+ | ldl TAB:TMP0, TAB:RB->metatable
+ | cmpeq CARG2, TISNIL, AT
+ | bne AT, >4 // Key found, but nil value?
+ |2:
+ | andi TMP3, LJ_GC_BLACK, TMP3 // isblack(table)
+ | fstd f9, NODE:TMP2->val
+ | bne TMP3, >7
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | beq TAB:TMP0, <2 // No metatable: done.
+ | ldbu TMP0, TAB:TMP0->nomm
+ | andi TMP0, 1<<MM_newindex, TMP0
+ | bne TMP0, <2 // 'no __newindex' flag set: done.
+ | br zero, ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | bis NODE:TMP1, zero, NODE:TMP2
+ | bne NODE:TMP1, <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ldl TAB:TMP2, TAB:RB->metatable
+ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH)
+ | beq TAB:TMP2, >6 // No metatable: continue.
+ | ldbu TMP0, TAB:TMP2->nomm
+ | andi TMP0, 1<<MM_newindex, TMP0
+ | beq TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | load_got lj_tab_newkey
+ | stl RC, 0(CARG3)
+ | stl BASE, L->base
+ | bis TAB:RB, zero, CARG2
+ | stl PC, SAVE_PC(sp)
+ | bis L, zero, CARG1
+ | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ldl BASE, L->base
+ | fstd f9, 0(CRET1)
+ | br zero, <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <3
+ break;
+ case BC_TSETB:
+ | // RA = src*8, RB = table*8, RC = index*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl BASE, RB, CARG2
+ | addl RA, BASE, RA
+ | ldl TAB:RB, 0(CARG2)
+ |.if SW64_CORE4
+ | srlwi RC, 3, TMP0
+ |.else
+ | zapi RC, 0xf0, TMP0
+ | srli TMP0, 3, TMP0
+ |.endif
+ | checktab RB, ->vmeta_tsetb
+ | ldw TMP1, TAB:RB->asize
+ | ldl TMP2, TAB:RB->array
+ | cmpult TMP0, TMP1, TMP1
+ | addl RC, TMP2, RC
+ | beq TMP1, ->vmeta_tsetb
+ | ldl TMP1, 0(RC)
+ | ldbu TMP3, TAB:RB->marked
+ | cmpeq TMP1, TISNIL, AT
+ | bne AT, >5
+ |1:
+ | ldl CRET1, 0(RA)
+ | andi TMP3, LJ_GC_BLACK, TMP1 // isblack(table)
+ | stl CRET1, 0(RC)
+ | bne TMP1, >7
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldl TAB:TMP2, TAB:RB->metatable
+ | beq TAB:TMP2, <1 // No metatable: done.
+ | ldbu TMP1, TAB:TMP2->nomm
+ | andi TMP1, 1<<MM_newindex, TMP1
+ | bne TMP1, <1 // 'no __newindex' flag set: done.
+ | br zero, ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB RB, INS
+ | decode_RDtoRC8 RC, RD
+ | addl BASE, RB, CARG1
+ | addl BASE, RC, CARG3
+ | ldl TAB:CARG2, 0(CARG1)
+ | ldw CARG3, 0(CARG3)
+ | cleartp TAB:CARG2
+ | ldbu TMP3, TAB:CARG2->marked
+ | ldw TMP0, TAB:CARG2->asize
+ | ldl TMP1, TAB:CARG2->array
+ | andi TMP3, LJ_GC_BLACK, TMP2 // isblack(table)
+ | addl RA, BASE, RA
+ | bne TMP2, >7
+ |2:
+ | cmpult CARG3, TMP0, TMP0
+ | s8addwi CARG3, 0, TMP2
+ | addl TMP1, TMP2, CRET1
+ | beq TMP0, ->vmeta_tsetr // In array part?
+ |->BC_TSETR_Z:
+ | ldl TMP1, 0(RA)
+ | ins_next1
+ | stl TMP1, 0(CRET1)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
+ | addl RA, BASE, RA
+ |1:
+ | addl KBASE, RD, TMP3
+ | ldl TAB:CARG2, -8(RA) // Guaranteed to be a table.
+ | subwi MULTRES, 8, TMP0
+ | ldw TMP3, 0(TMP3) // Integer constant is in lo-word.
+ |.if SW64_CORE4 + | srlwi TMP0, 3, CARG3 + |.else + | zapi TMP0, 0xf0, CARG3 + | srli CARG3, 3, CARG3 + |.endif + | beq TMP0, >4 // Nothing to copy? + | cleartp TAB:CARG2 + | addw CARG3, TMP3, CARG3 + | ldw TMP2, TAB:CARG2->asize + | s8addwi TMP3, 0, TMP1 + | ldbu TMP3, TAB:CARG2->marked + | ldl CARG1, TAB:CARG2->array + | cmpult TMP2, CARG3, TMP4 + | addl TMP0, RA, TMP2 + | bne TMP4, >5 + | addl TMP1, CARG1, TMP1 + | andi TMP3, LJ_GC_BLACK, TMP0 // isblack(table) + |3: // Copy result slots to table. + | ldl CRET1, 0(RA) + | ldi RA, 8(RA) + | cmpult RA, TMP2, TMP4 + | stl CRET1, 0(TMP1) + | ldi TMP1, 8(TMP1) + | bne TMP4, <3 + | bne TMP0, >7 + |4: + | ins_next + | + |5: // Need to resize array part. + | load_got lj_tab_reasize + | stl BASE, L->base + | stl PC, SAVE_PC(sp) + | bis RD, zero, BASE + | bis L, zero, CARG1 + | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | // Must not reallocate the stack. + | bis BASE, zero, RD + | ldl BASE, L->base // Reload BASE for lack of a saved register. + | br zero, <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | addw NARGS8:RC, MULTRES, NARGS8:RC + | br zero, ->BC_CALL_Z + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | bis BASE, zero, TMP2 + | addl BASE, RA, BASE + | ldl LFUNC:RB, 0(BASE) + | ldi BASE, 16(BASE) + | subwi NARGS8:RC, 8, NARGS8:RC + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addw NARGS8:RD, MULTRES, NARGS8:RD + | br zero, ->BC_CALLT_Z1 + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT_Z1: + | addl RA, BASE, RA + | ldl LFUNC:RB, 0(RA) + | bis RD, zero, NARGS8:RC + | ldl TMP1, FRAME_PC(BASE) + | ldi RA, 16(RA) + | subwi NARGS8:RC, 8, NARGS8:RC + | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP1, FRAME_TYPE, TMP0 // Caveat: preserve TMP0 until the 'or'. + | ldbu TMP3, LFUNC:CARG3->ffid + | xori TMP1, FRAME_VARG, TMP2 + | bne TMP0, >7 + |1: + | stl LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | cmpulti TMP3, 2, CARG4 // (> FF_C) Calling a fast function? + | bis BASE, zero, TMP2 + | bis CARG3, zero, RB + | bis NARGS8:RC, zero, TMP3 + | beq NARGS8:RC, >3 + |2: + | ldl CRET1, 0(RA) + | ldi RA, 8(RA) + | subwi TMP3, 8, TMP3 + | stl CRET1, 0(TMP2) + | ldi TMP2, 8(TMP2) + | bne TMP3, <2 + |3: + | bis TMP0, CARG4, TMP0 + | beq TMP0, >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | ldw INS, -4(TMP1) + | decode_RA RA, INS + | subl BASE, RA, TMP1 + | ldl TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ldl TMP1, LFUNC:TMP1->pc + | ldl KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | br zero, <4 + | + |7: // Tailcall from a vararg function. + | andi TMP2, FRAME_TYPEP, CARG4 + | subl BASE, TMP2, TMP2 // Relocate BASE down. + | bne CARG4, <1 // Vararg frame below? + | bis TMP2, zero, BASE + | ldl TMP1, FRAME_PC(TMP2) + | andi TMP1, FRAME_TYPE, TMP0 + | br zero, <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | bis BASE, zero, TMP2 // Save old BASE for vmeta_call. 
+ | addl BASE, RA, BASE + | ldl RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. + | ldl CARG1, -16(BASE) + | ldl CARG2, -8(BASE) + | ldi NARGS8:RC, 16(zero) // Iterators get 2 arguments. + | stl RB, 0(BASE) // Copy callable. + | stl CARG1, 16(BASE) // Copy state. + | stl CARG2, 24(BASE) // Copy control var. + | ldi BASE, 16(BASE) + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + | addl RA, BASE, RA + | ldl TAB:RB, -16(RA) + | ldw RC, -8(RA) // Get index from control var. + | cleartp TAB:RB + | ldi PC, 4(PC) + | ldw TMP0, TAB:RB->asize + | ldl TMP1, TAB:RB->array + | ldi TISNUM, LJ_TISNUM(zero) + | slli TISNUM, 47, CARG3 + |1: // Traverse array part. + | cmpult RC, TMP0, TMP2 + | s8addwi RC, 0, TMP3 + | beq TMP2, >5 // Index points after array part? + | addl TMP3, TMP1, TMP3 + | ldl CARG1, 0(TMP3) + | ldhu RD, -4+OFS_RD(PC) // ITERL RD + | bis RC, CARG3, TMP2 + | addwi RC, 1, RC + | cmpeq CARG1, TISNIL, AT + | bne AT, <1 // Skip holes in array part. + | stl TMP2, 0(RA) + | stl CARG1, 8(RA) + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | decode_BC4b RD + | addl RD, TMP3, RD + | stw RC, -8(RA) // Update control var. + | addl PC, RD, PC + |3: + | ins_next + | + |5: // Traverse hash part. + | ldw TMP1, TAB:RB->hmask + | subw RC, TMP0, RC + | ldl TMP2, TAB:RB->node + |6: + | cmpult TMP1, RC, CARG1 // End of iteration? Branch to ITERL+1. + |.if SW64_CORE4 + | sllwi RC, 5, TMP3 + |.else + | slli RC, 5, TMP3 + | addwi TMP3, 0, TMP3 + |.endif + | bne CARG1, <3 + | s8addwi RC, 0, RB + | subw TMP3, RB, TMP3 + | addl TMP3, TMP2, NODE:TMP3 // node = tab->node + (idx*32-idx*8) + | ldl CARG1, 0(NODE:TMP3) + | ldhu RD, -4+OFS_RD(PC) // ITERL RD + | addwi RC, 1, RC + | cmpeq CARG1, TISNIL, AT + | bne AT, <6 // Skip holes in hash part. + | ldl CARG2, NODE:TMP3->key + | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 + | stl CARG1, 8(RA) + | addw RC, TMP0, RC + | decode_BC4b RD + | addw RD, TMP3, RD + | stl CARG2, 0(RA) + | addl PC, RD, PC + | stw RC, -8(RA) // Update control var. + | br zero, <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | addl RA, BASE, RA + |.if SW64_CORE4 + | srlwi RD, 1, TMP0 + |.else + | zapi RD, 0xf0, TMP0 + | srli TMP0, 1, TMP0 + |.endif + | ldl CFUNC:CARG1, -24(RA) + | addl TMP0, PC, TMP0 + | ldl CARG2, -16(RA) + | ldl CARG3, -8(RA) + | ldih TMP2, -0x2(zero) // -BCBIAS_J*4 + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | ldi CARG2, -LJ_TTAB(CARG2) + | ldbu TMP1, CFUNC:CARG1->ffid + | ldi CARG3, -LJ_TNIL(CARG3) + | bis CARG2, CARG3, TMP3 + | ldi TMP1, -FF_next_N(TMP1) + | bis TMP3, TMP1, TMP3 + | ldih TMP1, 0x1(zero) + | ldi TMP1, -0x2(TMP1) //LJ_KEYINDEX + | bne TMP3, >5 + | addl TMP0, TMP2, PC + | slli TMP1, 16, TMP1 + | ldi TMP1, 0x7fff(TMP1) + | slli TMP1, 32, TMP1 //make TMP1=0xfffe7fff00000000 + | stl TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | ldi TMP3, BC_JMP(zero) + | ldi TMP1, BC_ITERC(zero) + | stb TMP3, -4+OFS_OP(PC) + | addl TMP0, TMP2, PC + | stb TMP1, OFS_OP(PC) + | br zero, <1 + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ldl TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB RB, INS + | addl RC, BASE, RC + | addl RA, BASE, RA + | ldi RC, FRAME_VARG(RC) + | addl RA, RB, TMP2 + | ldi TMP3, -16(BASE) // TMP3 = vtop + | subl RC, TMP0, RC // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. 
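+ | // In that case there are no vararg slots at all: vbase (RC) already points
+ | // at or past vtop (TMP3), the cmpult in the copy loop below stays 0, and
+ | // the selne stores nil into each destination slot instead.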
+ | subl TMP3, RC, TMP1 + | beq RB, >5 // Copy all varargs? + | ldi TMP2, -16(TMP2) + |1: // Copy vararg slots to destination slots. + | ldl CARG1, 0(RC) + | cmpult RC, TMP3, TMP0 + | ldi RC, 8(RC) + | selne TMP0, CARG1, TISNIL, CARG1 + | stl CARG1, 0(RA) + | cmpult RA, TMP2, TMP0 + | ldi RA, 8(RA) + | bne TMP0, <1 + |3: + | ins_next + | + |5: // Copy all varargs. + | ldl TMP0, L->maxstack + | ldi MULTRES, 8(zero) // MULTRES = (0+1)*8 + | cmplt zero, TMP1, AT + | beq AT, <3 // No vararg slots? + | addl RA, TMP1, TMP2 + | cmpult TMP0, TMP2, TMP2 + | ldi MULTRES, 8(TMP1) + | bne TMP2, >7 + |6: + | ldl CRET1, 0(RC) + | ldi RC, 8(RC) + | stl CRET1, 0(RA) + | cmpult RC, TMP3, TMP0 + | ldi RA, 8(RA) + | bne TMP0, <6 // More vararg slots? + | br zero, <3 + | + |7: // Grow stack for varargs. + | load_got lj_state_growstack + | stl RA, L->top + | subl RA, BASE, RA + | stl BASE, L->base + | subl RC, BASE, BASE // Need delta, because BASE may change. + | stl PC, SAVE_PC(sp) + |.if SW64_CORE4 + | srlwi TMP1, 3, CARG2 + |.else + | zapi TMP1, 0xf0, CARG2 + | srli CARG2, 3, CARG2 + |.endif + | bis L, zero, CARG1 + | call_intern lj_state_growstack // (lua_State *L, int n) + | bis BASE, zero, RC + | ldl BASE, L->base + | addl RA, BASE, RA + | addl RC, BASE, RC + | ldi TMP3, -16(BASE) + | br zero, <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addw RD, MULTRES, RD + | br zero, ->BC_RET_Z1 + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + |->BC_RET_Z1: + | ldl PC, FRAME_PC(BASE) + | addl RA, BASE, RA + | bis RD, zero, MULTRES + |1: + | andi PC, FRAME_TYPE, TMP0 + | xori PC, FRAME_VARG, TMP1 + | bne TMP0, ->BC_RETV_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | ldw INS, -4(PC) + | ldi TMP2, -16(BASE) + | ldi RC, -8(RD) + | decode_RA TMP0, INS + | decode_RB RB, INS + | addl TMP2, RB, TMP3 + | subl TMP2, TMP0, BASE + | beq RC, >3 + |2: + | ldl CRET1, 0(RA) + | ldi RA, 8(RA) + | ldi RC, -8(RC) + | stl CRET1, 0(TMP2) + | ldi TMP2, 8(TMP2) + | bne RC, <2 + |3: + | ldi TMP3, -8(TMP3) + |5: + | cmpult TMP2, TMP3, TMP0 + | ldl LFUNC:TMP1, FRAME_FUNC(BASE) + | bne TMP0, >6 + | cleartp LFUNC:TMP1 + | ldl TMP1, LFUNC:TMP1->pc + | ldl KBASE, PC2PROTO(k)(TMP1) + | ins_next + | + |6: // Fill up results with nil. + | stl TISNIL, 0(TMP2) + | ldi TMP2, 8(TMP2) + | br zero, <5 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP1, FRAME_TYPEP, TMP2 + | bne TMP2, ->vm_return + | // Return from vararg function: relocate BASE down. + | subl BASE, TMP1, BASE + | ldl PC, FRAME_PC(BASE) + | br zero, <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ldl PC, FRAME_PC(BASE) + | addl RA, BASE, RA + | bis RD, zero, MULTRES + | andi PC, FRAME_TYPE, TMP0 + | xori PC, FRAME_VARG, TMP1 + | bne TMP0, ->BC_RETV_Z + | ldw INS, -4(PC) + | ldi TMP2, -16(BASE) + if (op == BC_RET1) { + | ldl CRET1, 0(RA) + } + | decode_RB RB, INS + | decode_RA RA, INS + | subl TMP2, RA, BASE + if (op == BC_RET1) { + | stl CRET1, 0(TMP2) + } + |5: + | cmpult RD, RB, TMP0 + | ldl TMP1, FRAME_FUNC(BASE) + | bne TMP0, >6 + | cleartp LFUNC:TMP1 + | ldl TMP1, LFUNC:TMP1->pc + | ins_next1 + | ldl KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. 
+ | ldi TMP2, 8(TMP2) + | ldi RD, 8(RD) + if (op == BC_RET1) { + | stl TISNIL, 0(TMP2) + } else { + | stl TISNIL, -8(TMP2) + } + | br zero, <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | addl RA, BASE, RA + | ldl CARG1, FORL_IDX*8(RA) // CARG1 = IDX + | ldl CARG2, FORL_STEP*8(RA) // CARG2 = STEP + | ldl CARG3, FORL_STOP*8(RA) // CARG3 = STOP + | gettp CARG4, CARG1 + | gettp CARG5, CARG2 + | gettp CRET2, CARG3 + if (op != BC_JFORL) { + |.if SW64_CORE4 + | srlwi RD, 1, RD + |.else + | zapi RD, 0xf0, RD + | srli RD, 1, RD + |.endif + | ldih TMP2, -0x2(zero) // -BCBIAS_J<<2 + | addl TMP2, RD, TMP2 + } + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CARG4, TISNUM, AT + | beq AT, >3 + | addwi CARG1, 0, CARG4 // start + | addwi CARG3, 0, CARG3 // stop + if (!vk) { // init + | ldi TISNUM, LJ_TISNUM(zero) + | cmpeq CRET2, TISNUM, AT + | beq AT,->vmeta_for + | cmpeq CARG5, TISNUM, AT + | beq AT, ->vmeta_for + | .DEXTM TMP0, CARG2, 31, 1 // sign + | cmplt CARG3, CARG4, CARG2 + | cmplt CARG4, CARG3, TMP1 + | selne TMP0, TMP1, CARG2, CARG2 // CARG2=0: +,start <= stop or -,start >= stop + } else { + | addwi CARG2, 0, CARG5 // step + | addw CARG4, CARG5, CARG1 // start + step + | xor CARG1, CARG4, TMP3 // y^a + | xor CARG1, CARG5, TMP1 // y^b + | and TMP3, TMP1, TMP3 + | cmplt CARG1, CARG3, TMP1 // start+step < stop ? + | cmplt CARG3, CARG1, CARG3 // stop < start+step ? + | cmplt CARG5, zero, TMP0 // step < 0 ? + | cmplt TMP3, zero, TMP3 // ((y^a) & (y^b)) < 0: overflow. + | selne TMP0, TMP1, CARG3, CARG3 + | bis CARG3, TMP3, CARG2 // CARG2=1: overflow; CARG2=0: continue + | zapi CARG1, 0xf0, CARG1 + | ldi TISNUM, LJ_TISNUM(zero) + | settp CARG1, TISNUM + | stl CARG1, FORL_IDX*8(RA) + } + |1: + if (op == BC_FORI) { + | seleq CARG2, zero, TMP2, TMP2 // CARG2!=0: jump out the loop; CARG2==0: next INS + | addl PC, TMP2, PC + } else if (op == BC_JFORI) { + | addl PC, TMP2, PC + | ldhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | selne CARG2, zero, TMP2, TMP2 // CARG2!=0: next INS; CARG2==0: jump back + | addl PC, TMP2, PC + } + | ins_next1 + | stl CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | decode_BC8b RD + | beq CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop + } else if (op == BC_JFORL) { + | beq CARG2, =>BC_JLOOP + } + | ins_next2 + | //TODO FCC + |3: // FP loop. + | fldd FTMP0, FORL_IDX*8(RA) // start + | fldd FTMP1, FORL_STOP*8(RA) // stop + | ldl TMP0, FORL_STEP*8(RA) // step + | cmplt TMP0, zero, TMP0 // step < 0 ? + | ifmovd TMP0, FTMP2 + if (!vk) { + | // cmpulti CARG4, LJ_TISNUM, TMP3 // start is number ? + | // cmpulti CARG5, LJ_TISNUM, TMP0 // step is number ? + | // cmpulti CARG6, LJ_TISNUM, TMP1 // stop is number ? + | ldi TMP1, LJ_TISNUM(zero) + | cmpult CARG4, TMP1, TMP3 + | cmpult CARG5, TMP1, TMP0 + | cmpult CRET2, TMP1, TMP1 + | and TMP3, TMP1, TMP3 + | and TMP0, TMP3, TMP0 + | beq TMP0, ->vmeta_for // if start or step or stop isn't number + | fcmplt FTMP0, FTMP1, FTMP3 // start < stop ? + | fcmplt FTMP1, FTMP0, FTMP4 // stop < start ? 
| fseleq FTMP2, FTMP4, FTMP3, FTMP2 //TODO CHECK
+ | fimovd FTMP2, CARG2 // CARG2=0: +,start<stop or -,start>stop
+ | br zero, <1
+ } else {
+ | fldd FTMP3, FORL_STEP*8(RA)
+ | faddd FTMP0, FTMP3, FTMP0 // start + step
+ | fcmplt FTMP0, FTMP1, FTMP3 // start + step < stop ?
+ | fcmplt FTMP1, FTMP0, FTMP4
+ | fseleq FTMP2, FTMP4, FTMP3, FTMP2
+ | fimovd FTMP2, CARG2
+ if (op == BC_IFORL) {
+ | selne CARG2, zero, TMP2, TMP2
+ | addl PC, TMP2, PC
+ }
+ | fstd FTMP0, FORL_IDX*8(RA)
+ | ins_next1
+ | fstd FTMP0, FORL_EXT*8(RA)
+ | br zero, <2
+ }
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base*8, RD = target
+ | addl RA, BASE, RA
+ | ldl TMP1, 0(RA)
+ | cmpeq TMP1, TISNIL, AT
+ | bne AT, >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | stl TMP1, -8(RA)
+ | br zero, =>BC_JLOOP
+ } else {
+ | branch_RD // Otherwise save control var + branch.
+ | stl TMP1, -8(RA)
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base*8 (ignored), RD = traceno*8
+ | ldl TMP0, DISPATCH_J(trace)(DISPATCH)
+ | addl TMP0, RD, TMP0
+ | // Traces on SW64 don't store the trace number, so use 0.
+ | stl zero, DISPATCH_GL(vmstate)(DISPATCH)
+ | ldl TRACE:TMP1, 0(TMP0)
+ | stl BASE, DISPATCH_GL(jit_base)(DISPATCH) // Store current BASE as jit_base.
+ | ldl TMP1, TRACE:TMP1->mcode
+ | ldi JGL, GG_DISP2G+32768(DISPATCH)
+ | stl L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
+ | jmp zero, 0(TMP1)
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base*8 (only used by trace recorder), RD = target
+ | branch_RD // PC + (jump - 0x8000)<<2
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | ldl TMP2, L->maxstack
+ | ldbu TMP1, -4+PC2PROTO(numparams)(PC)
+ | ldl KBASE, -4+PC2PROTO(k)(PC)
+ | cmpult TMP2, RA, TMP0
+ | s8addwi TMP1, 0, TMP1 // numparams*8
+ | bne TMP0, ->vm_growstack_l
+ |2:
+ | cmpult NARGS8:RC, TMP1, TMP0 // Check for missing parameters.
+ | bne TMP0, >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RD, INS
+ | br zero, =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | addl BASE, NARGS8:RC, TMP0
+ | stl TISNIL, 0(TMP0)
+ | addwi NARGS8:RC, 8, NARGS8:RC
+ | br zero, <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | ldi TMP0, LJ_TFUNC(zero)
+ | addwi TMP0, 0, TMP0
+ | addl BASE, RC, TMP1
+ | ldl TMP2, L->maxstack
+ | settp LFUNC:RB, TMP0
+ | addl RA, RC, TMP0
+ | stl LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
| ldi TMP2, -8(TMP2)
+ | ldi TMP3, 16+FRAME_VARG(RC)
+ | cmpult TMP0, TMP2, TMP0
+ | ldl KBASE, -4+PC2PROTO(k)(PC)
+ | stl TMP3, 8(TMP1) // Store delta + FRAME_VARG.
+ | beq TMP0, ->vm_growstack_l
+ | ldbu TMP2, -4+PC2PROTO(numparams)(PC)
+ | bis BASE, zero, RA
+ | bis TMP1, zero, RC
+ | ins_next1
+ | ldi BASE, 16(TMP1)
+ | beq TMP2, >3
+ |1:
+ | ldl TMP0, 0(RA)
+ | cmpult RA, RC, AT // Less args than parameters?
+ | bis TMP0, zero, CARG1
+ | selne AT, TMP0, TISNIL, TMP0 // Clear missing parameters.
+ | seleq AT, CARG1, TISNIL, CARG1 // Clear old fixarg slot (help the GC).
+ | subwi TMP2, 1, TMP2
+ | stl TMP0, 16(TMP1)
+ | ldi TMP1, 8(TMP1)
+ | stl CARG1, 0(RA)
+ | ldi RA, 8(RA)
+ | bne TMP2, <1
+ |3:
+ | ins_next2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ldl CFUNCADDR, CFUNC:RB->f
+ } else {
+ | ldl CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
+ }
+ | addl RA, NARGS8:RC, TMP1
+ | ldl TMP2, L->maxstack
+ | addl BASE, NARGS8:RC, RC
+ | stl BASE, L->base // base of currently executing function
+ | stl RC, L->top
+ | cmpult TMP2, TMP1, AT
+ | li_vmstate C // ldi TMP0, ~LJ_VMST_C(zero)
+ if (op == BC_FUNCCW) {
+ | ldl CARG2, CFUNC:RB->f
+ }
+ | bis L, zero, CARG1
+ | bne AT, ->vm_growstack_c // Need to grow stack.
+ | st_vmstate // .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate)
+ | call r26, 0(CFUNCADDR) // (lua_State *L [, lua_CFunction f])
+ | // Returns nresults.
+ | ldl BASE, L->base
+ | ldl TMP1, L->top
+ | stl L, DISPATCH_GL(cur_L)(DISPATCH)
+ | s8addwi CRET1, 0, RD
+ | li_vmstate INTERP
+ | ldl PC, FRAME_PC(BASE) // Fetch PC of caller.
+ | subl TMP1, RD, RA // RA = L->top - nresults*8
+ | st_vmstate
+ | br zero, ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+
+ dasm_growpc(Dst, BC__MAX);
+
+ build_subroutines(ctx);
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op);
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions.
*/ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.4byte .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.4byte 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 31\n" + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.4byte .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.4byte .Lframe0\n" + "\t.8byte .Lbegin\n" + "\t.8byte %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x9f\n\t.sleb128 2*5\n" + "\t.byte 0x9e\n\t.sleb128 2*6\n", + fcofs, CFRAME_SIZE); + for (i = 23; i >= 16; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); + for (i = 31; i >= 24; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.4byte .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.4byte .Lframe0\n" + "\t.8byte lj_vm_ffi_call\n" + "\t.4byte %d\n" + "\t.byte 0x9f\n\t.uleb128 2*1\n" + "\t.byte 0x90\n\t.uleb128 2*2\n" + "\t.byte 0xd\n\t.uleb128 0x10\n" + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + /* NYI */ +#endif + break; + default: + break; + } +}