347 lines
11 KiB
Diff
347 lines
11 KiB
Diff
From 95a04af134431ccc8230aca1641541a5e8fcbdc9 Mon Sep 17 00:00:00 2001
|
|
From: Richard Henderson <rth@redhat.com>
|
|
Date: Tue, 21 Oct 2014 22:41:07 -0400
|
|
Subject: [PATCH 056/415] aarch64: Reduce the size of register_context
|
|
|
|
We don't need to store 32 general and vector registers.
|
|
Only 8 of each are used for parameter passing; x8 is additionally kept for the structure return pointer.
|
|
---
|
|
src/aarch64/ffi.c | 35 ++++++++---------
|
|
src/aarch64/ffitarget.h | 6 ---
|
|
src/aarch64/internal.h | 26 +++++++++++++
|
|
src/aarch64/sysv.S | 100 +++++++++++++++++++++++-------------------------
|
|
4 files changed, 91 insertions(+), 76 deletions(-)
|
|
create mode 100644 src/aarch64/internal.h
|
|
|
|
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
|
|
index a6fcc11..58d088b 100644
|
|
--- a/src/aarch64/ffi.c
|
|
+++ b/src/aarch64/ffi.c
|
|
@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
+#include <stdint.h>
|
|
#include <ffi.h>
|
|
#include <ffi_common.h>
|
|
+#include "internal.h"
|
|
|
|
/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
|
|
all further uses in this file will refer to the 128-bit type. */
|
|
@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
# define FFI_TYPE_LONGDOUBLE 4
|
|
#endif
|
|
|
|
-#define N_X_ARG_REG 8
|
|
-#define N_V_ARG_REG 8
|
|
-
|
|
-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
|
|
-
|
|
union _d
|
|
{
|
|
UINT64 d;
|
|
UINT32 s[2];
|
|
};
|
|
|
|
+struct _v
|
|
+{
|
|
+ union _d d[2] __attribute__((aligned(16)));
|
|
+};
|
|
+
|
|
struct call_context
|
|
{
|
|
- UINT64 x [AARCH64_N_XREG];
|
|
- struct
|
|
- {
|
|
- union _d d[2];
|
|
- } v [AARCH64_N_VREG];
|
|
+ struct _v v[N_V_ARG_REG];
|
|
+ UINT64 x[N_X_ARG_REG];
|
|
+ UINT64 x8;
|
|
};
|
|
|
|
#if defined (__clang__) && defined (__APPLE__)
|
|
-extern void
|
|
-sys_icache_invalidate (void *start, size_t len);
|
|
+extern void sys_icache_invalidate (void *start, size_t len);
|
|
#endif
|
|
|
|
static inline void
|
|
ffi_clear_cache (void *start, void *end)
|
|
{
|
|
#if defined (__clang__) && defined (__APPLE__)
|
|
- sys_icache_invalidate (start, (char *)end - (char *)start);
|
|
+ sys_icache_invalidate (start, (char *)end - (char *)start);
|
|
#elif defined (__GNUC__)
|
|
- __builtin___clear_cache (start, end);
|
|
+ __builtin___clear_cache (start, end);
|
|
#else
|
|
#error "Missing builtin to flush instruction cache"
|
|
#endif
|
|
@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|
|
|
if (is_v_register_candidate (cif->rtype))
|
|
{
|
|
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
|
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
|
}
|
|
else
|
|
{
|
|
@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
|
|
for (i = 0; i < cif->nargs; i++)
|
|
if (is_v_register_candidate (cif->arg_types[i]))
|
|
{
|
|
- cif->aarch64_flags |= AARCH64_FFI_WITH_V;
|
|
+ cif->aarch64_flags |= AARCH64_FLAG_ARG_V;
|
|
break;
|
|
}
|
|
}
|
|
@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
|
|
}
|
|
else
|
|
{
|
|
- memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
|
|
+ context.x8 = (uintptr_t)rvalue;
|
|
ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
|
|
stack_bytes, fn);
|
|
}
|
|
@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
|
|
}
|
|
else
|
|
{
|
|
- memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
|
|
+ rvalue = (void *)(uintptr_t)context->x8;
|
|
(closure->fun) (cif, rvalue, avalue, closure->user_data);
|
|
}
|
|
}
|
|
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
|
|
index 4bbced2..336f28a 100644
|
|
--- a/src/aarch64/ffitarget.h
|
|
+++ b/src/aarch64/ffitarget.h
|
|
@@ -54,10 +54,4 @@ typedef enum ffi_abi
|
|
#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
|
|
#endif
|
|
|
|
-#define AARCH64_FFI_WITH_V_BIT 0
|
|
-
|
|
-#define AARCH64_N_XREG 32
|
|
-#define AARCH64_N_VREG 32
|
|
-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
|
|
-
|
|
#endif
|
|
diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h
|
|
new file mode 100644
|
|
index 0000000..b6b6104
|
|
--- /dev/null
|
|
+++ b/src/aarch64/internal.h
|
|
@@ -0,0 +1,26 @@
|
|
+/*
|
|
+Permission is hereby granted, free of charge, to any person obtaining
|
|
+a copy of this software and associated documentation files (the
|
|
+``Software''), to deal in the Software without restriction, including
|
|
+without limitation the rights to use, copy, modify, merge, publish,
|
|
+distribute, sublicense, and/or sell copies of the Software, and to
|
|
+permit persons to whom the Software is furnished to do so, subject to
|
|
+the following conditions:
|
|
+
|
|
+The above copyright notice and this permission notice shall be
|
|
+included in all copies or substantial portions of the Software.
|
|
+
|
|
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
|
|
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
+
|
|
+#define AARCH64_FLAG_ARG_V_BIT 0
|
|
+#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
|
|
+
|
|
+#define N_X_ARG_REG 8
|
|
+#define N_V_ARG_REG 8
|
|
+#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16)
|
|
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
|
|
index 169eab8..70870db 100644
|
|
--- a/src/aarch64/sysv.S
|
|
+++ b/src/aarch64/sysv.S
|
|
@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
#define LIBFFI_ASM
|
|
#include <fficonfig.h>
|
|
#include <ffi.h>
|
|
+#include "internal.h"
|
|
|
|
#ifdef HAVE_MACHINE_ASM_H
|
|
#include <machine/asm.h>
|
|
@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
|
|
#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
|
|
|
|
.text
|
|
+ .align 2
|
|
+
|
|
.globl CNAME(ffi_call_SYSV)
|
|
#ifdef __ELF__
|
|
.type CNAME(ffi_call_SYSV), #function
|
|
#endif
|
|
-#ifdef __APPLE__
|
|
- .align 2
|
|
-#endif
|
|
|
|
/* ffi_call_SYSV()
|
|
|
|
@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV):
|
|
mov x23, x0
|
|
|
|
/* Figure out if we should touch the vector registers. */
|
|
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
|
+ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
|
|
|
/* Load the vector argument passing registers. */
|
|
- ldp q0, q1, [x21, #8*32 + 0]
|
|
- ldp q2, q3, [x21, #8*32 + 32]
|
|
- ldp q4, q5, [x21, #8*32 + 64]
|
|
- ldp q6, q7, [x21, #8*32 + 96]
|
|
+ ldp q0, q1, [x21, #0]
|
|
+ ldp q2, q3, [x21, #32]
|
|
+ ldp q4, q5, [x21, #64]
|
|
+ ldp q6, q7, [x21, #96]
|
|
1:
|
|
- /* Load the core argument passing registers. */
|
|
- ldp x0, x1, [x21, #0]
|
|
- ldp x2, x3, [x21, #16]
|
|
- ldp x4, x5, [x21, #32]
|
|
- ldp x6, x7, [x21, #48]
|
|
-
|
|
- /* Don't forget x8 which may be holding the address of a return buffer.
|
|
- */
|
|
- ldr x8, [x21, #8*8]
|
|
+ /* Load the core argument passing registers, including
|
|
+ the structure return pointer. */
|
|
+ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
|
+ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
|
+ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
|
+ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
|
+ ldr x8, [x21, #16*N_V_ARG_REG + 64]
|
|
|
|
blr x24
|
|
|
|
/* Save the core argument passing registers. */
|
|
- stp x0, x1, [x21, #0]
|
|
- stp x2, x3, [x21, #16]
|
|
- stp x4, x5, [x21, #32]
|
|
- stp x6, x7, [x21, #48]
|
|
+ stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
|
+ stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
|
+ stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
|
+ stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
|
|
|
/* Note nothing useful ever comes back in x8! */
|
|
|
|
/* Figure out if we should touch the vector registers. */
|
|
- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
|
|
+ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f
|
|
|
|
/* Save the vector argument passing registers. */
|
|
- stp q0, q1, [x21, #8*32 + 0]
|
|
- stp q2, q3, [x21, #8*32 + 32]
|
|
- stp q4, q5, [x21, #8*32 + 64]
|
|
- stp q6, q7, [x21, #8*32 + 96]
|
|
+ stp q0, q1, [x21, #0]
|
|
+ stp q2, q3, [x21, #32]
|
|
+ stp q4, q5, [x21, #64]
|
|
+ stp q6, q7, [x21, #96]
|
|
1:
|
|
/* All done, unwind our stack frame. */
|
|
ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
|
|
@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV):
|
|
.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
|
|
#endif
|
|
|
|
-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
|
|
+#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE)
|
|
|
|
/* ffi_closure_SYSV
|
|
|
|
@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV):
|
|
Voila! */
|
|
|
|
.text
|
|
- .globl CNAME(ffi_closure_SYSV)
|
|
-#ifdef __APPLE__
|
|
.align 2
|
|
-#endif
|
|
+
|
|
+ .globl CNAME(ffi_closure_SYSV)
|
|
.cfi_startproc
|
|
CNAME(ffi_closure_SYSV):
|
|
stp x29, x30, [sp, #-16]!
|
|
@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV):
|
|
/* Preserve our struct trampoline_data * */
|
|
mov x22, x17
|
|
|
|
- /* Save the rest of the argument passing registers. */
|
|
- stp x0, x1, [x21, #0]
|
|
- stp x2, x3, [x21, #16]
|
|
- stp x4, x5, [x21, #32]
|
|
- stp x6, x7, [x21, #48]
|
|
- /* Don't forget we may have been given a result scratch pad address.
|
|
- */
|
|
- str x8, [x21, #64]
|
|
+ /* Save the rest of the argument passing registers, including
|
|
+ the structure return pointer. */
|
|
+ stp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
|
+ stp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
|
+ stp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
|
+ stp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
|
+ str x8, [x21, #16*N_V_ARG_REG + 64]
|
|
|
|
/* Figure out if we should touch the vector registers. */
|
|
ldr x0, [x22, #8]
|
|
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
|
+ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
|
|
|
/* Save the argument passing vector registers. */
|
|
- stp q0, q1, [x21, #8*32 + 0]
|
|
- stp q2, q3, [x21, #8*32 + 32]
|
|
- stp q4, q5, [x21, #8*32 + 64]
|
|
- stp q6, q7, [x21, #8*32 + 96]
|
|
+ stp q0, q1, [x21, #0]
|
|
+ stp q2, q3, [x21, #32]
|
|
+ stp q4, q5, [x21, #64]
|
|
+ stp q6, q7, [x21, #96]
|
|
1:
|
|
/* Load &ffi_closure.. */
|
|
ldr x0, [x22, #0]
|
|
@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV):
|
|
|
|
/* Figure out if we should touch the vector registers. */
|
|
ldr x0, [x22, #8]
|
|
- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
|
|
+ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f
|
|
|
|
/* Load the result passing vector registers. */
|
|
- ldp q0, q1, [x21, #8*32 + 0]
|
|
- ldp q2, q3, [x21, #8*32 + 32]
|
|
- ldp q4, q5, [x21, #8*32 + 64]
|
|
- ldp q6, q7, [x21, #8*32 + 96]
|
|
+ ldp q0, q1, [x21, #0]
|
|
+ ldp q2, q3, [x21, #32]
|
|
+ ldp q4, q5, [x21, #64]
|
|
+ ldp q6, q7, [x21, #96]
|
|
1:
|
|
/* Load the result passing core registers. */
|
|
- ldp x0, x1, [x21, #0]
|
|
- ldp x2, x3, [x21, #16]
|
|
- ldp x4, x5, [x21, #32]
|
|
- ldp x6, x7, [x21, #48]
|
|
+ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0]
|
|
+ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16]
|
|
+ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32]
|
|
+ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48]
|
|
/* Note nothing useful is returned in x8. */
|
|
|
|
/* We are done, unwind our frame. */
|
|
--
|
|
2.7.4.huawei.3
|
|
|