commit a2aa1fabcaa0175d60decd680d80e634df5262d1 Author: overweight <5324761+overweight@user.noreply.gitee.com> Date: Mon Sep 30 10:56:03 2019 -0400 Package init diff --git a/0012-Fix-non-variadic-CIF-initialization-for-Apple-ARM64.patch b/0012-Fix-non-variadic-CIF-initialization-for-Apple-ARM64.patch new file mode 100644 index 0000000..d2bd8c1 --- /dev/null +++ b/0012-Fix-non-variadic-CIF-initialization-for-Apple-ARM64.patch @@ -0,0 +1,43 @@ +From 0f4e09d27dc175ce3e95d070b794351663c7220e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ole=20Andr=C3=A9=20Vadla=20Ravn=C3=A5s?= + +Date: Sat, 26 Jul 2014 00:11:06 +0200 +Subject: [PATCH 012/415] Fix non-variadic CIF initialization for Apple/ARM64 + +Turns out `aarch64_nfixedargs` wasn't initialized in the non-variadic +case, resulting in undefined behavior when allocating arguments. +--- + src/aarch64/ffi.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index b807a2d..05f1a13 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -779,6 +779,8 @@ ffi_prep_cif_machdep (ffi_cif *cif) + } + } + ++ cif->aarch64_nfixedargs = 0; ++ + return FFI_OK; + } + +@@ -789,9 +791,13 @@ ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, + unsigned int nfixedargs, + unsigned int ntotalargs) + { ++ ffi_status status; ++ ++ status = ffi_prep_cif_machdep (cif); ++ + cif->aarch64_nfixedargs = nfixedargs; + +- return ffi_prep_cif_machdep(cif); ++ return status; + } + + #endif +-- +2.7.4.huawei.3 + diff --git a/0013-Fix-alignment-of-FFI_TYPE_FLOAT-for-Apple-s-ARM64-AB.patch b/0013-Fix-alignment-of-FFI_TYPE_FLOAT-for-Apple-s-ARM64-AB.patch new file mode 100644 index 0000000..66b51e2 --- /dev/null +++ b/0013-Fix-alignment-of-FFI_TYPE_FLOAT-for-Apple-s-ARM64-AB.patch @@ -0,0 +1,27 @@ +From aebf2c3023b5ff056a708dc34169f7b549b2ec4f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ole=20Andr=C3=A9=20Vadla=20Ravn=C3=A5s?= + +Date: Fri, 25 Jul 2014 21:40:50 +0200 +Subject: [PATCH 013/415] 
Fix alignment of FFI_TYPE_FLOAT for Apple's ARM64 ABI + +--- + src/aarch64/ffi.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index b807a2d..10e95db 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -146,6 +146,9 @@ get_basic_type_alignment (unsigned short type) + switch (type) + { + case FFI_TYPE_FLOAT: ++#if defined (__APPLE__) ++ return sizeof (UINT32); ++#endif + case FFI_TYPE_DOUBLE: + return sizeof (UINT64); + #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE +-- +2.7.4.huawei.3 + diff --git a/0020-Fix-Werror-declaration-after-statement-problem.patch b/0020-Fix-Werror-declaration-after-statement-problem.patch new file mode 100644 index 0000000..0615fc1 --- /dev/null +++ b/0020-Fix-Werror-declaration-after-statement-problem.patch @@ -0,0 +1,36 @@ +From aaf3101ba81af8f488502881648e3f687721671e Mon Sep 17 00:00:00 2001 +From: Matthias Klose +Date: Sat, 20 Sep 2014 06:37:04 -0400 +Subject: [PATCH 020/411] Fix -Werror=declaration-after-statement problem + +--- + src/arm/ffi.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/arm/ffi.c b/src/arm/ffi.c +index 7cd9289..6691ab5 100644 +--- a/src/arm/ffi.c ++++ b/src/arm/ffi.c +@@ -154,9 +154,6 @@ int ffi_prep_args_SYSV(char *stack, extended_cif *ecif, float *vfp_space) + + int ffi_prep_args_VFP(char *stack, extended_cif *ecif, float *vfp_space) + { +- // make sure we are using FFI_VFP +- FFI_ASSERT(ecif->cif->abi == FFI_VFP); +- + register unsigned int i, vi = 0; + register void **p_argv; + register char *argp, *regp, *eo_regp; +@@ -165,6 +162,9 @@ int ffi_prep_args_VFP(char *stack, extended_cif *ecif, float *vfp_space) + char done_with_regs = 0; + char is_vfp_type; + ++ // make sure we are using FFI_VFP ++ FFI_ASSERT(ecif->cif->abi == FFI_VFP); ++ + /* the first 4 words on the stack are used for values passed in core + * registers. 
*/ + regp = stack; +-- +1.8.3.1 + diff --git a/0052-aarch64-Fix-non-apple-compilation.patch b/0052-aarch64-Fix-non-apple-compilation.patch new file mode 100644 index 0000000..7cc5cc5 --- /dev/null +++ b/0052-aarch64-Fix-non-apple-compilation.patch @@ -0,0 +1,26 @@ +From 18b74ce54afab45fcf2a7d4eb86bb2ce9db8cec8 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 21 Oct 2014 13:00:34 -0400 +Subject: [PATCH 052/415] aarch64: Fix non-apple compilation + +--- + src/aarch64/ffi.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index 5369ea4..cdb7816 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -782,7 +782,9 @@ ffi_prep_cif_machdep (ffi_cif *cif) + } + } + ++#if defined (__APPLE__) + cif->aarch64_nfixedargs = 0; ++#endif + + return FFI_OK; + } +-- +2.7.4.huawei.3 + diff --git a/0053-aarch64-Improve-is_hfa.patch b/0053-aarch64-Improve-is_hfa.patch new file mode 100644 index 0000000..bf3e77d --- /dev/null +++ b/0053-aarch64-Improve-is_hfa.patch @@ -0,0 +1,332 @@ +From 38b54b9c180af13a3371e70a151a1a97e105b03f Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 21 Oct 2014 13:17:39 -0400 +Subject: [PATCH 053/415] aarch64: Improve is_hfa + +The set of functions get_homogeneous_type, element_count, and is_hfa +are all intertwined and recompute data. Return a compound quantity +from is_hfa that contains all the data and avoids the recomputation. +--- + src/aarch64/ffi.c | 212 +++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 131 insertions(+), 81 deletions(-) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index cdb7816..0834614 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -242,88 +242,132 @@ is_floating_type (unsigned short type) + || type == FFI_TYPE_LONGDOUBLE); + } + +-/* Test for a homogeneous structure. */ ++/* A subroutine of is_hfa. Given a structure type, return the type code ++ of the first non-structure element. 
Recurse for structure elements. ++ Return -1 if the structure is in fact empty, i.e. no nested elements. */ + +-static unsigned short +-get_homogeneous_type (ffi_type *ty) ++static int ++is_hfa0 (const ffi_type *ty) + { +- if (ty->type == FFI_TYPE_STRUCT && ty->elements) +- { +- unsigned i; +- unsigned short candidate_type +- = get_homogeneous_type (ty->elements[0]); +- for (i =1; ty->elements[i]; i++) +- { +- unsigned short iteration_type = 0; +- /* If we have a nested struct, we must find its homogeneous type. +- If that fits with our candidate type, we are still +- homogeneous. */ +- if (ty->elements[i]->type == FFI_TYPE_STRUCT +- && ty->elements[i]->elements) +- { +- iteration_type = get_homogeneous_type (ty->elements[i]); +- } +- else +- { +- iteration_type = ty->elements[i]->type; +- } ++ ffi_type **elements = ty->elements; ++ int i, ret = -1; + +- /* If we are not homogeneous, return FFI_TYPE_STRUCT. */ +- if (candidate_type != iteration_type) +- return FFI_TYPE_STRUCT; +- } +- return candidate_type; +- } ++ if (elements != NULL) ++ for (i = 0; elements[i]; ++i) ++ { ++ ret = elements[i]->type; ++ if (ret == FFI_TYPE_STRUCT) ++ { ++ ret = is_hfa0 (elements[i]); ++ if (ret < 0) ++ continue; ++ } ++ break; ++ } + +- /* Base case, we have no more levels of nesting, so we +- are a basic type, and so, trivially homogeneous in that type. */ +- return ty->type; ++ return ret; + } + +-/* Determine the number of elements within a STRUCT. ++/* A subroutine of is_hfa. Given a structure type, return true if all ++ of the non-structure elements are the same as CANDIDATE. */ + +- Note, we must handle nested structs. ++static int ++is_hfa1 (const ffi_type *ty, int candidate) ++{ ++ ffi_type **elements = ty->elements; ++ int i; + +- If ty is not a STRUCT this function will return 0. 
*/ ++ if (elements != NULL) ++ for (i = 0; elements[i]; ++i) ++ { ++ int t = elements[i]->type; ++ if (t == FFI_TYPE_STRUCT) ++ { ++ if (!is_hfa1 (elements[i], candidate)) ++ return 0; ++ } ++ else if (t != candidate) ++ return 0; ++ } + +-static unsigned +-element_count (ffi_type *ty) +-{ +- if (ty->type == FFI_TYPE_STRUCT && ty->elements) +- { +- unsigned n; +- unsigned elems = 0; +- for (n = 0; ty->elements[n]; n++) +- { +- if (ty->elements[n]->type == FFI_TYPE_STRUCT +- && ty->elements[n]->elements) +- elems += element_count (ty->elements[n]); +- else +- elems++; +- } +- return elems; +- } +- return 0; ++ return 1; + } + +-/* Test for a homogeneous floating point aggregate. ++/* Determine if TY is an homogenous floating point aggregate (HFA). ++ That is, a structure consisting of 1 to 4 members of all the same type, ++ where that type is a floating point scalar. + +- A homogeneous floating point aggregate is a homogeneous aggregate of +- a half- single- or double- precision floating point type with one +- to four elements. Note that this includes nested structs of the +- basic type. */ ++ Returns non-zero iff TY is an HFA. The result is an encoded value where ++ bits 0-7 contain the type code, and bits 8-10 contain the element count. */ + + static int +-is_hfa (ffi_type *ty) ++is_hfa(const ffi_type *ty) + { +- if (ty->type == FFI_TYPE_STRUCT +- && ty->elements[0] +- && is_floating_type (get_homogeneous_type (ty))) ++ ffi_type **elements; ++ int candidate, i; ++ size_t size, ele_count; ++ ++ /* Quickest tests first. */ ++ if (ty->type != FFI_TYPE_STRUCT) ++ return 0; ++ ++ /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */ ++ size = ty->size; ++ if (size < 4 || size > 64) ++ return 0; ++ ++ /* Find the type of the first non-structure member. 
*/ ++ elements = ty->elements; ++ candidate = elements[0]->type; ++ if (candidate == FFI_TYPE_STRUCT) + { +- unsigned n = element_count (ty); +- return n >= 1 && n <= 4; ++ for (i = 0; ; ++i) ++ { ++ candidate = is_hfa0 (elements[i]); ++ if (candidate >= 0) ++ break; ++ } + } +- return 0; ++ ++ /* If the first member is not a floating point type, it's not an HFA. ++ Also quickly re-check the size of the structure. */ ++ switch (candidate) ++ { ++ case FFI_TYPE_FLOAT: ++ ele_count = size / sizeof(float); ++ if (size != ele_count * sizeof(float)) ++ return 0; ++ break; ++ case FFI_TYPE_DOUBLE: ++ ele_count = size / sizeof(double); ++ if (size != ele_count * sizeof(double)) ++ return 0; ++ break; ++ case FFI_TYPE_LONGDOUBLE: ++ ele_count = size / sizeof(long double); ++ if (size != ele_count * sizeof(long double)) ++ return 0; ++ break; ++ default: ++ return 0; ++ } ++ if (ele_count > 4) ++ return 0; ++ ++ /* Finally, make sure that all scalar elements are the same type. */ ++ for (i = 0; elements[i]; ++i) ++ { ++ if (elements[i]->type == FFI_TYPE_STRUCT) ++ { ++ if (!is_hfa1 (elements[i], candidate)) ++ return 0; ++ } ++ else if (elements[i]->type != candidate) ++ return 0; ++ } ++ ++ /* All tests succeeded. Encode the result. */ ++ return (ele_count << 8) | candidate; + } + + /* Test if an ffi_type is a candidate for passing in a register. +@@ -559,7 +603,10 @@ copy_hfa_to_reg_or_stack (void *memory, + unsigned char *stack, + struct arg_state *state) + { +- unsigned elems = element_count (ty); ++ int h = is_hfa (ty); ++ int type = h & 0xff; ++ unsigned elems = h >> 8; ++ + if (available_v (state) < elems) + { + /* There are insufficient V registers. 
Further V register allocations +@@ -573,7 +620,6 @@ copy_hfa_to_reg_or_stack (void *memory, + else + { + int i; +- unsigned short type = get_homogeneous_type (ty); + for (i = 0; i < elems; i++) + { + void *reg = allocate_to_v (context, state); +@@ -813,6 +859,7 @@ void + ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) + { + extended_cif ecif; ++ int h; + + ecif.cif = cif; + ecif.avalue = avalue; +@@ -861,11 +908,12 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) + } + + case FFI_TYPE_STRUCT: +- if (is_hfa (cif->rtype)) ++ h = is_hfa (cif->rtype); ++ if (h) + { + int j; +- unsigned short type = get_homogeneous_type (cif->rtype); +- unsigned elems = element_count (cif->rtype); ++ int type = h & 0xff; ++ int elems = h >> 8; + for (j = 0; j < elems; j++) + { + void *reg = get_basic_type_addr (type, &context, j); +@@ -967,7 +1015,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + ffi_cif *cif = closure->cif; + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + void *rvalue = NULL; +- int i; ++ int i, h; + struct arg_state state; + + arg_init (&state, ALIGN(cif->bytes, 16)); +@@ -1002,9 +1050,10 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + #endif + + case FFI_TYPE_STRUCT: +- if (is_hfa (ty)) ++ h = is_hfa (ty); ++ if (h) + { +- unsigned n = element_count (ty); ++ unsigned n = h >> 8; + if (available_v (&state) < n) + { + state.nsrn = N_V_ARG_REG; +@@ -1013,7 +1062,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + } + else + { +- switch (get_homogeneous_type (ty)) ++ switch (h & 0xff) + { + case FFI_TYPE_FLOAT: + { +@@ -1027,9 +1076,9 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + correctly. The fake can be tossed once the + closure function has returned hence alloca() + is sufficient. 
*/ +- int j; ++ unsigned j; + UINT32 *p = avalue[i] = alloca (ty->size); +- for (j = 0; j < element_count (ty); j++) ++ for (j = 0; j < n; j++) + memcpy (&p[j], + allocate_to_s (context, &state), + sizeof (*p)); +@@ -1048,9 +1097,9 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + correctly. The fake can be tossed once the + closure function has returned hence alloca() + is sufficient. */ +- int j; ++ unsigned j; + UINT64 *p = avalue[i] = alloca (ty->size); +- for (j = 0; j < element_count (ty); j++) ++ for (j = 0; j < n; j++) + memcpy (&p[j], + allocate_to_d (context, &state), + sizeof (*p)); +@@ -1143,11 +1192,12 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + break; + } + case FFI_TYPE_STRUCT: +- if (is_hfa (cif->rtype)) ++ h = is_hfa (cif->rtype); ++ if (h) + { + int j; +- unsigned short type = get_homogeneous_type (cif->rtype); +- unsigned elems = element_count (cif->rtype); ++ int type = h & 0xff; ++ int elems = h >> 8; + for (j = 0; j < elems; j++) + { + void *reg = get_basic_type_addr (type, context, j); +-- +2.7.4.huawei.3 + diff --git a/0054-aarch64-Always-distinguish-LONGDOUBLE.patch b/0054-aarch64-Always-distinguish-LONGDOUBLE.patch new file mode 100644 index 0000000..2fa2bb4 --- /dev/null +++ b/0054-aarch64-Always-distinguish-LONGDOUBLE.patch @@ -0,0 +1,173 @@ +From b5f147d84761dc673ffe01d6af82bcde4ea47928 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 21 Oct 2014 13:27:57 -0400 +Subject: [PATCH 054/415] aarch64: Always distinguish LONGDOUBLE + +Avoid if-deffery by forcing FFI_TYPE_LONGDOUBLE different +from FFI_TYPE_DOUBLE. This will simply be unused on hosts +that define them identically. 
+--- + src/aarch64/ffi.c | 41 ++++++++++++++--------------------------- + 1 file changed, 14 insertions(+), 27 deletions(-) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index 0834614..f065be5 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -20,11 +20,20 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + + #include +- ++#include + #include + #include + +-#include ++/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; ++ all further uses in this file will refer to the 128-bit type. */ ++#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE ++# if FFI_TYPE_LONGDOUBLE != 4 ++# error FFI_TYPE_LONGDOUBLE out of date ++# endif ++#else ++# undef FFI_TYPE_LONGDOUBLE ++# define FFI_TYPE_LONGDOUBLE 4 ++#endif + + /* Stack alignment requirement in bytes */ + #if defined (__APPLE__) +@@ -115,10 +124,8 @@ get_basic_type_addr (unsigned short type, struct call_context *context, + return get_s_addr (context, n); + case FFI_TYPE_DOUBLE: + return get_d_addr (context, n); +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + return get_v_addr (context, n); +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: +@@ -151,10 +158,8 @@ get_basic_type_alignment (unsigned short type) + #endif + case FFI_TYPE_DOUBLE: + return sizeof (UINT64); +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + return sizeof (long double); +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + #if defined (__APPLE__) +@@ -193,10 +198,8 @@ get_basic_type_size (unsigned short type) + return sizeof (UINT32); + case FFI_TYPE_DOUBLE: + return sizeof (UINT64); +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + return sizeof (long double); +-#endif + case FFI_TYPE_UINT8: + return sizeof (UINT8); + case FFI_TYPE_SINT8: +@@ -390,9 +393,7 @@ is_register_candidate (ffi_type *ty) + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: 
+ case FFI_TYPE_DOUBLE: +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: +@@ -557,11 +558,9 @@ copy_basic_type (void *dest, void *source, unsigned short type) + case FFI_TYPE_DOUBLE: + *(double *) dest = *(double *) source; + break; +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + *(long double *) dest = *(long double *) source; + break; +-#endif + case FFI_TYPE_UINT8: + *(ffi_arg *) dest = *(UINT8 *) source; + break; +@@ -653,13 +652,11 @@ allocate_to_register_or_stack (struct call_context *context, + return allocate_to_d (context, state); + state->nsrn = N_V_ARG_REG; + break; +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + if (state->nsrn < N_V_ARG_REG) + return allocate_to_v (context, state); + state->nsrn = N_V_ARG_REG; + break; +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: +@@ -722,9 +719,7 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack, + appropriate register, or if none are available, to the stack. 
*/ + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: +@@ -887,9 +882,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +-#endif + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: +@@ -1040,14 +1033,12 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +- case FFI_TYPE_FLOAT: +- case FFI_TYPE_DOUBLE: +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE +- case FFI_TYPE_LONGDOUBLE: ++ case FFI_TYPE_FLOAT: ++ case FFI_TYPE_DOUBLE: ++ case FFI_TYPE_LONGDOUBLE: + avalue[i] = allocate_to_register_or_stack (context, stack, + &state, ty->type); + break; +-#endif + + case FFI_TYPE_STRUCT: + h = is_hfa (ty); +@@ -1106,13 +1097,11 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + break; + } + +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: + memcpy (&avalue[i], + allocate_to_v (context, &state), + sizeof (*avalue)); + break; +-#endif + + default: + FFI_ASSERT (0); +@@ -1183,9 +1172,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: +-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE + case FFI_TYPE_LONGDOUBLE: +-#endif + { + void *addr = get_basic_type_addr (cif->rtype->type, context, 0); + copy_basic_type (addr, rvalue, cif->rtype->type); +-- +2.7.4.huawei.3 + diff --git a/0055-aarch64-Simplify-AARCH64_STACK_ALIGN.patch b/0055-aarch64-Simplify-AARCH64_STACK_ALIGN.patch new file mode 100644 index 0000000..5e8c4b3 --- /dev/null +++ b/0055-aarch64-Simplify-AARCH64_STACK_ALIGN.patch @@ -0,0 +1,43 @@ +From 
77c4cddca6aeb6e545e21f235e29323e05f5a3a3 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 21 Oct 2014 13:30:40 -0400 +Subject: [PATCH 055/415] aarch64: Simplify AARCH64_STACK_ALIGN + +The iOS abi doesn't require padding between arguments, but +that's not what AARCH64_STACK_ALIGN meant. The hardware will +in fact trap if the SP register is not 16 byte aligned. +--- + src/aarch64/ffi.c | 10 +--------- + 1 file changed, 1 insertion(+), 9 deletions(-) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index f065be5..a6fcc11 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -35,13 +35,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + # define FFI_TYPE_LONGDOUBLE 4 + #endif + +-/* Stack alignment requirement in bytes */ +-#if defined (__APPLE__) +-#define AARCH64_STACK_ALIGN 1 +-#else +-#define AARCH64_STACK_ALIGN 16 +-#endif +- + #define N_X_ARG_REG 8 + #define N_V_ARG_REG 8 + +@@ -799,8 +792,7 @@ ffi_status + ffi_prep_cif_machdep (ffi_cif *cif) + { + /* Round the stack up to a multiple of the stack alignment requirement. */ +- cif->bytes = +- (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1); ++ cif->bytes = ALIGN(cif->bytes, 16); + + /* Initialize our flags. We are interested if this CIF will touch a + vector register, if so we will enable context save and load to +-- +2.7.4.huawei.3 + diff --git a/0056-aarch64-Reduce-the-size-of-register_context.patch b/0056-aarch64-Reduce-the-size-of-register_context.patch new file mode 100644 index 0000000..4247da6 --- /dev/null +++ b/0056-aarch64-Reduce-the-size-of-register_context.patch @@ -0,0 +1,346 @@ +From 95a04af134431ccc8230aca1641541a5e8fcbdc9 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Tue, 21 Oct 2014 22:41:07 -0400 +Subject: [PATCH 056/415] aarch64: Reduce the size of register_context + +We don't need to store 32 general and vector registers. +Only 8 of each are used for parameter passing. 
+--- + src/aarch64/ffi.c | 35 ++++++++--------- + src/aarch64/ffitarget.h | 6 --- + src/aarch64/internal.h | 26 +++++++++++++ + src/aarch64/sysv.S | 100 +++++++++++++++++++++++------------------------- + 4 files changed, 91 insertions(+), 76 deletions(-) + create mode 100644 src/aarch64/internal.h + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index a6fcc11..58d088b 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -21,8 +21,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + + #include + #include ++#include + #include + #include ++#include "internal.h" + + /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. */ +@@ -35,38 +37,35 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + # define FFI_TYPE_LONGDOUBLE 4 + #endif + +-#define N_X_ARG_REG 8 +-#define N_V_ARG_REG 8 +- +-#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT) +- + union _d + { + UINT64 d; + UINT32 s[2]; + }; + ++struct _v ++{ ++ union _d d[2] __attribute__((aligned(16))); ++}; ++ + struct call_context + { +- UINT64 x [AARCH64_N_XREG]; +- struct +- { +- union _d d[2]; +- } v [AARCH64_N_VREG]; ++ struct _v v[N_V_ARG_REG]; ++ UINT64 x[N_X_ARG_REG]; ++ UINT64 x8; + }; + + #if defined (__clang__) && defined (__APPLE__) +-extern void +-sys_icache_invalidate (void *start, size_t len); ++extern void sys_icache_invalidate (void *start, size_t len); + #endif + + static inline void + ffi_clear_cache (void *start, void *end) + { + #if defined (__clang__) && defined (__APPLE__) +- sys_icache_invalidate (start, (char *)end - (char *)start); ++ sys_icache_invalidate (start, (char *)end - (char *)start); + #elif defined (__GNUC__) +- __builtin___clear_cache (start, end); ++ __builtin___clear_cache (start, end); + #else + #error "Missing builtin to flush instruction cache" + #endif +@@ -802,7 +801,7 @@ ffi_prep_cif_machdep (ffi_cif *cif) + + if (is_v_register_candidate (cif->rtype)) + { +- 
cif->aarch64_flags |= AARCH64_FFI_WITH_V; ++ cif->aarch64_flags |= AARCH64_FLAG_ARG_V; + } + else + { +@@ -810,7 +809,7 @@ ffi_prep_cif_machdep (ffi_cif *cif) + for (i = 0; i < cif->nargs; i++) + if (is_v_register_candidate (cif->arg_types[i])) + { +- cif->aarch64_flags |= AARCH64_FFI_WITH_V; ++ cif->aarch64_flags |= AARCH64_FLAG_ARG_V; + break; + } + } +@@ -924,7 +923,7 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) + } + else + { +- memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64)); ++ context.x8 = (uintptr_t)rvalue; + ffi_call_SYSV (aarch64_prep_args, &context, &ecif, + stack_bytes, fn); + } +@@ -1201,7 +1200,7 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + } + else + { +- memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64)); ++ rvalue = (void *)(uintptr_t)context->x8; + (closure->fun) (cif, rvalue, avalue, closure->user_data); + } + } +diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h +index 4bbced2..336f28a 100644 +--- a/src/aarch64/ffitarget.h ++++ b/src/aarch64/ffitarget.h +@@ -54,10 +54,4 @@ typedef enum ffi_abi + #define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags + #endif + +-#define AARCH64_FFI_WITH_V_BIT 0 +- +-#define AARCH64_N_XREG 32 +-#define AARCH64_N_VREG 32 +-#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16) +- + #endif +diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h +new file mode 100644 +index 0000000..b6b6104 +--- /dev/null ++++ b/src/aarch64/internal.h +@@ -0,0 +1,26 @@ ++/* ++Permission is hereby granted, free of charge, to any person obtaining ++a copy of this software and associated documentation files (the ++``Software''), to deal in the Software without restriction, including ++without limitation the rights to use, copy, modify, merge, publish, ++distribute, sublicense, and/or sell copies of the Software, and to ++permit persons to whom the Software is furnished to do so, subject to ++the following 
conditions: ++ ++The above copyright notice and this permission notice shall be ++included in all copies or substantial portions of the Software. ++ ++THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, ++EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ ++ ++#define AARCH64_FLAG_ARG_V_BIT 0 ++#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT) ++ ++#define N_X_ARG_REG 8 ++#define N_V_ARG_REG 8 ++#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8 + 16) +diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S +index 169eab8..70870db 100644 +--- a/src/aarch64/sysv.S ++++ b/src/aarch64/sysv.S +@@ -22,6 +22,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #define LIBFFI_ASM + #include + #include ++#include "internal.h" + + #ifdef HAVE_MACHINE_ASM_H + #include +@@ -43,13 +44,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg + + .text ++ .align 2 ++ + .globl CNAME(ffi_call_SYSV) + #ifdef __ELF__ + .type CNAME(ffi_call_SYSV), #function + #endif +-#ifdef __APPLE__ +- .align 2 +-#endif + + /* ffi_call_SYSV() + +@@ -142,42 +142,40 @@ CNAME(ffi_call_SYSV): + mov x23, x0 + + /* Figure out if we should touch the vector registers. */ +- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f ++ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f + + /* Load the vector argument passing registers. 
*/ +- ldp q0, q1, [x21, #8*32 + 0] +- ldp q2, q3, [x21, #8*32 + 32] +- ldp q4, q5, [x21, #8*32 + 64] +- ldp q6, q7, [x21, #8*32 + 96] ++ ldp q0, q1, [x21, #0] ++ ldp q2, q3, [x21, #32] ++ ldp q4, q5, [x21, #64] ++ ldp q6, q7, [x21, #96] + 1: +- /* Load the core argument passing registers. */ +- ldp x0, x1, [x21, #0] +- ldp x2, x3, [x21, #16] +- ldp x4, x5, [x21, #32] +- ldp x6, x7, [x21, #48] +- +- /* Don't forget x8 which may be holding the address of a return buffer. +- */ +- ldr x8, [x21, #8*8] ++ /* Load the core argument passing registers, including ++ the structure return pointer. */ ++ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0] ++ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16] ++ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32] ++ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48] ++ ldr x8, [x21, #16*N_V_ARG_REG + 64] + + blr x24 + + /* Save the core argument passing registers. */ +- stp x0, x1, [x21, #0] +- stp x2, x3, [x21, #16] +- stp x4, x5, [x21, #32] +- stp x6, x7, [x21, #48] ++ stp x0, x1, [x21, #16*N_V_ARG_REG + 0] ++ stp x2, x3, [x21, #16*N_V_ARG_REG + 16] ++ stp x4, x5, [x21, #16*N_V_ARG_REG + 32] ++ stp x6, x7, [x21, #16*N_V_ARG_REG + 48] + + /* Note nothing useful ever comes back in x8! */ + + /* Figure out if we should touch the vector registers. */ +- tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f ++ tbz x23, #AARCH64_FLAG_ARG_V_BIT, 1f + + /* Save the vector argument passing registers. */ +- stp q0, q1, [x21, #8*32 + 0] +- stp q2, q3, [x21, #8*32 + 32] +- stp q4, q5, [x21, #8*32 + 64] +- stp q6, q7, [x21, #8*32 + 96] ++ stp q0, q1, [x21, #0] ++ stp q2, q3, [x21, #32] ++ stp q4, q5, [x21, #64] ++ stp q6, q7, [x21, #96] + 1: + /* All done, unwind our stack frame. 
*/ + ldp x21, x22, [x29, # - ffi_call_SYSV_FS] +@@ -203,7 +201,7 @@ CNAME(ffi_call_SYSV): + .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) + #endif + +-#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE) ++#define ffi_closure_SYSV_FS (8 * 2 + CALL_CONTEXT_SIZE) + + /* ffi_closure_SYSV + +@@ -243,10 +241,9 @@ CNAME(ffi_call_SYSV): + Voila! */ + + .text +- .globl CNAME(ffi_closure_SYSV) +-#ifdef __APPLE__ + .align 2 +-#endif ++ ++ .globl CNAME(ffi_closure_SYSV) + .cfi_startproc + CNAME(ffi_closure_SYSV): + stp x29, x30, [sp, #-16]! +@@ -268,24 +265,23 @@ CNAME(ffi_closure_SYSV): + /* Preserve our struct trampoline_data * */ + mov x22, x17 + +- /* Save the rest of the argument passing registers. */ +- stp x0, x1, [x21, #0] +- stp x2, x3, [x21, #16] +- stp x4, x5, [x21, #32] +- stp x6, x7, [x21, #48] +- /* Don't forget we may have been given a result scratch pad address. +- */ +- str x8, [x21, #64] ++ /* Save the rest of the argument passing registers, including ++ the structure return pointer. */ ++ stp x0, x1, [x21, #16*N_V_ARG_REG + 0] ++ stp x2, x3, [x21, #16*N_V_ARG_REG + 16] ++ stp x4, x5, [x21, #16*N_V_ARG_REG + 32] ++ stp x6, x7, [x21, #16*N_V_ARG_REG + 48] ++ str x8, [x21, #16*N_V_ARG_REG + 64] + + /* Figure out if we should touch the vector registers. */ + ldr x0, [x22, #8] +- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f ++ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f + + /* Save the argument passing vector registers. */ +- stp q0, q1, [x21, #8*32 + 0] +- stp q2, q3, [x21, #8*32 + 32] +- stp q4, q5, [x21, #8*32 + 64] +- stp q6, q7, [x21, #8*32 + 96] ++ stp q0, q1, [x21, #0] ++ stp q2, q3, [x21, #32] ++ stp q4, q5, [x21, #64] ++ stp q6, q7, [x21, #96] + 1: + /* Load &ffi_closure.. */ + ldr x0, [x22, #0] +@@ -298,19 +294,19 @@ CNAME(ffi_closure_SYSV): + + /* Figure out if we should touch the vector registers. */ + ldr x0, [x22, #8] +- tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f ++ tbz x0, #AARCH64_FLAG_ARG_V_BIT, 1f + + /* Load the result passing vector registers. 
*/ +- ldp q0, q1, [x21, #8*32 + 0] +- ldp q2, q3, [x21, #8*32 + 32] +- ldp q4, q5, [x21, #8*32 + 64] +- ldp q6, q7, [x21, #8*32 + 96] ++ ldp q0, q1, [x21, #0] ++ ldp q2, q3, [x21, #32] ++ ldp q4, q5, [x21, #64] ++ ldp q6, q7, [x21, #96] + 1: + /* Load the result passing core registers. */ +- ldp x0, x1, [x21, #0] +- ldp x2, x3, [x21, #16] +- ldp x4, x5, [x21, #32] +- ldp x6, x7, [x21, #48] ++ ldp x0, x1, [x21, #16*N_V_ARG_REG + 0] ++ ldp x2, x3, [x21, #16*N_V_ARG_REG + 16] ++ ldp x4, x5, [x21, #16*N_V_ARG_REG + 32] ++ ldp x6, x7, [x21, #16*N_V_ARG_REG + 48] + /* Note nothing useful is returned in x8. */ + + /* We are done, unwind our frame. */ +-- +2.7.4.huawei.3 + diff --git a/0058-aarch64-Treat-void-return-as-not-passed-in-registers.patch b/0058-aarch64-Treat-void-return-as-not-passed-in-registers.patch new file mode 100644 index 0000000..096414b --- /dev/null +++ b/0058-aarch64-Treat-void-return-as-not-passed-in-registers.patch @@ -0,0 +1,25 @@ +From b55e03665ddf2423df9baee0d3172892ba781c26 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Wed, 22 Oct 2014 12:33:59 -0400 +Subject: [PATCH 058/415] aarch64: Treat void return as not passed in registers + +This lets us do less post-processing when there's no return value. 
+--- + src/aarch64/ffi.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index 58d088b..6c338e1 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -383,6 +383,7 @@ is_register_candidate (ffi_type *ty) + switch (ty->type) + { + case FFI_TYPE_VOID: ++ return 0; + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: +-- +2.7.4.huawei.3 + diff --git a/0059-aarch64-Tidy-up-abi-manipulation.patch b/0059-aarch64-Tidy-up-abi-manipulation.patch new file mode 100644 index 0000000..da34c98 --- /dev/null +++ b/0059-aarch64-Tidy-up-abi-manipulation.patch @@ -0,0 +1,1162 @@ +From 8c8161cb623585d5d0c783b9d494b9b74ada6ced Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Wed, 22 Oct 2014 12:52:07 -0400 +Subject: [PATCH 059/415] aarch64: Tidy up abi manipulation + +Avoid false abstraction, like get_x_addr. Avoid recomputing data +about the type being manipulated. Use NEON insns for HFA manipulation. + +Note that some of the inline assembly will go away in a subsequent patch. 
+--- + src/aarch64/ffi.c | 932 +++++++++++++++++++++--------------------------------- + 1 file changed, 367 insertions(+), 565 deletions(-) + +diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c +index 6c338e1..d19384b 100644 +--- a/src/aarch64/ffi.c ++++ b/src/aarch64/ffi.c +@@ -71,152 +71,6 @@ ffi_clear_cache (void *start, void *end) + #endif + } + +-static void * +-get_x_addr (struct call_context *context, unsigned n) +-{ +- return &context->x[n]; +-} +- +-static void * +-get_s_addr (struct call_context *context, unsigned n) +-{ +-#if defined __AARCH64EB__ +- return &context->v[n].d[1].s[1]; +-#else +- return &context->v[n].d[0].s[0]; +-#endif +-} +- +-static void * +-get_d_addr (struct call_context *context, unsigned n) +-{ +-#if defined __AARCH64EB__ +- return &context->v[n].d[1]; +-#else +- return &context->v[n].d[0]; +-#endif +-} +- +-static void * +-get_v_addr (struct call_context *context, unsigned n) +-{ +- return &context->v[n]; +-} +- +-/* Return the memory location at which a basic type would reside +- were it to have been stored in register n. */ +- +-static void * +-get_basic_type_addr (unsigned short type, struct call_context *context, +- unsigned n) +-{ +- switch (type) +- { +- case FFI_TYPE_FLOAT: +- return get_s_addr (context, n); +- case FFI_TYPE_DOUBLE: +- return get_d_addr (context, n); +- case FFI_TYPE_LONGDOUBLE: +- return get_v_addr (context, n); +- case FFI_TYPE_UINT8: +- case FFI_TYPE_SINT8: +- case FFI_TYPE_UINT16: +- case FFI_TYPE_SINT16: +- case FFI_TYPE_UINT32: +- case FFI_TYPE_SINT32: +- case FFI_TYPE_INT: +- case FFI_TYPE_POINTER: +- case FFI_TYPE_UINT64: +- case FFI_TYPE_SINT64: +- return get_x_addr (context, n); +- case FFI_TYPE_VOID: +- return NULL; +- default: +- FFI_ASSERT (0); +- return NULL; +- } +-} +- +-/* Return the alignment width for each of the basic types. 
*/ +- +-static size_t +-get_basic_type_alignment (unsigned short type) +-{ +- switch (type) +- { +- case FFI_TYPE_FLOAT: +-#if defined (__APPLE__) +- return sizeof (UINT32); +-#endif +- case FFI_TYPE_DOUBLE: +- return sizeof (UINT64); +- case FFI_TYPE_LONGDOUBLE: +- return sizeof (long double); +- case FFI_TYPE_UINT8: +- case FFI_TYPE_SINT8: +-#if defined (__APPLE__) +- return sizeof (UINT8); +-#endif +- case FFI_TYPE_UINT16: +- case FFI_TYPE_SINT16: +-#if defined (__APPLE__) +- return sizeof (UINT16); +-#endif +- case FFI_TYPE_UINT32: +- case FFI_TYPE_INT: +- case FFI_TYPE_SINT32: +-#if defined (__APPLE__) +- return sizeof (UINT32); +-#endif +- case FFI_TYPE_POINTER: +- case FFI_TYPE_UINT64: +- case FFI_TYPE_SINT64: +- return sizeof (UINT64); +- +- default: +- FFI_ASSERT (0); +- return 0; +- } +-} +- +-/* Return the size in bytes for each of the basic types. */ +- +-static size_t +-get_basic_type_size (unsigned short type) +-{ +- switch (type) +- { +- case FFI_TYPE_FLOAT: +- return sizeof (UINT32); +- case FFI_TYPE_DOUBLE: +- return sizeof (UINT64); +- case FFI_TYPE_LONGDOUBLE: +- return sizeof (long double); +- case FFI_TYPE_UINT8: +- return sizeof (UINT8); +- case FFI_TYPE_SINT8: +- return sizeof (SINT8); +- case FFI_TYPE_UINT16: +- return sizeof (UINT16); +- case FFI_TYPE_SINT16: +- return sizeof (SINT16); +- case FFI_TYPE_UINT32: +- return sizeof (UINT32); +- case FFI_TYPE_INT: +- case FFI_TYPE_SINT32: +- return sizeof (SINT32); +- case FFI_TYPE_POINTER: +- case FFI_TYPE_UINT64: +- return sizeof (UINT64); +- case FFI_TYPE_SINT64: +- return sizeof (SINT64); +- +- default: +- FFI_ASSERT (0); +- return 0; +- } +-} +- + extern void + ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *, + extended_cif *), +@@ -468,223 +322,211 @@ arg_init (struct arg_state *state, size_t call_frame_size) + #endif + } + +-/* Return the number of available consecutive core argument +- registers. 
*/ +- +-static unsigned +-available_x (struct arg_state *state) +-{ +- return N_X_ARG_REG - state->ngrn; +-} +- +-/* Return the number of available consecutive vector argument +- registers. */ +- +-static unsigned +-available_v (struct arg_state *state) +-{ +- return N_V_ARG_REG - state->nsrn; +-} +- +-static void * +-allocate_to_x (struct call_context *context, struct arg_state *state) +-{ +- FFI_ASSERT (state->ngrn < N_X_ARG_REG); +- return get_x_addr (context, (state->ngrn)++); +-} +- +-static void * +-allocate_to_s (struct call_context *context, struct arg_state *state) +-{ +- FFI_ASSERT (state->nsrn < N_V_ARG_REG); +- return get_s_addr (context, (state->nsrn)++); +-} +- +-static void * +-allocate_to_d (struct call_context *context, struct arg_state *state) +-{ +- FFI_ASSERT (state->nsrn < N_V_ARG_REG); +- return get_d_addr (context, (state->nsrn)++); +-} +- +-static void * +-allocate_to_v (struct call_context *context, struct arg_state *state) +-{ +- FFI_ASSERT (state->nsrn < N_V_ARG_REG); +- return get_v_addr (context, (state->nsrn)++); +-} +- + /* Allocate an aligned slot on the stack and return a pointer to it. */ + static void * +-allocate_to_stack (struct arg_state *state, void *stack, size_t alignment, +- size_t size) ++allocate_to_stack (struct arg_state *state, void *stack, ++ size_t alignment, size_t size) + { +- void *allocation; ++ size_t nsaa = state->nsaa; + + /* Round up the NSAA to the larger of 8 or the natural + alignment of the argument's type. 
*/ +- state->nsaa = ALIGN (state->nsaa, alignment); +- state->nsaa = ALIGN (state->nsaa, alignment); + #if defined (__APPLE__) +- if (state->allocating_variadic) +- state->nsaa = ALIGN (state->nsaa, 8); ++ if (state->allocating_variadic && alignment < 8) ++ alignment = 8; + #else +- state->nsaa = ALIGN (state->nsaa, 8); ++ if (alignment < 8) ++ alignment = 8; + #endif ++ ++ nsaa = ALIGN (nsaa, alignment); ++ state->nsaa = nsaa + size; + +- allocation = stack + state->nsaa; +- +- state->nsaa += size; +- return allocation; ++ return (char *)stack + nsaa; + } + +-static void +-copy_basic_type (void *dest, void *source, unsigned short type) ++static ffi_arg ++extend_integer_type (void *source, int type) + { +- /* This is necessary to ensure that basic types are copied +- sign extended to 64-bits as libffi expects. */ + switch (type) + { +- case FFI_TYPE_FLOAT: +- *(float *) dest = *(float *) source; +- break; +- case FFI_TYPE_DOUBLE: +- *(double *) dest = *(double *) source; +- break; +- case FFI_TYPE_LONGDOUBLE: +- *(long double *) dest = *(long double *) source; +- break; + case FFI_TYPE_UINT8: +- *(ffi_arg *) dest = *(UINT8 *) source; +- break; ++ return *(UINT8 *) source; + case FFI_TYPE_SINT8: +- *(ffi_sarg *) dest = *(SINT8 *) source; +- break; ++ return *(SINT8 *) source; + case FFI_TYPE_UINT16: +- *(ffi_arg *) dest = *(UINT16 *) source; +- break; ++ return *(UINT16 *) source; + case FFI_TYPE_SINT16: +- *(ffi_sarg *) dest = *(SINT16 *) source; +- break; ++ return *(SINT16 *) source; + case FFI_TYPE_UINT32: +- *(ffi_arg *) dest = *(UINT32 *) source; +- break; ++ return *(UINT32 *) source; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: +- *(ffi_sarg *) dest = *(SINT32 *) source; +- break; +- case FFI_TYPE_POINTER: ++ return *(SINT32 *) source; + case FFI_TYPE_UINT64: +- *(ffi_arg *) dest = *(UINT64 *) source; +- break; + case FFI_TYPE_SINT64: +- *(ffi_sarg *) dest = *(SINT64 *) source; +- break; +- case FFI_TYPE_VOID: ++ return *(UINT64 *) source; + break; +- ++ 
case FFI_TYPE_POINTER: ++ return *(uintptr_t *) source; + default: +- FFI_ASSERT (0); ++ abort(); + } + } + + static void +-copy_hfa_to_reg_or_stack (void *memory, +- ffi_type *ty, +- struct call_context *context, +- unsigned char *stack, +- struct arg_state *state) +-{ +- int h = is_hfa (ty); +- int type = h & 0xff; +- unsigned elems = h >> 8; +- +- if (available_v (state) < elems) +- { +- /* There are insufficient V registers. Further V register allocations +- are prevented, the NSAA is adjusted (by allocate_to_stack ()) +- and the argument is copied to memory at the adjusted NSAA. */ +- state->nsrn = N_V_ARG_REG; +- memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size), +- memory, +- ty->size); +- } +- else +- { +- int i; +- for (i = 0; i < elems; i++) +- { +- void *reg = allocate_to_v (context, state); +- copy_basic_type (reg, memory, type); +- memory += get_basic_type_size (type); +- } +- } ++extend_hfa_type (void *dest, void *src, int h) ++{ ++ int n = (h >> 8); ++ int t = h & 0xff; ++ int f = (t - FFI_TYPE_FLOAT) * 4 + 4 - n; ++ void *x0; ++ ++ asm volatile ( ++ "adr %0, 0f\n" ++" add %0, %0, %1\n" ++" br %0\n" ++"0: ldp s16, s17, [%3]\n" /* S4 */ ++" ldp s18, s19, [%3, #8]\n" ++" b 4f\n" ++" ldp s16, s17, [%3]\n" /* S3 */ ++" ldr s18, [%3, #8]\n" ++" b 3f\n" ++" ldp s16, s17, [%3]\n" /* S2 */ ++" b 2f\n" ++" nop\n" ++" ldr s16, [%3]\n" /* S1 */ ++" b 1f\n" ++" nop\n" ++" ldp d16, d17, [%3]\n" /* D4 */ ++" ldp d18, d19, [%3, #16]\n" ++" b 4f\n" ++" ldp d16, d17, [%3]\n" /* D3 */ ++" ldr d18, [%3, #16]\n" ++" b 3f\n" ++" ldp d16, d17, [%3]\n" /* D2 */ ++" b 2f\n" ++" nop\n" ++" ldr d16, [%3]\n" /* D1 */ ++" b 1f\n" ++" nop\n" ++" ldp q16, q17, [%3]\n" /* Q4 */ ++" ldp q18, q19, [%3, #16]\n" ++" b 4f\n" ++" ldp q16, q17, [%3]\n" /* Q3 */ ++" ldr q18, [%3, #16]\n" ++" b 3f\n" ++" ldp q16, q17, [%3]\n" /* Q2 */ ++" b 2f\n" ++" nop\n" ++" ldr q16, [%3]\n" /* Q1 */ ++" b 1f\n" ++"4: str q19, [%2, #48]\n" ++"3: str q18, [%2, #32]\n" ++"2: str q17, [%2, 
#16]\n" ++"1: str q16, [%2]" ++ : "=&r"(x0) ++ : "r"(f * 12), "r"(dest), "r"(src) ++ : "memory", "v16", "v17", "v18", "v19"); + } + +-/* Either allocate an appropriate register for the argument type, or if +- none are available, allocate a stack slot and return a pointer +- to the allocated space. */ +- + static void * +-allocate_to_register_or_stack (struct call_context *context, +- unsigned char *stack, +- struct arg_state *state, +- unsigned short type) ++compress_hfa_type (void *dest, void *reg, int h) + { +- size_t alignment = get_basic_type_alignment (type); +- size_t size = alignment; +- switch (type) ++ int n = h >> 8; ++ switch (h & 0xff) + { + case FFI_TYPE_FLOAT: +- /* This is the only case for which the allocated stack size +- should not match the alignment of the type. */ +- size = sizeof (UINT32); +- /* Fall through. */ ++ switch (n) ++ { ++ default: ++ if (dest == reg) ++ { ++#ifdef __AARCH64EB__ ++ dest += 12; ++#endif ++ } ++ else ++ *(float *)dest = *(float *)reg; ++ break; ++ case 2: ++ asm("ldp q16, q17, [%1]\n\t" ++ "st2 { v16.s, v17.s }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); ++ break; ++ case 3: ++ asm("ldp q16, q17, [%1]\n\t" ++ "ldr q18, [%1, #32]\n\t" ++ "st3 { v16.s, v17.s, v18.s }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); ++ break; ++ case 4: ++ asm("ldp q16, q17, [%1]\n\t" ++ "ldp q18, q19, [%1, #32]\n\t" ++ "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); ++ break; ++ } ++ break; ++ + case FFI_TYPE_DOUBLE: +- if (state->nsrn < N_V_ARG_REG) +- return allocate_to_d (context, state); +- state->nsrn = N_V_ARG_REG; ++ switch (n) ++ { ++ default: ++ if (dest == reg) ++ { ++#ifdef __AARCH64EB__ ++ dest += 8; ++#endif ++ } ++ else ++ *(double *)dest = *(double *)reg; ++ break; ++ case 2: ++ asm("ldp q16, q17, [%1]\n\t" ++ "st2 { v16.d, v17.d }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); ++ break; ++ case 3: 
++ asm("ldp q16, q17, [%1]\n\t" ++ "ldr q18, [%1, #32]\n\t" ++ "st3 { v16.d, v17.d, v18.d }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); ++ break; ++ case 4: ++ asm("ldp q16, q17, [%1]\n\t" ++ "ldp q18, q19, [%1, #32]\n\t" ++ "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]" ++ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); ++ break; ++ } + break; ++ + case FFI_TYPE_LONGDOUBLE: +- if (state->nsrn < N_V_ARG_REG) +- return allocate_to_v (context, state); +- state->nsrn = N_V_ARG_REG; +- break; +- case FFI_TYPE_UINT8: +- case FFI_TYPE_SINT8: +- case FFI_TYPE_UINT16: +- case FFI_TYPE_SINT16: +- case FFI_TYPE_UINT32: +- case FFI_TYPE_SINT32: +- case FFI_TYPE_INT: +- case FFI_TYPE_POINTER: +- case FFI_TYPE_UINT64: +- case FFI_TYPE_SINT64: +- if (state->ngrn < N_X_ARG_REG) +- return allocate_to_x (context, state); +- state->ngrn = N_X_ARG_REG; ++ if (dest != reg) ++ return memcpy (dest, reg, 16 * n); + break; ++ + default: + FFI_ASSERT (0); + } +- +- return allocate_to_stack (state, stack, alignment, size); ++ return dest; + } + +-/* Copy a value to an appropriate register, or if none are +- available, to the stack. */ ++/* Either allocate an appropriate register for the argument type, or if ++ none are available, allocate a stack slot and return a pointer ++ to the allocated space. 
*/ + +-static void +-copy_to_register_or_stack (struct call_context *context, +- unsigned char *stack, +- struct arg_state *state, +- void *value, +- unsigned short type) ++static void * ++allocate_int_to_reg_or_stack (struct call_context *context, ++ struct arg_state *state, ++ void *stack, size_t size) + { +- copy_basic_type ( +- allocate_to_register_or_stack (context, stack, state, type), +- value, +- type); ++ if (state->ngrn < N_X_ARG_REG) ++ return &context->x[state->ngrn++]; ++ ++ state->ngrn = N_X_ARG_REG; ++ return allocate_to_stack (state, stack, size, size); + } + + /* Marshall the arguments from FFI representation to procedure call +@@ -694,15 +536,21 @@ static unsigned + aarch64_prep_args (struct call_context *context, unsigned char *stack, + extended_cif *ecif) + { +- int i; ++ ffi_cif *cif = ecif->cif; ++ void **avalue = ecif->avalue; ++ int i, nargs = cif->nargs; + struct arg_state state; + +- arg_init (&state, ALIGN(ecif->cif->bytes, 16)); ++ arg_init (&state, cif->bytes); + +- for (i = 0; i < ecif->cif->nargs; i++) ++ for (i = 0; i < nargs; i++) + { +- ffi_type *ty = ecif->cif->arg_types[i]; +- switch (ty->type) ++ ffi_type *ty = cif->arg_types[i]; ++ size_t s = ty->size; ++ int h, t = ty->type; ++ void *a = avalue[i]; ++ ++ switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); +@@ -710,82 +558,114 @@ aarch64_prep_args (struct call_context *context, unsigned char *stack, + + /* If the argument is a basic type the argument is allocated to an + appropriate register, or if none are available, to the stack. 
*/ +- case FFI_TYPE_FLOAT: +- case FFI_TYPE_DOUBLE: +- case FFI_TYPE_LONGDOUBLE: ++ case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: +- case FFI_TYPE_INT: + case FFI_TYPE_SINT32: +- case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: +- copy_to_register_or_stack (context, stack, &state, +- ecif->avalue[i], ty->type); ++ case FFI_TYPE_POINTER: ++ do_pointer: ++ { ++ ffi_arg ext = extend_integer_type (a, t); ++ if (state.ngrn < N_X_ARG_REG) ++ context->x[state.ngrn++] = ext; ++ else ++ { ++ void *d = allocate_to_stack (&state, stack, ty->alignment, s); ++ state.ngrn = N_X_ARG_REG; ++ /* Note that the default abi extends each argument ++ to a full 64-bit slot, while the iOS abi allocates ++ only enough space. */ ++#ifdef __APPLE__ ++ memcpy(d, a, s); ++#else ++ *(ffi_arg *)d = ext; ++#endif ++ } ++ } + break; + +- case FFI_TYPE_STRUCT: +- if (is_hfa (ty)) +- { +- copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context, +- stack, &state); +- } +- else if (ty->size > 16) +- { +- /* If the argument is a composite type that is larger than 16 +- bytes, then the argument has been copied to memory, and +- the argument is replaced by a pointer to the copy. */ ++ case FFI_TYPE_FLOAT: ++ case FFI_TYPE_DOUBLE: ++ case FFI_TYPE_LONGDOUBLE: ++ /* Scalar float is a degenerate case of HFA. */ ++ h = t + 0x100; ++ goto do_hfa; + +- copy_to_register_or_stack (context, stack, &state, +- &(ecif->avalue[i]), FFI_TYPE_POINTER); +- } +- else if (available_x (&state) >= (ty->size + 7) / 8) +- { +- /* If the argument is a composite type and the size in +- double-words is not more than the number of available +- X registers, then the argument is copied into consecutive +- X registers. 
*/ +- int j; +- for (j = 0; j < (ty->size + 7) / 8; j++) +- { +- memcpy (allocate_to_x (context, &state), +- &(((UINT64 *) ecif->avalue[i])[j]), +- sizeof (UINT64)); ++ case FFI_TYPE_STRUCT: ++ { ++ void *dest; ++ int elems; ++ ++ h = is_hfa (ty); ++ if (h) ++ { ++ do_hfa: ++ elems = h >> 8; ++ if (state.nsrn + elems <= N_V_ARG_REG) ++ { ++ dest = &context->v[state.nsrn]; ++ state.nsrn += elems; ++ extend_hfa_type (dest, a, h); ++ break; ++ } ++ state.nsrn = N_V_ARG_REG; ++ dest = allocate_to_stack (&state, stack, ty->alignment, s); ++ } ++ else if (s > 16) ++ { ++ /* If the argument is a composite type that is larger than 16 ++ bytes, then the argument has been copied to memory, and ++ the argument is replaced by a pointer to the copy. */ ++ a = &avalue[i]; ++ t = FFI_TYPE_POINTER; ++ goto do_pointer; ++ } ++ else ++ { ++ size_t n = (s + 7) / 8; ++ if (state.ngrn + n <= N_X_ARG_REG) ++ { ++ /* If the argument is a composite type and the size in ++ double-words is not more than the number of available ++ X registers, then the argument is copied into ++ consecutive X registers. */ ++ dest = &context->x[state.ngrn]; ++ state.ngrn += n; ++ } ++ else ++ { ++ /* Otherwise, there are insufficient X registers. Further ++ X register allocations are prevented, the NSAA is ++ adjusted and the argument is copied to memory at the ++ adjusted NSAA. */ ++ state.ngrn = N_X_ARG_REG; ++ dest = allocate_to_stack (&state, stack, ty->alignment, s); ++ } + } +- } +- else +- { +- /* Otherwise, there are insufficient X registers. Further X +- register allocations are prevented, the NSAA is adjusted +- (by allocate_to_stack ()) and the argument is copied to +- memory at the adjusted NSAA. 
*/ +- state.ngrn = N_X_ARG_REG; +- +- memcpy (allocate_to_stack (&state, stack, ty->alignment, +- ty->size), ecif->avalue[i], ty->size); ++ memcpy (dest, a, s); + } + break; + + default: +- FFI_ASSERT (0); +- break; ++ abort(); + } + + #if defined (__APPLE__) +- if (i + 1 == ecif->cif->aarch64_nfixedargs) ++ if (i + 1 == cif->aarch64_nfixedargs) + { + state.ngrn = N_X_ARG_REG; + state.nsrn = N_V_ARG_REG; +- + state.allocating_variadic = 1; + } + #endif + } + +- return ecif->cif->aarch64_flags; ++ return cif->aarch64_flags; + } + + ffi_status +@@ -846,94 +726,61 @@ void + ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) + { + extended_cif ecif; +- int h; ++ struct call_context context; ++ size_t stack_bytes; ++ int h, t; + + ecif.cif = cif; + ecif.avalue = avalue; + ecif.rvalue = rvalue; + +- switch (cif->abi) ++ stack_bytes = cif->bytes; ++ ++ memset (&context, 0, sizeof (context)); ++ if (is_register_candidate (cif->rtype)) + { +- case FFI_SYSV: +- { +- struct call_context context; +- size_t stack_bytes; ++ ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); + +- /* Figure out the total amount of stack space we need, the +- above call frame space needs to be 16 bytes aligned to +- ensure correct alignment of the first object inserted in +- that space hence the ALIGN applied to cif->bytes.*/ +- stack_bytes = ALIGN(cif->bytes, 16); ++ t = cif->rtype->type; ++ switch (t) ++ { ++ case FFI_TYPE_INT: ++ case FFI_TYPE_UINT8: ++ case FFI_TYPE_SINT8: ++ case FFI_TYPE_UINT16: ++ case FFI_TYPE_SINT16: ++ case FFI_TYPE_UINT32: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_POINTER: ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_SINT64: ++ *(ffi_arg *)rvalue = extend_integer_type (&context.x[0], t); ++ break; + +- memset (&context, 0, sizeof (context)); +- if (is_register_candidate (cif->rtype)) +- { +- ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); +- switch (cif->rtype->type) +- { +- case FFI_TYPE_VOID: +- case 
FFI_TYPE_FLOAT: +- case FFI_TYPE_DOUBLE: +- case FFI_TYPE_LONGDOUBLE: +- case FFI_TYPE_UINT8: +- case FFI_TYPE_SINT8: +- case FFI_TYPE_UINT16: +- case FFI_TYPE_SINT16: +- case FFI_TYPE_UINT32: +- case FFI_TYPE_SINT32: +- case FFI_TYPE_POINTER: +- case FFI_TYPE_UINT64: +- case FFI_TYPE_INT: +- case FFI_TYPE_SINT64: +- { +- void *addr = get_basic_type_addr (cif->rtype->type, +- &context, 0); +- copy_basic_type (rvalue, addr, cif->rtype->type); +- break; +- } ++ case FFI_TYPE_FLOAT: ++ case FFI_TYPE_DOUBLE: ++ case FFI_TYPE_LONGDOUBLE: ++ compress_hfa_type (rvalue, &context.v[0], 0x100 + t); ++ break; + +- case FFI_TYPE_STRUCT: +- h = is_hfa (cif->rtype); +- if (h) +- { +- int j; +- int type = h & 0xff; +- int elems = h >> 8; +- for (j = 0; j < elems; j++) +- { +- void *reg = get_basic_type_addr (type, &context, j); +- copy_basic_type (rvalue, reg, type); +- rvalue += get_basic_type_size (type); +- } +- } +- else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) +- { +- size_t size = ALIGN (cif->rtype->size, sizeof (UINT64)); +- memcpy (rvalue, get_x_addr (&context, 0), size); +- } +- else +- { +- FFI_ASSERT (0); +- } +- break; +- +- default: +- FFI_ASSERT (0); +- break; +- } +- } +- else +- { +- context.x8 = (uintptr_t)rvalue; +- ffi_call_SYSV (aarch64_prep_args, &context, &ecif, +- stack_bytes, fn); +- } +- break; +- } ++ case FFI_TYPE_STRUCT: ++ h = is_hfa (cif->rtype); ++ if (h) ++ compress_hfa_type (rvalue, &context.v[0], h); ++ else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) ++ memcpy (rvalue, &context.x[0], cif->rtype->size); ++ else ++ abort(); ++ break; + +- default: +- FFI_ASSERT (0); +- break; ++ default: ++ abort(); ++ } ++ } ++ else ++ { ++ context.x8 = (uintptr_t)rvalue; ++ ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); + } + } + +@@ -1000,203 +847,158 @@ ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + ffi_cif *cif = closure->cif; + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + void 
*rvalue = NULL; +- int i, h; ++ int i, h, nargs = cif->nargs; + struct arg_state state; ++ ffi_type *rtype; + + arg_init (&state, ALIGN(cif->bytes, 16)); + +- for (i = 0; i < cif->nargs; i++) ++ for (i = 0; i < nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; ++ int t = ty->type; ++ size_t n, s = ty->size; + +- switch (ty->type) ++ switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + ++ case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: +- case FFI_TYPE_INT: +- case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: ++ case FFI_TYPE_POINTER: ++ avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s); ++ break; ++ + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: +- avalue[i] = allocate_to_register_or_stack (context, stack, +- &state, ty->type); +- break; ++ /* Scalar float is a degenerate case of HFA. */ ++ h = t + 0x100; ++ goto do_hfa; + + case FFI_TYPE_STRUCT: + h = is_hfa (ty); + if (h) + { +- unsigned n = h >> 8; +- if (available_v (&state) < n) ++ do_hfa: ++ n = h >> 8; ++ if (state.nsrn + n <= N_V_ARG_REG) + { +- state.nsrn = N_V_ARG_REG; +- avalue[i] = allocate_to_stack (&state, stack, ty->alignment, +- ty->size); ++ void *reg = &context->v[state.nsrn]; ++ state.nsrn += n; ++ ++ /* Eeek! We need a pointer to the structure, however the ++ homogeneous float elements are being passed in individual ++ registers, therefore for float and double the structure ++ is not represented as a contiguous sequence of bytes in ++ our saved register context. We don't need the original ++ contents of the register storage, so we reformat the ++ structure into the same memory. */ ++ avalue[i] = compress_hfa_type (reg, reg, h); + } + else + { +- switch (h & 0xff) +- { +- case FFI_TYPE_FLOAT: +- { +- /* Eeek! 
We need a pointer to the structure, +- however the homogeneous float elements are +- being passed in individual S registers, +- therefore the structure is not represented as +- a contiguous sequence of bytes in our saved +- register context. We need to fake up a copy +- of the structure laid out in memory +- correctly. The fake can be tossed once the +- closure function has returned hence alloca() +- is sufficient. */ +- unsigned j; +- UINT32 *p = avalue[i] = alloca (ty->size); +- for (j = 0; j < n; j++) +- memcpy (&p[j], +- allocate_to_s (context, &state), +- sizeof (*p)); +- break; +- } +- +- case FFI_TYPE_DOUBLE: +- { +- /* Eeek! We need a pointer to the structure, +- however the homogeneous float elements are +- being passed in individual S registers, +- therefore the structure is not represented as +- a contiguous sequence of bytes in our saved +- register context. We need to fake up a copy +- of the structure laid out in memory +- correctly. The fake can be tossed once the +- closure function has returned hence alloca() +- is sufficient. */ +- unsigned j; +- UINT64 *p = avalue[i] = alloca (ty->size); +- for (j = 0; j < n; j++) +- memcpy (&p[j], +- allocate_to_d (context, &state), +- sizeof (*p)); +- break; +- } +- +- case FFI_TYPE_LONGDOUBLE: +- memcpy (&avalue[i], +- allocate_to_v (context, &state), +- sizeof (*avalue)); +- break; +- +- default: +- FFI_ASSERT (0); +- break; +- } ++ state.nsrn = N_V_ARG_REG; ++ avalue[i] = allocate_to_stack (&state, stack, ++ ty->alignment, s); + } + } +- else if (ty->size > 16) ++ else if (s > 16) + { + /* Replace Composite type of size greater than 16 with a + pointer. 
*/ +- memcpy (&avalue[i], +- allocate_to_register_or_stack (context, stack, +- &state, FFI_TYPE_POINTER), +- sizeof (avalue[i])); +- } +- else if (available_x (&state) >= (ty->size + 7) / 8) +- { +- avalue[i] = get_x_addr (context, state.ngrn); +- state.ngrn += (ty->size + 7) / 8; ++ avalue[i] = *(void **) ++ allocate_int_to_reg_or_stack (context, &state, stack, ++ sizeof (void *)); + } + else + { +- state.ngrn = N_X_ARG_REG; +- +- avalue[i] = allocate_to_stack (&state, stack, ty->alignment, +- ty->size); ++ n = (s + 7) / 8; ++ if (state.ngrn + n <= N_X_ARG_REG) ++ { ++ avalue[i] = &context->x[state.ngrn]; ++ state.ngrn += n; ++ } ++ else ++ { ++ state.ngrn = N_X_ARG_REG; ++ avalue[i] = allocate_to_stack (&state, stack, ++ ty->alignment, s); ++ } + } + break; + + default: +- FFI_ASSERT (0); +- break; ++ abort(); + } + } + +- /* Figure out where the return value will be passed, either in +- registers or in a memory block allocated by the caller and passed +- in x8. */ +- +- if (is_register_candidate (cif->rtype)) ++ /* Figure out where the return value will be passed, either in registers ++ or in a memory block allocated by the caller and passed in x8. */ ++ rtype = cif->rtype; ++ if (is_register_candidate (rtype)) + { ++ size_t s = rtype->size; ++ int t; ++ + /* Register candidates are *always* returned in registers. */ + + /* Allocate a scratchpad for the return value, we will let the + callee scrible the result into the scratch pad then move the + contents into the appropriate return value location for the + call convention. */ +- rvalue = alloca (cif->rtype->size); ++ rvalue = alloca (s); + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Copy the return value into the call context so that it is returned + as expected to our caller. 
*/ +- switch (cif->rtype->type) ++ t = rtype->type; ++ switch (t) + { + case FFI_TYPE_VOID: + break; + ++ case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: +- case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: +- case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_SINT64: ++ case FFI_TYPE_POINTER: ++ context->x[0] = extend_integer_type (rvalue, t); ++ break; ++ + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: +- { +- void *addr = get_basic_type_addr (cif->rtype->type, context, 0); +- copy_basic_type (addr, rvalue, cif->rtype->type); +- break; +- } ++ extend_hfa_type (&context->v[0], rvalue, 0x100 + t); ++ break; ++ + case FFI_TYPE_STRUCT: + h = is_hfa (cif->rtype); + if (h) +- { +- int j; +- int type = h & 0xff; +- int elems = h >> 8; +- for (j = 0; j < elems; j++) +- { +- void *reg = get_basic_type_addr (type, context, j); +- copy_basic_type (reg, rvalue, type); +- rvalue += get_basic_type_size (type); +- } +- } +- else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) +- { +- size_t size = ALIGN (cif->rtype->size, sizeof (UINT64)) ; +- memcpy (get_x_addr (context, 0), rvalue, size); +- } ++ extend_hfa_type (&context->v[0], rvalue, h); + else +- { +- FFI_ASSERT (0); ++ { ++ FFI_ASSERT (s <= 16); ++ memcpy (&context->x[0], rvalue, s); + } + break; ++ + default: +- FFI_ASSERT (0); +- break; ++ abort(); + } + } + else +-- +2.7.4.huawei.3 + diff --git a/0199-Define-_GNU_SOURCE-on-Linux-for-mremap.patch b/0199-Define-_GNU_SOURCE-on-Linux-for-mremap.patch new file mode 100644 index 0000000..20185cd --- /dev/null +++ b/0199-Define-_GNU_SOURCE-on-Linux-for-mremap.patch @@ -0,0 +1,38 @@ +From 1e82e1cda43dacd8b6ab2d9ac4db33523d86f5dc Mon Sep 17 00:00:00 2001 +From: Berker Peksag +Date: Mon, 7 Mar 2016 18:38:10 +0200 +Subject: [PATCH 199/411] Define _GNU_SOURCE on Linux for mremap() + +This was committed to CPython's libffi copy in 
+https://bugs.python.org/issue10309 + +mremap() documentation says _GNU_SOURCE needs to +be defined in order to use mremap(): see the +synopsis section at http://linux.die.net/man/2/mremap + +Original commit: https://hg.python.org/cpython/rev/9986fff720a2 + +Original patch was written by Hallvard B Furuseth. +--- + src/dlmalloc.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/dlmalloc.c b/src/dlmalloc.c +index 7e4ea83..d63dd36 100644 +--- a/src/dlmalloc.c ++++ b/src/dlmalloc.c +@@ -438,6 +438,11 @@ DEFAULT_MMAP_THRESHOLD default: 256K + + */ + ++#if defined __linux__ && !defined _GNU_SOURCE ++/* mremap() on Linux requires this via sys/mman.h */ ++#define _GNU_SOURCE 1 ++#endif ++ + #ifndef WIN32 + #ifdef _WIN32 + #define WIN32 1 +-- +1.8.3.1 + diff --git a/0208-Don-t-dereference-ecif-before-NULL-check.patch b/0208-Don-t-dereference-ecif-before-NULL-check.patch new file mode 100644 index 0000000..db80225 --- /dev/null +++ b/0208-Don-t-dereference-ecif-before-NULL-check.patch @@ -0,0 +1,32 @@ +From cf4b2a50413ecb8931eb1a94437497694f189c47 Mon Sep 17 00:00:00 2001 +From: Tom Tromey +Date: Fri, 17 Jun 2016 10:09:44 +0100 +Subject: [PATCH 208/411] Don't dereference "ecif" before NULL check + +Fixes #260 +--- + src/microblaze/ffi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/microblaze/ffi.c b/src/microblaze/ffi.c +index ea962ea..5733e6e 100644 +--- a/src/microblaze/ffi.c ++++ b/src/microblaze/ffi.c +@@ -46,12 +46,12 @@ void ffi_prep_args(void* stack, extended_cif* ecif) + void** p_argv; + void* stack_args_p = stack; + +- p_argv = ecif->avalue; +- + if (ecif == NULL || ecif->cif == NULL) { + return; /* no description to prepare */ + } + ++ p_argv = ecif->avalue; ++ + if ((ecif->cif->rtype != NULL) && + (ecif->cif->rtype->type == FFI_TYPE_STRUCT)) + { +-- +1.8.3.1 + diff --git a/0252-Fix-misaligned-memory-access-in-ffi_call_int.patch b/0252-Fix-misaligned-memory-access-in-ffi_call_int.patch new file mode 100644 index 
0000000..5f0b75f --- /dev/null +++ b/0252-Fix-misaligned-memory-access-in-ffi_call_int.patch @@ -0,0 +1,29 @@ +From 9c12209d2eac40238eefb4255994277918e7eff1 Mon Sep 17 00:00:00 2001 +From: Francis Ricci +Date: Thu, 3 Aug 2017 10:46:28 -0700 +Subject: [PATCH 252/411] Fix misaligned memory access in ffi_call_int + +--- + src/x86/ffi64.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c +index 2603a3a..757930b 100644 +--- a/src/x86/ffi64.c ++++ b/src/x86/ffi64.c +@@ -646,10 +646,10 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + break; + case X86_64_SSE_CLASS: + case X86_64_SSEDF_CLASS: +- reg_args->sse[ssecount++].i64 = *(UINT64 *) a; ++ memcpy (®_args->sse[ssecount++].i64, a, sizeof(UINT64)); + break; + case X86_64_SSESF_CLASS: +- reg_args->sse[ssecount++].i32 = *(UINT32 *) a; ++ memcpy (®_args->sse[ssecount++].i32, a, sizeof(UINT32)); + break; + default: + abort(); +-- +1.8.3.1 + diff --git a/0333-Fully-allocate-file-backing-writable-maps-389.patch b/0333-Fully-allocate-file-backing-writable-maps-389.patch new file mode 100644 index 0000000..3b07acc --- /dev/null +++ b/0333-Fully-allocate-file-backing-writable-maps-389.patch @@ -0,0 +1,67 @@ +From d46406088d28b038a0a0f7396d9621f431482f6a Mon Sep 17 00:00:00 2001 +From: "Ryan C. Underwood" +Date: Sun, 18 Mar 2018 07:00:42 -0700 +Subject: [PATCH 333/411] Fully allocate file backing writable maps (#389) + +When ftruncate() is used on a filesystem supporting sparse files, +space in the file is not actually allocated. Then, when the file +is mmap'd and libffi writes to the mapping, SIGBUS is thrown to +the calling application. Instead, always fully allocate the file +that will back writable maps. 
+--- + src/closures.c | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/src/closures.c b/src/closures.c +index 59ce828..15e6e0f 100644 +--- a/src/closures.c ++++ b/src/closures.c +@@ -723,6 +723,36 @@ open_temp_exec_file (void) + return fd; + } + ++/* We need to allocate space in a file that will be backing a writable ++ mapping. Several problems exist with the usual approaches: ++ - fallocate() is Linux-only ++ - posix_fallocate() is not available on all platforms ++ - ftruncate() does not allocate space on filesystems with sparse files ++ Failure to allocate the space will cause SIGBUS to be thrown when ++ the mapping is subsequently written to. */ ++static int ++allocate_space (int fd, off_t offset, off_t len) ++{ ++ static size_t page_size; ++ ++ /* Obtain system page size. */ ++ if (!page_size) ++ page_size = sysconf(_SC_PAGESIZE); ++ ++ unsigned char buf[page_size]; ++ memset (buf, 0, page_size); ++ ++ while (len > 0) ++ { ++ off_t to_write = (len < page_size) ? len : page_size; ++ if (write (fd, buf, to_write) < to_write) ++ return -1; ++ len -= to_write; ++ } ++ ++ return 0; ++} ++ + /* Map in a chunk of memory from the temporary exec file into separate + locations in the virtual memory address space, one writable and one + executable. Returns the address of the writable portion, after +@@ -744,7 +774,7 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset) + + offset = execsize; + +- if (ftruncate (execfd, offset + length)) ++ if (allocate_space (execfd, offset, length)) + return MFAIL; + + flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS); +-- +1.8.3.1 + diff --git a/ffi-multilib.h b/ffi-multilib.h new file mode 100644 index 0000000..50a6226 --- /dev/null +++ b/ffi-multilib.h @@ -0,0 +1,23 @@ +/* This file is here to prevent a file conflict on multiarch systems. */ +#ifdef ffi_wrapper_h +#error "Do not define ffi_wrapper_h!" 
+#endif +#define ffi_wrapper_h + +#if defined(__i386__) +#include "ffi-i386.h" +#elif defined(__powerpc64__) +#include "ffi-ppc64.h" +#elif defined(__powerpc__) +#include "ffi-ppc.h" +#elif defined(__s390x__) +#include "ffi-s390x.h" +#elif defined(__s390__) +#include "ffi-s390.h" +#elif defined(__x86_64__) +#include "ffi-x86_64.h" +#else +#error "The libffi-devel package is not usable with the architecture." +#endif + +#undef ffi_wrapper_h diff --git a/ffitarget-multilib.h b/ffitarget-multilib.h new file mode 100644 index 0000000..b2ed545 --- /dev/null +++ b/ffitarget-multilib.h @@ -0,0 +1,23 @@ +/* This file is here to prevent a file conflict on multiarch systems. */ +#ifdef ffitarget_wrapper_h +#error "Do not define ffitarget_wrapper_h!" +#endif +#define ffitarget_wrapper_h + +#if defined(__i386__) +#include "ffitarget-i386.h" +#elif defined(__powerpc64__) +#include "ffitarget-ppc64.h" +#elif defined(__powerpc__) +#include "ffitarget-ppc.h" +#elif defined(__s390x__) +#include "ffitarget-s390x.h" +#elif defined(__s390__) +#include "ffitarget-s390.h" +#elif defined(__x86_64__) +#include "ffitarget-x86_64.h" +#else +#error "The libffi-devel package is not usable with the architecture." 
+#endif + +#undef ffitarget_wrapper_h diff --git a/libffi-3.1-aarch64-fix-exec-stack.patch b/libffi-3.1-aarch64-fix-exec-stack.patch new file mode 100644 index 0000000..e20c920 --- /dev/null +++ b/libffi-3.1-aarch64-fix-exec-stack.patch @@ -0,0 +1,11 @@ +--- a/src/aarch64/sysv.S ++++ b/src/aarch64/sysv.S +@@ -396,3 +396,8 @@ + #ifdef __ELF__ + .size CNAME(ffi_closure_SYSV), .-CNAME(ffi_closure_SYSV) + #endif ++ ++#if defined __ELF__ && defined __linux__ ++ .section .note.GNU-stack,"",%progbits ++#endif ++ diff --git a/libffi-3.1-fix-exec-stack.patch b/libffi-3.1-fix-exec-stack.patch new file mode 100644 index 0000000..4c2a59f --- /dev/null +++ b/libffi-3.1-fix-exec-stack.patch @@ -0,0 +1,31 @@ +From 978c9540154d320525488db1b7049277122f736d Mon Sep 17 00:00:00 2001 +From: Samuli Suominen +Date: Sat, 31 May 2014 08:53:10 -0400 +Subject: [PATCH] Add missing GNU stack markings in win32.S + +--- + src/x86/win32.S | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/x86/win32.S b/src/x86/win32.S +index daf0e79..e42baf2 100644 +--- a/src/x86/win32.S ++++ b/src/x86/win32.S +@@ -1,5 +1,6 @@ + /* ----------------------------------------------------------------------- +- win32.S - Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. ++ win32.S - Copyright (c) 2014 Anthony Green ++ Copyright (c) 1996, 1998, 2001, 2002, 2009 Red Hat, Inc. 
+ Copyright (c) 2001 John Beniton + Copyright (c) 2002 Ranjit Mathew + Copyright (c) 2009 Daniel Witte +@@ -1304,3 +1305,6 @@ L_ffi_closure_SYSV_inner$stub: + + #endif /* !_MSC_VER */ + ++#if defined __ELF__ && defined __linux__ ++ .section .note.GNU-stack,"",@progbits ++#endif +-- +1.9.3 + diff --git a/libffi-3.1-fix-include-path.patch b/libffi-3.1-fix-include-path.patch new file mode 100644 index 0000000..5a3b7a5 --- /dev/null +++ b/libffi-3.1-fix-include-path.patch @@ -0,0 +1,17 @@ +diff -up libffi-3.1/libffi.pc.in.fixpath libffi-3.1/libffi.pc.in +--- libffi-3.1/libffi.pc.in.fixpath 2014-04-25 19:45:13.000000000 +0200 ++++ libffi-3.1/libffi.pc.in 2014-06-12 12:06:06.000000000 +0200 +@@ -1,11 +1,10 @@ + prefix=@prefix@ + exec_prefix=@exec_prefix@ + libdir=@libdir@ +-toolexeclibdir=@toolexeclibdir@ +-includedir=${libdir}/@PACKAGE_NAME@-@PACKAGE_VERSION@/include ++includedir=@includedir@ + + Name: @PACKAGE_NAME@ + Description: Library supporting Foreign Function Interfaces + Version: @PACKAGE_VERSION@ +-Libs: -L${toolexeclibdir} -lffi ++Libs: -L${libdir} -lffi + Cflags: -I${includedir} diff --git a/libffi-3.1.tar.gz b/libffi-3.1.tar.gz new file mode 100644 index 0000000..fed33dc Binary files /dev/null and b/libffi-3.1.tar.gz differ diff --git a/libffi-aarch64-rhbz1174037.patch b/libffi-aarch64-rhbz1174037.patch new file mode 100644 index 0000000..dbf6308 --- /dev/null +++ b/libffi-aarch64-rhbz1174037.patch @@ -0,0 +1,11 @@ +--- libffi-3.1/src/aarch64/ffi.c.orig 2014-04-25 18:45:13.000000000 +0100 ++++ libffi-3.1/src/aarch64/ffi.c 2015-01-15 02:36:56.314906455 +0000 +@@ -728,7 +728,7 @@ + state.ngrn = N_X_ARG_REG; + + memcpy (allocate_to_stack (&state, stack, ty->alignment, +- ty->size), ecif->avalue + i, ty->size); ++ ty->size), ecif->avalue[i], ty->size); + } + break; + diff --git a/libffi.spec b/libffi.spec new file mode 100644 index 0000000..2101a7d --- /dev/null +++ b/libffi.spec @@ -0,0 +1,134 @@ +%global target_arch %{ix86} x86_64 + +Name: libffi +Version: 
3.1 +Release: 19 +Summary: A Portable Foreign Function Interface Library +License: MIT +URL: http://sourceware.org/libffi + +Source0: ftp://sourceware.org/pub/libffi/%{name}-%{version}.tar.gz +Source1: ffi-multilib.h +Source2: ffitarget-multilib.h +Patch0: libffi-3.1-fix-include-path.patch +Patch1: libffi-3.1-fix-exec-stack.patch +Patch2: libffi-aarch64-rhbz1174037.patch +Patch3: libffi-3.1-aarch64-fix-exec-stack.patch + +Patch6000:0012-Fix-non-variadic-CIF-initialization-for-Apple-ARM64.patch +Patch6001:0013-Fix-alignment-of-FFI_TYPE_FLOAT-for-Apple-s-ARM64-AB.patch +Patch6002:0020-Fix-Werror-declaration-after-statement-problem.patch +Patch6003:0052-aarch64-Fix-non-apple-compilation.patch +Patch6004:0053-aarch64-Improve-is_hfa.patch +Patch6005:0054-aarch64-Always-distinguish-LONGDOUBLE.patch +Patch6006:0055-aarch64-Simplify-AARCH64_STACK_ALIGN.patch +Patch6007:0056-aarch64-Reduce-the-size-of-register_context.patch +Patch6008:0058-aarch64-Treat-void-return-as-not-passed-in-registers.patch +Patch6009:0059-aarch64-Tidy-up-abi-manipulation.patch + +Patch6010:0199-Define-_GNU_SOURCE-on-Linux-for-mremap.patch +Patch6011:0208-Don-t-dereference-ecif-before-NULL-check.patch +Patch6012:0252-Fix-misaligned-memory-access-in-ffi_call_int.patch +Patch6013:0333-Fully-allocate-file-backing-writable-maps-389.patch + +BuildRequires: gcc + +%description +Compilers for high level languages generate code that follows certain conventions. These +conventions are necessary, in part, for separate compilation to work. One such convention +is the "calling convention". The "calling convention" is a set of assumptions made by the +compiler about where function arguments will be found on entry to a function. A "calling +convention" also specifies where the return value for a function is found. + +Some programs may not know at the time of compilation what arguments are to be passed to a +function. 
For instance, an interpreter may be told at run-time about the number and types +of arguments used to call a given function. Libffi can be used in such programs to provide +a bridge from the interpreter program to compiled code. + +The libffi library provides a portable, high level programming interface to various calling +conventions. This allows a programmer to call any function specified by a call interface +description at run-time. + +FFI stands for Foreign Function Interface. A foreign function interface is the popular name +for the interface that allows code written in one language to call code written in another +language. The libffi library really only provides the lowest, machine dependent layer of a +fully featured foreign function interface. A layer must exist above libffi that handles type +conversions for values passed between the two languages. + +%package devel +Summary: Development files for libffi +Requires: %{name} = %{version}-%{release} +Requires: pkgconfig + +%description devel +The devel package contains the header files and libraries needed for developing applications that use libffi. + +%package help +Summary: libffi help +Requires: info +BuildArch: noarch + +%description help +The help package contains man pages and info documentation. 
+ +%prep +%autosetup -n %{name}-%{version} -p1 + +%build +%configure --disable-static +make %{?_smp_mflags} + + +%install +make install DESTDIR=$RPM_BUILD_ROOT +find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';' +rm -f $RPM_BUILD_ROOT%{_infodir}/dir + +base=%{_arch} +%ifarch %{ix86} +base=i386 +%endif + +mkdir -p $RPM_BUILD_ROOT%{_includedir} +%ifarch %{target_arch} +mv $RPM_BUILD_ROOT%{_libdir}/libffi-%{version}/include/ffi.h $RPM_BUILD_ROOT%{_includedir}/ffi-${base}.h +mv $RPM_BUILD_ROOT%{_libdir}/libffi-%{version}/include/ffitarget.h $RPM_BUILD_ROOT%{_includedir}/ffitarget-${base}.h +install -m644 %{SOURCE2} $RPM_BUILD_ROOT%{_includedir}/ffitarget.h +install -m644 %{SOURCE1} $RPM_BUILD_ROOT%{_includedir}/ffi.h +%else +mv $RPM_BUILD_ROOT%{_libdir}/libffi-%{version}/include/{ffi,ffitarget}.h $RPM_BUILD_ROOT%{_includedir} +%endif +rm -rf $RPM_BUILD_ROOT%{_libdir}/libffi-%{version} + + +%ldconfig_scriptlets + +%check +make check + +%post help +/sbin/install-info --info-dir=%{_infodir} %{_infodir}/libffi.info.gz || : + +%preun help +if [ $1 = 0 ] ;then + /sbin/install-info --delete --info-dir=%{_infodir} %{_infodir}/libffi.info.gz || : +fi + + +%files +%license LICENSE +%doc README +%{_libdir}/*.so.* + +%files devel +%{_libdir}/pkgconfig/*.pc +%{_includedir}/ffi*.h +%{_libdir}/*.so + +%files help +%{_mandir}/man3/*.gz +%{_infodir}/libffi.info.gz + +%changelog +* Mon Sep 09 2019 openEuler Buildteam - 3.1-19 +- Package init