From d05ecf05cf6abb524216e0a8416fdd78c3d7e6bf Mon Sep 17 00:00:00 2001 From: swcompiler Date: Fri, 29 Nov 2024 13:59:09 +0800 Subject: [PATCH 06/23] Sw64: Atomic and Locking Implementation --- sysdeps/sw_64/atomic-machine.h | 393 ++++++++++++++++++ sysdeps/sw_64/nptl/bits/struct_rwlock.h | 43 ++ sysdeps/sw_64/nptl/pthread_spin_lock.S | 55 +++ sysdeps/sw_64/nptl/pthread_spin_trylock.S | 56 +++ sysdeps/sw_64/sw8a/atomic-machine.h | 371 +++++++++++++++++ sysdeps/sw_64/sw8a/nptl/bits/struct_rwlock.h | 43 ++ sysdeps/sw_64/sw8a/nptl/pthread_spin_lock.S | 43 ++ .../sw_64/sw8a/nptl/pthread_spin_trylock.S | 44 ++ 8 files changed, 1048 insertions(+) create mode 100644 sysdeps/sw_64/atomic-machine.h create mode 100644 sysdeps/sw_64/nptl/bits/struct_rwlock.h create mode 100644 sysdeps/sw_64/nptl/pthread_spin_lock.S create mode 100644 sysdeps/sw_64/nptl/pthread_spin_trylock.S create mode 100644 sysdeps/sw_64/sw8a/atomic-machine.h create mode 100644 sysdeps/sw_64/sw8a/nptl/bits/struct_rwlock.h create mode 100644 sysdeps/sw_64/sw8a/nptl/pthread_spin_lock.S create mode 100644 sysdeps/sw_64/sw8a/nptl/pthread_spin_trylock.S diff --git a/sysdeps/sw_64/atomic-machine.h b/sysdeps/sw_64/atomic-machine.h new file mode 100644 index 00000000..7f379fbe --- /dev/null +++ b/sysdeps/sw_64/atomic-machine.h @@ -0,0 +1,393 @@ +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +typedef int8_t atomic8_t; +typedef uint8_t uatomic8_t; +typedef int_fast8_t atomic_fast8_t; +typedef uint_fast8_t uatomic_fast8_t; + +typedef int16_t atomic16_t; +typedef uint16_t uatomic16_t; +typedef int_fast16_t atomic_fast16_t; +typedef uint_fast16_t uatomic_fast16_t; + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 + +/* XXX Is this actually correct? */ +#define ATOMIC_EXCHANGE_USES_CAS 1 + +#define __MB " memb\n" + +/* Compare and exchange. For all of the "xxx" routines, we expect a + "__prev" and a "__cmp" variable to be provided by the enclosing scope, + in which values are returned. 
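   As a rough usage sketch (mirroring what the "bool" and "val" wrappers
   further down actually expand to; MEM, NEW and OLD are just placeholder
   arguments), an enclosing scope is expected to look like

     unsigned long __prev;
     int __cmp;
     __arch_compare_and_exchange_xxx_32_int (mem, new, old, "", __MB);

   after which __prev holds the value that was read from MEM, and __cmp
   is nonzero exactly when that value compared equal to OLD and NEW was
   successfully stored.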
*/ +// delete memb after the rd_f +#define __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__( \ + mb1 " bic %[__addr8],7,%[__addr64]\n" \ + " ins0b %[__new],%[__addr8],%[__snew]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " ext0b %[__tmp],%[__addr8],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " wr_f %[__cmp]\n" \ + " mask0b %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__cmp],2f\n" \ + " beq %[__tmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__snew] "=&r"(__snew), \ + [__tmp] "=&r"(__tmp), [__cmp] "=&r"(__cmp), \ + [__addr64] "=&r"(__addr64) \ + : [__addr8] "r"(mem), \ + [__old] "Ir"((uint64_t) (uint8_t) (uint64_t) (old)), \ + [__new] "r"(new) \ + : "memory"); \ + }) + +#define __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__( \ + mb1 " bic %[__addr16],7,%[__addr64]\n" \ + " ins1b %[__new],%[__addr16],%[__snew]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " ext1b %[__tmp],%[__addr16],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " wr_f %[__cmp]\n" \ + " mask1b %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__cmp],2f\n" \ + " beq %[__tmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__snew] "=&r"(__snew), \ + [__tmp] "=&r"(__tmp), [__cmp] "=&r"(__cmp), \ + [__addr64] "=&r"(__addr64) \ + : [__addr16] "r"(mem), \ + [__old] "Ir"((uint64_t) (uint16_t) (uint64_t) (old)), \ + [__new] "r"(new) \ + : "memory"); \ + }) +#define __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __addr, __tmp; \ + __asm__ __volatile__( \ + mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__prev],0(%[__addr])\n" \ + " cmpeq %[__prev],%[__old],%[__tmp]\n" \ + " wr_f %[__tmp]\n" \ + " mov %[__new],%[__cmp]\n" \ + " lstw %[__cmp],0(%[__addr])\n" \ + " rd_f %[__cmp]\n" \ + " beq %[__tmp],2f\n" \ + " beq %[__cmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__cmp] "=&r"(__cmp), [__tmp] "=&r"(__tmp), \ + [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), \ + [__old] "Ir"((uint64_t) (atomic32_t) (uint64_t) (old)), \ + [__new] "Ir"(new) \ + : "memory"); \ + }) + +#define __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __addr, __tmp; \ + __asm__ __volatile__(mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__prev],0(%[__addr])\n" \ + " cmpeq %[__prev],%[__old],%[__tmp]\n" \ + " wr_f %[__tmp]\n" \ + " mov %[__new],%[__cmp]\n" \ + " lstl %[__cmp],0(%[__addr])\n" \ + " rd_f %[__cmp]\n" \ + " beq %[__tmp],2f\n" \ + " beq %[__cmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__cmp] "=&r"(__cmp), \ + [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), \ + [__old] "Ir"((uint64_t) (old)), [__new] "Ir"(new) \ + : "memory"); \ + }) +/* For all "bool" routines, we return FALSE if exchange succesful. 
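   That is, the result is simply the negation of the __cmp flag left in
   place by the corresponding "xxx" macro above; as a hypothetical
   example (FUTEX_WORD being an arbitrary int variable),

     atomic_compare_and_exchange_bool_acq (&futex_word, 1, 0)

   evaluates to 0 (false) when futex_word contained 0 and was updated
   to 1, and to nonzero when some other value was found there.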
*/ + +#define __arch_compare_and_exchange_bool_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_8_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_16_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_32_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_64_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +/* For all "val" routines, return the old value whether exchange + successful or not. */ + +#define __arch_compare_and_exchange_val_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_8_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_16_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_32_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_64_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +/* Compare and exchange with "acquire" semantics, ie barrier after. */ + +#define atomic_compare_and_exchange_bool_acq(mem, new, old) \ + __atomic_bool_bysize (__arch_compare_and_exchange_bool, int, mem, new, old, \ + "", __MB) + +#define atomic_compare_and_exchange_val_acq(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, mem, new, old, \ + "", __MB) + +/* Compare and exchange with "release" semantics, ie barrier before. */ + +#define atomic_compare_and_exchange_val_rel(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, mem, new, old, \ + __MB, "") + +/* Atomically store value and return the previous value. 
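   In C terms, each of the __arch_exchange_N_int macros below behaves
   roughly like the sketch

     __ret = *mem;
     *mem = value;

   and evaluates to __ret, except that the load and the store are tied
   together by the lldw/lldl ... lstw/lstl pair and the whole sequence
   is retried until the conditional store succeeds.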
*/ + +#define __arch_exchange_8_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr64, __sval, __tmp1; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + mb1 " bic %[__addr8],7,%[__addr64]\n" \ + " ins0b %[__value],%[__addr8],%[__sval]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " ldi %[__tmp1],1\n" \ + " wr_f %[__tmp1]\n" \ + " ext0b %[__tmp],%[__addr8],%[__ret]\n" \ + " mask0b %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__sval] "=&r"(__sval), [__tmp] "=&r"(__tmp), \ + [__tmp1] "=&r"(__tmp1), [__addr64] "=&r"(__addr64) \ + : [__addr8] "r"(mem), [__value] "r"(value) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_16_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr64, __sval, __tmp1; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + mb1 " bic %[__addr16],7,%[__addr64]\n" \ + " ins1b %[__value],%[__addr16],%[__sval]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " ldi %[__tmp1],1\n" \ + " wr_f %[__tmp1]\n" \ + " ext1b %[__tmp],%[__addr16],%[__ret]\n" \ + " mask1b %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__sval] "=&r"(__sval), [__tmp] "=&r"(__tmp), \ + [__tmp1] "=&r"(__tmp1), [__addr64] "=&r"(__addr64) \ + : [__addr16] "r"(mem), [__value] "r"(value) \ + : "memory"); \ + __ret; \ + }) +#define __arch_exchange_32_int(mem, value, mb1, mb2) \ + ({ \ + signed int __tmp; \ + __typeof (*mem) __ret; \ + unsigned long __addr; \ + __asm__ __volatile__( \ + mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__ret],0(%[__addr])\n" \ + " ldi %[__tmp],1\n" \ + " wr_f %[__tmp]\n" \ + " mov %[__val],%[__tmp]\n" \ + " lstw %[__tmp],0(%[__addr])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"(value) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_64_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__ret],0(%[__addr])\n" \ + " ldi %[__tmp],1\n" \ + " wr_f %[__tmp]\n" \ + " mov %[__val],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"(value) \ + : "memory"); \ + __ret; \ + }) + +#define atomic_exchange_acq(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, "", __MB) + +#define atomic_exchange_rel(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, __MB, "") + +/* Atomically add value and return the previous (unincremented) value. 
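   Note that only the 32- and 64-bit widths are actually implemented
   below; the 8- and 16-bit variants simply trap if ever reached.  As a
   hypothetical example (COUNTER being an int),

     int old = atomic_exchange_and_add (&counter, 1);

   leaves counter incremented by one and old holding the value it had
   before the addition.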
*/ + +#define __arch_exchange_and_add_8_int(mem, value, mb1, mb2) \ + ({ \ + __builtin_trap (); \ + 0; \ + }) + +#define __arch_exchange_and_add_16_int(mem, value, mb1, mb2) \ + ({ \ + __builtin_trap (); \ + 0; \ + }) + +#define __arch_exchange_and_add_32_int(mem, value, mb1, mb2) \ + ({ \ + signed int __tmp; \ + __typeof (*mem) __ret; \ + unsigned long __addr; \ + __asm__ __volatile__( \ + mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__ret],0(%[__addr])\n" \ + " ldi %[__tmp],1\n" \ + " wr_f %[__tmp]\n" \ + " addw %[__ret],%[__val],%[__tmp]\n" \ + " lstw %[__tmp],0(%[__addr])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"((signed int) (value)) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_and_add_64_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + mb1 " ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__ret],0(%[__addr])\n" \ + " ldi %[__tmp],1\n" \ + " wr_f %[__tmp]\n" \ + " addl %[__ret],%[__val],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr])\n" \ + " rd_f %[__tmp]\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"((unsigned long) (value)) \ + : "memory"); \ + __ret; \ + }) + +/* ??? Barrier semantics for atomic_exchange_and_add appear to be + undefined. Use full barrier for now, as that's safe. */ +#define atomic_exchange_and_add(mem, value) \ + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB) + +/* ??? Blah, I'm lazy. Implement these later. Can do better than the + compare-and-exchange loop provided by generic code. + +#define atomic_decrement_if_positive(mem) +#define atomic_bit_test_set(mem, bit) + +*/ +#define atomic_full_barrier() __asm("memb" : : : "memory"); +#define atomic_read_barrier() __asm("memb" : : : "memory"); +#define atomic_write_barrier() __asm("memb" : : : "memory"); diff --git a/sysdeps/sw_64/nptl/bits/struct_rwlock.h b/sysdeps/sw_64/nptl/bits/struct_rwlock.h new file mode 100644 index 00000000..8cbeefc1 --- /dev/null +++ b/sysdeps/sw_64/nptl/bits/struct_rwlock.h @@ -0,0 +1,43 @@ +/* Sw_64 internal rwlock struct definitions. + Copyright (C) 2019-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _RWLOCK_INTERNAL_H +#define _RWLOCK_INTERNAL_H + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. 
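   The __PTHREAD_RWLOCK_INITIALIZER macro below therefore supplies ten
   zeros, one for each member declared above __flags, followed by the
   caller-provided flags value, so that (as a purely illustrative
   expansion)

     struct __pthread_rwlock_arch_t rw
       = { __PTHREAD_RWLOCK_INITIALIZER (0) };

   zero-initializes every field.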
*/ + unsigned int __flags; +}; + +#define __PTHREAD_RWLOCK_INITIALIZER(__flags) \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, __flags + +#endif diff --git a/sysdeps/sw_64/nptl/pthread_spin_lock.S b/sysdeps/sw_64/nptl/pthread_spin_lock.S new file mode 100644 index 00000000..b7e44839 --- /dev/null +++ b/sysdeps/sw_64/nptl/pthread_spin_lock.S @@ -0,0 +1,55 @@ +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson , 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + + .text + .align 4 + + .globl __pthread_spin_lock + .ent __pthread_spin_lock +__pthread_spin_lock: + .frame $sp, 0, $26, 0 + .prologue 0 + + memb +0: lldw $1, 0($16) + xor $1, 1, $1 + ldi $0, 0 + wr_f $1 + + ldi $2, 1 + lstw $2, 0($16) + rd_f $2 + beq $2, 1f + ret + +1: ldw $1, 0($16) + bne $1, 1b + unop + br 0b + + + + + .end __pthread_spin_lock +versioned_symbol (libc, __pthread_spin_lock, pthread_spin_lock, GLIBC_2_34) + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_2, GLIBC_2_34) +compat_symbol (libpthread, __pthread_spin_lock, pthread_spin_lock, GLIBC_2_2) +#endif diff --git a/sysdeps/sw_64/nptl/pthread_spin_trylock.S b/sysdeps/sw_64/nptl/pthread_spin_trylock.S new file mode 100644 index 00000000..8551c34a --- /dev/null +++ b/sysdeps/sw_64/nptl/pthread_spin_trylock.S @@ -0,0 +1,56 @@ +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson , 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . 
*/ + +#include + +#define _ERRNO_H 1 +#include + + .text + .align 4 + + .globl __pthread_spin_trylock + .ent __pthread_spin_trylock +__pthread_spin_trylock: + .frame $sp, 0, $26, 0 + .prologue 0 + + memb +0: lldw $1, 0($16) + xor $1, 1, $1 + ldi $2, 1 + ldi $0, EBUSY + wr_f $1 + + lstw $2, 0($16) + rd_f $2 + beq $1, 1f + beq $2, 2f + ldi $0, 0 + +1: ret +2: br 0b + + .end __pthread_spin_trylock +versioned_symbol (libc, __pthread_spin_trylock, pthread_spin_trylock, + GLIBC_2_34) + +#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_2, GLIBC_2_34) +compat_symbol (libpthread, __pthread_spin_trylock, pthread_spin_trylock, + GLIBC_2_2) +#endif diff --git a/sysdeps/sw_64/sw8a/atomic-machine.h b/sysdeps/sw_64/sw8a/atomic-machine.h new file mode 100644 index 00000000..db3320fc --- /dev/null +++ b/sysdeps/sw_64/sw8a/atomic-machine.h @@ -0,0 +1,371 @@ +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +typedef int8_t atomic8_t; +typedef uint8_t uatomic8_t; +typedef int_fast8_t atomic_fast8_t; +typedef uint_fast8_t uatomic_fast8_t; + +typedef int16_t atomic16_t; +typedef uint16_t uatomic16_t; +typedef int_fast16_t atomic_fast16_t; +typedef uint_fast16_t uatomic_fast16_t; + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 + +/* XXX Is this actually correct? */ +#define ATOMIC_EXCHANGE_USES_CAS 1 + +#ifdef UP +# define __MB /* nothing */ +#else +# define __MB " memb\n" +#endif + +/* Compare and exchange. For all of the "xxx" routines, we expect a + "__prev" and a "__cmp" variable to be provided by the enclosing scope, + in which values are returned. 
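   Unlike the base sw_64 implementation, the sw8a sequences below do not
   use the wr_f/rd_f flag instructions: the cmpeq result and the
   lstw/lstl success flag are tested directly with beq, so on exit
   __cmp is nonzero exactly when the exchange succeeded, which is all
   the "bool" and "val" wrappers below rely on.  (Likewise, __MB above
   collapses to nothing for UP builds, so uniprocessor configurations
   do not emit memb from this header at all.)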
*/ +// delete memb after the rd_f +#define __arch_compare_and_exchange_xxx_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__( \ + " bic %[__addr8],7,%[__addr64]\n" \ + " inslb %[__new],%[__addr8],%[__snew]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " extlb %[__tmp],%[__addr8],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " masklb %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__snew] "=&r"(__snew), \ + [__tmp] "=&r"(__tmp), [__cmp] "=&r"(__cmp), \ + [__addr64] "=&r"(__addr64) \ + : [__addr8] "r"(mem), \ + [__old] "Ir"((uint64_t) (uint8_t) (uint64_t) (old)), \ + [__new] "r"(new) \ + : "memory"); \ + }) + +#define __arch_compare_and_exchange_xxx_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __tmp, __snew, __addr64; \ + __asm__ __volatile__( \ + " bic %[__addr16],7,%[__addr64]\n" \ + " inslh %[__new],%[__addr16],%[__snew]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " extlh %[__tmp],%[__addr16],%[__prev]\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " masklh %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__snew],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__snew] "=&r"(__snew), \ + [__tmp] "=&r"(__tmp), [__cmp] "=&r"(__cmp), \ + [__addr64] "=&r"(__addr64) \ + : [__addr16] "r"(mem), \ + [__old] "Ir"((uint64_t) (uint16_t) (uint64_t) (old)), \ + [__new] "r"(new) \ + : "memory"); \ + }) +#define __arch_compare_and_exchange_xxx_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __addr; \ + __asm__ __volatile__( \ + " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__prev],0(%[__addr])\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mov %[__new],%[__cmp]\n" \ + " lstw %[__cmp],0(%[__addr])\n" \ + " beq %[__cmp],1b\n" \ + "2:" \ + : \ + [__prev] "=&r"(__prev), [__cmp] "=&r"(__cmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), \ + [__old] "Ir"((uint64_t) (atomic32_t) (uint64_t) (old)), \ + [__new] "Ir"(new) \ + : "memory"); \ + }) + +#define __arch_compare_and_exchange_xxx_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __addr; \ + __asm__ __volatile__(" ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__prev],0(%[__addr])\n" \ + " cmpeq %[__prev],%[__old],%[__cmp]\n" \ + " beq %[__cmp],2f\n" \ + " mov %[__new],%[__cmp]\n" \ + " lstl %[__cmp],0(%[__addr])\n" \ + " beq %[__cmp],1b\n" \ + "2:" \ + : [__prev] "=&r"(__prev), [__cmp] "=&r"(__cmp), \ + [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), \ + [__old] "Ir"((uint64_t) (old)), [__new] "Ir"(new) \ + : "memory"); \ + }) +/* For all "bool" routines, we return FALSE if exchange succesful. 
*/ + +#define __arch_compare_and_exchange_bool_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_8_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_16_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_32_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +#define __arch_compare_and_exchange_bool_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_64_int (mem, new, old, mb1, mb2); \ + !__cmp; \ + }) + +/* For all "val" routines, return the old value whether exchange + successful or not. */ + +#define __arch_compare_and_exchange_val_8_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_8_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_16_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_16_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_32_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_32_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +#define __arch_compare_and_exchange_val_64_int(mem, new, old, mb1, mb2) \ + ({ \ + unsigned long __prev; \ + int __cmp; \ + __arch_compare_and_exchange_xxx_64_int (mem, new, old, mb1, mb2); \ + (typeof (*mem)) __prev; \ + }) + +/* Compare and exchange with "acquire" semantics, ie barrier after. */ + +#define atomic_compare_and_exchange_bool_acq(mem, new, old) \ + __atomic_bool_bysize (__arch_compare_and_exchange_bool, int, mem, new, old, \ + "", __MB) + +#define atomic_compare_and_exchange_val_acq(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, mem, new, old, \ + "", __MB) + +/* Compare and exchange with "release" semantics, ie barrier before. */ + +#define atomic_compare_and_exchange_val_rel(mem, new, old) \ + __atomic_val_bysize (__arch_compare_and_exchange_val, int, mem, new, old, \ + __MB, "") + +/* Atomically store value and return the previous value. 
*/ + +#define __arch_exchange_8_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr64, __sval; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__(" bic %[__addr8],7,%[__addr64]\n" \ + " inslb %[__value],%[__addr8],%[__sval]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " extlb %[__tmp],%[__addr8],%[__ret]\n" \ + " masklb %[__tmp],%[__addr8],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__sval] "=&r"(__sval), \ + [__tmp] "=&r"(__tmp), [__addr64] "=&r"(__addr64) \ + : [__addr8] "r"(mem), [__value] "r"(value) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_16_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr64, __sval, __tmp1; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__(" bic %[__addr16],7,%[__addr64]\n" \ + " inslh %[__value],%[__addr16],%[__sval]\n" \ + "1: lldl %[__tmp],0(%[__addr64])\n" \ + " extlh %[__tmp],%[__addr16],%[__ret]\n" \ + " masklh %[__tmp],%[__addr16],%[__tmp]\n" \ + " or %[__sval],%[__tmp],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr64])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__sval] "=&r"(__sval), \ + [__tmp] "=&r"(__tmp), [__addr64] "=&r"(__addr64) \ + : [__addr16] "r"(mem), [__value] "r"(value) \ + : "memory"); \ + __ret; \ + }) +#define __arch_exchange_32_int(mem, value, mb1, mb2) \ + ({ \ + signed int __tmp; \ + __typeof (*mem) __ret; \ + unsigned long __addr; \ + __asm__ __volatile__( \ + " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__ret],0(%[__addr])\n" \ + " mov %[__val],%[__tmp]\n" \ + " lstw %[__tmp],0(%[__addr])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"(value) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_64_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + " ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__ret],0(%[__addr])\n" \ + " mov %[__val],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"(value) \ + : "memory"); \ + __ret; \ + }) + +#define atomic_exchange_acq(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, "", __MB) + +#define atomic_exchange_rel(mem, value) \ + __atomic_val_bysize (__arch_exchange, int, mem, value, __MB, "") + +/* Atomically add value and return the previous (unincremented) value. 
*/ + +#define __arch_exchange_and_add_8_int(mem, value, mb1, mb2) \ + ({ \ + __builtin_trap (); \ + 0; \ + }) + +#define __arch_exchange_and_add_16_int(mem, value, mb1, mb2) \ + ({ \ + __builtin_trap (); \ + 0; \ + }) + +#define __arch_exchange_and_add_32_int(mem, value, mb1, mb2) \ + ({ \ + signed int __tmp; \ + __typeof (*mem) __ret; \ + unsigned long __addr; \ + __asm__ __volatile__( \ + " ldi %[__addr],%[__mem]\n" \ + "1: lldw %[__ret],0(%[__addr])\n" \ + " addw %[__ret],%[__val],%[__tmp]\n" \ + " lstw %[__tmp],0(%[__addr])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"((signed int) (value)) \ + : "memory"); \ + __ret; \ + }) + +#define __arch_exchange_and_add_64_int(mem, value, mb1, mb2) \ + ({ \ + unsigned long __tmp, __addr; \ + __typeof (*mem) __ret; \ + __asm__ __volatile__( \ + " ldi %[__addr],%[__mem]\n" \ + "1: lldl %[__ret],0(%[__addr])\n" \ + " addl %[__ret],%[__val],%[__tmp]\n" \ + " lstl %[__tmp],0(%[__addr])\n" \ + " beq %[__tmp],1b\n" \ + : [__ret] "=&r"(__ret), [__tmp] "=&r"(__tmp), [__addr] "=&r"(__addr) \ + : [__mem] "m"(*(mem)), [__val] "Ir"((unsigned long) (value)) \ + : "memory"); \ + __ret; \ + }) + +/* ??? Barrier semantics for atomic_exchange_and_add appear to be + undefined. Use full barrier for now, as that's safe. */ +#define atomic_exchange_and_add(mem, value) \ + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB) + +/* ??? Blah, I'm lazy. Implement these later. Can do better than the + compare-and-exchange loop provided by generic code. + +#define atomic_decrement_if_positive(mem) +#define atomic_bit_test_set(mem, bit) + +*/ +# ifndef UP +# define atomic_full_barrier() __asm ("memb" : : : "memory"); +# define atomic_read_barrier() __asm ("memb" : : : "memory"); +# define atomic_write_barrier() __asm ("memb" : : : "memory"); +# endif diff --git a/sysdeps/sw_64/sw8a/nptl/bits/struct_rwlock.h b/sysdeps/sw_64/sw8a/nptl/bits/struct_rwlock.h new file mode 100644 index 00000000..8cbeefc1 --- /dev/null +++ b/sysdeps/sw_64/sw8a/nptl/bits/struct_rwlock.h @@ -0,0 +1,43 @@ +/* Sw_64 internal rwlock struct definitions. + Copyright (C) 2019-2023 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _RWLOCK_INTERNAL_H +#define _RWLOCK_INTERNAL_H + +struct __pthread_rwlock_arch_t +{ + unsigned int __readers; + unsigned int __writers; + unsigned int __wrphase_futex; + unsigned int __writers_futex; + unsigned int __pad3; + unsigned int __pad4; + int __cur_writer; + int __shared; + unsigned long int __pad1; + unsigned long int __pad2; + /* FLAGS must stay at this position in the structure to maintain + binary compatibility. 
*/ + unsigned int __flags; +}; + +#define __PTHREAD_RWLOCK_INITIALIZER(__flags) \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, __flags + +#endif diff --git a/sysdeps/sw_64/sw8a/nptl/pthread_spin_lock.S b/sysdeps/sw_64/sw8a/nptl/pthread_spin_lock.S new file mode 100644 index 00000000..ab3408a3 --- /dev/null +++ b/sysdeps/sw_64/sw8a/nptl/pthread_spin_lock.S @@ -0,0 +1,43 @@ + + +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson , 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +/* SHIPS20171102_LOCK_READ_CONDITION_WRITE. */ + .text + .align 4 + + .globl pthread_spin_lock + .ent pthread_spin_lock +pthread_spin_lock: + .frame $sp, 0, $26, 0 + .prologue 0 +0: lldw $1, 0($16) + ldi $2, 1 + ldi $0, 0 + bne $1, 1f + + lstw $2, 0($16) + beq $2, 1f + ret + +1: ldw $1, 0($16) + bne $1, 1b + unop + br 0b + .end pthread_spin_lock diff --git a/sysdeps/sw_64/sw8a/nptl/pthread_spin_trylock.S b/sysdeps/sw_64/sw8a/nptl/pthread_spin_trylock.S new file mode 100644 index 00000000..374dccae --- /dev/null +++ b/sysdeps/sw_64/sw8a/nptl/pthread_spin_trylock.S @@ -0,0 +1,44 @@ + + +/* Copyright (C) 2003-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson , 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +/* SHIPS20171102_LOCK_READ_CONDITION_WRITE. */ +#define _ERRNO_H 1 +#include + + .text + .align 4 + + .globl pthread_spin_trylock + .ent pthread_spin_trylock +pthread_spin_trylock: + .frame $sp, 0, $26, 0 + .prologue 0 +0: lldw $1, 0($16) + ldi $2, 1 + ldi $0, EBUSY + bne $1, 1f + + lstw $2, 0($16) + beq $2, 2f + ldi $0, 0 + +1: ret +2: br 0b + .end pthread_spin_trylock -- 2.25.1