We don't asynchronously preempt if we are in the runtime. We do this by
checking the function name. However, this check failed to take inlining
into account: if a runtime function gets inlined into a non-runtime
function, it can still be preempted, and bad things can happen. One
instance of this is dounlockOSThread inlined into UnlockOSThread, which
is in turn inlined into a non-runtime function. Fix this by using the
innermost frame's function name.

Change-Id: Ifa036ce1320700aaaefd829b4bee0d04d05c395d
Reviewed-on: https://go-review.googlesource.com/c/go/+/211978
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Signed-off-by: Grooooot <isula@huawei.com>
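As a minimal user-level sketch of the frame-expansion behavior this fix relies on (ordinary Go code, not the runtime's internal preemption check; the helper names are illustrative): runtime.CallersFrames expands inlined calls into separate logical frames and returns the innermost frame first, and that innermost frame's Function name is what the check should consult.

package main

import (
	"fmt"
	"runtime"
)

// innermost reports the innermost logical frame of its caller.
// With inlining, one PC can map to several logical frames;
// CallersFrames returns the innermost of them first.
func innermost() runtime.Frame {
	var pc [1]uintptr
	runtime.Callers(2, pc[:]) // skip Callers and innermost itself
	f, _ := runtime.CallersFrames(pc[:]).Next()
	return f
}

func helper() runtime.Frame { return innermost() }

func main() {
	f := helper()
	// Prints main.helper even if the compiler inlines helper into main.
	fmt.Println(f.Function)
}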
From f1887468d1ae9781407f24a2b121ed34a6dfec4c Mon Sep 17 00:00:00 2001
From: Cherry Zhang <cherryyz@google.com>
Date: Fri, 27 Dec 2019 12:02:00 -0500
Subject: [PATCH] [release-branch.go1.13] runtime: ensure memmove write pointer atomically on ARM64

If a pointer write is not atomic and the GC is running concurrently,
the GC may observe a partially updated pointer, which may point to
unallocated or already dead memory. Most pointer writes, like the
store instructions generated by the compiler, are already atomic.
But we still need to be careful in places like memmove. In memmove,
we don't know which bits are pointers (or too expensive to query),
so we ensure that all aligned pointer-sized units are written
atomically.

Fixes #36361.
Updates #36101.

Change-Id: I1b3ca24c6b1ac8a8aaf9ee470115e9a89ec1b00b
Reviewed-on: https://go-review.googlesource.com/c/go/+/212626
Reviewed-by: Austin Clements <austin@google.com>
(cherry picked from commit ffbc02761abb47106ce88e09290a31513b5f6c8a)
---
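Before the diff itself, a minimal user-level sketch of where the guarantee above matters (an illustration, not part of the patch; the node type and program are made up): copying a slice of pointers with the built-in copy ultimately goes through the runtime's memmove over pointer-containing memory, so each aligned pointer-sized slot must be moved with a single store that a concurrently scanning GC can never observe half-written.

package main

import "fmt"

type node struct{ v int }

func main() {
	src := []*node{{1}, {2}, {3}}
	dst := make([]*node, len(src))
	// copy of a pointer slice is ultimately a memmove over pointer-containing
	// memory: each *node slot must be written with one aligned 8-byte store,
	// so a concurrently scanning GC sees either the old pointer or the new
	// one, never a torn value.
	copy(dst, src)
	fmt.Println(dst[0].v, dst[1].v, dst[2].v)
}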
diff --git a/src/runtime/memmove_arm64.s b/src/runtime/memmove_arm64.s
index ac29f94..cedb018 100644
--- a/src/runtime/memmove_arm64.s
+++ b/src/runtime/memmove_arm64.s
@@ -22,7 +22,7 @@
 	CMP	R3, R4
 	BLT	backward
 
-	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+	// Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes.
 	// R3 and R4 are advanced as we copy.
 
 	// (There may be implementations of armv8 where copying by bytes until
@@ -30,11 +30,12 @@
 	// optimization, but the on the one tested so far (xgene) it did not
 	// make a significance difference.)
 
-	CBZ	R7, noforwardlarge	// Do we need to do any doubleword-by-doubleword copying?
+	CBZ	R7, noforwardlarge	// Do we need to do any quadword copying?
 
 	ADD	R3, R7, R9	// R9 points just past where we copy by word
 
 forwardlargeloop:
+	// Copy 32 bytes at a time.
 	LDP.P	32(R4), (R8, R10)
 	STP.P	(R8, R10), 32(R3)
 	LDP	-16(R4), (R11, R12)
@@ -43,10 +44,26 @@
 	CBNZ	R7, forwardlargeloop
 
 noforwardlarge:
-	CBNZ	R6, forwardtail	// Do we need to do any byte-by-byte copying?
+	CBNZ	R6, forwardtail	// Do we need to copy any tail bytes?
 	RET
 
 forwardtail:
+	// There are R6 <= 31 bytes remaining to copy.
+	// This is large enough to still contain pointers,
+	// which must be copied atomically.
+	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
+	TBZ	$4, R6, 3(PC)	// write 16 bytes if R6&16 != 0
+	LDP.P	16(R4), (R8, R10)
+	STP.P	(R8, R10), 16(R3)
+
+	TBZ	$3, R6, 3(PC)	// write 8 bytes if R6&8 != 0
+	MOVD.P	8(R4), R8
+	MOVD.P	R8, 8(R3)
+
+	AND	$7, R6
+	CBNZ	R6, 2(PC)
+	RET
+
 	ADD	R3, R6, R9	// R9 points just past the destination memory
 
 forwardtailloop:
@@ -90,7 +107,7 @@
 	RET
 
 backward:
-	// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
+	// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords.
 	// R3 and R4 are advanced to the end of the destination/source buffers
 	// respectively and moved back as we copy.
 
@@ -99,13 +116,28 @@
 
 	CBZ	R6, nobackwardtail	// Do we need to do any byte-by-byte copying?
 
-	SUB	R6, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
+	AND	$7, R6, R12
+	CBZ	R12, backwardtaillarge
+
+	SUB	R12, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
 backwardtailloop:
+	// Copy sub-pointer-size tail.
 	MOVBU.W	-1(R4), R8
 	MOVBU.W	R8, -1(R3)
 	CMP	R9, R3
 	BNE	backwardtailloop
 
+backwardtaillarge:
+	// Do 8/16-byte write if possible.
+	// See comment at forwardtail.
+	TBZ	$3, R6, 3(PC)
+	MOVD.W	-8(R4), R8
+	MOVD.W	R8, -8(R3)
+
+	TBZ	$4, R6, 3(PC)
+	LDP.W	-16(R4), (R8, R10)
+	STP.W	(R8, R10), -16(R3)
+
 nobackwardtail:
 	CBNZ	R7, backwardlarge	// Do we need to do any doubleword-by-doubleword copying?
 	RET
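For readers who prefer Go to ARM64 assembly, a rough sketch of the new forward-tail order of operations (an illustrative rendering, not code from the patch): with at most 31 bytes left after the 32-byte main loop, bit 4 of the count selects one 16-byte chunk, bit 3 one 8-byte chunk, and only the final count&7 bytes are copied byte by byte, so no aligned pointer-sized word is ever split.

package memmove

// forwardTail mirrors the patched forwardtail path for n <= 31 remaining
// bytes: one optional 16-byte chunk, one optional 8-byte chunk, then the
// final n&7 bytes. Assuming the usual pointer alignment, those final bytes
// cannot contain a whole pointer, so copying them byte-wise cannot tear one.
func forwardTail(dst, src []byte, n int) {
	i := 0
	if n&16 != 0 {
		copy(dst[i:i+16], src[i:i+16]) // LDP.P/STP.P in the assembly
		i += 16
	}
	if n&8 != 0 {
		copy(dst[i:i+8], src[i:i+8]) // MOVD.P in the assembly
		i += 8
	}
	copy(dst[i:n], src[i:n]) // remaining n&7 bytes, copied byte-wise in the assembly
}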