1 files changed, 78 insertions, 0 deletions
diff --git a/pkg/safecopy/memcpy_arm64.s b/pkg/safecopy/memcpy_arm64.s
new file mode 100644
index 000000000..e7e541565
--- /dev/null
+++ b/pkg/safecopy/memcpy_arm64.s
@@ -0,0 +1,78 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// handleMemcpyFault returns (the value stored in R0, the value stored in R1).
+// Control is transferred to it when memcpy below receives SIGSEGV or SIGBUS,
+// with the faulting address stored in R0 and the signal number stored in R1.
+//
+// It must have the same frame configuration as memcpy so that it can undo any
+// potential call frame set up by the assembler.
+TEXT handleMemcpyFault(SB), NOSPLIT, $0-36
+	MOVD R0, addr+24(FP)  // Store the faulting address (R0) in the 64-bit result slot at offset 24.
+	MOVW R1, sig+32(FP)   // Store the signal number (R1) in the 32-bit result slot at offset 32.
+	RET
+
+// memcpy copies data from src to dst. If a SIGSEGV or SIGBUS signal is received
+// during the copy, it returns the address that caused the fault and the number
+// of the signal that was received. Otherwise, it returns an unspecified address
+// and a signal number of 0.
+//
+// Data is copied in order, such that if a fault happens at address p, it is
+// safe to assume that all data before p-maxRegisterSize has already been
+// successfully copied.
+//
+// The code is derived from the Go source runtime.memmove.
+//
+// func memcpy(dst, src unsafe.Pointer, n uintptr) (fault unsafe.Pointer, sig int32)
+TEXT ·memcpy(SB), NOSPLIT, $-8-36
+	// Store 0 as the returned signal number. If we run to completion,
+	// this is the value the caller will see; if a signal is received,
+	// handleMemcpyFault will store a different value in this address.
+	MOVW $0, sig+32(FP)
+
+	MOVD dst+0(FP), R3  // R3 is the destination cursor.
+	MOVD src+8(FP), R4  // R4 is the source cursor.
+	MOVD n+16(FP), R5   // R5 is N, the number of bytes to copy.
+	CMP $0, R5
+	BNE check
+	RET                 // N == 0: nothing to copy.
+
+check:
+	AND $~7, R5, R7     // R7 is N&~7.
+	SUB R7, R5, R6      // R6 is N&7.
+
+	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+	// R3 and R4 are advanced as we copy.
+
+	// (There may be implementations of armv8 where copying by bytes until
+	// at least one of source or dest is word aligned is a worthwhile
+	// optimization, but on the one tested so far (xgene) it did not
+	// make a significant difference.)
+
+	CMP $0, R7          // Do we need to do any word-by-word copying?
+	BEQ noforwardlarge
+	ADD R3, R7, R9      // R9 points just past where we copy by word.
+
+forwardlargeloop:
+	MOVD.P 8(R4), R8    // Load 8 bytes from the source, then advance R4 by 8; R8 is scratch.
+	MOVD.P R8, 8(R3)    // Store them to the destination, then advance R3 by 8.
+	CMP R3, R9          // Stop once R3 reaches the end of the word-copied region.
+	BNE forwardlargeloop
+
+noforwardlarge:
+	CMP $0, R6          // Do we need to do any byte-by-byte copying?
+	BNE forwardtail
+	RET                 // N was a multiple of 8: the copy is complete.
+
+forwardtail:
+	ADD R3, R6, R9      // R9 points just past the destination memory.
+
+forwardtailloop:
+	MOVBU.P 1(R4), R8   // Load one byte from the source, then advance R4 by 1.
+	MOVBU.P R8, 1(R3)   // Store it to the destination, then advance R3 by 1.
+	CMP R3, R9          // Stop once R3 reaches the end of the destination.
+	BNE forwardtailloop
+	RET