Diffstat (limited to 'pkg/sentry/platform/ring0')
-rw-r--r--  pkg/sentry/platform/ring0/BUILD                                  50
-rw-r--r--  pkg/sentry/platform/ring0/aarch64.go                            111
-rw-r--r--  pkg/sentry/platform/ring0/defs.go                                22
-rw-r--r--  pkg/sentry/platform/ring0/defs_amd64.go                          12
-rw-r--r--  pkg/sentry/platform/ring0/defs_arm64.go                         143
-rw-r--r--  pkg/sentry/platform/ring0/entry_amd64.go                          6
-rw-r--r--  pkg/sentry/platform/ring0/entry_arm64.go                         60
-rw-r--r--  pkg/sentry/platform/ring0/entry_arm64.s                         786
-rw-r--r--  pkg/sentry/platform/ring0/gen_offsets/BUILD                      21
-rw-r--r--  pkg/sentry/platform/ring0/kernel.go                              24
-rw-r--r--  pkg/sentry/platform/ring0/kernel_amd64.go                        12
-rw-r--r--  pkg/sentry/platform/ring0/kernel_arm64.go                        72
-rw-r--r--  pkg/sentry/platform/ring0/lib_arm64.go                           58
-rw-r--r--  pkg/sentry/platform/ring0/lib_arm64.s                           217
-rw-r--r--  pkg/sentry/platform/ring0/lib_arm64_unsafe.go                   108
-rw-r--r--  pkg/sentry/platform/ring0/offsets_amd64.go                        5
-rw-r--r--  pkg/sentry/platform/ring0/offsets_arm64.go                      127
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/BUILD                       28
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/allocator.go                11
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go          2
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables.go               19
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go      215
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go          9
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go     2
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go         57
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go    80
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_test.go           2
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pagetables_x86.go            4
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pcids.go                   104
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go            32
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s             45
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/pcids_x86.go                95
-rw-r--r--  pkg/sentry/platform/ring0/pagetables/walker_arm64.go            314
-rw-r--r--  pkg/sentry/platform/ring0/x86.go                                  2
34 files changed, 2690 insertions, 165 deletions
diff --git a/pkg/sentry/platform/ring0/BUILD b/pkg/sentry/platform/ring0/BUILD
index 48b0ceaec..679b287c3 100644
--- a/pkg/sentry/platform/ring0/BUILD
+++ b/pkg/sentry/platform/ring0/BUILD
@@ -1,10 +1,10 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
+load("//tools:defs.bzl", "go_library")
load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
package(licenses = ["notice"])
go_template(
- name = "defs",
+ name = "defs_amd64",
srcs = [
"defs.go",
"defs_amd64.go",
@@ -14,11 +14,29 @@ go_template(
visibility = [":__subpackages__"],
)
+go_template(
+ name = "defs_arm64",
+ srcs = [
+ "aarch64.go",
+ "defs.go",
+ "defs_arm64.go",
+ "offsets_arm64.go",
+ ],
+ visibility = [":__subpackages__"],
+)
+
go_template_instance(
- name = "defs_impl",
- out = "defs_impl.go",
+ name = "defs_impl_amd64",
+ out = "defs_impl_amd64.go",
package = "ring0",
- template = ":defs",
+ template = ":defs_amd64",
+)
+
+go_template_instance(
+ name = "defs_impl_arm64",
+ out = "defs_impl_arm64.go",
+ package = "ring0",
+ template = ":defs_arm64",
)
genrule(
@@ -29,24 +47,40 @@ genrule(
tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
)
+genrule(
+ name = "entry_impl_arm64",
+ srcs = ["entry_arm64.s"],
+ outs = ["entry_impl_arm64.s"],
+ cmd = "(echo -e '// +build arm64\\n' && $(location //pkg/sentry/platform/ring0/gen_offsets) && cat $(SRCS)) > $@",
+ tools = ["//pkg/sentry/platform/ring0/gen_offsets"],
+)
+
go_library(
name = "ring0",
srcs = [
- "defs_impl.go",
+ "defs_impl_amd64.go",
+ "defs_impl_arm64.go",
"entry_amd64.go",
+ "entry_arm64.go",
"entry_impl_amd64.s",
+ "entry_impl_arm64.s",
"kernel.go",
"kernel_amd64.go",
+ "kernel_arm64.go",
"kernel_unsafe.go",
"lib_amd64.go",
"lib_amd64.s",
+ "lib_arm64.go",
+ "lib_arm64.s",
+ "lib_arm64_unsafe.go",
"ring0.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0",
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/cpuid",
+ "//pkg/safecopy",
+ "//pkg/sentry/arch",
"//pkg/sentry/platform/ring0/pagetables",
- "//pkg/sentry/usermem",
+ "//pkg/usermem",
],
)
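
For reference, the new entry_impl_arm64 genrule concatenates three pieces: a build tag, the offset definitions printed by gen_offsets, and the hand-written entry_arm64.s. A sketch of the shape of the generated entry_impl_arm64.s — the offset values here are placeholders, not the real generated numbers:

    // +build arm64

    // Automatically generated, do not edit.

    // CPU offsets.
    #define CPU_SELF      0x00   // placeholder
    #define CPU_REGISTERS 0x28   // placeholder
    ...
    (the contents of entry_arm64.s follow)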
diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go
new file mode 100644
index 000000000..87a573cc4
--- /dev/null
+++ b/pkg/sentry/platform/ring0/aarch64.go
@@ -0,0 +1,111 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+// Useful bits.
+const (
+ _PGD_PGT_BASE = 0x1000
+ _PGD_PGT_SIZE = 0x1000
+ _PUD_PGT_BASE = 0x2000
+ _PUD_PGT_SIZE = 0x1000
+ _PMD_PGT_BASE = 0x3000
+ _PMD_PGT_SIZE = 0x4000
+ _PTE_PGT_BASE = 0x7000
+ _PTE_PGT_SIZE = 0x1000
+)
+
+const (
+ // DAIF bits: debug, SError, IRQ, FIQ.
+ _PSR_D_BIT = 0x00000200
+ _PSR_A_BIT = 0x00000100
+ _PSR_I_BIT = 0x00000080
+ _PSR_F_BIT = 0x00000040
+ _PSR_DAIF_SHIFT = 6
+ _PSR_DAIF_MASK = 0xf << _PSR_DAIF_SHIFT
+
+ // PSR bits.
+ _PSR_MODE_EL0t = 0x00000000
+ _PSR_MODE_EL1t = 0x00000004
+ _PSR_MODE_EL1h = 0x00000005
+ _PSR_MODE_MASK = 0x0000000f
+
+ PsrFlagsClear = _PSR_MODE_MASK | _PSR_DAIF_MASK
+ PsrModeMask = _PSR_MODE_MASK
+
+ // KernelFlagsSet should always be set in the kernel.
+ KernelFlagsSet = _PSR_MODE_EL1h | _PSR_D_BIT | _PSR_A_BIT | _PSR_I_BIT | _PSR_F_BIT
+
+ // UserFlagsSet are always set in userspace.
+ UserFlagsSet = _PSR_MODE_EL0t
+)
+
+// Vector is an exception vector.
+type Vector uintptr
+
+// Exception vectors.
+const (
+ El1SyncInvalid = iota
+ El1IrqInvalid
+ El1FiqInvalid
+ El1ErrorInvalid
+ El1Sync
+ El1Irq
+ El1Fiq
+ El1Error
+ El0Sync
+ El0Irq
+ El0Fiq
+ El0Error
+ El0Sync_invalid
+ El0Irq_invalid
+ El0Fiq_invalid
+ El0Error_invalid
+ El1Sync_da
+ El1Sync_ia
+ El1Sync_sp_pc
+ El1Sync_undef
+ El1Sync_dbg
+ El1Sync_inv
+ El0Sync_svc
+ El0Sync_da
+ El0Sync_ia
+ El0Sync_fpsimd_acc
+ El0Sync_sve_acc
+ El0Sync_sys
+ El0Sync_sp_pc
+ El0Sync_undef
+ El0Sync_dbg
+ El0Sync_inv
+ _NR_INTERRUPTS
+)
+
+// System call vectors.
+const (
+ Syscall Vector = El0Sync_svc
+ PageFault Vector = El0Sync_da
+ VirtualizationException Vector = El0Error
+)
+
+// VirtualAddressBits returns the number of bits available for virtual addresses.
+func VirtualAddressBits() uint32 {
+ return 48
+}
+
+// PhysicalAddressBits returns the number of bits available for physical addresses.
+func PhysicalAddressBits() uint32 {
+ return 40
+}
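
As a quick sanity check on the flag constants above, here is a minimal standalone Go sketch (not part of the diff) that recomputes KernelFlagsSet and UserFlagsSet:

    package main

    import "fmt"

    // Constants mirrored from aarch64.go above.
    const (
        psrDBit     = 0x00000200
        psrABit     = 0x00000100
        psrIBit     = 0x00000080
        psrFBit     = 0x00000040
        psrModeEL1h = 0x00000005
        psrModeEL0t = 0x00000000
    )

    func main() {
        // KernelFlagsSet: run at EL1h with all DAIF exceptions masked.
        fmt.Printf("KernelFlagsSet = %#x\n", psrModeEL1h|psrDBit|psrABit|psrIBit|psrFBit) // 0x3c5
        // UserFlagsSet: run at EL0t with exceptions unmasked.
        fmt.Printf("UserFlagsSet = %#x\n", psrModeEL0t) // 0x0
    }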
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
index 076063f85..e6daf24df 100644
--- a/pkg/sentry/platform/ring0/defs.go
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -15,20 +15,8 @@
package ring0
import (
- "syscall"
-
- "gvisor.dev/gvisor/pkg/sentry/usermem"
-)
-
-var (
- // UserspaceSize is the total size of userspace.
- UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
-
- // MaximumUserAddress is the largest possible user address.
- MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
-
- // KernelStartAddress is the starting kernel address.
- KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
)
// Kernel is a global kernel object.
@@ -83,7 +71,7 @@ type CPU struct {
// registers is a set of registers; these may be used on kernel system
// calls and exceptions via the Registers function.
- registers syscall.PtraceRegs
+ registers arch.Registers
// hooks are kernel hooks.
hooks Hooks
@@ -94,14 +82,14 @@ type CPU struct {
// This is explicitly safe to call during KernelException and KernelSyscall.
//
//go:nosplit
-func (c *CPU) Registers() *syscall.PtraceRegs {
+func (c *CPU) Registers() *arch.Registers {
return &c.registers
}
// SwitchOpts are passed to the Switch function.
type SwitchOpts struct {
// Registers are the user register state.
- Registers *syscall.PtraceRegs
+ Registers *arch.Registers
// FloatingPointState is a byte pointer where floating point state is
// saved and restored.
diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go
index 7206322b1..9c6c2cf5c 100644
--- a/pkg/sentry/platform/ring0/defs_amd64.go
+++ b/pkg/sentry/platform/ring0/defs_amd64.go
@@ -18,6 +18,18 @@ package ring0
import (
"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+var (
+ // UserspaceSize is the total size of userspace.
+ UserspaceSize = uintptr(1) << (VirtualAddressBits() - 1)
+
+ // MaximumUserAddress is the largest possible user address.
+ MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
+
+ // KernelStartAddress is the starting kernel address.
+ KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
)
// Segment indices and Selectors.
diff --git a/pkg/sentry/platform/ring0/defs_arm64.go b/pkg/sentry/platform/ring0/defs_arm64.go
new file mode 100644
index 000000000..0e2ab716c
--- /dev/null
+++ b/pkg/sentry/platform/ring0/defs_arm64.go
@@ -0,0 +1,143 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+import (
+ "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+var (
+ // UserspaceSize is the total size of userspace.
+ UserspaceSize = uintptr(1) << (VirtualAddressBits())
+
+ // MaximumUserAddress is the largest possible user address.
+ MaximumUserAddress = (UserspaceSize - 1) & ^uintptr(usermem.PageSize-1)
+
+ // KernelStartAddress is the starting kernel address.
+ KernelStartAddress = ^uintptr(0) - (UserspaceSize - 1)
+)
+
+// KernelOpts has initialization options for the kernel.
+type KernelOpts struct {
+ // PageTables are the kernel pagetables; this must be provided.
+ PageTables *pagetables.PageTables
+}
+
+// KernelArchState contains architecture-specific state.
+type KernelArchState struct {
+ KernelOpts
+}
+
+// CPUArchState contains CPU-specific arch state.
+type CPUArchState struct {
+ // stack is the stack used for interrupts on this CPU.
+ stack [512]byte
+
+ // errorCode is the error code from the last exception.
+ errorCode uintptr
+
+ // errorType indicates the type of error code here, it is always set
+ // along with the errorCode value above.
+ //
+ // It will either be 1, which indicates a user error, or 0, indicating a
+ // kernel error. If the error code below returns false (kernel error),
+ // then it cannot provide relevant information about the last
+ // exception.
+ errorType uintptr
+
+ // faultAddr is the value of far_el1.
+ faultAddr uintptr
+
+ // ttbr0Kvm is the value of ttbr0_el1 for sentry.
+ ttbr0Kvm uintptr
+
+ // ttbr0App is the value of ttbr0_el1 for the application.
+ ttbr0App uintptr
+
+ // exception vector.
+ vecCode Vector
+
+ // application context pointer.
+ appAddr uintptr
+
+ // lazyVFP is the value of cpacr_el1.
+ lazyVFP uintptr
+}
+
+// ErrorCode returns the last error code.
+//
+// The returned boolean indicates whether the error code corresponds to the
+// last user error or not. If it does not, then fault information must be
+// ignored. This is generally the result of a kernel fault while servicing a
+// user fault.
+//
+//go:nosplit
+func (c *CPU) ErrorCode() (value uintptr, user bool) {
+ return c.errorCode, c.errorType != 0
+}
+
+// ClearErrorCode resets the error code.
+//
+//go:nosplit
+func (c *CPU) ClearErrorCode() {
+ c.errorCode = 0 // No code.
+ c.errorType = 1 // User mode.
+}
+
+//go:nosplit
+func (c *CPU) GetFaultAddr() (value uintptr) {
+ return c.faultAddr
+}
+
+//go:nosplit
+func (c *CPU) SetTtbr0Kvm(value uintptr) {
+ c.ttbr0Kvm = value
+}
+
+//go:nosplit
+func (c *CPU) SetTtbr0App(value uintptr) {
+ c.ttbr0App = value
+}
+
+//go:nosplit
+func (c *CPU) GetVector() (value Vector) {
+ return c.vecCode
+}
+
+//go:nosplit
+func (c *CPU) SetAppAddr(value uintptr) {
+ c.appAddr = value
+}
+
+// GetLazyVFP returns the value of cpacr_el1.
+//go:nosplit
+func (c *CPU) GetLazyVFP() (value uintptr) {
+ return c.lazyVFP
+}
+
+// SwitchArchOpts are embedded in SwitchOpts.
+type SwitchArchOpts struct {
+ // UserASID indicates the application ASID to be used on switch.
+ UserASID uint16
+
+ // KernelASID indicates the kernel ASID to be used on return.
+ KernelASID uint16
+}
+
+func init() {
+}
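
Note that, unlike defs_amd64.go above (which halves the address space with VirtualAddressBits() - 1), the arm64 version uses the full 2^48 range, since user and kernel mappings live in separate translation tables (TTBR0/TTBR1). A standalone sketch of the resulting layout constants, assuming the 48-bit VirtualAddressBits and 4KB pages defined elsewhere in this diff:

    package main

    import "fmt"

    const (
        virtualAddressBits = 48   // VirtualAddressBits() on arm64.
        pageSize           = 4096 // usermem.PageSize.
    )

    func main() {
        userspaceSize := uintptr(1) << virtualAddressBits
        maximumUserAddress := (userspaceSize - 1) &^ uintptr(pageSize-1)
        kernelStartAddress := ^uintptr(0) - (userspaceSize - 1)
        fmt.Printf("UserspaceSize      = %#x\n", userspaceSize)      // 0x1000000000000
        fmt.Printf("MaximumUserAddress = %#x\n", maximumUserAddress) // 0xfffffffff000
        fmt.Printf("KernelStartAddress = %#x\n", kernelStartAddress) // 0xffff000000000000
    }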
diff --git a/pkg/sentry/platform/ring0/entry_amd64.go b/pkg/sentry/platform/ring0/entry_amd64.go
index a5ce67885..7fa43c2f5 100644
--- a/pkg/sentry/platform/ring0/entry_amd64.go
+++ b/pkg/sentry/platform/ring0/entry_amd64.go
@@ -17,7 +17,7 @@
package ring0
import (
- "syscall"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// This is an assembly function.
@@ -41,7 +41,7 @@ func swapgs()
// The return code is the vector that interrupted execution.
//
// See stubs.go for a note regarding the frame size of this function.
-func sysret(*CPU, *syscall.PtraceRegs) Vector
+func sysret(*CPU, *arch.Registers) Vector
// "iret is the cadillac of CPL switching."
//
@@ -50,7 +50,7 @@ func sysret(*CPU, *syscall.PtraceRegs) Vector
// iret is nearly identical to sysret, except an iret is used to fully restore
// all user state. This must be called in cases where all registers need to be
// restored.
-func iret(*CPU, *syscall.PtraceRegs) Vector
+func iret(*CPU, *arch.Registers) Vector
// exception is the generic exception entry.
//
diff --git a/pkg/sentry/platform/ring0/entry_arm64.go b/pkg/sentry/platform/ring0/entry_arm64.go
new file mode 100644
index 000000000..62a93f3d6
--- /dev/null
+++ b/pkg/sentry/platform/ring0/entry_arm64.go
@@ -0,0 +1,60 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+// This is an assembly function.
+//
+// The sysenter function is invoked in two situations:
+//
+// (1) The guest kernel has executed a system call.
+// (2) The guest application has executed a system call.
+//
+// The interrupt flag is examined to determine whether the system call was
+// executed from kernel mode or not and the appropriate stub is called.
+
+func El1_sync_invalid()
+func El1_irq_invalid()
+func El1_fiq_invalid()
+func El1_error_invalid()
+
+func El1_sync()
+func El1_irq()
+func El1_fiq()
+func El1_error()
+
+func El0_sync()
+func El0_irq()
+func El0_fiq()
+func El0_error()
+
+func El0_sync_invalid()
+func El0_irq_invalid()
+func El0_fiq_invalid()
+func El0_error_invalid()
+
+func Vectors()
+
+// Start is the CPU entrypoint.
+//
+// The CPU state will be set to c.Registers().
+func Start()
+func kernelExitToEl1()
+
+func kernelExitToEl0()
+
+// Shutdown halts execution.
+func Shutdown()
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
new file mode 100644
index 000000000..9d29b7168
--- /dev/null
+++ b/pkg/sentry/platform/ring0/entry_arm64.s
@@ -0,0 +1,786 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "funcdata.h"
+#include "textflag.h"
+
+// NB: Offsets are programmatically generated (see BUILD).
+//
+// This file is concatenated with the definitions.
+
+// Saves a register set.
+//
+// This is a macro because it may need to be executed in contexts where a
+// stack is not available for calls.
+//
+
+// ERET returns using the ELR and SPSR for the current exception level.
+#define ERET() \
+ WORD $0xd69f03e0
+
+// RSV_REG is a register that holds el1 information temporarily.
+#define RSV_REG R18_PLATFORM
+
+// RSV_REG_APP is a register that holds el0 information temporarily.
+#define RSV_REG_APP R9
+
+#define FPEN_NOTRAP 0x3
+#define FPEN_SHIFT 20
+
+#define FPEN_ENABLE (FPEN_NOTRAP << FPEN_SHIFT)
+
+// sctlr_el1: system control register el1.
+#define SCTLR_M 1 << 0
+#define SCTLR_C 1 << 2
+#define SCTLR_I 1 << 12
+#define SCTLR_UCT 1 << 15
+
+#define SCTLR_EL1_DEFAULT (SCTLR_M | SCTLR_C | SCTLR_I | SCTLR_UCT)
+
+// cntkctl_el1: counter-timer kernel control register el1.
+#define CNTKCTL_EL0PCTEN 1 << 0
+#define CNTKCTL_EL0VCTEN 1 << 1
+
+#define CNTKCTL_EL1_DEFAULT (CNTKCTL_EL0PCTEN | CNTKCTL_EL0VCTEN)
+
+// Saves a register set.
+//
+// This is a macro because it may need to be executed in contexts where a
+// stack is not available for calls.
+//
+// The following registers are not saved: R9, R18.
+#define REGISTERS_SAVE(reg, offset) \
+ MOVD R0, offset+PTRACE_R0(reg); \
+ MOVD R1, offset+PTRACE_R1(reg); \
+ MOVD R2, offset+PTRACE_R2(reg); \
+ MOVD R3, offset+PTRACE_R3(reg); \
+ MOVD R4, offset+PTRACE_R4(reg); \
+ MOVD R5, offset+PTRACE_R5(reg); \
+ MOVD R6, offset+PTRACE_R6(reg); \
+ MOVD R7, offset+PTRACE_R7(reg); \
+ MOVD R8, offset+PTRACE_R8(reg); \
+ MOVD R10, offset+PTRACE_R10(reg); \
+ MOVD R11, offset+PTRACE_R11(reg); \
+ MOVD R12, offset+PTRACE_R12(reg); \
+ MOVD R13, offset+PTRACE_R13(reg); \
+ MOVD R14, offset+PTRACE_R14(reg); \
+ MOVD R15, offset+PTRACE_R15(reg); \
+ MOVD R16, offset+PTRACE_R16(reg); \
+ MOVD R17, offset+PTRACE_R17(reg); \
+ MOVD R19, offset+PTRACE_R19(reg); \
+ MOVD R20, offset+PTRACE_R20(reg); \
+ MOVD R21, offset+PTRACE_R21(reg); \
+ MOVD R22, offset+PTRACE_R22(reg); \
+ MOVD R23, offset+PTRACE_R23(reg); \
+ MOVD R24, offset+PTRACE_R24(reg); \
+ MOVD R25, offset+PTRACE_R25(reg); \
+ MOVD R26, offset+PTRACE_R26(reg); \
+ MOVD R27, offset+PTRACE_R27(reg); \
+ MOVD g, offset+PTRACE_R28(reg); \
+ MOVD R29, offset+PTRACE_R29(reg); \
+ MOVD R30, offset+PTRACE_R30(reg);
+
+// Loads a register set.
+//
+// This is a macro because it may need to be executed in contexts where a
+// stack is not available for calls.
+//
+// The following registers are not loaded: R9, R18.
+#define REGISTERS_LOAD(reg, offset) \
+ MOVD offset+PTRACE_R0(reg), R0; \
+ MOVD offset+PTRACE_R1(reg), R1; \
+ MOVD offset+PTRACE_R2(reg), R2; \
+ MOVD offset+PTRACE_R3(reg), R3; \
+ MOVD offset+PTRACE_R4(reg), R4; \
+ MOVD offset+PTRACE_R5(reg), R5; \
+ MOVD offset+PTRACE_R6(reg), R6; \
+ MOVD offset+PTRACE_R7(reg), R7; \
+ MOVD offset+PTRACE_R8(reg), R8; \
+ MOVD offset+PTRACE_R10(reg), R10; \
+ MOVD offset+PTRACE_R11(reg), R11; \
+ MOVD offset+PTRACE_R12(reg), R12; \
+ MOVD offset+PTRACE_R13(reg), R13; \
+ MOVD offset+PTRACE_R14(reg), R14; \
+ MOVD offset+PTRACE_R15(reg), R15; \
+ MOVD offset+PTRACE_R16(reg), R16; \
+ MOVD offset+PTRACE_R17(reg), R17; \
+ MOVD offset+PTRACE_R19(reg), R19; \
+ MOVD offset+PTRACE_R20(reg), R20; \
+ MOVD offset+PTRACE_R21(reg), R21; \
+ MOVD offset+PTRACE_R22(reg), R22; \
+ MOVD offset+PTRACE_R23(reg), R23; \
+ MOVD offset+PTRACE_R24(reg), R24; \
+ MOVD offset+PTRACE_R25(reg), R25; \
+ MOVD offset+PTRACE_R26(reg), R26; \
+ MOVD offset+PTRACE_R27(reg), R27; \
+ MOVD offset+PTRACE_R28(reg), g; \
+ MOVD offset+PTRACE_R29(reg), R29; \
+ MOVD offset+PTRACE_R30(reg), R30;
+
+// NOPs.
+#define nop31Instructions() \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f; \
+ WORD $0xd503201f;
+
+#define ESR_ELx_EC_UNKNOWN (0x00)
+#define ESR_ELx_EC_WFx (0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32 (0x03)
+#define ESR_ELx_EC_CP15_64 (0x04)
+#define ESR_ELx_EC_CP14_MR (0x05)
+#define ESR_ELx_EC_CP14_LS (0x06)
+#define ESR_ELx_EC_FP_ASIMD (0x07)
+#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_CP14_64 (0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL (0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32 (0x11)
+#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64 (0x15)
+#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 (0x18)
+#define ESR_ELx_EC_SVE (0x19)
+/* Unallocated EC: 0x1A - 0x1E */
+#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW (0x20)
+#define ESR_ELx_EC_IABT_CUR (0x21)
+#define ESR_ELx_EC_PC_ALIGN (0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW (0x24)
+#define ESR_ELx_EC_DABT_CUR (0x25)
+#define ESR_ELx_EC_SP_ALIGN (0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32 (0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64 (0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR (0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW (0x30)
+#define ESR_ELx_EC_BREAKPT_CUR (0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
+#define ESR_ELx_EC_WATCHPT_LOW (0x34)
+#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32 (0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64 (0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX (0x3F)
+
+#define ESR_ELx_EC_SHIFT (26)
+#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT)
+#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
+
+#define ESR_ELx_IL_SHIFT (25)
+#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
+#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+
+/* ISS field definitions shared by different classes */
+#define ESR_ELx_WNR_SHIFT (6)
+#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
+
+/* Asynchronous Error Type */
+#define ESR_ELx_IDS_SHIFT (24)
+#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
+#define ESR_ELx_AET_SHIFT (10)
+#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
+
+#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
+
+/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_SET_SHIFT (11)
+#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
+#define ESR_ELx_FnV_SHIFT (10)
+#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT)
+#define ESR_ELx_EA_SHIFT (9)
+#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT)
+#define ESR_ELx_S1PTW_SHIFT (7)
+#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT)
+
+/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
+#define ESR_ELx_FSC (0x3F)
+#define ESR_ELx_FSC_TYPE (0x3C)
+#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_SERROR (0x11)
+#define ESR_ELx_FSC_ACCESS (0x08)
+#define ESR_ELx_FSC_FAULT (0x04)
+#define ESR_ELx_FSC_PERM (0x0C)
+
+/* ISS field definitions for Data Aborts */
+#define ESR_ELx_ISV_SHIFT (24)
+#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT)
+#define ESR_ELx_SAS_SHIFT (22)
+#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT)
+#define ESR_ELx_SSE_SHIFT (21)
+#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT)
+#define ESR_ELx_SRT_SHIFT (16)
+#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT)
+#define ESR_ELx_SF_SHIFT (15)
+#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
+#define ESR_ELx_AR_SHIFT (14)
+#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_CM_SHIFT (8)
+#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
+
+/* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_CV (UL(1) << 24)
+#define ESR_ELx_COND_SHIFT (20)
+#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
+#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
+#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
+#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
+#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
+
+// LOAD_KERNEL_ADDRESS loads a kernel address.
+#define LOAD_KERNEL_ADDRESS(from, to) \
+ MOVD from, to; \
+ ORR $0xffff000000000000, to, to;
+
+// LOAD_KERNEL_STACK loads the kernel temporary stack.
+#define LOAD_KERNEL_STACK(from) \
+ LOAD_KERNEL_ADDRESS(CPU_SELF(from), RSV_REG); \
+ MOVD $CPU_STACK_TOP(RSV_REG), RSV_REG; \
+ MOVD RSV_REG, RSP; \
+ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
+ ISB $15; \
+ DSB $15;
+
+// SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application.
+#define SWITCH_TO_APP_PAGETABLE(from) \
+ MOVD CPU_TTBR0_APP(from), RSV_REG; \
+ WORD $0xd5182012; \ // MSR R18, TTBR0_EL1
+ ISB $15; \
+ DSB $15;
+
+// SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable.
+#define SWITCH_TO_KVM_PAGETABLE(from) \
+ MOVD CPU_TTBR0_KVM(from), RSV_REG; \
+ WORD $0xd5182012; \ // MSR R18, TTBR0_EL1
+ ISB $15; \
+ DSB $15;
+
+#define VFP_ENABLE \
+ MOVD $FPEN_ENABLE, R0; \
+ WORD $0xd5181040; \ //MSR R0, CPACR_EL1
+ ISB $15;
+
+#define VFP_DISABLE \
+ MOVD $0x0, R0; \
+ WORD $0xd5181040; \ //MSR R0, CPACR_EL1
+ ISB $15;
+
+// KERNEL_ENTRY_FROM_EL0 is the entry code of the vcpu from el0 to el1.
+#define KERNEL_ENTRY_FROM_EL0 \
+ SUB $16, RSP, RSP; \ // step1, save r18, r9 into kernel temporary stack.
+ STP (RSV_REG, RSV_REG_APP), 16*0(RSP); \
+ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18; step2, switch to the kvm pagetable.
+ SWITCH_TO_KVM_PAGETABLE(RSV_REG); \
+ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
+ MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP; \ // step3, load app context pointer.
+ REGISTERS_SAVE(RSV_REG_APP, 0); \ // step4, save app context.
+ MOVD RSV_REG_APP, R20; \
+ LDP 16*0(RSP), (RSV_REG, RSV_REG_APP); \
+ ADD $16, RSP, RSP; \
+ MOVD RSV_REG, PTRACE_R18(R20); \
+ MOVD RSV_REG_APP, PTRACE_R9(R20); \
+ MOVD R20, RSV_REG_APP; \
+ WORD $0xd5384003; \ // MRS SPSR_EL1, R3
+ MOVD R3, PTRACE_PSTATE(RSV_REG_APP); \
+ MRS ELR_EL1, R3; \
+ MOVD R3, PTRACE_PC(RSV_REG_APP); \
+ WORD $0xd5384103; \ // MRS SP_EL0, R3
+ MOVD R3, PTRACE_SP(RSV_REG_APP);
+
+// KERNEL_ENTRY_FROM_EL1 is the entry code of the vcpu from el1 to el1.
+#define KERNEL_ENTRY_FROM_EL1 \
+ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
+ REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // Save sentry context.
+ MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG); \
+ WORD $0xd5384004; \ // MRS SPSR_EL1, R4
+ MOVD R4, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG); \
+ MRS ELR_EL1, R4; \
+ MOVD R4, CPU_REGISTERS+PTRACE_PC(RSV_REG); \
+ MOVD RSP, R4; \
+ MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \
+ LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack.
+
+// Halt halts execution.
+TEXT ·Halt(SB),NOSPLIT,$0
+ // Clear bluepill.
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ CMP RSV_REG, R9
+ BNE mmio_exit
+ MOVD $0, CPU_REGISTERS+PTRACE_R9(RSV_REG)
+
+ // Flush dcache.
+ WORD $0xd5087e52 // DC CISW
+mmio_exit:
+ // Disable fpsimd.
+ WORD $0xd5381041 // MRS CPACR_EL1, R1
+ MOVD R1, CPU_LAZY_VFP(RSV_REG)
+ VFP_DISABLE
+
+ // Trigger MMIO_EXIT/_KVM_HYPERCALL_VMEXIT.
+ //
+ // To keep it simple, the address of the exception vector table is used
+ // as the MMIO base address, so that an MMIO-EXIT can be triggered by
+ // forcibly writing to this read-only space.
+ // The region is long enough to cover a sufficient number of hypercall IDs,
+ // so host userspace can compute the offset from this address to determine
+ // which hypercall was issued.
+ MRS VBAR_EL1, R9
+ MOVD R0, 0x0(R9)
+
+ // Flush dcache.
+ WORD $0xd5087e52 // DC CISW
+
+ RET
+
+// HaltAndResume halts execution and points the pointer to the resume function.
+TEXT ·HaltAndResume(SB),NOSPLIT,$0
+ BL ·Halt(SB)
+ B ·kernelExitToEl1(SB) // Resume.
+
+// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resumes.
+TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0
+ WORD $0xd538d092 // MRS TPIDR_EL1, R18
+ MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
+ MOVD R3, 8(RSP) // First argument (vCPU).
+ CALL ·kernelSyscall(SB) // Call the trampoline.
+ B ·kernelExitToEl1(SB) // Resume.
+
+// Shutdown stops the guest.
+TEXT ·Shutdown(SB),NOSPLIT,$0
+ // PSCI EVENT.
+ MOVD $0x84000009, R0
+ HVC $0
+
+// See kernel.go.
+TEXT ·Current(SB),NOSPLIT,$0-8
+ MOVD CPU_SELF(RSV_REG), R8
+ MOVD R8, ret+0(FP)
+ RET
+
+#define STACK_FRAME_SIZE 16
+
+// kernelExitToEl0 is the entrypoint for the application in guest_el0.
+// It prepares the vcpu environment for the container application.
+TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
+ // Step1, save sentry context into memory.
+ MRS TPIDR_EL1, RSV_REG
+ REGISTERS_SAVE(RSV_REG, CPU_REGISTERS)
+ MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG)
+
+ WORD $0xd5384003 // MRS SPSR_EL1, R3
+ MOVD R3, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG)
+ MOVD R30, CPU_REGISTERS+PTRACE_PC(RSV_REG)
+ MOVD RSP, R3
+ MOVD R3, CPU_REGISTERS+PTRACE_SP(RSV_REG)
+
+ MOVD CPU_REGISTERS+PTRACE_R3(RSV_REG), R3
+
+ // Step2, switch to temporary stack.
+ LOAD_KERNEL_STACK(RSV_REG)
+
+ // Step3, load app context pointer.
+ MOVD CPU_APP_ADDR(RSV_REG), RSV_REG_APP
+
+ // Step4, prepare the environment for container application.
+ // set sp_el0.
+ MOVD PTRACE_SP(RSV_REG_APP), R1
+ WORD $0xd5184101 //MSR R1, SP_EL0
+ // set pc.
+ MOVD PTRACE_PC(RSV_REG_APP), R1
+ MSR R1, ELR_EL1
+ // set pstate.
+ MOVD PTRACE_PSTATE(RSV_REG_APP), R1
+ WORD $0xd5184001 //MSR R1, SPSR_EL1
+
+ // RSV_REG & RSV_REG_APP will be loaded at the end.
+ REGISTERS_LOAD(RSV_REG_APP, 0)
+
+ // switch to user pagetable.
+ MOVD PTRACE_R18(RSV_REG_APP), RSV_REG
+ MOVD PTRACE_R9(RSV_REG_APP), RSV_REG_APP
+
+ SUB $STACK_FRAME_SIZE, RSP, RSP
+ STP (RSV_REG, RSV_REG_APP), 16*0(RSP)
+
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+
+ SWITCH_TO_APP_PAGETABLE(RSV_REG)
+
+ LDP 16*0(RSP), (RSV_REG, RSV_REG_APP)
+ ADD $STACK_FRAME_SIZE, RSP, RSP
+
+ ISB $15
+ ERET()
+
+// kernelExitToEl1 is the entrypoint for the sentry in guest_el1.
+// It prepares the vcpu environment for the sentry.
+TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ MOVD CPU_REGISTERS+PTRACE_PSTATE(RSV_REG), R1
+ WORD $0xd5184001 //MSR R1, SPSR_EL1
+
+ MOVD CPU_REGISTERS+PTRACE_PC(RSV_REG), R1
+ MSR R1, ELR_EL1
+
+ MOVD CPU_REGISTERS+PTRACE_SP(RSV_REG), R1
+ MOVD R1, RSP
+
+ REGISTERS_LOAD(RSV_REG, CPU_REGISTERS)
+ MOVD CPU_REGISTERS+PTRACE_R9(RSV_REG), RSV_REG_APP
+
+ ERET()
+
+// Start is the CPU entrypoint.
+TEXT ·Start(SB),NOSPLIT,$0
+ // Flush dcache.
+ WORD $0xd5087e52 // DC CISW
+ // Init.
+ MOVD $SCTLR_EL1_DEFAULT, R1
+ MSR R1, SCTLR_EL1
+
+ MOVD $CNTKCTL_EL1_DEFAULT, R1
+ MSR R1, CNTKCTL_EL1
+
+ MOVD R8, RSV_REG
+ ORR $0xffff000000000000, RSV_REG, RSV_REG
+ WORD $0xd518d092 //MSR R18, TPIDR_EL1
+
+ B ·kernelExitToEl1(SB)
+
+// El1_sync_invalid is the handler for an invalid EL1_sync.
+TEXT ·El1_sync_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_irq_invalid is the handler for an invalid El1_irq.
+TEXT ·El1_irq_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_fiq_invalid is the handler for an invalid El1_fiq.
+TEXT ·El1_fiq_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_error_invalid is the handler for an invalid El1_error.
+TEXT ·El1_error_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_sync is the handler for El1_sync.
+TEXT ·El1_sync(SB),NOSPLIT,$0
+ KERNEL_ENTRY_FROM_EL1
+ WORD $0xd5385219 // MRS ESR_EL1, R25
+ LSR $ESR_ELx_EC_SHIFT, R25, R24
+ CMP $ESR_ELx_EC_DABT_CUR, R24
+ BEQ el1_da
+ CMP $ESR_ELx_EC_IABT_CUR, R24
+ BEQ el1_ia
+ CMP $ESR_ELx_EC_SYS64, R24
+ BEQ el1_undef
+ CMP $ESR_ELx_EC_SP_ALIGN, R24
+ BEQ el1_sp_pc
+ CMP $ESR_ELx_EC_PC_ALIGN, R24
+ BEQ el1_sp_pc
+ CMP $ESR_ELx_EC_UNKNOWN, R24
+ BEQ el1_undef
+ CMP $ESR_ELx_EC_SVC64, R24
+ BEQ el1_svc
+ CMP $ESR_ELx_EC_BREAKPT_CUR, R24
+ BGE el1_dbg
+ CMP $ESR_ELx_EC_FP_ASIMD, R24
+ BEQ el1_fpsimd_acc
+ B el1_invalid
+
+el1_da:
+el1_ia:
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ WORD $0xd538601a //MRS FAR_EL1, R26
+
+ MOVD R26, CPU_FAULT_ADDR(RSV_REG)
+
+ MOVD $0, CPU_ERROR_TYPE(RSV_REG)
+
+ MOVD $PageFault, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·HaltAndResume(SB)
+
+el1_sp_pc:
+ B ·Shutdown(SB)
+
+el1_undef:
+ B ·Shutdown(SB)
+
+el1_svc:
+ MOVD $0, CPU_ERROR_CODE(RSV_REG)
+ MOVD $0, CPU_ERROR_TYPE(RSV_REG)
+ B ·HaltEl1SvcAndResume(SB)
+
+el1_dbg:
+ B ·Shutdown(SB)
+
+el1_fpsimd_acc:
+ VFP_ENABLE
+ B ·kernelExitToEl1(SB) // Resume.
+
+el1_invalid:
+ B ·Shutdown(SB)
+
+// El1_irq is the handler for El1_irq.
+TEXT ·El1_irq(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_fiq is the handler for El1_fiq.
+TEXT ·El1_fiq(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El1_error is the handler for El1_error.
+TEXT ·El1_error(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// El0_sync is the handler for El0_sync.
+TEXT ·El0_sync(SB),NOSPLIT,$0
+ KERNEL_ENTRY_FROM_EL0
+ WORD $0xd5385219 // MRS ESR_EL1, R25
+ LSR $ESR_ELx_EC_SHIFT, R25, R24
+ CMP $ESR_ELx_EC_SVC64, R24
+ BEQ el0_svc
+ CMP $ESR_ELx_EC_DABT_LOW, R24
+ BEQ el0_da
+ CMP $ESR_ELx_EC_IABT_LOW, R24
+ BEQ el0_ia
+ CMP $ESR_ELx_EC_FP_ASIMD, R24
+ BEQ el0_fpsimd_acc
+ CMP $ESR_ELx_EC_SVE, R24
+ BEQ el0_sve_acc
+ CMP $ESR_ELx_EC_FP_EXC64, R24
+ BEQ el0_fpsimd_exc
+ CMP $ESR_ELx_EC_SP_ALIGN, R24
+ BEQ el0_sp_pc
+ CMP $ESR_ELx_EC_PC_ALIGN, R24
+ BEQ el0_sp_pc
+ CMP $ESR_ELx_EC_UNKNOWN, R24
+ BEQ el0_undef
+ CMP $ESR_ELx_EC_BREAKPT_LOW, R24
+ BGE el0_dbg
+ B el0_invalid
+
+el0_svc:
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+
+ MOVD $0, CPU_ERROR_CODE(RSV_REG) // Clear error code.
+
+ MOVD $1, R3
+ MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
+
+ MOVD $Syscall, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·kernelExitToEl1(SB)
+
+el0_da:
+el0_ia:
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ WORD $0xd538601a //MRS FAR_EL1, R26
+
+ MOVD R26, CPU_FAULT_ADDR(RSV_REG)
+
+ MOVD $1, R3
+ MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
+
+ MOVD $PageFault, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ MRS ESR_EL1, R3
+ MOVD R3, CPU_ERROR_CODE(RSV_REG)
+
+ B ·kernelExitToEl1(SB)
+
+el0_fpsimd_acc:
+ B ·Shutdown(SB)
+
+el0_sve_acc:
+ B ·Shutdown(SB)
+
+el0_fpsimd_exc:
+ B ·Shutdown(SB)
+
+el0_sp_pc:
+ B ·Shutdown(SB)
+
+el0_undef:
+ MOVD $El0Sync_undef, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·kernelExitToEl1(SB)
+
+el0_dbg:
+ B ·Shutdown(SB)
+
+el0_invalid:
+ B ·Shutdown(SB)
+
+TEXT ·El0_irq(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+TEXT ·El0_fiq(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+TEXT ·El0_error(SB),NOSPLIT,$0
+ KERNEL_ENTRY_FROM_EL0
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ WORD $0xd538601a //MRS FAR_EL1, R26
+
+ MOVD R26, CPU_FAULT_ADDR(RSV_REG)
+
+ MOVD $1, R3
+ MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
+
+ MOVD $VirtualizationException, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·HaltAndResume(SB)
+
+TEXT ·El0_sync_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+TEXT ·El0_irq_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+TEXT ·El0_fiq_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+TEXT ·El0_error_invalid(SB),NOSPLIT,$0
+ B ·Shutdown(SB)
+
+// Vectors implements the exception vector table.
+TEXT ·Vectors(SB),NOSPLIT,$0
+ B ·El1_sync_invalid(SB)
+ nop31Instructions()
+ B ·El1_irq_invalid(SB)
+ nop31Instructions()
+ B ·El1_fiq_invalid(SB)
+ nop31Instructions()
+ B ·El1_error_invalid(SB)
+ nop31Instructions()
+
+ B ·El1_sync(SB)
+ nop31Instructions()
+ B ·El1_irq(SB)
+ nop31Instructions()
+ B ·El1_fiq(SB)
+ nop31Instructions()
+ B ·El1_error(SB)
+ nop31Instructions()
+
+ B ·El0_sync(SB)
+ nop31Instructions()
+ B ·El0_irq(SB)
+ nop31Instructions()
+ B ·El0_fiq(SB)
+ nop31Instructions()
+ B ·El0_error(SB)
+ nop31Instructions()
+
+ B ·El0_sync_invalid(SB)
+ nop31Instructions()
+ B ·El0_irq_invalid(SB)
+ nop31Instructions()
+ B ·El0_fiq_invalid(SB)
+ nop31Instructions()
+ B ·El0_error_invalid(SB)
+ nop31Instructions()
+
+ // The exception vector table must be 11-bit (2KB) aligned.
+ // Please see the Linux source code as reference: arch/arm64/kernel/entry.S.
+ // For gVisor, the table is defined as 4KB in length, with the second 2KB
+ // filled with NOPs, so that the first 2KB can be safely moved to an
+ // address with 11-bit alignment.
+ WORD $0xd503201f //nop
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
+ WORD $0xd503201f
+ nop31Instructions()
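
The dispatch logic in El1_sync and El0_sync above extracts the exception class (EC) from ESR_EL1 and branches on it. A standalone Go sketch of the same decoding, using the ESR_ELx_EC_* values defined earlier in this file (the sample ESR value is illustrative):

    package main

    import "fmt"

    const (
        esrELxECShift = 26   // ESR_ELx_EC_SHIFT.
        esrELxECMask  = 0x3f // Width of the EC field.
        ecSVC64       = 0x15 // ESR_ELx_EC_SVC64.
        ecIABTLow     = 0x20 // ESR_ELx_EC_IABT_LOW.
        ecDABTLow     = 0x24 // ESR_ELx_EC_DABT_LOW.
    )

    // esrEC mirrors "LSR $ESR_ELx_EC_SHIFT, R25, R24" in the handlers.
    func esrEC(esr uint64) uint64 {
        return (esr >> esrELxECShift) & esrELxECMask
    }

    func main() {
        esr := uint64(0x56000000) // Illustrative: a 64-bit SVC from EL0.
        switch esrEC(esr) {
        case ecSVC64:
            fmt.Println("el0_svc: route as Syscall")
        case ecDABTLow, ecIABTLow:
            fmt.Println("el0_da/el0_ia: route as PageFault")
        default:
            fmt.Println("unhandled exception class")
        }
    }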
diff --git a/pkg/sentry/platform/ring0/gen_offsets/BUILD b/pkg/sentry/platform/ring0/gen_offsets/BUILD
index 780bf9a66..549f3d228 100644
--- a/pkg/sentry/platform/ring0/gen_offsets/BUILD
+++ b/pkg/sentry/platform/ring0/gen_offsets/BUILD
@@ -1,25 +1,34 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
+load("//tools:defs.bzl", "go_binary")
load("//tools/go_generics:defs.bzl", "go_template_instance")
package(licenses = ["notice"])
go_template_instance(
- name = "defs_impl",
- out = "defs_impl.go",
+ name = "defs_impl_arm64",
+ out = "defs_impl_arm64.go",
package = "main",
- template = "//pkg/sentry/platform/ring0:defs",
+ template = "//pkg/sentry/platform/ring0:defs_arm64",
+)
+
+go_template_instance(
+ name = "defs_impl_amd64",
+ out = "defs_impl_amd64.go",
+ package = "main",
+ template = "//pkg/sentry/platform/ring0:defs_amd64",
)
go_binary(
name = "gen_offsets",
srcs = [
- "defs_impl.go",
+ "defs_impl_amd64.go",
+ "defs_impl_arm64.go",
"main.go",
],
visibility = ["//pkg/sentry/platform/ring0:__pkg__"],
deps = [
"//pkg/cpuid",
+ "//pkg/sentry/arch",
"//pkg/sentry/platform/ring0/pagetables",
- "//pkg/sentry/usermem",
+ "//pkg/usermem",
],
)
diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go
index 900c0bba7..021693791 100644
--- a/pkg/sentry/platform/ring0/kernel.go
+++ b/pkg/sentry/platform/ring0/kernel.go
@@ -31,23 +31,39 @@ type defaultHooks struct{}
// KernelSyscall implements Hooks.KernelSyscall.
//
+// +checkescape:all
+//
//go:nosplit
-func (defaultHooks) KernelSyscall() { Halt() }
+func (defaultHooks) KernelSyscall() {
+ Halt()
+}
// KernelException implements Hooks.KernelException.
//
+// +checkescape:all
+//
//go:nosplit
-func (defaultHooks) KernelException(Vector) { Halt() }
+func (defaultHooks) KernelException(Vector) {
+ Halt()
+}
// kernelSyscall is a trampoline.
//
+// +checkescape:hard,stack
+//
//go:nosplit
-func kernelSyscall(c *CPU) { c.hooks.KernelSyscall() }
+func kernelSyscall(c *CPU) {
+ c.hooks.KernelSyscall()
+}
// kernelException is a trampoline.
//
+// +checkescape:hard,stack
+//
//go:nosplit
-func kernelException(c *CPU, vector Vector) { c.hooks.KernelException(vector) }
+func kernelException(c *CPU, vector Vector) {
+ c.hooks.KernelException(vector)
+}
// Init initializes a new CPU.
//
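
For context, defaultHooks above is the stock implementation of the Hooks interface; the interface shape (KernelSyscall and KernelException(Vector)) is inferred from these methods, not spelled out in this hunk. A hedged sketch of a custom implementation, as it would look inside package ring0:

    // countingHooks is a hypothetical Hooks implementation that records
    // events before halting, as defaultHooks does.
    type countingHooks struct {
        syscalls   uint64
        exceptions uint64
    }

    //go:nosplit
    func (h *countingHooks) KernelSyscall() {
        h.syscalls++
        Halt()
    }

    //go:nosplit
    func (h *countingHooks) KernelException(v Vector) {
        h.exceptions++
        Halt()
    }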
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index 0feff8778..d37981dbf 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -178,6 +178,8 @@ func IsCanonical(addr uint64) bool {
//
// Precondition: the Rip, Rsp, Fs and Gs registers must be canonical.
//
+// +checkescape:all
+//
//go:nosplit
func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID)
@@ -192,9 +194,9 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
// Perform the switch.
swapgs() // GS will be swapped on return.
- WriteFS(uintptr(regs.Fs_base)) // Set application FS.
- WriteGS(uintptr(regs.Gs_base)) // Set application GS.
- LoadFloatingPoint(switchOpts.FloatingPointState) // Copy in floating point.
+ WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
+ WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
+ LoadFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy in floating point.
jumpToKernel() // Switch to upper half.
writeCR3(uintptr(userCR3)) // Change to user address space.
if switchOpts.FullRestore {
@@ -204,8 +206,8 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
}
writeCR3(uintptr(kernelCR3)) // Return to kernel address space.
jumpToUser() // Return to lower half.
- SaveFloatingPoint(switchOpts.FloatingPointState) // Copy out floating point.
- WriteFS(uintptr(c.registers.Fs_base)) // Restore kernel FS.
+ SaveFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy out floating point.
+ WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
return
}
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
new file mode 100644
index 000000000..d0afa1aaa
--- /dev/null
+++ b/pkg/sentry/platform/ring0/kernel_arm64.go
@@ -0,0 +1,72 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+// HaltAndResume halts execution and points the pointer to the resume function.
+//go:nosplit
+func HaltAndResume()
+
+// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resumes.
+//go:nosplit
+func HaltEl1SvcAndResume()
+
+// init initializes architecture-specific state.
+func (k *Kernel) init(opts KernelOpts) {
+ // Save the root page tables.
+ k.PageTables = opts.PageTables
+}
+
+// init initializes architecture-specific state.
+func (c *CPU) init() {
+ // Set the kernel stack pointer (virtual address).
+ c.registers.Sp = uint64(c.StackTop())
+}
+
+// StackTop returns the kernel's stack address.
+//
+//go:nosplit
+func (c *CPU) StackTop() uint64 {
+ return uint64(kernelAddr(&c.stack[0])) + uint64(len(c.stack))
+}
+
+// IsCanonical indicates whether addr is canonical per the arm64 spec.
+//
+//go:nosplit
+func IsCanonical(addr uint64) bool {
+ return addr <= 0x0000ffffffffffff || addr > 0xffff000000000000
+}
+
+//go:nosplit
+func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
+ regs := switchOpts.Registers
+
+ regs.Pstate &= ^uint64(PsrFlagsClear)
+ regs.Pstate |= UserFlagsSet
+
+ LoadFloatingPoint(switchOpts.FloatingPointState)
+ SetTLS(regs.TPIDR_EL0)
+
+ kernelExitToEl0()
+
+ regs.TPIDR_EL0 = GetTLS()
+ SaveFloatingPoint(switchOpts.FloatingPointState)
+
+ vector = c.vecCode
+
+ return
+}
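
The pstate manipulation in SwitchToUser clears the stale mode and DAIF bits from the saved user PSTATE and forces EL0t. A standalone worked example of that masking (the sample PSTATE value is illustrative):

    package main

    import "fmt"

    const (
        psrModeMask   = 0x0000000f
        psrDAIFMask   = 0xf << 6
        psrFlagsClear = psrModeMask | psrDAIFMask // PsrFlagsClear.
        userFlagsSet  = 0x0                       // UserFlagsSet (_PSR_MODE_EL0t).
    )

    func main() {
        // N and Z condition flags set, plus stale EL1h mode and masked DAIF bits.
        pstate := uint64(0x600003c5)
        pstate &= ^uint64(psrFlagsClear) // Strip mode and DAIF.
        pstate |= userFlagsSet           // Force EL0t, exceptions unmasked.
        fmt.Printf("sanitized pstate = %#x\n", pstate) // 0x60000000
    }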
diff --git a/pkg/sentry/platform/ring0/lib_arm64.go b/pkg/sentry/platform/ring0/lib_arm64.go
new file mode 100644
index 000000000..00e52c8af
--- /dev/null
+++ b/pkg/sentry/platform/ring0/lib_arm64.go
@@ -0,0 +1,58 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+// CPACREL1 returns the value of the CPACR_EL1 register.
+func CPACREL1() (value uintptr)
+
+// GetFPCR returns the value of the FPCR register.
+func GetFPCR() (value uintptr)
+
+// SetFPCR writes the FPCR value.
+func SetFPCR(value uintptr)
+
+// GetFPSR returns the value of the FPSR register.
+func GetFPSR() (value uintptr)
+
+// SetFPSR writes the FPSR value.
+func SetFPSR(value uintptr)
+
+// SaveVRegs saves V0-V31 registers.
+// V0-V31: 32 128-bit registers for floating point and simd.
+func SaveVRegs(*byte)
+
+// LoadVRegs loads V0-V31 registers.
+func LoadVRegs(*byte)
+
+// LoadFloatingPoint loads floating point state.
+func LoadFloatingPoint(*byte)
+
+// SaveFloatingPoint saves floating point state.
+func SaveFloatingPoint(*byte)
+
+// GetTLS returns the value of TPIDR_EL0 register.
+func GetTLS() (value uint64)
+
+// SetTLS writes the TPIDR_EL0 value.
+func SetTLS(value uint64)
+
+// Init sets function pointers based on architectural features.
+//
+// This must be called prior to using ring0.
+func Init() {
+ rewriteVectors()
+}
diff --git a/pkg/sentry/platform/ring0/lib_arm64.s b/pkg/sentry/platform/ring0/lib_arm64.s
new file mode 100644
index 000000000..86bfbe46f
--- /dev/null
+++ b/pkg/sentry/platform/ring0/lib_arm64.s
@@ -0,0 +1,217 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "funcdata.h"
+#include "textflag.h"
+
+TEXT ·GetTLS(SB),NOSPLIT,$0-8
+ MRS TPIDR_EL0, R1
+ MOVD R1, ret+0(FP)
+ RET
+
+TEXT ·SetTLS(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R1
+ MSR R1, TPIDR_EL0
+ RET
+
+TEXT ·CPACREL1(SB),NOSPLIT,$0-8
+ WORD $0xd5381041 // MRS CPACR_EL1, R1
+ MOVD R1, ret+0(FP)
+ RET
+
+TEXT ·GetFPCR(SB),NOSPLIT,$0-8
+ WORD $0xd53b4201 // MRS NZCV, R1
+ MOVD R1, ret+0(FP)
+ RET
+
+TEXT ·GetFPSR(SB),NOSPLIT,$0-8
+ WORD $0xd53b4421 // MRS FPSR, R1
+ MOVD R1, ret+0(FP)
+ RET
+
+TEXT ·SetFPCR(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R1
+ WORD $0xd51b4201 // MSR R1, NZCV
+ RET
+
+TEXT ·SetFPSR(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R1
+ WORD $0xd51b4421 // MSR R1, FPSR
+ RET
+
+TEXT ·SaveVRegs(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R0
+
+ // Skip aarch64_ctx, fpsr, fpcr.
+ FMOVD F0, 16*1(R0)
+ FMOVD F1, 16*2(R0)
+ FMOVD F2, 16*3(R0)
+ FMOVD F3, 16*4(R0)
+ FMOVD F4, 16*5(R0)
+ FMOVD F5, 16*6(R0)
+ FMOVD F6, 16*7(R0)
+ FMOVD F7, 16*8(R0)
+ FMOVD F8, 16*9(R0)
+ FMOVD F9, 16*10(R0)
+ FMOVD F10, 16*11(R0)
+ FMOVD F11, 16*12(R0)
+ FMOVD F12, 16*13(R0)
+ FMOVD F13, 16*14(R0)
+ FMOVD F14, 16*15(R0)
+ FMOVD F15, 16*16(R0)
+ FMOVD F16, 16*17(R0)
+ FMOVD F17, 16*18(R0)
+ FMOVD F18, 16*19(R0)
+ FMOVD F19, 16*20(R0)
+ FMOVD F20, 16*21(R0)
+ FMOVD F21, 16*22(R0)
+ FMOVD F22, 16*23(R0)
+ FMOVD F23, 16*24(R0)
+ FMOVD F24, 16*25(R0)
+ FMOVD F25, 16*26(R0)
+ FMOVD F26, 16*27(R0)
+ FMOVD F27, 16*28(R0)
+ FMOVD F28, 16*29(R0)
+ FMOVD F29, 16*30(R0)
+ FMOVD F30, 16*31(R0)
+ FMOVD F31, 16*32(R0)
+ ISB $15
+
+ RET
+
+TEXT ·LoadVRegs(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R0
+
+ // Skip aarch64_ctx, fpsr, fpcr.
+ FMOVD 16*1(R0), F0
+ FMOVD 16*2(R0), F1
+ FMOVD 16*3(R0), F2
+ FMOVD 16*4(R0), F3
+ FMOVD 16*5(R0), F4
+ FMOVD 16*6(R0), F5
+ FMOVD 16*7(R0), F6
+ FMOVD 16*8(R0), F7
+ FMOVD 16*9(R0), F8
+ FMOVD 16*10(R0), F9
+ FMOVD 16*11(R0), F10
+ FMOVD 16*12(R0), F11
+ FMOVD 16*13(R0), F12
+ FMOVD 16*14(R0), F13
+ FMOVD 16*15(R0), F14
+ FMOVD 16*16(R0), F15
+ FMOVD 16*17(R0), F16
+ FMOVD 16*18(R0), F17
+ FMOVD 16*19(R0), F18
+ FMOVD 16*20(R0), F19
+ FMOVD 16*21(R0), F20
+ FMOVD 16*22(R0), F21
+ FMOVD 16*23(R0), F22
+ FMOVD 16*24(R0), F23
+ FMOVD 16*25(R0), F24
+ FMOVD 16*26(R0), F25
+ FMOVD 16*27(R0), F26
+ FMOVD 16*28(R0), F27
+ FMOVD 16*29(R0), F28
+ FMOVD 16*30(R0), F29
+ FMOVD 16*31(R0), F30
+ FMOVD 16*32(R0), F31
+ ISB $15
+
+ RET
+
+TEXT ·LoadFloatingPoint(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R0
+
+ MOVD 0(R0), R1
+ MOVD R1, FPSR
+ MOVD 8(R0), R1
+ MOVD R1, NZCV
+
+ FMOVD 16*1(R0), F0
+ FMOVD 16*2(R0), F1
+ FMOVD 16*3(R0), F2
+ FMOVD 16*4(R0), F3
+ FMOVD 16*5(R0), F4
+ FMOVD 16*6(R0), F5
+ FMOVD 16*7(R0), F6
+ FMOVD 16*8(R0), F7
+ FMOVD 16*9(R0), F8
+ FMOVD 16*10(R0), F9
+ FMOVD 16*11(R0), F10
+ FMOVD 16*12(R0), F11
+ FMOVD 16*13(R0), F12
+ FMOVD 16*14(R0), F13
+ FMOVD 16*15(R0), F14
+ FMOVD 16*16(R0), F15
+ FMOVD 16*17(R0), F16
+ FMOVD 16*18(R0), F17
+ FMOVD 16*19(R0), F18
+ FMOVD 16*20(R0), F19
+ FMOVD 16*21(R0), F20
+ FMOVD 16*22(R0), F21
+ FMOVD 16*23(R0), F22
+ FMOVD 16*24(R0), F23
+ FMOVD 16*25(R0), F24
+ FMOVD 16*26(R0), F25
+ FMOVD 16*27(R0), F26
+ FMOVD 16*28(R0), F27
+ FMOVD 16*29(R0), F28
+ FMOVD 16*30(R0), F29
+ FMOVD 16*31(R0), F30
+ FMOVD 16*32(R0), F31
+
+ RET
+
+TEXT ·SaveFloatingPoint(SB),NOSPLIT,$0-8
+ MOVD addr+0(FP), R0
+
+ MOVD FPSR, R1
+ MOVD R1, 0(R0)
+ MOVD NZCV, R1
+ MOVD R1, 8(R0)
+
+ FMOVD F0, 16*1(R0)
+ FMOVD F1, 16*2(R0)
+ FMOVD F2, 16*3(R0)
+ FMOVD F3, 16*4(R0)
+ FMOVD F4, 16*5(R0)
+ FMOVD F5, 16*6(R0)
+ FMOVD F6, 16*7(R0)
+ FMOVD F7, 16*8(R0)
+ FMOVD F8, 16*9(R0)
+ FMOVD F9, 16*10(R0)
+ FMOVD F10, 16*11(R0)
+ FMOVD F11, 16*12(R0)
+ FMOVD F12, 16*13(R0)
+ FMOVD F13, 16*14(R0)
+ FMOVD F14, 16*15(R0)
+ FMOVD F15, 16*16(R0)
+ FMOVD F16, 16*17(R0)
+ FMOVD F17, 16*18(R0)
+ FMOVD F18, 16*19(R0)
+ FMOVD F19, 16*20(R0)
+ FMOVD F20, 16*21(R0)
+ FMOVD F21, 16*22(R0)
+ FMOVD F22, 16*23(R0)
+ FMOVD F23, 16*24(R0)
+ FMOVD F24, 16*25(R0)
+ FMOVD F25, 16*26(R0)
+ FMOVD F26, 16*27(R0)
+ FMOVD F27, 16*28(R0)
+ FMOVD F28, 16*29(R0)
+ FMOVD F29, 16*30(R0)
+ FMOVD F30, 16*31(R0)
+ FMOVD F31, 16*32(R0)
+
+ RET
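
The save/restore routines above imply a fixed buffer layout: FPSR at offset 0, FPCR at offset 8 (moved via NZCV in this version), and V0-V31 at 16-byte strides starting at offset 16. An inferred Go sketch of that layout — not a type defined in this diff:

    // fpState is an inferred view of the byte buffer passed to
    // SaveFloatingPoint/LoadFloatingPoint.
    type fpState struct {
        FPSR  uint64        // Offset 0.
        FPCR  uint64        // Offset 8.
        Vregs [32][2]uint64 // V0..V31 at offsets 16*1..16*32; FMOVD moves the low 8 bytes.
    }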
diff --git a/pkg/sentry/platform/ring0/lib_arm64_unsafe.go b/pkg/sentry/platform/ring0/lib_arm64_unsafe.go
new file mode 100644
index 000000000..c05166fea
--- /dev/null
+++ b/pkg/sentry/platform/ring0/lib_arm64_unsafe.go
@@ -0,0 +1,108 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+import (
+ "reflect"
+ "syscall"
+ "unsafe"
+
+ "gvisor.dev/gvisor/pkg/safecopy"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+const (
+ nopInstruction = 0xd503201f
+ instSize = unsafe.Sizeof(uint32(0))
+ vectorsRawLen = 0x800
+)
+
+func unsafeSlice(addr uintptr, length int) (slice []uint32) {
+ hdr := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
+ hdr.Data = addr
+ hdr.Len = length / int(instSize)
+ hdr.Cap = length / int(instSize)
+ return slice
+}
+
+// Workaround: move ring0.Vectors() to an address with 11-bit alignment.
+//
+// According to the Arm64 architecture documentation, the start address of
+// the exception vector table must be 11-bit (2KB) aligned; see the Linux
+// kernel code as reference: arch/arm64/kernel/entry.S.
+// However, Go provides no way to align a function's start address to a
+// specific boundary. We have raised this question with the Go community:
+// https://groups.google.com/forum/m/#!topic/golang-dev/RPj90l5x86I
+// This function will be removed once Go supports this feature.
+//
+// This function does two jobs:
+// 1. Move the start address of the exception vector table to the aligned address.
+// 2. Fix up the branch offset of each moved instruction.
+func rewriteVectors() {
+ vectorsBegin := reflect.ValueOf(Vectors).Pointer()
+
+ // The exception vector table must be 11-bit aligned, and its size is 0x800.
+ // Please see the documentation as reference:
+ // https://developer.arm.com/docs/100933/0100/aarch64-exception-vector-table
+ //
+ // Since Go does not allow placing a function at a specific address, the
+ // exception vector table is defined as 4KB in length, with the second 2KB
+ // filled with NOPs, so that the first 2KB can be safely moved to an
+ // address with 11-bit alignment.
+ //
+ // The prerequisite for this function to work correctly is:
+ //   vectorsSafeLen >= 0x1000
+ //   vectorsRawLen = 0x800
+ vectorsSafeLen := int(safecopy.FindEndAddress(vectorsBegin) - vectorsBegin)
+ if vectorsSafeLen < 2*vectorsRawLen {
+ panic("Can't update vectors")
+ }
+
+ vectorsSafeTable := unsafeSlice(vectorsBegin, vectorsSafeLen) // Now a []uint32
+ vectorsRawLen32 := vectorsRawLen / int(instSize)
+
+ offset := vectorsBegin & (1<<11 - 1)
+ if offset != 0 {
+ offset = 1<<11 - offset
+ }
+
+ pageBegin := (vectorsBegin + offset) & ^uintptr(usermem.PageSize-1)
+
+ _, _, errno := syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_WRITE|syscall.PROT_EXEC))
+ if errno != 0 {
+ panic(errno.Error())
+ }
+
+ offset = offset / instSize // By index, not bytes.
+ // Move the exception vector table to the aligned address; this should use memmove.
+ for i := 1; i <= vectorsRawLen32; i++ {
+ vectorsSafeTable[int(offset)+vectorsRawLen32-i] = vectorsSafeTable[vectorsRawLen32-i]
+ }
+
+ // Adjust branch offsets, since the instructions were moved forward.
+ for i := 0; i < vectorsRawLen32; i++ {
+ if vectorsSafeTable[int(offset)+i] != nopInstruction {
+ vectorsSafeTable[int(offset)+i] -= uint32(offset)
+ }
+ }
+
+ _, _, errno = syscall.Syscall(syscall.SYS_MPROTECT, uintptr(pageBegin), uintptr(usermem.PageSize), uintptr(syscall.PROT_READ|syscall.PROT_EXEC))
+ if errno != 0 {
+ panic(errno.Error())
+ }
+}
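
The offset arithmetic in rewriteVectors computes how far forward the table must move to reach the next 2KB boundary. A standalone sketch of just that computation (the sample addresses are illustrative):

    package main

    import "fmt"

    // align2K mirrors the offset computation in rewriteVectors: the distance
    // in bytes from addr to the next 2KB (11-bit) boundary.
    func align2K(addr uintptr) uintptr {
        offset := addr & (1<<11 - 1)
        if offset != 0 {
            offset = 1<<11 - offset
        }
        return offset
    }

    func main() {
        for _, addr := range []uintptr{0x40a800, 0x40a9c0} {
            off := align2K(addr)
            fmt.Printf("addr %#x -> move by %#x to %#x\n", addr, off, addr+off)
        }
    }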
diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go
index 85cc3fdad..b8ab120a0 100644
--- a/pkg/sentry/platform/ring0/offsets_amd64.go
+++ b/pkg/sentry/platform/ring0/offsets_amd64.go
@@ -20,7 +20,8 @@ import (
"fmt"
"io"
"reflect"
- "syscall"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
)
// Emit prints architecture-specific offsets.
@@ -64,7 +65,7 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define SyscallInt80 0x%02x\n", SyscallInt80)
fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
- p := &syscall.PtraceRegs{}
+ p := &arch.Registers{}
fmt.Fprintf(w, "\n// Ptrace registers.\n")
fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.R15).Pointer()-reflect.ValueOf(p).Pointer())
fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.R14).Pointer()-reflect.ValueOf(p).Pointer())
diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go
new file mode 100644
index 000000000..f3de962f0
--- /dev/null
+++ b/pkg/sentry/platform/ring0/offsets_arm64.go
@@ -0,0 +1,127 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package ring0
+
+import (
+ "fmt"
+ "io"
+ "reflect"
+
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+)
+
+// Emit prints architecture-specific offsets.
+func Emit(w io.Writer) {
+ fmt.Fprintf(w, "// Automatically generated, do not edit.\n")
+
+ c := &CPU{}
+ fmt.Fprintf(w, "\n// CPU offsets.\n")
+ fmt.Fprintf(w, "#define CPU_SELF 0x%02x\n", reflect.ValueOf(&c.self).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_REGISTERS 0x%02x\n", reflect.ValueOf(&c.registers).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack)))
+ fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_FAULT_ADDR 0x%02x\n", reflect.ValueOf(&c.faultAddr).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_TTBR0_KVM 0x%02x\n", reflect.ValueOf(&c.ttbr0Kvm).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_TTBR0_APP 0x%02x\n", reflect.ValueOf(&c.ttbr0App).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_VECTOR_CODE 0x%02x\n", reflect.ValueOf(&c.vecCode).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_APP_ADDR 0x%02x\n", reflect.ValueOf(&c.appAddr).Pointer()-reflect.ValueOf(c).Pointer())
+ fmt.Fprintf(w, "#define CPU_LAZY_VFP 0x%02x\n", reflect.ValueOf(&c.lazyVFP).Pointer()-reflect.ValueOf(c).Pointer())
+
+ fmt.Fprintf(w, "\n// Bits.\n")
+ fmt.Fprintf(w, "#define _KERNEL_FLAGS 0x%02x\n", KernelFlagsSet)
+
+ fmt.Fprintf(w, "\n// Vectors.\n")
+ fmt.Fprintf(w, "#define El1SyncInvalid 0x%02x\n", El1SyncInvalid)
+ fmt.Fprintf(w, "#define El1IrqInvalid 0x%02x\n", El1IrqInvalid)
+ fmt.Fprintf(w, "#define El1FiqInvalid 0x%02x\n", El1FiqInvalid)
+ fmt.Fprintf(w, "#define El1ErrorInvalid 0x%02x\n", El1ErrorInvalid)
+
+ fmt.Fprintf(w, "#define El1Sync 0x%02x\n", El1Sync)
+ fmt.Fprintf(w, "#define El1Irq 0x%02x\n", El1Irq)
+ fmt.Fprintf(w, "#define El1Fiq 0x%02x\n", El1Fiq)
+ fmt.Fprintf(w, "#define El1Error 0x%02x\n", El1Error)
+
+ fmt.Fprintf(w, "#define El0Sync 0x%02x\n", El0Sync)
+ fmt.Fprintf(w, "#define El0Irq 0x%02x\n", El0Irq)
+ fmt.Fprintf(w, "#define El0Fiq 0x%02x\n", El0Fiq)
+ fmt.Fprintf(w, "#define El0Error 0x%02x\n", El0Error)
+
+ fmt.Fprintf(w, "#define El0Sync_invalid 0x%02x\n", El0Sync_invalid)
+ fmt.Fprintf(w, "#define El0Irq_invalid 0x%02x\n", El0Irq_invalid)
+ fmt.Fprintf(w, "#define El0Fiq_invalid 0x%02x\n", El0Fiq_invalid)
+ fmt.Fprintf(w, "#define El0Error_invalid 0x%02x\n", El0Error_invalid)
+
+ fmt.Fprintf(w, "#define El1Sync_da 0x%02x\n", El1Sync_da)
+ fmt.Fprintf(w, "#define El1Sync_ia 0x%02x\n", El1Sync_ia)
+ fmt.Fprintf(w, "#define El1Sync_sp_pc 0x%02x\n", El1Sync_sp_pc)
+ fmt.Fprintf(w, "#define El1Sync_undef 0x%02x\n", El1Sync_undef)
+ fmt.Fprintf(w, "#define El1Sync_dbg 0x%02x\n", El1Sync_dbg)
+ fmt.Fprintf(w, "#define El1Sync_inv 0x%02x\n", El1Sync_inv)
+
+ fmt.Fprintf(w, "#define El0Sync_svc 0x%02x\n", El0Sync_svc)
+ fmt.Fprintf(w, "#define El0Sync_da 0x%02x\n", El0Sync_da)
+ fmt.Fprintf(w, "#define El0Sync_ia 0x%02x\n", El0Sync_ia)
+ fmt.Fprintf(w, "#define El0Sync_fpsimd_acc 0x%02x\n", El0Sync_fpsimd_acc)
+ fmt.Fprintf(w, "#define El0Sync_sve_acc 0x%02x\n", El0Sync_sve_acc)
+ fmt.Fprintf(w, "#define El0Sync_sys 0x%02x\n", El0Sync_sys)
+ fmt.Fprintf(w, "#define El0Sync_sp_pc 0x%02x\n", El0Sync_sp_pc)
+ fmt.Fprintf(w, "#define El0Sync_undef 0x%02x\n", El0Sync_undef)
+ fmt.Fprintf(w, "#define El0Sync_dbg 0x%02x\n", El0Sync_dbg)
+ fmt.Fprintf(w, "#define El0Sync_inv 0x%02x\n", El0Sync_inv)
+
+ fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
+ fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
+ fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
+
+ p := &arch.Registers{}
+ fmt.Fprintf(w, "\n// Ptrace registers.\n")
+ fmt.Fprintf(w, "#define PTRACE_R0 0x%02x\n", reflect.ValueOf(&p.Regs[0]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R1 0x%02x\n", reflect.ValueOf(&p.Regs[1]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R2 0x%02x\n", reflect.ValueOf(&p.Regs[2]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R3 0x%02x\n", reflect.ValueOf(&p.Regs[3]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R4 0x%02x\n", reflect.ValueOf(&p.Regs[4]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R5 0x%02x\n", reflect.ValueOf(&p.Regs[5]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R6 0x%02x\n", reflect.ValueOf(&p.Regs[6]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R7 0x%02x\n", reflect.ValueOf(&p.Regs[7]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R8 0x%02x\n", reflect.ValueOf(&p.Regs[8]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R9 0x%02x\n", reflect.ValueOf(&p.Regs[9]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R10 0x%02x\n", reflect.ValueOf(&p.Regs[10]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R11 0x%02x\n", reflect.ValueOf(&p.Regs[11]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R12 0x%02x\n", reflect.ValueOf(&p.Regs[12]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R13 0x%02x\n", reflect.ValueOf(&p.Regs[13]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R14 0x%02x\n", reflect.ValueOf(&p.Regs[14]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R15 0x%02x\n", reflect.ValueOf(&p.Regs[15]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R16 0x%02x\n", reflect.ValueOf(&p.Regs[16]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R17 0x%02x\n", reflect.ValueOf(&p.Regs[17]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R18 0x%02x\n", reflect.ValueOf(&p.Regs[18]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R19 0x%02x\n", reflect.ValueOf(&p.Regs[19]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R20 0x%02x\n", reflect.ValueOf(&p.Regs[20]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R21 0x%02x\n", reflect.ValueOf(&p.Regs[21]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R22 0x%02x\n", reflect.ValueOf(&p.Regs[22]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R23 0x%02x\n", reflect.ValueOf(&p.Regs[23]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R24 0x%02x\n", reflect.ValueOf(&p.Regs[24]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R25 0x%02x\n", reflect.ValueOf(&p.Regs[25]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R26 0x%02x\n", reflect.ValueOf(&p.Regs[26]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R27 0x%02x\n", reflect.ValueOf(&p.Regs[27]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R28 0x%02x\n", reflect.ValueOf(&p.Regs[28]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R29 0x%02x\n", reflect.ValueOf(&p.Regs[29]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_R30 0x%02x\n", reflect.ValueOf(&p.Regs[30]).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_SP 0x%02x\n", reflect.ValueOf(&p.Sp).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_PC 0x%02x\n", reflect.ValueOf(&p.Pc).Pointer()-reflect.ValueOf(p).Pointer())
+ fmt.Fprintf(w, "#define PTRACE_PSTATE 0x%02x\n", reflect.ValueOf(&p.Pstate).Pointer()-reflect.ValueOf(p).Pointer())
+}
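
Each #define above is produced by the same pointer-subtraction idiom: take
the address of a field and subtract the address of the enclosing struct. A
hedged sketch of that idiom in isolation (regs and offsetOf are
illustrative helpers, not part of the package):

package main

import (
	"fmt"
	"reflect"
)

type regs struct {
	A uint64
	B uint64
}

// offsetOf computes the byte offset of a field within a struct, using
// the same reflect-based pointer subtraction as Emit.
func offsetOf(base, field interface{}) uintptr {
	return reflect.ValueOf(field).Pointer() - reflect.ValueOf(base).Pointer()
}

func main() {
	r := &regs{}
	fmt.Printf("#define REGS_B 0x%02x\n", offsetOf(r, &r.B)) // 0x08
}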
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 934a90378..16d5f478b 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -1,14 +1,14 @@
-load("//tools/go_stateify:defs.bzl", "go_library")
-load("@io_bazel_rules_go//go:def.bzl", "go_test")
+load("//tools:defs.bzl", "go_library", "go_test", "select_arch")
load("//tools/go_generics:defs.bzl", "go_template", "go_template_instance")
package(licenses = ["notice"])
go_template(
name = "generic_walker",
- srcs = [
- "walker_amd64.go",
- ],
+ srcs = select_arch(
+ amd64 = ["walker_amd64.go"],
+ arm64 = ["walker_arm64.go"],
+ ),
opt_types = [
"Visitor",
],
@@ -76,20 +76,29 @@ go_library(
"allocator.go",
"allocator_unsafe.go",
"pagetables.go",
+ "pagetables_aarch64.go",
"pagetables_amd64.go",
+ "pagetables_arm64.go",
"pagetables_x86.go",
+ "pcids.go",
+ "pcids_aarch64.go",
+ "pcids_aarch64.s",
"pcids_x86.go",
+ "walker_amd64.go",
+ "walker_arm64.go",
"walker_empty.go",
"walker_lookup.go",
"walker_map.go",
"walker_unmap.go",
],
- importpath = "gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables",
visibility = [
"//pkg/sentry/platform/kvm:__subpackages__",
"//pkg/sentry/platform/ring0:__subpackages__",
],
- deps = ["//pkg/sentry/usermem"],
+ deps = [
+ "//pkg/sync",
+ "//pkg/usermem",
+ ],
)
go_test(
@@ -97,9 +106,10 @@ go_test(
size = "small",
srcs = [
"pagetables_amd64_test.go",
+ "pagetables_arm64_test.go",
"pagetables_test.go",
"walker_check.go",
],
- embed = [":pagetables"],
- deps = ["//pkg/sentry/usermem"],
+ library = ":pagetables",
+ deps = ["//pkg/usermem"],
)
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator.go b/pkg/sentry/platform/ring0/pagetables/allocator.go
index 23fd5c352..8d75b7599 100644
--- a/pkg/sentry/platform/ring0/pagetables/allocator.go
+++ b/pkg/sentry/platform/ring0/pagetables/allocator.go
@@ -53,9 +53,14 @@ type RuntimeAllocator struct {
// NewRuntimeAllocator returns an allocator that uses runtime allocation.
func NewRuntimeAllocator() *RuntimeAllocator {
- return &RuntimeAllocator{
- used: make(map[*PTEs]struct{}),
- }
+ r := new(RuntimeAllocator)
+ r.Init()
+ return r
+}
+
+// Init initializes a RuntimeAllocator.
+func (r *RuntimeAllocator) Init() {
+ r.used = make(map[*PTEs]struct{})
}
// Recycle returns freed pages to the pool.
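
Splitting Init out of NewRuntimeAllocator lets callers embed a
RuntimeAllocator by value and initialize it in place, avoiding a separate
heap allocation. A rough in-package sketch of that pattern (machineState
is an illustrative caller, not part of this change):

// machineState embeds the allocator by value; Init prepares the map
// without allocating a new RuntimeAllocator on the heap.
type machineState struct {
	allocator RuntimeAllocator
}

func newMachineState() *machineState {
	m := &machineState{}
	m.allocator.Init()
	return m
}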
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
index a90394a33..d08bfdeb3 100644
--- a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
+++ b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
@@ -17,7 +17,7 @@ package pagetables
import (
"unsafe"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// newAlignedPTEs returns a set of aligned PTEs.
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
index 904f1a6de..7f18ac296 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -21,7 +21,7 @@
package pagetables
import (
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// PageTables is a set of page tables.
@@ -48,15 +48,6 @@ func New(a Allocator) *PageTables {
return p
}
-// Init initializes a set of PageTables.
-//
-//go:nosplit
-func (p *PageTables) Init(allocator Allocator) {
- p.Allocator = allocator
- p.root = p.Allocator.NewPTEs()
- p.rootPhysical = p.Allocator.PhysicalFor(p.root)
-}
-
// mapVisitor is used for map.
type mapVisitor struct {
target uintptr // Input.
@@ -95,6 +86,8 @@ func (*mapVisitor) requiresSplit() bool { return true }
//
// Precondition: addr & length must be page-aligned, their sum must not overflow.
//
+// +checkescape:hard,stack
+//
//go:nosplit
func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool {
if !opts.AccessType.Any() {
@@ -137,6 +130,8 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) {
//
// Precondition: addr & length must be page-aligned.
//
+// +checkescape:hard,stack
+//
//go:nosplit
func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
w := unmapWalker{
@@ -171,6 +166,8 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) {
//
// Precondition: addr & length must be page-aligned.
//
+// +checkescape:hard,stack
+//
//go:nosplit
func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool {
w := emptyWalker{
@@ -206,6 +203,8 @@ func (*lookupVisitor) requiresSplit() bool { return false }
// Lookup returns the physical address for the given virtual address.
//
+// +checkescape:hard,stack
+//
//go:nosplit
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) {
mask := uintptr(usermem.PageSize - 1)
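
The annotated methods above form the package's mapping API. A rough usage
sketch, assuming the constructors and types shown elsewhere in this diff
(and ignoring the BUILD visibility restrictions):

package main

import (
	"gvisor.dev/gvisor/pkg/sentry/platform/ring0/pagetables"
	"gvisor.dev/gvisor/pkg/usermem"
)

func main() {
	pt := pagetables.New(pagetables.NewRuntimeAllocator())

	// Map one user page read/write at 0x400000, backed by physical 0x1000.
	pt.Map(usermem.Addr(0x400000), usermem.PageSize,
		pagetables.MapOpts{AccessType: usermem.ReadWrite, User: true},
		0x1000)

	// Lookup translates the virtual address back to the physical address.
	phys, opts := pt.Lookup(usermem.Addr(0x400000))
	_, _ = phys, opts

	// Unmap removes the mapping again.
	pt.Unmap(usermem.Addr(0x400000), usermem.PageSize)
}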
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
new file mode 100644
index 000000000..6409d1d91
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go
@@ -0,0 +1,215 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package pagetables
+
+import (
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// archPageTables is architecture-specific data.
+type archPageTables struct {
+ // root is the pagetable root for kernel space.
+ root *PTEs
+
+ // rootPhysical is the cached physical address of the root.
+ //
+ // This is saved only to prevent constant translation.
+ rootPhysical uintptr
+
+ asid uint16
+}
+
+// TTBR0_EL1 returns the translation table base register 0.
+//
+//go:nosplit
+func (p *PageTables) TTBR0_EL1(noFlush bool, asid uint16) uint64 {
+ return uint64(p.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
+}
+
+// TTBR1_EL1 returns the translation table base register 1.
+//
+//go:nosplit
+func (p *PageTables) TTBR1_EL1(noFlush bool, asid uint16) uint64 {
+ return uint64(p.archPageTables.rootPhysical) | (uint64(asid)&ttbrASIDMask)<<ttbrASIDOffset
+}
+
+// Bits in page table entries.
+const (
+ typeTable = 0x3 << 0
+ typeSect = 0x1 << 0
+ typePage = 0x3 << 0
+ pteValid = 0x1 << 0
+ pteTableBit = 0x1 << 1
+ pteTypeMask = 0x3 << 0
+ present = pteValid | pteTableBit
+ user = 0x1 << 6 /* AP[1] */
+ readOnly = 0x1 << 7 /* AP[2] */
+ accessed = 0x1 << 10
+ dbm = 0x1 << 51
+ writable = dbm
+ cont = 0x1 << 52
+ pxn = 0x1 << 53
+ xn = 0x1 << 54
+ dirty = 0x1 << 55
+ nG = 0x1 << 11
+ shared = 0x3 << 8
+)
+
+const (
+ mtDevicenGnRE = 0x1 << 2
+ mtNormal = 0x4 << 2
+)
+
+const (
+ executeDisable = xn
+ optionMask = 0xfff | 0xfff<<48
+ protDefault = accessed | shared
+)
+
+// MapOpts are arm64 options.
+type MapOpts struct {
+ // AccessType defines permissions.
+ AccessType usermem.AccessType
+
+ // Global indicates the page is globally accessible.
+ Global bool
+
+ // User indicates the page is a user page.
+ User bool
+}
+
+// PTE is a page table entry.
+type PTE uintptr
+
+// Clear clears this PTE, including sect page information.
+//
+//go:nosplit
+func (p *PTE) Clear() {
+ atomic.StoreUintptr((*uintptr)(p), 0)
+}
+
+// Valid returns true iff this entry is valid.
+//
+//go:nosplit
+func (p *PTE) Valid() bool {
+ return atomic.LoadUintptr((*uintptr)(p))&present != 0
+}
+
+// Opts returns the PTE options.
+//
+// These are all options except Valid and Sect.
+//
+//go:nosplit
+func (p *PTE) Opts() MapOpts {
+ v := atomic.LoadUintptr((*uintptr)(p))
+
+ return MapOpts{
+ AccessType: usermem.AccessType{
+ Read: true,
+ Write: v&readOnly == 0,
+ Execute: v&xn == 0,
+ },
+ Global: v&nG == 0,
+ User: v&user != 0,
+ }
+}
+
+// SetSect sets this page as a sect page.
+//
+// The page must not be valid or a panic will result.
+//
+//go:nosplit
+func (p *PTE) SetSect() {
+ if p.Valid() {
+ // This is not allowed.
+ panic("SetSect called on valid page!")
+ }
+ atomic.StoreUintptr((*uintptr)(p), typeSect)
+}
+
+// IsSect returns true iff this page is a sect page.
+//
+//go:nosplit
+func (p *PTE) IsSect() bool {
+ return atomic.LoadUintptr((*uintptr)(p))&pteTypeMask == typeSect
+}
+
+// Set sets this PTE value.
+//
+// This does not change the sect page property.
+//
+//go:nosplit
+func (p *PTE) Set(addr uintptr, opts MapOpts) {
+ if !opts.AccessType.Any() {
+ p.Clear()
+ return
+ }
+ v := (addr &^ optionMask) | protDefault | nG | readOnly
+
+ if p.IsSect() {
+ // Note that this is inherited from the previous instance. Set
+ // does not change the value of Sect. See above.
+ v |= typeSect
+ } else {
+ v |= typePage
+ }
+
+ if opts.Global {
+ v = v &^ nG
+ }
+
+ if opts.AccessType.Execute {
+ v = v &^ executeDisable
+ } else {
+ v |= executeDisable
+ }
+ if opts.AccessType.Write {
+ v = v &^ readOnly
+ }
+
+ if opts.User {
+ v |= user
+ v |= mtNormal
+ } else {
+ v = v &^ user
+		v |= mtDevicenGnRE // Strongly-ordered device memory for kernel addresses (ring0.KernelStartAddress).
+ }
+ atomic.StoreUintptr((*uintptr)(p), v)
+}
+
+// setPageTable sets this PTE value and forces the write bit and sect bit to
+// be cleared. This is used explicitly for breaking sect pages.
+//
+//go:nosplit
+func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
+ addr := pt.Allocator.PhysicalFor(ptes)
+ if addr&^optionMask != addr {
+ // This should never happen.
+ panic("unaligned physical address!")
+ }
+ v := addr | typeTable | protDefault | mtNormal
+ atomic.StoreUintptr((*uintptr)(p), v)
+}
+
+// Address extracts the address. This should only be used if Valid returns true.
+//
+//go:nosplit
+func (p *PTE) Address() uintptr {
+ return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask
+}
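
Set and Opts round-trip permissions through the hardware bits defined
above. A small in-package sketch of that round trip; the address and
options are illustrative:

func examplePTERoundTrip() {
	var pte PTE
	pte.Set(0x400000, MapOpts{
		AccessType: usermem.ReadWrite, // No Execute, so xn remains set.
		User:       true,              // Sets AP[1] and selects mtNormal.
	})

	opts := pte.Opts()
	// opts.AccessType.Write == true    (readOnly/AP[2] cleared)
	// opts.AccessType.Execute == false (xn set)
	// opts.User == true                (AP[1] set)
	// opts.Global == false             (nG set for non-global pages)
	_ = opts
}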
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
index 7aa6c524e..0c153cf8c 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
@@ -41,5 +41,14 @@ const (
entriesPerPage = 512
)
+// Init initializes a set of PageTables.
+//
+//go:nosplit
+func (p *PageTables) Init(allocator Allocator) {
+ p.Allocator = allocator
+ p.root = p.Allocator.NewPTEs()
+ p.rootPhysical = p.Allocator.PhysicalFor(p.root)
+}
+
// PTEs is a collection of entries.
type PTEs [entriesPerPage]PTE
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
index 35e917526..54e8e554f 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
@@ -19,7 +19,7 @@ package pagetables
import (
"testing"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
func Test2MAnd4K(t *testing.T) {
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
new file mode 100644
index 000000000..1a49f12a2
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go
@@ -0,0 +1,57 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+// Address constraints.
+//
+// The lowerTop and upperBottom currently apply to four-level pagetables;
+// additional refactoring would be necessary to support five-level pagetables.
+const (
+ lowerTop = 0x0000ffffffffffff
+ upperBottom = 0xffff000000000000
+ pteShift = 12
+ pmdShift = 21
+ pudShift = 30
+ pgdShift = 39
+
+ pteMask = 0x1ff << pteShift
+ pmdMask = 0x1ff << pmdShift
+ pudMask = 0x1ff << pudShift
+ pgdMask = 0x1ff << pgdShift
+
+ pteSize = 1 << pteShift
+ pmdSize = 1 << pmdShift
+ pudSize = 1 << pudShift
+ pgdSize = 1 << pgdShift
+
+ ttbrASIDOffset = 55
+ ttbrASIDMask = 0xff
+
+ entriesPerPage = 512
+)
+
+// Init initializes a set of PageTables.
+//
+//go:nosplit
+func (p *PageTables) Init(allocator Allocator) {
+ p.Allocator = allocator
+ p.root = p.Allocator.NewPTEs()
+ p.rootPhysical = p.Allocator.PhysicalFor(p.root)
+ p.archPageTables.root = p.Allocator.NewPTEs()
+ p.archPageTables.rootPhysical = p.Allocator.PhysicalFor(p.archPageTables.root)
+}
+
+// PTEs is a collection of entries.
+type PTEs [entriesPerPage]PTE
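
Unlike the amd64 Init, the arm64 Init allocates two roots: p.root backs
the lower (user) half via TTBR0_EL1, while archPageTables.root backs the
upper (kernel) half via TTBR1_EL1. A rough sketch of how a platform might
consume both, assuming the accessors from pagetables_aarch64.go:

func exampleTTBRs() {
	pt := New(NewRuntimeAllocator())

	// Derive the register values for both halves, tagged with ASID 1.
	// Platform-specific code is responsible for actually writing them
	// to the TTBR0_EL1/TTBR1_EL1 registers.
	ttbr0 := pt.TTBR0_EL1(false /* noFlush */, 1 /* asid */)
	ttbr1 := pt.TTBR1_EL1(false /* noFlush */, 1 /* asid */)
	_, _ = ttbr0, ttbr1
}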
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go
new file mode 100644
index 000000000..2f73d424f
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64_test.go
@@ -0,0 +1,80 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package pagetables
+
+import (
+ "testing"
+
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+func Test2MAnd4K(t *testing.T) {
+ pt := New(NewRuntimeAllocator())
+
+ // Map a small page and a huge page.
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: true}, pteSize*42)
+ pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*47)
+
+ pt.Map(0xffff000000400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: false}, pteSize*42)
+ pt.Map(0xffffff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: false}, pmdSize*47)
+
+ checkMappings(t, pt, []mapping{
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: true}},
+ {0x0000ff0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read, User: true}},
+ {0xffff000000400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: false}},
+ {0xffffff0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read, User: false}},
+ })
+}
+
+func Test1GAnd4K(t *testing.T) {
+ pt := New(NewRuntimeAllocator())
+
+ // Map a small page and a super page.
+ pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite, User: true}, pteSize*42)
+ pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*47)
+
+ checkMappings(t, pt, []mapping{
+ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite, User: true}},
+ {0x0000ff0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read, User: true}},
+ })
+}
+
+func TestSplit1GPage(t *testing.T) {
+ pt := New(NewRuntimeAllocator())
+
+ // Map a super page and knock out the middle.
+ pt.Map(0x0000ff0000000000, pudSize, MapOpts{AccessType: usermem.Read, User: true}, pudSize*42)
+ pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pudSize-(2*pteSize))
+
+ checkMappings(t, pt, []mapping{
+ {0x0000ff0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
+ {0x0000ff0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read, User: true}},
+ })
+}
+
+func TestSplit2MPage(t *testing.T) {
+ pt := New(NewRuntimeAllocator())
+
+ // Map a huge page and knock out the middle.
+ pt.Map(0x0000ff0000000000, pmdSize, MapOpts{AccessType: usermem.Read, User: true}, pmdSize*42)
+ pt.Unmap(usermem.Addr(0x0000ff0000000000+pteSize), pmdSize-(2*pteSize))
+
+ checkMappings(t, pt, []mapping{
+ {0x0000ff0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read, User: true}},
+ {0x0000ff0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read, User: true}},
+ })
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
index 6e95ad2b9..5c88d087d 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -17,7 +17,7 @@ package pagetables
import (
"testing"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
type mapping struct {
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
index 3e2383c5e..157438d9b 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// +build i386 amd64
+// +build 386 amd64
package pagetables
import (
"sync/atomic"
- "gvisor.dev/gvisor/pkg/sentry/usermem"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// archPageTables is architecture-specific data.
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids.go b/pkg/sentry/platform/ring0/pagetables/pcids.go
new file mode 100644
index 000000000..964496aac
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids.go
@@ -0,0 +1,104 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+import (
+ "gvisor.dev/gvisor/pkg/sync"
+)
+
+// PCIDs is a simple PCID database.
+//
+// Although access is serialized by an internal mutex, each instance is
+// intended to track assignments for a single CPU at a time.
+type PCIDs struct {
+ // mu protects below.
+ mu sync.Mutex
+
+ // cache are the assigned page tables.
+ cache map[*PageTables]uint16
+
+ // avail are available PCIDs.
+ avail []uint16
+}
+
+// NewPCIDs returns a new PCID database.
+//
+// start is the first index to assign. Typically this will be one, as the zero
+// pcid will always be flushed on transition (see pagetables_x86.go). This may
+// be more than one if specific PCIDs are reserved.
+//
+// Nil is returned iff the start and size are out of range.
+func NewPCIDs(start, size uint16) *PCIDs {
+ if start+uint16(size) > limitPCID {
+ return nil // See comment.
+ }
+ p := &PCIDs{
+ cache: make(map[*PageTables]uint16),
+ }
+ for pcid := start; pcid < start+size; pcid++ {
+ p.avail = append(p.avail, pcid)
+ }
+ return p
+}
+
+// Assign assigns a PCID to the given PageTables.
+//
+// This may overwrite any previous assignment provided. If this is the case,
+// true is returned to indicate that the PCID should be flushed.
+func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
+ p.mu.Lock()
+ if pcid, ok := p.cache[pt]; ok {
+ p.mu.Unlock()
+ return pcid, false // No flush.
+ }
+
+ // Is there something available?
+ if len(p.avail) > 0 {
+ pcid := p.avail[len(p.avail)-1]
+ p.avail = p.avail[:len(p.avail)-1]
+ p.cache[pt] = pcid
+
+ // We need to flush because while this is in the available
+ // pool, it may have been used previously.
+ p.mu.Unlock()
+ return pcid, true
+ }
+
+ // Evict an existing table.
+ for old, pcid := range p.cache {
+ delete(p.cache, old)
+ p.cache[pt] = pcid
+
+ // A flush is definitely required in this case, these page
+ // tables may still be active. (They will just be assigned some
+ // other PCID if and when they hit the given CPU again.)
+ p.mu.Unlock()
+ return pcid, true
+ }
+
+ // No PCID.
+ p.mu.Unlock()
+ return 0, false
+}
+
+// Drop drops references to a set of page tables.
+func (p *PCIDs) Drop(pt *PageTables) {
+ p.mu.Lock()
+ if pcid, ok := p.cache[pt]; ok {
+ delete(p.cache, pt)
+ p.avail = append(p.avail, pcid)
+ }
+ p.mu.Unlock()
+}
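
A short in-package usage sketch of the database above; the start/size
values are illustrative:

func examplePCIDs() {
	pcids := NewPCIDs(1, 8) // PCIDs 1..8; 0 stays reserved.
	pt := New(NewRuntimeAllocator())

	// The first assignment draws from the available pool; flush is true
	// because the PCID may have been used previously.
	pcid, flush := pcids.Assign(pt)
	_, _ = pcid, flush

	// Re-assigning the same tables hits the cache: same PCID, no flush.
	again, flush2 := pcids.Assign(pt)
	_, _ = again, flush2 // again == pcid, flush2 == false

	pcids.Drop(pt) // Returns the PCID to the pool.
}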
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
new file mode 100644
index 000000000..fbfd41d83
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.go
@@ -0,0 +1,32 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package pagetables
+
+// limitPCID is the maximum value of a PCID (ASID).
+//
+// In VMSAv8-64, the PCID(ASID) size is an IMPLEMENTATION DEFINED choice
+// of 8 bits or 16 bits, and ID_AA64MMFR0_EL1.ASIDBits identifies the
+// supported size. When an implementation supports a 16-bit ASID, TCR_ELx.AS
+// selects whether the top 8 bits of the ASID are used.
+var limitPCID uint16
+
+// GetASIDBits returns the number of ASID bits supported by the system: 8 or 16.
+func GetASIDBits() uint8
+
+func init() {
+ limitPCID = uint16(1)<<GetASIDBits() - 1
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
new file mode 100644
index 000000000..e9d62d768
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_aarch64.s
@@ -0,0 +1,45 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+#include "funcdata.h"
+#include "textflag.h"
+
+#define ID_AA64MMFR0_ASIDBITS_SHIFT 4
+#define ID_AA64MMFR0_ASIDBITS_16 2
+#define TCR_EL1_AS_BIT 36
+
+// GetASIDBits returns the number of ASID bits supported by the system: 8 or 16.
+//
+// func GetASIDBits() uint8
+TEXT ·GetASIDBits(SB),NOSPLIT,$0-1
+	// First, check whether the 16-bit ASID is supported.
+ // ID_AA64MMFR0_EL1.ASIDBITS[7:4] == 0010.
+ WORD $0xd5380700 // MRS ID_AA64MMFR0_EL1, R0
+ UBFX $ID_AA64MMFR0_ASIDBITS_SHIFT, R0, $4, R0
+ CMPW $ID_AA64MMFR0_ASIDBITS_16, R0
+ BNE bits_8
+
+	// Second, check whether the 16-bit ASID is enabled.
+ // TCR_EL1.AS[36] == 1.
+ WORD $0xd5382040 // MRS TCR_EL1, R0
+ TBZ $TCR_EL1_AS_BIT, R0, bits_8
+ MOVD $16, R0
+ B done
+bits_8:
+ MOVD $8, R0
+done:
+ MOVB R0, ret+0(FP)
+ RET
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
index 0f029f25d..91fc5e8dd 100644
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -16,94 +16,5 @@
package pagetables
-import (
- "sync"
-)
-
-// limitPCID is the number of valid PCIDs.
-const limitPCID = 4096
-
-// PCIDs is a simple PCID database.
-//
-// This is not protected by locks and is thus suitable for use only with a
-// single CPU at a time.
-type PCIDs struct {
- // mu protects below.
- mu sync.Mutex
-
- // cache are the assigned page tables.
- cache map[*PageTables]uint16
-
- // avail are available PCIDs.
- avail []uint16
-}
-
-// NewPCIDs returns a new PCID database.
-//
-// start is the first index to assign. Typically this will be one, as the zero
-// pcid will always be flushed on transition (see pagetables_x86.go). This may
-// be more than one if specific PCIDs are reserved.
-//
-// Nil is returned iff the start and size are out of range.
-func NewPCIDs(start, size uint16) *PCIDs {
- if start+uint16(size) >= limitPCID {
- return nil // See comment.
- }
- p := &PCIDs{
- cache: make(map[*PageTables]uint16),
- }
- for pcid := start; pcid < start+size; pcid++ {
- p.avail = append(p.avail, pcid)
- }
- return p
-}
-
-// Assign assigns a PCID to the given PageTables.
-//
-// This may overwrite any previous assignment provided. If this in the case,
-// true is returned to indicate that the PCID should be flushed.
-func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- p.mu.Unlock()
- return pcid, false // No flush.
- }
-
- // Is there something available?
- if len(p.avail) > 0 {
- pcid := p.avail[len(p.avail)-1]
- p.avail = p.avail[:len(p.avail)-1]
- p.cache[pt] = pcid
-
- // We need to flush because while this is in the available
- // pool, it may have been used previously.
- p.mu.Unlock()
- return pcid, true
- }
-
- // Evict an existing table.
- for old, pcid := range p.cache {
- delete(p.cache, old)
- p.cache[pt] = pcid
-
- // A flush is definitely required in this case, these page
- // tables may still be active. (They will just be assigned some
- // other PCID if and when they hit the given CPU again.)
- p.mu.Unlock()
- return pcid, true
- }
-
- // No PCID.
- p.mu.Unlock()
- return 0, false
-}
-
-// Drop drops references to a set of page tables.
-func (p *PCIDs) Drop(pt *PageTables) {
- p.mu.Lock()
- if pcid, ok := p.cache[pt]; ok {
- delete(p.cache, pt)
- p.avail = append(p.avail, pcid)
- }
- p.mu.Unlock()
-}
+// limitPCID is the maximum valid PCID value.
+const limitPCID = 4095
diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
new file mode 100644
index 000000000..c261d393a
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go
@@ -0,0 +1,314 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package pagetables
+
+// Visitor is a generic type.
+type Visitor interface {
+ // visit is called on each PTE.
+ visit(start uintptr, pte *PTE, align uintptr)
+
+ // requiresAlloc indicates that new entries should be allocated within
+ // the walked range.
+ requiresAlloc() bool
+
+ // requiresSplit indicates that entries in the given range should be
+ // split if they are huge or jumbo pages.
+ requiresSplit() bool
+}
+
+// Walker walks page tables.
+type Walker struct {
+ // pageTables are the tables to walk.
+ pageTables *PageTables
+
+ // Visitor is the set of arguments.
+ visitor Visitor
+}
+
+// iterateRange iterates over all appropriate levels of page tables for the given range.
+//
+// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The
+// exception is sect pages. If a valid sect page (huge or jumbo) cannot be
+// installed, then the walk will continue to individual entries.
+//
+// This algorithm will attempt to maximize the use of sect pages whenever
+// possible. Whether a sect page is provided will be clear through the range
+// provided in the callback.
+//
+// Note that if requiresAlloc is true, then no gaps will be present. However,
+// if requiresAlloc is false, then the iteration will likely be full of gaps.
+//
+// Note that this function should generally be avoided in favor of Map, Unmap,
+// etc. when not necessary.
+//
+// Precondition: start must be page-aligned.
+//
+// Precondition: start must be less than end.
+//
+// Precondition: If requiresAlloc is true, then start and end should not span
+// non-canonical ranges. If they do, a panic will result.
+//
+//go:nosplit
+func (w *Walker) iterateRange(start, end uintptr) {
+ if start%pteSize != 0 {
+ panic("unaligned start")
+ }
+ if end < start {
+ panic("start > end")
+ }
+ if start < lowerTop {
+ if end <= lowerTop {
+ w.iterateRangeCanonical(start, end)
+ } else if end > lowerTop && end <= upperBottom {
+ if w.visitor.requiresAlloc() {
+ panic("alloc spans non-canonical range")
+ }
+ w.iterateRangeCanonical(start, lowerTop)
+ } else {
+ if w.visitor.requiresAlloc() {
+ panic("alloc spans non-canonical range")
+ }
+ w.iterateRangeCanonical(start, lowerTop)
+ w.iterateRangeCanonical(upperBottom, end)
+ }
+ } else if start < upperBottom {
+ if end <= upperBottom {
+ if w.visitor.requiresAlloc() {
+ panic("alloc spans non-canonical range")
+ }
+ } else {
+ if w.visitor.requiresAlloc() {
+ panic("alloc spans non-canonical range")
+ }
+ w.iterateRangeCanonical(upperBottom, end)
+ }
+ } else {
+ w.iterateRangeCanonical(start, end)
+ }
+}
+
+// next returns the next address quantized by the given size.
+//
+//go:nosplit
+func next(start uintptr, size uintptr) uintptr {
+ start &= ^(size - 1)
+ start += size
+ return start
+}
+
+// iterateRangeCanonical walks a canonical range.
+//
+//go:nosplit
+func (w *Walker) iterateRangeCanonical(start, end uintptr) {
+ pgdEntryIndex := w.pageTables.root
+ if start >= upperBottom {
+ pgdEntryIndex = w.pageTables.archPageTables.root
+ }
+
+ for pgdIndex := (uint16((start & pgdMask) >> pgdShift)); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
+ var (
+ pgdEntry = &pgdEntryIndex[pgdIndex]
+ pudEntries *PTEs
+ )
+ if !pgdEntry.Valid() {
+ if !w.visitor.requiresAlloc() {
+ // Skip over this entry.
+ start = next(start, pgdSize)
+ continue
+ }
+
+ // Allocate a new pgd.
+ pudEntries = w.pageTables.Allocator.NewPTEs()
+ pgdEntry.setPageTable(w.pageTables, pudEntries)
+ } else {
+ pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address())
+ }
+
+ // Map the next level.
+ clearPUDEntries := uint16(0)
+
+ for pudIndex := uint16((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
+ var (
+ pudEntry = &pudEntries[pudIndex]
+ pmdEntries *PTEs
+ )
+ if !pudEntry.Valid() {
+ if !w.visitor.requiresAlloc() {
+ // Skip over this entry.
+ clearPUDEntries++
+ start = next(start, pudSize)
+ continue
+ }
+
+ // This level has 1-GB sect pages. Is this
+ // entire region at least as large as a single
+ // PUD entry? If so, we can skip allocating a
+ // new page for the pmd.
+ if start&(pudSize-1) == 0 && end-start >= pudSize {
+ pudEntry.SetSect()
+ w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
+ if pudEntry.Valid() {
+ start = next(start, pudSize)
+ continue
+ }
+ }
+
+ // Allocate a new pud.
+ pmdEntries = w.pageTables.Allocator.NewPTEs()
+ pudEntry.setPageTable(w.pageTables, pmdEntries)
+
+ } else if pudEntry.IsSect() {
+ // Does this page need to be split?
+ if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) {
+ // Install the relevant entries.
+ pmdEntries = w.pageTables.Allocator.NewPTEs()
+ for index := uint16(0); index < entriesPerPage; index++ {
+ pmdEntries[index].SetSect()
+ pmdEntries[index].Set(
+ pudEntry.Address()+(pmdSize*uintptr(index)),
+ pudEntry.Opts())
+ }
+ pudEntry.setPageTable(w.pageTables, pmdEntries)
+ } else {
+ // A sect page to be checked directly.
+ w.visitor.visit(uintptr(start), pudEntry, pudSize-1)
+
+ // Might have been cleared.
+ if !pudEntry.Valid() {
+ clearPUDEntries++
+ }
+
+ // Note that the sect page was changed.
+ start = next(start, pudSize)
+ continue
+ }
+
+ } else {
+ pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address())
+ }
+
+ // Map the next level, since this is valid.
+ clearPMDEntries := uint16(0)
+
+ for pmdIndex := uint16((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
+ var (
+ pmdEntry = &pmdEntries[pmdIndex]
+ pteEntries *PTEs
+ )
+ if !pmdEntry.Valid() {
+ if !w.visitor.requiresAlloc() {
+ // Skip over this entry.
+ clearPMDEntries++
+ start = next(start, pmdSize)
+ continue
+ }
+
+				// This level has 2-MB huge pages. Is this
+				// entire region at least as large as a single
+				// PMD entry? If so, as above, we can skip
+				// allocating a new page.
+ if start&(pmdSize-1) == 0 && end-start >= pmdSize {
+ pmdEntry.SetSect()
+ w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
+ if pmdEntry.Valid() {
+ start = next(start, pmdSize)
+ continue
+ }
+ }
+
+ // Allocate a new pmd.
+ pteEntries = w.pageTables.Allocator.NewPTEs()
+ pmdEntry.setPageTable(w.pageTables, pteEntries)
+
+ } else if pmdEntry.IsSect() {
+ // Does this page need to be split?
+ if w.visitor.requiresSplit() && (start&(pmdSize-1) != 0 || end < next(start, pmdSize)) {
+ // Install the relevant entries.
+ pteEntries = w.pageTables.Allocator.NewPTEs()
+ for index := uint16(0); index < entriesPerPage; index++ {
+ pteEntries[index].Set(
+ pmdEntry.Address()+(pteSize*uintptr(index)),
+ pmdEntry.Opts())
+ }
+ pmdEntry.setPageTable(w.pageTables, pteEntries)
+ } else {
+ // A huge page to be checked directly.
+ w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1)
+
+ // Might have been cleared.
+ if !pmdEntry.Valid() {
+ clearPMDEntries++
+ }
+
+ // Note that the huge page was changed.
+ start = next(start, pmdSize)
+ continue
+ }
+
+ } else {
+ pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address())
+ }
+
+ // Map the next level, since this is valid.
+ clearPTEEntries := uint16(0)
+
+ for pteIndex := uint16((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
+ var (
+ pteEntry = &pteEntries[pteIndex]
+ )
+ if !pteEntry.Valid() && !w.visitor.requiresAlloc() {
+ clearPTEEntries++
+ start += pteSize
+ continue
+ }
+
+ // At this point, we are guaranteed that start%pteSize == 0.
+ w.visitor.visit(uintptr(start), pteEntry, pteSize-1)
+ if !pteEntry.Valid() {
+ if w.visitor.requiresAlloc() {
+ panic("PTE not set after iteration with requiresAlloc!")
+ }
+ clearPTEEntries++
+ }
+
+ // Note that the pte was changed.
+ start += pteSize
+ continue
+ }
+
+ // Check if we no longer need this page.
+ if clearPTEEntries == entriesPerPage {
+ pmdEntry.Clear()
+ w.pageTables.Allocator.FreePTEs(pteEntries)
+ clearPMDEntries++
+ }
+ }
+
+ // Check if we no longer need this page.
+ if clearPMDEntries == entriesPerPage {
+ pudEntry.Clear()
+ w.pageTables.Allocator.FreePTEs(pmdEntries)
+ clearPUDEntries++
+ }
+ }
+
+ // Check if we no longer need this page.
+ if clearPUDEntries == entriesPerPage {
+ pgdEntry.Clear()
+ w.pageTables.Allocator.FreePTEs(pudEntries)
+ }
+ }
+}
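
Concrete visitors implement the three methods of the Visitor interface
above, and the go_template instantiations in the BUILD file specialize the
walker for each one. A hedged in-package sketch of a minimal read-only
visitor (countVisitor is illustrative, not one of the generated walkers):

// countVisitor counts valid entries over a range. It neither allocates
// missing entries nor splits sect pages.
type countVisitor struct {
	pages int // Incremented once per valid entry visited.
}

func (*countVisitor) requiresAlloc() bool { return false }
func (*countVisitor) requiresSplit() bool { return false }

func (v *countVisitor) visit(start uintptr, pte *PTE, align uintptr) {
	if pte.Valid() {
		v.pages++
	}
}

// Usage:
//	v := &countVisitor{}
//	w := Walker{pageTables: pt, visitor: v}
//	w.iterateRange(start, end)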
diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go
index 5f80d64e8..9da0ea685 100644
--- a/pkg/sentry/platform/ring0/x86.go
+++ b/pkg/sentry/platform/ring0/x86.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// +build i386 amd64
+// +build 386 amd64
package ring0