summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go6
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go6
-rw-r--r--pkg/sentry/platform/kvm/machine.go18
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go24
-rw-r--r--pkg/sentry/platform/ring0/aarch64.go6
-rw-r--r--pkg/sentry/platform/ring0/entry_arm64.s85
-rw-r--r--pkg/sentry/platform/ring0/kernel_arm64.go8
-rw-r--r--pkg/sentry/platform/ring0/offsets_arm64.go1
-rw-r--r--pkg/sentry/platform/ring0/pagetables/BUILD2
9 files changed, 110 insertions, 46 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 35cd55fef..4b23f7803 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -81,12 +81,6 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) {
// Save the death message, which will be thrown.
c.dieState.message = msg
- // Reload all registers to have an accurate stack trace when we return
- // to host mode. This means that the stack should be unwound correctly.
- if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 {
- throw(msg)
- }
-
// Setup the trampoline.
dieArchSetup(c, context, &c.dieState.guestRegs)
}
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
index a63a6a071..99cac665d 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go
@@ -31,6 +31,12 @@ import (
//
//go:nosplit
func dieArchSetup(c *vCPU, context *arch.SignalContext64, guestRegs *userRegs) {
+ // Reload all registers to have an accurate stack trace when we return
+ // to host mode. This means that the stack should be unwound correctly.
+ if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 {
+ throw(c.dieState.message)
+ }
+
// If the vCPU is in user mode, we set the stack to the stored stack
// value in the vCPU itself. We don't want to unwind the user stack.
if guestRegs.RFLAGS&ring0.UserFlagsSet == ring0.UserFlagsSet {
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 8076c7529..f1afc74dc 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -329,10 +329,12 @@ func (m *machine) Destroy() {
}
// Get gets an available vCPU.
+//
+// This will return with the OS thread locked.
func (m *machine) Get() *vCPU {
+ m.mu.RLock()
runtime.LockOSThread()
tid := procid.Current()
- m.mu.RLock()
// Check for an exact match.
if c := m.vCPUs[tid]; c != nil {
@@ -343,8 +345,22 @@ func (m *machine) Get() *vCPU {
// The happy path failed. We now proceed to acquire an exclusive lock
// (because the vCPU map may change), and scan all available vCPUs.
+ // In this case, we first unlock the OS thread. Otherwise, if mu is
+ // not available, the current system thread will be parked and a new
+ // system thread spawned. We avoid this situation by simply refreshing
+ // tid after relocking the system thread.
m.mu.RUnlock()
+ runtime.UnlockOSThread()
m.mu.Lock()
+ runtime.LockOSThread()
+ tid = procid.Current()
+
+ // Recheck for an exact match.
+ if c := m.vCPUs[tid]; c != nil {
+ c.lock()
+ m.mu.Unlock()
+ return c
+ }
for {
// Scan for an available vCPU.
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 1c8384e6b..b531f2f85 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -29,30 +29,6 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// setMemoryRegion initializes a region.
-//
-// This may be called from bluepillHandler, and therefore returns an errno
-// directly (instead of wrapping in an error) to avoid allocations.
-//
-//go:nosplit
-func (m *machine) setMemoryRegion(slot int, physical, length, virtual uintptr) syscall.Errno {
- userRegion := userMemoryRegion{
- slot: uint32(slot),
- flags: 0,
- guestPhysAddr: uint64(physical),
- memorySize: uint64(length),
- userspaceAddr: uint64(virtual),
- }
-
- // Set the region.
- _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(m.fd),
- _KVM_SET_USER_MEMORY_REGION,
- uintptr(unsafe.Pointer(&userRegion)))
- return errno
-}
-
type kvmVcpuInit struct {
target uint32
features [7]uint32
diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go
index 6b078cd1e..f6da41c27 100644
--- a/pkg/sentry/platform/ring0/aarch64.go
+++ b/pkg/sentry/platform/ring0/aarch64.go
@@ -88,14 +88,14 @@ const (
El0Sync_undef
El0Sync_dbg
El0Sync_inv
- VirtualizationException
_NR_INTERRUPTS
)
// System call vectors.
const (
- Syscall Vector = El0Sync_svc
- PageFault Vector = El0Sync_da
+ Syscall Vector = El0Sync_svc
+ PageFault Vector = El0Sync_da
+ VirtualizationException Vector = El0Error
)
// VirtualAddressBits returns the number bits available for virtual addresses.
diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s
index 679842288..d42eda37b 100644
--- a/pkg/sentry/platform/ring0/entry_arm64.s
+++ b/pkg/sentry/platform/ring0/entry_arm64.s
@@ -25,10 +25,14 @@
// not available for calls.
//
+// ERET returns using the ELR and SPSR for the current exception level.
#define ERET() \
WORD $0xd69f03e0
+// RSV_REG is a register that holds el1 information temporarily.
#define RSV_REG R18_PLATFORM
+
+// RSV_REG_APP is a register that holds el0 information temporarily.
#define RSV_REG_APP R9
#define FPEN_NOTRAP 0x3
@@ -36,6 +40,12 @@
#define FPEN_ENABLE (FPEN_NOTRAP << FPEN_SHIFT)
+// Saves a register set.
+//
+// This is a macro because it may need to executed in contents where a stack is
+// not available for calls.
+//
+// The following registers are not saved: R9, R18.
#define REGISTERS_SAVE(reg, offset) \
MOVD R0, offset+PTRACE_R0(reg); \
MOVD R1, offset+PTRACE_R1(reg); \
@@ -67,6 +77,12 @@
MOVD R29, offset+PTRACE_R29(reg); \
MOVD R30, offset+PTRACE_R30(reg);
+// Loads a register set.
+//
+// This is a macro because it may need to executed in contents where a stack is
+// not available for calls.
+//
+// The following registers are not loaded: R9, R18.
#define REGISTERS_LOAD(reg, offset) \
MOVD offset+PTRACE_R0(reg), R0; \
MOVD offset+PTRACE_R1(reg), R1; \
@@ -98,7 +114,7 @@
MOVD offset+PTRACE_R29(reg), R29; \
MOVD offset+PTRACE_R30(reg), R30;
-//NOP
+// NOP-s
#define nop31Instructions() \
WORD $0xd503201f; \
WORD $0xd503201f; \
@@ -254,6 +270,7 @@
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
+// LOAD_KERNEL_ADDRESS loads a kernel address.
#define LOAD_KERNEL_ADDRESS(from, to) \
MOVD from, to; \
ORR $0xffff000000000000, to, to;
@@ -263,15 +280,18 @@
LOAD_KERNEL_ADDRESS(CPU_SELF(from), RSV_REG); \
MOVD $CPU_STACK_TOP(RSV_REG), RSV_REG; \
MOVD RSV_REG, RSP; \
+ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
ISB $15; \
DSB $15;
+// SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application.
#define SWITCH_TO_APP_PAGETABLE(from) \
MOVD CPU_TTBR0_APP(from), RSV_REG; \
WORD $0xd5182012; \ // MSR R18, TTBR0_EL1
ISB $15; \
DSB $15;
+// SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable.
#define SWITCH_TO_KVM_PAGETABLE(from) \
MOVD CPU_TTBR0_KVM(from), RSV_REG; \
WORD $0xd5182012; \ // MSR R18, TTBR0_EL1
@@ -294,6 +314,7 @@
WORD $0xd5181040; \ //MSR R0, CPACR_EL1
ISB $15;
+// KERNEL_ENTRY_FROM_EL0 is the entry code of the vcpu from el0 to el1.
#define KERNEL_ENTRY_FROM_EL0 \
SUB $16, RSP, RSP; \ // step1, save r18, r9 into kernel temporary stack.
STP (RSV_REG, RSV_REG_APP), 16*0(RSP); \
@@ -315,19 +336,22 @@
WORD $0xd5384103; \ // MRS SP_EL0, R3
MOVD R3, PTRACE_SP(RSV_REG_APP);
+// KERNEL_ENTRY_FROM_EL1 is the entry code of the vcpu from el1 to el1.
#define KERNEL_ENTRY_FROM_EL1 \
WORD $0xd538d092; \ //MRS TPIDR_EL1, R18
- REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // save sentry context
+ REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // Save sentry context.
MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG); \
WORD $0xd5384004; \ // MRS SPSR_EL1, R4
MOVD R4, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG); \
MRS ELR_EL1, R4; \
MOVD R4, CPU_REGISTERS+PTRACE_PC(RSV_REG); \
MOVD RSP, R4; \
- MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG);
+ MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \
+ LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack.
+// Halt halts execution.
TEXT ·Halt(SB),NOSPLIT,$0
- // clear bluepill.
+ // Clear bluepill.
WORD $0xd538d092 //MRS TPIDR_EL1, R18
CMP RSV_REG, R9
BNE mmio_exit
@@ -341,8 +365,22 @@ mmio_exit:
// MMIO_EXIT.
MOVD $0, R9
MOVD R0, 0xffff000000001000(R9)
- B ·kernelExitToEl1(SB)
+ RET
+
+// HaltAndResume halts execution and point the pointer to the resume function.
+TEXT ·HaltAndResume(SB),NOSPLIT,$0
+ BL ·Halt(SB)
+ B ·kernelExitToEl1(SB) // Resume.
+// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resume.
+TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0
+ WORD $0xd538d092 // MRS TPIDR_EL1, R18
+ MOVD CPU_SELF(RSV_REG), R3 // Load vCPU.
+ MOVD R3, 8(RSP) // First argument (vCPU).
+ CALL ·kernelSyscall(SB) // Call the trampoline.
+ B ·kernelExitToEl1(SB) // Resume.
+
+// Shutdown stops the guest.
TEXT ·Shutdown(SB),NOSPLIT,$0
// PSCI EVENT.
MOVD $0x84000009, R0
@@ -429,6 +467,7 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0
TEXT ·kernelExitToEl1(SB),NOSPLIT,$0
ERET()
+// Start is the CPU entrypoint.
TEXT ·Start(SB),NOSPLIT,$0
IRQ_DISABLE
MOVD R8, RSV_REG
@@ -437,18 +476,23 @@ TEXT ·Start(SB),NOSPLIT,$0
B ·kernelExitToEl1(SB)
+// El1_sync_invalid is the handler for an invalid EL1_sync.
TEXT ·El1_sync_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_irq_invalid is the handler for an invalid El1_irq.
TEXT ·El1_irq_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_fiq_invalid is the handler for an invalid El1_fiq.
TEXT ·El1_fiq_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_error_invalid is the handler for an invalid El1_error.
TEXT ·El1_error_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_sync is the handler for El1_sync.
TEXT ·El1_sync(SB),NOSPLIT,$0
KERNEL_ENTRY_FROM_EL1
WORD $0xd5385219 // MRS ESR_EL1, R25
@@ -484,10 +528,10 @@ el1_da:
MOVD $PageFault, R3
MOVD R3, CPU_VECTOR_CODE(RSV_REG)
- B ·Halt(SB)
+ B ·HaltAndResume(SB)
el1_ia:
- B ·Halt(SB)
+ B ·HaltAndResume(SB)
el1_sp_pc:
B ·Shutdown(SB)
@@ -496,7 +540,9 @@ el1_undef:
B ·Shutdown(SB)
el1_svc:
- B ·Halt(SB)
+ MOVD $0, CPU_ERROR_CODE(RSV_REG)
+ MOVD $0, CPU_ERROR_TYPE(RSV_REG)
+ B ·HaltEl1SvcAndResume(SB)
el1_dbg:
B ·Shutdown(SB)
@@ -508,15 +554,19 @@ el1_fpsimd_acc:
el1_invalid:
B ·Shutdown(SB)
+// El1_irq is the handler for El1_irq.
TEXT ·El1_irq(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_fiq is the handler for El1_fiq.
TEXT ·El1_fiq(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El1_error is the handler for El1_error.
TEXT ·El1_error(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// El0_sync is the handler for El0_sync.
TEXT ·El0_sync(SB),NOSPLIT,$0
KERNEL_ENTRY_FROM_EL0
WORD $0xd5385219 // MRS ESR_EL1, R25
@@ -554,7 +604,7 @@ el0_svc:
MOVD $Syscall, R3
MOVD R3, CPU_VECTOR_CODE(RSV_REG)
- B ·Halt(SB)
+ B ·HaltAndResume(SB)
el0_da:
WORD $0xd538d092 //MRS TPIDR_EL1, R18
@@ -568,7 +618,7 @@ el0_da:
MOVD $PageFault, R3
MOVD R3, CPU_VECTOR_CODE(RSV_REG)
- B ·Halt(SB)
+ B ·HaltAndResume(SB)
el0_ia:
B ·Shutdown(SB)
@@ -601,7 +651,19 @@ TEXT ·El0_fiq(SB),NOSPLIT,$0
B ·Shutdown(SB)
TEXT ·El0_error(SB),NOSPLIT,$0
- B ·Shutdown(SB)
+ KERNEL_ENTRY_FROM_EL0
+ WORD $0xd538d092 //MRS TPIDR_EL1, R18
+ WORD $0xd538601a //MRS FAR_EL1, R26
+
+ MOVD R26, CPU_FAULT_ADDR(RSV_REG)
+
+ MOVD $1, R3
+ MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user.
+
+ MOVD $VirtualizationException, R3
+ MOVD R3, CPU_VECTOR_CODE(RSV_REG)
+
+ B ·HaltAndResume(SB)
TEXT ·El0_sync_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
@@ -615,6 +677,7 @@ TEXT ·El0_fiq_invalid(SB),NOSPLIT,$0
TEXT ·El0_error_invalid(SB),NOSPLIT,$0
B ·Shutdown(SB)
+// Vectors implements exception vector table.
TEXT ·Vectors(SB),NOSPLIT,$0
B ·El1_sync_invalid(SB)
nop31Instructions()
diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go
index c3d341998..ccacaea6b 100644
--- a/pkg/sentry/platform/ring0/kernel_arm64.go
+++ b/pkg/sentry/platform/ring0/kernel_arm64.go
@@ -16,6 +16,14 @@
package ring0
+// HaltAndResume halts execution and point the pointer to the resume function.
+//go:nosplit
+func HaltAndResume()
+
+// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resume.
+//go:nosplit
+func HaltEl1SvcAndResume()
+
// init initializes architecture-specific state.
func (k *Kernel) init(opts KernelOpts) {
// Save the root page tables.
diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go
index 8c960c749..057fb5c69 100644
--- a/pkg/sentry/platform/ring0/offsets_arm64.go
+++ b/pkg/sentry/platform/ring0/offsets_arm64.go
@@ -85,6 +85,7 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault)
fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall)
+ fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException)
p := &syscall.PtraceRegs{}
fmt.Fprintf(w, "\n// Ptrace registers.\n")
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 971eed7fa..4f2406ce3 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -7,7 +7,7 @@ go_template(
name = "generic_walker",
srcs = select_arch(
amd64 = ["walker_amd64.go"],
- arm64 = ["walker_amd64.go"],
+ arm64 = ["walker_arm64.go"],
),
opt_types = [
"Visitor",