diff options
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill.go | 6 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 18 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 24 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/aarch64.go | 6 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/entry_arm64.s | 85 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_arm64.go | 8 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/offsets_arm64.go | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/BUILD | 2 |
9 files changed, 110 insertions, 46 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 35cd55fef..4b23f7803 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -81,12 +81,6 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) { // Save the death message, which will be thrown. c.dieState.message = msg - // Reload all registers to have an accurate stack trace when we return - // to host mode. This means that the stack should be unwound correctly. - if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 { - throw(msg) - } - // Setup the trampoline. dieArchSetup(c, context, &c.dieState.guestRegs) } diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index a63a6a071..99cac665d 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -31,6 +31,12 @@ import ( // //go:nosplit func dieArchSetup(c *vCPU, context *arch.SignalContext64, guestRegs *userRegs) { + // Reload all registers to have an accurate stack trace when we return + // to host mode. This means that the stack should be unwound correctly. + if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 { + throw(c.dieState.message) + } + // If the vCPU is in user mode, we set the stack to the stored stack // value in the vCPU itself. We don't want to unwind the user stack. if guestRegs.RFLAGS&ring0.UserFlagsSet == ring0.UserFlagsSet { diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 8076c7529..f1afc74dc 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -329,10 +329,12 @@ func (m *machine) Destroy() { } // Get gets an available vCPU. +// +// This will return with the OS thread locked. func (m *machine) Get() *vCPU { + m.mu.RLock() runtime.LockOSThread() tid := procid.Current() - m.mu.RLock() // Check for an exact match. if c := m.vCPUs[tid]; c != nil { @@ -343,8 +345,22 @@ func (m *machine) Get() *vCPU { // The happy path failed. We now proceed to acquire an exclusive lock // (because the vCPU map may change), and scan all available vCPUs. + // In this case, we first unlock the OS thread. Otherwise, if mu is + // not available, the current system thread will be parked and a new + // system thread spawned. We avoid this situation by simply refreshing + // tid after relocking the system thread. m.mu.RUnlock() + runtime.UnlockOSThread() m.mu.Lock() + runtime.LockOSThread() + tid = procid.Current() + + // Recheck for an exact match. + if c := m.vCPUs[tid]; c != nil { + c.lock() + m.mu.Unlock() + return c + } for { // Scan for an available vCPU. diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 1c8384e6b..b531f2f85 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -29,30 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// setMemoryRegion initializes a region. -// -// This may be called from bluepillHandler, and therefore returns an errno -// directly (instead of wrapping in an error) to avoid allocations. -// -//go:nosplit -func (m *machine) setMemoryRegion(slot int, physical, length, virtual uintptr) syscall.Errno { - userRegion := userMemoryRegion{ - slot: uint32(slot), - flags: 0, - guestPhysAddr: uint64(physical), - memorySize: uint64(length), - userspaceAddr: uint64(virtual), - } - - // Set the region. - _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(m.fd), - _KVM_SET_USER_MEMORY_REGION, - uintptr(unsafe.Pointer(&userRegion))) - return errno -} - type kvmVcpuInit struct { target uint32 features [7]uint32 diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go index 6b078cd1e..f6da41c27 100644 --- a/pkg/sentry/platform/ring0/aarch64.go +++ b/pkg/sentry/platform/ring0/aarch64.go @@ -88,14 +88,14 @@ const ( El0Sync_undef El0Sync_dbg El0Sync_inv - VirtualizationException _NR_INTERRUPTS ) // System call vectors. const ( - Syscall Vector = El0Sync_svc - PageFault Vector = El0Sync_da + Syscall Vector = El0Sync_svc + PageFault Vector = El0Sync_da + VirtualizationException Vector = El0Error ) // VirtualAddressBits returns the number bits available for virtual addresses. diff --git a/pkg/sentry/platform/ring0/entry_arm64.s b/pkg/sentry/platform/ring0/entry_arm64.s index 679842288..d42eda37b 100644 --- a/pkg/sentry/platform/ring0/entry_arm64.s +++ b/pkg/sentry/platform/ring0/entry_arm64.s @@ -25,10 +25,14 @@ // not available for calls. // +// ERET returns using the ELR and SPSR for the current exception level. #define ERET() \ WORD $0xd69f03e0 +// RSV_REG is a register that holds el1 information temporarily. #define RSV_REG R18_PLATFORM + +// RSV_REG_APP is a register that holds el0 information temporarily. #define RSV_REG_APP R9 #define FPEN_NOTRAP 0x3 @@ -36,6 +40,12 @@ #define FPEN_ENABLE (FPEN_NOTRAP << FPEN_SHIFT) +// Saves a register set. +// +// This is a macro because it may need to executed in contents where a stack is +// not available for calls. +// +// The following registers are not saved: R9, R18. #define REGISTERS_SAVE(reg, offset) \ MOVD R0, offset+PTRACE_R0(reg); \ MOVD R1, offset+PTRACE_R1(reg); \ @@ -67,6 +77,12 @@ MOVD R29, offset+PTRACE_R29(reg); \ MOVD R30, offset+PTRACE_R30(reg); +// Loads a register set. +// +// This is a macro because it may need to executed in contents where a stack is +// not available for calls. +// +// The following registers are not loaded: R9, R18. #define REGISTERS_LOAD(reg, offset) \ MOVD offset+PTRACE_R0(reg), R0; \ MOVD offset+PTRACE_R1(reg), R1; \ @@ -98,7 +114,7 @@ MOVD offset+PTRACE_R29(reg), R29; \ MOVD offset+PTRACE_R30(reg), R30; -//NOP +// NOP-s #define nop31Instructions() \ WORD $0xd503201f; \ WORD $0xd503201f; \ @@ -254,6 +270,7 @@ #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) +// LOAD_KERNEL_ADDRESS loads a kernel address. #define LOAD_KERNEL_ADDRESS(from, to) \ MOVD from, to; \ ORR $0xffff000000000000, to, to; @@ -263,15 +280,18 @@ LOAD_KERNEL_ADDRESS(CPU_SELF(from), RSV_REG); \ MOVD $CPU_STACK_TOP(RSV_REG), RSV_REG; \ MOVD RSV_REG, RSP; \ + WORD $0xd538d092; \ //MRS TPIDR_EL1, R18 ISB $15; \ DSB $15; +// SWITCH_TO_APP_PAGETABLE sets a new pagetable for a container application. #define SWITCH_TO_APP_PAGETABLE(from) \ MOVD CPU_TTBR0_APP(from), RSV_REG; \ WORD $0xd5182012; \ // MSR R18, TTBR0_EL1 ISB $15; \ DSB $15; +// SWITCH_TO_KVM_PAGETABLE sets the kvm pagetable. #define SWITCH_TO_KVM_PAGETABLE(from) \ MOVD CPU_TTBR0_KVM(from), RSV_REG; \ WORD $0xd5182012; \ // MSR R18, TTBR0_EL1 @@ -294,6 +314,7 @@ WORD $0xd5181040; \ //MSR R0, CPACR_EL1 ISB $15; +// KERNEL_ENTRY_FROM_EL0 is the entry code of the vcpu from el0 to el1. #define KERNEL_ENTRY_FROM_EL0 \ SUB $16, RSP, RSP; \ // step1, save r18, r9 into kernel temporary stack. STP (RSV_REG, RSV_REG_APP), 16*0(RSP); \ @@ -315,19 +336,22 @@ WORD $0xd5384103; \ // MRS SP_EL0, R3 MOVD R3, PTRACE_SP(RSV_REG_APP); +// KERNEL_ENTRY_FROM_EL1 is the entry code of the vcpu from el1 to el1. #define KERNEL_ENTRY_FROM_EL1 \ WORD $0xd538d092; \ //MRS TPIDR_EL1, R18 - REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // save sentry context + REGISTERS_SAVE(RSV_REG, CPU_REGISTERS); \ // Save sentry context. MOVD RSV_REG_APP, CPU_REGISTERS+PTRACE_R9(RSV_REG); \ WORD $0xd5384004; \ // MRS SPSR_EL1, R4 MOVD R4, CPU_REGISTERS+PTRACE_PSTATE(RSV_REG); \ MRS ELR_EL1, R4; \ MOVD R4, CPU_REGISTERS+PTRACE_PC(RSV_REG); \ MOVD RSP, R4; \ - MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); + MOVD R4, CPU_REGISTERS+PTRACE_SP(RSV_REG); \ + LOAD_KERNEL_STACK(RSV_REG); // Load the temporary stack. +// Halt halts execution. TEXT ·Halt(SB),NOSPLIT,$0 - // clear bluepill. + // Clear bluepill. WORD $0xd538d092 //MRS TPIDR_EL1, R18 CMP RSV_REG, R9 BNE mmio_exit @@ -341,8 +365,22 @@ mmio_exit: // MMIO_EXIT. MOVD $0, R9 MOVD R0, 0xffff000000001000(R9) - B ·kernelExitToEl1(SB) + RET + +// HaltAndResume halts execution and point the pointer to the resume function. +TEXT ·HaltAndResume(SB),NOSPLIT,$0 + BL ·Halt(SB) + B ·kernelExitToEl1(SB) // Resume. +// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resume. +TEXT ·HaltEl1SvcAndResume(SB),NOSPLIT,$0 + WORD $0xd538d092 // MRS TPIDR_EL1, R18 + MOVD CPU_SELF(RSV_REG), R3 // Load vCPU. + MOVD R3, 8(RSP) // First argument (vCPU). + CALL ·kernelSyscall(SB) // Call the trampoline. + B ·kernelExitToEl1(SB) // Resume. + +// Shutdown stops the guest. TEXT ·Shutdown(SB),NOSPLIT,$0 // PSCI EVENT. MOVD $0x84000009, R0 @@ -429,6 +467,7 @@ TEXT ·kernelExitToEl0(SB),NOSPLIT,$0 TEXT ·kernelExitToEl1(SB),NOSPLIT,$0 ERET() +// Start is the CPU entrypoint. TEXT ·Start(SB),NOSPLIT,$0 IRQ_DISABLE MOVD R8, RSV_REG @@ -437,18 +476,23 @@ TEXT ·Start(SB),NOSPLIT,$0 B ·kernelExitToEl1(SB) +// El1_sync_invalid is the handler for an invalid EL1_sync. TEXT ·El1_sync_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_irq_invalid is the handler for an invalid El1_irq. TEXT ·El1_irq_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_fiq_invalid is the handler for an invalid El1_fiq. TEXT ·El1_fiq_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_error_invalid is the handler for an invalid El1_error. TEXT ·El1_error_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_sync is the handler for El1_sync. TEXT ·El1_sync(SB),NOSPLIT,$0 KERNEL_ENTRY_FROM_EL1 WORD $0xd5385219 // MRS ESR_EL1, R25 @@ -484,10 +528,10 @@ el1_da: MOVD $PageFault, R3 MOVD R3, CPU_VECTOR_CODE(RSV_REG) - B ·Halt(SB) + B ·HaltAndResume(SB) el1_ia: - B ·Halt(SB) + B ·HaltAndResume(SB) el1_sp_pc: B ·Shutdown(SB) @@ -496,7 +540,9 @@ el1_undef: B ·Shutdown(SB) el1_svc: - B ·Halt(SB) + MOVD $0, CPU_ERROR_CODE(RSV_REG) + MOVD $0, CPU_ERROR_TYPE(RSV_REG) + B ·HaltEl1SvcAndResume(SB) el1_dbg: B ·Shutdown(SB) @@ -508,15 +554,19 @@ el1_fpsimd_acc: el1_invalid: B ·Shutdown(SB) +// El1_irq is the handler for El1_irq. TEXT ·El1_irq(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_fiq is the handler for El1_fiq. TEXT ·El1_fiq(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El1_error is the handler for El1_error. TEXT ·El1_error(SB),NOSPLIT,$0 B ·Shutdown(SB) +// El0_sync is the handler for El0_sync. TEXT ·El0_sync(SB),NOSPLIT,$0 KERNEL_ENTRY_FROM_EL0 WORD $0xd5385219 // MRS ESR_EL1, R25 @@ -554,7 +604,7 @@ el0_svc: MOVD $Syscall, R3 MOVD R3, CPU_VECTOR_CODE(RSV_REG) - B ·Halt(SB) + B ·HaltAndResume(SB) el0_da: WORD $0xd538d092 //MRS TPIDR_EL1, R18 @@ -568,7 +618,7 @@ el0_da: MOVD $PageFault, R3 MOVD R3, CPU_VECTOR_CODE(RSV_REG) - B ·Halt(SB) + B ·HaltAndResume(SB) el0_ia: B ·Shutdown(SB) @@ -601,7 +651,19 @@ TEXT ·El0_fiq(SB),NOSPLIT,$0 B ·Shutdown(SB) TEXT ·El0_error(SB),NOSPLIT,$0 - B ·Shutdown(SB) + KERNEL_ENTRY_FROM_EL0 + WORD $0xd538d092 //MRS TPIDR_EL1, R18 + WORD $0xd538601a //MRS FAR_EL1, R26 + + MOVD R26, CPU_FAULT_ADDR(RSV_REG) + + MOVD $1, R3 + MOVD R3, CPU_ERROR_TYPE(RSV_REG) // Set error type to user. + + MOVD $VirtualizationException, R3 + MOVD R3, CPU_VECTOR_CODE(RSV_REG) + + B ·HaltAndResume(SB) TEXT ·El0_sync_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) @@ -615,6 +677,7 @@ TEXT ·El0_fiq_invalid(SB),NOSPLIT,$0 TEXT ·El0_error_invalid(SB),NOSPLIT,$0 B ·Shutdown(SB) +// Vectors implements exception vector table. TEXT ·Vectors(SB),NOSPLIT,$0 B ·El1_sync_invalid(SB) nop31Instructions() diff --git a/pkg/sentry/platform/ring0/kernel_arm64.go b/pkg/sentry/platform/ring0/kernel_arm64.go index c3d341998..ccacaea6b 100644 --- a/pkg/sentry/platform/ring0/kernel_arm64.go +++ b/pkg/sentry/platform/ring0/kernel_arm64.go @@ -16,6 +16,14 @@ package ring0 +// HaltAndResume halts execution and point the pointer to the resume function. +//go:nosplit +func HaltAndResume() + +// HaltEl1SvcAndResume calls Hooks.KernelSyscall and resume. +//go:nosplit +func HaltEl1SvcAndResume() + // init initializes architecture-specific state. func (k *Kernel) init(opts KernelOpts) { // Save the root page tables. diff --git a/pkg/sentry/platform/ring0/offsets_arm64.go b/pkg/sentry/platform/ring0/offsets_arm64.go index 8c960c749..057fb5c69 100644 --- a/pkg/sentry/platform/ring0/offsets_arm64.go +++ b/pkg/sentry/platform/ring0/offsets_arm64.go @@ -85,6 +85,7 @@ func Emit(w io.Writer) { fmt.Fprintf(w, "#define PageFault 0x%02x\n", PageFault) fmt.Fprintf(w, "#define Syscall 0x%02x\n", Syscall) + fmt.Fprintf(w, "#define VirtualizationException 0x%02x\n", VirtualizationException) p := &syscall.PtraceRegs{} fmt.Fprintf(w, "\n// Ptrace registers.\n") diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 971eed7fa..4f2406ce3 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -7,7 +7,7 @@ go_template( name = "generic_walker", srcs = select_arch( amd64 = ["walker_amd64.go"], - arm64 = ["walker_amd64.go"], + arm64 = ["walker_arm64.go"], ), opt_types = [ "Visitor", |