From de68e1d8c437e6234f5e413d7cad6f892a4452d3 Mon Sep 17 00:00:00 2001 From: Bin Lu Date: Thu, 20 Feb 2020 01:12:03 -0500 Subject: Code Clean:Move getUserRegisters into dieArchSetup() and other small changes. Consistent with QEMU, getUserRegisters() should be an arch-specific function. So, it should be called in dieArchSetup(). With this patch and the pagetable/pcid patch, the kvm modules on Arm64 can be built successfully. Signed-off-by: Bin Lu --- pkg/sentry/platform/kvm/bluepill.go | 6 ------ pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go | 6 ++++++ pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 24 ------------------------ 3 files changed, 6 insertions(+), 30 deletions(-) (limited to 'pkg/sentry/platform/kvm') diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 35cd55fef..4b23f7803 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -81,12 +81,6 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) { // Save the death message, which will be thrown. c.dieState.message = msg - // Reload all registers to have an accurate stack trace when we return - // to host mode. This means that the stack should be unwound correctly. - if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 { - throw(msg) - } - // Setup the trampoline. dieArchSetup(c, context, &c.dieState.guestRegs) } diff --git a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go index a63a6a071..99cac665d 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go @@ -31,6 +31,12 @@ import ( // //go:nosplit func dieArchSetup(c *vCPU, context *arch.SignalContext64, guestRegs *userRegs) { + // Reload all registers to have an accurate stack trace when we return + // to host mode. This means that the stack should be unwound correctly. + if errno := c.getUserRegisters(&c.dieState.guestRegs); errno != 0 { + throw(c.dieState.message) + } + // If the vCPU is in user mode, we set the stack to the stored stack // value in the vCPU itself. We don't want to unwind the user stack. if guestRegs.RFLAGS&ring0.UserFlagsSet == ring0.UserFlagsSet { diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 1c8384e6b..b531f2f85 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -29,30 +29,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// setMemoryRegion initializes a region. -// -// This may be called from bluepillHandler, and therefore returns an errno -// directly (instead of wrapping in an error) to avoid allocations. -// -//go:nosplit -func (m *machine) setMemoryRegion(slot int, physical, length, virtual uintptr) syscall.Errno { - userRegion := userMemoryRegion{ - slot: uint32(slot), - flags: 0, - guestPhysAddr: uint64(physical), - memorySize: uint64(length), - userspaceAddr: uint64(virtual), - } - - // Set the region. - _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(m.fd), - _KVM_SET_USER_MEMORY_REGION, - uintptr(unsafe.Pointer(&userRegion))) - return errno -} - type kvmVcpuInit struct { target uint32 features [7]uint32 -- cgit v1.2.3 From 93e0c3752981b6a1c5b745faec6506c17480b84b Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 21 Feb 2020 10:28:16 +0000 Subject: Enable bluepill dieTrampoline operation on arm64. Signed-off-by: Haibo Xu Change-Id: I9e1bf2513c23bdd8c387e5b3c874c6ad3ca9aab0 --- pkg/sentry/platform/kvm/bluepill_arm64.s | 8 ++++--- pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go | 23 +++++++++++++++++++- pkg/sentry/platform/ring0/aarch64.go | 27 ++++++++++++------------ 3 files changed, 41 insertions(+), 17 deletions(-) (limited to 'pkg/sentry/platform/kvm') diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.s b/pkg/sentry/platform/kvm/bluepill_arm64.s index c61700892..04efa0147 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.s +++ b/pkg/sentry/platform/kvm/bluepill_arm64.s @@ -82,6 +82,8 @@ fallback: // dieTrampoline: see bluepill.go, bluepill_arm64_unsafe.go for documentation. TEXT ·dieTrampoline(SB),NOSPLIT,$0 - // TODO(gvisor.dev/issue/1249): dieTrampoline supporting for Arm64. - MOVD R9, 8(RSP) - BL ·dieHandler(SB) + // R0: Fake the old PC as caller + // R1: First argument (vCPU) + MOVD.P R1, 8(RSP) // R1: First argument (vCPU) + MOVD.P R0, 8(RSP) // R0: Fake the old PC as caller + B ·dieHandler(SB) diff --git a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go index 2f02c03cf..195331383 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go @@ -18,9 +18,30 @@ package kvm import ( "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) +// dieArchSetup initialies the state for dieTrampoline. +// +// The arm64 dieTrampoline requires the vCPU to be set in R1, and the last PC +// to be in R0. The trampoline then simulates a call to dieHandler from the +// provided PC. +// //go:nosplit func dieArchSetup(c *vCPU, context *arch.SignalContext64, guestRegs *userRegs) { - // TODO(gvisor.dev/issue/1249): dieTrampoline supporting for Arm64. + // If the vCPU is in user mode, we set the stack to the stored stack + // value in the vCPU itself. We don't want to unwind the user stack. + if guestRegs.Regs.Pstate&ring0.PSR_MODE_MASK == ring0.PSR_MODE_EL0t { + regs := c.CPU.Registers() + context.Regs[0] = regs.Regs[0] + context.Sp = regs.Sp + context.Regs[29] = regs.Regs[29] // stack base address + } else { + context.Regs[0] = guestRegs.Regs.Pc + context.Sp = guestRegs.Regs.Sp + context.Regs[29] = guestRegs.Regs.Regs[29] + context.Pstate = guestRegs.Regs.Pstate + } + context.Regs[1] = uint64(uintptr(unsafe.Pointer(c))) + context.Pc = uint64(dieTrampolineAddr) } diff --git a/pkg/sentry/platform/ring0/aarch64.go b/pkg/sentry/platform/ring0/aarch64.go index f6da41c27..8122ac6e2 100644 --- a/pkg/sentry/platform/ring0/aarch64.go +++ b/pkg/sentry/platform/ring0/aarch64.go @@ -27,26 +27,27 @@ const ( _PTE_PGT_BASE = 0x7000 _PTE_PGT_SIZE = 0x1000 - _PSR_MODE_EL0t = 0x0 - _PSR_MODE_EL1t = 0x4 - _PSR_MODE_EL1h = 0x5 - _PSR_EL_MASK = 0xf - - _PSR_D_BIT = 0x200 - _PSR_A_BIT = 0x100 - _PSR_I_BIT = 0x80 - _PSR_F_BIT = 0x40 + _PSR_D_BIT = 0x00000200 + _PSR_A_BIT = 0x00000100 + _PSR_I_BIT = 0x00000080 + _PSR_F_BIT = 0x00000040 ) const ( + // PSR bits + PSR_MODE_EL0t = 0x00000000 + PSR_MODE_EL1t = 0x00000004 + PSR_MODE_EL1h = 0x00000005 + PSR_MODE_MASK = 0x0000000f + // KernelFlagsSet should always be set in the kernel. - KernelFlagsSet = _PSR_MODE_EL1h + KernelFlagsSet = PSR_MODE_EL1h // UserFlagsSet are always set in userspace. - UserFlagsSet = _PSR_MODE_EL0t + UserFlagsSet = PSR_MODE_EL0t - KernelFlagsClear = _PSR_EL_MASK - UserFlagsClear = _PSR_EL_MASK + KernelFlagsClear = PSR_MODE_MASK + UserFlagsClear = PSR_MODE_MASK PsrDefaultSet = _PSR_D_BIT | _PSR_A_BIT | _PSR_I_BIT | _PSR_F_BIT ) -- cgit v1.2.3 From 98b693e61b37a62f7b29ce1cab8b4c4c54fa044e Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 25 Feb 2020 12:21:27 -0800 Subject: Don't acquire contended lock with the OS thread locked. Fixes #1049 PiperOrigin-RevId: 297175164 --- pkg/sentry/platform/kvm/machine.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'pkg/sentry/platform/kvm') diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 8076c7529..f1afc74dc 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -329,10 +329,12 @@ func (m *machine) Destroy() { } // Get gets an available vCPU. +// +// This will return with the OS thread locked. func (m *machine) Get() *vCPU { + m.mu.RLock() runtime.LockOSThread() tid := procid.Current() - m.mu.RLock() // Check for an exact match. if c := m.vCPUs[tid]; c != nil { @@ -343,8 +345,22 @@ func (m *machine) Get() *vCPU { // The happy path failed. We now proceed to acquire an exclusive lock // (because the vCPU map may change), and scan all available vCPUs. + // In this case, we first unlock the OS thread. Otherwise, if mu is + // not available, the current system thread will be parked and a new + // system thread spawned. We avoid this situation by simply refreshing + // tid after relocking the system thread. m.mu.RUnlock() + runtime.UnlockOSThread() m.mu.Lock() + runtime.LockOSThread() + tid = procid.Current() + + // Recheck for an exact match. + if c := m.vCPUs[tid]; c != nil { + c.lock() + m.mu.Unlock() + return c + } for { // Scan for an available vCPU. -- cgit v1.2.3 From 73201f4c5700ce7af404b968698b86d80989ab1e Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Tue, 25 Feb 2020 07:30:12 +0000 Subject: Code Clean: Move arch independent codes to common file in kvm pkg. Signed-off-by: Haibo Xu Change-Id: Iefbdf53e8e8d6d23ae75d8a2ff0d2a6e71f414d8 --- pkg/sentry/platform/kvm/kvm.go | 32 ++++++++++++++++++++++++++++++++ pkg/sentry/platform/kvm/kvm_amd64.go | 32 -------------------------------- pkg/sentry/platform/kvm/kvm_arm64.go | 32 -------------------------------- 3 files changed, 32 insertions(+), 64 deletions(-) (limited to 'pkg/sentry/platform/kvm') diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 972ba85c3..a9b4af43e 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -27,6 +27,38 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) +// userMemoryRegion is a region of physical memory. +// +// This mirrors kvm_memory_region. +type userMemoryRegion struct { + slot uint32 + flags uint32 + guestPhysAddr uint64 + memorySize uint64 + userspaceAddr uint64 +} + +// runData is the run structure. This may be mapped for synchronous register +// access (although that doesn't appear to be supported by my kernel at least). +// +// This mirrors kvm_run. +type runData struct { + requestInterruptWindow uint8 + _ [7]uint8 + + exitReason uint32 + readyForInterruptInjection uint8 + ifFlag uint8 + _ [2]uint8 + + cr8 uint64 + apicBase uint64 + + // This is the union data for exits. Interpretation depends entirely on + // the exitReason above (see vCPU code for more information). + data [32]uint64 +} + // KVM represents a lightweight VM context. type KVM struct { platform.NoCPUPreemptionDetection diff --git a/pkg/sentry/platform/kvm/kvm_amd64.go b/pkg/sentry/platform/kvm/kvm_amd64.go index c5a6f9c7d..093497bc4 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64.go +++ b/pkg/sentry/platform/kvm/kvm_amd64.go @@ -21,17 +21,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/platform/ring0" ) -// userMemoryRegion is a region of physical memory. -// -// This mirrors kvm_memory_region. -type userMemoryRegion struct { - slot uint32 - flags uint32 - guestPhysAddr uint64 - memorySize uint64 - userspaceAddr uint64 -} - // userRegs represents KVM user registers. // // This mirrors kvm_regs. @@ -169,27 +158,6 @@ type modelControlRegisters struct { entries [16]modelControlRegister } -// runData is the run structure. This may be mapped for synchronous register -// access (although that doesn't appear to be supported by my kernel at least). -// -// This mirrors kvm_run. -type runData struct { - requestInterruptWindow uint8 - _ [7]uint8 - - exitReason uint32 - readyForInterruptInjection uint8 - ifFlag uint8 - _ [2]uint8 - - cr8 uint64 - apicBase uint64 - - // This is the union data for exits. Interpretation depends entirely on - // the exitReason above (see vCPU code for more information). - data [32]uint64 -} - // cpuidEntry is a single CPUID entry. // // This mirrors kvm_cpuid_entry2. diff --git a/pkg/sentry/platform/kvm/kvm_arm64.go b/pkg/sentry/platform/kvm/kvm_arm64.go index 2319c86d3..79045651e 100644 --- a/pkg/sentry/platform/kvm/kvm_arm64.go +++ b/pkg/sentry/platform/kvm/kvm_arm64.go @@ -20,17 +20,6 @@ import ( "syscall" ) -// userMemoryRegion is a region of physical memory. -// -// This mirrors kvm_memory_region. -type userMemoryRegion struct { - slot uint32 - flags uint32 - guestPhysAddr uint64 - memorySize uint64 - userspaceAddr uint64 -} - type kvmOneReg struct { id uint64 addr uint64 @@ -53,27 +42,6 @@ type userRegs struct { fpRegs userFpsimdState } -// runData is the run structure. This may be mapped for synchronous register -// access (although that doesn't appear to be supported by my kernel at least). -// -// This mirrors kvm_run. -type runData struct { - requestInterruptWindow uint8 - _ [7]uint8 - - exitReason uint32 - readyForInterruptInjection uint8 - ifFlag uint8 - _ [2]uint8 - - cr8 uint64 - apicBase uint64 - - // This is the union data for exits. Interpretation depends entirely on - // the exitReason above (see vCPU code for more information). - data [32]uint64 -} - // updateGlobalOnce does global initialization. It has to be called only once. func updateGlobalOnce(fd int) error { physicalInit() -- cgit v1.2.3