diff options
Diffstat (limited to 'pkg/sentry/platform/kvm')
-rw-r--r-- | pkg/sentry/platform/kvm/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill.go | 12 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_unsafe.go | 97 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_const.go | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 64 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 9 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64_unsafe.go | 25 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 26 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_unsafe.go | 41 |
9 files changed, 99 insertions, 177 deletions
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index e27f57536..159f7eafd 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -70,7 +70,6 @@ go_test( "requires-kvm", ], deps = [ - "//pkg/procid", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 555b5fa96..4b23f7803 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -46,14 +46,6 @@ var ( // bounceSignalMask has only bounceSignal set. bounceSignalMask = uint64(1 << (uint64(bounceSignal) - 1)) - // otherSignalsMask includes all other signals that will be cause the - // vCPU to exit during execution. - // - // Currently, this includes the preemption signal and the profiling - // signal. In general, these should be signals whose delivery actually - // influences the way the program executes as the switch can be costly. - otherSignalsMask = uint64(1<<(uint64(syscall.SIGURG)-1)) | uint64(1<<(uint64(syscall.SIGPROF)-1)) - // bounce is the interrupt vector used to return to the kernel. bounce = uint32(ring0.VirtualizationException) @@ -94,8 +86,8 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) { } func init() { - // Install the handler, masking all signals. - if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler, ^uint64(0)); err != nil { + // Install the handler. + if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler); err != nil { panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err)) } diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 4e9d80765..9add7c944 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -24,7 +24,6 @@ import ( "syscall" "unsafe" - "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/arch" ) @@ -59,19 +58,6 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 { return &((*arch.UContext64)(context).MContext) } -// injectInterrupt is a helper to inject an interrupt. -// -//go:nosplit -func injectInterrupt(c *vCPU) { - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_INTERRUPT, - uintptr(unsafe.Pointer(&bounce))); errno != 0 { - throw("interrupt injection failed") - } -} - // bluepillHandler is called from the signal stub. // // The world may be stopped while this is executing, and it executes on the @@ -83,9 +69,6 @@ func bluepillHandler(context unsafe.Pointer) { // Sanitize the registers; interrupts must always be disabled. c := bluepillArchEnter(bluepillArchContext(context)) - // Enable preemption. - c.setSignalMask(true) - // Increment the number of switches. atomic.AddUint32(&c.switches, 1) @@ -106,9 +89,6 @@ func bluepillHandler(context unsafe.Pointer) { // interrupted KVM. Since we're in a signal handler // currently, all signals are masked and the signal // must have been delivered directly to this thread. - // - // We will not be able to actually do subsequent - // KVM_RUNs until this signal is processed. timeout := syscall.Timespec{} sig, _, errno := syscall.RawSyscall6( syscall.SYS_RT_SIGTIMEDWAIT, @@ -118,24 +98,12 @@ func bluepillHandler(context unsafe.Pointer) { 8, // sigset size. 0, 0) if errno == syscall.EAGAIN { - // If weren't able to process this signal, then - // it must not have been in the bounceMask. By - // elimination, it must have been the - // preemption signal. We can't process this - // signal right now, so we need to disable - // preemption until the interrupt is actually - // handled. - c.setSignalMask(false) - // Note that there is a waiter for this vCPU. - // This will cause the vCPU to exit at some - // point in the future (releasing the user lock - // and guest mode). - atomicbitops.OrUint32(&c.state, vCPUWaiter) - } else if errno != 0 { - // We only expect success or a timeout. + continue + } + if errno != 0 { throw("error waiting for pending signal") - } else if sig != uintptr(bounceSignal) { - // Only the bounce should be processed. + } + if sig != uintptr(bounceSignal) { throw("unexpected signal") } @@ -146,10 +114,11 @@ func bluepillHandler(context unsafe.Pointer) { // ready. if c.runData.readyForInterruptInjection == 0 { c.runData.requestInterruptWindow = 1 + continue // Rerun vCPU. } else { - injectInterrupt(c) + // Force injection below; the vCPU is ready. + c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN } - continue // Rerun vCPU. case syscall.EFAULT: // If a fault is not serviceable due to the host // backing pages having page permissions, instead of an @@ -168,30 +137,6 @@ func bluepillHandler(context unsafe.Pointer) { } switch c.runData.exitReason { - case _KVM_EXIT_HLT: - // Copy out registers. - bluepillArchExit(c, bluepillArchContext(context)) - - // Return to the vCPUReady state; notify any waiters. - user := atomic.LoadUint32(&c.state) & vCPUUser - switch atomic.SwapUint32(&c.state, user) { - case user | vCPUGuest: // Expected case. - case user | vCPUGuest | vCPUWaiter: - c.notify() - default: - throw("invalid state") - } - return - case _KVM_EXIT_IRQ_WINDOW_OPEN: - // Inject an interrupt now. - injectInterrupt(c) - // Clear previous injection request. - c.runData.requestInterruptWindow = 0 - case _KVM_EXIT_INTR: - // This is fine, it is the normal exit reason during - // signal delivery. However, we still need to handle - // other potential exit reasons *combined* with EINTR, - // so this switch must be hit even after the above. case _KVM_EXIT_EXCEPTION: c.die(bluepillArchContext(context), "exception") return @@ -210,6 +155,20 @@ func bluepillHandler(context unsafe.Pointer) { case _KVM_EXIT_DEBUG: c.die(bluepillArchContext(context), "debug") return + case _KVM_EXIT_HLT: + // Copy out registers. + bluepillArchExit(c, bluepillArchContext(context)) + + // Return to the vCPUReady state; notify any waiters. + user := atomic.LoadUint32(&c.state) & vCPUUser + switch atomic.SwapUint32(&c.state, user) { + case user | vCPUGuest: // Expected case. + case user | vCPUGuest | vCPUWaiter: + c.notify() + default: + throw("invalid state") + } + return case _KVM_EXIT_MMIO: // Increment the fault count. atomic.AddUint32(&c.faults, 1) @@ -241,6 +200,18 @@ func bluepillHandler(context unsafe.Pointer) { data[i] = *b } } + case _KVM_EXIT_IRQ_WINDOW_OPEN: + // Interrupt: we must have requested an interrupt + // window; set the interrupt line. + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_INTERRUPT, + uintptr(unsafe.Pointer(&bounce))); errno != 0 { + throw("interrupt injection failed") + } + // Clear previous injection request. + c.runData.requestInterruptWindow = 0 case _KVM_EXIT_SHUTDOWN: c.die(bluepillArchContext(context), "shutdown") return diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index 07d9c9a98..1d5c77ff4 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -48,7 +48,6 @@ const ( _KVM_EXIT_IRQ_WINDOW_OPEN = 0x7 _KVM_EXIT_SHUTDOWN = 0x8 _KVM_EXIT_FAIL_ENTRY = 0x9 - _KVM_EXIT_INTR = 0xa _KVM_EXIT_INTERNAL_ERROR = 0x11 _KVM_EXIT_SYSTEM_EVENT = 0x18 ) diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index d42ba3f24..c42752d50 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -16,15 +16,12 @@ package kvm import ( "math/rand" - "os" "reflect" - "runtime" "sync/atomic" "syscall" "testing" "time" - "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" @@ -323,18 +320,15 @@ func TestBounce(t *testing.T) { }) } -// randomSleep is used by some race tests below. -// -// O(hundreds of microseconds) is appropriate to ensure different overlaps and -// different schedules. -func randomSleep() { - if n := rand.Intn(1000); n > 100 { - time.Sleep(time.Duration(n) * time.Microsecond) - } -} - func TestBounceStress(t *testing.T) { applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { + randomSleep := func() { + // O(hundreds of microseconds) is appropriate to ensure + // different overlaps and different schedules. + if n := rand.Intn(1000); n > 100 { + time.Sleep(time.Duration(n) * time.Microsecond) + } + } for i := 0; i < 1000; i++ { // Start an asynchronously executing goroutine that // calls Bounce at pseudo-random point in time. @@ -361,50 +355,6 @@ func TestBounceStress(t *testing.T) { }) } -func TestPreemption(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - // Lock the main vCPU thread. - runtime.LockOSThread() - pid := os.Getpid() - tid := procid.Current() - running := uint32(1) - defer atomic.StoreUint32(&running, 0) - - // Start generating "preemptions". - go func() { - for atomic.LoadUint32(&running) != 0 { - // Kick via a preemption: best effort. - syscall.Tgkill(pid, int(tid), syscall.SIGURG) - randomSleep() - } - }() - - for i := 0; i < 1000; i++ { - randomSleep() - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - // Was this caused by a preemption signal? - if got := atomic.LoadUint32(&c.state); got&vCPUGuest != 0 && got&vCPUWaiter == 0 { - continue - } - c.unlock() - // Should have dropped from guest mode, processed preemption. - if got := atomic.LoadUint32(&c.state); got != vCPUReady { - t.Errorf("vCPU not in ready state: got %v", got) - } - randomSleep() - c.lock() - } - return false - }) -} - func TestInvalidate(t *testing.T) { var data uintptr // Used below. applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 345b71e8f..f1afc74dc 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -108,9 +108,6 @@ type vCPU struct { // This is a bitmask of the three fields (vCPU*) described above. state uint32 - // signalMask is the vCPU signal mask. - signalMask uint64 - // runData for this vCPU. runData *runData @@ -124,7 +121,6 @@ type vCPU struct { // vCPUArchState is the architecture-specific state. vCPUArchState - // dieState is the temporary state associated with throwing exceptions. dieState dieState } @@ -157,6 +153,11 @@ func (m *machine) newVCPU() *vCPU { c.CPU.Init(&m.kernel, c) m.vCPUsByID[c.id] = c + // Ensure the signal mask is correct. + if err := c.setSignalMask(); err != nil { + panic(fmt.Sprintf("error setting signal mask: %v", err)) + } + // Map the run data. runData, err := mapRunData(int(fd)) if err != nil { diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 52286e56d..7156c245f 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -111,6 +111,31 @@ func (c *vCPU) setSystemTime() error { return nil } +// setSignalMask sets the vCPU signal mask. +// +// This must be called prior to running the vCPU. +func (c *vCPU) setSignalMask() error { + // The layout of this structure implies that it will not necessarily be + // the same layout chosen by the Go compiler. It gets fudged here. + var data struct { + length uint32 + mask1 uint32 + mask2 uint32 + _ uint32 + } + data.length = 8 // Fixed sigset size. + data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) + data.mask2 = ^uint32(bounceSignalMask >> 32) + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_SIGNAL_MASK, + uintptr(unsafe.Pointer(&data))); errno != 0 { + return fmt.Errorf("error setting signal mask: %v", errno) + } + return nil +} + // setUserRegisters sets user registers in the vCPU. func (c *vCPU) setUserRegisters(uregs *userRegs) error { if _, _, errno := syscall.RawSyscall( diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 185eeb4f0..b531f2f85 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -268,6 +268,32 @@ func (c *vCPU) setSystemTime() error { return nil } +// setSignalMask sets the vCPU signal mask. +// +// This must be called prior to running the vCPU. +func (c *vCPU) setSignalMask() error { + // The layout of this structure implies that it will not necessarily be + // the same layout chosen by the Go compiler. It gets fudged here. + var data struct { + length uint32 + mask1 uint32 + mask2 uint32 + _ uint32 + } + data.length = 8 // Fixed sigset size. + data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) + data.mask2 = ^uint32(bounceSignalMask >> 32) + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_SIGNAL_MASK, + uintptr(unsafe.Pointer(&data))); errno != 0 { + return fmt.Errorf("error setting signal mask: %v", errno) + } + + return nil +} + // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { // Check for canonical addresses. diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index e4de0a889..f04be2ab5 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -87,47 +87,6 @@ func unmapRunData(r *runData) error { return nil } -// setSignalMask sets the vCPU signal mask. -// -// This will be called from the bluepill handler, and therefore must not -// perform any allocation. -// -//go:nosplit -func (c *vCPU) setSignalMask(enableOthers bool) { - // The signal mask is either: - // *) Only the bounce signal, which we need to use to execute the - // machine state up until the bounce interrupt can be processed. - // or - // *) All signals, which is the default state unless we need to - // continue execution to exit guest mode (the case above). - mask := bounceSignalMask - if enableOthers { - mask |= otherSignalsMask - } - if c.signalMask == mask { - return // Already set. - } - - // The layout of this structure implies that it will not necessarily be - // the same layout chosen by the Go compiler. It gets fudged here. - var data struct { - length uint32 - mask1 uint32 - mask2 uint32 - _ uint32 - } - data.length = 8 // Fixed sigset size. - data.mask1 = ^uint32(mask & 0xffffffff) - data.mask2 = ^uint32(mask >> 32) - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_SET_SIGNAL_MASK, - uintptr(unsafe.Pointer(&data))); errno != 0 { - throw("setSignal mask failed") - } -} - // atomicAddressSpace is an atomic address space pointer. type atomicAddressSpace struct { pointer unsafe.Pointer |