summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
authorAdin Scannell <ascannell@google.com>2020-04-01 11:05:05 -0700
committergVisor bot <gvisor-bot@google.com>2020-04-01 11:06:26 -0700
commit4e6a1a5adb5607423c180089d8b464ef7dfdd1ae (patch)
treeaac579d8395dd545fb647ca6e08ca8079262fcdc /pkg/sentry
parentdb7917556a7e4bd2cd6d183c68f04a4787dec493 (diff)
Automated rollback of changelist 303799678
PiperOrigin-RevId: 304221302
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/platform/kvm/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go12
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go97
-rw-r--r--pkg/sentry/platform/kvm/kvm_const.go1
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go64
-rw-r--r--pkg/sentry/platform/kvm/machine.go9
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64_unsafe.go25
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go26
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go41
9 files changed, 99 insertions, 177 deletions
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index e27f57536..159f7eafd 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -70,7 +70,6 @@ go_test(
"requires-kvm",
],
deps = [
- "//pkg/procid",
"//pkg/sentry/arch",
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm/testutil",
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 555b5fa96..4b23f7803 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -46,14 +46,6 @@ var (
// bounceSignalMask has only bounceSignal set.
bounceSignalMask = uint64(1 << (uint64(bounceSignal) - 1))
- // otherSignalsMask includes all other signals that will be cause the
- // vCPU to exit during execution.
- //
- // Currently, this includes the preemption signal and the profiling
- // signal. In general, these should be signals whose delivery actually
- // influences the way the program executes as the switch can be costly.
- otherSignalsMask = uint64(1<<(uint64(syscall.SIGURG)-1)) | uint64(1<<(uint64(syscall.SIGPROF)-1))
-
// bounce is the interrupt vector used to return to the kernel.
bounce = uint32(ring0.VirtualizationException)
@@ -94,8 +86,8 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) {
}
func init() {
- // Install the handler, masking all signals.
- if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler, ^uint64(0)); err != nil {
+ // Install the handler.
+ if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler); err != nil {
panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 4e9d80765..9add7c944 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -24,7 +24,6 @@ import (
"syscall"
"unsafe"
- "gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/sentry/arch"
)
@@ -59,19 +58,6 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
return &((*arch.UContext64)(context).MContext)
}
-// injectInterrupt is a helper to inject an interrupt.
-//
-//go:nosplit
-func injectInterrupt(c *vCPU) {
- if _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(c.fd),
- _KVM_INTERRUPT,
- uintptr(unsafe.Pointer(&bounce))); errno != 0 {
- throw("interrupt injection failed")
- }
-}
-
// bluepillHandler is called from the signal stub.
//
// The world may be stopped while this is executing, and it executes on the
@@ -83,9 +69,6 @@ func bluepillHandler(context unsafe.Pointer) {
// Sanitize the registers; interrupts must always be disabled.
c := bluepillArchEnter(bluepillArchContext(context))
- // Enable preemption.
- c.setSignalMask(true)
-
// Increment the number of switches.
atomic.AddUint32(&c.switches, 1)
@@ -106,9 +89,6 @@ func bluepillHandler(context unsafe.Pointer) {
// interrupted KVM. Since we're in a signal handler
// currently, all signals are masked and the signal
// must have been delivered directly to this thread.
- //
- // We will not be able to actually do subsequent
- // KVM_RUNs until this signal is processed.
timeout := syscall.Timespec{}
sig, _, errno := syscall.RawSyscall6(
syscall.SYS_RT_SIGTIMEDWAIT,
@@ -118,24 +98,12 @@ func bluepillHandler(context unsafe.Pointer) {
8, // sigset size.
0, 0)
if errno == syscall.EAGAIN {
- // If weren't able to process this signal, then
- // it must not have been in the bounceMask. By
- // elimination, it must have been the
- // preemption signal. We can't process this
- // signal right now, so we need to disable
- // preemption until the interrupt is actually
- // handled.
- c.setSignalMask(false)
- // Note that there is a waiter for this vCPU.
- // This will cause the vCPU to exit at some
- // point in the future (releasing the user lock
- // and guest mode).
- atomicbitops.OrUint32(&c.state, vCPUWaiter)
- } else if errno != 0 {
- // We only expect success or a timeout.
+ continue
+ }
+ if errno != 0 {
throw("error waiting for pending signal")
- } else if sig != uintptr(bounceSignal) {
- // Only the bounce should be processed.
+ }
+ if sig != uintptr(bounceSignal) {
throw("unexpected signal")
}
@@ -146,10 +114,11 @@ func bluepillHandler(context unsafe.Pointer) {
// ready.
if c.runData.readyForInterruptInjection == 0 {
c.runData.requestInterruptWindow = 1
+ continue // Rerun vCPU.
} else {
- injectInterrupt(c)
+ // Force injection below; the vCPU is ready.
+ c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN
}
- continue // Rerun vCPU.
case syscall.EFAULT:
// If a fault is not serviceable due to the host
// backing pages having page permissions, instead of an
@@ -168,30 +137,6 @@ func bluepillHandler(context unsafe.Pointer) {
}
switch c.runData.exitReason {
- case _KVM_EXIT_HLT:
- // Copy out registers.
- bluepillArchExit(c, bluepillArchContext(context))
-
- // Return to the vCPUReady state; notify any waiters.
- user := atomic.LoadUint32(&c.state) & vCPUUser
- switch atomic.SwapUint32(&c.state, user) {
- case user | vCPUGuest: // Expected case.
- case user | vCPUGuest | vCPUWaiter:
- c.notify()
- default:
- throw("invalid state")
- }
- return
- case _KVM_EXIT_IRQ_WINDOW_OPEN:
- // Inject an interrupt now.
- injectInterrupt(c)
- // Clear previous injection request.
- c.runData.requestInterruptWindow = 0
- case _KVM_EXIT_INTR:
- // This is fine, it is the normal exit reason during
- // signal delivery. However, we still need to handle
- // other potential exit reasons *combined* with EINTR,
- // so this switch must be hit even after the above.
case _KVM_EXIT_EXCEPTION:
c.die(bluepillArchContext(context), "exception")
return
@@ -210,6 +155,20 @@ func bluepillHandler(context unsafe.Pointer) {
case _KVM_EXIT_DEBUG:
c.die(bluepillArchContext(context), "debug")
return
+ case _KVM_EXIT_HLT:
+ // Copy out registers.
+ bluepillArchExit(c, bluepillArchContext(context))
+
+ // Return to the vCPUReady state; notify any waiters.
+ user := atomic.LoadUint32(&c.state) & vCPUUser
+ switch atomic.SwapUint32(&c.state, user) {
+ case user | vCPUGuest: // Expected case.
+ case user | vCPUGuest | vCPUWaiter:
+ c.notify()
+ default:
+ throw("invalid state")
+ }
+ return
case _KVM_EXIT_MMIO:
// Increment the fault count.
atomic.AddUint32(&c.faults, 1)
@@ -241,6 +200,18 @@ func bluepillHandler(context unsafe.Pointer) {
data[i] = *b
}
}
+ case _KVM_EXIT_IRQ_WINDOW_OPEN:
+ // Interrupt: we must have requested an interrupt
+ // window; set the interrupt line.
+ if _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(c.fd),
+ _KVM_INTERRUPT,
+ uintptr(unsafe.Pointer(&bounce))); errno != 0 {
+ throw("interrupt injection failed")
+ }
+ // Clear previous injection request.
+ c.runData.requestInterruptWindow = 0
case _KVM_EXIT_SHUTDOWN:
c.die(bluepillArchContext(context), "shutdown")
return
diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go
index 07d9c9a98..1d5c77ff4 100644
--- a/pkg/sentry/platform/kvm/kvm_const.go
+++ b/pkg/sentry/platform/kvm/kvm_const.go
@@ -48,7 +48,6 @@ const (
_KVM_EXIT_IRQ_WINDOW_OPEN = 0x7
_KVM_EXIT_SHUTDOWN = 0x8
_KVM_EXIT_FAIL_ENTRY = 0x9
- _KVM_EXIT_INTR = 0xa
_KVM_EXIT_INTERNAL_ERROR = 0x11
_KVM_EXIT_SYSTEM_EVENT = 0x18
)
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index d42ba3f24..c42752d50 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -16,15 +16,12 @@ package kvm
import (
"math/rand"
- "os"
"reflect"
- "runtime"
"sync/atomic"
"syscall"
"testing"
"time"
- "gvisor.dev/gvisor/pkg/procid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
@@ -323,18 +320,15 @@ func TestBounce(t *testing.T) {
})
}
-// randomSleep is used by some race tests below.
-//
-// O(hundreds of microseconds) is appropriate to ensure different overlaps and
-// different schedules.
-func randomSleep() {
- if n := rand.Intn(1000); n > 100 {
- time.Sleep(time.Duration(n) * time.Microsecond)
- }
-}
-
func TestBounceStress(t *testing.T) {
applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ randomSleep := func() {
+ // O(hundreds of microseconds) is appropriate to ensure
+ // different overlaps and different schedules.
+ if n := rand.Intn(1000); n > 100 {
+ time.Sleep(time.Duration(n) * time.Microsecond)
+ }
+ }
for i := 0; i < 1000; i++ {
// Start an asynchronously executing goroutine that
// calls Bounce at pseudo-random point in time.
@@ -361,50 +355,6 @@ func TestBounceStress(t *testing.T) {
})
}
-func TestPreemption(t *testing.T) {
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
- // Lock the main vCPU thread.
- runtime.LockOSThread()
- pid := os.Getpid()
- tid := procid.Current()
- running := uint32(1)
- defer atomic.StoreUint32(&running, 0)
-
- // Start generating "preemptions".
- go func() {
- for atomic.LoadUint32(&running) != 0 {
- // Kick via a preemption: best effort.
- syscall.Tgkill(pid, int(tid), syscall.SIGURG)
- randomSleep()
- }
- }()
-
- for i := 0; i < 1000; i++ {
- randomSleep()
- var si arch.SignalInfo
- if _, err := c.SwitchToUser(ring0.SwitchOpts{
- Registers: regs,
- FloatingPointState: dummyFPState,
- PageTables: pt,
- }, &si); err != platform.ErrContextInterrupt {
- t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt)
- }
- // Was this caused by a preemption signal?
- if got := atomic.LoadUint32(&c.state); got&vCPUGuest != 0 && got&vCPUWaiter == 0 {
- continue
- }
- c.unlock()
- // Should have dropped from guest mode, processed preemption.
- if got := atomic.LoadUint32(&c.state); got != vCPUReady {
- t.Errorf("vCPU not in ready state: got %v", got)
- }
- randomSleep()
- c.lock()
- }
- return false
- })
-}
-
func TestInvalidate(t *testing.T) {
var data uintptr // Used below.
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 345b71e8f..f1afc74dc 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -108,9 +108,6 @@ type vCPU struct {
// This is a bitmask of the three fields (vCPU*) described above.
state uint32
- // signalMask is the vCPU signal mask.
- signalMask uint64
-
// runData for this vCPU.
runData *runData
@@ -124,7 +121,6 @@ type vCPU struct {
// vCPUArchState is the architecture-specific state.
vCPUArchState
- // dieState is the temporary state associated with throwing exceptions.
dieState dieState
}
@@ -157,6 +153,11 @@ func (m *machine) newVCPU() *vCPU {
c.CPU.Init(&m.kernel, c)
m.vCPUsByID[c.id] = c
+ // Ensure the signal mask is correct.
+ if err := c.setSignalMask(); err != nil {
+ panic(fmt.Sprintf("error setting signal mask: %v", err))
+ }
+
// Map the run data.
runData, err := mapRunData(int(fd))
if err != nil {
diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
index 52286e56d..7156c245f 100644
--- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
@@ -111,6 +111,31 @@ func (c *vCPU) setSystemTime() error {
return nil
}
+// setSignalMask sets the vCPU signal mask.
+//
+// This must be called prior to running the vCPU.
+func (c *vCPU) setSignalMask() error {
+ // The layout of this structure implies that it will not necessarily be
+ // the same layout chosen by the Go compiler. It gets fudged here.
+ var data struct {
+ length uint32
+ mask1 uint32
+ mask2 uint32
+ _ uint32
+ }
+ data.length = 8 // Fixed sigset size.
+ data.mask1 = ^uint32(bounceSignalMask & 0xffffffff)
+ data.mask2 = ^uint32(bounceSignalMask >> 32)
+ if _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(c.fd),
+ _KVM_SET_SIGNAL_MASK,
+ uintptr(unsafe.Pointer(&data))); errno != 0 {
+ return fmt.Errorf("error setting signal mask: %v", errno)
+ }
+ return nil
+}
+
// setUserRegisters sets user registers in the vCPU.
func (c *vCPU) setUserRegisters(uregs *userRegs) error {
if _, _, errno := syscall.RawSyscall(
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index 185eeb4f0..b531f2f85 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -268,6 +268,32 @@ func (c *vCPU) setSystemTime() error {
return nil
}
+// setSignalMask sets the vCPU signal mask.
+//
+// This must be called prior to running the vCPU.
+func (c *vCPU) setSignalMask() error {
+ // The layout of this structure implies that it will not necessarily be
+ // the same layout chosen by the Go compiler. It gets fudged here.
+ var data struct {
+ length uint32
+ mask1 uint32
+ mask2 uint32
+ _ uint32
+ }
+ data.length = 8 // Fixed sigset size.
+ data.mask1 = ^uint32(bounceSignalMask & 0xffffffff)
+ data.mask2 = ^uint32(bounceSignalMask >> 32)
+ if _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(c.fd),
+ _KVM_SET_SIGNAL_MASK,
+ uintptr(unsafe.Pointer(&data))); errno != 0 {
+ return fmt.Errorf("error setting signal mask: %v", errno)
+ }
+
+ return nil
+}
+
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
// Check for canonical addresses.
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index e4de0a889..f04be2ab5 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -87,47 +87,6 @@ func unmapRunData(r *runData) error {
return nil
}
-// setSignalMask sets the vCPU signal mask.
-//
-// This will be called from the bluepill handler, and therefore must not
-// perform any allocation.
-//
-//go:nosplit
-func (c *vCPU) setSignalMask(enableOthers bool) {
- // The signal mask is either:
- // *) Only the bounce signal, which we need to use to execute the
- // machine state up until the bounce interrupt can be processed.
- // or
- // *) All signals, which is the default state unless we need to
- // continue execution to exit guest mode (the case above).
- mask := bounceSignalMask
- if enableOthers {
- mask |= otherSignalsMask
- }
- if c.signalMask == mask {
- return // Already set.
- }
-
- // The layout of this structure implies that it will not necessarily be
- // the same layout chosen by the Go compiler. It gets fudged here.
- var data struct {
- length uint32
- mask1 uint32
- mask2 uint32
- _ uint32
- }
- data.length = 8 // Fixed sigset size.
- data.mask1 = ^uint32(mask & 0xffffffff)
- data.mask2 = ^uint32(mask >> 32)
- if _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(c.fd),
- _KVM_SET_SIGNAL_MASK,
- uintptr(unsafe.Pointer(&data))); errno != 0 {
- throw("setSignal mask failed")
- }
-}
-
// atomicAddressSpace is an atomic address space pointer.
type atomicAddressSpace struct {
pointer unsafe.Pointer