summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/atomicbitops/atomicbitops_amd64.s16
-rw-r--r--pkg/atomicbitops/atomicbitops_arm64.s16
-rw-r--r--pkg/atomicbitops/atomicbitops_noasm.go8
-rw-r--r--pkg/safecopy/safecopy.go4
-rw-r--r--pkg/safecopy/safecopy_unsafe.go6
-rw-r--r--pkg/sentry/platform/kvm/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/bluepill.go12
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go97
-rw-r--r--pkg/sentry/platform/kvm/kvm_const.go1
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go64
-rw-r--r--pkg/sentry/platform/kvm/machine.go9
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64_unsafe.go25
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64_unsafe.go26
-rw-r--r--pkg/sentry/platform/kvm/machine_unsafe.go41
-rw-r--r--runsc/sandbox/sandbox.go6
15 files changed, 207 insertions, 125 deletions
diff --git a/pkg/atomicbitops/atomicbitops_amd64.s b/pkg/atomicbitops/atomicbitops_amd64.s
index 54c887ee5..f0edd4de7 100644
--- a/pkg/atomicbitops/atomicbitops_amd64.s
+++ b/pkg/atomicbitops/atomicbitops_amd64.s
@@ -16,28 +16,28 @@
#include "textflag.h"
-TEXT ·AndUint32(SB),$0-12
+TEXT ·AndUint32(SB),NOSPLIT,$0-12
MOVQ addr+0(FP), BP
MOVL val+8(FP), AX
LOCK
ANDL AX, 0(BP)
RET
-TEXT ·OrUint32(SB),$0-12
+TEXT ·OrUint32(SB),NOSPLIT,$0-12
MOVQ addr+0(FP), BP
MOVL val+8(FP), AX
LOCK
ORL AX, 0(BP)
RET
-TEXT ·XorUint32(SB),$0-12
+TEXT ·XorUint32(SB),NOSPLIT,$0-12
MOVQ addr+0(FP), BP
MOVL val+8(FP), AX
LOCK
XORL AX, 0(BP)
RET
-TEXT ·CompareAndSwapUint32(SB),$0-20
+TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20
MOVQ addr+0(FP), DI
MOVL old+8(FP), AX
MOVL new+12(FP), DX
@@ -46,28 +46,28 @@ TEXT ·CompareAndSwapUint32(SB),$0-20
MOVL AX, ret+16(FP)
RET
-TEXT ·AndUint64(SB),$0-16
+TEXT ·AndUint64(SB),NOSPLIT,$0-16
MOVQ addr+0(FP), BP
MOVQ val+8(FP), AX
LOCK
ANDQ AX, 0(BP)
RET
-TEXT ·OrUint64(SB),$0-16
+TEXT ·OrUint64(SB),NOSPLIT,$0-16
MOVQ addr+0(FP), BP
MOVQ val+8(FP), AX
LOCK
ORQ AX, 0(BP)
RET
-TEXT ·XorUint64(SB),$0-16
+TEXT ·XorUint64(SB),NOSPLIT,$0-16
MOVQ addr+0(FP), BP
MOVQ val+8(FP), AX
LOCK
XORQ AX, 0(BP)
RET
-TEXT ·CompareAndSwapUint64(SB),$0-32
+TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32
MOVQ addr+0(FP), DI
MOVQ old+8(FP), AX
MOVQ new+16(FP), DX
diff --git a/pkg/atomicbitops/atomicbitops_arm64.s b/pkg/atomicbitops/atomicbitops_arm64.s
index 5c780851b..644a6bca5 100644
--- a/pkg/atomicbitops/atomicbitops_arm64.s
+++ b/pkg/atomicbitops/atomicbitops_arm64.s
@@ -16,7 +16,7 @@
#include "textflag.h"
-TEXT ·AndUint32(SB),$0-12
+TEXT ·AndUint32(SB),NOSPLIT,$0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
again:
@@ -26,7 +26,7 @@ again:
CBNZ R3, again
RET
-TEXT ·OrUint32(SB),$0-12
+TEXT ·OrUint32(SB),NOSPLIT,$0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
again:
@@ -36,7 +36,7 @@ again:
CBNZ R3, again
RET
-TEXT ·XorUint32(SB),$0-12
+TEXT ·XorUint32(SB),NOSPLIT,$0-12
MOVD ptr+0(FP), R0
MOVW val+8(FP), R1
again:
@@ -46,7 +46,7 @@ again:
CBNZ R3, again
RET
-TEXT ·CompareAndSwapUint32(SB),$0-20
+TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20
MOVD addr+0(FP), R0
MOVW old+8(FP), R1
MOVW new+12(FP), R2
@@ -60,7 +60,7 @@ done:
MOVW R3, prev+16(FP)
RET
-TEXT ·AndUint64(SB),$0-16
+TEXT ·AndUint64(SB),NOSPLIT,$0-16
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
again:
@@ -70,7 +70,7 @@ again:
CBNZ R3, again
RET
-TEXT ·OrUint64(SB),$0-16
+TEXT ·OrUint64(SB),NOSPLIT,$0-16
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
again:
@@ -80,7 +80,7 @@ again:
CBNZ R3, again
RET
-TEXT ·XorUint64(SB),$0-16
+TEXT ·XorUint64(SB),NOSPLIT,$0-16
MOVD ptr+0(FP), R0
MOVD val+8(FP), R1
again:
@@ -90,7 +90,7 @@ again:
CBNZ R3, again
RET
-TEXT ·CompareAndSwapUint64(SB),$0-32
+TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32
MOVD addr+0(FP), R0
MOVD old+8(FP), R1
MOVD new+16(FP), R2
diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go
index 3b2898256..4e9c27b98 100644
--- a/pkg/atomicbitops/atomicbitops_noasm.go
+++ b/pkg/atomicbitops/atomicbitops_noasm.go
@@ -20,6 +20,7 @@ import (
"sync/atomic"
)
+//go:nosplit
func AndUint32(addr *uint32, val uint32) {
for {
o := atomic.LoadUint32(addr)
@@ -30,6 +31,7 @@ func AndUint32(addr *uint32, val uint32) {
}
}
+//go:nosplit
func OrUint32(addr *uint32, val uint32) {
for {
o := atomic.LoadUint32(addr)
@@ -40,6 +42,7 @@ func OrUint32(addr *uint32, val uint32) {
}
}
+//go:nosplit
func XorUint32(addr *uint32, val uint32) {
for {
o := atomic.LoadUint32(addr)
@@ -50,6 +53,7 @@ func XorUint32(addr *uint32, val uint32) {
}
}
+//go:nosplit
func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) {
for {
prev = atomic.LoadUint32(addr)
@@ -62,6 +66,7 @@ func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) {
}
}
+//go:nosplit
func AndUint64(addr *uint64, val uint64) {
for {
o := atomic.LoadUint64(addr)
@@ -72,6 +77,7 @@ func AndUint64(addr *uint64, val uint64) {
}
}
+//go:nosplit
func OrUint64(addr *uint64, val uint64) {
for {
o := atomic.LoadUint64(addr)
@@ -82,6 +88,7 @@ func OrUint64(addr *uint64, val uint64) {
}
}
+//go:nosplit
func XorUint64(addr *uint64, val uint64) {
for {
o := atomic.LoadUint64(addr)
@@ -92,6 +99,7 @@ func XorUint64(addr *uint64, val uint64) {
}
}
+//go:nosplit
func CompareAndSwapUint64(addr *uint64, old, new uint64) (prev uint64) {
for {
prev = atomic.LoadUint64(addr)
diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go
index 2fb7e5809..521f1a82d 100644
--- a/pkg/safecopy/safecopy.go
+++ b/pkg/safecopy/safecopy.go
@@ -127,10 +127,10 @@ func initializeAddresses() {
func init() {
initializeAddresses()
- if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler); err != nil {
+ if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler, 0); err != nil {
panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err))
}
- if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler); err != nil {
+ if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler, 0); err != nil {
panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err))
}
syserror.AddErrorUnwrapper(func(e error) (syscall.Errno, bool) {
diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go
index 41dd567f3..b15b920fe 100644
--- a/pkg/safecopy/safecopy_unsafe.go
+++ b/pkg/safecopy/safecopy_unsafe.go
@@ -324,11 +324,13 @@ func errorFromFaultSignal(addr uintptr, sig int32) error {
//
// It stores the value of the previously set handler in previous.
//
+// The extraMask parameter is OR'ed into the existing signal handler mask.
+//
// This function will be called on initialization in order to install safecopy
// handlers for appropriate signals. These handlers will call the previous
// handler however, and if this is function is being used externally then the
// same courtesy is expected.
-func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr) error {
+func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr, extraMask uint64) error {
var sa struct {
handler uintptr
flags uint64
@@ -348,10 +350,10 @@ func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr
if sa.handler == 0 {
return fmt.Errorf("previous handler for signal %x isn't set", sig)
}
-
*previous = sa.handler
// Install our own handler.
+ sa.mask |= extraMask
sa.handler = handler
if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 {
return e
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 159f7eafd..e27f57536 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -70,6 +70,7 @@ go_test(
"requires-kvm",
],
deps = [
+ "//pkg/procid",
"//pkg/sentry/arch",
"//pkg/sentry/platform",
"//pkg/sentry/platform/kvm/testutil",
diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go
index 4b23f7803..555b5fa96 100644
--- a/pkg/sentry/platform/kvm/bluepill.go
+++ b/pkg/sentry/platform/kvm/bluepill.go
@@ -46,6 +46,14 @@ var (
// bounceSignalMask has only bounceSignal set.
bounceSignalMask = uint64(1 << (uint64(bounceSignal) - 1))
+ // otherSignalsMask includes all other signals that will be cause the
+ // vCPU to exit during execution.
+ //
+ // Currently, this includes the preemption signal and the profiling
+ // signal. In general, these should be signals whose delivery actually
+ // influences the way the program executes as the switch can be costly.
+ otherSignalsMask = uint64(1<<(uint64(syscall.SIGURG)-1)) | uint64(1<<(uint64(syscall.SIGPROF)-1))
+
// bounce is the interrupt vector used to return to the kernel.
bounce = uint32(ring0.VirtualizationException)
@@ -86,8 +94,8 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) {
}
func init() {
- // Install the handler.
- if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler); err != nil {
+ // Install the handler, masking all signals.
+ if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler, ^uint64(0)); err != nil {
panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 9add7c944..4e9d80765 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -24,6 +24,7 @@ import (
"syscall"
"unsafe"
+ "gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/sentry/arch"
)
@@ -58,6 +59,19 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 {
return &((*arch.UContext64)(context).MContext)
}
+// injectInterrupt is a helper to inject an interrupt.
+//
+//go:nosplit
+func injectInterrupt(c *vCPU) {
+ if _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(c.fd),
+ _KVM_INTERRUPT,
+ uintptr(unsafe.Pointer(&bounce))); errno != 0 {
+ throw("interrupt injection failed")
+ }
+}
+
// bluepillHandler is called from the signal stub.
//
// The world may be stopped while this is executing, and it executes on the
@@ -69,6 +83,9 @@ func bluepillHandler(context unsafe.Pointer) {
// Sanitize the registers; interrupts must always be disabled.
c := bluepillArchEnter(bluepillArchContext(context))
+ // Enable preemption.
+ c.setSignalMask(true)
+
// Increment the number of switches.
atomic.AddUint32(&c.switches, 1)
@@ -89,6 +106,9 @@ func bluepillHandler(context unsafe.Pointer) {
// interrupted KVM. Since we're in a signal handler
// currently, all signals are masked and the signal
// must have been delivered directly to this thread.
+ //
+ // We will not be able to actually do subsequent
+ // KVM_RUNs until this signal is processed.
timeout := syscall.Timespec{}
sig, _, errno := syscall.RawSyscall6(
syscall.SYS_RT_SIGTIMEDWAIT,
@@ -98,12 +118,24 @@ func bluepillHandler(context unsafe.Pointer) {
8, // sigset size.
0, 0)
if errno == syscall.EAGAIN {
- continue
- }
- if errno != 0 {
+ // If weren't able to process this signal, then
+ // it must not have been in the bounceMask. By
+ // elimination, it must have been the
+ // preemption signal. We can't process this
+ // signal right now, so we need to disable
+ // preemption until the interrupt is actually
+ // handled.
+ c.setSignalMask(false)
+ // Note that there is a waiter for this vCPU.
+ // This will cause the vCPU to exit at some
+ // point in the future (releasing the user lock
+ // and guest mode).
+ atomicbitops.OrUint32(&c.state, vCPUWaiter)
+ } else if errno != 0 {
+ // We only expect success or a timeout.
throw("error waiting for pending signal")
- }
- if sig != uintptr(bounceSignal) {
+ } else if sig != uintptr(bounceSignal) {
+ // Only the bounce should be processed.
throw("unexpected signal")
}
@@ -114,11 +146,10 @@ func bluepillHandler(context unsafe.Pointer) {
// ready.
if c.runData.readyForInterruptInjection == 0 {
c.runData.requestInterruptWindow = 1
- continue // Rerun vCPU.
} else {
- // Force injection below; the vCPU is ready.
- c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN
+ injectInterrupt(c)
}
+ continue // Rerun vCPU.
case syscall.EFAULT:
// If a fault is not serviceable due to the host
// backing pages having page permissions, instead of an
@@ -137,6 +168,30 @@ func bluepillHandler(context unsafe.Pointer) {
}
switch c.runData.exitReason {
+ case _KVM_EXIT_HLT:
+ // Copy out registers.
+ bluepillArchExit(c, bluepillArchContext(context))
+
+ // Return to the vCPUReady state; notify any waiters.
+ user := atomic.LoadUint32(&c.state) & vCPUUser
+ switch atomic.SwapUint32(&c.state, user) {
+ case user | vCPUGuest: // Expected case.
+ case user | vCPUGuest | vCPUWaiter:
+ c.notify()
+ default:
+ throw("invalid state")
+ }
+ return
+ case _KVM_EXIT_IRQ_WINDOW_OPEN:
+ // Inject an interrupt now.
+ injectInterrupt(c)
+ // Clear previous injection request.
+ c.runData.requestInterruptWindow = 0
+ case _KVM_EXIT_INTR:
+ // This is fine, it is the normal exit reason during
+ // signal delivery. However, we still need to handle
+ // other potential exit reasons *combined* with EINTR,
+ // so this switch must be hit even after the above.
case _KVM_EXIT_EXCEPTION:
c.die(bluepillArchContext(context), "exception")
return
@@ -155,20 +210,6 @@ func bluepillHandler(context unsafe.Pointer) {
case _KVM_EXIT_DEBUG:
c.die(bluepillArchContext(context), "debug")
return
- case _KVM_EXIT_HLT:
- // Copy out registers.
- bluepillArchExit(c, bluepillArchContext(context))
-
- // Return to the vCPUReady state; notify any waiters.
- user := atomic.LoadUint32(&c.state) & vCPUUser
- switch atomic.SwapUint32(&c.state, user) {
- case user | vCPUGuest: // Expected case.
- case user | vCPUGuest | vCPUWaiter:
- c.notify()
- default:
- throw("invalid state")
- }
- return
case _KVM_EXIT_MMIO:
// Increment the fault count.
atomic.AddUint32(&c.faults, 1)
@@ -200,18 +241,6 @@ func bluepillHandler(context unsafe.Pointer) {
data[i] = *b
}
}
- case _KVM_EXIT_IRQ_WINDOW_OPEN:
- // Interrupt: we must have requested an interrupt
- // window; set the interrupt line.
- if _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(c.fd),
- _KVM_INTERRUPT,
- uintptr(unsafe.Pointer(&bounce))); errno != 0 {
- throw("interrupt injection failed")
- }
- // Clear previous injection request.
- c.runData.requestInterruptWindow = 0
case _KVM_EXIT_SHUTDOWN:
c.die(bluepillArchContext(context), "shutdown")
return
diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go
index 1d5c77ff4..07d9c9a98 100644
--- a/pkg/sentry/platform/kvm/kvm_const.go
+++ b/pkg/sentry/platform/kvm/kvm_const.go
@@ -48,6 +48,7 @@ const (
_KVM_EXIT_IRQ_WINDOW_OPEN = 0x7
_KVM_EXIT_SHUTDOWN = 0x8
_KVM_EXIT_FAIL_ENTRY = 0x9
+ _KVM_EXIT_INTR = 0xa
_KVM_EXIT_INTERNAL_ERROR = 0x11
_KVM_EXIT_SYSTEM_EVENT = 0x18
)
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index c42752d50..d42ba3f24 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -16,12 +16,15 @@ package kvm
import (
"math/rand"
+ "os"
"reflect"
+ "runtime"
"sync/atomic"
"syscall"
"testing"
"time"
+ "gvisor.dev/gvisor/pkg/procid"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil"
@@ -320,15 +323,18 @@ func TestBounce(t *testing.T) {
})
}
+// randomSleep is used by some race tests below.
+//
+// O(hundreds of microseconds) is appropriate to ensure different overlaps and
+// different schedules.
+func randomSleep() {
+ if n := rand.Intn(1000); n > 100 {
+ time.Sleep(time.Duration(n) * time.Microsecond)
+ }
+}
+
func TestBounceStress(t *testing.T) {
applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
- randomSleep := func() {
- // O(hundreds of microseconds) is appropriate to ensure
- // different overlaps and different schedules.
- if n := rand.Intn(1000); n > 100 {
- time.Sleep(time.Duration(n) * time.Microsecond)
- }
- }
for i := 0; i < 1000; i++ {
// Start an asynchronously executing goroutine that
// calls Bounce at pseudo-random point in time.
@@ -355,6 +361,50 @@ func TestBounceStress(t *testing.T) {
})
}
+func TestPreemption(t *testing.T) {
+ applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
+ // Lock the main vCPU thread.
+ runtime.LockOSThread()
+ pid := os.Getpid()
+ tid := procid.Current()
+ running := uint32(1)
+ defer atomic.StoreUint32(&running, 0)
+
+ // Start generating "preemptions".
+ go func() {
+ for atomic.LoadUint32(&running) != 0 {
+ // Kick via a preemption: best effort.
+ syscall.Tgkill(pid, int(tid), syscall.SIGURG)
+ randomSleep()
+ }
+ }()
+
+ for i := 0; i < 1000; i++ {
+ randomSleep()
+ var si arch.SignalInfo
+ if _, err := c.SwitchToUser(ring0.SwitchOpts{
+ Registers: regs,
+ FloatingPointState: dummyFPState,
+ PageTables: pt,
+ }, &si); err != platform.ErrContextInterrupt {
+ t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt)
+ }
+ // Was this caused by a preemption signal?
+ if got := atomic.LoadUint32(&c.state); got&vCPUGuest != 0 && got&vCPUWaiter == 0 {
+ continue
+ }
+ c.unlock()
+ // Should have dropped from guest mode, processed preemption.
+ if got := atomic.LoadUint32(&c.state); got != vCPUReady {
+ t.Errorf("vCPU not in ready state: got %v", got)
+ }
+ randomSleep()
+ c.lock()
+ }
+ return false
+ })
+}
+
func TestInvalidate(t *testing.T) {
var data uintptr // Used below.
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool {
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index f1afc74dc..345b71e8f 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -108,6 +108,9 @@ type vCPU struct {
// This is a bitmask of the three fields (vCPU*) described above.
state uint32
+ // signalMask is the vCPU signal mask.
+ signalMask uint64
+
// runData for this vCPU.
runData *runData
@@ -121,6 +124,7 @@ type vCPU struct {
// vCPUArchState is the architecture-specific state.
vCPUArchState
+ // dieState is the temporary state associated with throwing exceptions.
dieState dieState
}
@@ -153,11 +157,6 @@ func (m *machine) newVCPU() *vCPU {
c.CPU.Init(&m.kernel, c)
m.vCPUsByID[c.id] = c
- // Ensure the signal mask is correct.
- if err := c.setSignalMask(); err != nil {
- panic(fmt.Sprintf("error setting signal mask: %v", err))
- }
-
// Map the run data.
runData, err := mapRunData(int(fd))
if err != nil {
diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
index 7156c245f..52286e56d 100644
--- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go
@@ -111,31 +111,6 @@ func (c *vCPU) setSystemTime() error {
return nil
}
-// setSignalMask sets the vCPU signal mask.
-//
-// This must be called prior to running the vCPU.
-func (c *vCPU) setSignalMask() error {
- // The layout of this structure implies that it will not necessarily be
- // the same layout chosen by the Go compiler. It gets fudged here.
- var data struct {
- length uint32
- mask1 uint32
- mask2 uint32
- _ uint32
- }
- data.length = 8 // Fixed sigset size.
- data.mask1 = ^uint32(bounceSignalMask & 0xffffffff)
- data.mask2 = ^uint32(bounceSignalMask >> 32)
- if _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(c.fd),
- _KVM_SET_SIGNAL_MASK,
- uintptr(unsafe.Pointer(&data))); errno != 0 {
- return fmt.Errorf("error setting signal mask: %v", errno)
- }
- return nil
-}
-
// setUserRegisters sets user registers in the vCPU.
func (c *vCPU) setUserRegisters(uregs *userRegs) error {
if _, _, errno := syscall.RawSyscall(
diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
index b531f2f85..185eeb4f0 100644
--- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go
@@ -268,32 +268,6 @@ func (c *vCPU) setSystemTime() error {
return nil
}
-// setSignalMask sets the vCPU signal mask.
-//
-// This must be called prior to running the vCPU.
-func (c *vCPU) setSignalMask() error {
- // The layout of this structure implies that it will not necessarily be
- // the same layout chosen by the Go compiler. It gets fudged here.
- var data struct {
- length uint32
- mask1 uint32
- mask2 uint32
- _ uint32
- }
- data.length = 8 // Fixed sigset size.
- data.mask1 = ^uint32(bounceSignalMask & 0xffffffff)
- data.mask2 = ^uint32(bounceSignalMask >> 32)
- if _, _, errno := syscall.RawSyscall(
- syscall.SYS_IOCTL,
- uintptr(c.fd),
- _KVM_SET_SIGNAL_MASK,
- uintptr(unsafe.Pointer(&data))); errno != 0 {
- return fmt.Errorf("error setting signal mask: %v", errno)
- }
-
- return nil
-}
-
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
// Check for canonical addresses.
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index f04be2ab5..e4de0a889 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -87,6 +87,47 @@ func unmapRunData(r *runData) error {
return nil
}
+// setSignalMask sets the vCPU signal mask.
+//
+// This will be called from the bluepill handler, and therefore must not
+// perform any allocation.
+//
+//go:nosplit
+func (c *vCPU) setSignalMask(enableOthers bool) {
+ // The signal mask is either:
+ // *) Only the bounce signal, which we need to use to execute the
+ // machine state up until the bounce interrupt can be processed.
+ // or
+ // *) All signals, which is the default state unless we need to
+ // continue execution to exit guest mode (the case above).
+ mask := bounceSignalMask
+ if enableOthers {
+ mask |= otherSignalsMask
+ }
+ if c.signalMask == mask {
+ return // Already set.
+ }
+
+ // The layout of this structure implies that it will not necessarily be
+ // the same layout chosen by the Go compiler. It gets fudged here.
+ var data struct {
+ length uint32
+ mask1 uint32
+ mask2 uint32
+ _ uint32
+ }
+ data.length = 8 // Fixed sigset size.
+ data.mask1 = ^uint32(mask & 0xffffffff)
+ data.mask2 = ^uint32(mask >> 32)
+ if _, _, errno := syscall.RawSyscall(
+ syscall.SYS_IOCTL,
+ uintptr(c.fd),
+ _KVM_SET_SIGNAL_MASK,
+ uintptr(unsafe.Pointer(&data))); errno != 0 {
+ throw("setSignal mask failed")
+ }
+}
+
// atomicAddressSpace is an atomic address space pointer.
type atomicAddressSpace struct {
pointer unsafe.Pointer
diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go
index 8de75ae57..6c15727fa 100644
--- a/runsc/sandbox/sandbox.go
+++ b/runsc/sandbox/sandbox.go
@@ -444,12 +444,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF
nextFD++
}
- // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff
- // isn't set.
- if conf.Platform == "kvm" {
- cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1")
- }
-
// The current process' stdio must be passed to the application via the
// --stdio-fds flag. The stdio of the sandbox process itself must not
// be connected to the same FDs, otherwise we risk leaking sandbox