From 906eb6295d54a05663a223f1dc379a16148de2d1 Mon Sep 17 00:00:00 2001 From: gVisor bot Date: Tue, 18 Feb 2020 13:42:31 -0800 Subject: atomicbitops package cleanups - Redocument memory ordering from "no ordering" to "acquire-release". (No functional change: both LOCK WHATEVER on x86, and LDAXR/STLXR loops on ARM64, already have this property.) - Remove IncUnlessZeroInt32 and DecUnlessOneInt32, which were only faster than the equivalent loops using sync/atomic before the Go compiler inlined non-unsafe.Pointer atomics many releases ago. PiperOrigin-RevId: 295811743 --- pkg/atomicbitops/atomicbitops_arm64.s | 105 ++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 pkg/atomicbitops/atomicbitops_arm64.s (limited to 'pkg/atomicbitops/atomicbitops_arm64.s') diff --git a/pkg/atomicbitops/atomicbitops_arm64.s b/pkg/atomicbitops/atomicbitops_arm64.s new file mode 100644 index 000000000..5c780851b --- /dev/null +++ b/pkg/atomicbitops/atomicbitops_arm64.s @@ -0,0 +1,105 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +#include "textflag.h" + +TEXT ·AndUint32(SB),$0-12 + MOVD ptr+0(FP), R0 + MOVW val+8(FP), R1 +again: + LDAXRW (R0), R2 + ANDW R1, R2 + STLXRW R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·OrUint32(SB),$0-12 + MOVD ptr+0(FP), R0 + MOVW val+8(FP), R1 +again: + LDAXRW (R0), R2 + ORRW R1, R2 + STLXRW R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·XorUint32(SB),$0-12 + MOVD ptr+0(FP), R0 + MOVW val+8(FP), R1 +again: + LDAXRW (R0), R2 + EORW R1, R2 + STLXRW R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·CompareAndSwapUint32(SB),$0-20 + MOVD addr+0(FP), R0 + MOVW old+8(FP), R1 + MOVW new+12(FP), R2 +again: + LDAXRW (R0), R3 + CMPW R1, R3 + BNE done + STLXRW R2, (R0), R4 + CBNZ R4, again +done: + MOVW R3, prev+16(FP) + RET + +TEXT ·AndUint64(SB),$0-16 + MOVD ptr+0(FP), R0 + MOVD val+8(FP), R1 +again: + LDAXR (R0), R2 + AND R1, R2 + STLXR R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·OrUint64(SB),$0-16 + MOVD ptr+0(FP), R0 + MOVD val+8(FP), R1 +again: + LDAXR (R0), R2 + ORR R1, R2 + STLXR R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·XorUint64(SB),$0-16 + MOVD ptr+0(FP), R0 + MOVD val+8(FP), R1 +again: + LDAXR (R0), R2 + EOR R1, R2 + STLXR R2, (R0), R3 + CBNZ R3, again + RET + +TEXT ·CompareAndSwapUint64(SB),$0-32 + MOVD addr+0(FP), R0 + MOVD old+8(FP), R1 + MOVD new+16(FP), R2 +again: + LDAXR (R0), R3 + CMP R1, R3 + BNE done + STLXR R2, (R0), R4 + CBNZ R4, again +done: + MOVD R3, prev+24(FP) + RET -- cgit v1.2.3 From 3fac85da951f9f56d0232718ea7584250cf11f31 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 30 Mar 2020 12:36:30 -0700 Subject: kvm: handle exit reasons even under EINTR. In the case of other signals (preemption), inject a normal bounce and defer the signal until the vCPU has been returned from guest mode. 
PiperOrigin-RevId: 303799678 --- pkg/atomicbitops/atomicbitops_amd64.s | 16 ++-- pkg/atomicbitops/atomicbitops_arm64.s | 16 ++-- pkg/atomicbitops/atomicbitops_noasm.go | 8 ++ pkg/safecopy/safecopy.go | 4 +- pkg/safecopy/safecopy_unsafe.go | 6 +- pkg/sentry/platform/kvm/BUILD | 1 + pkg/sentry/platform/kvm/bluepill.go | 12 ++- pkg/sentry/platform/kvm/bluepill_unsafe.go | 97 ++++++++++++++++--------- pkg/sentry/platform/kvm/kvm_const.go | 1 + pkg/sentry/platform/kvm/kvm_test.go | 64 ++++++++++++++-- pkg/sentry/platform/kvm/machine.go | 9 +-- pkg/sentry/platform/kvm/machine_amd64_unsafe.go | 25 ------- pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 26 ------- pkg/sentry/platform/kvm/machine_unsafe.go | 41 +++++++++++ runsc/sandbox/sandbox.go | 6 -- 15 files changed, 207 insertions(+), 125 deletions(-) (limited to 'pkg/atomicbitops/atomicbitops_arm64.s') diff --git a/pkg/atomicbitops/atomicbitops_amd64.s b/pkg/atomicbitops/atomicbitops_amd64.s index 54c887ee5..f0edd4de7 100644 --- a/pkg/atomicbitops/atomicbitops_amd64.s +++ b/pkg/atomicbitops/atomicbitops_amd64.s @@ -16,28 +16,28 @@ #include "textflag.h" -TEXT ·AndUint32(SB),$0-12 +TEXT ·AndUint32(SB),NOSPLIT,$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK ANDL AX, 0(BP) RET -TEXT ·OrUint32(SB),$0-12 +TEXT ·OrUint32(SB),NOSPLIT,$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK ORL AX, 0(BP) RET -TEXT ·XorUint32(SB),$0-12 +TEXT ·XorUint32(SB),NOSPLIT,$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK XORL AX, 0(BP) RET -TEXT ·CompareAndSwapUint32(SB),$0-20 +TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20 MOVQ addr+0(FP), DI MOVL old+8(FP), AX MOVL new+12(FP), DX @@ -46,28 +46,28 @@ TEXT ·CompareAndSwapUint32(SB),$0-20 MOVL AX, ret+16(FP) RET -TEXT ·AndUint64(SB),$0-16 +TEXT ·AndUint64(SB),NOSPLIT,$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK ANDQ AX, 0(BP) RET -TEXT ·OrUint64(SB),$0-16 +TEXT ·OrUint64(SB),NOSPLIT,$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK ORQ AX, 0(BP) RET -TEXT ·XorUint64(SB),$0-16 +TEXT ·XorUint64(SB),NOSPLIT,$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK XORQ AX, 0(BP) RET -TEXT ·CompareAndSwapUint64(SB),$0-32 +TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32 MOVQ addr+0(FP), DI MOVQ old+8(FP), AX MOVQ new+16(FP), DX diff --git a/pkg/atomicbitops/atomicbitops_arm64.s b/pkg/atomicbitops/atomicbitops_arm64.s index 5c780851b..644a6bca5 100644 --- a/pkg/atomicbitops/atomicbitops_arm64.s +++ b/pkg/atomicbitops/atomicbitops_arm64.s @@ -16,7 +16,7 @@ #include "textflag.h" -TEXT ·AndUint32(SB),$0-12 +TEXT ·AndUint32(SB),NOSPLIT,$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -26,7 +26,7 @@ again: CBNZ R3, again RET -TEXT ·OrUint32(SB),$0-12 +TEXT ·OrUint32(SB),NOSPLIT,$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -36,7 +36,7 @@ again: CBNZ R3, again RET -TEXT ·XorUint32(SB),$0-12 +TEXT ·XorUint32(SB),NOSPLIT,$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -46,7 +46,7 @@ again: CBNZ R3, again RET -TEXT ·CompareAndSwapUint32(SB),$0-20 +TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20 MOVD addr+0(FP), R0 MOVW old+8(FP), R1 MOVW new+12(FP), R2 @@ -60,7 +60,7 @@ done: MOVW R3, prev+16(FP) RET -TEXT ·AndUint64(SB),$0-16 +TEXT ·AndUint64(SB),NOSPLIT,$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: @@ -70,7 +70,7 @@ again: CBNZ R3, again RET -TEXT ·OrUint64(SB),$0-16 +TEXT ·OrUint64(SB),NOSPLIT,$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: @@ -80,7 +80,7 @@ again: CBNZ R3, again RET -TEXT ·XorUint64(SB),$0-16 +TEXT ·XorUint64(SB),NOSPLIT,$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: @@ -90,7 +90,7 @@ 
again: CBNZ R3, again RET -TEXT ·CompareAndSwapUint64(SB),$0-32 +TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32 MOVD addr+0(FP), R0 MOVD old+8(FP), R1 MOVD new+16(FP), R2 diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go index 3b2898256..4e9c27b98 100644 --- a/pkg/atomicbitops/atomicbitops_noasm.go +++ b/pkg/atomicbitops/atomicbitops_noasm.go @@ -20,6 +20,7 @@ import ( "sync/atomic" ) +//go:nosplit func AndUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -30,6 +31,7 @@ func AndUint32(addr *uint32, val uint32) { } } +//go:nosplit func OrUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -40,6 +42,7 @@ func OrUint32(addr *uint32, val uint32) { } } +//go:nosplit func XorUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -50,6 +53,7 @@ func XorUint32(addr *uint32, val uint32) { } } +//go:nosplit func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) { for { prev = atomic.LoadUint32(addr) @@ -62,6 +66,7 @@ func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) { } } +//go:nosplit func AndUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -72,6 +77,7 @@ func AndUint64(addr *uint64, val uint64) { } } +//go:nosplit func OrUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -82,6 +88,7 @@ func OrUint64(addr *uint64, val uint64) { } } +//go:nosplit func XorUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -92,6 +99,7 @@ func XorUint64(addr *uint64, val uint64) { } } +//go:nosplit func CompareAndSwapUint64(addr *uint64, old, new uint64) (prev uint64) { for { prev = atomic.LoadUint64(addr) diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go index 2fb7e5809..521f1a82d 100644 --- a/pkg/safecopy/safecopy.go +++ b/pkg/safecopy/safecopy.go @@ -127,10 +127,10 @@ func initializeAddresses() { func init() { initializeAddresses() - if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler); err != nil { + if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler, 0); err != nil { panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err)) } - if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler); err != nil { + if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler, 0); err != nil { panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err)) } syserror.AddErrorUnwrapper(func(e error) (syscall.Errno, bool) { diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go index 41dd567f3..b15b920fe 100644 --- a/pkg/safecopy/safecopy_unsafe.go +++ b/pkg/safecopy/safecopy_unsafe.go @@ -324,11 +324,13 @@ func errorFromFaultSignal(addr uintptr, sig int32) error { // // It stores the value of the previously set handler in previous. // +// The extraMask parameter is OR'ed into the existing signal handler mask. +// // This function will be called on initialization in order to install safecopy // handlers for appropriate signals. These handlers will call the previous // handler however, and if this is function is being used externally then the // same courtesy is expected. 
-func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr) error { +func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr, extraMask uint64) error { var sa struct { handler uintptr flags uint64 @@ -348,10 +350,10 @@ func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr if sa.handler == 0 { return fmt.Errorf("previous handler for signal %x isn't set", sig) } - *previous = sa.handler // Install our own handler. + sa.mask |= extraMask sa.handler = handler if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 { return e diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index 159f7eafd..e27f57536 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -70,6 +70,7 @@ go_test( "requires-kvm", ], deps = [ + "//pkg/procid", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 4b23f7803..555b5fa96 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -46,6 +46,14 @@ var ( // bounceSignalMask has only bounceSignal set. bounceSignalMask = uint64(1 << (uint64(bounceSignal) - 1)) + // otherSignalsMask includes all other signals that will be cause the + // vCPU to exit during execution. + // + // Currently, this includes the preemption signal and the profiling + // signal. In general, these should be signals whose delivery actually + // influences the way the program executes as the switch can be costly. + otherSignalsMask = uint64(1<<(uint64(syscall.SIGURG)-1)) | uint64(1<<(uint64(syscall.SIGPROF)-1)) + // bounce is the interrupt vector used to return to the kernel. bounce = uint32(ring0.VirtualizationException) @@ -86,8 +94,8 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) { } func init() { - // Install the handler. - if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler); err != nil { + // Install the handler, masking all signals. + if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler, ^uint64(0)); err != nil { panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err)) } diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 9add7c944..4e9d80765 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -24,6 +24,7 @@ import ( "syscall" "unsafe" + "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/arch" ) @@ -58,6 +59,19 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 { return &((*arch.UContext64)(context).MContext) } +// injectInterrupt is a helper to inject an interrupt. +// +//go:nosplit +func injectInterrupt(c *vCPU) { + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_INTERRUPT, + uintptr(unsafe.Pointer(&bounce))); errno != 0 { + throw("interrupt injection failed") + } +} + // bluepillHandler is called from the signal stub. // // The world may be stopped while this is executing, and it executes on the @@ -69,6 +83,9 @@ func bluepillHandler(context unsafe.Pointer) { // Sanitize the registers; interrupts must always be disabled. c := bluepillArchEnter(bluepillArchContext(context)) + // Enable preemption. 
+ c.setSignalMask(true) + // Increment the number of switches. atomic.AddUint32(&c.switches, 1) @@ -89,6 +106,9 @@ func bluepillHandler(context unsafe.Pointer) { // interrupted KVM. Since we're in a signal handler // currently, all signals are masked and the signal // must have been delivered directly to this thread. + // + // We will not be able to actually do subsequent + // KVM_RUNs until this signal is processed. timeout := syscall.Timespec{} sig, _, errno := syscall.RawSyscall6( syscall.SYS_RT_SIGTIMEDWAIT, @@ -98,12 +118,24 @@ func bluepillHandler(context unsafe.Pointer) { 8, // sigset size. 0, 0) if errno == syscall.EAGAIN { - continue - } - if errno != 0 { + // If weren't able to process this signal, then + // it must not have been in the bounceMask. By + // elimination, it must have been the + // preemption signal. We can't process this + // signal right now, so we need to disable + // preemption until the interrupt is actually + // handled. + c.setSignalMask(false) + // Note that there is a waiter for this vCPU. + // This will cause the vCPU to exit at some + // point in the future (releasing the user lock + // and guest mode). + atomicbitops.OrUint32(&c.state, vCPUWaiter) + } else if errno != 0 { + // We only expect success or a timeout. throw("error waiting for pending signal") - } - if sig != uintptr(bounceSignal) { + } else if sig != uintptr(bounceSignal) { + // Only the bounce should be processed. throw("unexpected signal") } @@ -114,11 +146,10 @@ func bluepillHandler(context unsafe.Pointer) { // ready. if c.runData.readyForInterruptInjection == 0 { c.runData.requestInterruptWindow = 1 - continue // Rerun vCPU. } else { - // Force injection below; the vCPU is ready. - c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN + injectInterrupt(c) } + continue // Rerun vCPU. case syscall.EFAULT: // If a fault is not serviceable due to the host // backing pages having page permissions, instead of an @@ -137,6 +168,30 @@ func bluepillHandler(context unsafe.Pointer) { } switch c.runData.exitReason { + case _KVM_EXIT_HLT: + // Copy out registers. + bluepillArchExit(c, bluepillArchContext(context)) + + // Return to the vCPUReady state; notify any waiters. + user := atomic.LoadUint32(&c.state) & vCPUUser + switch atomic.SwapUint32(&c.state, user) { + case user | vCPUGuest: // Expected case. + case user | vCPUGuest | vCPUWaiter: + c.notify() + default: + throw("invalid state") + } + return + case _KVM_EXIT_IRQ_WINDOW_OPEN: + // Inject an interrupt now. + injectInterrupt(c) + // Clear previous injection request. + c.runData.requestInterruptWindow = 0 + case _KVM_EXIT_INTR: + // This is fine, it is the normal exit reason during + // signal delivery. However, we still need to handle + // other potential exit reasons *combined* with EINTR, + // so this switch must be hit even after the above. case _KVM_EXIT_EXCEPTION: c.die(bluepillArchContext(context), "exception") return @@ -155,20 +210,6 @@ func bluepillHandler(context unsafe.Pointer) { case _KVM_EXIT_DEBUG: c.die(bluepillArchContext(context), "debug") return - case _KVM_EXIT_HLT: - // Copy out registers. - bluepillArchExit(c, bluepillArchContext(context)) - - // Return to the vCPUReady state; notify any waiters. - user := atomic.LoadUint32(&c.state) & vCPUUser - switch atomic.SwapUint32(&c.state, user) { - case user | vCPUGuest: // Expected case. - case user | vCPUGuest | vCPUWaiter: - c.notify() - default: - throw("invalid state") - } - return case _KVM_EXIT_MMIO: // Increment the fault count. 
atomic.AddUint32(&c.faults, 1) @@ -200,18 +241,6 @@ func bluepillHandler(context unsafe.Pointer) { data[i] = *b } } - case _KVM_EXIT_IRQ_WINDOW_OPEN: - // Interrupt: we must have requested an interrupt - // window; set the interrupt line. - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_INTERRUPT, - uintptr(unsafe.Pointer(&bounce))); errno != 0 { - throw("interrupt injection failed") - } - // Clear previous injection request. - c.runData.requestInterruptWindow = 0 case _KVM_EXIT_SHUTDOWN: c.die(bluepillArchContext(context), "shutdown") return diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index 1d5c77ff4..07d9c9a98 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -48,6 +48,7 @@ const ( _KVM_EXIT_IRQ_WINDOW_OPEN = 0x7 _KVM_EXIT_SHUTDOWN = 0x8 _KVM_EXIT_FAIL_ENTRY = 0x9 + _KVM_EXIT_INTR = 0xa _KVM_EXIT_INTERNAL_ERROR = 0x11 _KVM_EXIT_SYSTEM_EVENT = 0x18 ) diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index c42752d50..d42ba3f24 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -16,12 +16,15 @@ package kvm import ( "math/rand" + "os" "reflect" + "runtime" "sync/atomic" "syscall" "testing" "time" + "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" @@ -320,15 +323,18 @@ func TestBounce(t *testing.T) { }) } +// randomSleep is used by some race tests below. +// +// O(hundreds of microseconds) is appropriate to ensure different overlaps and +// different schedules. +func randomSleep() { + if n := rand.Intn(1000); n > 100 { + time.Sleep(time.Duration(n) * time.Microsecond) + } +} + func TestBounceStress(t *testing.T) { applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - randomSleep := func() { - // O(hundreds of microseconds) is appropriate to ensure - // different overlaps and different schedules. - if n := rand.Intn(1000); n > 100 { - time.Sleep(time.Duration(n) * time.Microsecond) - } - } for i := 0; i < 1000; i++ { // Start an asynchronously executing goroutine that // calls Bounce at pseudo-random point in time. @@ -355,6 +361,50 @@ func TestBounceStress(t *testing.T) { }) } +func TestPreemption(t *testing.T) { + applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { + // Lock the main vCPU thread. + runtime.LockOSThread() + pid := os.Getpid() + tid := procid.Current() + running := uint32(1) + defer atomic.StoreUint32(&running, 0) + + // Start generating "preemptions". + go func() { + for atomic.LoadUint32(&running) != 0 { + // Kick via a preemption: best effort. + syscall.Tgkill(pid, int(tid), syscall.SIGURG) + randomSleep() + } + }() + + for i := 0; i < 1000; i++ { + randomSleep() + var si arch.SignalInfo + if _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }, &si); err != platform.ErrContextInterrupt { + t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) + } + // Was this caused by a preemption signal? + if got := atomic.LoadUint32(&c.state); got&vCPUGuest != 0 && got&vCPUWaiter == 0 { + continue + } + c.unlock() + // Should have dropped from guest mode, processed preemption. 
+ if got := atomic.LoadUint32(&c.state); got != vCPUReady { + t.Errorf("vCPU not in ready state: got %v", got) + } + randomSleep() + c.lock() + } + return false + }) +} + func TestInvalidate(t *testing.T) { var data uintptr // Used below. applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index f1afc74dc..345b71e8f 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -108,6 +108,9 @@ type vCPU struct { // This is a bitmask of the three fields (vCPU*) described above. state uint32 + // signalMask is the vCPU signal mask. + signalMask uint64 + // runData for this vCPU. runData *runData @@ -121,6 +124,7 @@ type vCPU struct { // vCPUArchState is the architecture-specific state. vCPUArchState + // dieState is the temporary state associated with throwing exceptions. dieState dieState } @@ -153,11 +157,6 @@ func (m *machine) newVCPU() *vCPU { c.CPU.Init(&m.kernel, c) m.vCPUsByID[c.id] = c - // Ensure the signal mask is correct. - if err := c.setSignalMask(); err != nil { - panic(fmt.Sprintf("error setting signal mask: %v", err)) - } - // Map the run data. runData, err := mapRunData(int(fd)) if err != nil { diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 7156c245f..52286e56d 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -111,31 +111,6 @@ func (c *vCPU) setSystemTime() error { return nil } -// setSignalMask sets the vCPU signal mask. -// -// This must be called prior to running the vCPU. -func (c *vCPU) setSignalMask() error { - // The layout of this structure implies that it will not necessarily be - // the same layout chosen by the Go compiler. It gets fudged here. - var data struct { - length uint32 - mask1 uint32 - mask2 uint32 - _ uint32 - } - data.length = 8 // Fixed sigset size. - data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) - data.mask2 = ^uint32(bounceSignalMask >> 32) - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_SET_SIGNAL_MASK, - uintptr(unsafe.Pointer(&data))); errno != 0 { - return fmt.Errorf("error setting signal mask: %v", errno) - } - return nil -} - // setUserRegisters sets user registers in the vCPU. func (c *vCPU) setUserRegisters(uregs *userRegs) error { if _, _, errno := syscall.RawSyscall( diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index b531f2f85..185eeb4f0 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -268,32 +268,6 @@ func (c *vCPU) setSystemTime() error { return nil } -// setSignalMask sets the vCPU signal mask. -// -// This must be called prior to running the vCPU. -func (c *vCPU) setSignalMask() error { - // The layout of this structure implies that it will not necessarily be - // the same layout chosen by the Go compiler. It gets fudged here. - var data struct { - length uint32 - mask1 uint32 - mask2 uint32 - _ uint32 - } - data.length = 8 // Fixed sigset size. 
- data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) - data.mask2 = ^uint32(bounceSignalMask >> 32) - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_SET_SIGNAL_MASK, - uintptr(unsafe.Pointer(&data))); errno != 0 { - return fmt.Errorf("error setting signal mask: %v", errno) - } - - return nil -} - // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { // Check for canonical addresses. diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index f04be2ab5..e4de0a889 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -87,6 +87,47 @@ func unmapRunData(r *runData) error { return nil } +// setSignalMask sets the vCPU signal mask. +// +// This will be called from the bluepill handler, and therefore must not +// perform any allocation. +// +//go:nosplit +func (c *vCPU) setSignalMask(enableOthers bool) { + // The signal mask is either: + // *) Only the bounce signal, which we need to use to execute the + // machine state up until the bounce interrupt can be processed. + // or + // *) All signals, which is the default state unless we need to + // continue execution to exit guest mode (the case above). + mask := bounceSignalMask + if enableOthers { + mask |= otherSignalsMask + } + if c.signalMask == mask { + return // Already set. + } + + // The layout of this structure implies that it will not necessarily be + // the same layout chosen by the Go compiler. It gets fudged here. + var data struct { + length uint32 + mask1 uint32 + mask2 uint32 + _ uint32 + } + data.length = 8 // Fixed sigset size. + data.mask1 = ^uint32(mask & 0xffffffff) + data.mask2 = ^uint32(mask >> 32) + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_SIGNAL_MASK, + uintptr(unsafe.Pointer(&data))); errno != 0 { + throw("setSignal mask failed") + } +} + // atomicAddressSpace is an atomic address space pointer. type atomicAddressSpace struct { pointer unsafe.Pointer diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 8de75ae57..6c15727fa 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -444,12 +444,6 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } - // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff - // isn't set. - if conf.Platform == "kvm" { - cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1") - } - // The current process' stdio must be passed to the application via the // --stdio-fds flag. 
The stdio of the sandbox process itself must not // be connected to the same FDs, otherwise we risk leaking sandbox -- cgit v1.2.3 From 4e6a1a5adb5607423c180089d8b464ef7dfdd1ae Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Wed, 1 Apr 2020 11:05:05 -0700 Subject: Automated rollback of changelist 303799678 PiperOrigin-RevId: 304221302 --- pkg/atomicbitops/atomicbitops_amd64.s | 16 ++-- pkg/atomicbitops/atomicbitops_arm64.s | 16 ++-- pkg/atomicbitops/atomicbitops_noasm.go | 8 -- pkg/safecopy/safecopy.go | 4 +- pkg/safecopy/safecopy_unsafe.go | 6 +- pkg/sentry/platform/kvm/BUILD | 1 - pkg/sentry/platform/kvm/bluepill.go | 12 +-- pkg/sentry/platform/kvm/bluepill_unsafe.go | 97 +++++++++---------------- pkg/sentry/platform/kvm/kvm_const.go | 1 - pkg/sentry/platform/kvm/kvm_test.go | 64 ++-------------- pkg/sentry/platform/kvm/machine.go | 9 ++- pkg/sentry/platform/kvm/machine_amd64_unsafe.go | 25 +++++++ pkg/sentry/platform/kvm/machine_arm64_unsafe.go | 26 +++++++ pkg/sentry/platform/kvm/machine_unsafe.go | 41 ----------- runsc/sandbox/sandbox.go | 6 ++ 15 files changed, 125 insertions(+), 207 deletions(-) (limited to 'pkg/atomicbitops/atomicbitops_arm64.s') diff --git a/pkg/atomicbitops/atomicbitops_amd64.s b/pkg/atomicbitops/atomicbitops_amd64.s index f0edd4de7..54c887ee5 100644 --- a/pkg/atomicbitops/atomicbitops_amd64.s +++ b/pkg/atomicbitops/atomicbitops_amd64.s @@ -16,28 +16,28 @@ #include "textflag.h" -TEXT ·AndUint32(SB),NOSPLIT,$0-12 +TEXT ·AndUint32(SB),$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK ANDL AX, 0(BP) RET -TEXT ·OrUint32(SB),NOSPLIT,$0-12 +TEXT ·OrUint32(SB),$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK ORL AX, 0(BP) RET -TEXT ·XorUint32(SB),NOSPLIT,$0-12 +TEXT ·XorUint32(SB),$0-12 MOVQ addr+0(FP), BP MOVL val+8(FP), AX LOCK XORL AX, 0(BP) RET -TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20 +TEXT ·CompareAndSwapUint32(SB),$0-20 MOVQ addr+0(FP), DI MOVL old+8(FP), AX MOVL new+12(FP), DX @@ -46,28 +46,28 @@ TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20 MOVL AX, ret+16(FP) RET -TEXT ·AndUint64(SB),NOSPLIT,$0-16 +TEXT ·AndUint64(SB),$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK ANDQ AX, 0(BP) RET -TEXT ·OrUint64(SB),NOSPLIT,$0-16 +TEXT ·OrUint64(SB),$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK ORQ AX, 0(BP) RET -TEXT ·XorUint64(SB),NOSPLIT,$0-16 +TEXT ·XorUint64(SB),$0-16 MOVQ addr+0(FP), BP MOVQ val+8(FP), AX LOCK XORQ AX, 0(BP) RET -TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32 +TEXT ·CompareAndSwapUint64(SB),$0-32 MOVQ addr+0(FP), DI MOVQ old+8(FP), AX MOVQ new+16(FP), DX diff --git a/pkg/atomicbitops/atomicbitops_arm64.s b/pkg/atomicbitops/atomicbitops_arm64.s index 644a6bca5..5c780851b 100644 --- a/pkg/atomicbitops/atomicbitops_arm64.s +++ b/pkg/atomicbitops/atomicbitops_arm64.s @@ -16,7 +16,7 @@ #include "textflag.h" -TEXT ·AndUint32(SB),NOSPLIT,$0-12 +TEXT ·AndUint32(SB),$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -26,7 +26,7 @@ again: CBNZ R3, again RET -TEXT ·OrUint32(SB),NOSPLIT,$0-12 +TEXT ·OrUint32(SB),$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -36,7 +36,7 @@ again: CBNZ R3, again RET -TEXT ·XorUint32(SB),NOSPLIT,$0-12 +TEXT ·XorUint32(SB),$0-12 MOVD ptr+0(FP), R0 MOVW val+8(FP), R1 again: @@ -46,7 +46,7 @@ again: CBNZ R3, again RET -TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-20 +TEXT ·CompareAndSwapUint32(SB),$0-20 MOVD addr+0(FP), R0 MOVW old+8(FP), R1 MOVW new+12(FP), R2 @@ -60,7 +60,7 @@ done: MOVW R3, prev+16(FP) RET -TEXT ·AndUint64(SB),NOSPLIT,$0-16 +TEXT ·AndUint64(SB),$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: 
@@ -70,7 +70,7 @@ again: CBNZ R3, again RET -TEXT ·OrUint64(SB),NOSPLIT,$0-16 +TEXT ·OrUint64(SB),$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: @@ -80,7 +80,7 @@ again: CBNZ R3, again RET -TEXT ·XorUint64(SB),NOSPLIT,$0-16 +TEXT ·XorUint64(SB),$0-16 MOVD ptr+0(FP), R0 MOVD val+8(FP), R1 again: @@ -90,7 +90,7 @@ again: CBNZ R3, again RET -TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-32 +TEXT ·CompareAndSwapUint64(SB),$0-32 MOVD addr+0(FP), R0 MOVD old+8(FP), R1 MOVD new+16(FP), R2 diff --git a/pkg/atomicbitops/atomicbitops_noasm.go b/pkg/atomicbitops/atomicbitops_noasm.go index 4e9c27b98..3b2898256 100644 --- a/pkg/atomicbitops/atomicbitops_noasm.go +++ b/pkg/atomicbitops/atomicbitops_noasm.go @@ -20,7 +20,6 @@ import ( "sync/atomic" ) -//go:nosplit func AndUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -31,7 +30,6 @@ func AndUint32(addr *uint32, val uint32) { } } -//go:nosplit func OrUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -42,7 +40,6 @@ func OrUint32(addr *uint32, val uint32) { } } -//go:nosplit func XorUint32(addr *uint32, val uint32) { for { o := atomic.LoadUint32(addr) @@ -53,7 +50,6 @@ func XorUint32(addr *uint32, val uint32) { } } -//go:nosplit func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) { for { prev = atomic.LoadUint32(addr) @@ -66,7 +62,6 @@ func CompareAndSwapUint32(addr *uint32, old, new uint32) (prev uint32) { } } -//go:nosplit func AndUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -77,7 +72,6 @@ func AndUint64(addr *uint64, val uint64) { } } -//go:nosplit func OrUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -88,7 +82,6 @@ func OrUint64(addr *uint64, val uint64) { } } -//go:nosplit func XorUint64(addr *uint64, val uint64) { for { o := atomic.LoadUint64(addr) @@ -99,7 +92,6 @@ func XorUint64(addr *uint64, val uint64) { } } -//go:nosplit func CompareAndSwapUint64(addr *uint64, old, new uint64) (prev uint64) { for { prev = atomic.LoadUint64(addr) diff --git a/pkg/safecopy/safecopy.go b/pkg/safecopy/safecopy.go index 521f1a82d..2fb7e5809 100644 --- a/pkg/safecopy/safecopy.go +++ b/pkg/safecopy/safecopy.go @@ -127,10 +127,10 @@ func initializeAddresses() { func init() { initializeAddresses() - if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler, 0); err != nil { + if err := ReplaceSignalHandler(syscall.SIGSEGV, reflect.ValueOf(signalHandler).Pointer(), &savedSigSegVHandler); err != nil { panic(fmt.Sprintf("Unable to set handler for SIGSEGV: %v", err)) } - if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler, 0); err != nil { + if err := ReplaceSignalHandler(syscall.SIGBUS, reflect.ValueOf(signalHandler).Pointer(), &savedSigBusHandler); err != nil { panic(fmt.Sprintf("Unable to set handler for SIGBUS: %v", err)) } syserror.AddErrorUnwrapper(func(e error) (syscall.Errno, bool) { diff --git a/pkg/safecopy/safecopy_unsafe.go b/pkg/safecopy/safecopy_unsafe.go index b15b920fe..41dd567f3 100644 --- a/pkg/safecopy/safecopy_unsafe.go +++ b/pkg/safecopy/safecopy_unsafe.go @@ -324,13 +324,11 @@ func errorFromFaultSignal(addr uintptr, sig int32) error { // // It stores the value of the previously set handler in previous. // -// The extraMask parameter is OR'ed into the existing signal handler mask. -// // This function will be called on initialization in order to install safecopy // handlers for appropriate signals. 
These handlers will call the previous // handler however, and if this is function is being used externally then the // same courtesy is expected. -func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr, extraMask uint64) error { +func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr) error { var sa struct { handler uintptr flags uint64 @@ -350,10 +348,10 @@ func ReplaceSignalHandler(sig syscall.Signal, handler uintptr, previous *uintptr if sa.handler == 0 { return fmt.Errorf("previous handler for signal %x isn't set", sig) } + *previous = sa.handler // Install our own handler. - sa.mask |= extraMask sa.handler = handler if _, _, e := syscall.RawSyscall6(syscall.SYS_RT_SIGACTION, uintptr(sig), uintptr(unsafe.Pointer(&sa)), 0, maskLen, 0, 0); e != 0 { return e diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD index e27f57536..159f7eafd 100644 --- a/pkg/sentry/platform/kvm/BUILD +++ b/pkg/sentry/platform/kvm/BUILD @@ -70,7 +70,6 @@ go_test( "requires-kvm", ], deps = [ - "//pkg/procid", "//pkg/sentry/arch", "//pkg/sentry/platform", "//pkg/sentry/platform/kvm/testutil", diff --git a/pkg/sentry/platform/kvm/bluepill.go b/pkg/sentry/platform/kvm/bluepill.go index 555b5fa96..4b23f7803 100644 --- a/pkg/sentry/platform/kvm/bluepill.go +++ b/pkg/sentry/platform/kvm/bluepill.go @@ -46,14 +46,6 @@ var ( // bounceSignalMask has only bounceSignal set. bounceSignalMask = uint64(1 << (uint64(bounceSignal) - 1)) - // otherSignalsMask includes all other signals that will be cause the - // vCPU to exit during execution. - // - // Currently, this includes the preemption signal and the profiling - // signal. In general, these should be signals whose delivery actually - // influences the way the program executes as the switch can be costly. - otherSignalsMask = uint64(1<<(uint64(syscall.SIGURG)-1)) | uint64(1<<(uint64(syscall.SIGPROF)-1)) - // bounce is the interrupt vector used to return to the kernel. bounce = uint32(ring0.VirtualizationException) @@ -94,8 +86,8 @@ func (c *vCPU) die(context *arch.SignalContext64, msg string) { } func init() { - // Install the handler, masking all signals. - if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler, ^uint64(0)); err != nil { + // Install the handler. + if err := safecopy.ReplaceSignalHandler(bluepillSignal, reflect.ValueOf(sighandler).Pointer(), &savedHandler); err != nil { panic(fmt.Sprintf("Unable to set handler for signal %d: %v", bluepillSignal, err)) } diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 4e9d80765..9add7c944 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -24,7 +24,6 @@ import ( "syscall" "unsafe" - "gvisor.dev/gvisor/pkg/atomicbitops" "gvisor.dev/gvisor/pkg/sentry/arch" ) @@ -59,19 +58,6 @@ func bluepillArchContext(context unsafe.Pointer) *arch.SignalContext64 { return &((*arch.UContext64)(context).MContext) } -// injectInterrupt is a helper to inject an interrupt. -// -//go:nosplit -func injectInterrupt(c *vCPU) { - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_INTERRUPT, - uintptr(unsafe.Pointer(&bounce))); errno != 0 { - throw("interrupt injection failed") - } -} - // bluepillHandler is called from the signal stub. 
// // The world may be stopped while this is executing, and it executes on the @@ -83,9 +69,6 @@ func bluepillHandler(context unsafe.Pointer) { // Sanitize the registers; interrupts must always be disabled. c := bluepillArchEnter(bluepillArchContext(context)) - // Enable preemption. - c.setSignalMask(true) - // Increment the number of switches. atomic.AddUint32(&c.switches, 1) @@ -106,9 +89,6 @@ func bluepillHandler(context unsafe.Pointer) { // interrupted KVM. Since we're in a signal handler // currently, all signals are masked and the signal // must have been delivered directly to this thread. - // - // We will not be able to actually do subsequent - // KVM_RUNs until this signal is processed. timeout := syscall.Timespec{} sig, _, errno := syscall.RawSyscall6( syscall.SYS_RT_SIGTIMEDWAIT, @@ -118,24 +98,12 @@ func bluepillHandler(context unsafe.Pointer) { 8, // sigset size. 0, 0) if errno == syscall.EAGAIN { - // If weren't able to process this signal, then - // it must not have been in the bounceMask. By - // elimination, it must have been the - // preemption signal. We can't process this - // signal right now, so we need to disable - // preemption until the interrupt is actually - // handled. - c.setSignalMask(false) - // Note that there is a waiter for this vCPU. - // This will cause the vCPU to exit at some - // point in the future (releasing the user lock - // and guest mode). - atomicbitops.OrUint32(&c.state, vCPUWaiter) - } else if errno != 0 { - // We only expect success or a timeout. + continue + } + if errno != 0 { throw("error waiting for pending signal") - } else if sig != uintptr(bounceSignal) { - // Only the bounce should be processed. + } + if sig != uintptr(bounceSignal) { throw("unexpected signal") } @@ -146,10 +114,11 @@ func bluepillHandler(context unsafe.Pointer) { // ready. if c.runData.readyForInterruptInjection == 0 { c.runData.requestInterruptWindow = 1 + continue // Rerun vCPU. } else { - injectInterrupt(c) + // Force injection below; the vCPU is ready. + c.runData.exitReason = _KVM_EXIT_IRQ_WINDOW_OPEN } - continue // Rerun vCPU. case syscall.EFAULT: // If a fault is not serviceable due to the host // backing pages having page permissions, instead of an @@ -168,30 +137,6 @@ func bluepillHandler(context unsafe.Pointer) { } switch c.runData.exitReason { - case _KVM_EXIT_HLT: - // Copy out registers. - bluepillArchExit(c, bluepillArchContext(context)) - - // Return to the vCPUReady state; notify any waiters. - user := atomic.LoadUint32(&c.state) & vCPUUser - switch atomic.SwapUint32(&c.state, user) { - case user | vCPUGuest: // Expected case. - case user | vCPUGuest | vCPUWaiter: - c.notify() - default: - throw("invalid state") - } - return - case _KVM_EXIT_IRQ_WINDOW_OPEN: - // Inject an interrupt now. - injectInterrupt(c) - // Clear previous injection request. - c.runData.requestInterruptWindow = 0 - case _KVM_EXIT_INTR: - // This is fine, it is the normal exit reason during - // signal delivery. However, we still need to handle - // other potential exit reasons *combined* with EINTR, - // so this switch must be hit even after the above. case _KVM_EXIT_EXCEPTION: c.die(bluepillArchContext(context), "exception") return @@ -210,6 +155,20 @@ func bluepillHandler(context unsafe.Pointer) { case _KVM_EXIT_DEBUG: c.die(bluepillArchContext(context), "debug") return + case _KVM_EXIT_HLT: + // Copy out registers. + bluepillArchExit(c, bluepillArchContext(context)) + + // Return to the vCPUReady state; notify any waiters. 
+ user := atomic.LoadUint32(&c.state) & vCPUUser + switch atomic.SwapUint32(&c.state, user) { + case user | vCPUGuest: // Expected case. + case user | vCPUGuest | vCPUWaiter: + c.notify() + default: + throw("invalid state") + } + return case _KVM_EXIT_MMIO: // Increment the fault count. atomic.AddUint32(&c.faults, 1) @@ -241,6 +200,18 @@ func bluepillHandler(context unsafe.Pointer) { data[i] = *b } } + case _KVM_EXIT_IRQ_WINDOW_OPEN: + // Interrupt: we must have requested an interrupt + // window; set the interrupt line. + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_INTERRUPT, + uintptr(unsafe.Pointer(&bounce))); errno != 0 { + throw("interrupt injection failed") + } + // Clear previous injection request. + c.runData.requestInterruptWindow = 0 case _KVM_EXIT_SHUTDOWN: c.die(bluepillArchContext(context), "shutdown") return diff --git a/pkg/sentry/platform/kvm/kvm_const.go b/pkg/sentry/platform/kvm/kvm_const.go index 07d9c9a98..1d5c77ff4 100644 --- a/pkg/sentry/platform/kvm/kvm_const.go +++ b/pkg/sentry/platform/kvm/kvm_const.go @@ -48,7 +48,6 @@ const ( _KVM_EXIT_IRQ_WINDOW_OPEN = 0x7 _KVM_EXIT_SHUTDOWN = 0x8 _KVM_EXIT_FAIL_ENTRY = 0x9 - _KVM_EXIT_INTR = 0xa _KVM_EXIT_INTERNAL_ERROR = 0x11 _KVM_EXIT_SYSTEM_EVENT = 0x18 ) diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index d42ba3f24..c42752d50 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -16,15 +16,12 @@ package kvm import ( "math/rand" - "os" "reflect" - "runtime" "sync/atomic" "syscall" "testing" "time" - "gvisor.dev/gvisor/pkg/procid" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/platform" "gvisor.dev/gvisor/pkg/sentry/platform/kvm/testutil" @@ -323,18 +320,15 @@ func TestBounce(t *testing.T) { }) } -// randomSleep is used by some race tests below. -// -// O(hundreds of microseconds) is appropriate to ensure different overlaps and -// different schedules. -func randomSleep() { - if n := rand.Intn(1000); n > 100 { - time.Sleep(time.Duration(n) * time.Microsecond) - } -} - func TestBounceStress(t *testing.T) { applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { + randomSleep := func() { + // O(hundreds of microseconds) is appropriate to ensure + // different overlaps and different schedules. + if n := rand.Intn(1000); n > 100 { + time.Sleep(time.Duration(n) * time.Microsecond) + } + } for i := 0; i < 1000; i++ { // Start an asynchronously executing goroutine that // calls Bounce at pseudo-random point in time. @@ -361,50 +355,6 @@ func TestBounceStress(t *testing.T) { }) } -func TestPreemption(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - // Lock the main vCPU thread. - runtime.LockOSThread() - pid := os.Getpid() - tid := procid.Current() - running := uint32(1) - defer atomic.StoreUint32(&running, 0) - - // Start generating "preemptions". - go func() { - for atomic.LoadUint32(&running) != 0 { - // Kick via a preemption: best effort. 
- syscall.Tgkill(pid, int(tid), syscall.SIGURG) - randomSleep() - } - }() - - for i := 0; i < 1000; i++ { - randomSleep() - var si arch.SignalInfo - if _, err := c.SwitchToUser(ring0.SwitchOpts{ - Registers: regs, - FloatingPointState: dummyFPState, - PageTables: pt, - }, &si); err != platform.ErrContextInterrupt { - t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) - } - // Was this caused by a preemption signal? - if got := atomic.LoadUint32(&c.state); got&vCPUGuest != 0 && got&vCPUWaiter == 0 { - continue - } - c.unlock() - // Should have dropped from guest mode, processed preemption. - if got := atomic.LoadUint32(&c.state); got != vCPUReady { - t.Errorf("vCPU not in ready state: got %v", got) - } - randomSleep() - c.lock() - } - return false - }) -} - func TestInvalidate(t *testing.T) { var data uintptr // Used below. applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 345b71e8f..f1afc74dc 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -108,9 +108,6 @@ type vCPU struct { // This is a bitmask of the three fields (vCPU*) described above. state uint32 - // signalMask is the vCPU signal mask. - signalMask uint64 - // runData for this vCPU. runData *runData @@ -124,7 +121,6 @@ type vCPU struct { // vCPUArchState is the architecture-specific state. vCPUArchState - // dieState is the temporary state associated with throwing exceptions. dieState dieState } @@ -157,6 +153,11 @@ func (m *machine) newVCPU() *vCPU { c.CPU.Init(&m.kernel, c) m.vCPUsByID[c.id] = c + // Ensure the signal mask is correct. + if err := c.setSignalMask(); err != nil { + panic(fmt.Sprintf("error setting signal mask: %v", err)) + } + // Map the run data. runData, err := mapRunData(int(fd)) if err != nil { diff --git a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go index 52286e56d..7156c245f 100644 --- a/pkg/sentry/platform/kvm/machine_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_amd64_unsafe.go @@ -111,6 +111,31 @@ func (c *vCPU) setSystemTime() error { return nil } +// setSignalMask sets the vCPU signal mask. +// +// This must be called prior to running the vCPU. +func (c *vCPU) setSignalMask() error { + // The layout of this structure implies that it will not necessarily be + // the same layout chosen by the Go compiler. It gets fudged here. + var data struct { + length uint32 + mask1 uint32 + mask2 uint32 + _ uint32 + } + data.length = 8 // Fixed sigset size. + data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) + data.mask2 = ^uint32(bounceSignalMask >> 32) + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_SIGNAL_MASK, + uintptr(unsafe.Pointer(&data))); errno != 0 { + return fmt.Errorf("error setting signal mask: %v", errno) + } + return nil +} + // setUserRegisters sets user registers in the vCPU. func (c *vCPU) setUserRegisters(uregs *userRegs) error { if _, _, errno := syscall.RawSyscall( diff --git a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go index 185eeb4f0..b531f2f85 100644 --- a/pkg/sentry/platform/kvm/machine_arm64_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_arm64_unsafe.go @@ -268,6 +268,32 @@ func (c *vCPU) setSystemTime() error { return nil } +// setSignalMask sets the vCPU signal mask. 
+// +// This must be called prior to running the vCPU. +func (c *vCPU) setSignalMask() error { + // The layout of this structure implies that it will not necessarily be + // the same layout chosen by the Go compiler. It gets fudged here. + var data struct { + length uint32 + mask1 uint32 + mask2 uint32 + _ uint32 + } + data.length = 8 // Fixed sigset size. + data.mask1 = ^uint32(bounceSignalMask & 0xffffffff) + data.mask2 = ^uint32(bounceSignalMask >> 32) + if _, _, errno := syscall.RawSyscall( + syscall.SYS_IOCTL, + uintptr(c.fd), + _KVM_SET_SIGNAL_MASK, + uintptr(unsafe.Pointer(&data))); errno != 0 { + return fmt.Errorf("error setting signal mask: %v", errno) + } + + return nil +} + // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { // Check for canonical addresses. diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go index e4de0a889..f04be2ab5 100644 --- a/pkg/sentry/platform/kvm/machine_unsafe.go +++ b/pkg/sentry/platform/kvm/machine_unsafe.go @@ -87,47 +87,6 @@ func unmapRunData(r *runData) error { return nil } -// setSignalMask sets the vCPU signal mask. -// -// This will be called from the bluepill handler, and therefore must not -// perform any allocation. -// -//go:nosplit -func (c *vCPU) setSignalMask(enableOthers bool) { - // The signal mask is either: - // *) Only the bounce signal, which we need to use to execute the - // machine state up until the bounce interrupt can be processed. - // or - // *) All signals, which is the default state unless we need to - // continue execution to exit guest mode (the case above). - mask := bounceSignalMask - if enableOthers { - mask |= otherSignalsMask - } - if c.signalMask == mask { - return // Already set. - } - - // The layout of this structure implies that it will not necessarily be - // the same layout chosen by the Go compiler. It gets fudged here. - var data struct { - length uint32 - mask1 uint32 - mask2 uint32 - _ uint32 - } - data.length = 8 // Fixed sigset size. - data.mask1 = ^uint32(mask & 0xffffffff) - data.mask2 = ^uint32(mask >> 32) - if _, _, errno := syscall.RawSyscall( - syscall.SYS_IOCTL, - uintptr(c.fd), - _KVM_SET_SIGNAL_MASK, - uintptr(unsafe.Pointer(&data))); errno != 0 { - throw("setSignal mask failed") - } -} - // atomicAddressSpace is an atomic address space pointer. type atomicAddressSpace struct { pointer unsafe.Pointer diff --git a/runsc/sandbox/sandbox.go b/runsc/sandbox/sandbox.go index 6c15727fa..8de75ae57 100644 --- a/runsc/sandbox/sandbox.go +++ b/runsc/sandbox/sandbox.go @@ -444,6 +444,12 @@ func (s *Sandbox) createSandboxProcess(conf *boot.Config, args *Args, startSyncF nextFD++ } + // TODO(b/151157106): syscall tests fail by timeout if asyncpreemptoff + // isn't set. + if conf.Platform == "kvm" { + cmd.Env = append(cmd.Env, "GODEBUG=asyncpreemptoff=1") + } + // The current process' stdio must be passed to the application via the // --stdio-fds flag. The stdio of the sandbox process itself must not // be connected to the same FDs, otherwise we risk leaking sandbox -- cgit v1.2.3
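
Editor's note: the first commit in this log removes IncUnlessZeroInt32 and DecUnlessOneInt32 on the grounds that plain sync/atomic loops are now inlined well enough by the Go compiler to match the hand-written assembly. The removed assembly itself is not part of the patches shown above, so the sketch below is only a reconstruction of what such portable loops typically look like; the exact names, signatures, and semantics are assumptions inferred from the commit message, not the original gVisor implementations.

    // Package atomicbitops sketch: CAS-loop equivalents of the removed helpers.
    // Assumed semantics (from the commit message, not the original code):
    //   IncUnlessZeroInt32 increments *addr unless it is zero.
    //   DecUnlessOneInt32 decrements *addr unless it is one.
    package atomicbitops

    import "sync/atomic"

    // IncUnlessZeroInt32 atomically increments *addr unless its current value
    // is zero. It reports whether the increment happened. This is the kind of
    // loop the commit says is now as fast as dedicated assembly.
    func IncUnlessZeroInt32(addr *int32) bool {
    	for {
    		v := atomic.LoadInt32(addr)
    		if v == 0 {
    			return false
    		}
    		if atomic.CompareAndSwapInt32(addr, v, v+1) {
    			return true
    		}
    	}
    }

    // DecUnlessOneInt32 atomically decrements *addr unless its current value
    // is one. It reports whether the decrement happened.
    func DecUnlessOneInt32(addr *int32) bool {
    	for {
    		v := atomic.LoadInt32(addr)
    		if v == 1 {
    			return false
    		}
    		if atomic.CompareAndSwapInt32(addr, v, v-1) {
    			return true
    		}
    	}
    }

Either loop retries only when another writer races in between the load and the compare-and-swap, which is why, once the compiler inlines CompareAndSwapInt32, there is little left for the assembly versions to save.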