summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/abi/linux/prctl.go9
-rw-r--r--pkg/sentry/kernel/task_run.go41
-rw-r--r--pkg/sentry/platform/kvm/machine.go22
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go22
-rw-r--r--pkg/sentry/platform/platform.go7
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go15
-rw-r--r--pkg/sentry/platform/ptrace/subprocess_linux.go5
-rw-r--r--pkg/sentry/platform/ring0/kernel_amd64.go22
-rw-r--r--pkg/sentry/platform/ring0/x86.go14
9 files changed, 111 insertions, 46 deletions
diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go
index 6c93601de..074ec03f0 100644
--- a/pkg/abi/linux/prctl.go
+++ b/pkg/abi/linux/prctl.go
@@ -65,8 +65,9 @@ const (
// From <asm/prctl.h>
// Flags are used in syscall arch_prctl(2).
const (
- ARCH_SET_GS = 0x1001
- ARCH_SET_FS = 0x1002
- ARCH_GET_FS = 0x1003
- ARCH_GET_GS = 0x1004
+ ARCH_SET_GS = 0x1001
+ ARCH_SET_FS = 0x1002
+ ARCH_GET_FS = 0x1003
+ ARCH_GET_GS = 0x1004
+ ARCH_SET_CPUID = 0x1012
)
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 94ce5582b..a03fa6ac0 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -221,6 +221,24 @@ func (*runApp) execute(t *Task) taskRunState {
// loop to figure out why.
return (*runApp)(nil)
+ case platform.ErrContextSignalCPUID:
+ // Is this a CPUID instruction?
+ expected := arch.CPUIDInstruction[:]
+ found := make([]byte, len(expected))
+ _, err := t.CopyIn(usermem.Addr(t.Arch().IP()), &found)
+ if err == nil && bytes.Equal(expected, found) {
+ // Skip the cpuid instruction.
+ t.Arch().CPUIDEmulate(t)
+ t.Arch().SetIP(t.Arch().IP() + uintptr(len(expected)))
+
+ // Resume execution.
+ return (*runApp)(nil)
+ }
+
+ // The instruction at the given RIP was not a CPUID, so we
+ // fall through to the default signal delivery behavior below.
+ fallthrough
+
case platform.ErrContextSignal:
// Looks like a signal has been delivered to us. If it's a synchronous
// signal (SEGV, SIGBUS, etc.), it should be sent to the application
@@ -266,28 +284,7 @@ func (*runApp) execute(t *Task) taskRunState {
}
switch sig {
- case linux.SIGILL:
- // N.B. The debug stuff here is arguably
- // expensive. Don't fret. This gets called
- // about 5 times for a typical application, if
- // that.
- t.Debugf("SIGILL @ %x", t.Arch().IP())
-
- // Is this a CPUID instruction?
- expected := arch.CPUIDInstruction[:]
- found := make([]byte, len(expected))
- _, err := t.CopyIn(usermem.Addr(t.Arch().IP()), &found)
- if err == nil && bytes.Equal(expected, found) {
- // Skip the cpuid instruction.
- t.Arch().CPUIDEmulate(t)
- t.Arch().SetIP(t.Arch().IP() + uintptr(len(expected)))
- break
- }
-
- // Treat it like any other synchronous signal.
- fallthrough
-
- case linux.SIGSEGV, linux.SIGBUS, linux.SIGFPE, linux.SIGTRAP:
+ case linux.SIGILL, linux.SIGSEGV, linux.SIGBUS, linux.SIGFPE, linux.SIGTRAP:
// Synchronous signal. Send it to ourselves. Assume the signal is
// legitimate and force it (work around the signal being ignored or
// blocked) like Linux does. Conveniently, this is even the correct
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index abdc51431..68e099d1b 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -141,11 +141,6 @@ func (m *machine) newVCPU() *vCPU {
panic(fmt.Sprintf("error setting signal mask: %v", err))
}
- // Initialize architecture state.
- if err := c.initArchState(); err != nil {
- panic(fmt.Sprintf("error initialization vCPU state: %v", err))
- }
-
// Map the run data.
runData, err := mapRunData(int(fd))
if err != nil {
@@ -153,6 +148,11 @@ func (m *machine) newVCPU() *vCPU {
}
c.runData = runData
+ // Initialize architecture state.
+ if err := c.initArchState(); err != nil {
+ panic(fmt.Sprintf("error initialization vCPU state: %v", err))
+ }
+
return c // Done.
}
@@ -168,12 +168,6 @@ func newMachine(vm int) (*machine, error) {
PageTables: pagetables.New(newAllocator()),
})
- // Initialize architecture state.
- if err := m.initArchState(); err != nil {
- m.Destroy()
- return nil, err
- }
-
// Apply the physical mappings. Note that these mappings may point to
// guest physical addresses that are not actually available. These
// physical pages are mapped on demand, see kernel_unsafe.go.
@@ -221,6 +215,12 @@ func newMachine(vm int) (*machine, error) {
}
})
+ // Initialize architecture state.
+ if err := m.initArchState(); err != nil {
+ m.Destroy()
+ return nil, err
+ }
+
// Ensure the machine is cleaned up properly.
runtime.SetFinalizer(m, (*machine).Destroy)
return m, nil
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 9af4f3f3d..bcd29a947 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -19,6 +19,7 @@ package kvm
import (
"fmt"
"reflect"
+ "runtime/debug"
"syscall"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
@@ -39,6 +40,21 @@ func (m *machine) initArchState() error {
uintptr(reservedMemory-(3*usermem.PageSize))); errno != 0 {
return errno
}
+
+ // Enable CPUID faulting, if possible. Note that this also serves as a
+ // basic platform sanity test, since we will enter guest mode for the
+ // first time here. The recovery is necessary, since if we fail to read
+ // the platform info register, we will retry to host mode and
+ // ultimately need to handle a segmentation fault.
+ old := debug.SetPanicOnFault(true)
+ defer func() {
+ recover()
+ debug.SetPanicOnFault(old)
+ }()
+ m.retryInGuest(func() {
+ ring0.SetCPUIDFaulting(true)
+ })
+
return nil
}
@@ -238,6 +254,12 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, user
Code: arch.SignalInfoKernel,
}
info.SetAddr(switchOpts.Registers.Rip) // Include address.
+ if vector == ring0.GeneralProtectionFault {
+ // When CPUID faulting is enabled, we will generate a #GP(0) when
+ // userspace executes a CPUID instruction. This is handled above,
+ // because we need to be able to map and read user memory.
+ return info, usermem.AccessType{}, platform.ErrContextSignalCPUID
+ }
return info, usermem.AccessType{}, platform.ErrContextSignal
case ring0.InvalidOpcode:
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index f2fe163e8..6eb2acbd7 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -154,6 +154,13 @@ var (
// Context was interrupted by a signal.
ErrContextSignal = fmt.Errorf("interrupted by signal")
+ // ErrContextSignalCPUID is equivalent to ErrContextSignal, except that
+ // a check should be done for execution of the CPUID instruction. If
+ // the current instruction pointer is a CPUID instruction, then this
+ // should be emulated appropriately. If not, then the given signal
+ // should be handled per above.
+ ErrContextSignalCPUID = fmt.Errorf("interrupted by signal, possible CPUID")
+
// ErrContextInterrupt is returned by Context.Switch() to indicate that the
// Context was interrupted by a call to Context.Interrupt().
ErrContextInterrupt = fmt.Errorf("interrupted by platform.Context.Interrupt()")
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 05f8b1d05..a44f549a2 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -101,9 +101,11 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) (
s := as.(*subprocess)
isSyscall := s.switchToApp(c, ac)
- var faultSP *subprocess
- var faultAddr usermem.Addr
- var faultIP usermem.Addr
+ var (
+ faultSP *subprocess
+ faultAddr usermem.Addr
+ faultIP usermem.Addr
+ )
if !isSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV {
faultSP = s
faultAddr = usermem.Addr(c.signalInfo.Addr())
@@ -161,7 +163,12 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) (
lastFaultIP == faultIP {
at.Write = true
}
- return &c.signalInfo, at, platform.ErrContextSignal
+
+ // Unfortunately, we have to unilaterally return ErrContextSignalCPUID
+ // here, in case this fault was generated by a CPUID exception. There
+ // is no way to distinguish between CPUID-generated faults and regular
+ // page faults.
+ return &c.signalInfo, at, platform.ErrContextSignalCPUID
}
// Interrupt interrupts the running guest application associated with this context.
diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go
index b3f2ebb20..b212bbdfe 100644
--- a/pkg/sentry/platform/ptrace/subprocess_linux.go
+++ b/pkg/sentry/platform/ptrace/subprocess_linux.go
@@ -20,6 +20,7 @@ import (
"fmt"
"syscall"
+ "gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/procid"
)
@@ -85,6 +86,10 @@ func createStub() (*thread, error) {
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0)
}
+ // Enable cpuid-faulting; this may fail on older kernels or hardware,
+ // so we just disregard the result; on failure, host CPUID stays enabled.
+ syscall.RawSyscall(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0)
+
// Call the stub; should not return.
stubCall(stubStart, ppid)
panic("unreachable")
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index 117e86104..0d2b0f7dc 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -163,7 +163,6 @@ func IsCanonical(addr uint64) bool {
// the case for amd64, but may not be the case for other architectures.
//
// Precondition: the Rip, Rsp, Fs and Gs registers must be canonical.
-
//
//go:nosplit
func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
@@ -237,6 +236,27 @@ func start(c *CPU) {
wrmsr(_MSR_CSTAR, kernelFunc(sysenter))
}
+// SetCPUIDFaulting turns CPUID faulting on or off, per the on argument.
+//
+// It returns true if the MSR was written, false if faulting is unsupported.
+//
+//go:nosplit
+func SetCPUIDFaulting(on bool) bool {
+ // Per the SDM (Vol 3, Table 2-43), PLATFORM_INFO bit 31 denotes support
+ // for CPUID faulting, and we enable and disable via the MISC_FEATURES MSR.
+ if rdmsr(_MSR_PLATFORM_INFO)&_PLATFORM_INFO_CPUID_FAULT != 0 {
+ features := rdmsr(_MSR_MISC_FEATURES)
+ if on {
+ features |= _MISC_FEATURE_CPUID_TRAP
+ } else {
+ features &^= _MISC_FEATURE_CPUID_TRAP
+ }
+ wrmsr(_MSR_MISC_FEATURES, features)
+ return true // Setting successful.
+ }
+ return false
+}
+
// ReadCR2 reads the current CR2 value.
//
//go:nosplit
diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go
index 3d437a77c..f489fcecb 100644
--- a/pkg/sentry/platform/ring0/x86.go
+++ b/pkg/sentry/platform/ring0/x86.go
@@ -50,10 +50,16 @@ const (
_EFER_LMA = 0x400
_EFER_NX = 0x800
- _MSR_STAR = 0xc0000081
- _MSR_LSTAR = 0xc0000082
- _MSR_CSTAR = 0xc0000083
- _MSR_SYSCALL_MASK = 0xc0000084
+ _MSR_STAR = 0xc0000081
+ _MSR_LSTAR = 0xc0000082
+ _MSR_CSTAR = 0xc0000083
+ _MSR_SYSCALL_MASK = 0xc0000084
+ _MSR_PLATFORM_INFO = 0xce
+ _MSR_MISC_FEATURES = 0x140
+
+ _PLATFORM_INFO_CPUID_FAULT = 1 << 31
+
+ _MISC_FEATURE_CPUID_TRAP = 0x1
)
const (