From 29e00c943a61dfcfd4ac8d3f6f526eab641c44a6 Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Mon, 16 Jul 2018 22:02:03 -0700 Subject: Add CPUID faulting for ptrace and KVM. PiperOrigin-RevId: 204858314 Change-Id: I8252bf8de3232a7a27af51076139b585e73276d4 --- pkg/abi/linux/prctl.go | 9 +++--- pkg/sentry/kernel/task_run.go | 41 ++++++++++++-------------- pkg/sentry/platform/kvm/machine.go | 22 +++++++------- pkg/sentry/platform/kvm/machine_amd64.go | 22 ++++++++++++++ pkg/sentry/platform/platform.go | 7 +++++ pkg/sentry/platform/ptrace/ptrace.go | 15 +++++++--- pkg/sentry/platform/ptrace/subprocess_linux.go | 5 ++++ pkg/sentry/platform/ring0/kernel_amd64.go | 22 +++++++++++++- pkg/sentry/platform/ring0/x86.go | 14 ++++++--- 9 files changed, 111 insertions(+), 46 deletions(-) diff --git a/pkg/abi/linux/prctl.go b/pkg/abi/linux/prctl.go index 6c93601de..074ec03f0 100644 --- a/pkg/abi/linux/prctl.go +++ b/pkg/abi/linux/prctl.go @@ -65,8 +65,9 @@ const ( // From // Flags are used in syscall arch_prctl(2). const ( - ARCH_SET_GS = 0x1001 - ARCH_SET_FS = 0x1002 - ARCH_GET_FS = 0x1003 - ARCH_GET_GS = 0x1004 + ARCH_SET_GS = 0x1001 + ARCH_SET_FS = 0x1002 + ARCH_GET_FS = 0x1003 + ARCH_GET_GS = 0x1004 + ARCH_SET_CPUID = 0x1012 ) diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index 94ce5582b..a03fa6ac0 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -221,6 +221,24 @@ func (*runApp) execute(t *Task) taskRunState { // loop to figure out why. return (*runApp)(nil) + case platform.ErrContextSignalCPUID: + // Is this a CPUID instruction? + expected := arch.CPUIDInstruction[:] + found := make([]byte, len(expected)) + _, err := t.CopyIn(usermem.Addr(t.Arch().IP()), &found) + if err == nil && bytes.Equal(expected, found) { + // Skip the cpuid instruction. + t.Arch().CPUIDEmulate(t) + t.Arch().SetIP(t.Arch().IP() + uintptr(len(expected))) + + // Resume execution. 
+ return (*runApp)(nil) + } + + // The instruction at the given RIP was not a CPUID, and we + // fall through to the default signal delivery behavior below. + fallthrough + case platform.ErrContextSignal: // Looks like a signal has been delivered to us. If it's a synchronous // signal (SEGV, SIGBUS, etc.), it should be sent to the application @@ -266,28 +284,7 @@ func (*runApp) execute(t *Task) taskRunState { } switch sig { - case linux.SIGILL: - // N.B. The debug stuff here is arguably - // expensive. Don't fret. This gets called - // about 5 times for a typical application, if - // that. - t.Debugf("SIGILL @ %x", t.Arch().IP()) - - // Is this a CPUID instruction? - expected := arch.CPUIDInstruction[:] - found := make([]byte, len(expected)) - _, err := t.CopyIn(usermem.Addr(t.Arch().IP()), &found) - if err == nil && bytes.Equal(expected, found) { - // Skip the cpuid instruction. - t.Arch().CPUIDEmulate(t) - t.Arch().SetIP(t.Arch().IP() + uintptr(len(expected))) - break - } - - // Treat it like any other synchronous signal. - fallthrough - - case linux.SIGSEGV, linux.SIGBUS, linux.SIGFPE, linux.SIGTRAP: + case linux.SIGILL, linux.SIGSEGV, linux.SIGBUS, linux.SIGFPE, linux.SIGTRAP: // Synchronous signal. Send it to ourselves. Assume the signal is // legitimate and force it (work around the signal being ignored or // blocked) like Linux does. Conveniently, this is even the correct diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index abdc51431..68e099d1b 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -141,11 +141,6 @@ func (m *machine) newVCPU() *vCPU { panic(fmt.Sprintf("error setting signal mask: %v", err)) } - // Initialize architecture state. - if err := c.initArchState(); err != nil { - panic(fmt.Sprintf("error initialization vCPU state: %v", err)) - } - // Map the run data. 
runData, err := mapRunData(int(fd)) if err != nil { @@ -153,6 +148,11 @@ func (m *machine) newVCPU() *vCPU { } c.runData = runData + // Initialize architecture state. + if err := c.initArchState(); err != nil { + panic(fmt.Sprintf("error initialization vCPU state: %v", err)) + } + return c // Done. } @@ -168,12 +168,6 @@ func newMachine(vm int) (*machine, error) { PageTables: pagetables.New(newAllocator()), }) - // Initialize architecture state. - if err := m.initArchState(); err != nil { - m.Destroy() - return nil, err - } - // Apply the physical mappings. Note that these mappings may point to // guest physical addresses that are not actually available. These // physical pages are mapped on demand, see kernel_unsafe.go. @@ -221,6 +215,12 @@ func newMachine(vm int) (*machine, error) { } }) + // Initialize architecture state. + if err := m.initArchState(); err != nil { + m.Destroy() + return nil, err + } + // Ensure the machine is cleaned up properly. runtime.SetFinalizer(m, (*machine).Destroy) return m, nil diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 9af4f3f3d..bcd29a947 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -19,6 +19,7 @@ package kvm import ( "fmt" "reflect" + "runtime/debug" "syscall" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" @@ -39,6 +40,21 @@ func (m *machine) initArchState() error { uintptr(reservedMemory-(3*usermem.PageSize))); errno != 0 { return errno } + + // Enable CPUID faulting, if possible. Note that this also serves as a + // basic platform sanity test, since we will enter guest mode for the + // first time here. The recovery is necessary, since if we fail to read + // the platform info register, we will return to host mode and + // ultimately need to handle a segmentation fault. 
+ old := debug.SetPanicOnFault(true) + defer func() { + recover() + debug.SetPanicOnFault(old) + }() + m.retryInGuest(func() { + ring0.SetCPUIDFaulting(true) + }) + return nil } @@ -238,6 +254,12 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, user Code: arch.SignalInfoKernel, } info.SetAddr(switchOpts.Registers.Rip) // Include address. + if vector == ring0.GeneralProtectionFault { + // When CPUID faulting is enabled, we will generate a #GP(0) when + // userspace executes a CPUID instruction. This is handled above, + // because we need to be able to map and read user memory. + return info, usermem.AccessType{}, platform.ErrContextSignalCPUID + } return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.InvalidOpcode: diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go index f2fe163e8..6eb2acbd7 100644 --- a/pkg/sentry/platform/platform.go +++ b/pkg/sentry/platform/platform.go @@ -154,6 +154,13 @@ var ( // Context was interrupted by a signal. ErrContextSignal = fmt.Errorf("interrupted by signal") + // ErrContextSignalCPUID is equivalent to ErrContextSignal, except that + // a check should be done for execution of the CPUID instruction. If + // the current instruction pointer is a CPUID instruction, then this + // should be emulated appropriately. If not, then the given signal + // should be handled per above. + ErrContextSignalCPUID = fmt.Errorf("interrupted by signal, possible CPUID") + // ErrContextInterrupt is returned by Context.Switch() to indicate that the // Context was interrupted by a call to Context.Interrupt(). 
ErrContextInterrupt = fmt.Errorf("interrupted by platform.Context.Interrupt()") diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go index 05f8b1d05..a44f549a2 100644 --- a/pkg/sentry/platform/ptrace/ptrace.go +++ b/pkg/sentry/platform/ptrace/ptrace.go @@ -101,9 +101,11 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) ( s := as.(*subprocess) isSyscall := s.switchToApp(c, ac) - var faultSP *subprocess - var faultAddr usermem.Addr - var faultIP usermem.Addr + var ( + faultSP *subprocess + faultAddr usermem.Addr + faultIP usermem.Addr + ) if !isSyscall && linux.Signal(c.signalInfo.Signo) == linux.SIGSEGV { faultSP = s faultAddr = usermem.Addr(c.signalInfo.Addr()) @@ -161,7 +163,12 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) ( lastFaultIP == faultIP { at.Write = true } - return &c.signalInfo, at, platform.ErrContextSignal + + // Unfortunately, we have to unilaterally return ErrContextSignalCPUID + // here, in case this fault was generated by a CPUID exception. There + // is no way to distinguish between CPUID-generated faults and regular + // page faults. + return &c.signalInfo, at, platform.ErrContextSignalCPUID } // Interrupt interrupts the running guest application associated with this context. diff --git a/pkg/sentry/platform/ptrace/subprocess_linux.go b/pkg/sentry/platform/ptrace/subprocess_linux.go index b3f2ebb20..b212bbdfe 100644 --- a/pkg/sentry/platform/ptrace/subprocess_linux.go +++ b/pkg/sentry/platform/ptrace/subprocess_linux.go @@ -20,6 +20,7 @@ import ( "fmt" "syscall" + "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/procid" ) @@ -85,6 +86,10 @@ func createStub() (*thread, error) { syscall.RawSyscall(syscall.SYS_EXIT, uintptr(errno), 0, 0) } + // Enable cpuid-faulting; this may fail on older kernels or hardware, + // so we just disregard the result. 
If it fails, host CPUID will remain enabled. + syscall.RawSyscall(syscall.SYS_ARCH_PRCTL, linux.ARCH_SET_CPUID, 0, 0) + // Call the stub; should not return. stubCall(stubStart, ppid) panic("unreachable") diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 117e86104..0d2b0f7dc 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -163,7 +163,6 @@ func IsCanonical(addr uint64) bool { // the case for amd64, but may not be the case for other architectures. // // Precondition: the Rip, Rsp, Fs and Gs registers must be canonical. - // //go:nosplit func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { @@ -237,6 +236,27 @@ func start(c *CPU) { wrmsr(_MSR_CSTAR, kernelFunc(sysenter)) } +// SetCPUIDFaulting sets CPUID faulting per the boolean value. +// +// True is returned if faulting could be set. +// +//go:nosplit +func SetCPUIDFaulting(on bool) bool { + // Per the SDM (Vol 3, Table 2-43), PLATFORM_INFO bit 31 denotes support + // for CPUID faulting, and we enable and disable via the MISC_FEATURES MSR. + if rdmsr(_MSR_PLATFORM_INFO)&_PLATFORM_INFO_CPUID_FAULT != 0 { + features := rdmsr(_MSR_MISC_FEATURES) + if on { + features |= _MISC_FEATURE_CPUID_TRAP + } else { + features &^= _MISC_FEATURE_CPUID_TRAP + } + wrmsr(_MSR_MISC_FEATURES, features) + return true // Setting successful. + } + return false +} + // ReadCR2 reads the current CR2 value. 
// //go:nosplit diff --git a/pkg/sentry/platform/ring0/x86.go b/pkg/sentry/platform/ring0/x86.go index 3d437a77c..f489fcecb 100644 --- a/pkg/sentry/platform/ring0/x86.go +++ b/pkg/sentry/platform/ring0/x86.go @@ -50,10 +50,16 @@ const ( _EFER_LMA = 0x400 _EFER_NX = 0x800 - _MSR_STAR = 0xc0000081 - _MSR_LSTAR = 0xc0000082 - _MSR_CSTAR = 0xc0000083 - _MSR_SYSCALL_MASK = 0xc0000084 + _MSR_STAR = 0xc0000081 + _MSR_LSTAR = 0xc0000082 + _MSR_CSTAR = 0xc0000083 + _MSR_SYSCALL_MASK = 0xc0000084 + _MSR_PLATFORM_INFO = 0xce + _MSR_MISC_FEATURES = 0x140 + + _PLATFORM_INFO_CPUID_FAULT = 1 << 31 + + _MISC_FEATURE_CPUID_TRAP = 0x1 ) const ( -- cgit v1.2.3