From 2f3dac78ca9aa1abb9d27570bc9ece0f486ddb60 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 16 Mar 2021 09:15:03 -0700 Subject: kvm: prefault a floating point state before restoring it If physical pages of a memory region are not mapped yet, the kernel will trigger KVM_EXIT_MMIO and we will map physical pages in bluepillHandler(). An instruction that triggered a fault will not be re-executed; it will be emulated in the kernel, but the kernel can't emulate complex instructions like xsave and xrstor. We can touch the memory with simple instructions to work around this problem. --- pkg/sentry/platform/kvm/bluepill_amd64.go | 6 +++--- pkg/sentry/platform/kvm/bluepill_arm64.go | 10 +++++----- pkg/sentry/platform/kvm/context.go | 2 +- pkg/sentry/platform/kvm/machine_amd64.go | 25 ++++++++++++++++++++++++- pkg/sentry/platform/kvm/machine_arm64.go | 2 +- 5 files changed, 34 insertions(+), 11 deletions(-) (limited to 'pkg/sentry/platform/kvm') diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index f4b9a5321..308696efe 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -73,7 +73,7 @@ func (c *vCPU) KernelSyscall() { // We only trigger a bluepill entry in the bluepill function, and can // therefore be guaranteed that there is no floating point state to be // loaded on resuming from halt. We only worry about saving on exit. - ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no. + ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no. ring0.Halt() ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment. } @@ -92,7 +92,7 @@ func (c *vCPU) KernelException(vector ring0.Vector) { regs.Rip = 0 } // See above. - ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no. + ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no. ring0.Halt() ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment. 
} @@ -124,5 +124,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) { // Set the context pointer to the saved floating point state. This is // where the guest data has been serialized, the kernel will restore // from this new pointer value. - context.Fpstate = uint64(uintptrValue((*byte)(c.floatingPointState))) + context.Fpstate = uint64(uintptrValue(&c.floatingPointState[0])) } diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go index e26b7da8d..c317f1e99 100644 --- a/pkg/sentry/platform/kvm/bluepill_arm64.go +++ b/pkg/sentry/platform/kvm/bluepill_arm64.go @@ -92,7 +92,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) { lazyVfp := c.GetLazyVFP() if lazyVfp != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(&c.floatingPointState[0]) context.Fpsimd64.Fpsr = fpsimd.Fpsr context.Fpsimd64.Fpcr = fpsimd.Fpcr context.Fpsimd64.Vregs = fpsimd.Vregs @@ -112,12 +112,12 @@ func (c *vCPU) KernelSyscall() { fpDisableTrap := ring0.CPACREL1() if fpDisableTrap != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(&c.floatingPointState[0]) fpcr := ring0.GetFPCR() fpsr := ring0.GetFPSR() fpsimd.Fpcr = uint32(fpcr) fpsimd.Fpsr = uint32(fpsr) - ring0.SaveVRegs((*byte)(c.floatingPointState)) + ring0.SaveVRegs(&c.floatingPointState[0]) } ring0.Halt() @@ -136,12 +136,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) { fpDisableTrap := ring0.CPACREL1() if fpDisableTrap != 0 { - fpsimd := fpsimdPtr((*byte)(c.floatingPointState)) + fpsimd := fpsimdPtr(&c.floatingPointState[0]) fpcr := ring0.GetFPCR() fpsr := ring0.GetFPSR() fpsimd.Fpcr = uint32(fpcr) fpsimd.Fpsr = uint32(fpsr) - ring0.SaveVRegs((*byte)(c.floatingPointState)) + ring0.SaveVRegs(&c.floatingPointState[0]) } ring0.Halt() diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index aeae01dbd..706fa53dc 100644 --- a/pkg/sentry/platform/kvm/context.go +++ 
b/pkg/sentry/platform/kvm/context.go @@ -65,7 +65,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a // Prepare switch options. switchOpts := ring0.SwitchOpts{ Registers: &ac.StateData().Regs, - FloatingPointState: (*byte)(ac.FloatingPointData()), + FloatingPointState: ac.FloatingPointData(), PageTables: localAS.pageTables, Flush: localAS.Touch(cpu), FullRestore: ac.FullRestore(), diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 6e583baa3..916903881 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -70,7 +70,7 @@ type vCPUArchState struct { // floatingPointState is the floating point state buffer used in guest // to host transitions. See usage in bluepill_amd64.go. - floatingPointState *arch.FloatingPointData + floatingPointState arch.FloatingPointData } const ( @@ -293,6 +293,28 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e return accessType, platform.ErrContextSignal } +//go:nosplit +//go:noinline +func loadByte(ptr *byte) byte { + return *ptr +} + +// prefaultFloatingPointState touches each page of the floating point state to +// be sure that its physical pages are mapped. +// +// Otherwise the kernel can trigger KVM_EXIT_MMIO and an instruction that +// triggered a fault will be emulated by the kvm kernel code, but it can't +// emulate instructions like xsave and xrstor. +// +//go:nosplit +func prefaultFloatingPointState(data arch.FloatingPointData) { + size := len(data) + for i := 0; i < size; i += usermem.PageSize { + loadByte(&(data)[i]) + } + loadByte(&(data)[size-1]) +} + // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) { // Check for canonical addresses. @@ -323,6 +345,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) // allocations occur. 
entersyscall() bluepill(c) + prefaultFloatingPointState(switchOpts.FloatingPointState) vector = c.CPU.SwitchToUser(switchOpts) exitsyscall() diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index 7d7857067..3d715e570 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -32,7 +32,7 @@ type vCPUArchState struct { // floatingPointState is the floating point state buffer used in guest // to host transitions. See usage in bluepill_arm64.go. - floatingPointState *arch.FloatingPointData + floatingPointState arch.FloatingPointData } const ( -- cgit v1.2.3