summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@gmail.com>2021-03-16 09:15:03 -0700
committerAndrei Vagin <avagin@gmail.com>2021-03-16 21:55:20 -0700
commit2f3dac78ca9aa1abb9d27570bc9ece0f486ddb60 (patch)
tree195f9161e491c3f31ac6a1191e651f25f9743976 /pkg
parentf7e841c2cede357c4cbd6117605e3f3d54f1961c (diff)
kvm: prefault a floating point state before restoring it
If the physical pages of a memory region are not mapped yet, the kernel will trigger KVM_EXIT_MMIO and we will map the physical pages in bluepillHandler(). The instruction that triggered the fault will not be re-executed; instead it will be emulated in the kernel, but the kernel can't emulate complex instructions like xsave and xrstor. We can touch the memory with simple instructions beforehand to work around this problem.
Diffstat (limited to 'pkg')
-rw-r--r--pkg/ring0/defs.go2
-rw-r--r--pkg/ring0/kernel_amd64.go12
-rw-r--r--pkg/ring0/kernel_arm64.go4
-rw-r--r--pkg/sentry/arch/arch.go4
-rw-r--r--pkg/sentry/arch/arch_aarch64.go8
-rw-r--r--pkg/sentry/arch/arch_x86.go14
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go6
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go10
-rw-r--r--pkg/sentry/platform/kvm/context.go2
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go25
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go2
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go8
12 files changed, 60 insertions, 37 deletions
diff --git a/pkg/ring0/defs.go b/pkg/ring0/defs.go
index e2561e4c2..e8ce608ba 100644
--- a/pkg/ring0/defs.go
+++ b/pkg/ring0/defs.go
@@ -96,7 +96,7 @@ type SwitchOpts struct {
// FloatingPointState is a byte pointer where floating point state is
// saved and restored.
- FloatingPointState *byte
+ FloatingPointState arch.FloatingPointData
// PageTables are the application page tables.
PageTables *pagetables.PageTables
diff --git a/pkg/ring0/kernel_amd64.go b/pkg/ring0/kernel_amd64.go
index 36a60700e..e9e706716 100644
--- a/pkg/ring0/kernel_amd64.go
+++ b/pkg/ring0/kernel_amd64.go
@@ -239,17 +239,17 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
regs.Ss = uint64(Udata) // Ditto.
// Perform the switch.
- swapgs() // GS will be swapped on return.
- WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
- WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
- LoadFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy in floating point.
+ swapgs() // GS will be swapped on return.
+ WriteFS(uintptr(regs.Fs_base)) // escapes: no. Set application FS.
+ WriteGS(uintptr(regs.Gs_base)) // escapes: no. Set application GS.
+ LoadFloatingPoint(&switchOpts.FloatingPointState[0]) // escapes: no. Copy in floating point.
if switchOpts.FullRestore {
vector = iret(c, regs, uintptr(userCR3))
} else {
vector = sysret(c, regs, uintptr(userCR3))
}
- SaveFloatingPoint(switchOpts.FloatingPointState) // escapes: no. Copy out floating point.
- WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
+ SaveFloatingPoint(&switchOpts.FloatingPointState[0]) // escapes: no. Copy out floating point.
+ WriteFS(uintptr(c.registers.Fs_base)) // escapes: no. Restore kernel FS.
return
}
diff --git a/pkg/ring0/kernel_arm64.go b/pkg/ring0/kernel_arm64.go
index 41909b3a0..c9a120952 100644
--- a/pkg/ring0/kernel_arm64.go
+++ b/pkg/ring0/kernel_arm64.go
@@ -62,7 +62,7 @@ func IsCanonical(addr uint64) bool {
//go:nosplit
func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
storeAppASID(uintptr(switchOpts.UserASID))
- storeEl0Fpstate(switchOpts.FloatingPointState)
+ storeEl0Fpstate(&switchOpts.FloatingPointState[0])
if switchOpts.Flush {
FlushTlbByASID(uintptr(switchOpts.UserASID))
@@ -82,7 +82,7 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
fpDisableTrap = CPACREL1()
if fpDisableTrap != 0 {
- SaveFloatingPoint(switchOpts.FloatingPointState)
+ SaveFloatingPoint(&switchOpts.FloatingPointState[0])
}
vector = c.vecCode
diff --git a/pkg/sentry/arch/arch.go b/pkg/sentry/arch/arch.go
index dd2effdf9..3443b9e1b 100644
--- a/pkg/sentry/arch/arch.go
+++ b/pkg/sentry/arch/arch.go
@@ -54,7 +54,7 @@ func (a Arch) String() string {
// We rely on the individual arch implementations to meet all the necessary
// requirements. For example, on x86 the region must be 16-byte aligned and 512
// bytes in size.
-type FloatingPointData byte
+type FloatingPointData []byte
// Context provides architecture-dependent information for a specific thread.
//
@@ -187,7 +187,7 @@ type Context interface {
ClearSingleStep()
// FloatingPointData will be passed to underlying save routines.
- FloatingPointData() *FloatingPointData
+ FloatingPointData() FloatingPointData
// NewMmapLayout returns a layout for a new MM, where MinAddr for the
// returned layout must be no lower than min, and MaxAddr for the returned
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index fd73751e7..6b81e9708 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -88,15 +88,15 @@ func (f aarch64FPState) fork() aarch64FPState {
}
// FloatingPointData returns the raw data pointer.
-func (f aarch64FPState) FloatingPointData() *FloatingPointData {
- return (*FloatingPointData)(&f[0])
+func (f aarch64FPState) FloatingPointData() FloatingPointData {
+ return ([]byte)(f)
}
// NewFloatingPointData returns a new floating point data blob.
//
// This is primarily for use in tests.
-func NewFloatingPointData() *FloatingPointData {
- return (*FloatingPointData)(&(newAarch64FPState()[0]))
+func NewFloatingPointData() FloatingPointData {
+ return ([]byte)(newAarch64FPState())
}
// State contains the common architecture bits for aarch64 (the build tag of this
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 641ada92f..91edf0703 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -115,7 +115,7 @@ var (
type x86FPState []byte
// initX86FPState (defined in asm files) sets up initial state.
-func initX86FPState(data *FloatingPointData, useXsave bool)
+func initX86FPState(data *byte, useXsave bool)
func newX86FPStateSlice() []byte {
size, align := cpuid.HostFeatureSet().ExtendedStateSize()
@@ -139,7 +139,7 @@ func newX86FPStateSlice() []byte {
// CPUID we must ensure it does not contain any sentry state.
func newX86FPState() x86FPState {
f := x86FPState(newX86FPStateSlice())
- initX86FPState(f.FloatingPointData(), cpuid.HostFeatureSet().UseXsave())
+ initX86FPState(&f.FloatingPointData()[0], cpuid.HostFeatureSet().UseXsave())
return f
}
@@ -151,15 +151,15 @@ func (f x86FPState) fork() x86FPState {
}
// FloatingPointData returns the raw data pointer.
-func (f x86FPState) FloatingPointData() *FloatingPointData {
- return (*FloatingPointData)(&f[0])
+func (f x86FPState) FloatingPointData() FloatingPointData {
+ return []byte(f)
}
// NewFloatingPointData returns a new floating point data blob.
//
// This is primarily for use in tests.
-func NewFloatingPointData() *FloatingPointData {
- return (*FloatingPointData)(&(newX86FPState()[0]))
+func NewFloatingPointData() FloatingPointData {
+ return (FloatingPointData)(newX86FPState())
}
// Proto returns a protobuf representation of the system registers in State.
@@ -442,7 +442,7 @@ func sanitizeMXCSR(f x86FPState) {
mxcsr := usermem.ByteOrder.Uint32(f[mxcsrOffset:])
initMXCSRMask.Do(func() {
temp := x86FPState(alignedBytes(uint(ptraceFPRegsSize), 16))
- initX86FPState(temp.FloatingPointData(), false /* useXsave */)
+ initX86FPState(&temp.FloatingPointData()[0], false /* useXsave */)
mxcsrMask = usermem.ByteOrder.Uint32(temp[mxcsrMaskOffset:])
if mxcsrMask == 0 {
// "If the value of the MXCSR_MASK field is 00000000H, then the
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index f4b9a5321..308696efe 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -73,7 +73,7 @@ func (c *vCPU) KernelSyscall() {
// We only trigger a bluepill entry in the bluepill function, and can
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt. We only worry about saving on exit.
- ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no.
+ ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment.
}
@@ -92,7 +92,7 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
regs.Rip = 0
}
// See above.
- ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no.
+ ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment.
}
@@ -124,5 +124,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
// Set the context pointer to the saved floating point state. This is
// where the guest data has been serialized, the kernel will restore
// from this new pointer value.
- context.Fpstate = uint64(uintptrValue((*byte)(c.floatingPointState)))
+ context.Fpstate = uint64(uintptrValue(&c.floatingPointState[0]))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index e26b7da8d..c317f1e99 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -92,7 +92,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
lazyVfp := c.GetLazyVFP()
if lazyVfp != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
context.Fpsimd64.Fpsr = fpsimd.Fpsr
context.Fpsimd64.Fpcr = fpsimd.Fpcr
context.Fpsimd64.Vregs = fpsimd.Vregs
@@ -112,12 +112,12 @@ func (c *vCPU) KernelSyscall() {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs((*byte)(c.floatingPointState))
+ ring0.SaveVRegs(&c.floatingPointState[0])
}
ring0.Halt()
@@ -136,12 +136,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs((*byte)(c.floatingPointState))
+ ring0.SaveVRegs(&c.floatingPointState[0])
}
ring0.Halt()
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index aeae01dbd..706fa53dc 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -65,7 +65,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a
// Prepare switch options.
switchOpts := ring0.SwitchOpts{
Registers: &ac.StateData().Regs,
- FloatingPointState: (*byte)(ac.FloatingPointData()),
+ FloatingPointState: ac.FloatingPointData(),
PageTables: localAS.pageTables,
Flush: localAS.Touch(cpu),
FullRestore: ac.FullRestore(),
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 6e583baa3..916903881 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -70,7 +70,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_amd64.go.
- floatingPointState *arch.FloatingPointData
+ floatingPointState arch.FloatingPointData
}
const (
@@ -293,6 +293,28 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e
return accessType, platform.ErrContextSignal
}
+//go:nosplit
+//go:noinline
+func loadByte(ptr *byte) byte {
+ return *ptr
+}
+
+// prefaultFloatingPointState touches each page of the floating point state to
+// be sure that its physical pages are mapped.
+//
+// Otherwise the kernel can trigger KVM_EXIT_MMIO and an instruction that
+// triggered a fault will be emulated by the kvm kernel code, but it can't
+// emulate instructions like xsave and xrstor.
+//
+//go:nosplit
+func prefaultFloatingPointState(data arch.FloatingPointData) {
+ size := len(data)
+ for i := 0; i < size; i += usermem.PageSize {
+ loadByte(&(data)[i])
+ }
+ loadByte(&(data)[size-1])
+}
+
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
// Check for canonical addresses.
@@ -323,6 +345,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
// allocations occur.
entersyscall()
bluepill(c)
+ prefaultFloatingPointState(switchOpts.FloatingPointState)
vector = c.CPU.SwitchToUser(switchOpts)
exitsyscall()
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 7d7857067..3d715e570 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -32,7 +32,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_arm64.go.
- floatingPointState *arch.FloatingPointData
+ floatingPointState arch.FloatingPointData
}
const (
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 2c21f946e..6259350ec 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -62,9 +62,9 @@ func (t *thread) setRegs(regs *arch.Registers) error {
}
// getFPRegs gets the floating-point data via the GETREGSET ptrace unix.
-func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) getFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(fpState),
+ Base: (*byte)(&fpState[0]),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(
@@ -81,9 +81,9 @@ func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsa
}
// setFPRegs sets the floating-point data via the SETREGSET ptrace unix.
-func (t *thread) setFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) setFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(fpState),
+ Base: (*byte)(&fpState[0]),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(