summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@gmail.com>2021-03-16 09:15:03 -0700
committerAndrei Vagin <avagin@gmail.com>2021-03-16 21:55:20 -0700
commit2f3dac78ca9aa1abb9d27570bc9ece0f486ddb60 (patch)
tree195f9161e491c3f31ac6a1191e651f25f9743976 /pkg/sentry/platform
parentf7e841c2cede357c4cbd6117605e3f3d54f1961c (diff)
kvm: prefault a floating point state before restoring it
If the physical pages of a memory region are not mapped yet, the kernel will trigger KVM_EXIT_MMIO and we will map the physical pages in bluepillHandler(). The instruction that triggered the fault will not be re-executed; instead, it will be emulated in the kernel, but the kernel can't emulate complex instructions like xsave and xrstor. We can touch the memory with simple instructions to work around this problem.
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go6
-rw-r--r--pkg/sentry/platform/kvm/bluepill_arm64.go10
-rw-r--r--pkg/sentry/platform/kvm/context.go2
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go25
-rw-r--r--pkg/sentry/platform/kvm/machine_arm64.go2
-rw-r--r--pkg/sentry/platform/ptrace/ptrace_unsafe.go8
6 files changed, 38 insertions, 15 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index f4b9a5321..308696efe 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -73,7 +73,7 @@ func (c *vCPU) KernelSyscall() {
// We only trigger a bluepill entry in the bluepill function, and can
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt. We only worry about saving on exit.
- ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no.
+ ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment.
}
@@ -92,7 +92,7 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
regs.Rip = 0
}
// See above.
- ring0.SaveFloatingPoint((*byte)(c.floatingPointState)) // escapes: no.
+ ring0.SaveFloatingPoint(&c.floatingPointState[0]) // escapes: no.
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment.
}
@@ -124,5 +124,5 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
// Set the context pointer to the saved floating point state. This is
// where the guest data has been serialized, the kernel will restore
// from this new pointer value.
- context.Fpstate = uint64(uintptrValue((*byte)(c.floatingPointState)))
+ context.Fpstate = uint64(uintptrValue(&c.floatingPointState[0]))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_arm64.go b/pkg/sentry/platform/kvm/bluepill_arm64.go
index e26b7da8d..c317f1e99 100644
--- a/pkg/sentry/platform/kvm/bluepill_arm64.go
+++ b/pkg/sentry/platform/kvm/bluepill_arm64.go
@@ -92,7 +92,7 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
lazyVfp := c.GetLazyVFP()
if lazyVfp != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
context.Fpsimd64.Fpsr = fpsimd.Fpsr
context.Fpsimd64.Fpcr = fpsimd.Fpcr
context.Fpsimd64.Vregs = fpsimd.Vregs
@@ -112,12 +112,12 @@ func (c *vCPU) KernelSyscall() {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs((*byte)(c.floatingPointState))
+ ring0.SaveVRegs(&c.floatingPointState[0])
}
ring0.Halt()
@@ -136,12 +136,12 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
fpDisableTrap := ring0.CPACREL1()
if fpDisableTrap != 0 {
- fpsimd := fpsimdPtr((*byte)(c.floatingPointState))
+ fpsimd := fpsimdPtr(&c.floatingPointState[0])
fpcr := ring0.GetFPCR()
fpsr := ring0.GetFPSR()
fpsimd.Fpcr = uint32(fpcr)
fpsimd.Fpsr = uint32(fpsr)
- ring0.SaveVRegs((*byte)(c.floatingPointState))
+ ring0.SaveVRegs(&c.floatingPointState[0])
}
ring0.Halt()
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index aeae01dbd..706fa53dc 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -65,7 +65,7 @@ func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac a
// Prepare switch options.
switchOpts := ring0.SwitchOpts{
Registers: &ac.StateData().Regs,
- FloatingPointState: (*byte)(ac.FloatingPointData()),
+ FloatingPointState: ac.FloatingPointData(),
PageTables: localAS.pageTables,
Flush: localAS.Touch(cpu),
FullRestore: ac.FullRestore(),
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 6e583baa3..916903881 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -70,7 +70,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_amd64.go.
- floatingPointState *arch.FloatingPointData
+ floatingPointState arch.FloatingPointData
}
const (
@@ -293,6 +293,28 @@ func (c *vCPU) fault(signal int32, info *arch.SignalInfo) (usermem.AccessType, e
return accessType, platform.ErrContextSignal
}
+//go:nosplit
+//go:noinline
+func loadByte(ptr *byte) byte {
+ return *ptr
+}
+
+// prefaultFloatingPointState touches each page of the floating point state to
+// be sure that its physical pages are mapped.
+//
+// Otherwise the kernel can trigger KVM_EXIT_MMIO and an instruction that
+// triggered a fault will be emulated by the kvm kernel code, but it can't
+// emulate instructions like xsave and xrstor.
+//
+//go:nosplit
+func prefaultFloatingPointState(data arch.FloatingPointData) {
+ size := len(data)
+ for i := 0; i < size; i += usermem.PageSize {
+ loadByte(&(data)[i])
+ }
+ loadByte(&(data)[size-1])
+}
+
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo) (usermem.AccessType, error) {
// Check for canonical addresses.
@@ -323,6 +345,7 @@ func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts, info *arch.SignalInfo)
// allocations occur.
entersyscall()
bluepill(c)
+ prefaultFloatingPointState(switchOpts.FloatingPointState)
vector = c.CPU.SwitchToUser(switchOpts)
exitsyscall()
diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go
index 7d7857067..3d715e570 100644
--- a/pkg/sentry/platform/kvm/machine_arm64.go
+++ b/pkg/sentry/platform/kvm/machine_arm64.go
@@ -32,7 +32,7 @@ type vCPUArchState struct {
// floatingPointState is the floating point state buffer used in guest
// to host transitions. See usage in bluepill_arm64.go.
- floatingPointState *arch.FloatingPointData
+ floatingPointState arch.FloatingPointData
}
const (
diff --git a/pkg/sentry/platform/ptrace/ptrace_unsafe.go b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
index 2c21f946e..6259350ec 100644
--- a/pkg/sentry/platform/ptrace/ptrace_unsafe.go
+++ b/pkg/sentry/platform/ptrace/ptrace_unsafe.go
@@ -62,9 +62,9 @@ func (t *thread) setRegs(regs *arch.Registers) error {
}
// getFPRegs gets the floating-point data via the GETREGSET ptrace unix.
-func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) getFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(fpState),
+ Base: (*byte)(&fpState[0]),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(
@@ -81,9 +81,9 @@ func (t *thread) getFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsa
}
// setFPRegs sets the floating-point data via the SETREGSET ptrace unix.
-func (t *thread) setFPRegs(fpState *arch.FloatingPointData, fpLen uint64, useXsave bool) error {
+func (t *thread) setFPRegs(fpState arch.FloatingPointData, fpLen uint64, useXsave bool) error {
iovec := unix.Iovec{
- Base: (*byte)(fpState),
+ Base: (*byte)(&fpState[0]),
Len: fpLen,
}
_, _, errno := unix.RawSyscall6(