summary | refs | log | tree | commit | diff | homepage
path: root/pkg/sentry/platform
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r-- pkg/sentry/platform/kvm/bluepill_amd64.go 46
-rw-r--r-- pkg/sentry/platform/kvm/bluepill_unsafe.go 7
-rw-r--r-- pkg/sentry/platform/kvm/machine.go 4
-rw-r--r-- pkg/sentry/platform/kvm/machine_amd64.go 10
-rw-r--r-- pkg/sentry/platform/ring0/defs.go 52
-rw-r--r-- pkg/sentry/platform/ring0/entry_amd64.s 41
-rw-r--r-- pkg/sentry/platform/ring0/kernel.go 34
-rw-r--r-- pkg/sentry/platform/ring0/kernel_amd64.go 2
-rw-r--r-- pkg/sentry/platform/ring0/offsets_amd64.go 2
9 files changed, 103 insertions, 95 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index f013d1dc9..6520682d7 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -47,8 +47,8 @@ func redpill() {
// bluepillArchEnter is called during bluepillEnter.
//
//go:nosplit
-func bluepillArchEnter(context *arch.SignalContext64) (c *vCPU) {
- c = vCPUPtr(uintptr(context.Rax))
+func bluepillArchEnter(context *arch.SignalContext64) *vCPU {
+ c := vCPUPtr(uintptr(context.Rax))
regs := c.CPU.Registers()
regs.R8 = context.R8
regs.R9 = context.R9
@@ -73,50 +73,41 @@ func bluepillArchEnter(context *arch.SignalContext64) (c *vCPU) {
regs.Cs = uint64(ring0.Kcode)
regs.Ds = uint64(ring0.Udata)
regs.Es = uint64(ring0.Udata)
- regs.Fs = uint64(ring0.Udata)
regs.Ss = uint64(ring0.Kdata)
-
- // ring0 uses GS exclusively, so we use GS_base to store the location
- // of the floating point address.
- //
- // The address will be restored directly after running the VCPU, and
- // will be saved again prior to halting. We rely on the fact that the
- // SaveFloatingPointer/LoadFloatingPoint functions use the most
- // efficient mechanism available (including compression) so the state
- // size is guaranteed to be less than what's pointed to here.
- regs.Gs_base = uint64(context.Fpstate)
- return
+ return c
}
-// bluepillSyscall handles kernel syscalls.
+// KernelSyscall handles kernel syscalls.
//
//go:nosplit
-func bluepillSyscall() {
- regs := ring0.Current().Registers()
+func (c *vCPU) KernelSyscall() {
+ regs := c.Registers()
if regs.Rax != ^uint64(0) {
regs.Rip -= 2 // Rewind.
}
- ring0.SaveFloatingPoint(bytePtr(uintptr(regs.Gs_base)))
+ // We only trigger a bluepill entry in the bluepill function, and can
+ // therefore be guaranteed that there is no floating point state to be
+ // loaded on resuming from halt. We only worry about saving on exit.
+ ring0.SaveFloatingPoint((*byte)(c.floatingPointState))
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // Reload host segment.
- ring0.LoadFloatingPoint(bytePtr(uintptr(regs.Gs_base)))
}
-// bluepillException handles kernel exceptions.
+// KernelException handles kernel exceptions.
//
//go:nosplit
-func bluepillException(vector ring0.Vector) {
- regs := ring0.Current().Registers()
+func (c *vCPU) KernelException(vector ring0.Vector) {
+ regs := c.Registers()
if vector == ring0.Vector(bounce) {
// These should not interrupt kernel execution; point the Rip
// to zero to ensure that we get a reasonable panic when we
- // attempt to return.
+ // attempt to return and a full stack trace.
regs.Rip = 0
}
- ring0.SaveFloatingPoint(bytePtr(uintptr(regs.Gs_base)))
+ // As in KernelSyscall: only save floating point state before halting.
+ ring0.SaveFloatingPoint((*byte)(c.floatingPointState))
ring0.Halt()
ring0.WriteFS(uintptr(regs.Fs_base)) // Reload host segment.
- ring0.LoadFloatingPoint(bytePtr(uintptr(regs.Gs_base)))
}
// bluepillArchExit is called during bluepillEnter.
@@ -142,4 +133,9 @@ func bluepillArchExit(c *vCPU, context *arch.SignalContext64) {
context.Rsp = regs.Rsp
context.Rip = regs.Rip
context.Eflags = regs.Eflags
+
+ // Set the context pointer to the saved floating point state. This is
+ // where the guest data has been serialized; the kernel will restore
+ // from this new pointer value.
+ context.Fpstate = uint64(uintptrValue((*byte)(c.floatingPointState)))
}
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 77cf7e800..2605f8c93 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -37,6 +37,13 @@ func bytePtr(addr uintptr) *byte {
return (*byte)(unsafe.Pointer(addr))
}
+// uintptrValue returns a uintptr for the given address.
+//
+//go:nosplit
+func uintptrValue(addr *byte) uintptr {
+ return (uintptr)(unsafe.Pointer(addr))
+}
+
// bluepillHandler is called from the signal stub.
//
// The world may be stopped while this is executing, and it executes on the
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 4ba3a185a..deead1b5f 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -142,9 +142,7 @@ func (m *machine) newVCPU() *vCPU {
fd: int(fd),
machine: m,
}
- c.CPU.Init(&m.kernel)
- c.CPU.KernelSyscall = bluepillSyscall
- c.CPU.KernelException = bluepillException
+ c.CPU.Init(&m.kernel, c)
m.vCPUsByID[c.id] = c
// Ensure the signal mask is correct.
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index c03792a1b..5ad805b8b 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -63,6 +63,10 @@ type vCPUArchState struct {
//
// This starts above fixedKernelPCID.
PCIDs *pagetables.PCIDs
+
+ // floatingPointState is the floating point state buffer used in guest
+ // to host transitions. See usage in bluepill_amd64.go.
+ floatingPointState *arch.FloatingPointData
}
const (
@@ -149,6 +153,12 @@ func (c *vCPU) initArchState() error {
return err
}
+ // Allocate some floating point state save area for the local vCPU.
+ // This will be saved prior to leaving the guest, and we restore from
+ // this always. We cannot use the pointer in the context alone because
+ // we don't know how large the area there is in reality.
+ c.floatingPointState = arch.NewFloatingPointData()
+
// Set the time offset to the host native time.
return c.setSystemTime()
}
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
index 18137e55d..98d0a6de0 100644
--- a/pkg/sentry/platform/ring0/defs.go
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -38,6 +38,33 @@ type Kernel struct {
KernelArchState
}
+// Hooks are hooks for kernel functions.
+type Hooks interface {
+ // KernelSyscall is called for kernel system calls.
+ //
+ // Return from this call will restore registers and return to the kernel: the
+ // registers must be modified directly.
+ //
+ // If no hooks are provided, a kernel syscall results in halt.
+ //
+ // This must be go:nosplit, as this will be on the interrupt stack.
+ // Closures are permitted, as the pointer to the closure frame is not
+ // passed on the stack.
+ KernelSyscall()
+
+ // KernelException handles an exception during kernel execution.
+ //
+ // Return from this call will restore registers and return to the kernel: the
+ // registers must be modified directly.
+ //
+ // If no hooks are provided, a kernel exception results in halt.
+ //
+ // This must be go:nosplit, as this will be on the interrupt stack.
+ // Closures are permitted, as the pointer to the closure frame is not
+ // passed on the stack.
+ KernelException(Vector)
+}
+
// CPU is the per-CPU struct.
type CPU struct {
// self is a self reference.
@@ -58,29 +85,8 @@ type CPU struct {
// calls and exceptions via the Registers function.
registers syscall.PtraceRegs
- // KernelException handles an exception during kernel execution.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelException func(Vector)
-
- // KernelSyscall is called for kernel system calls.
- //
- // Return from this call will restore registers and return to the kernel: the
- // registers must be modified directly.
- //
- // If this function is not provided, a kernel exception results in halt.
- //
- // This must be go:nosplit, as this will be on the interrupt stack.
- // Closures are permitted, as the pointer to the closure frame is not
- // passed on the stack.
- KernelSyscall func()
+ // hooks are kernel hooks.
+ hooks Hooks
}
// Registers returns a modifiable-copy of the kernel registers.
diff --git a/pkg/sentry/platform/ring0/entry_amd64.s b/pkg/sentry/platform/ring0/entry_amd64.s
index d48fbd2d1..afb040a6f 100644
--- a/pkg/sentry/platform/ring0/entry_amd64.s
+++ b/pkg/sentry/platform/ring0/entry_amd64.s
@@ -90,12 +90,6 @@ TEXT ·Halt(SB),NOSPLIT,$0
HLT
RET
-// See kernel.go.
-TEXT ·Current(SB),NOSPLIT,$0-8
- MOVQ CPU_SELF(GS), AX
- MOVQ AX, ret+0(FP)
- RET
-
// See entry_amd64.go.
TEXT ·swapgs(SB),NOSPLIT,$0
SWAP_GS()
@@ -205,19 +199,12 @@ kernel:
MOVQ $0, CPU_ERROR_CODE(GS) // Clear error code.
MOVQ $0, CPU_ERROR_TYPE(GS) // Set error type to kernel.
- // Load the function stored in KernelSyscall.
- //
- // Note that this function needs to be executed on the stack in case
- // the runtime decides to make use of the redzone (grumble). This also
- // protects against any functions that might not be go:nosplit, since
- // this will cause a failure immediately.
+ // Call the syscall trampoline.
LOAD_KERNEL_STACK(GS)
- MOVQ CPU_KERNEL_SYSCALL(GS), DX // Function data.
- MOVQ 0(DX), AX // Function pointer.
- PUSHQ BP // Push the frame pointer.
- MOVQ SP, BP // Set frame pointer value.
- CALL *AX // Call the function.
- POPQ BP // Restore the frame pointer.
+ MOVQ CPU_SELF(GS), AX // Load vCPU.
+ PUSHQ AX // First argument (vCPU).
+ CALL ·kernelSyscall(SB) // Call the trampoline.
+ POPQ AX // Pop vCPU.
JMP ·resume(SB)
// exception is a generic exception handler.
@@ -287,18 +274,14 @@ kernel:
MOVQ 0(SP), BX // BX contains the vector.
ADDQ $48, SP // Drop the exception frame.
- // Load the function stored in KernelException.
- //
- // See note above re: the kernel stack.
+ // Call the exception trampoline.
LOAD_KERNEL_STACK(GS)
- MOVQ CPU_KERNEL_EXCEPTION(GS), DX // Function data.
- MOVQ 0(DX), AX // Function pointer.
- PUSHQ BP // Push the frame pointer.
- MOVQ SP, BP // Set frame pointer value.
- PUSHQ BX // First argument (vector).
- CALL *AX // Call the function.
- POPQ BX // Discard the argument.
- POPQ BP // Restore the frame pointer.
+ MOVQ CPU_SELF(GS), AX // Load vCPU.
+ PUSHQ BX // Second argument (vector).
+ PUSHQ AX // First argument (vCPU).
+ CALL ·kernelException(SB) // Call the trampoline.
+ POPQ BX // Pop vCPU (first argument; discarded).
+ POPQ AX // Pop vector (second argument; discarded).
JMP ·resume(SB)
#define EXCEPTION_WITH_ERROR(value, symbol) \
diff --git a/pkg/sentry/platform/ring0/kernel.go b/pkg/sentry/platform/ring0/kernel.go
index e70eafde2..19ac6eb7c 100644
--- a/pkg/sentry/platform/ring0/kernel.go
+++ b/pkg/sentry/platform/ring0/kernel.go
@@ -26,31 +26,41 @@ func (k *Kernel) Init(opts KernelOpts) {
// Halt halts execution.
func Halt()
-// Current returns the current CPU.
+// defaultHooks implements Hooks by halting on every kernel syscall and exception.
+type defaultHooks struct{}
+
+// KernelSyscall implements Hooks.KernelSyscall.
//
-// Its use is only legal in the KernelSyscall and KernelException contexts,
-// which must all be guarded go:nosplit.
-func Current() *CPU
+//go:nosplit
+func (defaultHooks) KernelSyscall() { Halt() }
+
+// KernelException implements Hooks.KernelException.
+//
+//go:nosplit
+func (defaultHooks) KernelException(Vector) { Halt() }
-// defaultSyscall is the default syscall hook.
+// kernelSyscall is a trampoline.
//
//go:nosplit
-func defaultSyscall() { Halt() }
+func kernelSyscall(c *CPU) { c.hooks.KernelSyscall() }
-// defaultException is the default exception hook.
+// kernelException is a trampoline.
//
//go:nosplit
-func defaultException(Vector) { Halt() }
+func kernelException(c *CPU, vector Vector) { c.hooks.KernelException(vector) }
// Init initializes a new CPU.
//
// Init allows embedding in other objects.
-func (c *CPU) Init(k *Kernel) {
+func (c *CPU) Init(k *Kernel, hooks Hooks) {
c.self = c // Set self reference.
c.kernel = k // Set kernel reference.
c.init() // Perform architectural init.
- // Defaults.
- c.KernelSyscall = defaultSyscall
- c.KernelException = defaultException
+ // Install the given hooks, falling back to defaultHooks if nil.
+ if hooks != nil {
+ c.hooks = hooks
+ } else {
+ c.hooks = defaultHooks{}
+ }
}
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index ab562bca7..9e8c56a54 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -204,7 +204,7 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
func start(c *CPU) {
// Save per-cpu & FS segment.
WriteGS(kernelAddr(c))
- WriteFS(uintptr(c.Registers().Fs_base))
+ WriteFS(uintptr(c.registers.Fs_base))
// Initialize floating point.
//
diff --git a/pkg/sentry/platform/ring0/offsets_amd64.go b/pkg/sentry/platform/ring0/offsets_amd64.go
index 753d31ef8..806e07ec0 100644
--- a/pkg/sentry/platform/ring0/offsets_amd64.go
+++ b/pkg/sentry/platform/ring0/offsets_amd64.go
@@ -34,8 +34,6 @@ func Emit(w io.Writer) {
fmt.Fprintf(w, "#define CPU_STACK_TOP 0x%02x\n", reflect.ValueOf(&c.stack[0]).Pointer()-reflect.ValueOf(c).Pointer()+uintptr(len(c.stack)))
fmt.Fprintf(w, "#define CPU_ERROR_CODE 0x%02x\n", reflect.ValueOf(&c.errorCode).Pointer()-reflect.ValueOf(c).Pointer())
fmt.Fprintf(w, "#define CPU_ERROR_TYPE 0x%02x\n", reflect.ValueOf(&c.errorType).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_KERNEL_EXCEPTION 0x%02x\n", reflect.ValueOf(&c.KernelException).Pointer()-reflect.ValueOf(c).Pointer())
- fmt.Fprintf(w, "#define CPU_KERNEL_SYSCALL 0x%02x\n", reflect.ValueOf(&c.KernelSyscall).Pointer()-reflect.ValueOf(c).Pointer())
fmt.Fprintf(w, "\n// Bits.\n")
fmt.Fprintf(w, "#define _RFLAGS_IF 0x%02x\n", _RFLAGS_IF)