summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform
diff options
context:
space:
mode:
authorgVisor bot <gvisor-bot@google.com>2021-07-12 15:06:49 +0000
committergVisor bot <gvisor-bot@google.com>2021-07-12 15:06:49 +0000
commitbf97c23d1679690f0df5f7daa32795a839399f49 (patch)
treed2acc0cacccf318e5266bed7ee9cc8807e1ae4b8 /pkg/sentry/platform
parenta6333d999d80e980576c11a558b69ad3106952d2 (diff)
parent36a17a814bf90bad33eac25ddbb7a416143a4be7 (diff)
Merge release-20210628.0-35-g36a17a814 (automated)
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go27
-rw-r--r--pkg/sentry/platform/kvm/bluepill_impl_amd64.s4
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go7
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go2
4 files changed, 33 insertions, 7 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index d761bbdee..73ea73742 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -74,8 +74,27 @@ func (c *vCPU) KernelSyscall() {
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt. We only worry about saving on exit.
ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
- ring0.Halt()
- ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment.
+ // N.B. Since KernelSyscall is called when the kernel makes a syscall,
+ // FS_BASE is already set for correct execution of this function.
+ //
+ // Refresher on syscall/exception handling:
+ // 1. When the sentry is in guest mode and makes a syscall, it goes to
+ // sysenter(), which saves the register state (including RIP of SYSCALL
+ // instruction) to vCPU.registers.
+ // 2. It then calls KernelSyscall, which rewinds the IP and executes
+ // HLT.
+ // 3. HLT does a VM-exit to bluepillHandler, which returns from the
+ // signal handler using vCPU.registers, directly to the SYSCALL
+ // instruction.
+ // 4. Later, when we want to re-use the vCPU (perhaps on a different
+ // host thread), we set the new thread's registers in vCPU.registers
+ // (as opposed to setting the KVM registers with KVM_SET_REGS).
+ // 5. KVM_RUN thus enters the guest with the old register state,
+ // immediately following the HLT instruction, returning here.
+ // 6. We then restore FS_BASE and the full registers from vCPU.register
+ // to return from sysenter() back to the desired bluepill point from
+ // the host.
+ ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
}
// KernelException handles kernel exceptions.
@@ -93,8 +112,8 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
}
// See above.
ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
- ring0.Halt()
- ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment.
+ // See above.
+ ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
}
// bluepillArchExit is called during bluepillEnter.
diff --git a/pkg/sentry/platform/kvm/bluepill_impl_amd64.s b/pkg/sentry/platform/kvm/bluepill_impl_amd64.s
index b7f1dd5ac..7ad9e4e76 100644
--- a/pkg/sentry/platform/kvm/bluepill_impl_amd64.s
+++ b/pkg/sentry/platform/kvm/bluepill_impl_amd64.s
@@ -66,8 +66,8 @@
#define PTRACE_FLAGS 0x90
#define PTRACE_RSP 0x98
#define PTRACE_SS 0xa0
-#define PTRACE_FS 0xa8
-#define PTRACE_GS 0xb0
+#define PTRACE_FS_BASE 0xa8
+#define PTRACE_GS_BASE 0xb0
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 6f87236ad..06fcf1d2e 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -85,6 +85,13 @@ func bluepillGuestExit(c *vCPU, context unsafe.Pointer) {
// signal stack. It should only execute raw system calls and functions that are
// explicitly marked go:nosplit.
//
+// Ideally, this function should switch to gsignal, as runtime.sigtramp does,
+// but that is tedious given all the runtime internals. That said, using
+// gsignal inside a signal handler is not _required_, provided we avoid stack
+// splits and allocations. Note that calling any splittable function here will
+// be flaky; if the signal stack is below the G stack then we will trigger a
+// split and crash. If above, we won't trigger a split.
+//
// +checkescape:all
//
//go:nosplit
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 7a10fd812..b28a2c4e8 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -136,7 +136,7 @@ func (c *vCPU) initArchState() error {
}
// Set the entrypoint for the kernel.
- kernelUserRegs.RIP = uint64(reflect.ValueOf(ring0.Start).Pointer())
+ kernelUserRegs.RIP = uint64(ring0.AddrOfStart())
kernelUserRegs.RAX = uint64(reflect.ValueOf(&c.CPU).Pointer())
kernelUserRegs.RSP = c.StackTop()
kernelUserRegs.RFLAGS = ring0.KernelFlagsSet