summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/platform/kvm')
-rw-r--r--pkg/sentry/platform/kvm/bluepill_amd64.go27
-rw-r--r--pkg/sentry/platform/kvm/bluepill_unsafe.go7
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64_test.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go34
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go2
-rw-r--r--pkg/sentry/platform/kvm/testutil/testutil.go42
-rw-r--r--pkg/sentry/platform/kvm/testutil/testutil_amd64.go10
-rw-r--r--pkg/sentry/platform/kvm/testutil/testutil_amd64.s57
8 files changed, 133 insertions, 50 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go
index d761bbdee..73ea73742 100644
--- a/pkg/sentry/platform/kvm/bluepill_amd64.go
+++ b/pkg/sentry/platform/kvm/bluepill_amd64.go
@@ -74,8 +74,27 @@ func (c *vCPU) KernelSyscall() {
// therefore be guaranteed that there is no floating point state to be
// loaded on resuming from halt. We only worry about saving on exit.
ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
- ring0.Halt()
- ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment.
+ // N.B. Since KernelSyscall is called when the kernel makes a syscall,
+ // FS_BASE is already set for correct execution of this function.
+ //
+ // Refresher on syscall/exception handling:
+ // 1. When the sentry is in guest mode and makes a syscall, it goes to
+ // sysenter(), which saves the register state (including RIP of SYSCALL
+ // instruction) to vCPU.registers.
+ // 2. It then calls KernelSyscall, which rewinds the IP and executes
+ // HLT.
+ // 3. HLT does a VM-exit to bluepillHandler, which returns from the
+ // signal handler using vCPU.registers, directly to the SYSCALL
+ // instruction.
+ // 4. Later, when we want to re-use the vCPU (perhaps on a different
+ // host thread), we set the new thread's registers in vCPU.registers
+ // (as opposed to setting the KVM registers with KVM_SET_REGS).
+ // 5. KVM_RUN thus enters the guest with the old register state,
+ // immediately following the HLT instruction, returning here.
+ // 6. We then restore FS_BASE and the full registers from vCPU.register
+ // to return from sysenter() back to the desired bluepill point from
+ // the host.
+ ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
}
// KernelException handles kernel exceptions.
@@ -93,8 +112,8 @@ func (c *vCPU) KernelException(vector ring0.Vector) {
}
// See above.
ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no.
- ring0.Halt()
- ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment.
+ // See above.
+ ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment.
}
// bluepillArchExit is called during bluepillEnter.
diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go
index 6f87236ad..06fcf1d2e 100644
--- a/pkg/sentry/platform/kvm/bluepill_unsafe.go
+++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go
@@ -85,6 +85,13 @@ func bluepillGuestExit(c *vCPU, context unsafe.Pointer) {
// signal stack. It should only execute raw system calls and functions that are
// explicitly marked go:nosplit.
//
+// Ideally, this function should switch to gsignal, as runtime.sigtramp does,
+// but that is tedious given all the runtime internals. That said, using
+// gsignal inside a signal handler is not _required_, provided we avoid stack
+// splits and allocations. Note that calling any splittable function here will
+// be flaky; if the signal stack is below the G stack then we will trigger a
+// split and crash. If above, we won't trigger a split.
+//
// +checkescape:all
//
//go:nosplit
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go
index b1cab89a0..a002ae00c 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_test.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go
@@ -28,7 +28,7 @@ import (
)
func TestSegments(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTwiddleSegments(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestSegments(regs)
for {
var si linux.SignalInfo
@@ -55,7 +55,7 @@ func TestSegments(t *testing.T) {
func stmxcsr(addr *uint32)
func TestMXCSR(t *testing.T) {
- applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
switchOpts := ring0.SwitchOpts{
Registers: regs,
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index fe570aff9..3a30286e2 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -120,13 +120,13 @@ func TestKernelFloatingPoint(t *testing.T) {
})
}
-func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) {
+func applicationTest(t testHarness, useHostMappings bool, targetFn uintptr, fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) {
// Initialize registers & page tables.
var (
regs arch.Registers
pt *pagetables.PageTables
)
- testutil.SetTestTarget(&regs, target)
+ testutil.SetTestTarget(&regs, targetFn)
kvmTest(t, func(k *KVM) {
// Create new page tables.
@@ -157,7 +157,7 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
}
func TestApplicationSyscall(t *testing.T) {
- applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -171,7 +171,7 @@ func TestApplicationSyscall(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -187,7 +187,7 @@ func TestApplicationSyscall(t *testing.T) {
}
func TestApplicationFault(t *testing.T) {
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
@@ -202,7 +202,7 @@ func TestApplicationFault(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, nil) // Cause fault.
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
@@ -219,7 +219,7 @@ func TestApplicationFault(t *testing.T) {
}
func TestRegistersSyscall(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTwiddleRegsSyscall(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
var si linux.SignalInfo
@@ -242,7 +242,7 @@ func TestRegistersSyscall(t *testing.T) {
}
func TestRegistersFault(t *testing.T) {
- applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTwiddleRegsFault(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTestRegs(regs) // Fill values for all registers.
for {
var si linux.SignalInfo
@@ -266,7 +266,7 @@ func TestRegistersFault(t *testing.T) {
}
func TestBounce(t *testing.T) {
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
go func() {
time.Sleep(time.Millisecond)
c.BounceToKernel()
@@ -281,7 +281,7 @@ func TestBounce(t *testing.T) {
}
return false
})
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
go func() {
time.Sleep(time.Millisecond)
c.BounceToKernel()
@@ -300,7 +300,7 @@ func TestBounce(t *testing.T) {
}
func TestBounceStress(t *testing.T) {
- applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
randomSleep := func() {
// O(hundreds of microseconds) is appropriate to ensure
// different overlaps and different schedules.
@@ -336,7 +336,7 @@ func TestBounceStress(t *testing.T) {
func TestInvalidate(t *testing.T) {
var data uintptr // Used below.
- applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
testutil.SetTouchTarget(regs, &data) // Read legitimate value.
for {
var si linux.SignalInfo
@@ -377,7 +377,7 @@ func IsFault(err error, si *linux.SignalInfo) bool {
}
func TestEmptyAddressSpace(t *testing.T) {
- applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -391,7 +391,7 @@ func TestEmptyAddressSpace(t *testing.T) {
}
return false
})
- applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -467,7 +467,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
i int // Iteration includes machine.Get() / machine.Put().
a int // Count for ErrContextInterrupt.
)
- applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
@@ -489,7 +489,7 @@ func BenchmarkApplicationSyscall(b *testing.B) {
func BenchmarkKernelSyscall(b *testing.B) {
// Note that the target passed here is irrelevant, we never execute SwitchToUser.
- applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
// iteration does not include machine.Get() / machine.Put().
for i := 0; i < b.N; i++ {
testutil.Getpid()
@@ -504,7 +504,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) {
i int
a int
)
- applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+ applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
var si linux.SignalInfo
if _, err := c.SwitchToUser(ring0.SwitchOpts{
Registers: regs,
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 7a10fd812..b28a2c4e8 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -136,7 +136,7 @@ func (c *vCPU) initArchState() error {
}
// Set the entrypoint for the kernel.
- kernelUserRegs.RIP = uint64(reflect.ValueOf(ring0.Start).Pointer())
+ kernelUserRegs.RIP = uint64(ring0.AddrOfStart())
kernelUserRegs.RAX = uint64(reflect.ValueOf(&c.CPU).Pointer())
kernelUserRegs.RSP = c.StackTop()
kernelUserRegs.RFLAGS = ring0.KernelFlagsSet
diff --git a/pkg/sentry/platform/kvm/testutil/testutil.go b/pkg/sentry/platform/kvm/testutil/testutil.go
index 5c1efa0fd..d8c273796 100644
--- a/pkg/sentry/platform/kvm/testutil/testutil.go
+++ b/pkg/sentry/platform/kvm/testutil/testutil.go
@@ -23,23 +23,41 @@ import (
// Getpid executes a trivial system call.
func Getpid()
-// Touch touches the value in the first register.
-func Touch()
+// AddrOfGetpid returns the address of Getpid.
+//
+// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal
+// wrapper function rather than the function itself. We must reference from
+// assembly to get the ABI0 (i.e., primary) address.
+func AddrOfGetpid() uintptr
+
+// AddrOfTouch returns the address of a function that touches the value in the
+// first register.
+func AddrOfTouch() uintptr
+func touch()
-// SyscallLoop executes a syscall and loops.
-func SyscallLoop()
+// AddrOfSyscallLoop returns the address of a function that executes a syscall
+// and loops.
+func AddrOfSyscallLoop() uintptr
+func syscallLoop()
-// SpinLoop spins on the CPU.
-func SpinLoop()
+// AddrOfSpinLoop returns the address of a function that spins on the CPU.
+func AddrOfSpinLoop() uintptr
+func spinLoop()
-// HaltLoop immediately halts and loops.
-func HaltLoop()
+// AddrOfHaltLoop returns the address of a function that immediately halts and
+// loops.
+func AddrOfHaltLoop() uintptr
+func haltLoop()
-// TwiddleRegsFault twiddles registers then faults.
-func TwiddleRegsFault()
+// AddrOfTwiddleRegsFault returns the address of a function that twiddles
+// registers then faults.
+func AddrOfTwiddleRegsFault() uintptr
+func twiddleRegsFault()
-// TwiddleRegsSyscall twiddles registers then executes a syscall.
-func TwiddleRegsSyscall()
+// AddrOfTwiddleRegsSyscall returns the address of a function that twiddles
+// registers then executes a syscall.
+func AddrOfTwiddleRegsSyscall() uintptr
+func twiddleRegsSyscall()
// FloatingPointWorks is a floating point test.
//
diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
index 8048eedec..7c19b6a8f 100644
--- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
+++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go
@@ -22,12 +22,14 @@ import (
"gvisor.dev/gvisor/pkg/sentry/arch"
)
-// TwiddleSegments reads segments into known registers.
-func TwiddleSegments()
+// AddrOfTwiddleSegments return the address of a function that reads segments
+// into known registers.
+func AddrOfTwiddleSegments() uintptr
+func twiddleSegments()
// SetTestTarget sets the rip appropriately.
-func SetTestTarget(regs *arch.Registers, fn func()) {
- regs.Rip = uint64(reflect.ValueOf(fn).Pointer())
+func SetTestTarget(regs *arch.Registers, fn uintptr) {
+ regs.Rip = uint64(fn)
}
// SetTouchTarget sets rax appropriately.
diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s
index 491ec0c2a..65e7c05ea 100644
--- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s
+++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s
@@ -25,27 +25,46 @@ TEXT ·Getpid(SB),NOSPLIT,$0
SYSCALL
RET
-TEXT ·Touch(SB),NOSPLIT,$0
+// func AddrOfGetpid() uintptr
+TEXT ·AddrOfGetpid(SB), $0-8
+ MOVQ $·Getpid(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT ·touch(SB),NOSPLIT,$0
start:
MOVQ 0(AX), BX // deref AX
MOVQ $39, AX // getpid
SYSCALL
JMP start
-TEXT ·HaltLoop(SB),NOSPLIT,$0
-start:
- HLT
- JMP start
+// func AddrOfTouch() uintptr
+TEXT ·AddrOfTouch(SB), $0-8
+ MOVQ $·touch(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
-TEXT ·SyscallLoop(SB),NOSPLIT,$0
+TEXT ·syscallLoop(SB),NOSPLIT,$0
start:
SYSCALL
JMP start
-TEXT ·SpinLoop(SB),NOSPLIT,$0
+// func AddrOfSyscallLoop() uintptr
+TEXT ·AddrOfSyscallLoop(SB), $0-8
+ MOVQ $·syscallLoop(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT ·spinLoop(SB),NOSPLIT,$0
start:
JMP start
+// func AddrOfSpinLoop() uintptr
+TEXT ·AddrOfSpinLoop(SB), $0-8
+ MOVQ $·spinLoop(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
+
TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8
NO_LOCAL_POINTERS
MOVQ $1, AX
@@ -75,20 +94,32 @@ TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8
NOTQ DI; \
NOTQ SP;
-TEXT ·TwiddleRegsSyscall(SB),NOSPLIT,$0
+TEXT ·twiddleRegsSyscall(SB),NOSPLIT,$0
TWIDDLE_REGS()
SYSCALL
RET // never reached
-TEXT ·TwiddleRegsFault(SB),NOSPLIT,$0
+// func AddrOfTwiddleRegsSyscall() uintptr
+TEXT ·AddrOfTwiddleRegsSyscall(SB), $0-8
+ MOVQ $·twiddleRegsSyscall(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
+
+TEXT ·twiddleRegsFault(SB),NOSPLIT,$0
TWIDDLE_REGS()
JMP AX // must fault
RET // never reached
+// func AddrOfTwiddleRegsFault() uintptr
+TEXT ·AddrOfTwiddleRegsFault(SB), $0-8
+ MOVQ $·twiddleRegsFault(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET
+
#define READ_FS() BYTE $0x64; BYTE $0x48; BYTE $0x8b; BYTE $0x00;
#define READ_GS() BYTE $0x65; BYTE $0x48; BYTE $0x8b; BYTE $0x00;
-TEXT ·TwiddleSegments(SB),NOSPLIT,$0
+TEXT ·twiddleSegments(SB),NOSPLIT,$0
MOVQ $0x0, AX
READ_GS()
MOVQ AX, BX
@@ -96,3 +127,9 @@ TEXT ·TwiddleSegments(SB),NOSPLIT,$0
READ_FS()
SYSCALL
RET // never reached
+
+// func AddrOfTwiddleSegments() uintptr
+TEXT ·AddrOfTwiddleSegments(SB), $0-8
+ MOVQ $·twiddleSegments(SB), AX
+ MOVQ AX, ret+0(FP)
+ RET