diff options
author | Michael Pratt <mpratt@google.com> | 2021-07-12 07:59:47 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-07-12 08:01:53 -0700 |
commit | 36a17a814bf90bad33eac25ddbb7a416143a4be7 (patch) | |
tree | faa4d72267f1efc227cd56d0e8f8cb23c5dacb16 /pkg/sentry/platform | |
parent | d78713e2da5331a22fc51fb9a9ad33cc1873879c (diff) |
Go 1.17 support for the KVM platform
Go 1.17 adds a new register-based calling convention. While transparent for
most applications, the KVM platform needs special work in a few cases.
First of all, we need the actual address of some assembly functions, rather
than the address of a wrapper. See http://gvisor.dev/pr/5832 for complete
discussion of this.
More relevant to this CL is that ABI0-to-ABIInternal wrappers (i.e., calls from
assembly to Go) access the G via FS_BASE. The KVM quite fast-and-loose about
the Go environment, often calling into (nosplit) Go functions with
uninitialized FS_BASE.
That will no longer work in Go 1.17, so this CL changes the platform to
consistently restore FS_BASE before calling into Go code.
This CL does not affect arm64 code. Go 1.17 does not support the register-based
calling convention for arm64 (it will come in 1.18), but arm64 also does not
use a non-standard register like FS_BASE for TLS, so it may not require any
changes.
PiperOrigin-RevId: 384234305
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_amd64.go | 27 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/bluepill_unsafe.go | 7 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_amd64_test.go | 4 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 34 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/testutil/testutil.go | 42 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/testutil/testutil_amd64.go | 10 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/testutil/testutil_amd64.s | 57 |
8 files changed, 133 insertions, 50 deletions
diff --git a/pkg/sentry/platform/kvm/bluepill_amd64.go b/pkg/sentry/platform/kvm/bluepill_amd64.go index d761bbdee..73ea73742 100644 --- a/pkg/sentry/platform/kvm/bluepill_amd64.go +++ b/pkg/sentry/platform/kvm/bluepill_amd64.go @@ -74,8 +74,27 @@ func (c *vCPU) KernelSyscall() { // therefore be guaranteed that there is no floating point state to be // loaded on resuming from halt. We only worry about saving on exit. ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. - ring0.Halt() - ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no, reload host segment. + // N.B. Since KernelSyscall is called when the kernel makes a syscall, + // FS_BASE is already set for correct execution of this function. + // + // Refresher on syscall/exception handling: + // 1. When the sentry is in guest mode and makes a syscall, it goes to + // sysenter(), which saves the register state (including RIP of SYSCALL + // instruction) to vCPU.registers. + // 2. It then calls KernelSyscall, which rewinds the IP and executes + // HLT. + // 3. HLT does a VM-exit to bluepillHandler, which returns from the + // signal handler using vCPU.registers, directly to the SYSCALL + // instruction. + // 4. Later, when we want to re-use the vCPU (perhaps on a different + // host thread), we set the new thread's registers in vCPU.registers + // (as opposed to setting the KVM registers with KVM_SET_REGS). + // 5. KVM_RUN thus enters the guest with the old register state, + // immediately following the HLT instruction, returning here. + // 6. We then restore FS_BASE and the full registers from vCPU.register + // to return from sysenter() back to the desired bluepill point from + // the host. + ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. } // KernelException handles kernel exceptions. @@ -93,8 +112,8 @@ func (c *vCPU) KernelException(vector ring0.Vector) { } // See above. ring0.SaveFloatingPoint(c.floatingPointState.BytePointer()) // escapes: no. - ring0.Halt() - ring0.WriteFS(uintptr(regs.Fs_base)) // escapes: no; reload host segment. + // See above. + ring0.HaltAndWriteFSBase(regs) // escapes: no, reload host segment. } // bluepillArchExit is called during bluepillEnter. diff --git a/pkg/sentry/platform/kvm/bluepill_unsafe.go b/pkg/sentry/platform/kvm/bluepill_unsafe.go index 6f87236ad..06fcf1d2e 100644 --- a/pkg/sentry/platform/kvm/bluepill_unsafe.go +++ b/pkg/sentry/platform/kvm/bluepill_unsafe.go @@ -85,6 +85,13 @@ func bluepillGuestExit(c *vCPU, context unsafe.Pointer) { // signal stack. It should only execute raw system calls and functions that are // explicitly marked go:nosplit. // +// Ideally, this function should switch to gsignal, as runtime.sigtramp does, +// but that is tedious given all the runtime internals. That said, using +// gsignal inside a signal handler is not _required_, provided we avoid stack +// splits and allocations. Note that calling any splittable function here will +// be flaky; if the signal stack is below the G stack then we will trigger a +// split and crash. If above, we won't trigger a split. +// // +checkescape:all // //go:nosplit diff --git a/pkg/sentry/platform/kvm/kvm_amd64_test.go b/pkg/sentry/platform/kvm/kvm_amd64_test.go index b1cab89a0..a002ae00c 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_test.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_test.go @@ -28,7 +28,7 @@ import ( ) func TestSegments(t *testing.T) { - applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleSegments(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestSegments(regs) for { var si linux.SignalInfo @@ -55,7 +55,7 @@ func TestSegments(t *testing.T) { func stmxcsr(addr *uint32) func TestMXCSR(t *testing.T) { - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo switchOpts := ring0.SwitchOpts{ Registers: regs, diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index fe570aff9..3a30286e2 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -120,13 +120,13 @@ func TestKernelFloatingPoint(t *testing.T) { }) } -func applicationTest(t testHarness, useHostMappings bool, target func(), fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) { +func applicationTest(t testHarness, useHostMappings bool, targetFn uintptr, fn func(*vCPU, *arch.Registers, *pagetables.PageTables) bool) { // Initialize registers & page tables. var ( regs arch.Registers pt *pagetables.PageTables ) - testutil.SetTestTarget(®s, target) + testutil.SetTestTarget(®s, targetFn) kvmTest(t, func(k *KVM) { // Create new page tables. @@ -157,7 +157,7 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func } func TestApplicationSyscall(t *testing.T) { - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -171,7 +171,7 @@ func TestApplicationSyscall(t *testing.T) { } return false }) - applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -187,7 +187,7 @@ func TestApplicationSyscall(t *testing.T) { } func TestApplicationFault(t *testing.T) { - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ @@ -202,7 +202,7 @@ func TestApplicationFault(t *testing.T) { } return false }) - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ @@ -219,7 +219,7 @@ func TestApplicationFault(t *testing.T) { } func TestRegistersSyscall(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleRegsSyscall(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. for { var si linux.SignalInfo @@ -242,7 +242,7 @@ func TestRegistersSyscall(t *testing.T) { } func TestRegistersFault(t *testing.T) { - applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTwiddleRegsFault(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. for { var si linux.SignalInfo @@ -266,7 +266,7 @@ func TestRegistersFault(t *testing.T) { } func TestBounce(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { go func() { time.Sleep(time.Millisecond) c.BounceToKernel() @@ -281,7 +281,7 @@ func TestBounce(t *testing.T) { } return false }) - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { go func() { time.Sleep(time.Millisecond) c.BounceToKernel() @@ -300,7 +300,7 @@ func TestBounce(t *testing.T) { } func TestBounceStress(t *testing.T) { - applicationTest(t, true, testutil.SpinLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfSpinLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { randomSleep := func() { // O(hundreds of microseconds) is appropriate to ensure // different overlaps and different schedules. @@ -336,7 +336,7 @@ func TestBounceStress(t *testing.T) { func TestInvalidate(t *testing.T) { var data uintptr // Used below. - applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, true, testutil.AddrOfTouch(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, &data) // Read legitimate value. for { var si linux.SignalInfo @@ -377,7 +377,7 @@ func IsFault(err error, si *linux.SignalInfo) bool { } func TestEmptyAddressSpace(t *testing.T) { - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -391,7 +391,7 @@ func TestEmptyAddressSpace(t *testing.T) { } return false }) - applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(t, false, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -467,7 +467,7 @@ func BenchmarkApplicationSyscall(b *testing.B) { i int // Iteration includes machine.Get() / machine.Put(). a int // Count for ErrContextInterrupt. ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, @@ -489,7 +489,7 @@ func BenchmarkApplicationSyscall(b *testing.B) { func BenchmarkKernelSyscall(b *testing.B) { // Note that the target passed here is irrelevant, we never execute SwitchToUser. - applicationTest(b, true, testutil.Getpid, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { // iteration does not include machine.Get() / machine.Put(). for i := 0; i < b.N; i++ { testutil.Getpid() @@ -504,7 +504,7 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { i int a int ) - applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + applicationTest(b, true, testutil.AddrOfSyscallLoop(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { var si linux.SignalInfo if _, err := c.SwitchToUser(ring0.SwitchOpts{ Registers: regs, diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 7a10fd812..b28a2c4e8 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -136,7 +136,7 @@ func (c *vCPU) initArchState() error { } // Set the entrypoint for the kernel. - kernelUserRegs.RIP = uint64(reflect.ValueOf(ring0.Start).Pointer()) + kernelUserRegs.RIP = uint64(ring0.AddrOfStart()) kernelUserRegs.RAX = uint64(reflect.ValueOf(&c.CPU).Pointer()) kernelUserRegs.RSP = c.StackTop() kernelUserRegs.RFLAGS = ring0.KernelFlagsSet diff --git a/pkg/sentry/platform/kvm/testutil/testutil.go b/pkg/sentry/platform/kvm/testutil/testutil.go index 5c1efa0fd..d8c273796 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil.go +++ b/pkg/sentry/platform/kvm/testutil/testutil.go @@ -23,23 +23,41 @@ import ( // Getpid executes a trivial system call. func Getpid() -// Touch touches the value in the first register. -func Touch() +// AddrOfGetpid returns the address of Getpid. +// +// In Go 1.17+, Go references to assembly functions resolve to an ABIInternal +// wrapper function rather than the function itself. We must reference from +// assembly to get the ABI0 (i.e., primary) address. +func AddrOfGetpid() uintptr + +// AddrOfTouch returns the address of a function that touches the value in the +// first register. +func AddrOfTouch() uintptr +func touch() -// SyscallLoop executes a syscall and loops. -func SyscallLoop() +// AddrOfSyscallLoop returns the address of a function that executes a syscall +// and loops. +func AddrOfSyscallLoop() uintptr +func syscallLoop() -// SpinLoop spins on the CPU. -func SpinLoop() +// AddrOfSpinLoop returns the address of a function that spins on the CPU. +func AddrOfSpinLoop() uintptr +func spinLoop() -// HaltLoop immediately halts and loops. -func HaltLoop() +// AddrOfHaltLoop returns the address of a function that immediately halts and +// loops. +func AddrOfHaltLoop() uintptr +func haltLoop() -// TwiddleRegsFault twiddles registers then faults. -func TwiddleRegsFault() +// AddrOfTwiddleRegsFault returns the address of a function that twiddles +// registers then faults. +func AddrOfTwiddleRegsFault() uintptr +func twiddleRegsFault() -// TwiddleRegsSyscall twiddles registers then executes a syscall. -func TwiddleRegsSyscall() +// AddrOfTwiddleRegsSyscall returns the address of a function that twiddles +// registers then executes a syscall. +func AddrOfTwiddleRegsSyscall() uintptr +func twiddleRegsSyscall() // FloatingPointWorks is a floating point test. // diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go index 8048eedec..7c19b6a8f 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.go +++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.go @@ -22,12 +22,14 @@ import ( "gvisor.dev/gvisor/pkg/sentry/arch" ) -// TwiddleSegments reads segments into known registers. -func TwiddleSegments() +// AddrOfTwiddleSegments return the address of a function that reads segments +// into known registers. +func AddrOfTwiddleSegments() uintptr +func twiddleSegments() // SetTestTarget sets the rip appropriately. -func SetTestTarget(regs *arch.Registers, fn func()) { - regs.Rip = uint64(reflect.ValueOf(fn).Pointer()) +func SetTestTarget(regs *arch.Registers, fn uintptr) { + regs.Rip = uint64(fn) } // SetTouchTarget sets rax appropriately. diff --git a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s index 491ec0c2a..65e7c05ea 100644 --- a/pkg/sentry/platform/kvm/testutil/testutil_amd64.s +++ b/pkg/sentry/platform/kvm/testutil/testutil_amd64.s @@ -25,27 +25,46 @@ TEXT ·Getpid(SB),NOSPLIT,$0 SYSCALL RET -TEXT ·Touch(SB),NOSPLIT,$0 +// func AddrOfGetpid() uintptr +TEXT ·AddrOfGetpid(SB), $0-8 + MOVQ $·Getpid(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·touch(SB),NOSPLIT,$0 start: MOVQ 0(AX), BX // deref AX MOVQ $39, AX // getpid SYSCALL JMP start -TEXT ·HaltLoop(SB),NOSPLIT,$0 -start: - HLT - JMP start +// func AddrOfTouch() uintptr +TEXT ·AddrOfTouch(SB), $0-8 + MOVQ $·touch(SB), AX + MOVQ AX, ret+0(FP) + RET -TEXT ·SyscallLoop(SB),NOSPLIT,$0 +TEXT ·syscallLoop(SB),NOSPLIT,$0 start: SYSCALL JMP start -TEXT ·SpinLoop(SB),NOSPLIT,$0 +// func AddrOfSyscallLoop() uintptr +TEXT ·AddrOfSyscallLoop(SB), $0-8 + MOVQ $·syscallLoop(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·spinLoop(SB),NOSPLIT,$0 start: JMP start +// func AddrOfSpinLoop() uintptr +TEXT ·AddrOfSpinLoop(SB), $0-8 + MOVQ $·spinLoop(SB), AX + MOVQ AX, ret+0(FP) + RET + TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 NO_LOCAL_POINTERS MOVQ $1, AX @@ -75,20 +94,32 @@ TEXT ·FloatingPointWorks(SB),NOSPLIT,$0-8 NOTQ DI; \ NOTQ SP; -TEXT ·TwiddleRegsSyscall(SB),NOSPLIT,$0 +TEXT ·twiddleRegsSyscall(SB),NOSPLIT,$0 TWIDDLE_REGS() SYSCALL RET // never reached -TEXT ·TwiddleRegsFault(SB),NOSPLIT,$0 +// func AddrOfTwiddleRegsSyscall() uintptr +TEXT ·AddrOfTwiddleRegsSyscall(SB), $0-8 + MOVQ $·twiddleRegsSyscall(SB), AX + MOVQ AX, ret+0(FP) + RET + +TEXT ·twiddleRegsFault(SB),NOSPLIT,$0 TWIDDLE_REGS() JMP AX // must fault RET // never reached +// func AddrOfTwiddleRegsFault() uintptr +TEXT ·AddrOfTwiddleRegsFault(SB), $0-8 + MOVQ $·twiddleRegsFault(SB), AX + MOVQ AX, ret+0(FP) + RET + #define READ_FS() BYTE $0x64; BYTE $0x48; BYTE $0x8b; BYTE $0x00; #define READ_GS() BYTE $0x65; BYTE $0x48; BYTE $0x8b; BYTE $0x00; -TEXT ·TwiddleSegments(SB),NOSPLIT,$0 +TEXT ·twiddleSegments(SB),NOSPLIT,$0 MOVQ $0x0, AX READ_GS() MOVQ AX, BX @@ -96,3 +127,9 @@ TEXT ·TwiddleSegments(SB),NOSPLIT,$0 READ_FS() SYSCALL RET // never reached + +// func AddrOfTwiddleSegments() uintptr +TEXT ·AddrOfTwiddleSegments(SB), $0-8 + MOVQ $·twiddleSegments(SB), AX + MOVQ AX, ret+0(FP) + RET |