From 722d7ca744db6aab442bc71bb50b64599c8fa522 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 26 Oct 2021 13:02:29 -0700 Subject: platform/kvm: map vdso and vvar into a guest address space Right now, each vDSO call triggers a vmexit. VDSO and VVAR pages are mapped with VM_IO, and get_user_pages fails for such VMAs. KVM was not able to handle this case up to the v4.8 kernel. This problem was fixed by add6a0cd1c5ba ("KVM: MMU: try to fix up page faults before giving up"). For some unknown reason, it still doesn't work in the case of nested virtualization. Before: BenchmarkKernelVDSO-6 252519 4598 ns/op After: BenchmarkKernelVDSO-6 34431957 34.91 ns/op PiperOrigin-RevId: 405715941 --- pkg/sentry/platform/kvm/kvm_test.go | 28 ++++++++++++++++++++++++++++ pkg/sentry/platform/kvm/machine_amd64.go | 11 +++++++---- pkg/sentry/platform/kvm/virtual_map.go | 7 +------ 3 files changed, 36 insertions(+), 10 deletions(-) (limited to 'pkg/sentry/platform') diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 3a30286e2..2ed9cf766 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -462,6 +462,22 @@ func TestRdtsc(t *testing.T) { }) } +func TestKernelVDSO(t *testing.T) { + // Note that the target passed here is irrelevant, we never execute SwitchToUser. + applicationTest(t, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + // iteration does not include machine.Get() / machine.Put(). + const n = 100 + for i := 0; i < n; i++ { + bluepill(c) + time.Now() + } + if c.guestExits >= n { + t.Errorf("vdso calls trigger vmexit") + } + return false + }) +} + func BenchmarkApplicationSyscall(b *testing.B) { var ( i int // Iteration includes machine.Get() / machine.Put(). 
@@ -498,6 +514,18 @@ func BenchmarkKernelSyscall(b *testing.B) { }) } +func BenchmarkKernelVDSO(b *testing.B) { + // Note that the target passed here is irrelevant, we never execute SwitchToUser. + applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool { + // iteration does not include machine.Get() / machine.Put(). + for i := 0; i < b.N; i++ { + bluepill(c) + time.Now() + } + return false + }) +} + func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { // see BenchmarkApplicationSyscall. var ( diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index 5bc023899..be6d954c8 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -279,10 +279,13 @@ func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType, // Reset the pointed SignalInfo. *info = linux.SignalInfo{Signo: signal} info.SetAddr(uint64(faultAddr)) - accessType := hostarch.AccessType{ - Read: code&(1<<1) == 0, - Write: code&(1<<1) != 0, - Execute: code&(1<<4) != 0, + accessType := hostarch.AccessType{} + if signal == int32(unix.SIGSEGV) { + accessType = hostarch.AccessType{ + Read: code&(1<<1) == 0, + Write: code&(1<<1) != 0, + Execute: code&(1<<4) != 0, + } } if !accessType.Write && !accessType.Execute { info.Code = 1 // SEGV_MAPERR. diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go index 01d9eb39d..6027dad47 100644 --- a/pkg/sentry/platform/kvm/virtual_map.go +++ b/pkg/sentry/platform/kvm/virtual_map.go @@ -40,14 +40,9 @@ var mapsLine = regexp.MustCompile("([0-9a-f]+)-([0-9a-f]+) ([r-][w-][x-][sp]) ([ // physical map. Virtual regions need to be excluded if get_user_pages will // fail on those addresses, preventing KVM from satisfying EPT faults. // -// This includes the VVAR page because the VVAR page may be mapped as I/O -// memory. 
And the VDSO page is knocked out because the VVAR page is not even -// recorded in /proc/self/maps on older kernels; knocking out the VDSO page -// prevents code in the VDSO from accessing the VVAR address. -// // This is called by the physical map functions, not applyVirtualRegions. func excludeVirtualRegion(r virtualRegion) bool { - return r.filename == "[vvar]" || r.filename == "[vdso]" + return false } // applyVirtualRegions parses the process maps file. -- cgit v1.2.3