diff options
| field | value | date |
|---|---|---|
| author | Andrei Vagin <avagin@google.com> | 2021-10-26 13:02:29 -0700 |
| committer | gVisor bot <gvisor-bot@google.com> | 2021-10-26 13:04:40 -0700 |
| commit | 722d7ca744db6aab442bc71bb50b64599c8fa522 (patch) | |
| tree | 7d4384600fe5d69981329711e944586f31771ae5 | |
| parent | 07b22740a11632c0641657cc4dd145bd0d1e5afb (diff) | |

platform/kvm: map vdso and vvar into a guest address space

Right now, each vdso call triggers a vmexit. VDSO and VVAR pages are
mapped with VM_IO, and get_user_pages fails for such VMAs. KVM was not
able to handle this case up to the v4.8 kernel. This problem was fixed by
add6a0cd1c5ba ("KVM: MMU: try to fix up page faults before giving up").
For some unknown reason, it still doesn't work in the case of nested
virtualization.
Before:
BenchmarkKernelVDSO-6 252519 4598 ns/op
After:
BenchmarkKernelVDSO-6 34431957 34.91 ns/op
PiperOrigin-RevId: 405715941
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | pkg/cpuid/cpuid_x86.go | 2 |
| -rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 28 |
| -rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 11 |
| -rw-r--r-- | pkg/sentry/platform/kvm/virtual_map.go | 7 |

4 files changed, 37 insertions, 11 deletions
```diff
diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go
index 8cbcc9f61..c236b942d 100644
--- a/pkg/cpuid/cpuid_x86.go
+++ b/pkg/cpuid/cpuid_x86.go
@@ -81,7 +81,7 @@ const (
 	X86FeatureAVX
 	X86FeatureF16C
 	X86FeatureRDRAND
-	_ // ecx bit 31 is reserved.
+	X86FeatureHypervisor
 )
 
 // Block 1 constants are all of the "basic" feature bits returned by a cpuid in
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 3a30286e2..2ed9cf766 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -462,6 +462,22 @@ func TestRdtsc(t *testing.T) {
 	})
 }
 
+func TestKernelVDSO(t *testing.T) {
+	// Note that the target passed here is irrelevant, we never execute SwitchToUser.
+	applicationTest(t, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+		// iteration does not include machine.Get() / machine.Put().
+		const n = 100
+		for i := 0; i < n; i++ {
+			bluepill(c)
+			time.Now()
+		}
+		if c.guestExits >= n {
+			t.Errorf("vdso calls trigger vmexit")
+		}
+		return false
+	})
+}
+
 func BenchmarkApplicationSyscall(b *testing.B) {
 	var (
 		i int // Iteration includes machine.Get() / machine.Put().
@@ -498,6 +514,18 @@ func BenchmarkKernelSyscall(b *testing.B) {
 	})
 }
 
+func BenchmarkKernelVDSO(b *testing.B) {
+	// Note that the target passed here is irrelevant, we never execute SwitchToUser.
+	applicationTest(b, true, testutil.AddrOfGetpid(), func(c *vCPU, regs *arch.Registers, pt *pagetables.PageTables) bool {
+		// iteration does not include machine.Get() / machine.Put().
+		for i := 0; i < b.N; i++ {
+			bluepill(c)
+			time.Now()
+		}
+		return false
+	})
+}
+
 func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) {
 	// see BenchmarkApplicationSyscall.
 	var (
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index 5bc023899..be6d954c8 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -279,10 +279,13 @@ func (c *vCPU) fault(signal int32, info *linux.SignalInfo) (hostarch.AccessType,
 	// Reset the pointed SignalInfo.
 	*info = linux.SignalInfo{Signo: signal}
 	info.SetAddr(uint64(faultAddr))
-	accessType := hostarch.AccessType{
-		Read:    code&(1<<1) == 0,
-		Write:   code&(1<<1) != 0,
-		Execute: code&(1<<4) != 0,
+	accessType := hostarch.AccessType{}
+	if signal == int32(unix.SIGSEGV) {
+		accessType = hostarch.AccessType{
+			Read:    code&(1<<1) == 0,
+			Write:   code&(1<<1) != 0,
+			Execute: code&(1<<4) != 0,
+		}
 	}
 	if !accessType.Write && !accessType.Execute {
 		info.Code = 1 // SEGV_MAPERR.
diff --git a/pkg/sentry/platform/kvm/virtual_map.go b/pkg/sentry/platform/kvm/virtual_map.go
index 01d9eb39d..6027dad47 100644
--- a/pkg/sentry/platform/kvm/virtual_map.go
+++ b/pkg/sentry/platform/kvm/virtual_map.go
@@ -40,14 +40,9 @@ var mapsLine = regexp.MustCompile("([0-9a-f]+)-([0-9a-f]+) ([r-][w-][x-][sp]) ([
 // physical map. Virtual regions need to be excluded if get_user_pages will
 // fail on those addresses, preventing KVM from satisfying EPT faults.
 //
-// This includes the VVAR page because the VVAR page may be mapped as I/O
-// memory. And the VDSO page is knocked out because the VVAR page is not even
-// recorded in /proc/self/maps on older kernels; knocking out the VDSO page
-// prevents code in the VDSO from accessing the VVAR address.
-//
 // This is called by the physical map functions, not applyVirtualRegions.
 func excludeVirtualRegion(r virtualRegion) bool {
-	return r.filename == "[vvar]" || r.filename == "[vdso]"
+	return false
 }
 
 // applyVirtualRegions parses the process maps file.
```