diff options
author | Adin Scannell <ascannell@google.com> | 2018-06-06 22:51:58 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-06-06 22:52:55 -0700 |
commit | 3374849cb553fab16e69d39cf6e49f843d94790b (patch) | |
tree | e3131aa5e77bef84a85c354ab939fc0bf81d8b6f /pkg/sentry/platform/kvm | |
parent | 1b5062263b4a3ca3dc0271d9e06ad0113197344c (diff) |
Split PCID implementation from page tables.
Instead of associating a single PCID with each set of page tables (which
will reach the maximum quickly), allow a dynamic pool for each vCPU.
This is the same way that Linux operates. We also split management of
PCIDs out of the page tables themselves for simplicity.
PiperOrigin-RevId: 199585631
Change-Id: I42f3486ada3cb2a26f623c65ac279b473ae63201
Diffstat (limited to 'pkg/sentry/platform/kvm')
-rw-r--r-- | pkg/sentry/platform/kvm/address_space.go | 4 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_amd64_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 5 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 10 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 53 |
6 files changed, 59 insertions, 21 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 15d45f5bc..c2f4559a0 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -226,8 +226,10 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) { // Release releases the page tables. func (as *addressSpace) Release() { as.Unmap(0, ^uint64(0)) - as.pageTables.Release() // Free all pages from the allocator. as.pageTables.Allocator.(allocator).base.Drain() + + // Drop all cached machine references. + as.machine.dropPageTables(as.pageTables) } diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 13c363993..1a8e16ca0 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -121,7 +121,7 @@ func (*KVM) MaxUserAddress() usermem.Addr { // NewAddressSpace returns a new pagetable root. func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) { // Allocate page tables and install system mappings. - pageTables := k.machine.kernel.PageTables.New(newAllocator()) + pageTables := pagetables.New(newAllocator()) applyPhysicalRegions(func(pr physicalRegion) bool { // Map the kernel in the upper half. pageTables.Map( diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go index 834e6b96d..476e783a0 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go @@ -20,14 +20,11 @@ import ( "fmt" "syscall" "unsafe" - - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" ) var ( runDataSize int hasGuestPCID bool - pagetablesOpts pagetables.Opts cpuidSupported = cpuidEntries{nr: _KVM_NR_CPUID_ENTRIES} ) @@ -75,9 +72,6 @@ func updateSystemValues(fd int) error { } } - // Set the pagetables to use PCID if it's available. - pagetablesOpts.EnablePCID = hasGuestPCID - // Success. 
return nil } diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 00919b214..71c5c856e 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -121,11 +121,6 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func pt *pagetables.PageTables ) testutil.SetTestTarget(&regs, target) - defer func() { - if pt != nil { - pt.Release() - } - }() kvmTest(t, func(k *KVM) { // Create new page tables. diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 949abd838..3c1e01241 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -112,6 +112,9 @@ type vCPU struct { // active is the current addressSpace: this is set and read atomically, // it is used to elide unnecessary interrupts due to invalidations. active atomicAddressSpace + + // vCPUArchState is the architecture-specific state. + vCPUArchState } // newMachine returns a new VM context. @@ -133,7 +136,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) { vCPUs = n } m.kernel = ring0.New(ring0.KernelOpts{ - PageTables: pagetables.New(newAllocator(), pagetablesOpts), + PageTables: pagetables.New(newAllocator()), }) // Initialize architecture state. @@ -285,11 +288,6 @@ func (m *machine) Destroy() { } } - // Release host mappings. - if m.kernel.PageTables != nil { - m.kernel.PageTables.Release() - } - // vCPUs are gone: teardown machine state. 
if err := syscall.Close(m.fd); err != nil { panic(fmt.Sprintf("error closing VM fd: %v", err)) diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index ba7bbcb91..6afae5cae 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -24,6 +24,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/platform" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) @@ -41,6 +42,38 @@ func (m *machine) initArchState(vCPUs int) error { return nil } +type vCPUArchState struct { + // PCIDs is the set of PCIDs for this vCPU. + // + // This starts above fixedKernelPCID. + PCIDs *pagetables.PCIDs +} + +const ( + // fixedKernelPCID is a fixed kernel PCID used for the kernel page + // tables. We must start allocating user PCIDs above this in order to + // avoid any conflict (see below). + fixedKernelPCID = 1 + + // poolPCIDs is the number of PCIDs to record in the database. As this + // grows, assignment can take longer, since it is a simple linear scan. + // Beyond a relatively small number, there are likely few perform + // benefits, since the TLB has likely long since lost any translations + // from more than a few PCIDs past. + poolPCIDs = 8 +) + +// dropPageTables drops cached page table entries. +func (m *machine) dropPageTables(pt *pagetables.PageTables) { + m.mu.Lock() + defer m.mu.Unlock() + + // Clear from all PCIDs. + for _, c := range m.vCPUs { + c.PCIDs.Drop(pt) + } +} + // initArchState initializes architecture-specific state. func (c *vCPU) initArchState() error { var ( @@ -67,8 +100,16 @@ func (c *vCPU) initArchState() error { kernelSystemRegs.TR.base = tssBase kernelSystemRegs.TR.limit = uint32(tssLimit) - // Point to kernel page tables. 
- kernelSystemRegs.CR3 = c.machine.kernel.PageTables.FlushCR3() + // Point to kernel page tables, with no initial PCID. + kernelSystemRegs.CR3 = c.machine.kernel.PageTables.CR3(false, 0) + + // Initialize the PCID database. + if hasGuestPCID { + // Note that NewPCIDs may return a nil table here, in which + // case we simply don't use PCID support (see below). In + // practice, this should not happen, however. + c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs) + } // Set the CPUID; this is required before setting system registers, // since KVM will reject several CR4 bits if the CPUID does not @@ -121,6 +162,14 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error) // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, usermem.AccessType, error) { + // Assign PCIDs. + if c.PCIDs != nil { + var requireFlushPCID bool // Force a flush? + switchOpts.UserPCID, requireFlushPCID = c.PCIDs.Assign(switchOpts.PageTables) + switchOpts.KernelPCID = fixedKernelPCID + switchOpts.Flush = switchOpts.Flush || requireFlushPCID + } + // See below. var vector ring0.Vector |