summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform/kvm
diff options
context:
space:
mode:
authorAdin Scannell <ascannell@google.com>2018-06-06 22:51:58 -0700
committerShentubot <shentubot@google.com>2018-06-06 22:52:55 -0700
commit3374849cb553fab16e69d39cf6e49f843d94790b (patch)
treee3131aa5e77bef84a85c354ab939fc0bf81d8b6f /pkg/sentry/platform/kvm
parent1b5062263b4a3ca3dc0271d9e06ad0113197344c (diff)
Split PCID implementation from page tables.
Instead of associating a single PCID with each set of page tables (which will reach the maximum quickly), allow a dynamic pool for each vCPU. This is the same way that Linux operates. We also split management of PCIDs out of the page tables themselves for simplicity. PiperOrigin-RevId: 199585631 Change-Id: I42f3486ada3cb2a26f623c65ac279b473ae63201
Diffstat (limited to 'pkg/sentry/platform/kvm')
-rw-r--r--pkg/sentry/platform/kvm/address_space.go4
-rw-r--r--pkg/sentry/platform/kvm/kvm.go2
-rw-r--r--pkg/sentry/platform/kvm/kvm_amd64_unsafe.go6
-rw-r--r--pkg/sentry/platform/kvm/kvm_test.go5
-rw-r--r--pkg/sentry/platform/kvm/machine.go10
-rw-r--r--pkg/sentry/platform/kvm/machine_amd64.go53
6 files changed, 59 insertions, 21 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 15d45f5bc..c2f4559a0 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -226,8 +226,10 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) {
// Release releases the page tables.
func (as *addressSpace) Release() {
as.Unmap(0, ^uint64(0))
- as.pageTables.Release()
// Free all pages from the allocator.
as.pageTables.Allocator.(allocator).base.Drain()
+
+ // Drop all cached machine references.
+ as.machine.dropPageTables(as.pageTables)
}
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 13c363993..1a8e16ca0 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -121,7 +121,7 @@ func (*KVM) MaxUserAddress() usermem.Addr {
// NewAddressSpace returns a new pagetable root.
func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) {
// Allocate page tables and install system mappings.
- pageTables := k.machine.kernel.PageTables.New(newAllocator())
+ pageTables := pagetables.New(newAllocator())
applyPhysicalRegions(func(pr physicalRegion) bool {
// Map the kernel in the upper half.
pageTables.Map(
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
index 834e6b96d..476e783a0 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
@@ -20,14 +20,11 @@ import (
"fmt"
"syscall"
"unsafe"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
)
var (
runDataSize int
hasGuestPCID bool
- pagetablesOpts pagetables.Opts
cpuidSupported = cpuidEntries{nr: _KVM_NR_CPUID_ENTRIES}
)
@@ -75,9 +72,6 @@ func updateSystemValues(fd int) error {
}
}
- // Set the pagetables to use PCID if it's available.
- pagetablesOpts.EnablePCID = hasGuestPCID
-
// Success.
return nil
}
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 00919b214..71c5c856e 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -121,11 +121,6 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
pt *pagetables.PageTables
)
testutil.SetTestTarget(&regs, target)
- defer func() {
- if pt != nil {
- pt.Release()
- }
- }()
kvmTest(t, func(k *KVM) {
// Create new page tables.
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 949abd838..3c1e01241 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -112,6 +112,9 @@ type vCPU struct {
// active is the current addressSpace: this is set and read atomically,
// it is used to elide unnecessary interrupts due to invalidations.
active atomicAddressSpace
+
+ // vCPUArchState is the architecture-specific state.
+ vCPUArchState
}
// newMachine returns a new VM context.
@@ -133,7 +136,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
vCPUs = n
}
m.kernel = ring0.New(ring0.KernelOpts{
- PageTables: pagetables.New(newAllocator(), pagetablesOpts),
+ PageTables: pagetables.New(newAllocator()),
})
// Initialize architecture state.
@@ -285,11 +288,6 @@ func (m *machine) Destroy() {
}
}
- // Release host mappings.
- if m.kernel.PageTables != nil {
- m.kernel.PageTables.Release()
- }
-
// vCPUs are gone: teardown machine state.
if err := syscall.Close(m.fd); err != nil {
panic(fmt.Sprintf("error closing VM fd: %v", err))
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index ba7bbcb91..6afae5cae 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -24,6 +24,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
@@ -41,6 +42,38 @@ func (m *machine) initArchState(vCPUs int) error {
return nil
}
+type vCPUArchState struct {
+ // PCIDs is the set of PCIDs for this vCPU.
+ //
+ // This starts above fixedKernelPCID.
+ PCIDs *pagetables.PCIDs
+}
+
+const (
+ // fixedKernelPCID is a fixed kernel PCID used for the kernel page
+ // tables. We must start allocating user PCIDs above this in order to
+ // avoid any conflict (see below).
+ fixedKernelPCID = 1
+
+ // poolPCIDs is the number of PCIDs to record in the database. As this
+ // grows, assignment can take longer, since it is a simple linear scan.
+ // Beyond a relatively small number, there are likely few perform
+ // benefits, since the TLB has likely long since lost any translations
+ // from more than a few PCIDs past.
+ poolPCIDs = 8
+)
+
+// dropPageTables drops cached page table entries.
+func (m *machine) dropPageTables(pt *pagetables.PageTables) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
+ // Clear from all PCIDs.
+ for _, c := range m.vCPUs {
+ c.PCIDs.Drop(pt)
+ }
+}
+
// initArchState initializes architecture-specific state.
func (c *vCPU) initArchState() error {
var (
@@ -67,8 +100,16 @@ func (c *vCPU) initArchState() error {
kernelSystemRegs.TR.base = tssBase
kernelSystemRegs.TR.limit = uint32(tssLimit)
- // Point to kernel page tables.
- kernelSystemRegs.CR3 = c.machine.kernel.PageTables.FlushCR3()
+ // Point to kernel page tables, with no initial PCID.
+ kernelSystemRegs.CR3 = c.machine.kernel.PageTables.CR3(false, 0)
+
+ // Initialize the PCID database.
+ if hasGuestPCID {
+ // Note that NewPCIDs may return a nil table here, in which
+ // case we simply don't use PCID support (see below). In
+ // practice, this should not happen, however.
+ c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs)
+ }
// Set the CPUID; this is required before setting system registers,
// since KVM will reject several CR4 bits if the CPUID does not
@@ -121,6 +162,14 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error)
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, usermem.AccessType, error) {
+ // Assign PCIDs.
+ if c.PCIDs != nil {
+ var requireFlushPCID bool // Force a flush?
+ switchOpts.UserPCID, requireFlushPCID = c.PCIDs.Assign(switchOpts.PageTables)
+ switchOpts.KernelPCID = fixedKernelPCID
+ switchOpts.Flush = switchOpts.Flush || requireFlushPCID
+ }
+
// See below.
var vector ring0.Vector