summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Adin Scannell <ascannell@google.com> 2018-06-06 22:51:58 -0700
committer: Shentubot <shentubot@google.com> 2018-06-06 22:52:55 -0700
commit: 3374849cb553fab16e69d39cf6e49f843d94790b (patch)
tree: e3131aa5e77bef84a85c354ab939fc0bf81d8b6f
parent: 1b5062263b4a3ca3dc0271d9e06ad0113197344c (diff)
Split PCID implementation from page tables.
Instead of associating a single PCID with each set of page tables (which will reach the maximum quickly), allow a dynamic pool for each vCPU. This is the same way that Linux operates.

We also split management of PCIDs out of the page tables themselves for simplicity.

PiperOrigin-RevId: 199585631
Change-Id: I42f3486ada3cb2a26f623c65ac279b473ae63201
-rw-r--r-- pkg/sentry/platform/kvm/address_space.go 4
-rw-r--r-- pkg/sentry/platform/kvm/kvm.go 2
-rw-r--r-- pkg/sentry/platform/kvm/kvm_amd64_unsafe.go 6
-rw-r--r-- pkg/sentry/platform/kvm/kvm_test.go 5
-rw-r--r-- pkg/sentry/platform/kvm/machine.go 10
-rw-r--r-- pkg/sentry/platform/kvm/machine_amd64.go 53
-rw-r--r-- pkg/sentry/platform/ring0/defs.go 3
-rw-r--r-- pkg/sentry/platform/ring0/defs_amd64.go 15
-rw-r--r-- pkg/sentry/platform/ring0/kernel_amd64.go 13
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/BUILD 1
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables.go 25
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go 12
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_test.go 23
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_x86.go 60
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pcids_x86.go 102
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go 65
16 files changed, 162 insertions, 237 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 15d45f5bc..c2f4559a0 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -226,8 +226,10 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) {
// Release releases the page tables.
func (as *addressSpace) Release() {
as.Unmap(0, ^uint64(0))
- as.pageTables.Release()
// Free all pages from the allocator.
as.pageTables.Allocator.(allocator).base.Drain()
+
+ // Drop all cached machine references.
+ as.machine.dropPageTables(as.pageTables)
}
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 13c363993..1a8e16ca0 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -121,7 +121,7 @@ func (*KVM) MaxUserAddress() usermem.Addr {
// NewAddressSpace returns a new pagetable root.
func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) {
// Allocate page tables and install system mappings.
- pageTables := k.machine.kernel.PageTables.New(newAllocator())
+ pageTables := pagetables.New(newAllocator())
applyPhysicalRegions(func(pr physicalRegion) bool {
// Map the kernel in the upper half.
pageTables.Map(
diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
index 834e6b96d..476e783a0 100644
--- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
+++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go
@@ -20,14 +20,11 @@ import (
"fmt"
"syscall"
"unsafe"
-
- "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
)
var (
runDataSize int
hasGuestPCID bool
- pagetablesOpts pagetables.Opts
cpuidSupported = cpuidEntries{nr: _KVM_NR_CPUID_ENTRIES}
)
@@ -75,9 +72,6 @@ func updateSystemValues(fd int) error {
}
}
- // Set the pagetables to use PCID if it's available.
- pagetablesOpts.EnablePCID = hasGuestPCID
-
// Success.
return nil
}
diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go
index 00919b214..71c5c856e 100644
--- a/pkg/sentry/platform/kvm/kvm_test.go
+++ b/pkg/sentry/platform/kvm/kvm_test.go
@@ -121,11 +121,6 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func
pt *pagetables.PageTables
)
testutil.SetTestTarget(&regs, target)
- defer func() {
- if pt != nil {
- pt.Release()
- }
- }()
kvmTest(t, func(k *KVM) {
// Create new page tables.
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 949abd838..3c1e01241 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -112,6 +112,9 @@ type vCPU struct {
// active is the current addressSpace: this is set and read atomically,
// it is used to elide unnecessary interrupts due to invalidations.
active atomicAddressSpace
+
+ // vCPUArchState is the architecture-specific state.
+ vCPUArchState
}
// newMachine returns a new VM context.
@@ -133,7 +136,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
vCPUs = n
}
m.kernel = ring0.New(ring0.KernelOpts{
- PageTables: pagetables.New(newAllocator(), pagetablesOpts),
+ PageTables: pagetables.New(newAllocator()),
})
// Initialize architecture state.
@@ -285,11 +288,6 @@ func (m *machine) Destroy() {
}
}
- // Release host mappings.
- if m.kernel.PageTables != nil {
- m.kernel.PageTables.Release()
- }
-
// vCPUs are gone: teardown machine state.
if err := syscall.Close(m.fd); err != nil {
panic(fmt.Sprintf("error closing VM fd: %v", err))
diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go
index ba7bbcb91..6afae5cae 100644
--- a/pkg/sentry/platform/kvm/machine_amd64.go
+++ b/pkg/sentry/platform/kvm/machine_amd64.go
@@ -24,6 +24,7 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform"
"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
+ "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
@@ -41,6 +42,38 @@ func (m *machine) initArchState(vCPUs int) error {
return nil
}
+type vCPUArchState struct {
+ // PCIDs is the set of PCIDs for this vCPU.
+ //
+ // This starts above fixedKernelPCID.
+ PCIDs *pagetables.PCIDs
+}
+
+const (
+ // fixedKernelPCID is a fixed kernel PCID used for the kernel page
+ // tables. We must start allocating user PCIDs above this in order to
+ // avoid any conflict (see below).
+ fixedKernelPCID = 1
+
+ // poolPCIDs is the number of PCIDs to record in the database. As this
+ // grows, assignment can take longer, since it is a simple linear scan.
+ // Beyond a relatively small number, there are likely few performance
+ // benefits, since the TLB has likely long since lost any translations
+ // from more than a few PCIDs past.
+ poolPCIDs = 8
+)
+
+// dropPageTables drops cached page table entries.
+func (m *machine) dropPageTables(pt *pagetables.PageTables) {
+ m.mu.Lock()
+ defer m.mu.Unlock()
+
+ // Clear from all PCIDs.
+ for _, c := range m.vCPUs {
+ c.PCIDs.Drop(pt)
+ }
+}
+
// initArchState initializes architecture-specific state.
func (c *vCPU) initArchState() error {
var (
@@ -67,8 +100,16 @@ func (c *vCPU) initArchState() error {
kernelSystemRegs.TR.base = tssBase
kernelSystemRegs.TR.limit = uint32(tssLimit)
- // Point to kernel page tables.
- kernelSystemRegs.CR3 = c.machine.kernel.PageTables.FlushCR3()
+ // Point to kernel page tables, with no initial PCID.
+ kernelSystemRegs.CR3 = c.machine.kernel.PageTables.CR3(false, 0)
+
+ // Initialize the PCID database.
+ if hasGuestPCID {
+ // Note that NewPCIDs may return a nil table here, in which
+ // case we simply don't use PCID support (see below). In
+ // practice, this should not happen, however.
+ c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs)
+ }
// Set the CPUID; this is required before setting system registers,
// since KVM will reject several CR4 bits if the CPUID does not
@@ -121,6 +162,14 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error)
// SwitchToUser unpacks architectural-details.
func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, usermem.AccessType, error) {
+ // Assign PCIDs.
+ if c.PCIDs != nil {
+ var requireFlushPCID bool // Force a flush?
+ switchOpts.UserPCID, requireFlushPCID = c.PCIDs.Assign(switchOpts.PageTables)
+ switchOpts.KernelPCID = fixedKernelPCID
+ switchOpts.Flush = switchOpts.Flush || requireFlushPCID
+ }
+
// See below.
var vector ring0.Vector
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go
index 7b3bed1c7..f09d045eb 100644
--- a/pkg/sentry/platform/ring0/defs.go
+++ b/pkg/sentry/platform/ring0/defs.go
@@ -109,4 +109,7 @@ type SwitchOpts struct {
// FullRestore indicates that an iret-based restore should be used.
FullRestore bool
+
+ // SwitchArchOpts are architecture-specific options.
+ SwitchArchOpts
}
diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go
index bb3420125..0d068c00a 100644
--- a/pkg/sentry/platform/ring0/defs_amd64.go
+++ b/pkg/sentry/platform/ring0/defs_amd64.go
@@ -104,6 +104,21 @@ func (c *CPU) ErrorCode() (value uintptr, user bool) {
return c.errorCode, c.errorType != 0
}
+// SwitchArchOpts are embedded in SwitchOpts.
+type SwitchArchOpts struct {
+ // UserPCID indicates the application PCID to be used on switch,
+ // assuming that PCIDs are supported.
+ //
+ // Per pagetables_x86.go, a zero PCID implies a flush.
+ UserPCID uint16
+
+ // KernelPCID indicates the kernel PCID to be used on return,
+ // assuming that PCIDs are supported.
+ //
+ // Per pagetables_x86.go, a zero PCID implies a flush.
+ KernelPCID uint16
+}
+
func init() {
KernelCodeSegment.setCode64(0, 0, 0)
KernelDataSegment.setData(0, 0xffffffff, 0)
diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go
index 58ac4b4b2..37d5484e1 100644
--- a/pkg/sentry/platform/ring0/kernel_amd64.go
+++ b/pkg/sentry/platform/ring0/kernel_amd64.go
@@ -180,23 +180,14 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) {
if !IsCanonical(regs.Rip) || !IsCanonical(regs.Rsp) || !IsCanonical(regs.Fs_base) || !IsCanonical(regs.Gs_base) {
return GeneralProtectionFault
}
-
- var (
- userCR3 uint64
- kernelCR3 uint64
- )
+ userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID)
+ kernelCR3 := c.kernel.PageTables.CR3(true, switchOpts.KernelPCID)
// Sanitize registers.
- if switchOpts.Flush {
- userCR3 = switchOpts.PageTables.FlushCR3()
- } else {
- userCR3 = switchOpts.PageTables.CR3()
- }
regs.Eflags &= ^uint64(UserFlagsClear)
regs.Eflags |= UserFlagsSet
regs.Cs = uint64(Ucode64) // Required for iret.
regs.Ss = uint64(Udata) // Ditto.
- kernelCR3 = c.kernel.PageTables.CR3()
// Perform the switch.
swapgs() // GS will be swapped on return.
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 768f96678..08b73e87d 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -26,7 +26,6 @@ go_test(
srcs = [
"pagetables_amd64_test.go",
"pagetables_test.go",
- "pcids_x86_test.go",
],
embed = [":pagetables"],
deps = ["//pkg/sentry/usermem"],
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
index 929771cca..6963ba62d 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -37,27 +37,13 @@ type PageTables struct {
}
// New returns new PageTables.
-func New(a Allocator, opts Opts) *PageTables {
+func New(a Allocator) *PageTables {
p := &PageTables{Allocator: a}
p.root = p.Allocator.NewPTEs()
p.rootPhysical = p.Allocator.PhysicalFor(p.root)
- p.init(opts)
return p
}
-// New returns a new set of PageTables derived from the given one.
-//
-// This function should always be preferred to New if there are existing
-// pagetables, as this function preserves architectural constraints relevant to
-// managing multiple sets of pagetables.
-func (p *PageTables) New(a Allocator) *PageTables {
- np := &PageTables{Allocator: a}
- np.root = np.Allocator.NewPTEs()
- np.rootPhysical = p.Allocator.PhysicalFor(np.root)
- np.initFrom(&p.archPageTables)
- return np
-}
-
// Map installs a mapping with the given physical address.
//
// True is returned iff there was a previous mapping in the range.
@@ -99,15 +85,6 @@ func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
return count > 0
}
-// Release releases this address space.
-//
-// This must be called to release the PCID.
-func (p *PageTables) Release() {
- // Clear all pages.
- p.Unmap(0, ^uintptr(0))
- p.release()
-}
-
// Lookup returns the physical address for the given virtual address.
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) {
mask := uintptr(usermem.PageSize - 1)
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
index c81786133..a7f2ad9a4 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
@@ -23,7 +23,7 @@ import (
)
func Test2MAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a small page and a huge page.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -33,11 +33,10 @@ func Test2MAnd4K(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func Test1GAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a small page and a super page.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -47,11 +46,10 @@ func Test1GAnd4K(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSplit1GPage(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a super page and knock out the middle.
pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42)
@@ -61,11 +59,10 @@ func TestSplit1GPage(t *testing.T) {
{0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSplit2MPage(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a huge page and knock out the middle.
pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42)
@@ -75,5 +72,4 @@ func TestSplit2MPage(t *testing.T) {
{0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
index dec8def7f..28178f656 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -72,24 +72,18 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) {
}
}
-func TestAllocFree(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
- pt.Release()
-}
-
func TestUnmap(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map and unmap one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
pt.Unmap(0x400000, pteSize)
checkMappings(t, pt, nil)
- pt.Release()
}
func TestReadOnly(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -97,11 +91,10 @@ func TestReadOnly(t *testing.T) {
checkMappings(t, pt, []mapping{
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestReadWrite(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -109,11 +102,10 @@ func TestReadWrite(t *testing.T) {
checkMappings(t, pt, []mapping{
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
})
- pt.Release()
}
func TestSerialEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map two sequential entries.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -123,11 +115,10 @@ func TestSerialEntries(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}},
})
- pt.Release()
}
func TestSpanningEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Span a pgd with two pages.
pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -136,11 +127,10 @@ func TestSpanningEntries(t *testing.T) {
{0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSparseEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map two entries in different pgds.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -150,5 +140,4 @@ func TestSparseEntries(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
index 72a955d08..ca49d20f8 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -22,66 +22,28 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
-// Opts are pagetable options.
-type Opts struct {
- EnablePCID bool
-}
-
-// archPageTables has x86-specific features.
+// archPageTables is architecture-specific data.
type archPageTables struct {
- // pcids is the PCID database.
- pcids *PCIDs
-
- // pcid is the globally unique identifier, or zero if none were
- // available or pcids is nil.
+ // pcid is the value assigned by PCIDs.Assign.
+ //
+ // Note that zero is a valid PCID.
pcid uint16
}
-// init initializes arch-specific features.
-func (a *archPageTables) init(opts Opts) {
- if opts.EnablePCID {
- a.pcids = NewPCIDs()
- a.pcid = a.pcids.allocate()
- }
-}
-
-// initFrom initializes arch-specific features from an existing entry.'
-func (a *archPageTables) initFrom(other *archPageTables) {
- a.pcids = other.pcids // Refer to the same PCID database.
- if a.pcids != nil {
- a.pcid = a.pcids.allocate()
- }
-}
-
-// release is called from Release.
-func (a *archPageTables) release() {
- // Return the PCID.
- if a.pcids != nil {
- a.pcids.free(a.pcid)
- }
-}
-
// CR3 returns the CR3 value for these tables.
//
-// This may be called in interrupt contexts.
+// This may be called in interrupt contexts. A PCID of zero always implies a
+// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for
+// more information.
//
//go:nosplit
-func (p *PageTables) CR3() uint64 {
+func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 {
// Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
const noFlushBit uint64 = 0x8000000000000000
- if p.pcid != 0 {
- return noFlushBit | uint64(p.rootPhysical) | uint64(p.pcid)
+ if noFlush && pcid != 0 {
+ return noFlushBit | uint64(p.rootPhysical) | uint64(pcid)
}
- return uint64(p.rootPhysical)
-}
-
-// FlushCR3 returns the CR3 value that flushes the TLB.
-//
-// This may be called in interrupt contexts.
-//
-//go:nosplit
-func (p *PageTables) FlushCR3() uint64 {
- return uint64(p.rootPhysical) | uint64(p.pcid)
+ return uint64(p.rootPhysical) | uint64(pcid)
}
// Bits in page table entries.
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
index 509e8c0d9..4296371e8 100644
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
@@ -16,59 +16,79 @@
package pagetables
-import (
- "sync"
-)
-
-// maxPCID is the maximum allowed PCID.
-const maxPCID = 4095
+// limitPCID is the number of valid PCIDs.
+const limitPCID = 4096
// PCIDs is a simple PCID database.
+//
+// This is not protected by locks and is thus suitable for use only with a
+// single CPU at a time.
type PCIDs struct {
- mu sync.Mutex
+ // cache are the assigned page tables.
+ cache map[*PageTables]uint16
- // last is the last fresh PCID given out (not including the available
- // pool). If last >= maxPCID, then the only PCIDs available in the
- // available pool below.
- last uint16
-
- // available are PCIDs that have been freed.
- available map[uint16]struct{}
+ // avail are available PCIDs.
+ avail []uint16
}
-// NewPCIDs returns a new PCID set.
-func NewPCIDs() *PCIDs {
- return &PCIDs{
- available: make(map[uint16]struct{}),
+// NewPCIDs returns a new PCID database.
+//
+// start is the first index to assign. Typically this will be one, as the zero
+// pcid will always be flushed on transition (see pagetables_x86.go). This may
+// be more than one if specific PCIDs are reserved.
+//
+// Nil is returned iff the start and size are out of range.
+func NewPCIDs(start, size uint16) *PCIDs {
+ if start+uint16(size) >= limitPCID {
+ return nil // See comment.
+ }
+ p := &PCIDs{
+ cache: make(map[*PageTables]uint16),
}
+ for pcid := start; pcid < start+size; pcid++ {
+ p.avail = append(p.avail, pcid)
+ }
+ return p
}
-// allocate returns an unused PCID, or zero if all are taken.
-func (p *PCIDs) allocate() uint16 {
- p.mu.Lock()
- defer p.mu.Unlock()
- if len(p.available) > 0 {
- for id := range p.available {
- delete(p.available, id)
- return id
- }
+// Assign assigns a PCID to the given PageTables.
+//
+// This may overwrite any previous assignment provided. If this is the case,
+// true is returned to indicate that the PCID should be flushed.
+func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
+ if pcid, ok := p.cache[pt]; ok {
+ return pcid, false // No flush.
}
- if id := p.last + 1; id <= maxPCID {
- p.last = id
- return id
+
+ // Is there something available?
+ if len(p.avail) > 0 {
+ pcid := p.avail[len(p.avail)-1]
+ p.avail = p.avail[:len(p.avail)-1]
+
+ // We need to flush because while this is in the available
+ // pool, it may have been used previously.
+ return pcid, true
}
- // Nothing available.
- return 0
+
+ // Evict an existing table.
+ for old, pcid := range p.cache {
+ delete(p.cache, old)
+ p.cache[pt] = pcid
+
+ // A flush is definitely required in this case: these page
+ // tables may still be active. (They will just be assigned some
+ // other PCID if and when they hit the given CPU again.)
+ return pcid, true
+ }
+
+ // No PCID.
+ return 0, false
}
-// free returns a PCID to the pool.
-//
-// It is safe to call free with a zero pcid. That is, you may always call free
-// with anything returned by allocate.
-func (p *PCIDs) free(id uint16) {
- p.mu.Lock()
- defer p.mu.Unlock()
- if id != 0 {
- p.available[id] = struct{}{}
+// Drop drops references to a set of page tables.
+func (p *PCIDs) Drop(pt *PageTables) {
+ if pcid, ok := p.cache[pt]; ok {
+ delete(p.cache, pt)
+ p.avail = append(p.avail, pcid)
}
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
deleted file mode 100644
index 0b555cd76..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2018 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build i386 amd64
-
-package pagetables
-
-import (
- "testing"
-)
-
-func TestMaxPCID(t *testing.T) {
- p := NewPCIDs()
- for i := 0; i < maxPCID; i++ {
- if id := p.allocate(); id != uint16(i+1) {
- t.Errorf("got %d, expected %d", id, i+1)
- }
- }
- if id := p.allocate(); id != 0 {
- if id != 0 {
- t.Errorf("got %d, expected 0", id)
- }
- }
-}
-
-func TestFirstPCID(t *testing.T) {
- p := NewPCIDs()
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
-}
-
-func TestFreePCID(t *testing.T) {
- p := NewPCIDs()
- p.free(0)
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1 (not zero)", id)
- }
-}
-
-func TestReusePCID(t *testing.T) {
- p := NewPCIDs()
- id := p.allocate()
- if id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
- p.free(id)
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
- if id := p.allocate(); id != 2 {
- t.Errorf("got %d, expected 2", id)
- }
-}