diff options
author | Adin Scannell <ascannell@google.com> | 2018-06-06 22:51:58 -0700 |
---|---|---|
committer | Shentubot <shentubot@google.com> | 2018-06-06 22:52:55 -0700 |
commit | 3374849cb553fab16e69d39cf6e49f843d94790b (patch) | |
tree | e3131aa5e77bef84a85c354ab939fc0bf81d8b6f /pkg/sentry/platform | |
parent | 1b5062263b4a3ca3dc0271d9e06ad0113197344c (diff) |
Split PCID implementation from page tables.
Instead of associating a single PCID with each set of page tables (which
will reach the maximum quickly), allow a dynamic pool for each vCPU.
This is the same way that Linux operates. We also split management of
PCIDs out of the page tables themselves for simplicity.
PiperOrigin-RevId: 199585631
Change-Id: I42f3486ada3cb2a26f623c65ac279b473ae63201
Diffstat (limited to 'pkg/sentry/platform')
-rw-r--r-- | pkg/sentry/platform/kvm/address_space.go | 4 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm.go | 2 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_amd64_unsafe.go | 6 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 5 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 10 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 53 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/defs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/defs_amd64.go | 15 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_amd64.go | 13 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables.go | 25 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go | 12 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_test.go | 23 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_x86.go | 60 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_x86.go | 102 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go | 65 |
16 files changed, 162 insertions, 237 deletions
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index 15d45f5bc..c2f4559a0 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -226,8 +226,10 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) { // Release releases the page tables. func (as *addressSpace) Release() { as.Unmap(0, ^uint64(0)) - as.pageTables.Release() // Free all pages from the allocator. as.pageTables.Allocator.(allocator).base.Drain() + + // Drop all cached machine references. + as.machine.dropPageTables(as.pageTables) } diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 13c363993..1a8e16ca0 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -121,7 +121,7 @@ func (*KVM) MaxUserAddress() usermem.Addr { // NewAddressSpace returns a new pagetable root. func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) { // Allocate page tables and install system mappings. - pageTables := k.machine.kernel.PageTables.New(newAllocator()) + pageTables := pagetables.New(newAllocator()) applyPhysicalRegions(func(pr physicalRegion) bool { // Map the kernel in the upper half. pageTables.Map( diff --git a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go index 834e6b96d..476e783a0 100644 --- a/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go +++ b/pkg/sentry/platform/kvm/kvm_amd64_unsafe.go @@ -20,14 +20,11 @@ import ( "fmt" "syscall" "unsafe" - - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" ) var ( runDataSize int hasGuestPCID bool - pagetablesOpts pagetables.Opts cpuidSupported = cpuidEntries{nr: _KVM_NR_CPUID_ENTRIES} ) @@ -75,9 +72,6 @@ func updateSystemValues(fd int) error { } } - // Set the pagetables to use PCID if it's available. - pagetablesOpts.EnablePCID = hasGuestPCID - // Success. 
return nil } diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index 00919b214..71c5c856e 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -121,11 +121,6 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func pt *pagetables.PageTables ) testutil.SetTestTarget(&regs, target) - defer func() { - if pt != nil { - pt.Release() - } - }() kvmTest(t, func(k *KVM) { // Create new page tables. diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 949abd838..3c1e01241 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -112,6 +112,9 @@ type vCPU struct { // active is the current addressSpace: this is set and read atomically, // it is used to elide unnecessary interrupts due to invalidations. active atomicAddressSpace + + // vCPUArchState is the architecture-specific state. + vCPUArchState } // newMachine returns a new VM context. @@ -133,7 +136,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) { vCPUs = n } m.kernel = ring0.New(ring0.KernelOpts{ - PageTables: pagetables.New(newAllocator(), pagetablesOpts), + PageTables: pagetables.New(newAllocator()), }) // Initialize architecture state. @@ -285,11 +288,6 @@ func (m *machine) Destroy() { } } - // Release host mappings. - if m.kernel.PageTables != nil { - m.kernel.PageTables.Release() - } - // vCPUs are gone: teardown machine state. 
if err := syscall.Close(m.fd); err != nil { panic(fmt.Sprintf("error closing VM fd: %v", err)) diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index ba7bbcb91..6afae5cae 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -24,6 +24,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/platform" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) @@ -41,6 +42,38 @@ func (m *machine) initArchState(vCPUs int) error { return nil } +type vCPUArchState struct { + // PCIDs is the set of PCIDs for this vCPU. + // + // This starts above fixedKernelPCID. + PCIDs *pagetables.PCIDs +} + +const ( + // fixedKernelPCID is a fixed kernel PCID used for the kernel page + // tables. We must start allocating user PCIDs above this in order to + // avoid any conflict (see below). + fixedKernelPCID = 1 + + // poolPCIDs is the number of PCIDs to record in the database. As this + // grows, assignment can take longer, since it is a simple linear scan. + // Beyond a relatively small number, there are likely few perform + // benefits, since the TLB has likely long since lost any translations + // from more than a few PCIDs past. + poolPCIDs = 8 +) + +// dropPageTables drops cached page table entries. +func (m *machine) dropPageTables(pt *pagetables.PageTables) { + m.mu.Lock() + defer m.mu.Unlock() + + // Clear from all PCIDs. + for _, c := range m.vCPUs { + c.PCIDs.Drop(pt) + } +} + // initArchState initializes architecture-specific state. func (c *vCPU) initArchState() error { var ( @@ -67,8 +100,16 @@ func (c *vCPU) initArchState() error { kernelSystemRegs.TR.base = tssBase kernelSystemRegs.TR.limit = uint32(tssLimit) - // Point to kernel page tables. 
- kernelSystemRegs.CR3 = c.machine.kernel.PageTables.FlushCR3() + // Point to kernel page tables, with no initial PCID. + kernelSystemRegs.CR3 = c.machine.kernel.PageTables.CR3(false, 0) + + // Initialize the PCID database. + if hasGuestPCID { + // Note that NewPCIDs may return a nil table here, in which + // case we simply don't use PCID support (see below). In + // practice, this should not happen, however. + c.PCIDs = pagetables.NewPCIDs(fixedKernelPCID+1, poolPCIDs) + } // Set the CPUID; this is required before setting system registers, // since KVM will reject several CR4 bits if the CPUID does not @@ -121,6 +162,14 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error) // SwitchToUser unpacks architectural-details. func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, usermem.AccessType, error) { + // Assign PCIDs. + if c.PCIDs != nil { + var requireFlushPCID bool // Force a flush? + switchOpts.UserPCID, requireFlushPCID = c.PCIDs.Assign(switchOpts.PageTables) + switchOpts.KernelPCID = fixedKernelPCID + switchOpts.Flush = switchOpts.Flush || requireFlushPCID + } + // See below. var vector ring0.Vector diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go index 7b3bed1c7..f09d045eb 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/sentry/platform/ring0/defs.go @@ -109,4 +109,7 @@ type SwitchOpts struct { // FullRestore indicates that an iret-based restore should be used. FullRestore bool + + // SwitchArchOpts are architecture-specific options. + SwitchArchOpts } diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go index bb3420125..0d068c00a 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/sentry/platform/ring0/defs_amd64.go @@ -104,6 +104,21 @@ func (c *CPU) ErrorCode() (value uintptr, user bool) { return c.errorCode, c.errorType != 0 } +// SwitchArchOpts are embedded in SwitchOpts. 
+type SwitchArchOpts struct { + // UserPCID indicates that the application PCID to be used on switch, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + UserPCID uint16 + + // KernelPCID indicates that the kernel PCID to be used on return, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + KernelPCID uint16 +} + func init() { KernelCodeSegment.setCode64(0, 0, 0) KernelDataSegment.setData(0, 0xffffffff, 0) diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 58ac4b4b2..37d5484e1 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -180,23 +180,14 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { if !IsCanonical(regs.Rip) || !IsCanonical(regs.Rsp) || !IsCanonical(regs.Fs_base) || !IsCanonical(regs.Gs_base) { return GeneralProtectionFault } - - var ( - userCR3 uint64 - kernelCR3 uint64 - ) + userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID) + kernelCR3 := c.kernel.PageTables.CR3(true, switchOpts.KernelPCID) // Sanitize registers. - if switchOpts.Flush { - userCR3 = switchOpts.PageTables.FlushCR3() - } else { - userCR3 = switchOpts.PageTables.CR3() - } regs.Eflags &= ^uint64(UserFlagsClear) regs.Eflags |= UserFlagsSet regs.Cs = uint64(Ucode64) // Required for iret. regs.Ss = uint64(Udata) // Ditto. - kernelCR3 = c.kernel.PageTables.CR3() // Perform the switch. swapgs() // GS will be swapped on return. 
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 768f96678..08b73e87d 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -26,7 +26,6 @@ go_test( srcs = [ "pagetables_amd64_test.go", "pagetables_test.go", - "pcids_x86_test.go", ], embed = [":pagetables"], deps = ["//pkg/sentry/usermem"], diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index 929771cca..6963ba62d 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -37,27 +37,13 @@ type PageTables struct { } // New returns new PageTables. -func New(a Allocator, opts Opts) *PageTables { +func New(a Allocator) *PageTables { p := &PageTables{Allocator: a} p.root = p.Allocator.NewPTEs() p.rootPhysical = p.Allocator.PhysicalFor(p.root) - p.init(opts) return p } -// New returns a new set of PageTables derived from the given one. -// -// This function should always be preferred to New if there are existing -// pagetables, as this function preserves architectural constraints relevant to -// managing multiple sets of pagetables. -func (p *PageTables) New(a Allocator) *PageTables { - np := &PageTables{Allocator: a} - np.root = np.Allocator.NewPTEs() - np.rootPhysical = p.Allocator.PhysicalFor(np.root) - np.initFrom(&p.archPageTables) - return np -} - // Map installs a mapping with the given physical address. // // True is returned iff there was a previous mapping in the range. @@ -99,15 +85,6 @@ func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool { return count > 0 } -// Release releases this address space. -// -// This must be called to release the PCID. -func (p *PageTables) Release() { - // Clear all pages. - p.Unmap(0, ^uintptr(0)) - p.release() -} - // Lookup returns the physical address for the given virtual address. 
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) { mask := uintptr(usermem.PageSize - 1) diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go index c81786133..a7f2ad9a4 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go @@ -23,7 +23,7 @@ import ( ) func Test2MAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a small page and a huge page. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -33,11 +33,10 @@ func Test2MAnd4K(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func Test1GAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a small page and a super page. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -47,11 +46,10 @@ func Test1GAnd4K(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSplit1GPage(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a super page and knock out the middle. 
pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42) @@ -61,11 +59,10 @@ func TestSplit1GPage(t *testing.T) { {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSplit2MPage(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a huge page and knock out the middle. pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42) @@ -75,5 +72,4 @@ func TestSplit2MPage(t *testing.T) { {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index dec8def7f..28178f656 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -72,24 +72,18 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) { } } -func TestAllocFree(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) - pt.Release() -} - func TestUnmap(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map and unmap one entry. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) pt.Unmap(0x400000, pteSize) checkMappings(t, pt, nil) - pt.Release() } func TestReadOnly(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map one entry. 
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) @@ -97,11 +91,10 @@ func TestReadOnly(t *testing.T) { checkMappings(t, pt, []mapping{ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestReadWrite(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map one entry. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -109,11 +102,10 @@ func TestReadWrite(t *testing.T) { checkMappings(t, pt, []mapping{ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, }) - pt.Release() } func TestSerialEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map two sequential entries. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -123,11 +115,10 @@ func TestSerialEntries(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}}, }) - pt.Release() } func TestSpanningEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Span a pgd with two pages. pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) @@ -136,11 +127,10 @@ func TestSpanningEntries(t *testing.T) { {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSparseEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map two entries in different pgds. 
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -150,5 +140,4 @@ func TestSparseEntries(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index 72a955d08..ca49d20f8 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -22,66 +22,28 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) -// Opts are pagetable options. -type Opts struct { - EnablePCID bool -} - -// archPageTables has x86-specific features. +// archPageTables is architecture-specific data. type archPageTables struct { - // pcids is the PCID database. - pcids *PCIDs - - // pcid is the globally unique identifier, or zero if none were - // available or pcids is nil. + // pcid is the value assigned by PCIDs.Assign. + // + // Note that zero is a valid PCID. pcid uint16 } -// init initializes arch-specific features. -func (a *archPageTables) init(opts Opts) { - if opts.EnablePCID { - a.pcids = NewPCIDs() - a.pcid = a.pcids.allocate() - } -} - -// initFrom initializes arch-specific features from an existing entry.' -func (a *archPageTables) initFrom(other *archPageTables) { - a.pcids = other.pcids // Refer to the same PCID database. - if a.pcids != nil { - a.pcid = a.pcids.allocate() - } -} - -// release is called from Release. -func (a *archPageTables) release() { - // Return the PCID. - if a.pcids != nil { - a.pcids.free(a.pcid) - } -} - // CR3 returns the CR3 value for these tables. // -// This may be called in interrupt contexts. +// This may be called in interrupt contexts. A PCID of zero always implies a +// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for +// more information. 
// //go:nosplit -func (p *PageTables) CR3() uint64 { +func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 { // Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1). const noFlushBit uint64 = 0x8000000000000000 - if p.pcid != 0 { - return noFlushBit | uint64(p.rootPhysical) | uint64(p.pcid) + if noFlush && pcid != 0 { + return noFlushBit | uint64(p.rootPhysical) | uint64(pcid) } - return uint64(p.rootPhysical) -} - -// FlushCR3 returns the CR3 value that flushes the TLB. -// -// This may be called in interrupt contexts. -// -//go:nosplit -func (p *PageTables) FlushCR3() uint64 { - return uint64(p.rootPhysical) | uint64(p.pcid) + return uint64(p.rootPhysical) | uint64(pcid) } // Bits in page table entries. diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go index 509e8c0d9..4296371e8 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go @@ -16,59 +16,79 @@ package pagetables -import ( - "sync" -) - -// maxPCID is the maximum allowed PCID. -const maxPCID = 4095 +// limitPCID is the number of valid PCIDs. +const limitPCID = 4096 // PCIDs is a simple PCID database. +// +// This is not protected by locks and is thus suitable for use only with a +// single CPU at a time. type PCIDs struct { - mu sync.Mutex + // cache are the assigned page tables. + cache map[*PageTables]uint16 - // last is the last fresh PCID given out (not including the available - // pool). If last >= maxPCID, then the only PCIDs available in the - // available pool below. - last uint16 - - // available are PCIDs that have been freed. - available map[uint16]struct{} + // avail are available PCIDs. + avail []uint16 } -// NewPCIDs returns a new PCID set. -func NewPCIDs() *PCIDs { - return &PCIDs{ - available: make(map[uint16]struct{}), +// NewPCIDs returns a new PCID database. +// +// start is the first index to assign. 
Typically this will be one, as the zero +// pcid will always be flushed on transition (see pagetables_x86.go). This may +// be more than one if specific PCIDs are reserved. +// +// Nil is returned iff the start and size are out of range. +func NewPCIDs(start, size uint16) *PCIDs { + if start+uint16(size) >= limitPCID { + return nil // See comment. + } + p := &PCIDs{ + cache: make(map[*PageTables]uint16), } + for pcid := start; pcid < start+size; pcid++ { + p.avail = append(p.avail, pcid) + } + return p } -// allocate returns an unused PCID, or zero if all are taken. -func (p *PCIDs) allocate() uint16 { - p.mu.Lock() - defer p.mu.Unlock() - if len(p.available) > 0 { - for id := range p.available { - delete(p.available, id) - return id - } +// Assign assigns a PCID to the given PageTables. +// +// This may overwrite any previous assignment provided. If this in the case, +// true is returned to indicate that the PCID should be flushed. +func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) { + if pcid, ok := p.cache[pt]; ok { + return pcid, false // No flush. } - if id := p.last + 1; id <= maxPCID { - p.last = id - return id + + // Is there something available? + if len(p.avail) > 0 { + pcid := p.avail[len(p.avail)-1] + p.avail = p.avail[:len(p.avail)-1] + + // We need to flush because while this is in the available + // pool, it may have been used previously. + return pcid, true } - // Nothing available. - return 0 + + // Evict an existing table. + for old, pcid := range p.cache { + delete(p.cache, old) + p.cache[pt] = pcid + + // A flush is definitely required in this case, these page + // tables may still be active. (They will just be assigned some + // other PCID if and when they hit the given CPU again.) + return pcid, true + } + + // No PCID. + return 0, false } -// free returns a PCID to the pool. -// -// It is safe to call free with a zero pcid. That is, you may always call free -// with anything returned by allocate. 
-func (p *PCIDs) free(id uint16) { - p.mu.Lock() - defer p.mu.Unlock() - if id != 0 { - p.available[id] = struct{}{} +// Drop drops references to a set of page tables. +func (p *PCIDs) Drop(pt *PageTables) { + if pcid, ok := p.cache[pt]; ok { + delete(p.cache, pt) + p.avail = append(p.avail, pcid) } } diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go deleted file mode 100644 index 0b555cd76..000000000 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2018 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build i386 amd64 - -package pagetables - -import ( - "testing" -) - -func TestMaxPCID(t *testing.T) { - p := NewPCIDs() - for i := 0; i < maxPCID; i++ { - if id := p.allocate(); id != uint16(i+1) { - t.Errorf("got %d, expected %d", id, i+1) - } - } - if id := p.allocate(); id != 0 { - if id != 0 { - t.Errorf("got %d, expected 0", id) - } - } -} - -func TestFirstPCID(t *testing.T) { - p := NewPCIDs() - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1", id) - } -} - -func TestFreePCID(t *testing.T) { - p := NewPCIDs() - p.free(0) - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1 (not zero)", id) - } -} - -func TestReusePCID(t *testing.T) { - p := NewPCIDs() - id := p.allocate() - if id != 1 { - t.Errorf("got %d, expected 1", id) - } - p.free(id) - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1", id) - } - if id := p.allocate(); id != 2 { - t.Errorf("got %d, expected 2", id) - } -} |