summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/platform/ring0/pagetables
diff options
context:
space:
mode:
author Adin Scannell <ascannell@google.com> 2018-06-06 22:51:58 -0700
committer Shentubot <shentubot@google.com> 2018-06-06 22:52:55 -0700
commit 3374849cb553fab16e69d39cf6e49f843d94790b (patch)
tree e3131aa5e77bef84a85c354ab939fc0bf81d8b6f /pkg/sentry/platform/ring0/pagetables
parent 1b5062263b4a3ca3dc0271d9e06ad0113197344c (diff)
Split PCID implementation from page tables.
Instead of associating a single PCID with each set of page tables (which will reach the maximum quickly), allow a dynamic pool for each vCPU. This is the same way that Linux operates. We also split management of PCIDs out of the page tables themselves for simplicity. PiperOrigin-RevId: 199585631 Change-Id: I42f3486ada3cb2a26f623c65ac279b473ae63201
Diffstat (limited to 'pkg/sentry/platform/ring0/pagetables')
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/BUILD 1
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables.go 25
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go 12
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_test.go 23
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pagetables_x86.go 60
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pcids_x86.go 102
-rw-r--r-- pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go 65
7 files changed, 83 insertions, 205 deletions
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 768f96678..08b73e87d 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -26,7 +26,6 @@ go_test(
srcs = [
"pagetables_amd64_test.go",
"pagetables_test.go",
- "pcids_x86_test.go",
],
embed = [":pagetables"],
deps = ["//pkg/sentry/usermem"],
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
index 929771cca..6963ba62d 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -37,27 +37,13 @@ type PageTables struct {
}
// New returns new PageTables.
-func New(a Allocator, opts Opts) *PageTables {
+func New(a Allocator) *PageTables {
p := &PageTables{Allocator: a}
p.root = p.Allocator.NewPTEs()
p.rootPhysical = p.Allocator.PhysicalFor(p.root)
- p.init(opts)
return p
}
-// New returns a new set of PageTables derived from the given one.
-//
-// This function should always be preferred to New if there are existing
-// pagetables, as this function preserves architectural constraints relevant to
-// managing multiple sets of pagetables.
-func (p *PageTables) New(a Allocator) *PageTables {
- np := &PageTables{Allocator: a}
- np.root = np.Allocator.NewPTEs()
- np.rootPhysical = p.Allocator.PhysicalFor(np.root)
- np.initFrom(&p.archPageTables)
- return np
-}
-
// Map installs a mapping with the given physical address.
//
// True is returned iff there was a previous mapping in the range.
@@ -99,15 +85,6 @@ func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool {
return count > 0
}
-// Release releases this address space.
-//
-// This must be called to release the PCID.
-func (p *PageTables) Release() {
- // Clear all pages.
- p.Unmap(0, ^uintptr(0))
- p.release()
-}
-
// Lookup returns the physical address for the given virtual address.
func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) {
mask := uintptr(usermem.PageSize - 1)
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
index c81786133..a7f2ad9a4 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
@@ -23,7 +23,7 @@ import (
)
func Test2MAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a small page and a huge page.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -33,11 +33,10 @@ func Test2MAnd4K(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func Test1GAnd4K(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a small page and a super page.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -47,11 +46,10 @@ func Test1GAnd4K(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSplit1GPage(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a super page and knock out the middle.
pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42)
@@ -61,11 +59,10 @@ func TestSplit1GPage(t *testing.T) {
{0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSplit2MPage(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map a huge page and knock out the middle.
pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42)
@@ -75,5 +72,4 @@ func TestSplit2MPage(t *testing.T) {
{0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
index dec8def7f..28178f656 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -72,24 +72,18 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) {
}
}
-func TestAllocFree(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
- pt.Release()
-}
-
func TestUnmap(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map and unmap one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
pt.Unmap(0x400000, pteSize)
checkMappings(t, pt, nil)
- pt.Release()
}
func TestReadOnly(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -97,11 +91,10 @@ func TestReadOnly(t *testing.T) {
checkMappings(t, pt, []mapping{
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestReadWrite(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map one entry.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -109,11 +102,10 @@ func TestReadWrite(t *testing.T) {
checkMappings(t, pt, []mapping{
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
})
- pt.Release()
}
func TestSerialEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map two sequential entries.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -123,11 +115,10 @@ func TestSerialEntries(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}},
})
- pt.Release()
}
func TestSpanningEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Span a pgd with two pages.
pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -136,11 +127,10 @@ func TestSpanningEntries(t *testing.T) {
{0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}},
{0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
func TestSparseEntries(t *testing.T) {
- pt := New(NewRuntimeAllocator(), Opts{})
+ pt := New(NewRuntimeAllocator())
// Map two entries in different pgds.
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -150,5 +140,4 @@ func TestSparseEntries(t *testing.T) {
{0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}},
{0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}},
})
- pt.Release()
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
index 72a955d08..ca49d20f8 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -22,66 +22,28 @@ import (
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)
-// Opts are pagetable options.
-type Opts struct {
- EnablePCID bool
-}
-
-// archPageTables has x86-specific features.
+// archPageTables is architecture-specific data.
type archPageTables struct {
- // pcids is the PCID database.
- pcids *PCIDs
-
- // pcid is the globally unique identifier, or zero if none were
- // available or pcids is nil.
+ // pcid is the value assigned by PCIDs.Assign.
+ //
+ // Note that zero is a valid PCID.
pcid uint16
}
-// init initializes arch-specific features.
-func (a *archPageTables) init(opts Opts) {
- if opts.EnablePCID {
- a.pcids = NewPCIDs()
- a.pcid = a.pcids.allocate()
- }
-}
-
-// initFrom initializes arch-specific features from an existing entry.'
-func (a *archPageTables) initFrom(other *archPageTables) {
- a.pcids = other.pcids // Refer to the same PCID database.
- if a.pcids != nil {
- a.pcid = a.pcids.allocate()
- }
-}
-
-// release is called from Release.
-func (a *archPageTables) release() {
- // Return the PCID.
- if a.pcids != nil {
- a.pcids.free(a.pcid)
- }
-}
-
// CR3 returns the CR3 value for these tables.
//
-// This may be called in interrupt contexts.
+// This may be called in interrupt contexts. A PCID of zero always implies a
+// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for
+// more information.
//
//go:nosplit
-func (p *PageTables) CR3() uint64 {
+func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 {
// Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
const noFlushBit uint64 = 0x8000000000000000
- if p.pcid != 0 {
- return noFlushBit | uint64(p.rootPhysical) | uint64(p.pcid)
+ if noFlush && pcid != 0 {
+ return noFlushBit | uint64(p.rootPhysical) | uint64(pcid)
}
- return uint64(p.rootPhysical)
-}
-
-// FlushCR3 returns the CR3 value that flushes the TLB.
-//
-// This may be called in interrupt contexts.
-//
-//go:nosplit
-func (p *PageTables) FlushCR3() uint64 {
- return uint64(p.rootPhysical) | uint64(p.pcid)
+ return uint64(p.rootPhysical) | uint64(pcid)
}
// Bits in page table entries.
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
index 509e8c0d9..4296371e8 100644
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go
@@ -16,59 +16,79 @@
package pagetables
-import (
- "sync"
-)
-
-// maxPCID is the maximum allowed PCID.
-const maxPCID = 4095
+// limitPCID is the number of valid PCIDs.
+const limitPCID = 4096
// PCIDs is a simple PCID database.
+//
+// This is not protected by locks and is thus suitable for use only with a
+// single CPU at a time.
type PCIDs struct {
- mu sync.Mutex
+ // cache are the assigned page tables.
+ cache map[*PageTables]uint16
- // last is the last fresh PCID given out (not including the available
- // pool). If last >= maxPCID, then the only PCIDs available in the
- // available pool below.
- last uint16
-
- // available are PCIDs that have been freed.
- available map[uint16]struct{}
+ // avail are available PCIDs.
+ avail []uint16
}
-// NewPCIDs returns a new PCID set.
-func NewPCIDs() *PCIDs {
- return &PCIDs{
- available: make(map[uint16]struct{}),
+// NewPCIDs returns a new PCID database.
+//
+// start is the first index to assign. Typically this will be one, as the zero
+// pcid will always be flushed on transition (see pagetables_x86.go). This may
+// be more than one if specific PCIDs are reserved.
+//
+// Nil is returned iff the start and size are out of range.
+func NewPCIDs(start, size uint16) *PCIDs {
+ if start+uint16(size) >= limitPCID {
+ return nil // See comment.
+ }
+ p := &PCIDs{
+ cache: make(map[*PageTables]uint16),
}
+ for pcid := start; pcid < start+size; pcid++ {
+ p.avail = append(p.avail, pcid)
+ }
+ return p
}
-// allocate returns an unused PCID, or zero if all are taken.
-func (p *PCIDs) allocate() uint16 {
- p.mu.Lock()
- defer p.mu.Unlock()
- if len(p.available) > 0 {
- for id := range p.available {
- delete(p.available, id)
- return id
- }
+// Assign assigns a PCID to the given PageTables.
+//
+// This may overwrite any previous assignment provided. If this is the case,
+// true is returned to indicate that the PCID should be flushed.
+func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) {
+ if pcid, ok := p.cache[pt]; ok {
+ return pcid, false // No flush.
}
- if id := p.last + 1; id <= maxPCID {
- p.last = id
- return id
+
+ // Is there something available?
+ if len(p.avail) > 0 {
+ pcid := p.avail[len(p.avail)-1]
+ p.avail = p.avail[:len(p.avail)-1]
+
+ // We need to flush because while this is in the available
+ // pool, it may have been used previously.
+ return pcid, true
}
- // Nothing available.
- return 0
+
+ // Evict an existing table.
+ for old, pcid := range p.cache {
+ delete(p.cache, old)
+ p.cache[pt] = pcid
+
+ // A flush is definitely required in this case, these page
+ // tables may still be active. (They will just be assigned some
+ // other PCID if and when they hit the given CPU again.)
+ return pcid, true
+ }
+
+ // No PCID.
+ return 0, false
}
-// free returns a PCID to the pool.
-//
-// It is safe to call free with a zero pcid. That is, you may always call free
-// with anything returned by allocate.
-func (p *PCIDs) free(id uint16) {
- p.mu.Lock()
- defer p.mu.Unlock()
- if id != 0 {
- p.available[id] = struct{}{}
+// Drop drops references to a set of page tables.
+func (p *PCIDs) Drop(pt *PageTables) {
+ if pcid, ok := p.cache[pt]; ok {
+ delete(p.cache, pt)
+ p.avail = append(p.avail, pcid)
}
}
diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
deleted file mode 100644
index 0b555cd76..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2018 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// +build i386 amd64
-
-package pagetables
-
-import (
- "testing"
-)
-
-func TestMaxPCID(t *testing.T) {
- p := NewPCIDs()
- for i := 0; i < maxPCID; i++ {
- if id := p.allocate(); id != uint16(i+1) {
- t.Errorf("got %d, expected %d", id, i+1)
- }
- }
- if id := p.allocate(); id != 0 {
- if id != 0 {
- t.Errorf("got %d, expected 0", id)
- }
- }
-}
-
-func TestFirstPCID(t *testing.T) {
- p := NewPCIDs()
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
-}
-
-func TestFreePCID(t *testing.T) {
- p := NewPCIDs()
- p.free(0)
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1 (not zero)", id)
- }
-}
-
-func TestReusePCID(t *testing.T) {
- p := NewPCIDs()
- id := p.allocate()
- if id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
- p.free(id)
- if id := p.allocate(); id != 1 {
- t.Errorf("got %d, expected 1", id)
- }
- if id := p.allocate(); id != 2 {
- t.Errorf("got %d, expected 2", id)
- }
-}