diff options
Diffstat (limited to 'pkg/sentry/platform/ring0')
-rw-r--r-- | pkg/sentry/platform/ring0/defs.go | 3 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/defs_amd64.go | 15 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/kernel_amd64.go | 13 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables.go | 25 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go | 12 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_test.go | 23 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_x86.go | 60 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_x86.go | 102 | ||||
-rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go | 65 |
10 files changed, 103 insertions, 216 deletions
diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go index 7b3bed1c7..f09d045eb 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/sentry/platform/ring0/defs.go @@ -109,4 +109,7 @@ type SwitchOpts struct { // FullRestore indicates that an iret-based restore should be used. FullRestore bool + + // SwitchArchOpts are architecture-specific options. + SwitchArchOpts } diff --git a/pkg/sentry/platform/ring0/defs_amd64.go b/pkg/sentry/platform/ring0/defs_amd64.go index bb3420125..0d068c00a 100644 --- a/pkg/sentry/platform/ring0/defs_amd64.go +++ b/pkg/sentry/platform/ring0/defs_amd64.go @@ -104,6 +104,21 @@ func (c *CPU) ErrorCode() (value uintptr, user bool) { return c.errorCode, c.errorType != 0 } +// SwitchArchOpts are embedded in SwitchOpts. +type SwitchArchOpts struct { + // UserPCID indicates the application PCID to be used on switch, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + UserPCID uint16 + + // KernelPCID indicates the kernel PCID to be used on return, + // assuming that PCIDs are supported. + // + // Per pagetables_x86.go, a zero PCID implies a flush. + KernelPCID uint16 +} + func init() { KernelCodeSegment.setCode64(0, 0, 0) KernelDataSegment.setData(0, 0xffffffff, 0) diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 58ac4b4b2..37d5484e1 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -180,23 +180,14 @@ func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { if !IsCanonical(regs.Rip) || !IsCanonical(regs.Rsp) || !IsCanonical(regs.Fs_base) || !IsCanonical(regs.Gs_base) { return GeneralProtectionFault } - - var ( - userCR3 uint64 - kernelCR3 uint64 - ) + userCR3 := switchOpts.PageTables.CR3(!switchOpts.Flush, switchOpts.UserPCID) + kernelCR3 := c.kernel.PageTables.CR3(true, switchOpts.KernelPCID) // Sanitize registers. 
- if switchOpts.Flush { - userCR3 = switchOpts.PageTables.FlushCR3() - } else { - userCR3 = switchOpts.PageTables.CR3() - } regs.Eflags &= ^uint64(UserFlagsClear) regs.Eflags |= UserFlagsSet regs.Cs = uint64(Ucode64) // Required for iret. regs.Ss = uint64(Udata) // Ditto. - kernelCR3 = c.kernel.PageTables.CR3() // Perform the switch. swapgs() // GS will be swapped on return. diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 768f96678..08b73e87d 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -26,7 +26,6 @@ go_test( srcs = [ "pagetables_amd64_test.go", "pagetables_test.go", - "pcids_x86_test.go", ], embed = [":pagetables"], deps = ["//pkg/sentry/usermem"], diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index 929771cca..6963ba62d 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -37,27 +37,13 @@ type PageTables struct { } // New returns new PageTables. -func New(a Allocator, opts Opts) *PageTables { +func New(a Allocator) *PageTables { p := &PageTables{Allocator: a} p.root = p.Allocator.NewPTEs() p.rootPhysical = p.Allocator.PhysicalFor(p.root) - p.init(opts) return p } -// New returns a new set of PageTables derived from the given one. -// -// This function should always be preferred to New if there are existing -// pagetables, as this function preserves architectural constraints relevant to -// managing multiple sets of pagetables. -func (p *PageTables) New(a Allocator) *PageTables { - np := &PageTables{Allocator: a} - np.root = np.Allocator.NewPTEs() - np.rootPhysical = p.Allocator.PhysicalFor(np.root) - np.initFrom(&p.archPageTables) - return np -} - // Map installs a mapping with the given physical address. // // True is returned iff there was a previous mapping in the range. 
@@ -99,15 +85,6 @@ func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool { return count > 0 } -// Release releases this address space. -// -// This must be called to release the PCID. -func (p *PageTables) Release() { - // Clear all pages. - p.Unmap(0, ^uintptr(0)) - p.release() -} - // Lookup returns the physical address for the given virtual address. func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) { mask := uintptr(usermem.PageSize - 1) diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go index c81786133..a7f2ad9a4 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go @@ -23,7 +23,7 @@ import ( ) func Test2MAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a small page and a huge page. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -33,11 +33,10 @@ func Test2MAnd4K(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func Test1GAnd4K(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a small page and a super page. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -47,11 +46,10 @@ func Test1GAnd4K(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSplit1GPage(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a super page and knock out the middle. 
pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42) @@ -61,11 +59,10 @@ func TestSplit1GPage(t *testing.T) { {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSplit2MPage(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map a huge page and knock out the middle. pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42) @@ -75,5 +72,4 @@ func TestSplit2MPage(t *testing.T) { {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index dec8def7f..28178f656 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -72,24 +72,18 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) { } } -func TestAllocFree(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) - pt.Release() -} - func TestUnmap(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map and unmap one entry. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) pt.Unmap(0x400000, pteSize) checkMappings(t, pt, nil) - pt.Release() } func TestReadOnly(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map one entry. 
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) @@ -97,11 +91,10 @@ func TestReadOnly(t *testing.T) { checkMappings(t, pt, []mapping{ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestReadWrite(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map one entry. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -109,11 +102,10 @@ func TestReadWrite(t *testing.T) { checkMappings(t, pt, []mapping{ {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, }) - pt.Release() } func TestSerialEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map two sequential entries. pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -123,11 +115,10 @@ func TestSerialEntries(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}}, }) - pt.Release() } func TestSpanningEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Span a pgd with two pages. pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) @@ -136,11 +127,10 @@ func TestSpanningEntries(t *testing.T) { {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } func TestSparseEntries(t *testing.T) { - pt := New(NewRuntimeAllocator(), Opts{}) + pt := New(NewRuntimeAllocator()) // Map two entries in different pgds. 
pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) @@ -150,5 +140,4 @@ func TestSparseEntries(t *testing.T) { {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}}, }) - pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index 72a955d08..ca49d20f8 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -22,66 +22,28 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) -// Opts are pagetable options. -type Opts struct { - EnablePCID bool -} - -// archPageTables has x86-specific features. +// archPageTables is architecture-specific data. type archPageTables struct { - // pcids is the PCID database. - pcids *PCIDs - - // pcid is the globally unique identifier, or zero if none were - // available or pcids is nil. + // pcid is the value assigned by PCIDs.Assign. + // + // Note that zero is a valid PCID. pcid uint16 } -// init initializes arch-specific features. -func (a *archPageTables) init(opts Opts) { - if opts.EnablePCID { - a.pcids = NewPCIDs() - a.pcid = a.pcids.allocate() - } -} - -// initFrom initializes arch-specific features from an existing entry.' -func (a *archPageTables) initFrom(other *archPageTables) { - a.pcids = other.pcids // Refer to the same PCID database. - if a.pcids != nil { - a.pcid = a.pcids.allocate() - } -} - -// release is called from Release. -func (a *archPageTables) release() { - // Return the PCID. - if a.pcids != nil { - a.pcids.free(a.pcid) - } -} - // CR3 returns the CR3 value for these tables. // -// This may be called in interrupt contexts. +// This may be called in interrupt contexts. A PCID of zero always implies a +// flush and should be passed when PCIDs are not enabled. See pcids_x86.go for +// more information. 
// //go:nosplit -func (p *PageTables) CR3() uint64 { +func (p *PageTables) CR3(noFlush bool, pcid uint16) uint64 { // Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1). const noFlushBit uint64 = 0x8000000000000000 - if p.pcid != 0 { - return noFlushBit | uint64(p.rootPhysical) | uint64(p.pcid) + if noFlush && pcid != 0 { + return noFlushBit | uint64(p.rootPhysical) | uint64(pcid) } - return uint64(p.rootPhysical) -} - -// FlushCR3 returns the CR3 value that flushes the TLB. -// -// This may be called in interrupt contexts. -// -//go:nosplit -func (p *PageTables) FlushCR3() uint64 { - return uint64(p.rootPhysical) | uint64(p.pcid) + return uint64(p.rootPhysical) | uint64(pcid) } // Bits in page table entries. diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go index 509e8c0d9..4296371e8 100644 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pcids_x86.go @@ -16,59 +16,79 @@ package pagetables -import ( - "sync" -) - -// maxPCID is the maximum allowed PCID. -const maxPCID = 4095 +// limitPCID is the number of valid PCIDs. +const limitPCID = 4096 // PCIDs is a simple PCID database. +// +// This is not protected by locks and is thus suitable for use only with a +// single CPU at a time. type PCIDs struct { - mu sync.Mutex + // cache are the assigned page tables. + cache map[*PageTables]uint16 - // last is the last fresh PCID given out (not including the available - // pool). If last >= maxPCID, then the only PCIDs available in the - // available pool below. - last uint16 - - // available are PCIDs that have been freed. - available map[uint16]struct{} + // avail are available PCIDs. + avail []uint16 } -// NewPCIDs returns a new PCID set. -func NewPCIDs() *PCIDs { - return &PCIDs{ - available: make(map[uint16]struct{}), +// NewPCIDs returns a new PCID database. +// +// start is the first index to assign. 
Typically this will be one, as the zero +// pcid will always be flushed on transition (see pagetables_x86.go). This may +// be more than one if specific PCIDs are reserved. +// +// Nil is returned iff the start and size are out of range. +func NewPCIDs(start, size uint16) *PCIDs { + if start+uint16(size) >= limitPCID { + return nil // See comment. + } + p := &PCIDs{ + cache: make(map[*PageTables]uint16), } + for pcid := start; pcid < start+size; pcid++ { + p.avail = append(p.avail, pcid) + } + return p } -// allocate returns an unused PCID, or zero if all are taken. -func (p *PCIDs) allocate() uint16 { - p.mu.Lock() - defer p.mu.Unlock() - if len(p.available) > 0 { - for id := range p.available { - delete(p.available, id) - return id - } +// Assign assigns a PCID to the given PageTables. +// +// This may overwrite any previous assignment provided. If this is the case, +// true is returned to indicate that the PCID should be flushed. +func (p *PCIDs) Assign(pt *PageTables) (uint16, bool) { + if pcid, ok := p.cache[pt]; ok { + return pcid, false // No flush. } - if id := p.last + 1; id <= maxPCID { - p.last = id - return id + + // Is there something available? + if len(p.avail) > 0 { + pcid := p.avail[len(p.avail)-1] + p.avail = p.avail[:len(p.avail)-1] + + // We need to flush because while this is in the available + // pool, it may have been used previously. + return pcid, true } - // Nothing available. - return 0 + + // Evict an existing table. + for old, pcid := range p.cache { + delete(p.cache, old) + p.cache[pt] = pcid + + // A flush is definitely required in this case, these page + // tables may still be active. (They will just be assigned some + // other PCID if and when they hit the given CPU again.) + return pcid, true + } + + // No PCID. + return 0, false } -// free returns a PCID to the pool. -// -// It is safe to call free with a zero pcid. That is, you may always call free -// with anything returned by allocate. 
-func (p *PCIDs) free(id uint16) { - p.mu.Lock() - defer p.mu.Unlock() - if id != 0 { - p.available[id] = struct{}{} +// Drop drops references to a set of page tables. +func (p *PCIDs) Drop(pt *PageTables) { + if pcid, ok := p.cache[pt]; ok { + delete(p.cache, pt) + p.avail = append(p.avail, pcid) } } diff --git a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go deleted file mode 100644 index 0b555cd76..000000000 --- a/pkg/sentry/platform/ring0/pagetables/pcids_x86_test.go +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2018 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build i386 amd64 - -package pagetables - -import ( - "testing" -) - -func TestMaxPCID(t *testing.T) { - p := NewPCIDs() - for i := 0; i < maxPCID; i++ { - if id := p.allocate(); id != uint16(i+1) { - t.Errorf("got %d, expected %d", id, i+1) - } - } - if id := p.allocate(); id != 0 { - if id != 0 { - t.Errorf("got %d, expected 0", id) - } - } -} - -func TestFirstPCID(t *testing.T) { - p := NewPCIDs() - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1", id) - } -} - -func TestFreePCID(t *testing.T) { - p := NewPCIDs() - p.free(0) - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1 (not zero)", id) - } -} - -func TestReusePCID(t *testing.T) { - p := NewPCIDs() - id := p.allocate() - if id != 1 { - t.Errorf("got %d, expected 1", id) - } - p.free(id) - if id := p.allocate(); id != 1 { - t.Errorf("got %d, expected 1", id) - } - if id := p.allocate(); id != 2 { - t.Errorf("got %d, expected 2", id) - } -} |