Diffstat (limited to 'pkg')
 pkg/sentry/platform/kvm/BUILD                                  |   1
 pkg/sentry/platform/kvm/address_space.go                       |   5
 pkg/sentry/platform/kvm/allocator.go                           |  69
 pkg/sentry/platform/kvm/bluepill_fault.go                      |   4
 pkg/sentry/platform/kvm/kvm.go                                 |   2
 pkg/sentry/platform/kvm/machine.go                             |   6
 pkg/sentry/platform/kvm/machine_unsafe.go                      |  12
 pkg/sentry/platform/kvm/physical_map.go                        |  14
 pkg/sentry/platform/ring0/pagetables/BUILD                     |   3
 pkg/sentry/platform/ring0/pagetables/allocator.go              | 109
 pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go       |  53
 pkg/sentry/platform/ring0/pagetables/pagetables.go             |  89
 pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go       |  56
 pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go  |   8
 pkg/sentry/platform/ring0/pagetables/pagetables_test.go        |  21
 pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go      |  31
 pkg/sentry/platform/ring0/pagetables/pagetables_x86.go         |  15
17 files changed, 326 insertions(+), 172 deletions(-)
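
At its core, the change swaps the pagetables package's Translator interface for an Allocator interface: instead of asking the platform only to translate a *PTEs pointer to a physical address, the package now delegates allocation, reverse lookup, and reuse of page-table pages entirely. Both declarations below appear verbatim in the hunks that follow; the inline comments are added here for orientation only:

	// Before: the package allocated Nodes itself and asked the platform
	// only for a one-way virtual-to-physical translation.
	type Translator interface {
		TranslateToPhysical(*PTEs) uintptr
	}

	// After: the platform supplies the whole allocation strategy.
	type Allocator interface {
		NewPTEs() *PTEs                    // allocate a page of entries
		PhysicalFor(ptes *PTEs) uintptr    // forward translation
		LookupPTEs(physical uintptr) *PTEs // reverse translation
		FreePTEs(ptes *PTEs)               // return a page for reuse
	}

This removes the allNodes map and the unaligned Node type from PageTables, so the hot paths can be marked //go:nosplit and run safely from signal-handler context, as long as the allocator's pool is sufficiently full.
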
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index 004938080..89d98c5c7 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -28,6 +28,7 @@ go_library(
     srcs = [
         "address_space.go",
         "address_space_unsafe.go",
+        "allocator.go",
         "bluepill.go",
         "bluepill_amd64.go",
        "bluepill_amd64.s",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 4c76883ad..15d45f5bc 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -84,7 +84,7 @@ func (as *addressSpace) Touch(c *vCPU) bool {
 
 func (as *addressSpace) mapHost(addr usermem.Addr, m hostMapEntry, at usermem.AccessType) (inv bool) {
 	for m.length > 0 {
-		physical, length, ok := TranslateToPhysical(m.addr)
+		physical, length, ok := translateToPhysical(m.addr)
 		if !ok {
 			panic("unable to translate segment")
 		}
@@ -227,4 +227,7 @@ func (as *addressSpace) Unmap(addr usermem.Addr, length uint64) {
 func (as *addressSpace) Release() {
 	as.Unmap(0, ^uint64(0))
 	as.pageTables.Release()
+
+	// Free all pages from the allocator.
+	as.pageTables.Allocator.(allocator).base.Drain()
 }
diff --git a/pkg/sentry/platform/kvm/allocator.go b/pkg/sentry/platform/kvm/allocator.go
new file mode 100644
index 000000000..80066bfc5
--- /dev/null
+++ b/pkg/sentry/platform/kvm/allocator.go
@@ -0,0 +1,69 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kvm
+
+import (
+	"fmt"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
+)
+
+type allocator struct {
+	base *pagetables.RuntimeAllocator
+}
+
+// newAllocator is used to define the allocator.
+func newAllocator() allocator {
+	return allocator{
+		base: pagetables.NewRuntimeAllocator(),
+	}
+}
+
+// NewPTEs implements pagetables.Allocator.NewPTEs.
+//
+//go:nosplit
+func (a allocator) NewPTEs() *pagetables.PTEs {
+	return a.base.NewPTEs()
+}
+
+// PhysicalFor returns the physical address for a set of PTEs.
+//
+//go:nosplit
+func (a allocator) PhysicalFor(ptes *pagetables.PTEs) uintptr {
+	virtual := a.base.PhysicalFor(ptes)
+	physical, _, ok := translateToPhysical(virtual)
+	if !ok {
+		panic(fmt.Sprintf("PhysicalFor failed for %p", ptes))
+	}
+	return physical
+}
+
+// LookupPTEs implements pagetables.Allocator.LookupPTEs.
+//
+//go:nosplit
+func (a allocator) LookupPTEs(physical uintptr) *pagetables.PTEs {
+	virtualStart, physicalStart, _, ok := calculateBluepillFault(physical)
+	if !ok {
+		panic(fmt.Sprintf("LookupPTEs failed for 0x%x", physical))
+	}
+	return a.base.LookupPTEs(virtualStart + (physical - physicalStart))
+}
+
+// FreePTEs implements pagetables.Allocator.FreePTEs.
+//
+//go:nosplit
+func (a allocator) FreePTEs(ptes *pagetables.PTEs) {
+	a.base.FreePTEs(ptes)
+}
diff --git a/pkg/sentry/platform/kvm/bluepill_fault.go b/pkg/sentry/platform/kvm/bluepill_fault.go
index 7c8c7bc37..8650cd78f 100644
--- a/pkg/sentry/platform/kvm/bluepill_fault.go
+++ b/pkg/sentry/platform/kvm/bluepill_fault.go
@@ -46,7 +46,7 @@ func yield() {
 // calculateBluepillFault calculates the fault address range.
 //
 //go:nosplit
-func calculateBluepillFault(m *machine, physical uintptr) (virtualStart, physicalStart, length uintptr, ok bool) {
+func calculateBluepillFault(physical uintptr) (virtualStart, physicalStart, length uintptr, ok bool) {
 	alignedPhysical := physical &^ uintptr(usermem.PageSize-1)
 	for _, pr := range physicalRegions {
 		end := pr.physical + pr.length
@@ -82,7 +82,7 @@ func handleBluepillFault(m *machine, physical uintptr) (uintptr, bool) {
 	// fault. This all has to be done in this function because we're in a
 	// signal handler context. (We can't call any functions that might
 	// split the stack.)
-	virtualStart, physicalStart, length, ok := calculateBluepillFault(m, physical)
+	virtualStart, physicalStart, length, ok := calculateBluepillFault(physical)
 	if !ok {
 		return 0, false
 	}
diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go
index 6defb1c46..13c363993 100644
--- a/pkg/sentry/platform/kvm/kvm.go
+++ b/pkg/sentry/platform/kvm/kvm.go
@@ -121,7 +121,7 @@ func (*KVM) MaxUserAddress() usermem.Addr {
 // NewAddressSpace returns a new pagetable root.
 func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) {
 	// Allocate page tables and install system mappings.
-	pageTables := k.machine.kernel.PageTables.New()
+	pageTables := k.machine.kernel.PageTables.New(newAllocator())
 	applyPhysicalRegions(func(pr physicalRegion) bool {
 		// Map the kernel in the upper half.
 		pageTables.Map(
diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go
index 5a6109ced..949abd838 100644
--- a/pkg/sentry/platform/kvm/machine.go
+++ b/pkg/sentry/platform/kvm/machine.go
@@ -133,7 +133,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
 		vCPUs = n
 	}
 	m.kernel = ring0.New(ring0.KernelOpts{
-		PageTables: pagetables.New(m, pagetablesOpts),
+		PageTables: pagetables.New(newAllocator(), pagetablesOpts),
 	})
 
 	// Initialize architecture state.
@@ -211,7 +211,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
 			return // skip region.
 		}
 		for virtual := vr.virtual; virtual < vr.virtual+vr.length; {
-			physical, length, ok := TranslateToPhysical(virtual)
+			physical, length, ok := translateToPhysical(virtual)
 			if !ok {
 				// This must be an invalid region that was
 				// knocked out by creation of the physical map.
@@ -239,7 +239,7 @@ func newMachine(vm int, vCPUs int) (*machine, error) {
 // This panics on error.
 func (m *machine) mapPhysical(physical, length uintptr) {
 	for end := physical + length; physical < end; {
-		_, physicalStart, length, ok := calculateBluepillFault(m, physical)
+		_, physicalStart, length, ok := calculateBluepillFault(physical)
 		if !ok {
 			// Should never happen.
 			panic("mapPhysical on unknown physical address")
diff --git a/pkg/sentry/platform/kvm/machine_unsafe.go b/pkg/sentry/platform/kvm/machine_unsafe.go
index 516098a2b..86323c891 100644
--- a/pkg/sentry/platform/kvm/machine_unsafe.go
+++ b/pkg/sentry/platform/kvm/machine_unsafe.go
@@ -21,7 +21,6 @@ import (
 	"unsafe"
 
 	"gvisor.googlesource.com/gvisor/pkg/abi/linux"
-	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
 )
 
 //go:linkname entersyscall runtime.entersyscall
@@ -30,17 +29,6 @@ func entersyscall()
 //go:linkname exitsyscall runtime.exitsyscall
 func exitsyscall()
 
-// TranslateToVirtual implements pagetables.Translater.TranslateToPhysical.
-func (m *machine) TranslateToPhysical(ptes *pagetables.PTEs) uintptr {
-	// The length doesn't matter because all these translations require
-	// only a single page, which is guaranteed to be satisfied.
-	physical, _, ok := TranslateToPhysical(uintptr(unsafe.Pointer(ptes)))
-	if !ok {
-		panic("unable to translate pagetables.Node to physical address")
-	}
-	return physical
-}
-
 // mapRunData maps the vCPU run data.
 func mapRunData(fd int) (*runData, error) {
 	r, _, errno := syscall.RawSyscall6(
diff --git a/pkg/sentry/platform/kvm/physical_map.go b/pkg/sentry/platform/kvm/physical_map.go
index 5d55c9486..81a98656d 100644
--- a/pkg/sentry/platform/kvm/physical_map.go
+++ b/pkg/sentry/platform/kvm/physical_map.go
@@ -205,17 +205,19 @@ func applyPhysicalRegions(fn func(pr physicalRegion) bool) bool {
 	return true
 }
 
-// TranslateToPhysical translates the given virtual address.
+// translateToPhysical translates the given virtual address.
 //
 // Precondition: physicalInit must have been called.
-func TranslateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
-	ok = !applyPhysicalRegions(func(pr physicalRegion) bool {
+//
+//go:nosplit
+func translateToPhysical(virtual uintptr) (physical uintptr, length uintptr, ok bool) {
+	for _, pr := range physicalRegions {
 		if pr.virtual <= virtual && virtual < pr.virtual+pr.length {
 			physical = pr.physical + (virtual - pr.virtual)
 			length = pr.length - (virtual - pr.virtual)
-			return false
+			ok = true
+			return
 		}
-		return true
-	})
+	}
 	return
 }
diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD
index 1a8b7931e..768f96678 100644
--- a/pkg/sentry/platform/ring0/pagetables/BUILD
+++ b/pkg/sentry/platform/ring0/pagetables/BUILD
@@ -5,9 +5,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 go_library(
     name = "pagetables",
     srcs = [
+        "allocator.go",
+        "allocator_unsafe.go",
         "pagetables.go",
         "pagetables_amd64.go",
-        "pagetables_unsafe.go",
         "pagetables_x86.go",
         "pcids_x86.go",
     ],
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator.go b/pkg/sentry/platform/ring0/pagetables/allocator.go
new file mode 100644
index 000000000..1499623fb
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/allocator.go
@@ -0,0 +1,109 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+// Allocator is used to allocate and map PTEs.
+//
+// Note that allocators may be called concurrently.
+type Allocator interface {
+	// NewPTEs returns a new set of PTEs and their physical address.
+	NewPTEs() *PTEs
+
+	// PhysicalFor gives the physical address for a set of PTEs.
+	PhysicalFor(ptes *PTEs) uintptr
+
+	// LookupPTEs looks up PTEs by physical address.
+	LookupPTEs(physical uintptr) *PTEs
+
+	// FreePTEs frees a set of PTEs.
+	FreePTEs(ptes *PTEs)
+}
+
+// RuntimeAllocator is a trivial allocator.
+type RuntimeAllocator struct {
+	// used is the set of PTEs that have been allocated. This includes any
+	// PTEs that may be in the pool below. PTEs are only freed from this
+	// map by the Drain call.
+	//
+	// This exists to prevent accidental garbage collection.
+	used map[*PTEs]struct{}
+
+	// pool is the set of free-to-use PTEs.
+	pool []*PTEs
+}
+
+// NewRuntimeAllocator returns an allocator that uses runtime allocation.
+func NewRuntimeAllocator() *RuntimeAllocator {
+	return &RuntimeAllocator{
+		used: make(map[*PTEs]struct{}),
+	}
+}
+
+// Drain empties the pool.
+func (r *RuntimeAllocator) Drain() {
+	for i, ptes := range r.pool {
+		// Zap the entry in the underlying array to ensure that it can
+		// be properly garbage collected.
+		r.pool[i] = nil
+		// Similarly, free the reference held by the used map (these
+		// also apply for the pool entries).
+		delete(r.used, ptes)
+	}
+	r.pool = r.pool[:0]
+}
+
+// NewPTEs implements Allocator.NewPTEs.
+//
+// Note that the "physical" address here is actually the virtual address of the
+// PTEs structure. The entries are tracked only to avoid garbage collection.
+//
+// This is guaranteed not to split as long as the pool is sufficiently full.
+//
+//go:nosplit
+func (r *RuntimeAllocator) NewPTEs() *PTEs {
+	// Pull from the pool if we can.
+	if len(r.pool) > 0 {
+		ptes := r.pool[len(r.pool)-1]
+		r.pool = r.pool[:len(r.pool)-1]
+		return ptes
+	}
+
+	// Allocate a new entry.
+	ptes := newAlignedPTEs()
+	r.used[ptes] = struct{}{}
+	return ptes
+}
+
+// PhysicalFor returns the physical address for the given PTEs.
+//
+//go:nosplit
+func (r *RuntimeAllocator) PhysicalFor(ptes *PTEs) uintptr {
+	return physicalFor(ptes)
+}
+
+// LookupPTEs implements Allocator.LookupPTEs.
+//
+//go:nosplit
+func (r *RuntimeAllocator) LookupPTEs(physical uintptr) *PTEs {
+	return fromPhysical(physical)
+}
+
+// FreePTEs implements Allocator.FreePTEs.
+//
+//go:nosplit
+func (r *RuntimeAllocator) FreePTEs(ptes *PTEs) {
+	// Add to the pool.
+	r.pool = append(r.pool, ptes)
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
new file mode 100644
index 000000000..aca778913
--- /dev/null
+++ b/pkg/sentry/platform/ring0/pagetables/allocator_unsafe.go
@@ -0,0 +1,53 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pagetables
+
+import (
+	"unsafe"
+
+	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
+)
+
+// newAlignedPTEs returns a set of aligned PTEs.
+func newAlignedPTEs() *PTEs {
+	ptes := new(PTEs)
+	offset := physicalFor(ptes) & (usermem.PageSize - 1)
+	if offset == 0 {
+		// Already aligned.
+		return ptes
+	}
+
+	// Need to force an aligned allocation.
+	unaligned := make([]byte, (2*usermem.PageSize)-1)
+	offset = uintptr(unsafe.Pointer(&unaligned[0])) & (usermem.PageSize - 1)
+	if offset != 0 {
+		offset = usermem.PageSize - offset
+	}
+	return (*PTEs)(unsafe.Pointer(&unaligned[offset]))
+}
+
+// physicalFor returns the "physical" address for PTEs.
+//
+//go:nosplit
+func physicalFor(ptes *PTEs) uintptr {
+	return uintptr(unsafe.Pointer(ptes))
+}
+
+// fromPhysical returns the PTEs from the "physical" address.
+//
+//go:nosplit
+func fromPhysical(physical uintptr) *PTEs {
+	return (*PTEs)(unsafe.Pointer(physical))
+}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go
index 2a83bbff2..929771cca 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go
@@ -19,52 +19,28 @@ import (
 	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
 )
 
-// Node is a single node within a set of page tables.
-type Node struct {
-	// unalignedData has unaligned data. Unfortunately, we can't really
-	// rely on the allocator to give us what we want here. So we just throw
-	// it at the wall and use the portion that matches. Gross. This may be
-	// changed in the future to use a different allocation mechanism.
-	//
-	// Access must happen via functions found in pagetables_unsafe.go.
-	unalignedData [(2 * usermem.PageSize) - 1]byte
-
-	// physical is the translated address of these entries.
-	//
-	// This is filled in at creation time.
-	physical uintptr
-}
-
 // PageTables is a set of page tables.
 type PageTables struct {
+	// Allocator is used to allocate nodes.
+	Allocator Allocator
+
 	// root is the pagetable root.
-	root *Node
+	root *PTEs
 
-	// translator is the translator passed at creation.
-	translator Translator
+	// rootPhysical is the cached physical address of the root.
+	//
+	// This is saved only to prevent constant translation.
+	rootPhysical uintptr
 
 	// archPageTables includes architecture-specific features.
 	archPageTables
-
-	// allNodes is a set of nodes indexed by translator address.
-	allNodes map[uintptr]*Node
-}
-
-// Translator translates to guest physical addresses.
-type Translator interface {
-	// TranslateToPhysical translates the given pointer object into a
-	// "physical" address. We do not require that it translates back, the
-	// reverse mapping is maintained internally.
-	TranslateToPhysical(*PTEs) uintptr
 }
 
 // New returns new PageTables.
-func New(t Translator, opts Opts) *PageTables {
-	p := &PageTables{
-		translator: t,
-		allNodes:   make(map[uintptr]*Node),
-	}
-	p.root = p.allocNode()
+func New(a Allocator, opts Opts) *PageTables {
+	p := &PageTables{Allocator: a}
+	p.root = p.Allocator.NewPTEs()
+	p.rootPhysical = p.Allocator.PhysicalFor(p.root)
 	p.init(opts)
 	return p
 }
@@ -74,40 +50,14 @@ func New(t Translator, opts Opts) *PageTables {
 // This function should always be preferred to New if there are existing
 // pagetables, as this function preserves architectural constraints relevant to
 // managing multiple sets of pagetables.
-func (p *PageTables) New() *PageTables {
-	np := &PageTables{
-		translator: p.translator,
-		allNodes:   make(map[uintptr]*Node),
-	}
-	np.root = np.allocNode()
+func (p *PageTables) New(a Allocator) *PageTables {
+	np := &PageTables{Allocator: a}
+	np.root = np.Allocator.NewPTEs()
+	np.rootPhysical = p.Allocator.PhysicalFor(np.root)
 	np.initFrom(&p.archPageTables)
 	return np
 }
 
-// setPageTable sets the given index as a page table.
-func (p *PageTables) setPageTable(n *Node, index int, child *Node) {
-	phys := p.translator.TranslateToPhysical(child.PTEs())
-	p.allNodes[phys] = child
-	pte := &n.PTEs()[index]
-	pte.setPageTable(phys)
-}
-
-// clearPageTable clears the given entry.
-func (p *PageTables) clearPageTable(n *Node, index int) {
-	pte := &n.PTEs()[index]
-	physical := pte.Address()
-	pte.Clear()
-	delete(p.allNodes, physical)
-}
-
-// getPageTable returns the page table entry.
-func (p *PageTables) getPageTable(n *Node, index int) *Node {
-	pte := &n.PTEs()[index]
-	physical := pte.Address()
-	child := p.allNodes[physical]
-	return child
-}
-
 // Map installs a mapping with the given physical address.
 //
 // True is returned iff there was a previous mapping in the range.
@@ -172,10 +122,3 @@ func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts)
 	})
 	return
 }
-
-// allocNode allocates a new page.
-func (p *PageTables) allocNode() *Node {
-	n := new(Node)
-	n.physical = p.translator.TranslateToPhysical(n.PTEs())
-	return n
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
index 8dc50f9dd..6a724e4fd 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go
@@ -121,7 +121,10 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 	}
 
 	for pgdIndex := int((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ {
-		pgdEntry := &p.root.PTEs()[pgdIndex]
+		var (
+			pgdEntry   = &p.root[pgdIndex]
+			pudEntries *PTEs
+		)
 		if !pgdEntry.Valid() {
 			if !alloc {
 				// Skip over this entry.
@@ -130,15 +133,20 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 			}
 
 			// Allocate a new pgd.
-			p.setPageTable(p.root, pgdIndex, p.allocNode())
+			pudEntries = p.Allocator.NewPTEs()
+			pgdEntry.setPageTable(p, pudEntries)
+		} else {
+			pudEntries = p.Allocator.LookupPTEs(pgdEntry.Address())
 		}
 
 		// Map the next level.
-		pudNode := p.getPageTable(p.root, pgdIndex)
 		clearPUDEntries := 0
 
 		for pudIndex := int((start & pudMask) >> pudShift); start < end && pudIndex < entriesPerPage; pudIndex++ {
-			pudEntry := &(pudNode.PTEs()[pudIndex])
+			var (
+				pudEntry   = &pudEntries[pudIndex]
+				pmdEntries *PTEs
+			)
 			if !pudEntry.Valid() {
 				if !alloc {
 					// Skip over this entry.
@@ -161,7 +169,8 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 				}
 
 				// Allocate a new pud.
-				p.setPageTable(pudNode, pudIndex, p.allocNode())
+				pmdEntries = p.Allocator.NewPTEs()
+				pudEntry.setPageTable(p, pmdEntries)
 
 			} else if pudEntry.IsSuper() {
 				// Does this page need to be split?
@@ -169,8 +178,7 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 					currentAddr := uint64(pudEntry.Address())
 
 					// Install the relevant entries.
-					pmdNode := p.allocNode()
-					pmdEntries := pmdNode.PTEs()
+					pmdEntries = p.Allocator.NewPTEs()
 					for index := 0; index < entriesPerPage; index++ {
 						pmdEntry := &pmdEntries[index]
 						pmdEntry.SetSuper()
@@ -179,7 +187,7 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 					}
 
 					// Reset to point to the new page.
-					p.setPageTable(pudNode, pudIndex, pmdNode)
+					pudEntry.setPageTable(p, pmdEntries)
 				} else {
 					// A super page to be checked directly.
 					fn(uintptr(start), uintptr(start+pudSize), pudEntry, pudSize-1)
@@ -193,14 +201,18 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 					start = next(start, pudSize)
 					continue
 				}
+			} else {
+				pmdEntries = p.Allocator.LookupPTEs(pudEntry.Address())
 			}
 
 			// Map the next level, since this is valid.
-			pmdNode := p.getPageTable(pudNode, pudIndex)
 			clearPMDEntries := 0
 
 			for pmdIndex := int((start & pmdMask) >> pmdShift); start < end && pmdIndex < entriesPerPage; pmdIndex++ {
-				pmdEntry := &pmdNode.PTEs()[pmdIndex]
+				var (
+					pmdEntry   = &pmdEntries[pmdIndex]
+					pteEntries *PTEs
+				)
 				if !pmdEntry.Valid() {
 					if !alloc {
 						// Skip over this entry.
@@ -222,7 +234,8 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 					}
 
 					// Allocate a new pmd.
-					p.setPageTable(pmdNode, pmdIndex, p.allocNode())
+					pteEntries = p.Allocator.NewPTEs()
+					pmdEntry.setPageTable(p, pteEntries)
 
 				} else if pmdEntry.IsSuper() {
 					// Does this page need to be split?
@@ -230,8 +243,7 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 						currentAddr := uint64(pmdEntry.Address())
 
 						// Install the relevant entries.
-						pteNode := p.allocNode()
-						pteEntries := pteNode.PTEs()
+						pteEntries = p.Allocator.NewPTEs()
 						for index := 0; index < entriesPerPage; index++ {
 							pteEntry := &pteEntries[index]
 							pteEntry.Set(uintptr(currentAddr), pmdEntry.Opts())
@@ -239,7 +251,7 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 						}
 
 						// Reset to point to the new page.
-						p.setPageTable(pmdNode, pmdIndex, pteNode)
+						pmdEntry.setPageTable(p, pteEntries)
 					} else {
 						// A huge page to be checked directly.
 						fn(uintptr(start), uintptr(start+pmdSize), pmdEntry, pmdSize-1)
@@ -253,14 +265,17 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 						start = next(start, pmdSize)
 						continue
 					}
+				} else {
+					pteEntries = p.Allocator.LookupPTEs(pmdEntry.Address())
 				}
 
 				// Map the next level, since this is valid.
-				pteNode := p.getPageTable(pmdNode, pmdIndex)
 				clearPTEEntries := 0
 
 				for pteIndex := int((start & pteMask) >> pteShift); start < end && pteIndex < entriesPerPage; pteIndex++ {
-					pteEntry := &pteNode.PTEs()[pteIndex]
+					var (
+						pteEntry = &pteEntries[pteIndex]
+					)
 					if !pteEntry.Valid() && !alloc {
 						clearPTEEntries++
 						start += pteSize
@@ -283,21 +298,24 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun
 
 				// Check if we no longer need this page.
 				if clearPTEEntries == entriesPerPage {
-					p.clearPageTable(pmdNode, pmdIndex)
+					pmdEntry.Clear()
+					p.Allocator.FreePTEs(pteEntries)
 					clearPMDEntries++
 				}
 			}
 
 			// Check if we no longer need this page.
 			if clearPMDEntries == entriesPerPage {
-				p.clearPageTable(pudNode, pudIndex)
+				pudEntry.Clear()
+				p.Allocator.FreePTEs(pmdEntries)
 				clearPUDEntries++
 			}
 		}
 
 		// Check if we no longer need this page.
 		if clearPUDEntries == entriesPerPage {
-			p.clearPageTable(p.root, pgdIndex)
+			pgdEntry.Clear()
+			p.Allocator.FreePTEs(pudEntries)
 		}
 	}
 }
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
index 4f15c6b58..c81786133 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go
@@ -23,7 +23,7 @@ import (
 )
 
 func Test2MAnd4K(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map a small page and a huge page.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -37,7 +37,7 @@ func Test2MAnd4K(t *testing.T) {
 }
 
 func Test1GAnd4K(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map a small page and a super page.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -51,7 +51,7 @@ func Test1GAnd4K(t *testing.T) {
 }
 
 func TestSplit1GPage(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map a super page and knock out the middle.
 	pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42)
@@ -65,7 +65,7 @@ func TestSplit1GPage(t *testing.T) {
 }
 
 func TestSplit2MPage(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map a huge page and knock out the middle.
 	pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42)
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
index a4f684af2..dec8def7f 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go
@@ -15,18 +15,11 @@
 package pagetables
 
 import (
-	"reflect"
 	"testing"
 
 	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
 )
 
-type reflectTranslater struct{}
-
-func (r reflectTranslater) TranslateToPhysical(ptes *PTEs) uintptr {
-	return reflect.ValueOf(ptes).Pointer()
-}
-
 type mapping struct {
 	start  uintptr
 	length uintptr
@@ -80,12 +73,12 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) {
 }
 
 func TestAllocFree(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 	pt.Release()
 }
 
 func TestUnmap(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map and unmap one entry.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -96,7 +89,7 @@ func TestUnmap(t *testing.T) {
 }
 
 func TestReadOnly(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map one entry.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -108,7 +101,7 @@ func TestReadOnly(t *testing.T) {
 }
 
 func TestReadWrite(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map one entry.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -120,7 +113,7 @@ func TestReadWrite(t *testing.T) {
 }
 
 func TestSerialEntries(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map two sequential entries.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
@@ -134,7 +127,7 @@ func TestSerialEntries(t *testing.T) {
 }
 
 func TestSpanningEntries(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Span a pgd with two pages.
 	pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42)
@@ -147,7 +140,7 @@ func TestSpanningEntries(t *testing.T) {
 }
 
 func TestSparseEntries(t *testing.T) {
-	pt := New(reflectTranslater{}, Opts{})
+	pt := New(NewRuntimeAllocator(), Opts{})
 
 	// Map two entries in different pgds.
 	pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42)
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go b/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go
deleted file mode 100644
index a2b44fb79..000000000
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_unsafe.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2018 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pagetables
-
-import (
-	"unsafe"
-
-	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
-)
-
-// PTEs returns aligned PTE entries.
-func (n *Node) PTEs() *PTEs {
-	addr := uintptr(unsafe.Pointer(&n.unalignedData[0]))
-	offset := addr & (usermem.PageSize - 1)
-	if offset != 0 {
-		offset = usermem.PageSize - offset
-	}
-	return (*PTEs)(unsafe.Pointer(addr + offset))
-}
diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
index 8ba78ed0d..72a955d08 100644
--- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
+++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go
@@ -70,9 +70,9 @@ func (p *PageTables) CR3() uint64 {
 	// Bit 63 is set to avoid flushing the PCID (per SDM 4.10.4.1).
 	const noFlushBit uint64 = 0x8000000000000000
 	if p.pcid != 0 {
-		return noFlushBit | uint64(p.root.physical) | uint64(p.pcid)
+		return noFlushBit | uint64(p.rootPhysical) | uint64(p.pcid)
 	}
-	return uint64(p.root.physical)
+	return uint64(p.rootPhysical)
 }
 
 // FlushCR3 returns the CR3 value that flushes the TLB.
@@ -81,7 +81,7 @@ func (p *PageTables) CR3() uint64 {
 //
 //go:nosplit
 func (p *PageTables) FlushCR3() uint64 {
-	return uint64(p.root.physical) | uint64(p.pcid)
+	return uint64(p.rootPhysical) | uint64(p.pcid)
 }
 
 // Bits in page table entries.
@@ -200,8 +200,13 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) {
 // be cleared. This is used explicitly for breaking super pages.
 //
 //go:nosplit
-func (p *PTE) setPageTable(addr uintptr) {
-	v := (addr &^ optionMask) | present | user | writable | accessed | dirty
+func (p *PTE) setPageTable(pt *PageTables, ptes *PTEs) {
+	addr := pt.Allocator.PhysicalFor(ptes)
+	if addr&^optionMask != addr {
+		// This should never happen.
+		panic("unaligned physical address!")
+	}
+	v := addr | present | user | writable | accessed | dirty
 	atomic.StoreUintptr((*uintptr)(p), v)
 }
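
For orientation, here is a minimal usage sketch of the new API, assembled from the updated tests and the KVM platform's Release path above. The standalone main wrapper and the addresses are illustrative only, not code from the tree; all calls (New, Map, Lookup, Unmap, Release, Drain) appear in this diff:

	package main

	import (
		"fmt"

		"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
		"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
	)

	func main() {
		// The trivial runtime allocator; the KVM platform wraps it in
		// its own allocator type to return guest-physical addresses.
		a := pagetables.NewRuntimeAllocator()
		pt := pagetables.New(a, pagetables.Opts{})

		// Map one 4K page (arbitrary addresses, as in the tests).
		pt.Map(0x400000, usermem.PageSize,
			pagetables.MapOpts{AccessType: usermem.ReadWrite},
			usermem.PageSize*42)

		// Reverse lookup of the installed mapping.
		physical, opts := pt.Lookup(0x400000)
		fmt.Printf("physical=%#x opts=%+v\n", physical, opts)

		// Teardown mirrors addressSpace.Release: unmap, release the
		// tables (intermediate pages return to the allocator's pool),
		// then drain the pool so the PTE pages can be collected.
		pt.Unmap(0x400000, usermem.PageSize)
		pt.Release()
		a.Drain()
	}

Note that because RuntimeAllocator.NewPTEs only avoids stack splits when it can pull from its pool, callers running in //go:nosplit contexts (such as the KVM bluepill paths) are expected to keep the pool warm; Drain is the explicit point at which retained pages become garbage-collectable again.
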