From 5fa683ffdf0bbb86f4b97befb4b377821be8513d Mon Sep 17 00:00:00 2001 From: Adin Scannell Date: Tue, 2 Feb 2021 00:08:37 -0800 Subject: Minor page tables improvements. * Make split safe. * Enable looking up next valid address. * Support mappings with !accessType.Any(), distinct from unmap. These changes allow for the use of pagetables in low-level OS packages, such as ring0, and allow for the use of pagetables for more generic address space reservation (by writing entries with no access specified). Updates #5039 PiperOrigin-RevId: 355109016 --- pkg/sentry/platform/ring0/pagetables/BUILD | 6 +- pkg/sentry/platform/ring0/pagetables/pagetables.go | 60 +++++---- .../ring0/pagetables/pagetables_aarch64.go | 11 +- .../platform/ring0/pagetables/pagetables_amd64.go | 2 + .../platform/ring0/pagetables/pagetables_arm64.go | 1 + .../platform/ring0/pagetables/pagetables_test.go | 5 +- .../platform/ring0/pagetables/pagetables_x86.go | 5 +- .../platform/ring0/pagetables/walker_amd64.go | 142 ++++----------------- .../platform/ring0/pagetables/walker_arm64.go | 117 +++-------------- .../platform/ring0/pagetables/walker_generic.go | 110 ++++++++++++++++ 10 files changed, 212 insertions(+), 247 deletions(-) create mode 100644 pkg/sentry/platform/ring0/pagetables/walker_generic.go (limited to 'pkg/sentry/platform') diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index 9e3539e4c..7ce8d0078 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -9,7 +9,10 @@ package(licenses = ["notice"]) # architecture builds. go_template( name = "generic_walker_%s" % arch, - srcs = ["walker_%s.go" % arch], + srcs = [ + "walker_generic.go", + "walker_%s.go" % arch, + ], opt_types = [ "Visitor", ], @@ -50,6 +53,7 @@ go_library( "pcids_x86.go", "walker_amd64.go", "walker_arm64.go", + "walker_generic.go", ":walker_empty_amd64", ":walker_empty_arm64", ":walker_lookup_amd64", diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index 7605d0cb2..8c0a6aa82 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -60,6 +60,7 @@ type PageTables struct { // Init initializes a set of PageTables. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) Init(allocator Allocator) { p.Allocator = allocator @@ -92,7 +93,6 @@ func NewWithUpper(a Allocator, upperSharedPageTables *PageTables, upperStart uin } p.InitArch(a) - return p } @@ -112,7 +112,7 @@ type mapVisitor struct { // visit is used for map. // //go:nosplit -func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { p := v.physical + (start - uintptr(v.target)) if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) { v.prev = true @@ -122,9 +122,10 @@ func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) { // install a valid entry here, however we must zap any existing // entry to ensure this happens. pte.Clear() - return + return true } pte.Set(p, v.opts) + return true } //go:nosplit @@ -140,7 +141,6 @@ func (*mapVisitor) requiresSplit() bool { return true } // Precondition: addr & length must be page-aligned, their sum must not overflow. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool { if p.readOnlyShared { @@ -158,9 +158,6 @@ func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physic length = p.upperStart - uintptr(addr) } } - if !opts.AccessType.Any() { - return p.Unmap(addr, length) - } w := mapWalker{ pageTables: p, visitor: mapVisitor{ @@ -187,9 +184,10 @@ func (*unmapVisitor) requiresSplit() bool { return true } // visit unmaps the given entry. // //go:nosplit -func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { pte.Clear() v.count++ + return true } // Unmap unmaps the given range. @@ -199,7 +197,6 @@ func (v *unmapVisitor) visit(start uintptr, pte *PTE, align uintptr) { // Precondition: addr & length must be page-aligned, their sum must not overflow. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) Unmap(addr usermem.Addr, length uintptr) bool { if p.readOnlyShared { @@ -241,8 +238,9 @@ func (*emptyVisitor) requiresSplit() bool { return false } // visit unmaps the given entry. // //go:nosplit -func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { v.count++ + return true } // IsEmpty checks if the given range is empty. @@ -250,7 +248,6 @@ func (v *emptyVisitor) visit(start uintptr, pte *PTE, align uintptr) { // Precondition: addr & length must be page-aligned. // // +checkescape:hard,stack -// //go:nosplit func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool { w := emptyWalker{ @@ -262,20 +259,28 @@ func (p *PageTables) IsEmpty(addr usermem.Addr, length uintptr) bool { // lookupVisitor is used for lookup. type lookupVisitor struct { - target uintptr // Input. - physical uintptr // Output. - opts MapOpts // Output. + target uintptr // Input & Output. + findFirst bool // Input. + physical uintptr // Output. + size uintptr // Output. + opts MapOpts // Output. } // visit matches the given address. // //go:nosplit -func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *lookupVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { if !pte.Valid() { - return + // If looking for the first, then we just keep iterating until + // we find a valid entry. + return v.findFirst } - v.physical = pte.Address() + (start - uintptr(v.target)) + // Is this within the current range? + v.target = start + v.physical = pte.Address() + v.size = (align + 1) v.opts = pte.Opts() + return false } //go:nosplit @@ -286,20 +291,29 @@ func (*lookupVisitor) requiresSplit() bool { return false } // Lookup returns the physical address for the given virtual address. // -// +checkescape:hard,stack +// If findFirst is true, then the next valid address after addr is returned. +// If findFirst is false, then only a mapping for addr will be returned. +// +// Note that if size is zero, then no matching entry was found. // +// +checkescape:hard,stack //go:nosplit -func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) { +func (p *PageTables) Lookup(addr usermem.Addr, findFirst bool) (virtual usermem.Addr, physical, size uintptr, opts MapOpts) { mask := uintptr(usermem.PageSize - 1) - offset := uintptr(addr) & mask + addr &^= usermem.Addr(mask) w := lookupWalker{ pageTables: p, visitor: lookupVisitor{ - target: uintptr(addr &^ usermem.Addr(mask)), + target: uintptr(addr), + findFirst: findFirst, }, } - w.iterateRange(uintptr(addr), uintptr(addr)+1) - return w.visitor.physical + offset, w.visitor.opts + end := ^usermem.Addr(0) &^ usermem.Addr(mask) + if !findFirst { + end = addr + 1 + } + w.iterateRange(uintptr(addr), uintptr(end)) + return usermem.Addr(w.visitor.target), w.visitor.physical, w.visitor.size, w.visitor.opts } // MarkReadOnlyShared marks the pagetables read-only and can be shared. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go index 520161755..163a3aea3 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_aarch64.go @@ -156,12 +156,7 @@ func (p *PTE) IsSect() bool { // //go:nosplit func (p *PTE) Set(addr uintptr, opts MapOpts) { - if !opts.AccessType.Any() { - p.Clear() - return - } - v := (addr &^ optionMask) | protDefault | nG | readOnly - + v := (addr &^ optionMask) | nG | readOnly | protDefault if p.IsSect() { // Note that this is inherited from the previous instance. Set // does not change the value of Sect. See above. @@ -169,6 +164,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { } else { v |= typePage } + if !opts.AccessType.Any() { + // Leave as non-valid if no access is available. + v &^= pteValid + } if opts.Global { v = v &^ nG diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go index 4bdde8448..a217f404c 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go @@ -43,6 +43,7 @@ const ( // InitArch does some additional initialization related to the architecture. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) InitArch(allocator Allocator) { if p.upperSharedPageTables != nil { @@ -50,6 +51,7 @@ func (p *PageTables) InitArch(allocator Allocator) { } } +//go:nosplit func pgdIndex(upperStart uintptr) uintptr { if upperStart&(pgdSize-1) != 0 { panic("upperStart should be pgd size aligned") diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go index ad0e30c88..fef7a0fd1 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_arm64.go @@ -44,6 +44,7 @@ const ( // InitArch does some additional initialization related to the architecture. // +// +checkescape:hard,stack //go:nosplit func (p *PageTables) InitArch(allocator Allocator) { if p.upperSharedPageTables != nil { diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index 5c88d087d..772f4fc5e 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -34,7 +34,7 @@ type checkVisitor struct { failed string // Output. } -func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { +func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) bool { v.found = append(v.found, mapping{ start: start, length: align + 1, @@ -43,7 +43,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { }) if v.failed != "" { // Don't keep looking for errors. - return + return false } if v.current >= len(v.expected) { @@ -58,6 +58,7 @@ func (v *checkVisitor) visit(start uintptr, pte *PTE, align uintptr) { v.failed = "opts didn't match" } v.current++ + return true } func (*checkVisitor) requiresAlloc() bool { return false } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index 157438d9b..32edd2f0a 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -137,7 +137,10 @@ func (p *PTE) Set(addr uintptr, opts MapOpts) { p.Clear() return } - v := (addr &^ optionMask) | present | accessed + v := (addr &^ optionMask) + if opts.AccessType.Any() { + v |= present | accessed + } if opts.User { v |= user } diff --git a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go b/pkg/sentry/platform/ring0/pagetables/walker_amd64.go index 8f9dacd93..eb4fbcc31 100644 --- a/pkg/sentry/platform/ring0/pagetables/walker_amd64.go +++ b/pkg/sentry/platform/ring0/pagetables/walker_amd64.go @@ -16,104 +16,10 @@ package pagetables -// Visitor is a generic type. -type Visitor interface { - // visit is called on each PTE. - visit(start uintptr, pte *PTE, align uintptr) - - // requiresAlloc indicates that new entries should be allocated within - // the walked range. - requiresAlloc() bool - - // requiresSplit indicates that entries in the given range should be - // split if they are huge or jumbo pages. - requiresSplit() bool -} - -// Walker walks page tables. -type Walker struct { - // pageTables are the tables to walk. - pageTables *PageTables - - // Visitor is the set of arguments. - visitor Visitor -} - -// iterateRange iterates over all appropriate levels of page tables for the given range. -// -// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The -// exception is super pages. If a valid super page (huge or jumbo) cannot be -// installed, then the walk will continue to individual entries. -// -// This algorithm will attempt to maximize the use of super pages whenever -// possible. Whether a super page is provided will be clear through the range -// provided in the callback. -// -// Note that if requiresAlloc is true, then no gaps will be present. However, -// if alloc is not set, then the iteration will likely be full of gaps. -// -// Note that this function should generally be avoided in favor of Map, Unmap, -// etc. when not necessary. -// -// Precondition: start must be page-aligned. -// -// Precondition: start must be less than end. -// -// Precondition: If requiresAlloc is true, then start and end should not span -// non-canonical ranges. If they do, a panic will result. -// -//go:nosplit -func (w *Walker) iterateRange(start, end uintptr) { - if start%pteSize != 0 { - panic("unaligned start") - } - if end < start { - panic("start > end") - } - if start < lowerTop { - if end <= lowerTop { - w.iterateRangeCanonical(start, end) - } else if end > lowerTop && end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - w.iterateRangeCanonical(upperBottom, end) - } - } else if start < upperBottom { - if end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(upperBottom, end) - } - } else { - w.iterateRangeCanonical(start, end) - } -} - -// next returns the next address quantized by the given size. -// -//go:nosplit -func next(start uintptr, size uintptr) uintptr { - start &= ^(size - 1) - start += size - return start -} - // iterateRangeCanonical walks a canonical range. // //go:nosplit -func (w *Walker) iterateRangeCanonical(start, end uintptr) { +func (w *Walker) iterateRangeCanonical(start, end uintptr) bool { for pgdIndex := uint16((start & pgdMask) >> pgdShift); start < end && pgdIndex < entriesPerPage; pgdIndex++ { var ( pgdEntry = &w.pageTables.root[pgdIndex] @@ -127,10 +33,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pgd. - pudEntries = w.pageTables.Allocator.NewPTEs() + pudEntries = w.pageTables.Allocator.NewPTEs() // escapes: depends on allocator. pgdEntry.setPageTable(w.pageTables, pudEntries) } else { - pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) + pudEntries = w.pageTables.Allocator.LookupPTEs(pgdEntry.Address()) // escapes: see above. } // Map the next level. @@ -155,7 +61,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // new page for the pmd. if start&(pudSize-1) == 0 && end-start >= pudSize { pudEntry.SetSuper() - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) { + return false + } if pudEntry.Valid() { start = next(start, pudSize) continue @@ -163,14 +71,14 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pud. - pmdEntries = w.pageTables.Allocator.NewPTEs() + pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. pudEntry.setPageTable(w.pageTables, pmdEntries) } else if pudEntry.IsSuper() { // Does this page need to be split? if w.visitor.requiresSplit() && (start&(pudSize-1) != 0 || end < next(start, pudSize)) { // Install the relevant entries. - pmdEntries = w.pageTables.Allocator.NewPTEs() + pmdEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. for index := uint16(0); index < entriesPerPage; index++ { pmdEntries[index].SetSuper() pmdEntries[index].Set( @@ -180,7 +88,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pudEntry.setPageTable(w.pageTables, pmdEntries) } else { // A super page to be checked directly. - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start&^(pudSize-1)), pudEntry, pudSize-1) { + return false + } // Might have been cleared. if !pudEntry.Valid() { @@ -192,7 +102,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { continue } } else { - pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) + pmdEntries = w.pageTables.Allocator.LookupPTEs(pudEntry.Address()) // escapes: see above. } // Map the next level, since this is valid. @@ -216,7 +126,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // As above, we can skip allocating a new page. if start&(pmdSize-1) == 0 && end-start >= pmdSize { pmdEntry.SetSuper() - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) { + return false + } if pmdEntry.Valid() { start = next(start, pmdSize) continue @@ -224,7 +136,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // Allocate a new pmd. - pteEntries = w.pageTables.Allocator.NewPTEs() + pteEntries = w.pageTables.Allocator.NewPTEs() // escapes: see above. pmdEntry.setPageTable(w.pageTables, pteEntries) } else if pmdEntry.IsSuper() { @@ -240,7 +152,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pmdEntry.setPageTable(w.pageTables, pteEntries) } else { // A huge page to be checked directly. - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start&^(pmdSize-1)), pmdEntry, pmdSize-1) { + return false + } // Might have been cleared. if !pmdEntry.Valid() { @@ -252,7 +166,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { continue } } else { - pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) + pteEntries = w.pageTables.Allocator.LookupPTEs(pmdEntry.Address()) // escapes: see above. } // Map the next level, since this is valid. @@ -269,11 +183,10 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // At this point, we are guaranteed that start%pteSize == 0. - w.visitor.visit(uintptr(start), pteEntry, pteSize-1) - if !pteEntry.Valid() { - if w.visitor.requiresAlloc() { - panic("PTE not set after iteration with requiresAlloc!") - } + if !w.visitor.visit(uintptr(start&^(pteSize-1)), pteEntry, pteSize-1) { + return false + } + if !pteEntry.Valid() && !w.visitor.requiresAlloc() { clearPTEEntries++ } @@ -285,7 +198,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPTEEntries == entriesPerPage { pmdEntry.Clear() - w.pageTables.Allocator.FreePTEs(pteEntries) + w.pageTables.Allocator.FreePTEs(pteEntries) // escapes: see above. clearPMDEntries++ } } @@ -293,7 +206,7 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPMDEntries == entriesPerPage { pudEntry.Clear() - w.pageTables.Allocator.FreePTEs(pmdEntries) + w.pageTables.Allocator.FreePTEs(pmdEntries) // escapes: see above. clearPUDEntries++ } } @@ -301,7 +214,8 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // Check if we no longer need this page. if clearPUDEntries == entriesPerPage { pgdEntry.Clear() - w.pageTables.Allocator.FreePTEs(pudEntries) + w.pageTables.Allocator.FreePTEs(pudEntries) // escapes: see above. } } + return true } diff --git a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go index c261d393a..5ed881c7a 100644 --- a/pkg/sentry/platform/ring0/pagetables/walker_arm64.go +++ b/pkg/sentry/platform/ring0/pagetables/walker_arm64.go @@ -16,104 +16,10 @@ package pagetables -// Visitor is a generic type. -type Visitor interface { - // visit is called on each PTE. - visit(start uintptr, pte *PTE, align uintptr) - - // requiresAlloc indicates that new entries should be allocated within - // the walked range. - requiresAlloc() bool - - // requiresSplit indicates that entries in the given range should be - // split if they are huge or jumbo pages. - requiresSplit() bool -} - -// Walker walks page tables. -type Walker struct { - // pageTables are the tables to walk. - pageTables *PageTables - - // Visitor is the set of arguments. - visitor Visitor -} - -// iterateRange iterates over all appropriate levels of page tables for the given range. -// -// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The -// exception is sect pages. If a valid sect page (huge or jumbo) cannot be -// installed, then the walk will continue to individual entries. -// -// This algorithm will attempt to maximize the use of sect pages whenever -// possible. Whether a sect page is provided will be clear through the range -// provided in the callback. -// -// Note that if requiresAlloc is true, then no gaps will be present. However, -// if alloc is not set, then the iteration will likely be full of gaps. -// -// Note that this function should generally be avoided in favor of Map, Unmap, -// etc. when not necessary. -// -// Precondition: start must be page-aligned. -// -// Precondition: start must be less than end. -// -// Precondition: If requiresAlloc is true, then start and end should not span -// non-canonical ranges. If they do, a panic will result. -// -//go:nosplit -func (w *Walker) iterateRange(start, end uintptr) { - if start%pteSize != 0 { - panic("unaligned start") - } - if end < start { - panic("start > end") - } - if start < lowerTop { - if end <= lowerTop { - w.iterateRangeCanonical(start, end) - } else if end > lowerTop && end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(start, lowerTop) - w.iterateRangeCanonical(upperBottom, end) - } - } else if start < upperBottom { - if end <= upperBottom { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - } else { - if w.visitor.requiresAlloc() { - panic("alloc spans non-canonical range") - } - w.iterateRangeCanonical(upperBottom, end) - } - } else { - w.iterateRangeCanonical(start, end) - } -} - -// next returns the next address quantized by the given size. -// -//go:nosplit -func next(start uintptr, size uintptr) uintptr { - start &= ^(size - 1) - start += size - return start -} - // iterateRangeCanonical walks a canonical range. // //go:nosplit -func (w *Walker) iterateRangeCanonical(start, end uintptr) { +func (w *Walker) iterateRangeCanonical(start, end uintptr) bool { pgdEntryIndex := w.pageTables.root if start >= upperBottom { pgdEntryIndex = w.pageTables.archPageTables.root @@ -160,7 +66,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // new page for the pmd. if start&(pudSize-1) == 0 && end-start >= pudSize { pudEntry.SetSect() - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) { + return false + } if pudEntry.Valid() { start = next(start, pudSize) continue @@ -185,7 +93,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pudEntry.setPageTable(w.pageTables, pmdEntries) } else { // A sect page to be checked directly. - w.visitor.visit(uintptr(start), pudEntry, pudSize-1) + if !w.visitor.visit(uintptr(start), pudEntry, pudSize-1) { + return false + } // Might have been cleared. if !pudEntry.Valid() { @@ -222,7 +132,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { // As above, we can skip allocating a new page. if start&(pmdSize-1) == 0 && end-start >= pmdSize { pmdEntry.SetSect() - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) { + return false + } if pmdEntry.Valid() { start = next(start, pmdSize) continue @@ -246,7 +158,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { pmdEntry.setPageTable(w.pageTables, pteEntries) } else { // A huge page to be checked directly. - w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) + if !w.visitor.visit(uintptr(start), pmdEntry, pmdSize-1) { + return false + } // Might have been cleared. if !pmdEntry.Valid() { @@ -276,7 +190,9 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { } // At this point, we are guaranteed that start%pteSize == 0. - w.visitor.visit(uintptr(start), pteEntry, pteSize-1) + if !w.visitor.visit(uintptr(start), pteEntry, pteSize-1) { + return false + } if !pteEntry.Valid() { if w.visitor.requiresAlloc() { panic("PTE not set after iteration with requiresAlloc!") @@ -311,4 +227,5 @@ func (w *Walker) iterateRangeCanonical(start, end uintptr) { w.pageTables.Allocator.FreePTEs(pudEntries) } } + return true } diff --git a/pkg/sentry/platform/ring0/pagetables/walker_generic.go b/pkg/sentry/platform/ring0/pagetables/walker_generic.go new file mode 100644 index 000000000..34fba7b84 --- /dev/null +++ b/pkg/sentry/platform/ring0/pagetables/walker_generic.go @@ -0,0 +1,110 @@ +// Copyright 2018 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pagetables + +// Visitor is a generic type. +type Visitor interface { + // visit is called on each PTE. The returned boolean indicates whether + // the walk should continue. + visit(start uintptr, pte *PTE, align uintptr) bool + + // requiresAlloc indicates that new entries should be allocated within + // the walked range. + requiresAlloc() bool + + // requiresSplit indicates that entries in the given range should be + // split if they are huge or jumbo pages. + requiresSplit() bool +} + +// Walker walks page tables. +type Walker struct { + // pageTables are the tables to walk. + pageTables *PageTables + + // Visitor is the set of arguments. + visitor Visitor +} + +// iterateRange iterates over all appropriate levels of page tables for the given range. +// +// If requiresAlloc is true, then Set _must_ be called on all given PTEs. The +// exception is super pages. If a valid super page (huge or jumbo) cannot be +// installed, then the walk will continue to individual entries. +// +// This algorithm will attempt to maximize the use of super/sect pages whenever +// possible. Whether a super page is provided will be clear through the range +// provided in the callback. +// +// Note that if requiresAlloc is true, then no gaps will be present. However, +// if alloc is not set, then the iteration will likely be full of gaps. +// +// Note that this function should generally be avoided in favor of Map, Unmap, +// etc. when not necessary. +// +// Precondition: start must be page-aligned. +// Precondition: start must be less than end. +// Precondition: If requiresAlloc is true, then start and end should not span +// non-canonical ranges. If they do, a panic will result. +// +//go:nosplit +func (w *Walker) iterateRange(start, end uintptr) { + if start%pteSize != 0 { + panic("unaligned start") + } + if end < start { + panic("start > end") + } + if start < lowerTop { + if end <= lowerTop { + w.iterateRangeCanonical(start, end) + } else if end > lowerTop && end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(start, lowerTop) + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + if !w.iterateRangeCanonical(start, lowerTop) { + return + } + w.iterateRangeCanonical(upperBottom, end) + } + } else if start < upperBottom { + if end <= upperBottom { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + } else { + if w.visitor.requiresAlloc() { + panic("alloc spans non-canonical range") + } + w.iterateRangeCanonical(upperBottom, end) + } + } else { + w.iterateRangeCanonical(start, end) + } +} + +// next returns the next address quantized by the given size. +// +//go:nosplit +func next(start uintptr, size uintptr) uintptr { + start &= ^(size - 1) + start += size + return start +} -- cgit v1.2.3