author | Adin Scannell <ascannell@google.com> | 2018-05-30 15:13:36 -0700
committer | Shentubot <shentubot@google.com> | 2018-05-30 15:14:44 -0700
commit | c59475599dbcc226e1ef516f40b581d6f2f3be75 (patch)
tree | 26eec98c27286aecb2ec91ee1f2c3484677c59d9 /pkg
parent | 812e83d3bbb99d4fa1ece4712a1ac85e84fe6ec3 (diff)
Change ring0 & page tables arguments to structs.
This is a refactor of ring0 and ring0/pagetables that replaces lists of
individual arguments with opts structures. It should involve no
functional changes, but sets the stage for subsequent work.
PiperOrigin-RevId: 198627556
Change-Id: Id4460340f6a73f0c793cd879324398139cd58ae9
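For orientation, here is a minimal sketch of the new ring0.SwitchOpts call shape, adapted from the context.go hunk in the diff below; the old API passed registers, floating-point state, page tables, and a ring0.Flags bitmask positionally. The switchSketch helper is hypothetical, and the vCPU, addressSpace, and arch.Context values are assumed to come from the surrounding kvm package; error handling is elided.

```go
package kvm

import (
	"gvisor.googlesource.com/gvisor/pkg/sentry/arch"
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)

// switchSketch mirrors the new context.Switch call: the former flags
// (ring0.FlagFlush, ring0.FlagFull) become the Flush and FullRestore
// fields of ring0.SwitchOpts.
func switchSketch(cpu *vCPU, localAS *addressSpace, ac arch.Context) (*arch.SignalInfo, usermem.AccessType, error) {
	return cpu.SwitchToUser(ring0.SwitchOpts{
		Registers:          &ac.StateData().Regs,            // user register state
		FloatingPointState: (*byte)(ac.FloatingPointData()), // saved/restored FP state
		PageTables:         localAS.pageTables,              // application page tables
		Flush:              localAS.Touch(cpu),              // replaces ring0.FlagFlush
		FullRestore:        ac.FullRestore(),                // replaces ring0.FlagFull
	})
}
```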
Diffstat (limited to 'pkg')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | pkg/sentry/platform/kvm/address_space.go | 5 |
| -rw-r--r-- | pkg/sentry/platform/kvm/context.go | 19 |
| -rw-r--r-- | pkg/sentry/platform/kvm/kvm.go | 8 |
| -rw-r--r-- | pkg/sentry/platform/kvm/kvm_test.go | 108 |
| -rw-r--r-- | pkg/sentry/platform/kvm/machine.go | 15 |
| -rw-r--r-- | pkg/sentry/platform/kvm/machine_amd64.go | 7 |
| -rw-r--r-- | pkg/sentry/platform/ring0/defs.go | 19 |
| -rw-r--r-- | pkg/sentry/platform/ring0/kernel_amd64.go | 46 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/BUILD | 2 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables.go | 18 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go | 100 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go (renamed from pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go) | 30 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_test.go | 52 |
| -rw-r--r-- | pkg/sentry/platform/ring0/pagetables/pagetables_x86.go | 134 |

14 files changed, 345 insertions, 218 deletions
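The page-table side of the change follows the same pattern. Below is a minimal sketch, adapted from the address_space.go and kvm.go hunks, of the new pagetables.Map signature: the former positional user flag and access type are folded into a pagetables.MapOpts value. The mapSketch helper is hypothetical; as in the diff, Map returns true iff a previous mapping existed in the range.

```go
package kvm

import (
	"gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables"
	"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
)

// mapSketch installs a user-accessible mapping the way address_space.go
// now does: permissions and the user bit travel in pagetables.MapOpts
// instead of separate AccessType/bool arguments.
func mapSketch(pt *pagetables.PageTables, addr usermem.Addr, length, physical uintptr) bool {
	return pt.Map(addr, length, pagetables.MapOpts{
		AccessType: usermem.AnyAccess, // read/write/execute permissions
		User:       true,              // false (omitted) for kernel-only mappings
	}, physical)
}
```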
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go index e81cc0caf..a777533c5 100644 --- a/pkg/sentry/platform/kvm/address_space.go +++ b/pkg/sentry/platform/kvm/address_space.go @@ -89,7 +89,10 @@ func (as *addressSpace) mapHost(addr usermem.Addr, m hostMapEntry, at usermem.Ac // important; if the pagetable mappings were installed before // ensuring the physical pages were available, then some other // thread could theoretically access them. - prev := as.pageTables.Map(addr, length, true /* user */, at, physical) + prev := as.pageTables.Map(addr, length, pagetables.MapOpts{ + AccessType: at, + User: true, + }, physical) inv = inv || prev m.addr += length m.length -= length diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go index dec26a23a..aac84febf 100644 --- a/pkg/sentry/platform/kvm/context.go +++ b/pkg/sentry/platform/kvm/context.go @@ -35,10 +35,7 @@ type context struct { // Switch runs the provided context in the given address space. func (c *context) Switch(as platform.AddressSpace, ac arch.Context, _ int32) (*arch.SignalInfo, usermem.AccessType, error) { - // Extract data. localAS := as.(*addressSpace) - regs := &ac.StateData().Regs - fp := (*byte)(ac.FloatingPointData()) // Grab a vCPU. cpu := c.machine.Get() @@ -58,17 +55,17 @@ func (c *context) Switch(as platform.AddressSpace, ac arch.Context, _ int32) (*a // that the flush can occur naturally on the next user entry. cpu.active.set(localAS) - // Mark the address space as dirty. - flags := ring0.Flags(0) - if localAS.Touch(cpu) { - flags |= ring0.FlagFlush - } - if ac.FullRestore() { - flags |= ring0.FlagFull + // Prepare switch options. + switchOpts := ring0.SwitchOpts{ + Registers: &ac.StateData().Regs, + FloatingPointState: (*byte)(ac.FloatingPointData()), + PageTables: localAS.pageTables, + Flush: localAS.Touch(cpu), + FullRestore: ac.FullRestore(), } // Take the blue pill. - si, at, err := cpu.SwitchToUser(regs, fp, localAS.pageTables, flags) + si, at, err := cpu.SwitchToUser(switchOpts) // Clear the address space. cpu.active.set(nil) diff --git a/pkg/sentry/platform/kvm/kvm.go b/pkg/sentry/platform/kvm/kvm.go index 15a241f01..6defb1c46 100644 --- a/pkg/sentry/platform/kvm/kvm.go +++ b/pkg/sentry/platform/kvm/kvm.go @@ -25,6 +25,7 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/platform" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/filemem" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" + "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) @@ -123,8 +124,11 @@ func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan stru pageTables := k.machine.kernel.PageTables.New() applyPhysicalRegions(func(pr physicalRegion) bool { // Map the kernel in the upper half. - kernelVirtual := usermem.Addr(ring0.KernelStartAddress | pr.virtual) - pageTables.Map(kernelVirtual, pr.length, false /* kernel */, usermem.AnyAccess, pr.physical) + pageTables.Map( + usermem.Addr(ring0.KernelStartAddress|pr.virtual), + pr.length, + pagetables.MapOpts{AccessType: usermem.AnyAccess}, + pr.physical) return true // Keep iterating. 
}) diff --git a/pkg/sentry/platform/kvm/kvm_test.go b/pkg/sentry/platform/kvm/kvm_test.go index a3466fbed..00919b214 100644 --- a/pkg/sentry/platform/kvm/kvm_test.go +++ b/pkg/sentry/platform/kvm/kvm_test.go @@ -142,7 +142,10 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func // done for regular user code, but is fine for test // purposes.) applyPhysicalRegions(func(pr physicalRegion) bool { - pt.Map(usermem.Addr(pr.virtual), pr.length, true /* user */, usermem.AnyAccess, pr.physical) + pt.Map(usermem.Addr(pr.virtual), pr.length, pagetables.MapOpts{ + AccessType: usermem.AnyAccess, + User: true, + }, pr.physical) return true // Keep iterating. }) } @@ -154,13 +157,22 @@ func applicationTest(t testHarness, useHostMappings bool, target func(), fn func func TestApplicationSyscall(t *testing.T) { applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); err != nil { t.Errorf("application syscall with full restore failed: %v", err) } return false }) applicationTest(t, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != nil { t.Errorf("application syscall with partial restore failed: %v", err) } return false @@ -170,14 +182,23 @@ func TestApplicationSyscall(t *testing.T) { func TestApplicationFault(t *testing.T) { applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. - if si, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); err != platform.ErrContextSignal || (si != nil && si.Signo != int32(syscall.SIGSEGV)) { + if si, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); err != platform.ErrContextSignal || (si != nil && si.Signo != int32(syscall.SIGSEGV)) { t.Errorf("application fault with full restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) } return false }) applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, nil) // Cause fault. - if si, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != platform.ErrContextSignal || (si != nil && si.Signo != int32(syscall.SIGSEGV)) { + if si, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != platform.ErrContextSignal || (si != nil && si.Signo != int32(syscall.SIGSEGV)) { t.Errorf("application fault with partial restore got (%v, %v), expected (%v, SIGSEGV)", err, si, platform.ErrContextSignal) } return false @@ -187,7 +208,11 @@ func TestApplicationFault(t *testing.T) { func TestRegistersSyscall(t *testing.T) { applicationTest(t, true, testutil.TwiddleRegsSyscall, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. 
- if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != nil { t.Errorf("application register check with partial restore got unexpected error: %v", err) } if err := testutil.CheckTestRegs(regs, false); err != nil { @@ -200,7 +225,12 @@ func TestRegistersSyscall(t *testing.T) { func TestRegistersFault(t *testing.T) { applicationTest(t, true, testutil.TwiddleRegsFault, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTestRegs(regs) // Fill values for all registers. - if si, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); err != platform.ErrContextSignal || si.Signo != int32(syscall.SIGSEGV) { + if si, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); err != platform.ErrContextSignal || si.Signo != int32(syscall.SIGSEGV) { t.Errorf("application register check with full restore got unexpected error: %v", err) } if err := testutil.CheckTestRegs(regs, true); err != nil { @@ -213,7 +243,12 @@ func TestRegistersFault(t *testing.T) { func TestSegments(t *testing.T) { applicationTest(t, true, testutil.TwiddleSegments, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTestSegments(regs) - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); err != nil { t.Errorf("application segment check with full restore got unexpected error: %v", err) } if err := testutil.CheckTestSegments(regs); err != nil { @@ -229,7 +264,11 @@ func TestBounce(t *testing.T) { time.Sleep(time.Millisecond) c.BounceToKernel() }() - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != platform.ErrContextInterrupt { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != platform.ErrContextInterrupt { t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) } return false @@ -239,7 +278,12 @@ func TestBounce(t *testing.T) { time.Sleep(time.Millisecond) c.BounceToKernel() }() - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); err != platform.ErrContextInterrupt { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); err != platform.ErrContextInterrupt { t.Errorf("application full restore: got %v, wanted %v", err, platform.ErrContextInterrupt) } return false @@ -265,7 +309,11 @@ func TestBounceStress(t *testing.T) { c.BounceToKernel() }() randomSleep() - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != platform.ErrContextInterrupt { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != platform.ErrContextInterrupt { t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextInterrupt) } c.unlock() @@ -280,12 +328,21 @@ func TestInvalidate(t *testing.T) { var data uintptr // Used below. 
applicationTest(t, true, testutil.Touch, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { testutil.SetTouchTarget(regs, &data) // Read legitimate value. - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != nil { t.Errorf("application partial restore: got %v, wanted nil", err) } // Unmap the page containing data & invalidate. pt.Unmap(usermem.Addr(reflect.ValueOf(&data).Pointer() & ^uintptr(usermem.PageSize-1)), usermem.PageSize) - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFlush); err != platform.ErrContextSignal { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + Flush: true, + }); err != platform.ErrContextSignal { t.Errorf("application partial restore: got %v, wanted %v", err, platform.ErrContextSignal) } return false @@ -299,14 +356,23 @@ func IsFault(err error, si *arch.SignalInfo) bool { func TestEmptyAddressSpace(t *testing.T) { applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if si, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); !IsFault(err, si) { + if si, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); !IsFault(err, si) { t.Errorf("first fault with partial restore failed got %v", err) t.Logf("registers: %#v", ®s) } return false }) applicationTest(t, false, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if si, _, err := c.SwitchToUser(regs, dummyFPState, pt, ring0.FlagFull); !IsFault(err, si) { + if si, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + FullRestore: true, + }); !IsFault(err, si) { t.Errorf("first fault with full restore failed got %v", err) t.Logf("registers: %#v", ®s) } @@ -357,7 +423,11 @@ func BenchmarkApplicationSyscall(b *testing.B) { a int // Count for ErrContextInterrupt. ) applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != nil { if err == platform.ErrContextInterrupt { a++ return true // Ignore. @@ -390,7 +460,11 @@ func BenchmarkWorldSwitchToUserRoundtrip(b *testing.B) { a int ) applicationTest(b, true, testutil.SyscallLoop, func(c *vCPU, regs *syscall.PtraceRegs, pt *pagetables.PageTables) bool { - if _, _, err := c.SwitchToUser(regs, dummyFPState, pt, 0); err != nil { + if _, _, err := c.SwitchToUser(ring0.SwitchOpts{ + Registers: regs, + FloatingPointState: dummyFPState, + PageTables: pt, + }); err != nil { if err == platform.ErrContextInterrupt { a++ return true // Ignore. diff --git a/pkg/sentry/platform/kvm/machine.go b/pkg/sentry/platform/kvm/machine.go index 9b7e5130c..5a6109ced 100644 --- a/pkg/sentry/platform/kvm/machine.go +++ b/pkg/sentry/platform/kvm/machine.go @@ -186,10 +186,19 @@ func newMachine(vm int, vCPUs int) (*machine, error) { // physical pages are mapped on demand, see kernel_unsafe.go. applyPhysicalRegions(func(pr physicalRegion) bool { // Map everything in the lower half. 
- m.kernel.PageTables.Map(usermem.Addr(pr.virtual), pr.length, false /* kernel */, usermem.AnyAccess, pr.physical) + m.kernel.PageTables.Map( + usermem.Addr(pr.virtual), + pr.length, + pagetables.MapOpts{AccessType: usermem.AnyAccess}, + pr.physical) + // And keep everything in the upper half. - kernelAddr := usermem.Addr(ring0.KernelStartAddress | pr.virtual) - m.kernel.PageTables.Map(kernelAddr, pr.length, false /* kernel */, usermem.AnyAccess, pr.physical) + m.kernel.PageTables.Map( + usermem.Addr(ring0.KernelStartAddress|pr.virtual), + pr.length, + pagetables.MapOpts{AccessType: usermem.AnyAccess}, + pr.physical) + return true // Keep iterating. }) diff --git a/pkg/sentry/platform/kvm/machine_amd64.go b/pkg/sentry/platform/kvm/machine_amd64.go index f583f68f7..ba7bbcb91 100644 --- a/pkg/sentry/platform/kvm/machine_amd64.go +++ b/pkg/sentry/platform/kvm/machine_amd64.go @@ -24,7 +24,6 @@ import ( "gvisor.googlesource.com/gvisor/pkg/sentry/arch" "gvisor.googlesource.com/gvisor/pkg/sentry/platform" "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0" - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" ) @@ -121,7 +120,7 @@ func (c *vCPU) fault(signal int32) (*arch.SignalInfo, usermem.AccessType, error) } // SwitchToUser unpacks architectural-details. -func (c *vCPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetables.PageTables, flags ring0.Flags) (*arch.SignalInfo, usermem.AccessType, error) { +func (c *vCPU) SwitchToUser(switchOpts ring0.SwitchOpts) (*arch.SignalInfo, usermem.AccessType, error) { // See below. var vector ring0.Vector @@ -131,7 +130,7 @@ func (c *vCPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetab // allocations occur. entersyscall() bluepill(c) - vector = c.CPU.SwitchToUser(regs, fpState, pt, flags) + vector = c.CPU.SwitchToUser(switchOpts) exitsyscall() switch vector { @@ -147,7 +146,7 @@ func (c *vCPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetab return info, usermem.AccessType{}, platform.ErrContextSignal case ring0.GeneralProtectionFault: - if !ring0.IsCanonical(regs.Rip) { + if !ring0.IsCanonical(switchOpts.Registers.Rip) { // If the RIP is non-canonical, it's a SEGV. info := &arch.SignalInfo{Signo: int32(syscall.SIGSEGV)} return info, usermem.AccessType{}, platform.ErrContextSignal diff --git a/pkg/sentry/platform/ring0/defs.go b/pkg/sentry/platform/ring0/defs.go index 9d947b73d..7b3bed1c7 100644 --- a/pkg/sentry/platform/ring0/defs.go +++ b/pkg/sentry/platform/ring0/defs.go @@ -91,3 +91,22 @@ type CPU struct { func (c *CPU) Registers() *syscall.PtraceRegs { return &c.registers } + +// SwitchOpts are passed to the Switch function. +type SwitchOpts struct { + // Registers are the user register state. + Registers *syscall.PtraceRegs + + // FloatingPointState is a byte pointer where floating point state is + // saved and restored. + FloatingPointState *byte + + // PageTables are the application page tables. + PageTables *pagetables.PageTables + + // Flush indicates that a TLB flush should be forced on switch. + Flush bool + + // FullRestore indicates that an iret-based restore should be used. 
+ FullRestore bool +} diff --git a/pkg/sentry/platform/ring0/kernel_amd64.go b/pkg/sentry/platform/ring0/kernel_amd64.go index 76ba65b3f..02d6d0de4 100644 --- a/pkg/sentry/platform/ring0/kernel_amd64.go +++ b/pkg/sentry/platform/ring0/kernel_amd64.go @@ -18,9 +18,6 @@ package ring0 import ( "encoding/binary" - "syscall" - - "gvisor.googlesource.com/gvisor/pkg/sentry/platform/ring0/pagetables" ) const ( @@ -159,18 +156,6 @@ func IsCanonical(addr uint64) bool { return addr <= 0x00007fffffffffff || addr > 0xffff800000000000 } -// Flags contains flags related to switch. -type Flags uintptr - -const ( - // FlagFull indicates that a full restore should be not, not a fast - // restore (on the syscall return path.) - FlagFull = 1 << iota - - // FlagFlush indicates that a full TLB flush is required. - FlagFlush -) - // SwitchToUser performs either a sysret or an iret. // // The return value is the vector that interrupted execution. @@ -189,8 +174,9 @@ const ( // the case for amd64, but may not be the case for other architectures. // //go:nosplit -func (c *CPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetables.PageTables, flags Flags) (vector Vector) { +func (c *CPU) SwitchToUser(switchOpts SwitchOpts) (vector Vector) { // Check for canonical addresses. + regs := switchOpts.Registers if !IsCanonical(regs.Rip) || !IsCanonical(regs.Rsp) || !IsCanonical(regs.Fs_base) || !IsCanonical(regs.Gs_base) { return GeneralProtectionFault } @@ -201,10 +187,10 @@ func (c *CPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetabl ) // Sanitize registers. - if flags&FlagFlush != 0 { - userCR3 = pt.FlushCR3() + if switchOpts.Flush { + userCR3 = switchOpts.PageTables.FlushCR3() } else { - userCR3 = pt.CR3() + userCR3 = switchOpts.PageTables.CR3() } regs.Eflags &= ^uint64(UserFlagsClear) regs.Eflags |= UserFlagsSet @@ -213,21 +199,21 @@ func (c *CPU) SwitchToUser(regs *syscall.PtraceRegs, fpState *byte, pt *pagetabl kernelCR3 = c.kernel.PageTables.CR3() // Perform the switch. - swapgs() // GS will be swapped on return. - wrfs(uintptr(regs.Fs_base)) // Set application FS. - wrgs(uintptr(regs.Gs_base)) // Set application GS. - LoadFloatingPoint(fpState) // Copy in floating point. - jumpToKernel() // Switch to upper half. - writeCR3(uintptr(userCR3)) // Change to user address space. - if flags&FlagFull != 0 { + swapgs() // GS will be swapped on return. + wrfs(uintptr(regs.Fs_base)) // Set application FS. + wrgs(uintptr(regs.Gs_base)) // Set application GS. + LoadFloatingPoint(switchOpts.FloatingPointState) // Copy in floating point. + jumpToKernel() // Switch to upper half. + writeCR3(uintptr(userCR3)) // Change to user address space. + if switchOpts.FullRestore { vector = iret(c, regs) } else { vector = sysret(c, regs) } - writeCR3(uintptr(kernelCR3)) // Return to kernel address space. - jumpToUser() // Return to lower half. - SaveFloatingPoint(fpState) // Copy out floating point. - wrfs(uintptr(c.registers.Fs_base)) // Restore kernel FS. + writeCR3(uintptr(kernelCR3)) // Return to kernel address space. + jumpToUser() // Return to lower half. + SaveFloatingPoint(switchOpts.FloatingPointState) // Copy out floating point. + wrfs(uintptr(c.registers.Fs_base)) // Restore kernel FS. 
return } diff --git a/pkg/sentry/platform/ring0/pagetables/BUILD b/pkg/sentry/platform/ring0/pagetables/BUILD index c0c481ab3..1a8b7931e 100644 --- a/pkg/sentry/platform/ring0/pagetables/BUILD +++ b/pkg/sentry/platform/ring0/pagetables/BUILD @@ -23,8 +23,8 @@ go_test( name = "pagetables_test", size = "small", srcs = [ + "pagetables_amd64_test.go", "pagetables_test.go", - "pagetables_x86_test.go", "pcids_x86_test.go", ], embed = [":pagetables"], diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables.go b/pkg/sentry/platform/ring0/pagetables/pagetables.go index ee7f27601..2df6792f7 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables.go @@ -117,8 +117,8 @@ func (p *PageTables) getPageTable(n *Node, index int) *Node { // True is returned iff there was a previous mapping in the range. // // Precondition: addr & length must be aligned, their sum must not overflow. -func (p *PageTables) Map(addr usermem.Addr, length uintptr, user bool, at usermem.AccessType, physical uintptr) bool { - if at == usermem.NoAccess { +func (p *PageTables) Map(addr usermem.Addr, length uintptr, opts MapOpts, physical uintptr) bool { + if !opts.AccessType.Any() { return p.Unmap(addr, length) } prev := false @@ -129,7 +129,7 @@ func (p *PageTables) Map(addr usermem.Addr, length uintptr, user bool, at userme } p.iterateRange(uintptr(addr), uintptr(end), true, func(s, e uintptr, pte *PTE, align uintptr) { p := physical + (s - uintptr(addr)) - prev = prev || (pte.Valid() && (p != pte.Address() || at.Write != pte.Writeable() || at.Execute != pte.Executable())) + prev = prev || (pte.Valid() && (p != pte.Address() || opts != pte.Opts())) if p&align != 0 { // We will install entries at a smaller granulaity if // we don't install a valid entry here, however we must @@ -137,7 +137,7 @@ func (p *PageTables) Map(addr usermem.Addr, length uintptr, user bool, at userme pte.Clear() return } - pte.Set(p, at.Write, at.Execute, user) + pte.Set(p, opts) }) p.mu.Unlock() return prev @@ -167,7 +167,7 @@ func (p *PageTables) Release() { } // Lookup returns the physical address for the given virtual address. -func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, accessType usermem.AccessType) { +func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, opts MapOpts) { mask := uintptr(usermem.PageSize - 1) off := uintptr(addr) & mask addr = addr &^ usermem.Addr(mask) @@ -176,13 +176,9 @@ func (p *PageTables) Lookup(addr usermem.Addr) (physical uintptr, accessType use return } physical = pte.Address() + (s - uintptr(addr)) + off - accessType = usermem.AccessType{ - Read: true, - Write: pte.Writeable(), - Execute: pte.Executable(), - } + opts = pte.Opts() }) - return physical, accessType + return } // allocNode allocates a new page. diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go index a2050b99c..8dc50f9dd 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64.go @@ -18,7 +18,6 @@ package pagetables import ( "fmt" - "sync/atomic" ) // Address constraints. @@ -43,98 +42,11 @@ const ( pmdSize = 1 << pmdShift pudSize = 1 << pudShift pgdSize = 1 << pgdShift -) -// Bits in page table entries. 
-const ( - present = 0x001 - writable = 0x002 - user = 0x004 - writeThrough = 0x008 - cacheDisable = 0x010 - accessed = 0x020 - dirty = 0x040 - super = 0x080 executeDisable = 1 << 63 + entriesPerPage = 512 ) -// PTE is a page table entry. -type PTE uint64 - -// Clear clears this PTE, including super page information. -func (p *PTE) Clear() { - atomic.StoreUint64((*uint64)(p), 0) -} - -// Valid returns true iff this entry is valid. -func (p *PTE) Valid() bool { - return atomic.LoadUint64((*uint64)(p))&present != 0 -} - -// Writeable returns true iff the page is writable. -func (p *PTE) Writeable() bool { - return atomic.LoadUint64((*uint64)(p))&writable != 0 -} - -// User returns true iff the page is user-accessible. -func (p *PTE) User() bool { - return atomic.LoadUint64((*uint64)(p))&user != 0 -} - -// Executable returns true iff the page is executable. -func (p *PTE) Executable() bool { - return atomic.LoadUint64((*uint64)(p))&executeDisable == 0 -} - -// SetSuper sets this page as a super page. -// -// The page must not be valid or a panic will result. -func (p *PTE) SetSuper() { - if p.Valid() { - // This is not allowed. - panic("SetSuper called on valid page!") - } - atomic.StoreUint64((*uint64)(p), super) -} - -// IsSuper returns true iff this page is a super page. -func (p *PTE) IsSuper() bool { - return atomic.LoadUint64((*uint64)(p))&super != 0 -} - -// Set sets this PTE value. -func (p *PTE) Set(addr uintptr, write, execute bool, userAccessible bool) { - v := uint64(addr)&^uint64(0xfff) | present | accessed - if userAccessible { - v |= user - } - if !execute { - v |= executeDisable - } - if write { - v |= writable | dirty - } - if p.IsSuper() { - v |= super - } - atomic.StoreUint64((*uint64)(p), v) -} - -// setPageTable sets this PTE value and forces the write bit and super bit to -// be cleared. This is used explicitly for breaking super pages. -func (p *PTE) setPageTable(addr uintptr) { - v := uint64(addr)&^uint64(0xfff) | present | user | writable | accessed | dirty - atomic.StoreUint64((*uint64)(p), v) -} - -// Address extracts the address. This should only be used if Valid returns true. -func (p *PTE) Address() uintptr { - return uintptr(atomic.LoadUint64((*uint64)(p)) & ^uint64(executeDisable|0xfff)) -} - -// entriesPerPage is the number of PTEs per page. -const entriesPerPage = 512 - // PTEs is a collection of entries. type PTEs [entriesPerPage]PTE @@ -255,9 +167,6 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun // Does this page need to be split? if start&(pudSize-1) != 0 || end < next(start, pudSize) { currentAddr := uint64(pudEntry.Address()) - writeable := pudEntry.Writeable() - executable := pudEntry.Executable() - user := pudEntry.User() // Install the relevant entries. pmdNode := p.allocNode() @@ -265,7 +174,7 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun for index := 0; index < entriesPerPage; index++ { pmdEntry := &pmdEntries[index] pmdEntry.SetSuper() - pmdEntry.Set(uintptr(currentAddr), writeable, executable, user) + pmdEntry.Set(uintptr(currentAddr), pudEntry.Opts()) currentAddr += pmdSize } @@ -319,16 +228,13 @@ func (p *PageTables) iterateRange(startAddr, endAddr uintptr, alloc bool, fn fun // Does this page need to be split? if start&(pmdSize-1) != 0 || end < next(start, pmdSize) { currentAddr := uint64(pmdEntry.Address()) - writeable := pmdEntry.Writeable() - executable := pmdEntry.Executable() - user := pmdEntry.User() // Install the relevant entries. 
pteNode := p.allocNode() pteEntries := pteNode.PTEs() for index := 0; index < entriesPerPage; index++ { pteEntry := &pteEntries[index] - pteEntry.Set(uintptr(currentAddr), writeable, executable, user) + pteEntry.Set(uintptr(currentAddr), pmdEntry.Opts()) currentAddr += pteSize } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go index 1fc403c48..4f15c6b58 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_amd64_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build i386 amd64 +// +build amd64 package pagetables @@ -26,12 +26,12 @@ func Test2MAnd4K(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map a small page and a huge page. - pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) - pt.Map(0x00007f0000000000, 1<<21, true, usermem.Read, pmdSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, true}, - {0x00007f0000000000, pmdSize, pmdSize * 47, false}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, + {0x00007f0000000000, pmdSize, pmdSize * 47, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } @@ -40,12 +40,12 @@ func Test1GAnd4K(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map a small page and a super page. - pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) - pt.Map(0x00007f0000000000, pudSize, true, usermem.Read, pudSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, true}, - {0x00007f0000000000, pudSize, pudSize * 47, false}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, + {0x00007f0000000000, pudSize, pudSize * 47, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } @@ -54,12 +54,12 @@ func TestSplit1GPage(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map a super page and knock out the middle. - pt.Map(0x00007f0000000000, pudSize, true, usermem.Read, pudSize*42) + pt.Map(0x00007f0000000000, pudSize, MapOpts{AccessType: usermem.Read}, pudSize*42) pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pudSize-(2*pteSize)) checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pudSize * 42, false}, - {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, false}, + {0x00007f0000000000, pteSize, pudSize * 42, MapOpts{AccessType: usermem.Read}}, + {0x00007f0000000000 + pudSize - pteSize, pteSize, pudSize*42 + pudSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } @@ -68,12 +68,12 @@ func TestSplit2MPage(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map a huge page and knock out the middle. 
- pt.Map(0x00007f0000000000, pmdSize, true, usermem.Read, pmdSize*42) + pt.Map(0x00007f0000000000, pmdSize, MapOpts{AccessType: usermem.Read}, pmdSize*42) pt.Unmap(usermem.Addr(0x00007f0000000000+pteSize), pmdSize-(2*pteSize)) checkMappings(t, pt, []mapping{ - {0x00007f0000000000, pteSize, pmdSize * 42, false}, - {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, false}, + {0x00007f0000000000, pteSize, pmdSize * 42, MapOpts{AccessType: usermem.Read}}, + {0x00007f0000000000 + pmdSize - pteSize, pteSize, pmdSize*42 + pmdSize - pteSize, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go index 9cbc0e3b0..a4f684af2 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_test.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_test.go @@ -28,10 +28,10 @@ func (r reflectTranslater) TranslateToPhysical(ptes *PTEs) uintptr { } type mapping struct { - start uintptr - length uintptr - addr uintptr - writeable bool + start uintptr + length uintptr + addr uintptr + opts MapOpts } func checkMappings(t *testing.T, pt *PageTables, m []mapping) { @@ -44,10 +44,10 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) { // Iterate over all the mappings. pt.iterateRange(0, ^uintptr(0), false, func(s, e uintptr, pte *PTE, align uintptr) { found = append(found, mapping{ - start: s, - length: e - s, - addr: pte.Address(), - writeable: pte.Writeable(), + start: s, + length: e - s, + addr: pte.Address(), + opts: pte.Opts(), }) if failed != "" { // Don't keep looking for errors. @@ -62,8 +62,8 @@ func checkMappings(t *testing.T, pt *PageTables, m []mapping) { failed = "end didn't match expected" } else if m[current].addr != pte.Address() { failed = "address didn't match expected" - } else if m[current].writeable != pte.Writeable() { - failed = "writeable didn't match" + } else if m[current].opts != pte.Opts() { + failed = "opts didn't match" } current++ }) @@ -88,7 +88,7 @@ func TestUnmap(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map and unmap one entry. - pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) pt.Unmap(0x400000, pteSize) checkMappings(t, pt, nil) @@ -99,10 +99,10 @@ func TestReadOnly(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map one entry. - pt.Map(0x400000, pteSize, true, usermem.Read, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, false}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } @@ -111,10 +111,10 @@ func TestReadWrite(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map one entry. - pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, true}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, }) pt.Release() } @@ -123,12 +123,12 @@ func TestSerialEntries(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map two sequential entries. 
- pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) - pt.Map(0x401000, pteSize, true, usermem.ReadWrite, pteSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x401000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, true}, - {0x401000, pteSize, pteSize * 47, true}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, + {0x401000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.ReadWrite}}, }) pt.Release() } @@ -137,11 +137,11 @@ func TestSpanningEntries(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Span a pgd with two pages. - pt.Map(0x00007efffffff000, 2*pteSize, true, usermem.Read, pteSize*42) + pt.Map(0x00007efffffff000, 2*pteSize, MapOpts{AccessType: usermem.Read}, pteSize*42) checkMappings(t, pt, []mapping{ - {0x00007efffffff000, pteSize, pteSize * 42, false}, - {0x00007f0000000000, pteSize, pteSize * 43, false}, + {0x00007efffffff000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.Read}}, + {0x00007f0000000000, pteSize, pteSize * 43, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } @@ -150,12 +150,12 @@ func TestSparseEntries(t *testing.T) { pt := New(reflectTranslater{}, Opts{}) // Map two entries in different pgds. - pt.Map(0x400000, pteSize, true, usermem.ReadWrite, pteSize*42) - pt.Map(0x00007f0000000000, pteSize, true, usermem.Read, pteSize*47) + pt.Map(0x400000, pteSize, MapOpts{AccessType: usermem.ReadWrite}, pteSize*42) + pt.Map(0x00007f0000000000, pteSize, MapOpts{AccessType: usermem.Read}, pteSize*47) checkMappings(t, pt, []mapping{ - {0x400000, pteSize, pteSize * 42, true}, - {0x00007f0000000000, pteSize, pteSize * 47, false}, + {0x400000, pteSize, pteSize * 42, MapOpts{AccessType: usermem.ReadWrite}}, + {0x00007f0000000000, pteSize, pteSize * 47, MapOpts{AccessType: usermem.Read}}, }) pt.Release() } diff --git a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go index dac66373f..8ba78ed0d 100644 --- a/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go +++ b/pkg/sentry/platform/ring0/pagetables/pagetables_x86.go @@ -16,6 +16,12 @@ package pagetables +import ( + "sync/atomic" + + "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" +) + // Opts are pagetable options. type Opts struct { EnablePCID bool @@ -77,3 +83,131 @@ func (p *PageTables) CR3() uint64 { func (p *PageTables) FlushCR3() uint64 { return uint64(p.root.physical) | uint64(p.pcid) } + +// Bits in page table entries. +const ( + present = 0x001 + writable = 0x002 + user = 0x004 + writeThrough = 0x008 + cacheDisable = 0x010 + accessed = 0x020 + dirty = 0x040 + super = 0x080 + global = 0x100 + optionMask = executeDisable | 0xfff +) + +// MapOpts are x86 options. +type MapOpts struct { + // AccessType defines permissions. + AccessType usermem.AccessType + + // Global indicates the page is globally accessible. + Global bool + + // User indicates the page is a user page. + User bool +} + +// PTE is a page table entry. +type PTE uintptr + +// Clear clears this PTE, including super page information. +// +//go:nosplit +func (p *PTE) Clear() { + atomic.StoreUintptr((*uintptr)(p), 0) +} + +// Valid returns true iff this entry is valid. +// +//go:nosplit +func (p *PTE) Valid() bool { + return atomic.LoadUintptr((*uintptr)(p))&present != 0 +} + +// Opts returns the PTE options. +// +// These are all options except Valid and Super. 
+// +//go:nosplit +func (p *PTE) Opts() MapOpts { + v := atomic.LoadUintptr((*uintptr)(p)) + return MapOpts{ + AccessType: usermem.AccessType{ + Read: v&present != 0, + Write: v&writable != 0, + Execute: v&executeDisable == 0, + }, + Global: v&global != 0, + User: v&user != 0, + } +} + +// SetSuper sets this page as a super page. +// +// The page must not be valid or a panic will result. +// +//go:nosplit +func (p *PTE) SetSuper() { + if p.Valid() { + // This is not allowed. + panic("SetSuper called on valid page!") + } + atomic.StoreUintptr((*uintptr)(p), super) +} + +// IsSuper returns true iff this page is a super page. +// +//go:nosplit +func (p *PTE) IsSuper() bool { + return atomic.LoadUintptr((*uintptr)(p))&super != 0 +} + +// Set sets this PTE value. +// +// This does not change the super page property. +// +//go:nosplit +func (p *PTE) Set(addr uintptr, opts MapOpts) { + if !opts.AccessType.Any() { + p.Clear() + return + } + v := (addr &^ optionMask) | present | accessed + if opts.User { + v |= user + } + if opts.Global { + v |= global + } + if !opts.AccessType.Execute { + v |= executeDisable + } + if opts.AccessType.Write { + v |= writable | dirty + } + if p.IsSuper() { + // Note that this is inherited from the previous instance. Set + // does not change the value of Super. See above. + v |= super + } + atomic.StoreUintptr((*uintptr)(p), v) +} + +// setPageTable sets this PTE value and forces the write bit and super bit to +// be cleared. This is used explicitly for breaking super pages. +// +//go:nosplit +func (p *PTE) setPageTable(addr uintptr) { + v := (addr &^ optionMask) | present | user | writable | accessed | dirty + atomic.StoreUintptr((*uintptr)(p), v) +} + +// Address extracts the address. This should only be used if Valid returns true. +// +//go:nosplit +func (p *PTE) Address() uintptr { + return atomic.LoadUintptr((*uintptr)(p)) &^ optionMask +} |
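As a closing illustration, a small sketch of how the new x86 PTE accessors pair up: Set encodes a MapOpts value into the entry's hardware bits and Opts decodes it back, which is what lets the split paths in the iterator copy options with pmdEntry.Set(addr, pudEntry.Opts()). The pteRoundTrip helper is hypothetical.

```go
package pagetables

import "gvisor.googlesource.com/gvisor/pkg/sentry/usermem"

// pteRoundTrip writes a MapOpts value into a PTE and reads it back.
// Set ors in present and accessed, writable|dirty for writable mappings,
// the user and global bits as requested, and executeDisable when Execute
// is false; Opts recovers the same MapOpts from those bits.
func pteRoundTrip(pte *PTE, addr uintptr) MapOpts {
	pte.Set(addr, MapOpts{
		AccessType: usermem.ReadWrite, // Read+Write, no Execute
		User:       true,
		Global:     false,
	})
	return pte.Opts()
}
```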