summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
authorAndrei Vagin <avagin@google.com>2020-08-03 22:06:46 -0700
committergVisor bot <gvisor-bot@google.com>2020-08-03 22:08:25 -0700
commit25798f214c6d1991916906ea8fca9e7029a8c423 (patch)
treee2240d0174da0d95a0a19660ea8c2bb0ebbf0082 /pkg
parentb5c9ff81922ba785d83eaccc464b0b15a8120798 (diff)
Add callbacks to support lazy loading/restoring thread states
PiperOrigin-RevId: 324748508
Diffstat (limited to 'pkg')
-rw-r--r--pkg/sentry/kernel/kernel.go7
-rw-r--r--pkg/sentry/kernel/ptrace.go10
-rw-r--r--pkg/sentry/kernel/task_clone.go4
-rw-r--r--pkg/sentry/kernel/task_log.go43
-rw-r--r--pkg/sentry/kernel/task_run.go2
-rw-r--r--pkg/sentry/kernel/task_signals.go10
-rw-r--r--pkg/sentry/kernel/task_stop.go16
-rw-r--r--pkg/sentry/memmap/memmap.go7
-rw-r--r--pkg/sentry/mm/lifecycle.go2
-rw-r--r--pkg/sentry/mm/vma.go7
-rw-r--r--pkg/sentry/platform/kvm/BUILD1
-rw-r--r--pkg/sentry/platform/kvm/address_space.go6
-rw-r--r--pkg/sentry/platform/kvm/context.go10
-rw-r--r--pkg/sentry/platform/platform.go44
-rw-r--r--pkg/sentry/platform/ptrace/BUILD1
-rw-r--r--pkg/sentry/platform/ptrace/ptrace.go10
-rw-r--r--pkg/sentry/platform/ptrace/subprocess.go6
-rw-r--r--pkg/sentry/state/state.go1
18 files changed, 179 insertions, 8 deletions
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 316df249d..1028d13c6 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -1263,6 +1263,13 @@ func (k *Kernel) Pause() {
k.tasks.aioGoroutines.Wait()
}
+// ReceiveTaskStates receives full states for all tasks.
+func (k *Kernel) ReceiveTaskStates() {
+ k.extMu.Lock()
+ k.tasks.PullFullState()
+ k.extMu.Unlock()
+}
+
// Unpause ends the effect of a previous call to Pause. If Unpause is called
// without a matching preceding call to Pause, Unpause may panic.
func (k *Kernel) Unpause() {
diff --git a/pkg/sentry/kernel/ptrace.go b/pkg/sentry/kernel/ptrace.go
index e23e796ef..6c03d9041 100644
--- a/pkg/sentry/kernel/ptrace.go
+++ b/pkg/sentry/kernel/ptrace.go
@@ -1018,6 +1018,9 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
if err != nil {
return err
}
+
+ t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
+
ar := ars.Head()
n, err := target.Arch().PtraceGetRegSet(uintptr(addr), &usermem.IOReadWriter{
Ctx: t,
@@ -1044,10 +1047,14 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
if err != nil {
return err
}
+
+ mm := t.MemoryManager()
+ t.p.PullFullState(mm.AddressSpace(), t.Arch())
+
ar := ars.Head()
n, err := target.Arch().PtraceSetRegSet(uintptr(addr), &usermem.IOReadWriter{
Ctx: t,
- IO: t.MemoryManager(),
+ IO: mm,
Addr: ar.Start,
Opts: usermem.IOOpts{
AddressSpaceActive: true,
@@ -1056,6 +1063,7 @@ func (t *Task) Ptrace(req int64, pid ThreadID, addr, data usermem.Addr) error {
if err != nil {
return err
}
+ t.p.FloatingPointStateChanged()
ar.End -= usermem.Addr(n)
return t.CopyOutIovecs(data, usermem.AddrRangeSeqOf(ar))
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index fe6ba6041..9d7a9128f 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -161,6 +161,10 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
return 0, nil, syserror.EINVAL
}
+ // Pull task registers and FPU state, a cloned task will inherit the
+ // state of the current task.
+ t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
+
// "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a
// single clone(2) or unshare(2) call, the user namespace is guaranteed to
// be created first, giving the child (clone(2)) or caller (unshare(2))
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index ab86ceedc..d23cea802 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -27,6 +27,9 @@ const (
// maxStackDebugBytes is the maximum number of user stack bytes that may be
// printed by debugDumpStack.
maxStackDebugBytes = 1024
+ // maxCodeDebugBytes is the maximum number of user code bytes that may be
+ // printed by debugDumpCode.
+ maxCodeDebugBytes = 128
)
// Infof logs an formatted info message by calling log.Infof.
@@ -61,6 +64,7 @@ func (t *Task) IsLogging(level log.Level) bool {
func (t *Task) DebugDumpState() {
t.debugDumpRegisters()
t.debugDumpStack()
+ t.debugDumpCode()
if mm := t.MemoryManager(); mm != nil {
t.Debugf("Mappings:\n%s", mm)
}
@@ -128,6 +132,45 @@ func (t *Task) debugDumpStack() {
}
}
+// debugDumpCode logs user code contents at log level debug.
+//
+// Preconditions: The caller must be running on the task goroutine.
+func (t *Task) debugDumpCode() {
+ if !t.IsLogging(log.Debug) {
+ return
+ }
+ m := t.MemoryManager()
+ if m == nil {
+ t.Debugf("Memory manager for task is gone, skipping application code dump.")
+ return
+ }
+ t.Debugf("Code:")
+ // Print code on both sides of the instruction register.
+ start := usermem.Addr(t.Arch().IP()) - maxCodeDebugBytes/2
+ // Round addr down to a 16-byte boundary.
+ start &= ^usermem.Addr(15)
+ // Print 16 bytes per line, one byte at a time.
+ for offset := uint64(0); offset < maxCodeDebugBytes; offset += 16 {
+ addr, ok := start.AddLength(offset)
+ if !ok {
+ break
+ }
+ var data [16]byte
+ n, err := m.CopyIn(t, addr, data[:], usermem.IOOpts{
+ IgnorePermissions: true,
+ })
+ // Print as much of the line as we can, even if an error was
+ // encountered.
+ if n > 0 {
+ t.Debugf("%x: % x", addr, data[:n])
+ }
+ if err != nil {
+ t.Debugf("Error reading stack at address %x: %v", addr+usermem.Addr(n), err)
+ break
+ }
+ }
+}
+
// trace definitions.
//
// Note that all region names are prefixed by ':' in order to ensure that they
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index 7d4f44caf..abaf29216 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -260,7 +260,7 @@ func (app *runApp) execute(t *Task) taskRunState {
region := trace.StartRegion(t.traceContext, runRegion)
t.accountTaskGoroutineEnter(TaskGoroutineRunningApp)
- info, at, err := t.p.Switch(t.MemoryManager().AddressSpace(), t.Arch(), t.rseqCPU)
+ info, at, err := t.p.Switch(t, t.MemoryManager(), t.Arch(), t.rseqCPU)
t.accountTaskGoroutineLeave(TaskGoroutineRunningApp)
region.End()
diff --git a/pkg/sentry/kernel/task_signals.go b/pkg/sentry/kernel/task_signals.go
index 79766cafe..2180fd27d 100644
--- a/pkg/sentry/kernel/task_signals.go
+++ b/pkg/sentry/kernel/task_signals.go
@@ -255,10 +255,11 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
}
}
+ mm := t.MemoryManager()
// Set up the signal handler. If we have a saved signal mask, the signal
// handler should run with the current mask, but sigreturn should restore
// the saved one.
- st := &arch.Stack{t.Arch(), t.MemoryManager(), sp}
+ st := &arch.Stack{t.Arch(), mm, sp}
mask := t.signalMask
if t.haveSavedSignalMask {
mask = t.savedSignalMask
@@ -273,12 +274,13 @@ func (t *Task) deliverSignalToHandler(info *arch.SignalInfo, act arch.SignalAct)
// Please see the linux code as reference:
// linux/arch/arm64/kernel/signal.c:setup_return()
if act.Flags&linux.SA_RESTORER == 0 {
- act.Restorer = t.MemoryManager().VDSOSigReturn()
+ act.Restorer = mm.VDSOSigReturn()
}
if err := t.Arch().SignalSetup(st, &act, info, &alt, mask); err != nil {
return err
}
+ t.p.FloatingPointStateChanged()
t.haveSavedSignalMask = false
// Add our signal mask.
@@ -310,6 +312,7 @@ func (t *Task) SignalReturn(rt bool) (*SyscallControl, error) {
// Restore our signal mask. SIGKILL and SIGSTOP should not be blocked.
t.SetSignalMask(sigset &^ UnblockableSignals)
+ t.p.FloatingPointStateChanged()
return ctrlResume, nil
}
@@ -636,6 +639,7 @@ func (t *Task) SetSavedSignalMask(mask linux.SignalSet) {
// SignalStack returns the task-private signal stack.
func (t *Task) SignalStack() arch.SignalStack {
+ t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
alt := t.signalStack
if t.onSignalStack(alt) {
alt.Flags |= arch.SignalStackFlagOnStack
@@ -1050,6 +1054,8 @@ func (*runInterrupt) execute(t *Task) taskRunState {
// Are there signals pending?
if info := t.dequeueSignalLocked(t.signalMask); info != nil {
+ t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
+
if linux.SignalSetOf(linux.Signal(info.Signo))&StopSignals != 0 {
// Indicate that we've dequeued a stop signal before unlocking the
// signal mutex; initiateGroupStop will check for races with
diff --git a/pkg/sentry/kernel/task_stop.go b/pkg/sentry/kernel/task_stop.go
index 10c6e455c..296735d32 100644
--- a/pkg/sentry/kernel/task_stop.go
+++ b/pkg/sentry/kernel/task_stop.go
@@ -205,6 +205,22 @@ func (ts *TaskSet) BeginExternalStop() {
}
}
+// PullFullState receives full states for all tasks.
+func (ts *TaskSet) PullFullState() {
+ ts.mu.Lock()
+ defer ts.mu.Unlock()
+ if ts.Root == nil {
+ return
+ }
+ for t := range ts.Root.tids {
+ t.Activate()
+ if mm := t.MemoryManager(); mm != nil {
+ t.p.PullFullState(t.MemoryManager().AddressSpace(), t.Arch())
+ }
+ t.Deactivate()
+ }
+}
+
// EndExternalStop indicates the end of an external stop started by a previous
// call to TaskSet.BeginExternalStop. EndExternalStop does not wait for task
// goroutines to resume.
diff --git a/pkg/sentry/memmap/memmap.go b/pkg/sentry/memmap/memmap.go
index 59c92c7e8..65d83096f 100644
--- a/pkg/sentry/memmap/memmap.go
+++ b/pkg/sentry/memmap/memmap.go
@@ -360,6 +360,13 @@ type MMapOpts struct {
//
// TODO(jamieliu): Replace entirely with MappingIdentity?
Hint string
+
+ // Force means to skip validation checks of Addr and Length. It can be
+ // used to create special mappings below mm.layout.MinAddr and
+ // mm.layout.MaxAddr. It has to be used with caution.
+ //
+ // If Force is true, Unmap and Fixed must be true.
+ Force bool
}
// File represents a host file that may be mapped into an platform.AddressSpace.
diff --git a/pkg/sentry/mm/lifecycle.go b/pkg/sentry/mm/lifecycle.go
index 4d7773f8b..09dbc06a4 100644
--- a/pkg/sentry/mm/lifecycle.go
+++ b/pkg/sentry/mm/lifecycle.go
@@ -57,6 +57,8 @@ func (mm *MemoryManager) SetMmapLayout(ac arch.Context, r *limits.LimitSet) (arc
// Fork creates a copy of mm with 1 user, as for Linux syscalls fork() or
// clone() (without CLONE_VM).
func (mm *MemoryManager) Fork(ctx context.Context) (*MemoryManager, error) {
+ mm.AddressSpace().PreFork()
+ defer mm.AddressSpace().PostFork()
mm.metadataMu.Lock()
defer mm.metadataMu.Unlock()
mm.mappingMu.RLock()
diff --git a/pkg/sentry/mm/vma.go b/pkg/sentry/mm/vma.go
index bd751d696..c4e1989ed 100644
--- a/pkg/sentry/mm/vma.go
+++ b/pkg/sentry/mm/vma.go
@@ -42,7 +42,12 @@ func (mm *MemoryManager) createVMALocked(ctx context.Context, opts memmap.MMapOp
Map32Bit: opts.Map32Bit,
})
if err != nil {
- return vmaIterator{}, usermem.AddrRange{}, err
+ // Can't force without opts.Unmap and opts.Fixed.
+ if opts.Force && opts.Unmap && opts.Fixed {
+ addr = opts.Addr
+ } else {
+ return vmaIterator{}, usermem.AddrRange{}, err
+ }
}
ar, _ := addr.ToRange(opts.Length)
diff --git a/pkg/sentry/platform/kvm/BUILD b/pkg/sentry/platform/kvm/BUILD
index b5d27a72a..3970dd81d 100644
--- a/pkg/sentry/platform/kvm/BUILD
+++ b/pkg/sentry/platform/kvm/BUILD
@@ -41,6 +41,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/atomicbitops",
+ "//pkg/context",
"//pkg/cpuid",
"//pkg/log",
"//pkg/procid",
diff --git a/pkg/sentry/platform/kvm/address_space.go b/pkg/sentry/platform/kvm/address_space.go
index 98a3e539d..af5c5e191 100644
--- a/pkg/sentry/platform/kvm/address_space.go
+++ b/pkg/sentry/platform/kvm/address_space.go
@@ -248,3 +248,9 @@ func (as *addressSpace) Release() {
// Drop all cached machine references.
as.machine.dropPageTables(as.pageTables)
}
+
+// PreFork implements platform.AddressSpace.PreFork.
+func (as *addressSpace) PreFork() {}
+
+// PostFork implements platform.AddressSpace.PostFork.
+func (as *addressSpace) PostFork() {}
diff --git a/pkg/sentry/platform/kvm/context.go b/pkg/sentry/platform/kvm/context.go
index 6507121ea..eb92721fb 100644
--- a/pkg/sentry/platform/kvm/context.go
+++ b/pkg/sentry/platform/kvm/context.go
@@ -15,6 +15,7 @@
package kvm
import (
+ pkgcontext "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
@@ -37,7 +38,8 @@ type context struct {
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(as platform.AddressSpace, ac arch.Context, _ int32) (*arch.SignalInfo, usermem.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, _ int32) (*arch.SignalInfo, usermem.AccessType, error) {
+ as := mm.AddressSpace()
localAS := as.(*addressSpace)
// Grab a vCPU.
@@ -88,3 +90,9 @@ func (c *context) Interrupt() {
// Release implements platform.Context.Release().
func (c *context) Release() {}
+
+// FloatingPointStateChanged implements platform.Context.FloatingPointStateChanged.
+func (c *context) FloatingPointStateChanged() {}
+
+// PullFullState implements platform.Context.PullFullState.
+func (c *context) PullFullState(as platform.AddressSpace, ac arch.Context) {}
diff --git a/pkg/sentry/platform/platform.go b/pkg/sentry/platform/platform.go
index 4b13eec30..3f99afdd1 100644
--- a/pkg/sentry/platform/platform.go
+++ b/pkg/sentry/platform/platform.go
@@ -22,6 +22,7 @@ import (
"os"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
@@ -114,6 +115,17 @@ func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
panic("This platform does not support CPU preemption detection")
}
+// MemoryManager represents an abstraction above the platform address space
+// which manages memory mappings and their contents.
+type MemoryManager interface {
+ //usermem.IO provides access to the contents of a virtual memory space.
+ usermem.IO
+ // MMap establishes a memory mapping.
+ MMap(ctx context.Context, opts memmap.MMapOpts) (usermem.Addr, error)
+ // AddressSpace returns the AddressSpace bound to mm.
+ AddressSpace() AddressSpace
+}
+
// Context represents the execution context for a single thread.
type Context interface {
// Switch resumes execution of the thread specified by the arch.Context
@@ -143,7 +155,30 @@ type Context interface {
// concurrent call to Switch().
//
// - ErrContextCPUPreempted: See the definition of that error for details.
- Switch(as AddressSpace, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error)
+ Switch(ctx context.Context, mm MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error)
+
+ // PullFullState() pulls a full state of the application thread.
+ //
+ // A platform can support lazy loading/restoring of a thread state
+ // which includes registers and a floating point state.
+ //
+ // For example, when the Sentry handles a system call, it may have only
+ // syscall arguments without other registers and a floating point
+ // state. And in this case, if the Sentry will need to construct a
+ // signal frame to call a signal handler, it will need to call
+ // PullFullState() to load all registers and FPU state.
+ //
+ // Preconditions: The caller must be running on the task goroutine.
+ PullFullState(as AddressSpace, ac arch.Context)
+
+ // FloatingPointStateChanged forces restoring a full state of the application thread.
+ //
+ // A platform can support lazy loading/restoring of a thread state.
+ // This means that if the Sentry has not changed a thread state,
+ // the platform may not restore it.
+ //
+ // Preconditions: The caller must be running on the task goroutine.
+ FloatingPointStateChanged()
// Interrupt interrupts a concurrent call to Switch(), causing it to return
// ErrContextInterrupt.
@@ -218,6 +253,13 @@ type AddressSpace interface {
// must be acquired via platform.NewAddressSpace().
Release()
+ // PreFork() is called before creating a copy of AddressSpace. This
+ // guarantees that this address space will be in a consistent state.
+ PreFork()
+
+ // PostFork() is called after creating a copy of AddressSpace.
+ PostFork()
+
// AddressSpaceIO methods are supported iff the associated platform's
// Platform.SupportsAddressSpaceIO() == true. AddressSpaces for which this
// does not hold may panic if AddressSpaceIO methods are invoked.
diff --git a/pkg/sentry/platform/ptrace/BUILD b/pkg/sentry/platform/ptrace/BUILD
index 29fd23cc3..e04165fbf 100644
--- a/pkg/sentry/platform/ptrace/BUILD
+++ b/pkg/sentry/platform/ptrace/BUILD
@@ -24,6 +24,7 @@ go_library(
visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
+ "//pkg/context",
"//pkg/log",
"//pkg/procid",
"//pkg/safecopy",
diff --git a/pkg/sentry/platform/ptrace/ptrace.go b/pkg/sentry/platform/ptrace/ptrace.go
index 08d055e05..45ff2bcc3 100644
--- a/pkg/sentry/platform/ptrace/ptrace.go
+++ b/pkg/sentry/platform/ptrace/ptrace.go
@@ -48,6 +48,7 @@ import (
"os"
"gvisor.dev/gvisor/pkg/abi/linux"
+ pkgcontext "gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/platform"
"gvisor.dev/gvisor/pkg/sentry/platform/interrupt"
@@ -95,7 +96,8 @@ type context struct {
}
// Switch runs the provided context in the given address space.
-func (c *context) Switch(as platform.AddressSpace, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) {
+func (c *context) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac arch.Context, cpu int32) (*arch.SignalInfo, usermem.AccessType, error) {
+ as := mm.AddressSpace()
s := as.(*subprocess)
isSyscall := s.switchToApp(c, ac)
@@ -180,6 +182,12 @@ func (c *context) Interrupt() {
// Release implements platform.Context.Release().
func (c *context) Release() {}
+// FloatingPointStateChanged implements platform.Context.FloatingPointStateChanged.
+func (c *context) FloatingPointStateChanged() {}
+
+// PullFullState implements platform.Context.PullFullState.
+func (c *context) PullFullState(as platform.AddressSpace, ac arch.Context) {}
+
// PTrace represents a collection of ptrace subprocesses.
type PTrace struct {
platform.MMapMinAddr
diff --git a/pkg/sentry/platform/ptrace/subprocess.go b/pkg/sentry/platform/ptrace/subprocess.go
index c990f3454..e1d54d8a2 100644
--- a/pkg/sentry/platform/ptrace/subprocess.go
+++ b/pkg/sentry/platform/ptrace/subprocess.go
@@ -662,3 +662,9 @@ func (s *subprocess) Unmap(addr usermem.Addr, length uint64) {
panic(fmt.Sprintf("munmap(%x, %x)) failed: %v", addr, length, err))
}
}
+
+// PreFork implements platform.AddressSpace.PreFork.
+func (s *subprocess) PreFork() {}
+
+// PostFork implements platform.AddressSpace.PostFork.
+func (s *subprocess) PostFork() {}
diff --git a/pkg/sentry/state/state.go b/pkg/sentry/state/state.go
index 9eb626b76..a06c9b8ab 100644
--- a/pkg/sentry/state/state.go
+++ b/pkg/sentry/state/state.go
@@ -60,6 +60,7 @@ type SaveOpts struct {
func (opts SaveOpts) Save(k *kernel.Kernel, w *watchdog.Watchdog) error {
log.Infof("Sandbox save started, pausing all tasks.")
k.Pause()
+ k.ReceiveTaskStates()
defer k.Unpause()
defer log.Infof("Tasks resumed after save.")