-rw-r--r--  pkg/abi/linux/linux_abi_autogen_unsafe.go |  10
-rw-r--r--  pkg/sentry/kernel/kernel_state_autogen.go | 230
-rw-r--r--  pkg/sentry/kernel/task.go                 |  15
-rw-r--r--  pkg/sentry/kernel/task_run.go             |  17
-rw-r--r--  pkg/sentry/kernel/task_work.go            |  38
5 files changed, 192 insertions(+), 118 deletions(-)
diff --git a/pkg/abi/linux/linux_abi_autogen_unsafe.go b/pkg/abi/linux/linux_abi_autogen_unsafe.go
index 79920059e..fb022bc9f 100644
--- a/pkg/abi/linux/linux_abi_autogen_unsafe.go
+++ b/pkg/abi/linux/linux_abi_autogen_unsafe.go
@@ -139,7 +139,7 @@ func (s *Statx) Packed() bool {
// MarshalUnsafe implements marshal.Marshallable.MarshalUnsafe.
func (s *Statx) MarshalUnsafe(dst []byte) {
- if s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() {
+ if s.Mtime.Packed() && s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() {
safecopy.CopyIn(dst, unsafe.Pointer(s))
} else {
s.MarshalBytes(dst)
@@ -214,7 +214,7 @@ func (s *Statx) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
// WriteTo implements io.WriterTo.WriteTo.
func (s *Statx) WriteTo(w io.Writer) (int64, error) {
- if !s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() && s.Atime.Packed() {
+ if !s.Atime.Packed() && s.Btime.Packed() && s.Ctime.Packed() && s.Mtime.Packed() {
// Type Statx doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := make([]byte, s.SizeBytes())
s.MarshalBytes(buf)
@@ -524,7 +524,7 @@ func (i *IPTEntry) MarshalUnsafe(dst []byte) {
// UnmarshalUnsafe implements marshal.Marshallable.UnmarshalUnsafe.
func (i *IPTEntry) UnmarshalUnsafe(src []byte) {
- if i.IP.Packed() && i.Counters.Packed() {
+ if i.Counters.Packed() && i.IP.Packed() {
safecopy.CopyOut(unsafe.Pointer(i), src)
} else {
i.UnmarshalBytes(src)
@@ -590,7 +590,7 @@ func (i *IPTEntry) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
// WriteTo implements io.WriterTo.WriteTo.
func (i *IPTEntry) WriteTo(w io.Writer) (int64, error) {
- if !i.IP.Packed() && i.Counters.Packed() {
+ if !i.Counters.Packed() && i.IP.Packed() {
// Type IPTEntry doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := make([]byte, i.SizeBytes())
i.MarshalBytes(buf)
@@ -776,7 +776,7 @@ func (i *IPTIP) CopyIn(task marshal.Task, addr usermem.Addr) (int, error) {
// WriteTo implements io.WriterTo.WriteTo.
func (i *IPTIP) WriteTo(w io.Writer) (int64, error) {
- if !i.Src.Packed() && i.Dst.Packed() && i.SrcMask.Packed() && i.DstMask.Packed() {
+ if !i.SrcMask.Packed() && i.DstMask.Packed() && i.Src.Packed() && i.Dst.Packed() {
// Type IPTIP doesn't have a packed layout in memory, fall back to MarshalBytes.
buf := make([]byte, i.SizeBytes())
i.MarshalBytes(buf)
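The four hunks above are generated-code churn: only the order of the Packed() guards changed, which appears to be ordering instability in the go_marshal generator rather than a semantic change. The underlying pattern is a fast path that performs one raw memory copy when every embedded field is packed, falling back to field-by-field encoding otherwise. A minimal, self-contained sketch of that pattern, using a hypothetical Timespec type rather than the real linux.Statx (a little-endian host is assumed, so the raw copy and the explicit encoding agree):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// Timespec is a toy stand-in for the embedded timestamp fields above.
type Timespec struct {
	Sec  int64
	Nsec int64
}

// Packed reports whether the in-memory layout has no padding, so the
// struct's raw bytes equal its marshalled form.
func (t *Timespec) Packed() bool {
	return unsafe.Sizeof(*t) == 16
}

// MarshalBytes is the slow path: explicit field-by-field encoding.
func (t *Timespec) MarshalBytes(dst []byte) {
	binary.LittleEndian.PutUint64(dst[0:8], uint64(t.Sec))
	binary.LittleEndian.PutUint64(dst[8:16], uint64(t.Nsec))
}

// MarshalUnsafe mirrors the generated methods above: one raw copy when
// the layout is packed, MarshalBytes otherwise.
func (t *Timespec) MarshalUnsafe(dst []byte) {
	if t.Packed() {
		copy(dst, (*[16]byte)(unsafe.Pointer(t))[:])
		return
	}
	t.MarshalBytes(dst)
}

func main() {
	buf := make([]byte, 16)
	(&Timespec{Sec: 1, Nsec: 2}).MarshalUnsafe(buf)
	fmt.Printf("% x\n", buf)
}
```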
diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go
index 8f184e712..b267f0556 100644
--- a/pkg/sentry/kernel/kernel_state_autogen.go
+++ b/pkg/sentry/kernel/kernel_state_autogen.go
@@ -1002,6 +1002,8 @@ func (x *Task) StateFields() []string {
return []string{
"taskNode",
"runState",
+ "taskWorkCount",
+ "taskWork",
"haveSyscallReturn",
"gosched",
"yieldCount",
@@ -1069,126 +1071,130 @@ func (x *Task) StateSave(m state.Sink) {
state.Failf("signalQueue is %#v, expected zero", &x.signalQueue)
}
var ptraceTracer *Task = x.savePtraceTracer()
- m.SaveValue(29, ptraceTracer)
+ m.SaveValue(31, ptraceTracer)
var syscallFilters []bpf.Program = x.saveSyscallFilters()
- m.SaveValue(45, syscallFilters)
+ m.SaveValue(47, syscallFilters)
m.Save(0, &x.taskNode)
m.Save(1, &x.runState)
- m.Save(2, &x.haveSyscallReturn)
- m.Save(3, &x.gosched)
- m.Save(4, &x.yieldCount)
- m.Save(5, &x.pendingSignals)
- m.Save(6, &x.signalMask)
- m.Save(7, &x.realSignalMask)
- m.Save(8, &x.haveSavedSignalMask)
- m.Save(9, &x.savedSignalMask)
- m.Save(10, &x.signalStack)
- m.Save(11, &x.groupStopPending)
- m.Save(12, &x.groupStopAcknowledged)
- m.Save(13, &x.trapStopPending)
- m.Save(14, &x.trapNotifyPending)
- m.Save(15, &x.stop)
- m.Save(16, &x.exitStatus)
- m.Save(17, &x.syscallRestartBlock)
- m.Save(18, &x.k)
- m.Save(19, &x.containerID)
- m.Save(20, &x.tc)
- m.Save(21, &x.fsContext)
- m.Save(22, &x.fdTable)
- m.Save(23, &x.vforkParent)
- m.Save(24, &x.exitState)
- m.Save(25, &x.exitTracerNotified)
- m.Save(26, &x.exitTracerAcked)
- m.Save(27, &x.exitParentNotified)
- m.Save(28, &x.exitParentAcked)
- m.Save(30, &x.ptraceTracees)
- m.Save(31, &x.ptraceSeized)
- m.Save(32, &x.ptraceOpts)
- m.Save(33, &x.ptraceSyscallMode)
- m.Save(34, &x.ptraceSinglestep)
- m.Save(35, &x.ptraceCode)
- m.Save(36, &x.ptraceSiginfo)
- m.Save(37, &x.ptraceEventMsg)
- m.Save(38, &x.ioUsage)
- m.Save(39, &x.creds)
- m.Save(40, &x.utsns)
- m.Save(41, &x.ipcns)
- m.Save(42, &x.abstractSockets)
- m.Save(43, &x.mountNamespaceVFS2)
- m.Save(44, &x.parentDeathSignal)
- m.Save(46, &x.cleartid)
- m.Save(47, &x.allowedCPUMask)
- m.Save(48, &x.cpu)
- m.Save(49, &x.niceness)
- m.Save(50, &x.numaPolicy)
- m.Save(51, &x.numaNodeMask)
- m.Save(52, &x.netns)
- m.Save(53, &x.rseqCPU)
- m.Save(54, &x.oldRSeqCPUAddr)
- m.Save(55, &x.rseqAddr)
- m.Save(56, &x.rseqSignature)
- m.Save(57, &x.startTime)
+ m.Save(2, &x.taskWorkCount)
+ m.Save(3, &x.taskWork)
+ m.Save(4, &x.haveSyscallReturn)
+ m.Save(5, &x.gosched)
+ m.Save(6, &x.yieldCount)
+ m.Save(7, &x.pendingSignals)
+ m.Save(8, &x.signalMask)
+ m.Save(9, &x.realSignalMask)
+ m.Save(10, &x.haveSavedSignalMask)
+ m.Save(11, &x.savedSignalMask)
+ m.Save(12, &x.signalStack)
+ m.Save(13, &x.groupStopPending)
+ m.Save(14, &x.groupStopAcknowledged)
+ m.Save(15, &x.trapStopPending)
+ m.Save(16, &x.trapNotifyPending)
+ m.Save(17, &x.stop)
+ m.Save(18, &x.exitStatus)
+ m.Save(19, &x.syscallRestartBlock)
+ m.Save(20, &x.k)
+ m.Save(21, &x.containerID)
+ m.Save(22, &x.tc)
+ m.Save(23, &x.fsContext)
+ m.Save(24, &x.fdTable)
+ m.Save(25, &x.vforkParent)
+ m.Save(26, &x.exitState)
+ m.Save(27, &x.exitTracerNotified)
+ m.Save(28, &x.exitTracerAcked)
+ m.Save(29, &x.exitParentNotified)
+ m.Save(30, &x.exitParentAcked)
+ m.Save(32, &x.ptraceTracees)
+ m.Save(33, &x.ptraceSeized)
+ m.Save(34, &x.ptraceOpts)
+ m.Save(35, &x.ptraceSyscallMode)
+ m.Save(36, &x.ptraceSinglestep)
+ m.Save(37, &x.ptraceCode)
+ m.Save(38, &x.ptraceSiginfo)
+ m.Save(39, &x.ptraceEventMsg)
+ m.Save(40, &x.ioUsage)
+ m.Save(41, &x.creds)
+ m.Save(42, &x.utsns)
+ m.Save(43, &x.ipcns)
+ m.Save(44, &x.abstractSockets)
+ m.Save(45, &x.mountNamespaceVFS2)
+ m.Save(46, &x.parentDeathSignal)
+ m.Save(48, &x.cleartid)
+ m.Save(49, &x.allowedCPUMask)
+ m.Save(50, &x.cpu)
+ m.Save(51, &x.niceness)
+ m.Save(52, &x.numaPolicy)
+ m.Save(53, &x.numaNodeMask)
+ m.Save(54, &x.netns)
+ m.Save(55, &x.rseqCPU)
+ m.Save(56, &x.oldRSeqCPUAddr)
+ m.Save(57, &x.rseqAddr)
+ m.Save(58, &x.rseqSignature)
+ m.Save(59, &x.startTime)
}
func (x *Task) StateLoad(m state.Source) {
m.Load(0, &x.taskNode)
m.Load(1, &x.runState)
- m.Load(2, &x.haveSyscallReturn)
- m.Load(3, &x.gosched)
- m.Load(4, &x.yieldCount)
- m.Load(5, &x.pendingSignals)
- m.Load(6, &x.signalMask)
- m.Load(7, &x.realSignalMask)
- m.Load(8, &x.haveSavedSignalMask)
- m.Load(9, &x.savedSignalMask)
- m.Load(10, &x.signalStack)
- m.Load(11, &x.groupStopPending)
- m.Load(12, &x.groupStopAcknowledged)
- m.Load(13, &x.trapStopPending)
- m.Load(14, &x.trapNotifyPending)
- m.Load(15, &x.stop)
- m.Load(16, &x.exitStatus)
- m.Load(17, &x.syscallRestartBlock)
- m.Load(18, &x.k)
- m.Load(19, &x.containerID)
- m.Load(20, &x.tc)
- m.Load(21, &x.fsContext)
- m.Load(22, &x.fdTable)
- m.Load(23, &x.vforkParent)
- m.Load(24, &x.exitState)
- m.Load(25, &x.exitTracerNotified)
- m.Load(26, &x.exitTracerAcked)
- m.Load(27, &x.exitParentNotified)
- m.Load(28, &x.exitParentAcked)
- m.Load(30, &x.ptraceTracees)
- m.Load(31, &x.ptraceSeized)
- m.Load(32, &x.ptraceOpts)
- m.Load(33, &x.ptraceSyscallMode)
- m.Load(34, &x.ptraceSinglestep)
- m.Load(35, &x.ptraceCode)
- m.Load(36, &x.ptraceSiginfo)
- m.Load(37, &x.ptraceEventMsg)
- m.Load(38, &x.ioUsage)
- m.Load(39, &x.creds)
- m.Load(40, &x.utsns)
- m.Load(41, &x.ipcns)
- m.Load(42, &x.abstractSockets)
- m.Load(43, &x.mountNamespaceVFS2)
- m.Load(44, &x.parentDeathSignal)
- m.Load(46, &x.cleartid)
- m.Load(47, &x.allowedCPUMask)
- m.Load(48, &x.cpu)
- m.Load(49, &x.niceness)
- m.Load(50, &x.numaPolicy)
- m.Load(51, &x.numaNodeMask)
- m.Load(52, &x.netns)
- m.Load(53, &x.rseqCPU)
- m.Load(54, &x.oldRSeqCPUAddr)
- m.Load(55, &x.rseqAddr)
- m.Load(56, &x.rseqSignature)
- m.Load(57, &x.startTime)
- m.LoadValue(29, new(*Task), func(y interface{}) { x.loadPtraceTracer(y.(*Task)) })
- m.LoadValue(45, new([]bpf.Program), func(y interface{}) { x.loadSyscallFilters(y.([]bpf.Program)) })
+ m.Load(2, &x.taskWorkCount)
+ m.Load(3, &x.taskWork)
+ m.Load(4, &x.haveSyscallReturn)
+ m.Load(5, &x.gosched)
+ m.Load(6, &x.yieldCount)
+ m.Load(7, &x.pendingSignals)
+ m.Load(8, &x.signalMask)
+ m.Load(9, &x.realSignalMask)
+ m.Load(10, &x.haveSavedSignalMask)
+ m.Load(11, &x.savedSignalMask)
+ m.Load(12, &x.signalStack)
+ m.Load(13, &x.groupStopPending)
+ m.Load(14, &x.groupStopAcknowledged)
+ m.Load(15, &x.trapStopPending)
+ m.Load(16, &x.trapNotifyPending)
+ m.Load(17, &x.stop)
+ m.Load(18, &x.exitStatus)
+ m.Load(19, &x.syscallRestartBlock)
+ m.Load(20, &x.k)
+ m.Load(21, &x.containerID)
+ m.Load(22, &x.tc)
+ m.Load(23, &x.fsContext)
+ m.Load(24, &x.fdTable)
+ m.Load(25, &x.vforkParent)
+ m.Load(26, &x.exitState)
+ m.Load(27, &x.exitTracerNotified)
+ m.Load(28, &x.exitTracerAcked)
+ m.Load(29, &x.exitParentNotified)
+ m.Load(30, &x.exitParentAcked)
+ m.Load(32, &x.ptraceTracees)
+ m.Load(33, &x.ptraceSeized)
+ m.Load(34, &x.ptraceOpts)
+ m.Load(35, &x.ptraceSyscallMode)
+ m.Load(36, &x.ptraceSinglestep)
+ m.Load(37, &x.ptraceCode)
+ m.Load(38, &x.ptraceSiginfo)
+ m.Load(39, &x.ptraceEventMsg)
+ m.Load(40, &x.ioUsage)
+ m.Load(41, &x.creds)
+ m.Load(42, &x.utsns)
+ m.Load(43, &x.ipcns)
+ m.Load(44, &x.abstractSockets)
+ m.Load(45, &x.mountNamespaceVFS2)
+ m.Load(46, &x.parentDeathSignal)
+ m.Load(48, &x.cleartid)
+ m.Load(49, &x.allowedCPUMask)
+ m.Load(50, &x.cpu)
+ m.Load(51, &x.niceness)
+ m.Load(52, &x.numaPolicy)
+ m.Load(53, &x.numaNodeMask)
+ m.Load(54, &x.netns)
+ m.Load(55, &x.rseqCPU)
+ m.Load(56, &x.oldRSeqCPUAddr)
+ m.Load(57, &x.rseqAddr)
+ m.Load(58, &x.rseqSignature)
+ m.Load(59, &x.startTime)
+ m.LoadValue(31, new(*Task), func(y interface{}) { x.loadPtraceTracer(y.(*Task)) })
+ m.LoadValue(47, new([]bpf.Program), func(y interface{}) { x.loadSyscallFilters(y.([]bpf.Program)) })
m.AfterLoad(x.afterLoad)
}
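Nearly all of this hunk is mechanical renumbering: the two new fields claim slots 2 and 3, every later field shifts down by two, and the deferred SaveValue/LoadValue slots move from 29/45 to 31/47 in step. The contract being preserved is that StateSave and StateLoad must agree on a stable slot index per field. A toy sketch of that contract (the Sink/Source shapes here are assumptions modeled on the calls above, not the real state package API):

```go
package main

import "fmt"

// sink and source are toy stand-ins for state.Sink and state.Source:
// fields are addressed by slot index, not by call order, so the save
// and load sides must be renumbered together when a field is inserted.
type sink map[int]interface{}

func (s sink) Save(slot int, v interface{}) { s[slot] = v }

type source map[int]interface{}

func (s source) Load(slot int, v *interface{}) { *v = s[slot] }

func main() {
	m := sink{}
	m.Save(1, "runState")
	m.Save(2, "taskWorkCount")     // new in this commit
	m.Save(3, "taskWork")          // new in this commit
	m.Save(4, "haveSyscallReturn") // previously slot 2

	var v interface{}
	source(m).Load(4, &v)
	fmt.Println(v) // haveSyscallReturn: the load side uses the shifted slot
}
```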
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index f48247c94..b3d655b6e 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -68,6 +68,21 @@ type Task struct {
// runState is exclusive to the task goroutine.
runState taskRunState
+ // taskWorkCount represents the current size of the task work queue. It is
+ // used to avoid acquiring taskWorkMu when the queue is empty.
+ //
+ // Must be accessed with atomic memory operations.
+ taskWorkCount int32
+
+ // taskWorkMu protects taskWork.
+ taskWorkMu sync.Mutex `state:"nosave"`
+
+ // taskWork is a queue of work to be executed before resuming user execution.
+ // It is similar to the task_work mechanism in Linux.
+ //
+ // taskWork is protected by taskWorkMu; it is drained only by the task goroutine.
+ taskWork []TaskWorker
+
// haveSyscallReturn is true if tc.Arch().Return() represents a value
// returned by a syscall (or set by ptrace after a syscall).
//
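These three fields implement a lock-free emptiness check: taskWorkCount mirrors len(taskWork) and is readable with a single atomic load, so the hot path never touches taskWorkMu when no work is queued. A minimal sketch of the registration side of that invariant (queue and register are hypothetical names, not gVisor API; the drain side is sketched after the task_run.go hunk below):

```go
package main

import (
	"sync"
	"sync/atomic"
)

// queue mirrors the three Task fields above. Invariant: count equals
// len(work) whenever mu is released, so an atomic load of count is a
// valid emptiness check without acquiring mu.
type queue struct {
	count int32
	mu    sync.Mutex
	work  []func()
}

// register keeps count and work in sync under the lock; count is still
// written atomically because readers check it without holding mu.
func (q *queue) register(f func()) {
	q.mu.Lock()
	defer q.mu.Unlock()
	atomic.AddInt32(&q.count, 1)
	q.work = append(q.work, f)
}

func main() {
	var q queue
	q.register(func() {})
	if atomic.LoadInt32(&q.count) > 0 { // the consumer's lock-free fast path
		// drain would run here; see the sketch after the task_run.go hunk.
	}
}
```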
diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go
index d654dd997..7d4f44caf 100644
--- a/pkg/sentry/kernel/task_run.go
+++ b/pkg/sentry/kernel/task_run.go
@@ -167,7 +167,22 @@ func (app *runApp) execute(t *Task) taskRunState {
return (*runInterrupt)(nil)
}
- // We're about to switch to the application again. If there's still a
+ // Execute any task work callbacks before returning to user space.
+ if atomic.LoadInt32(&t.taskWorkCount) > 0 {
+ t.taskWorkMu.Lock()
+ queue := t.taskWork
+ t.taskWork = nil
+ atomic.StoreInt32(&t.taskWorkCount, 0)
+ t.taskWorkMu.Unlock()
+
+ // Do not hold taskWorkMu while executing task work, which may register
+ // more work.
+ for _, work := range queue {
+ work.TaskWork(t)
+ }
+ }
+
+ // We're about to switch to the application again. If there's still an
// unhandled SyscallRestartErrno that wasn't translated to an EINTR,
// restart the syscall that was interrupted. If there's a saved signal
// mask, restore it. (Note that restoring the saved signal mask may unblock
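The drain logic above swaps the queue out under the lock and runs the callbacks unlocked, so a callback can safely re-register work; anything re-registered waits for the next return to user space. A self-contained sketch of those drain semantics, continuing the hypothetical queue type from the previous sketch:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// queue is the same hypothetical stand-in used in the previous sketch.
type queue struct {
	count int32
	mu    sync.Mutex
	work  []func()
}

func (q *queue) register(f func()) {
	q.mu.Lock()
	defer q.mu.Unlock()
	atomic.AddInt32(&q.count, 1)
	q.work = append(q.work, f)
}

// drain mirrors the runApp.execute logic above: check the counter
// atomically, swap the slice out under the lock, then run callbacks
// unlocked so they may re-register work for the next drain.
func (q *queue) drain() {
	if atomic.LoadInt32(&q.count) == 0 {
		return // empty queue: no lock acquired
	}
	q.mu.Lock()
	queued := q.work
	q.work = nil
	atomic.StoreInt32(&q.count, 0)
	q.mu.Unlock()
	for _, f := range queued {
		f()
	}
}

func main() {
	var q queue
	q.register(func() {
		fmt.Println("first return to user space")
		q.register(func() { fmt.Println("second return to user space") })
	})
	q.drain() // runs only the first callback
	q.drain() // runs the callback re-registered during the first drain
}
```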
diff --git a/pkg/sentry/kernel/task_work.go b/pkg/sentry/kernel/task_work.go
new file mode 100644
index 000000000..dda5a433a
--- /dev/null
+++ b/pkg/sentry/kernel/task_work.go
@@ -0,0 +1,38 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernel
+
+import "sync/atomic"
+
+// TaskWorker is a deferred task.
+//
+// This must be savable.
+type TaskWorker interface {
+ // TaskWork will be executed prior to returning to user space. Note that
+ // TaskWork may call RegisterWork again, but this will not be executed until
+ // the next return to user space, unlike in Linux. This effectively allows
+ // registration of indefinite user return hooks, but not by default.
+ TaskWork(t *Task)
+}
+
+// RegisterWork can be used to register additional task work that will be
+// performed prior to returning to user space. See TaskWorker.TaskWork for
+// semantics regarding registration.
+func (t *Task) RegisterWork(work TaskWorker) {
+ t.taskWorkMu.Lock()
+ defer t.taskWorkMu.Unlock()
+ atomic.AddInt32(&t.taskWorkCount, 1)
+ t.taskWork = append(t.taskWork, work)
+}
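For illustration, a hypothetical TaskWorker implementation (not part of this commit). The interface comment requires implementations to be savable, so this example carries no state; Task.Debugf is assumed to be the task-scoped logger defined elsewhere in this package:

```go
// logWork is a hypothetical TaskWorker that logs just before the task
// returns to user space. Real implementations must be savable, so it
// holds no unsavable state (e.g. no function values).
type logWork struct{}

// TaskWork implements TaskWorker.TaskWork. It runs exactly once; calling
// t.RegisterWork(logWork{}) here would defer another run to the task's
// next return to user space.
func (logWork) TaskWork(t *Task) {
	t.Debugf("deferred task work running before return to user space")
}
```

A caller on any goroutine holding a Task reference would register it with t.RegisterWork(logWork{}); the work itself always executes on the task goroutine, just before it switches back to the application.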