diff options
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- | pkg/sentry/kernel/kernel_state_autogen.go | 230 | ||||
-rw-r--r-- | pkg/sentry/kernel/task.go | 15 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_run.go | 17 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_work.go | 38 |
4 files changed, 187 insertions, 113 deletions
diff --git a/pkg/sentry/kernel/kernel_state_autogen.go b/pkg/sentry/kernel/kernel_state_autogen.go index 8f184e712..b267f0556 100644 --- a/pkg/sentry/kernel/kernel_state_autogen.go +++ b/pkg/sentry/kernel/kernel_state_autogen.go @@ -1002,6 +1002,8 @@ func (x *Task) StateFields() []string { return []string{ "taskNode", "runState", + "taskWorkCount", + "taskWork", "haveSyscallReturn", "gosched", "yieldCount", @@ -1069,126 +1071,130 @@ func (x *Task) StateSave(m state.Sink) { state.Failf("signalQueue is %#v, expected zero", &x.signalQueue) } var ptraceTracer *Task = x.savePtraceTracer() - m.SaveValue(29, ptraceTracer) + m.SaveValue(31, ptraceTracer) var syscallFilters []bpf.Program = x.saveSyscallFilters() - m.SaveValue(45, syscallFilters) + m.SaveValue(47, syscallFilters) m.Save(0, &x.taskNode) m.Save(1, &x.runState) - m.Save(2, &x.haveSyscallReturn) - m.Save(3, &x.gosched) - m.Save(4, &x.yieldCount) - m.Save(5, &x.pendingSignals) - m.Save(6, &x.signalMask) - m.Save(7, &x.realSignalMask) - m.Save(8, &x.haveSavedSignalMask) - m.Save(9, &x.savedSignalMask) - m.Save(10, &x.signalStack) - m.Save(11, &x.groupStopPending) - m.Save(12, &x.groupStopAcknowledged) - m.Save(13, &x.trapStopPending) - m.Save(14, &x.trapNotifyPending) - m.Save(15, &x.stop) - m.Save(16, &x.exitStatus) - m.Save(17, &x.syscallRestartBlock) - m.Save(18, &x.k) - m.Save(19, &x.containerID) - m.Save(20, &x.tc) - m.Save(21, &x.fsContext) - m.Save(22, &x.fdTable) - m.Save(23, &x.vforkParent) - m.Save(24, &x.exitState) - m.Save(25, &x.exitTracerNotified) - m.Save(26, &x.exitTracerAcked) - m.Save(27, &x.exitParentNotified) - m.Save(28, &x.exitParentAcked) - m.Save(30, &x.ptraceTracees) - m.Save(31, &x.ptraceSeized) - m.Save(32, &x.ptraceOpts) - m.Save(33, &x.ptraceSyscallMode) - m.Save(34, &x.ptraceSinglestep) - m.Save(35, &x.ptraceCode) - m.Save(36, &x.ptraceSiginfo) - m.Save(37, &x.ptraceEventMsg) - m.Save(38, &x.ioUsage) - m.Save(39, &x.creds) - m.Save(40, &x.utsns) - m.Save(41, &x.ipcns) - m.Save(42, 
&x.abstractSockets) - m.Save(43, &x.mountNamespaceVFS2) - m.Save(44, &x.parentDeathSignal) - m.Save(46, &x.cleartid) - m.Save(47, &x.allowedCPUMask) - m.Save(48, &x.cpu) - m.Save(49, &x.niceness) - m.Save(50, &x.numaPolicy) - m.Save(51, &x.numaNodeMask) - m.Save(52, &x.netns) - m.Save(53, &x.rseqCPU) - m.Save(54, &x.oldRSeqCPUAddr) - m.Save(55, &x.rseqAddr) - m.Save(56, &x.rseqSignature) - m.Save(57, &x.startTime) + m.Save(2, &x.taskWorkCount) + m.Save(3, &x.taskWork) + m.Save(4, &x.haveSyscallReturn) + m.Save(5, &x.gosched) + m.Save(6, &x.yieldCount) + m.Save(7, &x.pendingSignals) + m.Save(8, &x.signalMask) + m.Save(9, &x.realSignalMask) + m.Save(10, &x.haveSavedSignalMask) + m.Save(11, &x.savedSignalMask) + m.Save(12, &x.signalStack) + m.Save(13, &x.groupStopPending) + m.Save(14, &x.groupStopAcknowledged) + m.Save(15, &x.trapStopPending) + m.Save(16, &x.trapNotifyPending) + m.Save(17, &x.stop) + m.Save(18, &x.exitStatus) + m.Save(19, &x.syscallRestartBlock) + m.Save(20, &x.k) + m.Save(21, &x.containerID) + m.Save(22, &x.tc) + m.Save(23, &x.fsContext) + m.Save(24, &x.fdTable) + m.Save(25, &x.vforkParent) + m.Save(26, &x.exitState) + m.Save(27, &x.exitTracerNotified) + m.Save(28, &x.exitTracerAcked) + m.Save(29, &x.exitParentNotified) + m.Save(30, &x.exitParentAcked) + m.Save(32, &x.ptraceTracees) + m.Save(33, &x.ptraceSeized) + m.Save(34, &x.ptraceOpts) + m.Save(35, &x.ptraceSyscallMode) + m.Save(36, &x.ptraceSinglestep) + m.Save(37, &x.ptraceCode) + m.Save(38, &x.ptraceSiginfo) + m.Save(39, &x.ptraceEventMsg) + m.Save(40, &x.ioUsage) + m.Save(41, &x.creds) + m.Save(42, &x.utsns) + m.Save(43, &x.ipcns) + m.Save(44, &x.abstractSockets) + m.Save(45, &x.mountNamespaceVFS2) + m.Save(46, &x.parentDeathSignal) + m.Save(48, &x.cleartid) + m.Save(49, &x.allowedCPUMask) + m.Save(50, &x.cpu) + m.Save(51, &x.niceness) + m.Save(52, &x.numaPolicy) + m.Save(53, &x.numaNodeMask) + m.Save(54, &x.netns) + m.Save(55, &x.rseqCPU) + m.Save(56, &x.oldRSeqCPUAddr) + m.Save(57, 
&x.rseqAddr) + m.Save(58, &x.rseqSignature) + m.Save(59, &x.startTime) } func (x *Task) StateLoad(m state.Source) { m.Load(0, &x.taskNode) m.Load(1, &x.runState) - m.Load(2, &x.haveSyscallReturn) - m.Load(3, &x.gosched) - m.Load(4, &x.yieldCount) - m.Load(5, &x.pendingSignals) - m.Load(6, &x.signalMask) - m.Load(7, &x.realSignalMask) - m.Load(8, &x.haveSavedSignalMask) - m.Load(9, &x.savedSignalMask) - m.Load(10, &x.signalStack) - m.Load(11, &x.groupStopPending) - m.Load(12, &x.groupStopAcknowledged) - m.Load(13, &x.trapStopPending) - m.Load(14, &x.trapNotifyPending) - m.Load(15, &x.stop) - m.Load(16, &x.exitStatus) - m.Load(17, &x.syscallRestartBlock) - m.Load(18, &x.k) - m.Load(19, &x.containerID) - m.Load(20, &x.tc) - m.Load(21, &x.fsContext) - m.Load(22, &x.fdTable) - m.Load(23, &x.vforkParent) - m.Load(24, &x.exitState) - m.Load(25, &x.exitTracerNotified) - m.Load(26, &x.exitTracerAcked) - m.Load(27, &x.exitParentNotified) - m.Load(28, &x.exitParentAcked) - m.Load(30, &x.ptraceTracees) - m.Load(31, &x.ptraceSeized) - m.Load(32, &x.ptraceOpts) - m.Load(33, &x.ptraceSyscallMode) - m.Load(34, &x.ptraceSinglestep) - m.Load(35, &x.ptraceCode) - m.Load(36, &x.ptraceSiginfo) - m.Load(37, &x.ptraceEventMsg) - m.Load(38, &x.ioUsage) - m.Load(39, &x.creds) - m.Load(40, &x.utsns) - m.Load(41, &x.ipcns) - m.Load(42, &x.abstractSockets) - m.Load(43, &x.mountNamespaceVFS2) - m.Load(44, &x.parentDeathSignal) - m.Load(46, &x.cleartid) - m.Load(47, &x.allowedCPUMask) - m.Load(48, &x.cpu) - m.Load(49, &x.niceness) - m.Load(50, &x.numaPolicy) - m.Load(51, &x.numaNodeMask) - m.Load(52, &x.netns) - m.Load(53, &x.rseqCPU) - m.Load(54, &x.oldRSeqCPUAddr) - m.Load(55, &x.rseqAddr) - m.Load(56, &x.rseqSignature) - m.Load(57, &x.startTime) - m.LoadValue(29, new(*Task), func(y interface{}) { x.loadPtraceTracer(y.(*Task)) }) - m.LoadValue(45, new([]bpf.Program), func(y interface{}) { x.loadSyscallFilters(y.([]bpf.Program)) }) + m.Load(2, &x.taskWorkCount) + m.Load(3, &x.taskWork) + 
m.Load(4, &x.haveSyscallReturn) + m.Load(5, &x.gosched) + m.Load(6, &x.yieldCount) + m.Load(7, &x.pendingSignals) + m.Load(8, &x.signalMask) + m.Load(9, &x.realSignalMask) + m.Load(10, &x.haveSavedSignalMask) + m.Load(11, &x.savedSignalMask) + m.Load(12, &x.signalStack) + m.Load(13, &x.groupStopPending) + m.Load(14, &x.groupStopAcknowledged) + m.Load(15, &x.trapStopPending) + m.Load(16, &x.trapNotifyPending) + m.Load(17, &x.stop) + m.Load(18, &x.exitStatus) + m.Load(19, &x.syscallRestartBlock) + m.Load(20, &x.k) + m.Load(21, &x.containerID) + m.Load(22, &x.tc) + m.Load(23, &x.fsContext) + m.Load(24, &x.fdTable) + m.Load(25, &x.vforkParent) + m.Load(26, &x.exitState) + m.Load(27, &x.exitTracerNotified) + m.Load(28, &x.exitTracerAcked) + m.Load(29, &x.exitParentNotified) + m.Load(30, &x.exitParentAcked) + m.Load(32, &x.ptraceTracees) + m.Load(33, &x.ptraceSeized) + m.Load(34, &x.ptraceOpts) + m.Load(35, &x.ptraceSyscallMode) + m.Load(36, &x.ptraceSinglestep) + m.Load(37, &x.ptraceCode) + m.Load(38, &x.ptraceSiginfo) + m.Load(39, &x.ptraceEventMsg) + m.Load(40, &x.ioUsage) + m.Load(41, &x.creds) + m.Load(42, &x.utsns) + m.Load(43, &x.ipcns) + m.Load(44, &x.abstractSockets) + m.Load(45, &x.mountNamespaceVFS2) + m.Load(46, &x.parentDeathSignal) + m.Load(48, &x.cleartid) + m.Load(49, &x.allowedCPUMask) + m.Load(50, &x.cpu) + m.Load(51, &x.niceness) + m.Load(52, &x.numaPolicy) + m.Load(53, &x.numaNodeMask) + m.Load(54, &x.netns) + m.Load(55, &x.rseqCPU) + m.Load(56, &x.oldRSeqCPUAddr) + m.Load(57, &x.rseqAddr) + m.Load(58, &x.rseqSignature) + m.Load(59, &x.startTime) + m.LoadValue(31, new(*Task), func(y interface{}) { x.loadPtraceTracer(y.(*Task)) }) + m.LoadValue(47, new([]bpf.Program), func(y interface{}) { x.loadSyscallFilters(y.([]bpf.Program)) }) m.AfterLoad(x.afterLoad) } diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index f48247c94..b3d655b6e 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -68,6 +68,21 @@ type Task 
struct { // runState is exclusive to the task goroutine. runState taskRunState + // taskWorkCount represents the current size of the task work queue. It is + // used to avoid acquiring taskWorkMu when the queue is empty. + // + // Must be accessed with atomic memory operations. + taskWorkCount int32 + + // taskWorkMu protects taskWork. + taskWorkMu sync.Mutex `state:"nosave"` + + // taskWork is a queue of work to be executed before resuming user execution. + // It is similar to the task_work mechanism in Linux. + // + // taskWork is exclusive to the task goroutine. + taskWork []TaskWorker + // haveSyscallReturn is true if tc.Arch().Return() represents a value // returned by a syscall (or set by ptrace after a syscall). // diff --git a/pkg/sentry/kernel/task_run.go b/pkg/sentry/kernel/task_run.go index d654dd997..7d4f44caf 100644 --- a/pkg/sentry/kernel/task_run.go +++ b/pkg/sentry/kernel/task_run.go @@ -167,7 +167,22 @@ func (app *runApp) execute(t *Task) taskRunState { return (*runInterrupt)(nil) } - // We're about to switch to the application again. If there's still a + // Execute any task work callbacks before returning to user space. + if atomic.LoadInt32(&t.taskWorkCount) > 0 { + t.taskWorkMu.Lock() + queue := t.taskWork + t.taskWork = nil + atomic.StoreInt32(&t.taskWorkCount, 0) + t.taskWorkMu.Unlock() + + // Do not hold taskWorkMu while executing task work, which may register + // more work. + for _, work := range queue { + work.TaskWork(t) + } + } + + // We're about to switch to the application again. If there's still an // unhandled SyscallRestartErrno that wasn't translated to an EINTR, // restart the syscall that was interrupted. If there's a saved signal // mask, restore it. (Note that restoring the saved signal mask may unblock diff --git a/pkg/sentry/kernel/task_work.go b/pkg/sentry/kernel/task_work.go new file mode 100644 index 000000000..dda5a433a --- /dev/null +++ b/pkg/sentry/kernel/task_work.go @@ -0,0 +1,38 @@ +// Copyright 2020 The gVisor Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kernel + +import "sync/atomic" + +// TaskWorker is a deferred task. +// +// This must be savable. +type TaskWorker interface { + // TaskWork will be executed prior to returning to user space. Note that + // TaskWork may call RegisterWork again, but this will not be executed until + // the next return to user space, unlike in Linux. This effectively allows + // registration of indefinite user return hooks, but not by default. + TaskWork(t *Task) +} + +// RegisterWork can be used to register additional task work that will be +// performed prior to returning to user space. See TaskWorker.TaskWork for +// semantics regarding registration. +func (t *Task) RegisterWork(work TaskWorker) { + t.taskWorkMu.Lock() + defer t.taskWorkMu.Unlock() + atomic.AddInt32(&t.taskWorkCount, 1) + t.taskWork = append(t.taskWork, work) +} |