path: root/pkg/sentry/kernel
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--  pkg/sentry/kernel/BUILD                 |  3
-rw-r--r--  pkg/sentry/kernel/epoll/epoll.go        | 15
-rw-r--r--  pkg/sentry/kernel/epoll/epoll_state.go  |  2
-rw-r--r--  pkg/sentry/kernel/eventfd/eventfd.go    |  2
-rw-r--r--  pkg/sentry/kernel/ipc/BUILD             |  2
-rw-r--r--  pkg/sentry/kernel/kernel.go             | 46
-rw-r--r--  pkg/sentry/kernel/mq/mq.go              |  4
-rw-r--r--  pkg/sentry/kernel/pipe/pipe.go          |  2
-rw-r--r--  pkg/sentry/kernel/task.go               |  6
-rw-r--r--  pkg/sentry/kernel/task_clone.go         |  2
-rw-r--r--  pkg/sentry/kernel/task_log.go           | 12
-rw-r--r--  pkg/sentry/kernel/task_net.go           | 12
-rw-r--r--  pkg/sentry/kernel/task_start.go         |  2
-rw-r--r--  pkg/sentry/kernel/threads.go            |  6
14 files changed, 37 insertions, 79 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 9f30a7706..f3f16eb7a 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -216,7 +216,6 @@ go_library(
visibility = ["//:sandbox"],
deps = [
":uncaught_signal_go_proto",
- "//pkg/sentry/kernel/ipc",
"//pkg/abi",
"//pkg/abi/linux",
"//pkg/abi/linux/errno",
@@ -257,8 +256,8 @@ go_library(
"//pkg/sentry/hostcpu",
"//pkg/sentry/inet",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/epoll",
"//pkg/sentry/kernel/futex",
+ "//pkg/sentry/kernel/ipc",
"//pkg/sentry/kernel/mq",
"//pkg/sentry/kernel/msgqueue",
"//pkg/sentry/kernel/sched",
diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go
index 6006c46a9..8d0a21baf 100644
--- a/pkg/sentry/kernel/epoll/epoll.go
+++ b/pkg/sentry/kernel/epoll/epoll.go
@@ -66,7 +66,7 @@ type pollEntry struct {
file *refs.WeakRef `state:"manual"`
id FileIdentifier `state:"wait"`
userData [2]int32
- waiter waiter.Entry `state:"manual"`
+ waiter waiter.Entry
mask waiter.EventMask
flags EntryFlags
@@ -102,7 +102,7 @@ type EventPoll struct {
// Wait queue is used to notify interested parties when the event poll
// object itself becomes readable or writable.
- waiter.Queue `state:"zerovalue"`
+ waiter.Queue
// files is the map of all the files currently being observed, it is
// protected by mu.
@@ -454,14 +454,3 @@ func (e *EventPoll) RemoveEntry(ctx context.Context, id FileIdentifier) error {
return nil
}
-
-// UnregisterEpollWaiters removes the epoll waiter objects from the waiting
-// queues. This is different from Release() as the file is not dereferenced.
-func (e *EventPoll) UnregisterEpollWaiters() {
- e.mu.Lock()
- defer e.mu.Unlock()
-
- for _, entry := range e.files {
- entry.id.File.EventUnregister(&entry.waiter)
- }
-}
diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go
index e08d6287f..135a6d72c 100644
--- a/pkg/sentry/kernel/epoll/epoll_state.go
+++ b/pkg/sentry/kernel/epoll/epoll_state.go
@@ -21,9 +21,7 @@ import (
// afterLoad is invoked by stateify.
func (p *pollEntry) afterLoad() {
- p.waiter.Callback = p
p.file = refs.NewWeakRef(p.id.File, p)
- p.id.File.EventRegister(&p.waiter, p.mask)
}
// afterLoad is invoked by stateify.
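[Illustrative sketch, not part of the patch.] The removed state:"manual" tags and the dropped afterLoad re-registration indicate that waiter entries and queues are now saved and restored by stateify. Under that assumption, a savable type embedding a wait queue no longer needs per-field state tags or a manual EventRegister call on load; the type and field names below are hypothetical:

// +stateify savable
type notifier struct {
	// The embedded queue is saved with the object; no state:"zerovalue"
	// or state:"nosave" tag is required anymore.
	waiter.Queue

	// Registered entries survive save/restore, so there is no afterLoad
	// hook re-calling EventRegister for this entry.
	entry waiter.Entry
}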
diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go
index 5ea44a2c2..bf625dede 100644
--- a/pkg/sentry/kernel/eventfd/eventfd.go
+++ b/pkg/sentry/kernel/eventfd/eventfd.go
@@ -54,7 +54,7 @@ type EventOperations struct {
// Queue is used to notify interested parties when the event object
// becomes readable or writable.
- wq waiter.Queue `state:"zerovalue"`
+ wq waiter.Queue
// val is the current value of the event counter.
val uint64
diff --git a/pkg/sentry/kernel/ipc/BUILD b/pkg/sentry/kernel/ipc/BUILD
index a5cbb2b51..bb5cf1c17 100644
--- a/pkg/sentry/kernel/ipc/BUILD
+++ b/pkg/sentry/kernel/ipc/BUILD
@@ -5,9 +5,9 @@ package(licenses = ["notice"])
go_library(
name = "ipc",
srcs = [
+ "ns.go",
"object.go",
"registry.go",
- "ns.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 04b24369a..d4851ccda 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -57,7 +57,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/epoll"
"gvisor.dev/gvisor/pkg/sentry/kernel/futex"
"gvisor.dev/gvisor/pkg/sentry/kernel/ipc"
"gvisor.dev/gvisor/pkg/sentry/kernel/sched"
@@ -79,11 +78,19 @@ import (
"gvisor.dev/gvisor/pkg/tcpip"
)
-// VFS2Enabled is set to true when VFS2 is enabled. Added as a global for allow
-// easy access everywhere. To be removed once VFS2 becomes the default.
+// VFS2Enabled is set to true when VFS2 is enabled. Added as a global to allow
+// easy access everywhere.
+//
+// TODO(gvisor.dev/issue/1624): Remove when VFS1 is no longer used.
var VFS2Enabled = false
-// FUSEEnabled is set to true when FUSE is enabled. Added as a global for allow
+// LISAFSEnabled is set to true when lisafs protocol is enabled. Added as a
+// global to allow easy access everywhere.
+//
+// TODO(gvisor.dev/issue/6319): Remove when lisafs is default.
+var LISAFSEnabled = false
+
+// FUSEEnabled is set to true when FUSE is enabled. Added as a global to allow
// easy access everywhere. To be removed once FUSE is completed.
var FUSEEnabled = false
@@ -484,11 +491,6 @@ func (k *Kernel) SaveTo(ctx context.Context, w wire.Writer) error {
return err
}
- // Remove all epoll waiter objects from underlying wait queues.
- // NOTE: for programs to resume execution in future snapshot scenarios,
- // we will need to re-establish these waiter objects after saving.
- k.tasks.unregisterEpollWaiters(ctx)
-
// Clear the dirent cache before saving because Dirents must be Loaded in a
// particular order (parents before children), and Loading dirents from a cache
// breaks that order.
@@ -621,32 +623,6 @@ func (k *Kernel) flushWritesToFiles(ctx context.Context) error {
})
}
-// Preconditions: !VFS2Enabled.
-func (ts *TaskSet) unregisterEpollWaiters(ctx context.Context) {
- ts.mu.RLock()
- defer ts.mu.RUnlock()
-
- // Tasks that belong to the same process could potentially point to the
- // same FDTable. So we retain a map of processed ones to avoid
- // processing the same FDTable multiple times.
- processed := make(map[*FDTable]struct{})
- for t := range ts.Root.tids {
- // We can skip locking Task.mu here since the kernel is paused.
- if t.fdTable == nil {
- continue
- }
- if _, ok := processed[t.fdTable]; ok {
- continue
- }
- t.fdTable.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) {
- if e, ok := file.FileOperations.(*epoll.EventPoll); ok {
- e.UnregisterEpollWaiters()
- }
- })
- processed[t.fdTable] = struct{}{}
- }
-}
-
// Preconditions: The kernel must be paused.
func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error {
invalidated := make(map[*mm.MemoryManager]struct{})
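[Illustrative sketch, not part of the patch.] The new LISAFSEnabled global mirrors VFS2Enabled and FUSEEnabled: a process-wide flag that protocol-selection code can branch on. A hypothetical call site, with the surrounding function invented for illustration:

func openGoferConnection() {
	if kernel.LISAFSEnabled {
		// Speak the lisafs protocol to the gofer.
	} else {
		// Fall back to the existing 9P-based path.
	}
}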
diff --git a/pkg/sentry/kernel/mq/mq.go b/pkg/sentry/kernel/mq/mq.go
index a7c787081..50ca6d34a 100644
--- a/pkg/sentry/kernel/mq/mq.go
+++ b/pkg/sentry/kernel/mq/mq.go
@@ -40,8 +40,10 @@ const (
ReadWrite
)
+// MaxName is the maximum size for a queue name.
+const MaxName = 255
+
const (
- MaxName = 255 // Maximum size for a queue name.
maxPriority = linux.MQ_PRIO_MAX - 1 // Highest possible message priority.
maxQueuesDefault = linux.DFLT_QUEUESMAX // Default max number of queues.
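[Illustrative sketch, not part of the patch.] Exporting MaxName lets callers outside the mq package validate queue names against the 255-byte limit. A hypothetical check, with the helper name invented for illustration:

func validQueueName(name string) bool {
	return len(name) > 0 && len(name) <= mq.MaxName
}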
diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go
index 86beee6fe..8345473f3 100644
--- a/pkg/sentry/kernel/pipe/pipe.go
+++ b/pkg/sentry/kernel/pipe/pipe.go
@@ -55,7 +55,7 @@ const (
//
// +stateify savable
type Pipe struct {
- waiter.Queue `state:"nosave"`
+ waiter.Queue
// isNamed indicates whether this is a named pipe.
//
diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go
index 9a95bf44c..1ea3c1bf7 100644
--- a/pkg/sentry/kernel/task.go
+++ b/pkg/sentry/kernel/task.go
@@ -158,7 +158,7 @@ type Task struct {
// signalQueue is protected by the signalMutex. Note that the task does
// not implement all queue methods, specifically the readiness checks.
// The task only broadcast a notification on signal delivery.
- signalQueue waiter.Queue `state:"zerovalue"`
+ signalQueue waiter.Queue
// If groupStopPending is true, the task should participate in a group
// stop in the interrupt path.
@@ -511,9 +511,7 @@ type Task struct {
numaNodeMask uint64
// netns is the task's network namespace. netns is never nil.
- //
- // netns is protected by mu.
- netns *inet.Namespace
+ netns inet.NamespaceAtomicPtr
// If rseqPreempted is true, before the next call to p.Switch(),
// interrupt rseq critical regions as defined by rseqAddr and
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index e174913d1..69a3227f0 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -447,7 +447,7 @@ func (t *Task) Unshare(flags int32) error {
t.mu.Unlock()
return linuxerr.EPERM
}
- t.netns = inet.NewNamespace(t.netns)
+ t.netns.Store(inet.NewNamespace(t.netns.Load()))
}
if flags&linux.CLONE_NEWUTS != 0 {
if !haveCapSysAdmin {
diff --git a/pkg/sentry/kernel/task_log.go b/pkg/sentry/kernel/task_log.go
index 8de08151a..f0c168ecc 100644
--- a/pkg/sentry/kernel/task_log.go
+++ b/pkg/sentry/kernel/task_log.go
@@ -191,9 +191,11 @@ const (
//
// Preconditions: The task's owning TaskSet.mu must be locked.
func (t *Task) updateInfoLocked() {
- // Use the task's TID in the root PID namespace for logging.
+ // Use the task's TID and PID in the root PID namespace for logging.
+ pid := t.tg.pidns.owner.Root.tgids[t.tg]
tid := t.tg.pidns.owner.Root.tids[t]
- t.logPrefix.Store(fmt.Sprintf("[% 4d] ", tid))
+ t.logPrefix.Store(fmt.Sprintf("[% 4d:% 4d] ", pid, tid))
+
t.rebuildTraceContext(tid)
}
@@ -249,5 +251,9 @@ func (t *Task) traceExecEvent(image *TaskImage) {
return
}
defer file.DecRef(t)
- trace.Logf(t.traceContext, traceCategory, "exec: %s", file.PathnameWithDeleted(t))
+
+ // traceExecEvent function may be called before the task goroutine
+ // starts, so we must use the async context.
+ name := file.PathnameWithDeleted(t.AsyncContext())
+ trace.Logf(t.traceContext, traceCategory, "exec: %s", name)
}
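[Illustrative example, not part of the patch.] With the new format string, the log prefix carries both the PID and the TID from the root PID namespace. For a task with PID 42 and TID 43:

prefix := fmt.Sprintf("[% 4d:% 4d] ", 42, 43) // yields "[  42:  43] "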
diff --git a/pkg/sentry/kernel/task_net.go b/pkg/sentry/kernel/task_net.go
index f7711232c..e31e2b2e8 100644
--- a/pkg/sentry/kernel/task_net.go
+++ b/pkg/sentry/kernel/task_net.go
@@ -20,9 +20,7 @@ import (
// IsNetworkNamespaced returns true if t is in a non-root network namespace.
func (t *Task) IsNetworkNamespaced() bool {
- t.mu.Lock()
- defer t.mu.Unlock()
- return !t.netns.IsRoot()
+ return !t.netns.Load().IsRoot()
}
// NetworkContext returns the network stack used by the task. NetworkContext
@@ -31,14 +29,10 @@ func (t *Task) IsNetworkNamespaced() bool {
// TODO(gvisor.dev/issue/1833): Migrate callers of this method to
// NetworkNamespace().
func (t *Task) NetworkContext() inet.Stack {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.netns.Stack()
+ return t.netns.Load().Stack()
}
// NetworkNamespace returns the network namespace observed by the task.
func (t *Task) NetworkNamespace() *inet.Namespace {
- t.mu.Lock()
- defer t.mu.Unlock()
- return t.netns
+ return t.netns.Load()
}
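[Illustrative sketch, not part of the patch.] Task.netns is now an atomic pointer (inet.NamespaceAtomicPtr) rather than a plain pointer guarded by t.mu, so the accessors above become lock-free loads. The same pattern expressed with the standard library, for illustration only (assumes "sync/atomic" and the sentry inet package are imported):

type task struct {
	netns atomic.Pointer[inet.Namespace] // stand-in for inet.NamespaceAtomicPtr
}

func (t *task) NetworkNamespace() *inet.Namespace {
	return t.netns.Load() // readers need no mutex
}

func (t *task) setNetworkNamespace(ns *inet.Namespace) {
	t.netns.Store(ns) // writers publish a new namespace atomically
}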
diff --git a/pkg/sentry/kernel/task_start.go b/pkg/sentry/kernel/task_start.go
index 217c6f531..4919dea7c 100644
--- a/pkg/sentry/kernel/task_start.go
+++ b/pkg/sentry/kernel/task_start.go
@@ -140,7 +140,6 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
allowedCPUMask: cfg.AllowedCPUMask.Copy(),
ioUsage: &usage.IO{},
niceness: cfg.Niceness,
- netns: cfg.NetworkNamespace,
utsns: cfg.UTSNamespace,
ipcns: cfg.IPCNamespace,
abstractSockets: cfg.AbstractSocketNamespace,
@@ -152,6 +151,7 @@ func (ts *TaskSet) newTask(cfg *TaskConfig) (*Task, error) {
containerID: cfg.ContainerID,
cgroups: make(map[Cgroup]struct{}),
}
+ t.netns.Store(cfg.NetworkNamespace)
t.creds.Store(cfg.Credentials)
t.endStopCond.L = &t.tg.signalHandlers.mu
t.ptraceTracer.Store((*Task)(nil))
diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go
index 77ad62445..e38b723ce 100644
--- a/pkg/sentry/kernel/threads.go
+++ b/pkg/sentry/kernel/threads.go
@@ -324,11 +324,7 @@ type threadGroupNode struct {
// eventQueue is notified whenever a event of interest to Task.Wait occurs
// in a child of this thread group, or a ptrace tracee of a task in this
// thread group. Events are defined in task_exit.go.
- //
- // Note that we cannot check and save this wait queue similarly to other
- // wait queues, as the queue will not be empty by the time of saving, due
- // to the wait sourced from Exec().
- eventQueue waiter.Queue `state:"nosave"`
+ eventQueue waiter.Queue
// leader is the thread group's leader, which is the oldest task in the
// thread group; usually the last task in the thread group to call