diff options
author | Jamie Liu <jamieliu@google.com> | 2021-10-12 10:23:01 -0700 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-10-12 10:25:30 -0700 |
commit | 8682ce689e928ec32ec810a7eb038fb582c66093 (patch) | |
tree | 055b5c20bf179fe839ac6da0ceb434c7effbc512 /pkg/sentry/kernel | |
parent | ab1ef0baba1d15c4d4e7717d23cf8b32fd6b5feb (diff) |
Remove state:"nosave"/"zerovalue" annotations from all waiter.Queues.
Prior to cl/318010298, //pkg/state couldn't handle pointers to struct fields,
which meant that it couldn't handle intrusive linked lists, which meant that it
couldn't handle waiter.Queue, which meant that it couldn't handle epoll. As a
result, VFS1 unregisters all epoll waiters before saving and re-registers them
after loading, and waitable VFS1 file implementations tag their waiter.Queues
state:"nosave" (causing them to be skipped by the save/restore machinery) or
state:"zerovalue" (causing them to only be checked for zero-value-equality on
save).
VFS2 required cl/318010298 to support save/restore (due to the Impl inheritance
pattern used by vfs.FileDescription, vfs.Dentry, etc.); correspondingly, VFS2
epoll assumes that waiter.Queues *will be* saved and loaded correctly, and VFS2
file implementations do not tag waiter.Queues.
Some waiter.Queues, e.g. pipe.Pipe.Queue and kernel.Task.signalQueue, are used
by both VFS1 and VFS2 (the latter via signalfd); as a result of the above,
tagging these Queues state:"nosave" or state:"zerovalue" breaks VFS2 epoll.
Remove VFS1 epoll unregistration before saving (bringing it in line with VFS2),
and remove these tags from all waiter.Queues.
Also clean up after the epoll test added by cl/402323053, which implied this
issue (by instantiating DisableSave in the new test) without reporting it.
PiperOrigin-RevId: 402596216
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- | pkg/sentry/kernel/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/kernel/epoll/epoll.go | 15 | ||||
-rw-r--r-- | pkg/sentry/kernel/epoll/epoll_state.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/eventfd/eventfd.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/kernel.go | 32 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/pipe.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/task.go | 2 | ||||
-rw-r--r-- | pkg/sentry/kernel/threads.go | 6 |
8 files changed, 6 insertions, 56 deletions
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index c0f13bf52..53a21e1e2 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -255,7 +255,6 @@ go_library( "//pkg/sentry/hostcpu", "//pkg/sentry/inet", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/epoll", "//pkg/sentry/kernel/futex", "//pkg/sentry/kernel/msgqueue", "//pkg/sentry/kernel/sched", diff --git a/pkg/sentry/kernel/epoll/epoll.go b/pkg/sentry/kernel/epoll/epoll.go index 6006c46a9..8d0a21baf 100644 --- a/pkg/sentry/kernel/epoll/epoll.go +++ b/pkg/sentry/kernel/epoll/epoll.go @@ -66,7 +66,7 @@ type pollEntry struct { file *refs.WeakRef `state:"manual"` id FileIdentifier `state:"wait"` userData [2]int32 - waiter waiter.Entry `state:"manual"` + waiter waiter.Entry mask waiter.EventMask flags EntryFlags @@ -102,7 +102,7 @@ type EventPoll struct { // Wait queue is used to notify interested parties when the event poll // object itself becomes readable or writable. - waiter.Queue `state:"zerovalue"` + waiter.Queue // files is the map of all the files currently being observed, it is // protected by mu. @@ -454,14 +454,3 @@ func (e *EventPoll) RemoveEntry(ctx context.Context, id FileIdentifier) error { return nil } - -// UnregisterEpollWaiters removes the epoll waiter objects from the waiting -// queues. This is different from Release() as the file is not dereferenced. -func (e *EventPoll) UnregisterEpollWaiters() { - e.mu.Lock() - defer e.mu.Unlock() - - for _, entry := range e.files { - entry.id.File.EventUnregister(&entry.waiter) - } -} diff --git a/pkg/sentry/kernel/epoll/epoll_state.go b/pkg/sentry/kernel/epoll/epoll_state.go index e08d6287f..135a6d72c 100644 --- a/pkg/sentry/kernel/epoll/epoll_state.go +++ b/pkg/sentry/kernel/epoll/epoll_state.go @@ -21,9 +21,7 @@ import ( // afterLoad is invoked by stateify. func (p *pollEntry) afterLoad() { - p.waiter.Callback = p p.file = refs.NewWeakRef(p.id.File, p) - p.id.File.EventRegister(&p.waiter, p.mask) } // afterLoad is invoked by stateify. diff --git a/pkg/sentry/kernel/eventfd/eventfd.go b/pkg/sentry/kernel/eventfd/eventfd.go index 5ea44a2c2..bf625dede 100644 --- a/pkg/sentry/kernel/eventfd/eventfd.go +++ b/pkg/sentry/kernel/eventfd/eventfd.go @@ -54,7 +54,7 @@ type EventOperations struct { // Queue is used to notify interested parties when the event object // becomes readable or writable. - wq waiter.Queue `state:"zerovalue"` + wq waiter.Queue // val is the current value of the event counter. val uint64 diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index f913d25db..5dc821a48 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -57,7 +57,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/epoll" "gvisor.dev/gvisor/pkg/sentry/kernel/futex" "gvisor.dev/gvisor/pkg/sentry/kernel/sched" ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" @@ -486,11 +485,6 @@ func (k *Kernel) SaveTo(ctx context.Context, w wire.Writer) error { return err } - // Remove all epoll waiter objects from underlying wait queues. - // NOTE: for programs to resume execution in future snapshot scenarios, - // we will need to re-establish these waiter objects after saving. - k.tasks.unregisterEpollWaiters(ctx) - // Clear the dirent cache before saving because Dirents must be Loaded in a // particular order (parents before children), and Loading dirents from a cache // breaks that order. @@ -623,32 +617,6 @@ func (k *Kernel) flushWritesToFiles(ctx context.Context) error { }) } -// Preconditions: !VFS2Enabled. -func (ts *TaskSet) unregisterEpollWaiters(ctx context.Context) { - ts.mu.RLock() - defer ts.mu.RUnlock() - - // Tasks that belong to the same process could potentially point to the - // same FDTable. So we retain a map of processed ones to avoid - // processing the same FDTable multiple times. - processed := make(map[*FDTable]struct{}) - for t := range ts.Root.tids { - // We can skip locking Task.mu here since the kernel is paused. - if t.fdTable == nil { - continue - } - if _, ok := processed[t.fdTable]; ok { - continue - } - t.fdTable.forEach(ctx, func(_ int32, file *fs.File, _ *vfs.FileDescription, _ FDFlags) { - if e, ok := file.FileOperations.(*epoll.EventPoll); ok { - e.UnregisterEpollWaiters() - } - }) - processed[t.fdTable] = struct{}{} - } -} - // Preconditions: The kernel must be paused. func (k *Kernel) invalidateUnsavableMappings(ctx context.Context) error { invalidated := make(map[*mm.MemoryManager]struct{}) diff --git a/pkg/sentry/kernel/pipe/pipe.go b/pkg/sentry/kernel/pipe/pipe.go index 86beee6fe..8345473f3 100644 --- a/pkg/sentry/kernel/pipe/pipe.go +++ b/pkg/sentry/kernel/pipe/pipe.go @@ -55,7 +55,7 @@ const ( // // +stateify savable type Pipe struct { - waiter.Queue `state:"nosave"` + waiter.Queue // isNamed indicates whether this is a named pipe. // diff --git a/pkg/sentry/kernel/task.go b/pkg/sentry/kernel/task.go index b0004482c..1ea3c1bf7 100644 --- a/pkg/sentry/kernel/task.go +++ b/pkg/sentry/kernel/task.go @@ -158,7 +158,7 @@ type Task struct { // signalQueue is protected by the signalMutex. Note that the task does // not implement all queue methods, specifically the readiness checks. // The task only broadcast a notification on signal delivery. - signalQueue waiter.Queue `state:"zerovalue"` + signalQueue waiter.Queue // If groupStopPending is true, the task should participate in a group // stop in the interrupt path. diff --git a/pkg/sentry/kernel/threads.go b/pkg/sentry/kernel/threads.go index 77ad62445..e38b723ce 100644 --- a/pkg/sentry/kernel/threads.go +++ b/pkg/sentry/kernel/threads.go @@ -324,11 +324,7 @@ type threadGroupNode struct { // eventQueue is notified whenever a event of interest to Task.Wait occurs // in a child of this thread group, or a ptrace tracee of a task in this // thread group. Events are defined in task_exit.go. - // - // Note that we cannot check and save this wait queue similarly to other - // wait queues, as the queue will not be empty by the time of saving, due - // to the wait sourced from Exec(). - eventQueue waiter.Queue `state:"nosave"` + eventQueue waiter.Queue // leader is the thread group's leader, which is the oldest task in the // thread group; usually the last task in the thread group to call |