diff options
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r-- | pkg/sentry/kernel/cgroup.go | 71 | ||||
-rw-r--r-- | pkg/sentry/kernel/fd_table.go | 8 | ||||
-rw-r--r-- | pkg/sentry/kernel/pipe/pipe_util.go | 6 | ||||
-rw-r--r-- | pkg/sentry/kernel/semaphore/semaphore.go | 4 | ||||
-rw-r--r-- | pkg/sentry/kernel/task_context.go | 5 |
5 files changed, 74 insertions, 20 deletions
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go index 1f1c63f37..c93ef6ac1 100644 --- a/pkg/sentry/kernel/cgroup.go +++ b/pkg/sentry/kernel/cgroup.go @@ -48,10 +48,6 @@ type CgroupController interface { // attached to. Returned value is valid for the lifetime of the controller. HierarchyID() uint32 - // Filesystem returns the filesystem this controller is attached to. - // Returned value is valid for the lifetime of the controller. - Filesystem() *vfs.Filesystem - // RootCgroup returns the root cgroup for this controller. Returned value is // valid for the lifetime of the controller. RootCgroup() Cgroup @@ -124,6 +120,19 @@ func (h *hierarchy) match(ctypes []CgroupControllerType) bool { return true } +// cgroupFS is the public interface to cgroupfs. This lets the kernel package +// refer to cgroupfs.filesystem methods without directly depending on the +// cgroupfs package, which would lead to a circular dependency. +type cgroupFS interface { + // Returns the vfs.Filesystem for the cgroupfs. + VFSFilesystem() *vfs.Filesystem + + // InitializeHierarchyID sets the hierarchy ID for this filesystem during + // filesystem creation. May only be called before the filesystem is visible + // to the vfs layer. + InitializeHierarchyID(hid uint32) +} + // CgroupRegistry tracks the active set of cgroup controllers on the system. // // +stateify savable @@ -172,7 +181,23 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files for _, h := range r.hierarchies { if h.match(ctypes) { - h.fs.IncRef() + if !h.fs.TryIncRef() { + // Racing with filesystem destruction, namely h.fs.Release. + // Since we hold r.mu, we know the hierarchy hasn't been + // unregistered yet, but its associated filesystem is tearing + // down. + // + // If we simply indicate the hierarchy wasn't found without + // cleaning up the registry, the caller can race with the + // unregister and find itself temporarily unable to create a new + // hierarchy with a subset of the relevant controllers. + // + // To keep the result of FindHierarchy consistent with the + // uniqueness of controllers enforced by Register, drop the + // dying hierarchy now. The eventual unregister by the FS + // teardown will become a no-op. + return nil + } return h.fs } } @@ -182,31 +207,35 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files // Register registers the provided set of controllers with the registry as a new // hierarchy. If any controller is already registered, the function returns an -// error without modifying the registry. The hierarchy can be later referenced -// by the returned id. -func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) { +// error without modifying the registry. Register sets the hierarchy ID for the +// filesystem on success. +func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error { r.mu.Lock() defer r.mu.Unlock() if len(cs) == 0 { - return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers") + return fmt.Errorf("can't register hierarchy with no controllers") } for _, c := range cs { if _, ok := r.controllers[c.Type()]; ok { - return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy") + return fmt.Errorf("controllers may only be mounted on a single hierarchy") } } hid, err := r.nextHierarchyID() if err != nil { - return hid, err + return err } + // Must not fail below here, once we publish the hierarchy ID. + + fs.InitializeHierarchyID(hid) + h := hierarchy{ id: hid, controllers: make(map[CgroupControllerType]CgroupController), - fs: cs[0].Filesystem(), + fs: fs.VFSFilesystem(), } for _, c := range cs { n := c.Type() @@ -214,15 +243,20 @@ func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) { h.controllers[n] = c } r.hierarchies[hid] = h - return hid, nil + return nil } -// Unregister removes a previously registered hierarchy from the registry. If -// the controller was not previously registered, Unregister is a no-op. +// Unregister removes a previously registered hierarchy from the registry. If no +// such hierarchy is registered, Unregister is a no-op. func (r *CgroupRegistry) Unregister(hid uint32) { r.mu.Lock() - defer r.mu.Unlock() + r.unregisterLocked(hid) + r.mu.Unlock() +} +// Precondition: Caller must hold r.mu. +// +checklocks:r.mu +func (r *CgroupRegistry) unregisterLocked(hid uint32) { if h, ok := r.hierarchies[hid]; ok { for name, _ := range h.controllers { delete(r.controllers, name) @@ -253,6 +287,11 @@ func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[C for name, ctl := range r.controllers { if _, ok := ctlSet[name]; !ok { cg := ctl.RootCgroup() + // Multiple controllers may share the same hierarchy, so may have + // the same root cgroup. Grab a single ref per hierarchy root. + if _, ok := cgset[cg]; ok { + continue + } cg.IncRef() // Ref transferred to caller. cgset[cg] = struct{}{} } diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go index 10885688c..62777faa8 100644 --- a/pkg/sentry/kernel/fd_table.go +++ b/pkg/sentry/kernel/fd_table.go @@ -154,9 +154,11 @@ func (f *FDTable) drop(ctx context.Context, file *fs.File) { // dropVFS2 drops the table reference. func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) { // Release any POSIX lock possibly held by the FDTable. - err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF}) - if err != nil && err != syserror.ENOLCK { - panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) + if file.SupportsLocks() { + err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF}) + if err != nil && err != syserror.ENOLCK { + panic(fmt.Sprintf("UnlockPOSIX failed: %v", err)) + } } // Drop the table's reference. diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 2d89b9ccd..24e467e93 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -86,6 +86,12 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error) if n > 0 { p.Notify(waiter.ReadableEvents) } + if err == unix.EPIPE { + // If we are returning EPIPE send SIGPIPE to the task. + if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil { + sendSig(linux.SIGPIPE) + } + } return n, err } diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go index fe2ab1662..3c5bd8ff7 100644 --- a/pkg/sentry/kernel/semaphore/semaphore.go +++ b/pkg/sentry/kernel/semaphore/semaphore.go @@ -702,7 +702,9 @@ func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool { return s.checkCapability(creds) } -// destroy destroys the set. Caller must hold 's.mu'. +// destroy destroys the set. +// +// Preconditions: Caller must hold 's.mu'. func (s *Set) destroy() { // Notify all waiters. They will fail on the next attempt to execute // operations and return error. diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go index 70b0699dc..c82d9e82b 100644 --- a/pkg/sentry/kernel/task_context.go +++ b/pkg/sentry/kernel/task_context.go @@ -17,6 +17,7 @@ package kernel import ( "time" + "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/fs" @@ -113,6 +114,10 @@ func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} { return t.k.RealtimeClock() case limits.CtxLimits: return t.tg.limits + case linux.CtxSignalNoInfoFunc: + return func(sig linux.Signal) error { + return t.SendSignal(SignalInfoNoInfo(sig, t, t)) + } case pgalloc.CtxMemoryFile: return t.k.mf case pgalloc.CtxMemoryFileProvider: |