summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/kernel')
-rw-r--r--pkg/sentry/kernel/cgroup.go71
-rw-r--r--pkg/sentry/kernel/fd_table.go8
-rw-r--r--pkg/sentry/kernel/pipe/pipe_util.go6
-rw-r--r--pkg/sentry/kernel/semaphore/semaphore.go4
-rw-r--r--pkg/sentry/kernel/task_context.go5
5 files changed, 74 insertions, 20 deletions
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index 1f1c63f37..c93ef6ac1 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -48,10 +48,6 @@ type CgroupController interface {
// attached to. Returned value is valid for the lifetime of the controller.
HierarchyID() uint32
- // Filesystem returns the filesystem this controller is attached to.
- // Returned value is valid for the lifetime of the controller.
- Filesystem() *vfs.Filesystem
-
// RootCgroup returns the root cgroup for this controller. Returned value is
// valid for the lifetime of the controller.
RootCgroup() Cgroup
@@ -124,6 +120,19 @@ func (h *hierarchy) match(ctypes []CgroupControllerType) bool {
return true
}
+// cgroupFS is the public interface to cgroupfs. This lets the kernel package
+// refer to cgroupfs.filesystem methods without directly depending on the
+// cgroupfs package, which would lead to a circular dependency.
+type cgroupFS interface {
+ // Returns the vfs.Filesystem for the cgroupfs.
+ VFSFilesystem() *vfs.Filesystem
+
+ // InitializeHierarchyID sets the hierarchy ID for this filesystem during
+ // filesystem creation. May only be called before the filesystem is visible
+ // to the vfs layer.
+ InitializeHierarchyID(hid uint32)
+}
+
// CgroupRegistry tracks the active set of cgroup controllers on the system.
//
// +stateify savable
@@ -172,7 +181,23 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
for _, h := range r.hierarchies {
if h.match(ctypes) {
- h.fs.IncRef()
+ if !h.fs.TryIncRef() {
+ // Racing with filesystem destruction, namely h.fs.Release.
+ // Since we hold r.mu, we know the hierarchy hasn't been
+ // unregistered yet, but its associated filesystem is tearing
+ // down.
+ //
+ // If we simply indicate the hierarchy wasn't found without
+ // cleaning up the registry, the caller can race with the
+ // unregister and find itself temporarily unable to create a new
+ // hierarchy with a subset of the relevant controllers.
+ //
+ // To keep the result of FindHierarchy consistent with the
+ // uniqueness of controllers enforced by Register, drop the
+ // dying hierarchy now. The eventual unregister by the FS
+ // teardown will become a no-op.
+ return nil
+ }
return h.fs
}
}
@@ -182,31 +207,35 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
// Register registers the provided set of controllers with the registry as a new
// hierarchy. If any controller is already registered, the function returns an
-// error without modifying the registry. The hierarchy can be later referenced
-// by the returned id.
-func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
+// error without modifying the registry. Register sets the hierarchy ID for the
+// filesystem on success.
+func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error {
r.mu.Lock()
defer r.mu.Unlock()
if len(cs) == 0 {
- return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers")
+ return fmt.Errorf("can't register hierarchy with no controllers")
}
for _, c := range cs {
if _, ok := r.controllers[c.Type()]; ok {
- return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy")
+ return fmt.Errorf("controllers may only be mounted on a single hierarchy")
}
}
hid, err := r.nextHierarchyID()
if err != nil {
- return hid, err
+ return err
}
+ // Must not fail below here, once we publish the hierarchy ID.
+
+ fs.InitializeHierarchyID(hid)
+
h := hierarchy{
id: hid,
controllers: make(map[CgroupControllerType]CgroupController),
- fs: cs[0].Filesystem(),
+ fs: fs.VFSFilesystem(),
}
for _, c := range cs {
n := c.Type()
@@ -214,15 +243,20 @@ func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
h.controllers[n] = c
}
r.hierarchies[hid] = h
- return hid, nil
+ return nil
}
-// Unregister removes a previously registered hierarchy from the registry. If
-// the controller was not previously registered, Unregister is a no-op.
+// Unregister removes a previously registered hierarchy from the registry. If no
+// such hierarchy is registered, Unregister is a no-op.
func (r *CgroupRegistry) Unregister(hid uint32) {
r.mu.Lock()
- defer r.mu.Unlock()
+ r.unregisterLocked(hid)
+ r.mu.Unlock()
+}
+// Precondition: Caller must hold r.mu.
+// +checklocks:r.mu
+func (r *CgroupRegistry) unregisterLocked(hid uint32) {
if h, ok := r.hierarchies[hid]; ok {
for name, _ := range h.controllers {
delete(r.controllers, name)
@@ -253,6 +287,11 @@ func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[C
for name, ctl := range r.controllers {
if _, ok := ctlSet[name]; !ok {
cg := ctl.RootCgroup()
+ // Multiple controllers may share the same hierarchy, so may have
+ // the same root cgroup. Grab a single ref per hierarchy root.
+ if _, ok := cgset[cg]; ok {
+ continue
+ }
cg.IncRef() // Ref transferred to caller.
cgset[cg] = struct{}{}
}
diff --git a/pkg/sentry/kernel/fd_table.go b/pkg/sentry/kernel/fd_table.go
index 10885688c..62777faa8 100644
--- a/pkg/sentry/kernel/fd_table.go
+++ b/pkg/sentry/kernel/fd_table.go
@@ -154,9 +154,11 @@ func (f *FDTable) drop(ctx context.Context, file *fs.File) {
// dropVFS2 drops the table reference.
func (f *FDTable) dropVFS2(ctx context.Context, file *vfs.FileDescription) {
// Release any POSIX lock possibly held by the FDTable.
- err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF})
- if err != nil && err != syserror.ENOLCK {
- panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
+ if file.SupportsLocks() {
+ err := file.UnlockPOSIX(ctx, f, lock.LockRange{0, lock.LockEOF})
+ if err != nil && err != syserror.ENOLCK {
+ panic(fmt.Sprintf("UnlockPOSIX failed: %v", err))
+ }
}
// Drop the table's reference.
diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go
index 2d89b9ccd..24e467e93 100644
--- a/pkg/sentry/kernel/pipe/pipe_util.go
+++ b/pkg/sentry/kernel/pipe/pipe_util.go
@@ -86,6 +86,12 @@ func (p *Pipe) Write(ctx context.Context, src usermem.IOSequence) (int64, error)
if n > 0 {
p.Notify(waiter.ReadableEvents)
}
+ if err == unix.EPIPE {
+ // If we are returning EPIPE send SIGPIPE to the task.
+ if sendSig := linux.SignalNoInfoFuncFromContext(ctx); sendSig != nil {
+ sendSig(linux.SIGPIPE)
+ }
+ }
return n, err
}
diff --git a/pkg/sentry/kernel/semaphore/semaphore.go b/pkg/sentry/kernel/semaphore/semaphore.go
index fe2ab1662..3c5bd8ff7 100644
--- a/pkg/sentry/kernel/semaphore/semaphore.go
+++ b/pkg/sentry/kernel/semaphore/semaphore.go
@@ -702,7 +702,9 @@ func (s *Set) checkPerms(creds *auth.Credentials, reqPerms fs.PermMask) bool {
return s.checkCapability(creds)
}
-// destroy destroys the set. Caller must hold 's.mu'.
+// destroy destroys the set.
+//
+// Preconditions: Caller must hold 's.mu'.
func (s *Set) destroy() {
// Notify all waiters. They will fail on the next attempt to execute
// operations and return error.
diff --git a/pkg/sentry/kernel/task_context.go b/pkg/sentry/kernel/task_context.go
index 70b0699dc..c82d9e82b 100644
--- a/pkg/sentry/kernel/task_context.go
+++ b/pkg/sentry/kernel/task_context.go
@@ -17,6 +17,7 @@ package kernel
import (
"time"
+ "gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/fs"
@@ -113,6 +114,10 @@ func (t *Task) contextValue(key interface{}, isTaskGoroutine bool) interface{} {
return t.k.RealtimeClock()
case limits.CtxLimits:
return t.tg.limits
+ case linux.CtxSignalNoInfoFunc:
+ return func(sig linux.Signal) error {
+ return t.SendSignal(SignalInfoNoInfo(sig, t, t))
+ }
case pgalloc.CtxMemoryFile:
return t.k.mf
case pgalloc.CtxMemoryFileProvider: