summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/base.go6
-rw-r--r--pkg/sentry/fsimpl/cgroupfs/cgroupfs.go11
-rw-r--r--pkg/sentry/kernel/cgroup.go42
-rw-r--r--test/syscalls/linux/BUILD2
-rw-r--r--test/syscalls/linux/cgroup.cc52
-rw-r--r--test/util/cgroup_util.cc14
-rw-r--r--test/util/cgroup_util.h2
7 files changed, 107 insertions, 22 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
index 0f54888d8..6512e9cdb 100644
--- a/pkg/sentry/fsimpl/cgroupfs/base.go
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -26,7 +26,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -68,11 +67,6 @@ func (c *controllerCommon) Enabled() bool {
return true
}
-// Filesystem implements kernel.CgroupController.Filesystem.
-func (c *controllerCommon) Filesystem() *vfs.Filesystem {
- return c.fs.VFSFilesystem()
-}
-
// RootCgroup implements kernel.CgroupController.RootCgroup.
func (c *controllerCommon) RootCgroup() kernel.Cgroup {
return c.fs.rootCgroup()
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index bd3e69757..54050de3c 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -109,7 +109,7 @@ type InternalData struct {
DefaultControlValues map[string]int64
}
-// filesystem implements vfs.FilesystemImpl.
+// filesystem implements vfs.FilesystemImpl and kernel.cgroupFS.
//
// +stateify savable
type filesystem struct {
@@ -139,6 +139,11 @@ type filesystem struct {
tasksMu sync.RWMutex `state:"nosave"`
}
+// InitializeHierarchyID implements kernel.cgroupFS.InitializeHierarchyID.
+func (fs *filesystem) InitializeHierarchyID(hid uint32) {
+ fs.hierarchyID = hid
+}
+
// Name implements vfs.FilesystemType.Name.
func (FilesystemType) Name() string {
return Name
@@ -284,14 +289,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Register controllers. The registry may be modified concurrently, so if we
// get an error, we raced with someone else who registered the same
// controllers first.
- hid, err := r.Register(fs.kcontrollers)
- if err != nil {
+ if err := r.Register(fs.kcontrollers, fs); err != nil {
ctx.Infof("cgroupfs.FilesystemType.GetFilesystem: failed to register new hierarchy with controllers %v: %v", wantControllers, err)
rootD.DecRef(ctx)
fs.VFSFilesystem().DecRef(ctx)
return nil, nil, syserror.EBUSY
}
- fs.hierarchyID = hid
// Move all existing tasks to the root of the new hierarchy.
k.PopulateNewCgroupHierarchy(fs.rootCgroup())
diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go
index 1f1c63f37..0fbf27f64 100644
--- a/pkg/sentry/kernel/cgroup.go
+++ b/pkg/sentry/kernel/cgroup.go
@@ -48,10 +48,6 @@ type CgroupController interface {
// attached to. Returned value is valid for the lifetime of the controller.
HierarchyID() uint32
- // Filesystem returns the filesystem this controller is attached to.
- // Returned value is valid for the lifetime of the controller.
- Filesystem() *vfs.Filesystem
-
// RootCgroup returns the root cgroup for this controller. Returned value is
// valid for the lifetime of the controller.
RootCgroup() Cgroup
@@ -124,6 +120,19 @@ func (h *hierarchy) match(ctypes []CgroupControllerType) bool {
return true
}
+// cgroupFS is the public interface to cgroupfs. This lets the kernel package
+// refer to cgroupfs.filesystem methods without directly depending on the
+// cgroupfs package, which would lead to a circular dependency.
+type cgroupFS interface {
+ // Returns the vfs.Filesystem for the cgroupfs.
+ VFSFilesystem() *vfs.Filesystem
+
+ // InitializeHierarchyID sets the hierarchy ID for this filesystem during
+ // filesystem creation. May only be called before the filesystem is visible
+ // to the vfs layer.
+ InitializeHierarchyID(hid uint32)
+}
+
// CgroupRegistry tracks the active set of cgroup controllers on the system.
//
// +stateify savable
@@ -182,31 +191,35 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files
// Register registers the provided set of controllers with the registry as a new
// hierarchy. If any controller is already registered, the function returns an
-// error without modifying the registry. The hierarchy can be later referenced
-// by the returned id.
-func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
+// error without modifying the registry. Register sets the hierarchy ID for the
+// filesystem on success.
+func (r *CgroupRegistry) Register(cs []CgroupController, fs cgroupFS) error {
r.mu.Lock()
defer r.mu.Unlock()
if len(cs) == 0 {
- return InvalidCgroupHierarchyID, fmt.Errorf("can't register hierarchy with no controllers")
+ return fmt.Errorf("can't register hierarchy with no controllers")
}
for _, c := range cs {
if _, ok := r.controllers[c.Type()]; ok {
- return InvalidCgroupHierarchyID, fmt.Errorf("controllers may only be mounted on a single hierarchy")
+ return fmt.Errorf("controllers may only be mounted on a single hierarchy")
}
}
hid, err := r.nextHierarchyID()
if err != nil {
- return hid, err
+ return err
}
+ // Must not fail below here, once we publish the hierarchy ID.
+
+ fs.InitializeHierarchyID(hid)
+
h := hierarchy{
id: hid,
controllers: make(map[CgroupControllerType]CgroupController),
- fs: cs[0].Filesystem(),
+ fs: fs.VFSFilesystem(),
}
for _, c := range cs {
n := c.Type()
@@ -214,7 +227,7 @@ func (r *CgroupRegistry) Register(cs []CgroupController) (uint32, error) {
h.controllers[n] = c
}
r.hierarchies[hid] = h
- return hid, nil
+ return nil
}
// Unregister removes a previously registered hierarchy from the registry. If
@@ -253,6 +266,11 @@ func (r *CgroupRegistry) computeInitialGroups(inherit map[Cgroup]struct{}) map[C
for name, ctl := range r.controllers {
if _, ok := ctlSet[name]; !ok {
cg := ctl.RootCgroup()
+ // Multiple controllers may share the same hierarchy, so may have
+ // the same root cgroup. Grab a single ref per hierarchy root.
+ if _, ok := cgset[cg]; ok {
+ continue
+ }
cg.IncRef() // Ref transferred to caller.
cgset[cg] = struct{}{}
}
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 94a582256..efed4aeb0 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -4247,10 +4247,12 @@ cc_binary(
"//test/util:mount_util",
"@com_google_absl//absl/strings",
gtest,
+ "//test/util:cleanup",
"//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
+ "//test/util:thread_util",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:flat_hash_set",
],
diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc
index 70ad5868f..a009ade7e 100644
--- a/test/syscalls/linux/cgroup.cc
+++ b/test/syscalls/linux/cgroup.cc
@@ -25,9 +25,11 @@
#include "absl/strings/str_split.h"
#include "test/util/capability_util.h"
#include "test/util/cgroup_util.h"
+#include "test/util/cleanup.h"
#include "test/util/mount_util.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
+#include "test/util/thread_util.h"
namespace gvisor {
namespace testing {
@@ -192,6 +194,56 @@ TEST(Cgroup, MoptAllMustBeExclusive) {
SyscallFailsWithErrno(EINVAL));
}
+TEST(Cgroup, MountRace) {
+ SKIP_IF(!CgroupsAvailable());
+
+ TempPath mountpoint = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
+
+ const DisableSave ds; // Too many syscalls.
+
+ auto mount_thread = [&mountpoint]() {
+ for (int i = 0; i < 100; ++i) {
+ mount("none", mountpoint.path().c_str(), "cgroup", 0, 0);
+ }
+ };
+ std::list<ScopedThread> threads;
+ for (int i = 0; i < 10; ++i) {
+ threads.emplace_back(mount_thread);
+ }
+ for (auto& t : threads) {
+ t.Join();
+ }
+
+ auto cleanup = Cleanup([&mountpoint] {
+ // We need 1 umount call per successful mount. If some of the mount calls
+ // were unsuccessful, their corresponding umount will silently fail.
+ for (int i = 0; i < (10 * 100) + 1; ++i) {
+ umount(mountpoint.path().c_str());
+ }
+ });
+
+ Cgroup c = Cgroup(mountpoint.path());
+ // c should be a valid cgroup.
+ EXPECT_NO_ERRNO(c.ContainsCallingProcess());
+}
+
+TEST(Cgroup, UnmountRepeated) {
+ SKIP_IF(!CgroupsAvailable());
+
+ const DisableSave ds; // Too many syscalls.
+
+ Mounter m(ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()));
+ Cgroup c = ASSERT_NO_ERRNO_AND_VALUE(m.MountCgroupfs(""));
+
+ // First unmount should succeed.
+ EXPECT_THAT(umount(c.Path().c_str()), SyscallSucceeds());
+
+ // We just manually unmounted, so release managed resources.
+ m.release(c);
+
+ EXPECT_THAT(umount(c.Path().c_str()), SyscallFailsWithErrno(EINVAL));
+}
+
TEST(MemoryCgroup, MemoryUsageInBytes) {
SKIP_IF(!CgroupsAvailable());
diff --git a/test/util/cgroup_util.cc b/test/util/cgroup_util.cc
index 04d4f8de0..977993f41 100644
--- a/test/util/cgroup_util.cc
+++ b/test/util/cgroup_util.cc
@@ -142,6 +142,20 @@ PosixError Mounter::Unmount(const Cgroup& c) {
return NoError();
}
+void Mounter::release(const Cgroup& c) {
+ auto mp = mountpoints_.find(c.id());
+ if (mp != mountpoints_.end()) {
+ mp->second.release();
+ mountpoints_.erase(mp);
+ }
+
+ auto m = mounts_.find(c.id());
+ if (m != mounts_.end()) {
+ m->second.Release();
+ mounts_.erase(m);
+ }
+}
+
constexpr char kProcCgroupsHeader[] =
"#subsys_name\thierarchy\tnum_cgroups\tenabled";
diff --git a/test/util/cgroup_util.h b/test/util/cgroup_util.h
index b797a8b24..e3f696a89 100644
--- a/test/util/cgroup_util.h
+++ b/test/util/cgroup_util.h
@@ -83,6 +83,8 @@ class Mounter {
PosixError Unmount(const Cgroup& c);
+ void release(const Cgroup& c);
+
private:
// The destruction order of these members avoids errors during cleanup. We
// first unmount (by executing the mounts_ cleanups), then delete the