summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- pkg/sentry/kernel/task_clone.go 7
-rw-r--r-- test/syscalls/linux/BUILD 2
-rw-r--r-- test/syscalls/linux/fork.cc 31
3 files changed, 36 insertions, 4 deletions
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go
index b66fa34a9..114e7f858 100644
--- a/pkg/sentry/kernel/task_clone.go
+++ b/pkg/sentry/kernel/task_clone.go
@@ -17,7 +17,6 @@ package kernel
import (
"gvisor.googlesource.com/gvisor/pkg/abi/linux"
"gvisor.googlesource.com/gvisor/pkg/bpf"
- "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth"
"gvisor.googlesource.com/gvisor/pkg/sentry/usermem"
"gvisor.googlesource.com/gvisor/pkg/syserror"
)
@@ -166,7 +165,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
// privileges over the remaining namespaces created by the call." -
// user_namespaces(7)
creds := t.Credentials()
- var userns *auth.UserNamespace
+ userns := creds.UserNamespace
if opts.NewUserNamespace {
var err error
// "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and
@@ -182,7 +181,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
return 0, nil, err
}
}
- if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapability(linux.CAP_SYS_ADMIN) {
+ if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) {
return 0, nil, syserror.EPERM
}
@@ -287,7 +286,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) {
nt.SetSignalStack(t.SignalStack())
}
- if userns != nil {
+ if userns != creds.UserNamespace {
if err := nt.SetUserNamespace(userns); err != nil {
// This shouldn't be possible: userns was created from nt.creds, so
// nt should have CAP_SYS_ADMIN in userns.
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 3c61c48ef..e7f5ea998 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -732,7 +732,9 @@ cc_binary(
srcs = ["fork.cc"],
linkstatic = 1,
deps = [
+ "//test/util:capability_util",
"//test/util:logging",
+ "//test/util:memory_util",
"//test/util:test_main",
"//test/util:test_util",
"//test/util:thread_util",
diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc
index 1bff5e50f..73ac885b5 100644
--- a/test/syscalls/linux/fork.cc
+++ b/test/syscalls/linux/fork.cc
@@ -21,11 +21,14 @@
#include <sys/types.h>
#include <unistd.h>
#include <atomic>
+#include <cstdlib>
#include "gtest/gtest.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
+#include "test/util/capability_util.h"
#include "test/util/logging.h"
+#include "test/util/memory_util.h"
#include "test/util/test_util.h"
#include "test/util/thread_util.h"
@@ -393,6 +396,34 @@ TEST_F(ForkTest, Affinity) {
EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0));
}
+TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) {
+ // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a
+ // single clone(2) or unshare(2) call, the user namespace is guaranteed to be
+ // created first, giving the child (clone(2)) or caller (unshare(2))
+ // privileges over the remaining namespaces created by the call. Thus, it is
+ // possible for an unprivileged caller to specify this combination of flags."
+ // - user_namespaces(7)
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
+ Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ int child_pid;
+ // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these
+ // namespaces were implemented in Linux before user namespaces.
+ ASSERT_THAT(
+ child_pid = clone(
+ +[](void*) { return 0; },
+ reinterpret_cast<void*>(child_stack.addr() + kPageSize),
+ CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD,
+ /* arg = */ nullptr),
+ SyscallSucceeds());
+
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0),
+ SyscallSucceedsWithValue(child_pid));
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << "status = " << status;
+}
+
#ifdef __x86_64__
// Clone with CLONE_SETTLS and a non-canonical TLS address is rejected.
TEST(CloneTest, NonCanonicalTLS) {