diff options
-rw-r--r-- | pkg/sentry/kernel/task_clone.go | 7 | +++----
-rw-r--r-- | test/syscalls/linux/BUILD | 2 | ++
-rw-r--r-- | test/syscalls/linux/fork.cc | 31 | +++++++++++++++++++++++++++++++
3 files changed, 36 insertions, 4 deletions
diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index b66fa34a9..114e7f858 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -17,7 +17,6 @@ package kernel import ( "gvisor.googlesource.com/gvisor/pkg/abi/linux" "gvisor.googlesource.com/gvisor/pkg/bpf" - "gvisor.googlesource.com/gvisor/pkg/sentry/kernel/auth" "gvisor.googlesource.com/gvisor/pkg/sentry/usermem" "gvisor.googlesource.com/gvisor/pkg/syserror" ) @@ -166,7 +165,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { // privileges over the remaining namespaces created by the call." - // user_namespaces(7) creds := t.Credentials() - var userns *auth.UserNamespace + userns := creds.UserNamespace if opts.NewUserNamespace { var err error // "EPERM (since Linux 3.9): CLONE_NEWUSER was specified in flags and @@ -182,7 +181,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { return 0, nil, err } } - if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapability(linux.CAP_SYS_ADMIN) { + if (opts.NewPIDNamespace || opts.NewNetworkNamespace || opts.NewUTSNamespace) && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, userns) { return 0, nil, syserror.EPERM } @@ -287,7 +286,7 @@ func (t *Task) Clone(opts *CloneOptions) (ThreadID, *SyscallControl, error) { nt.SetSignalStack(t.SignalStack()) } - if userns != nil { + if userns != creds.UserNamespace { if err := nt.SetUserNamespace(userns); err != nil { // This shouldn't be possible: userns was created from nt.creds, so // nt should have CAP_SYS_ADMIN in userns. 
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index 3c61c48ef..e7f5ea998 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -732,7 +732,9 @@ cc_binary( srcs = ["fork.cc"], linkstatic = 1, deps = [ + "//test/util:capability_util", "//test/util:logging", + "//test/util:memory_util", "//test/util:test_main", "//test/util:test_util", "//test/util:thread_util", diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 1bff5e50f..73ac885b5 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -21,11 +21,14 @@ #include <sys/types.h> #include <unistd.h> #include <atomic> +#include <cstdlib> #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/util/capability_util.h" #include "test/util/logging.h" +#include "test/util/memory_util.h" #include "test/util/test_util.h" #include "test/util/thread_util.h" @@ -393,6 +396,34 @@ TEST_F(ForkTest, Affinity) { EXPECT_THAT(Wait(child), SyscallSucceedsWithValue(0)); } +TEST(CloneTest, NewUserNamespacePermitsAllOtherNamespaces) { + // "If CLONE_NEWUSER is specified along with other CLONE_NEW* flags in a + // single clone(2) or unshare(2) call, the user namespace is guaranteed to be + // created first, giving the child (clone(2)) or caller (unshare(2)) + // privileges over the remaining namespaces created by the call. Thus, it is + // possible for an unprivileged caller to specify this combination of flags." + // - user_namespaces(7) + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + Mapping child_stack = ASSERT_NO_ERRNO_AND_VALUE( + MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE)); + int child_pid; + // We only test with CLONE_NEWIPC, CLONE_NEWNET, and CLONE_NEWUTS since these + // namespaces were implemented in Linux before user namespaces. 
+ ASSERT_THAT( + child_pid = clone( + +[](void*) { return 0; }, + reinterpret_cast<void*>(child_stack.addr() + kPageSize), + CLONE_NEWUSER | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUTS | SIGCHLD, + /* arg = */ nullptr), + SyscallSucceeds()); + + int status; + ASSERT_THAT(waitpid(child_pid, &status, 0), + SyscallSucceedsWithValue(child_pid)); + EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) + << "status = " << status; +} + #ifdef __x86_64__ // Clone with CLONE_SETTLS and a non-canonical TLS address is rejected. TEST(CloneTest, NonCanonicalTLS) { |