summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorLennart <3391295+lnsp@users.noreply.github.com>2020-10-27 18:05:16 -0700
committergVisor bot <gvisor-bot@google.com>2020-10-27 18:07:22 -0700
commit1c2836da37261c47cb8372e3ae5a49adab369694 (patch)
tree7d83c7846f788254801a3df69f204ec0a9cd68c5
parent013d79d8e4e008f113004e766986ac89474b210d (diff)
Implement /proc/[pid]/mem
This PR implements /proc/[pid]/mem for `pkg/sentry/fs` (refer to #2716) and `pkg/sentry/fsimpl`. @majek COPYBARA_INTEGRATE_REVIEW=https://github.com/google/gvisor/pull/4060 from lnsp:proc-pid-mem 2caf9021254646f441be618a9bb5528610e44d43 PiperOrigin-RevId: 339369629
-rw-r--r--pkg/sentry/fs/proc/task.go83
-rw-r--r--pkg/sentry/fsimpl/proc/task.go1
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go157
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_test.go1
-rw-r--r--test/syscalls/linux/proc.cc409
5 files changed, 651 insertions, 0 deletions
diff --git a/pkg/sentry/fs/proc/task.go b/pkg/sentry/fs/proc/task.go
index 22d658acf..450044c9c 100644
--- a/pkg/sentry/fs/proc/task.go
+++ b/pkg/sentry/fs/proc/task.go
@@ -92,6 +92,7 @@ func (p *proc) newTaskDir(t *kernel.Task, msrc *fs.MountSource, isThreadGroup bo
"gid_map": newGIDMap(t, msrc),
"io": newIO(t, msrc, isThreadGroup),
"maps": newMaps(t, msrc),
+ "mem": newMem(t, msrc),
"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc),
"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc),
"net": newNetDir(t, msrc),
@@ -399,6 +400,88 @@ func newNamespaceDir(t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
return newProcInode(t, d, msrc, fs.SpecialDirectory, t)
}
+// memData implements fs.InodeOperations for /proc/[pid]/mem.
+//
+// +stateify savable
+type memData struct {
+ fsutil.SimpleFileInode
+
+ t *kernel.Task // Task checked in GetFile and handed to every memDataFile it creates.
+}
+
+// memDataFile implements fs.FileOperations for /proc/[pid]/mem. Read is the only operation defined directly on it in this change; the rest come from the embedded helpers.
+//
+// +stateify savable
+type memDataFile struct {
+ fsutil.FileGenericSeek `state:"nosave"`
+ fsutil.FileNoIoctl `state:"nosave"`
+ fsutil.FileNoMMap `state:"nosave"`
+ fsutil.FileNoWrite `state:"nosave"`
+ fsutil.FileNoSplice `state:"nosave"`
+ fsutil.FileNoopFlush `state:"nosave"`
+ fsutil.FileNoopFsync `state:"nosave"`
+ fsutil.FileNoopRelease `state:"nosave"`
+ fsutil.FileNotDirReaddir `state:"nosave"`
+ fsutil.FileUseInodeUnstableAttr `state:"nosave"`
+ waiter.AlwaysReady `state:"nosave"`
+
+ t *kernel.Task // Task whose memory manager Read copies from.
+}
+
+func newMem(t *kernel.Task, msrc *fs.MountSource) *fs.Inode { // Builds the inode registered as "mem" in the task directory.
+ inode := &memData{
+ SimpleFileInode: *fsutil.NewSimpleFileInode(t, fs.RootOwner, fs.FilePermsFromMode(0400), linux.PROC_SUPER_MAGIC), // Mode 0400, owned by fs.RootOwner.
+ t: t,
+ }
+ return newProcInode(t, inode, msrc, fs.SpecialFile, t)
+}
+
+// Truncate implements fs.InodeOperations.Truncate. It does nothing and always returns nil.
+func (m *memData) Truncate(context.Context, *fs.Inode, int64) error {
+ return nil
+}
+
+// GetFile implements fs.InodeOperations.GetFile. It fails with EACCES unless kernel.ContextCanTrace accepts ctx for the task, propagates any checkTaskState error, and otherwise returns a memDataFile opened with Pread enabled.
+func (m *memData) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
+ // TODO(gvisor.dev/issue/260): Add check for PTRACE_MODE_ATTACH_FSCREDS.
+ // Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS.
+ // Since we don't implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH.
+ if !kernel.ContextCanTrace(ctx, m.t, true) {
+ return nil, syserror.EACCES
+ }
+ if err := checkTaskState(m.t); err != nil {
+ return nil, err
+ }
+ // Enable random access reads.
+ flags.Pread = true
+ return fs.NewFile(ctx, dirent, flags, &memDataFile{t: m.t}), nil
+}
+
+// Read implements fs.FileOperations.Read. The file offset is used as the address to copy from in the task's memory manager. It returns the number of bytes copied into dst, EFAULT if copying into dst fails, or EIO if nothing could be read.
+func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
+ if dst.NumBytes() == 0 {
+ return 0, nil
+ }
+ mm, err := getTaskMM(m.t)
+ if err != nil {
+ return 0, nil // The getTaskMM error is dropped: the read reports 0 bytes instead of failing (presumably the case ProcPidMem.AfterExit exercises).
+ }
+ defer mm.DecUsers(ctx)
+ // Buffer the read data because of MM locks.
+ buf := make([]byte, dst.NumBytes())
+ n, readErr := mm.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
+ if n > 0 { // A partial copy is returned as a short read; readErr is only reported when n == 0.
+ if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
+ return 0, syserror.EFAULT
+ }
+ return int64(n), nil
+ }
+ if readErr != nil {
+ return 0, syserror.EIO
+ }
+ return 0, nil
+}
+
// mapsData implements seqfile.SeqSource for /proc/[pid]/maps.
//
// +stateify savable
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 57cf8ce26..19011b010 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -64,6 +64,7 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace
"gid_map": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}),
"io": fs.newTaskOwnedInode(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
"maps": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mapsData{task: task}),
+ "mem": fs.newMemInode(task, fs.NextIno(), 0400),
"mountinfo": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountInfoData{task: task}),
"mounts": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountsData{task: task}),
"net": fs.newTaskNetDir(task),
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index d3f4e259b..ba71d0fde 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -31,6 +31,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/mm"
"gvisor.dev/gvisor/pkg/sentry/usage"
"gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -366,6 +367,162 @@ func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset in
return int64(srclen), nil
}
+var _ kernfs.Inode = (*memInode)(nil) // Compile-time check that *memInode satisfies kernfs.Inode.
+
+// memInode implements kernfs.Inode for /proc/[pid]/mem.
+//
+// +stateify savable
+type memInode struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoStatFS
+ kernfs.InodeNoopRefCount
+ kernfs.InodeNotDirectory
+ kernfs.InodeNotSymlink
+
+ task *kernel.Task // Task checked in Open and read from by memFD.PRead.
+ locks vfs.FileLocks // Passed to LockFD.Init by every memFD opened on this inode.
+}
+
+func (fs *filesystem) newMemInode(task *kernel.Task, ino uint64, perm linux.FileMode) kernfs.Inode { // Creates a memInode for task and wraps it in a taskOwnedInode.
+ // Note: credentials are overridden by taskOwnedInode.
+ inode := &memInode{task: task}
+ inode.init(task, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm)
+ return &taskOwnedInode{Inode: inode, owner: task}
+}
+
+func (f *memInode) init(ctx context.Context, creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) { // Initializes InodeAttrs as a regular file with mode perm; panics if perm has bits outside linux.PermissionsMask.
+ if perm&^linux.PermissionsMask != 0 {
+ panic(fmt.Sprintf("Only permission mask must be set: %x", perm&^linux.PermissionsMask)) // Report the offending bits, i.e. the ones outside the permission mask.
+ }
+ f.InodeAttrs.Init(ctx, creds, devMajor, devMinor, ino, linux.ModeRegular|perm)
+}
+
+// Open implements kernfs.Inode.Open. It fails with EACCES unless kernel.ContextCanTrace accepts ctx for the task, propagates any checkTaskState error, and otherwise returns a newly initialized memFD.
+func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ // TODO(gvisor.dev/issue/260): Add check for PTRACE_MODE_ATTACH_FSCREDS.
+ // Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS.
+ // Since we don't implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH.
+ if !kernel.ContextCanTrace(ctx, f.task, true) {
+ return nil, syserror.EACCES
+ }
+ if err := checkTaskState(f.task); err != nil {
+ return nil, err
+ }
+ fd := &memFD{}
+ if err := fd.Init(rp.Mount(), d, f, opts.Flags); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+}
+
+// SetStat implements kernfs.Inode.SetStat. Every request is rejected with EPERM.
+func (*memInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+var _ vfs.FileDescriptionImpl = (*memFD)(nil) // Compile-time check that *memFD satisfies vfs.FileDescriptionImpl.
+
+// memFD implements vfs.FileDescriptionImpl for /proc/[pid]/mem.
+//
+// +stateify savable
+type memFD struct {
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+ vfs.LockFD
+
+ inode *memInode // Set by Init; supplies the task for PRead and the attributes for Stat.
+
+ // mu guards the fields below.
+ mu sync.Mutex `state:"nosave"`
+ offset int64 // Position used by Read and changed by Seek.
+}
+
+// Init initializes memFD: it binds the lock state of inode, initializes the embedded vfs.FileDescription, and records inode. It returns the error from vfsfd.Init, if any.
+func (fd *memFD) Init(m *vfs.Mount, d *kernfs.Dentry, inode *memInode, flags uint32) error {
+ fd.LockFD.Init(&inode.locks)
+ if err := fd.vfsfd.Init(fd, flags, m, d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil {
+ return err
+ }
+ fd.inode = inode
+ return nil
+}
+
+// Seek implements vfs.FileDescriptionImpl.Seek. Only SEEK_SET and SEEK_CUR are accepted; any other whence, or a negative resulting offset, yields EINVAL.
+func (fd *memFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ switch whence {
+ case linux.SEEK_SET: // offset is already absolute.
+ case linux.SEEK_CUR:
+ offset += fd.offset
+ default:
+ return 0, syserror.EINVAL
+ }
+ if offset < 0 {
+ return 0, syserror.EINVAL
+ }
+ fd.offset = offset
+ return offset, nil
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead. The offset is used as the address to copy from in the task's memory manager. It returns the number of bytes copied into dst, EFAULT if copying into dst fails, or EIO if nothing could be read.
+func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ if dst.NumBytes() == 0 {
+ return 0, nil
+ }
+ m, err := getMMIncRef(fd.inode.task)
+ if err != nil {
+ return 0, nil // The getMMIncRef error is dropped: the read reports 0 bytes instead of failing (presumably the case ProcPidMem.AfterExit exercises).
+ }
+ defer m.DecUsers(ctx)
+ // Buffer the read data because of MM locks.
+ buf := make([]byte, dst.NumBytes())
+ n, readErr := m.CopyIn(ctx, usermem.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
+ if n > 0 { // A partial copy is returned as a short read; readErr is only reported when n == 0.
+ if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
+ return 0, syserror.EFAULT
+ }
+ return int64(n), nil
+ }
+ if readErr != nil {
+ return 0, syserror.EIO
+ }
+ return 0, nil
+}
+
+// Read implements vfs.FileDescriptionImpl.Read. It calls PRead at the current offset and advances the offset by the number of bytes read, all while holding mu.
+func (fd *memFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ fd.mu.Lock()
+ n, err := fd.PRead(ctx, dst, fd.offset, opts)
+ fd.offset += n // n is added even when err is non-nil.
+ fd.mu.Unlock()
+ return n, err
+}
+
+// Stat implements vfs.FileDescriptionImpl.Stat by delegating to the inode's Stat with the filesystem of this description's mount.
+func (fd *memFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ fs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
+ return fd.inode.Stat(ctx, fs, opts)
+}
+
+// SetStat implements vfs.FileDescriptionImpl.SetStat. Every request is rejected with EPERM.
+func (fd *memFD) SetStat(context.Context, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// Release implements vfs.FileDescriptionImpl.Release. It is a no-op.
+func (fd *memFD) Release(context.Context) {}
+
+// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX by forwarding to fd.Locks() with this file description.
+func (fd *memFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
+ return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
+}
+
+// UnlockPOSIX implements vfs.FileDescriptionImpl.UnlockPOSIX by forwarding to fd.Locks() with this file description.
+func (fd *memFD) UnlockPOSIX(ctx context.Context, uid fslock.UniqueID, start, length uint64, whence int16) error {
+ return fd.Locks().UnlockPOSIX(ctx, &fd.vfsfd, uid, start, length, whence)
+}
+
// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
//
// +stateify savable
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
index 2582ababd..7ee6227a9 100644
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ b/pkg/sentry/fsimpl/proc/tasks_test.go
@@ -77,6 +77,7 @@ var (
"gid_map": linux.DT_REG,
"io": linux.DT_REG,
"maps": linux.DT_REG,
+ "mem": linux.DT_REG,
"mountinfo": linux.DT_REG,
"mounts": linux.DT_REG,
"net": linux.DT_DIR,
diff --git a/test/syscalls/linux/proc.cc b/test/syscalls/linux/proc.cc
index e8fcc4439..7a0f33dff 100644
--- a/test/syscalls/linux/proc.cc
+++ b/test/syscalls/linux/proc.cc
@@ -26,6 +26,7 @@
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
+#include <sys/ptrace.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/utsname.h>
@@ -512,6 +513,414 @@ TEST(ProcSelfAuxv, EntryValues) {
EXPECT_EQ(i, proc_auxv.size());
}
+// Open /proc/self/mem and pread a local buffer through it; the bytes read must match the buffer.
+TEST(ProcPidMem, Read) {
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
+ char input[] = "hello-world";
+ char output[sizeof(input)];
+ ASSERT_THAT(pread(memfd.get(), output, sizeof(output),
+ reinterpret_cast<off_t>(input)),
+ SyscallSucceedsWithValue(sizeof(input)));
+ ASSERT_STREQ(input, output);
+}
+
+// Perform a read on an unmapped region and expect EIO.
+TEST(ProcPidMem, Unmapped) {
+ // Strategy: map then unmap, so we have a guaranteed unmapped region.
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
+ Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ // Fill the page with a known byte so the first read can be verified.
+ memset(mapping.ptr(), 'x', mapping.len());
+ char expected = 'x', output;
+ ASSERT_THAT(pread(memfd.get(), &output, sizeof(output),
+ reinterpret_cast<off_t>(mapping.ptr())),
+ SyscallSucceedsWithValue(sizeof(output)));
+ ASSERT_EQ(expected, output);
+
+ // Unmap the region again.
+ ASSERT_THAT(munmap(mapping.ptr(), mapping.len()), SyscallSucceeds());
+
+ // Reading the now-unmapped address must fail with EIO.
+ ASSERT_THAT(pread(memfd.get(), &output, sizeof(output),
+ reinterpret_cast<off_t>(mapping.ptr())),
+ SyscallFailsWithErrno(EIO));
+}
+
+// Seek to a local buffer, then read() repeatedly to verify that each read advances the file offset.
+TEST(ProcPidMem, RepeatedRead) {
+ auto const num_reads = 3;
+ char expected[] = "01234567890abcdefghijkl";
+ char output[sizeof(expected) / num_reads]; // One chunk: a third of expected, terminator included.
+
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
+ ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET),
+ SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
+ for (auto i = 0; i < num_reads; i++) {
+ ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ ASSERT_EQ(strncmp(&expected[i * sizeof(output)], output, sizeof(output)),
+ 0);
+ }
+}
+
+// Interleave SEEK_CUR seeks with reads and check both the returned offsets and the data; SEEK_END must fail.
+TEST(ProcPidMem, RepeatedSeek) {
+ auto const num_reads = 3;
+ char expected[] = "01234567890abcdefghijkl";
+ char output[sizeof(expected) / num_reads]; // One chunk: a third of expected, terminator included.
+
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
+ ASSERT_THAT(lseek(memfd.get(), reinterpret_cast<off_t>(&expected), SEEK_SET),
+ SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
+ // Read the first chunk.
+ ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
+ // Skip ahead one chunk, landing at the start of the third chunk.
+ ASSERT_THAT(lseek(memfd.get(), sizeof(output), SEEK_CUR),
+ SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected) +
+ sizeof(output) * 2));
+ // Read the third chunk.
+ ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ ASSERT_EQ(strncmp(&expected[2 * sizeof(output)], output, sizeof(output)), 0);
+ // Skip back three chunks, to the start. NOTE(review): -3 * sizeof(output) is computed in size_t and relies on wrapping to a negative off_t.
+ ASSERT_THAT(lseek(memfd.get(), -3 * sizeof(output), SEEK_CUR),
+ SyscallSucceedsWithValue(reinterpret_cast<off_t>(&expected)));
+ // Read the first chunk again.
+ ASSERT_THAT(read(memfd.get(), &output, sizeof(output)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ ASSERT_EQ(strncmp(&expected[0 * sizeof(output)], output, sizeof(output)), 0);
+ // Check that SEEK_END does not work.
+ ASSERT_THAT(lseek(memfd.get(), 0, SEEK_END), SyscallFailsWithErrno(EINVAL));
+}
+
+// Perform a read that starts on the last byte of a mapping and runs past its end; only the mapped byte must be returned.
+TEST(ProcPidMem, PartialRead) {
+ // Strategy: map a large region, unmap it, then remap a smaller region at the same address.
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
+
+ Mapping mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ MmapAnon(2 * kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
+ ASSERT_THAT(munmap(mapping.ptr(), mapping.len()), SyscallSucceeds());
+ Mapping smaller_mapping = ASSERT_NO_ERRNO_AND_VALUE(
+ Mmap(mapping.ptr(), kPageSize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+
+ // Fill the mapped page with a known byte.
+ memset(smaller_mapping.ptr(), 'x', smaller_mapping.len());
+
+ // The read must succeed, but return only the single mapped byte.
+ char expected[] = {'x'};
+ std::unique_ptr<char[]> output(new char[kPageSize]);
+ off_t read_offset =
+ reinterpret_cast<off_t>(smaller_mapping.ptr()) + kPageSize - 1;
+ ASSERT_THAT(
+ pread(memfd.get(), output.get(), kPageSize, read_offset),
+ SyscallSucceedsWithValue(sizeof(expected)));
+ // Since output is larger than expected, we have to compare manually.
+ ASSERT_EQ(expected[0], (output).get()[0]);
+}
+
+// Read a child's /proc/[pid]/mem while it is alive, then again after it has exited and been reaped; the second read must return 0 bytes.
+TEST(ProcPidMem, AfterExit) {
+ int pfd1[2] = {};
+ int pfd2[2] = {};
+
+ char expected[] = "hello-world";
+
+ ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
+ ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
+
+ // Create child process
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // Close reading end of first pipe
+ close(pfd1[0]);
+
+ // Tell parent that the child is up and running
+ char ok = 1;
+ TEST_CHECK(WriteFd(pfd1[1], &ok, sizeof(ok)) == sizeof(ok));
+ TEST_PCHECK(close(pfd1[1]) == 0);
+
+ // Close writing end of second pipe
+ TEST_PCHECK(close(pfd2[1]) == 0);
+
+ // Await parent OK to die
+ ok = 0;
+ TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
+
+ // Close the remaining pipe end
+ TEST_PCHECK(close(pfd2[0]) == 0);
+ _exit(0);
+ }
+
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Close writing end of first pipe
+ EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
+
+ // Wait for child to be alive and well
+ char ok = 0;
+ EXPECT_THAT(ReadFd(pfd1[0], &ok, sizeof(ok)),
+ SyscallSucceedsWithValue(sizeof(ok)));
+ // Close reading end of first pipe
+ EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
+
+ // Open /proc/pid/mem fd
+ std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
+
+ // Expect that we can read; expected was initialized before fork(), so the same address is used in the child.
+ char output[sizeof(expected)];
+ EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
+ reinterpret_cast<off_t>(&expected)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ EXPECT_STREQ(expected, output);
+
+ // Tell the child it is OK to exit
+ EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
+ ok = 1;
+ EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
+ SyscallSucceedsWithValue(sizeof(ok)));
+ EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
+
+ // Expect termination
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+
+ // Expect that we can't read anymore: the read succeeds but returns 0 bytes
+ EXPECT_THAT(pread(memfd.get(), &output, sizeof(output),
+ reinterpret_cast<off_t>(&expected)),
+ SyscallSucceedsWithValue(0));
+}
+
+// Read from /proc/[pid]/mem of a child after PTRACE_SEIZE-ing it and switching to a different UID while keeping CAP_SYS_PTRACE and CAP_DAC_OVERRIDE.
+TEST(ProcPidMem, DifferentUserAttached) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_DAC_OVERRIDE)));
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SYS_PTRACE)));
+
+ int pfd1[2] = {};
+ int pfd2[2] = {};
+
+ ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
+ ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
+
+ // Create child process
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // Close reading end of first pipe
+ close(pfd1[0]);
+
+ // Tell parent about location of input
+ char input[] = "hello-world";
+ off_t input_location = reinterpret_cast<off_t>(input);
+ TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
+ sizeof(input_location));
+ TEST_PCHECK(close(pfd1[1]) == 0);
+
+ // Close writing end of second pipe
+ TEST_PCHECK(close(pfd2[1]) == 0);
+
+ // Await parent OK to die
+ char ok = 0;
+ TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
+
+ // Close the remaining pipe end
+ TEST_PCHECK(close(pfd2[0]) == 0);
+ _exit(0);
+ }
+
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Close writing end of first pipe
+ EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
+
+ // Read target location from child
+ off_t target_location;
+ EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
+ SyscallSucceedsWithValue(sizeof(target_location)));
+ // Close reading end of first pipe
+ EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
+
+ ScopedThread([&] {
+ // Attach to child subprocess without stopping it
+ EXPECT_THAT(ptrace(PTRACE_SEIZE, child_pid, NULL, NULL), SyscallSucceeds());
+
+ // Keep capabilities after setuid
+ EXPECT_THAT(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), SyscallSucceeds());
+ constexpr int kNobody = 65534;
+ EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
+
+ // Only restore CAP_SYS_PTRACE and CAP_DAC_OVERRIDE
+ EXPECT_NO_ERRNO(SetCapability(CAP_SYS_PTRACE, true));
+ EXPECT_NO_ERRNO(SetCapability(CAP_DAC_OVERRIDE, true));
+
+ // Open /proc/pid/mem fd
+ std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
+ char expected[] = "hello-world";
+ char output[sizeof(expected)];
+ EXPECT_THAT(pread(memfd.get(), output, sizeof(output),
+ reinterpret_cast<off_t>(target_location)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ EXPECT_STREQ(expected, output);
+
+ // Tell the child it is OK to exit
+ EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
+ char ok = 1;
+ EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
+ SyscallSucceedsWithValue(sizeof(ok)));
+ EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
+
+ // Expect termination
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+ EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
+ << " status " << status;
+ });
+}
+
+// Attempt to open /proc/[pid]/mem of a child after switching to a different UID; the open must fail with EACCES.
+TEST(ProcPidMem, DifferentUser) {
+ SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETUID)));
+
+ int pfd1[2] = {};
+ int pfd2[2] = {};
+
+ ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
+ ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
+
+ // Create child process
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // Close reading end of first pipe
+ close(pfd1[0]);
+
+ // Tell parent about location of input
+ char input[] = "hello-world";
+ off_t input_location = reinterpret_cast<off_t>(input);
+ TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
+ sizeof(input_location));
+ TEST_PCHECK(close(pfd1[1]) == 0);
+
+ // Close writing end of second pipe
+ TEST_PCHECK(close(pfd2[1]) == 0);
+
+ // Await parent OK to die
+ char ok = 0;
+ TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
+
+ // Close the remaining pipe end
+ TEST_PCHECK(close(pfd2[0]) == 0);
+ _exit(0);
+ }
+
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Close writing end of first pipe
+ EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
+
+ // Read target location from child (used here only to wait until the child is running)
+ off_t target_location;
+ EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
+ SyscallSucceedsWithValue(sizeof(target_location)));
+ // Close reading end of first pipe
+ EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
+
+ ScopedThread([&] {
+ constexpr int kNobody = 65534;
+ EXPECT_THAT(syscall(SYS_setuid, kNobody), SyscallSucceeds());
+
+ // Attempt to open /proc/[child_pid]/mem
+ std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
+ EXPECT_THAT(open(mempath.c_str(), O_RDONLY), SyscallFailsWithErrno(EACCES));
+
+ // Tell the child it is OK to exit
+ EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
+ char ok = 1;
+ EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
+ SyscallSucceedsWithValue(sizeof(ok)));
+ EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
+
+ // Expect termination
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+ });
+}
+
+// Read a buffer out of a child's /proc/[pid]/mem without changing UID/GID; the read must succeed and match.
+TEST(ProcPidMem, SameUser) {
+ int pfd1[2] = {};
+ int pfd2[2] = {};
+
+ ASSERT_THAT(pipe(pfd1), SyscallSucceeds());
+ ASSERT_THAT(pipe(pfd2), SyscallSucceeds());
+
+ // Create child process
+ pid_t const child_pid = fork();
+ if (child_pid == 0) {
+ // Close reading end of first pipe
+ close(pfd1[0]);
+
+ // Tell parent about location of input
+ char input[] = "hello-world";
+ off_t input_location = reinterpret_cast<off_t>(input);
+ TEST_CHECK(WriteFd(pfd1[1], &input_location, sizeof(input_location)) ==
+ sizeof(input_location));
+ TEST_PCHECK(close(pfd1[1]) == 0);
+
+ // Close writing end of second pipe
+ TEST_PCHECK(close(pfd2[1]) == 0);
+
+ // Await parent OK to die
+ char ok = 0;
+ TEST_CHECK(ReadFd(pfd2[0], &ok, sizeof(ok)) == sizeof(ok));
+
+ // Close the remaining pipe end
+ TEST_PCHECK(close(pfd2[0]) == 0);
+ _exit(0);
+ }
+ // In parent process.
+ ASSERT_THAT(child_pid, SyscallSucceeds());
+
+ // Close writing end of first pipe
+ EXPECT_THAT(close(pfd1[1]), SyscallSucceeds());
+
+ // Read target location from child
+ off_t target_location;
+ EXPECT_THAT(ReadFd(pfd1[0], &target_location, sizeof(target_location)),
+ SyscallSucceedsWithValue(sizeof(target_location)));
+ // Close reading end of first pipe
+ EXPECT_THAT(close(pfd1[0]), SyscallSucceeds());
+
+ // Open /proc/pid/mem fd
+ std::string mempath = absl::StrCat("/proc/", child_pid, "/mem");
+ auto memfd = ASSERT_NO_ERRNO_AND_VALUE(Open(mempath, O_RDONLY));
+ char expected[] = "hello-world";
+ char output[sizeof(expected)];
+ EXPECT_THAT(pread(memfd.get(), output, sizeof(output),
+ reinterpret_cast<off_t>(target_location)),
+ SyscallSucceedsWithValue(sizeof(output)));
+ EXPECT_STREQ(expected, output);
+
+ // Tell the child it is OK to exit
+ EXPECT_THAT(close(pfd2[0]), SyscallSucceeds());
+ char ok = 1;
+ EXPECT_THAT(WriteFd(pfd2[1], &ok, sizeof(ok)),
+ SyscallSucceedsWithValue(sizeof(ok)));
+ EXPECT_THAT(close(pfd2[1]), SyscallSucceeds());
+
+ // Expect termination
+ int status;
+ ASSERT_THAT(waitpid(child_pid, &status, 0), SyscallSucceeds());
+}
+
// Just open and read /proc/self/maps, check that we can find [stack]
TEST(ProcSelfMaps, Basic) {
auto proc_self_maps =