diff options
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs.go | 5 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 28 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task_files.go | 111 |
4 files changed, 129 insertions, 17 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go index 24e28a51f..22c8b7fda 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -383,11 +383,6 @@ func (d *dir) DecRef(ctx context.Context) { d.dirRefs.DecRef(func() { d.Destroy(ctx) }) } -// StatFS implements kernfs.Inode.StatFS. -func (d *dir) StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) { - return vfs.GenericStatFS(linux.CGROUP_SUPER_MAGIC), nil -} - // controllerFile represents a generic control file that appears within a cgroup // directory. // diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index ec8d58cc9..25d2e39d6 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -1161,6 +1161,13 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs if !d.isSynthetic() { if stat.Mask != 0 { + if stat.Mask&linux.STATX_SIZE != 0 { + // d.dataMu must be held around the update to both the remote + // file's size and d.size to serialize with writeback (which + // might otherwise write data back up to the old d.size after + // the remote file has been truncated). + d.dataMu.Lock() + } if err := d.file.setAttr(ctx, p9.SetAttrMask{ Permissions: stat.Mask&linux.STATX_MODE != 0, UID: stat.Mask&linux.STATX_UID != 0, @@ -1180,13 +1187,16 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs MTimeSeconds: uint64(stat.Mtime.Sec), MTimeNanoSeconds: uint64(stat.Mtime.Nsec), }); err != nil { + if stat.Mask&linux.STATX_SIZE != 0 { + d.dataMu.Unlock() // +checklocksforce: locked conditionally above + } return err } if stat.Mask&linux.STATX_SIZE != 0 { // d.size should be kept up to date, and privatized // copy-on-write mappings of truncated pages need to be // invalidated, even if InteropModeShared is in effect. 
- d.updateSizeLocked(stat.Size) + d.updateSizeAndUnlockDataMuLocked(stat.Size) // +checklocksforce: locked conditionally above } } if d.fs.opts.interop == InteropModeShared { @@ -1249,6 +1259,14 @@ func (d *dentry) doAllocate(ctx context.Context, offset, length uint64, allocate // Preconditions: d.metadataMu must be locked. func (d *dentry) updateSizeLocked(newSize uint64) { d.dataMu.Lock() + d.updateSizeAndUnlockDataMuLocked(newSize) +} + +// Preconditions: d.metadataMu and d.dataMu must be locked. +// +// Postconditions: d.dataMu is unlocked. +// +checklocksrelease:d.dataMu +func (d *dentry) updateSizeAndUnlockDataMuLocked(newSize uint64) { oldSize := d.size atomic.StoreUint64(&d.size, newSize) // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings @@ -1257,9 +1275,9 @@ func (d *dentry) updateSizeLocked(newSize uint64) { // contents beyond the new d.size. (We are still holding d.metadataMu, // so we can't race with Write or another truncate.) d.dataMu.Unlock() - if d.size < oldSize { + if newSize < oldSize { oldpgend, _ := hostarch.PageRoundUp(oldSize) - newpgend, _ := hostarch.PageRoundUp(d.size) + newpgend, _ := hostarch.PageRoundUp(newSize) if oldpgend != newpgend { d.mapsMu.Lock() d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{ @@ -1275,8 +1293,8 @@ func (d *dentry) updateSizeLocked(newSize uint64) { // truncated pages have been removed from the remote file, they // should be dropped without being written back. 
d.dataMu.Lock() - d.cache.Truncate(d.size, d.fs.mfp.MemoryFile()) - d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend}) + d.cache.Truncate(newSize, d.fs.mfp.MemoryFile()) + d.dirty.KeepClean(memmap.MappableRange{newSize, oldpgend}) d.dataMu.Unlock() } } diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index cbbc0935a..f54811edf 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -78,7 +78,7 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &smapsData{task: task}), "stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), "statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}), - "status": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}), + "status": fs.newStatusInode(ctx, task, pidns, fs.NextIno(), 0444), "uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 5bb6bc372..0ce3ed797 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -661,34 +661,119 @@ func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } -// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status. +// statusInode implements kernfs.Inode for /proc/[pid]/status. 
// // +stateify savable -type statusData struct { - kernfs.DynamicBytesFile +type statusInode struct { + kernfs.InodeAttrs + kernfs.InodeNoStatFS + kernfs.InodeNoopRefCount + kernfs.InodeNotDirectory + kernfs.InodeNotSymlink task *kernel.Task pidns *kernel.PIDNamespace + locks vfs.FileLocks } -var _ dynamicInode = (*statusData)(nil) +// statusFD implements vfs.FileDescriptionImpl and vfs.DynamicBytesSource for +// /proc/[pid]/status. +// +// +stateify savable +type statusFD struct { + statusFDLowerBase + vfs.DynamicBytesFileDescriptionImpl + vfs.LockFD + + vfsfd vfs.FileDescription + + inode *statusInode + task *kernel.Task + pidns *kernel.PIDNamespace + userns *auth.UserNamespace // equivalent to struct file::f_cred::user_ns +} + +// statusFDLowerBase is a dumb hack to ensure that statusFD prefers +// vfs.DynamicBytesFileDescriptionImpl methods to vfs.FileDescriptionDefaultImpl +// methods. +// +// +stateify savable +type statusFDLowerBase struct { + vfs.FileDescriptionDefaultImpl +} + +func (fs *filesystem) newStatusInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, ino uint64, perm linux.FileMode) kernfs.Inode { + // Note: credentials are overridden by taskOwnedInode. + inode := &statusInode{ + task: task, + pidns: pidns, + } + inode.InodeAttrs.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeRegular|perm) + return &taskOwnedInode{Inode: inode, owner: task} +} + +// Open implements kernfs.Inode.Open. +func (s *statusInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { + fd := &statusFD{ + inode: s, + task: s.task, + pidns: s.pidns, + userns: rp.Credentials().UserNamespace, + } + fd.LockFD.Init(&s.locks) + if err := fd.vfsfd.Init(fd, opts.Flags, rp.Mount(), d.VFSDentry(), &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + fd.SetDataSource(fd) + return &fd.vfsfd, nil +} + +// SetStat implements kernfs.Inode.SetStat. 
+func (*statusInode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { + return linuxerr.EPERM +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (s *statusFD) Release(ctx context.Context) { +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (s *statusFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + fs := s.vfsfd.VirtualDentry().Mount().Filesystem() + return s.inode.Stat(ctx, fs, opts) +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (s *statusFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + return linuxerr.EPERM +} // Generate implements vfs.DynamicBytesSource.Generate. -func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { +func (s *statusFD) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name()) fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus()) fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup())) fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task)) + ppid := kernel.ThreadID(0) if parent := s.task.Parent(); parent != nil { ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) } fmt.Fprintf(buf, "PPid:\t%d\n", ppid) + tpid := kernel.ThreadID(0) if tracer := s.task.Tracer(); tracer != nil { tpid = s.pidns.IDOfTask(tracer) } fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid) + + creds := s.task.Credentials() + ruid := creds.RealKUID.In(s.userns).OrOverflow() + euid := creds.EffectiveKUID.In(s.userns).OrOverflow() + suid := creds.SavedKUID.In(s.userns).OrOverflow() + rgid := creds.RealKGID.In(s.userns).OrOverflow() + egid := creds.EffectiveKGID.In(s.userns).OrOverflow() + sgid := creds.SavedKGID.In(s.userns).OrOverflow() var fds int var vss, rss, data uint64 s.task.WithMuLocked(func(t *kernel.Task) { @@ -701,12 +786,26 @@ func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { data = 
mm.VirtualDataSize() } }) + // Filesystem user/group IDs aren't implemented; effective UID/GID are used + // instead. + fmt.Fprintf(buf, "Uid:\t%d\t%d\t%d\t%d\n", ruid, euid, suid, euid) + fmt.Fprintf(buf, "Gid:\t%d\t%d\t%d\t%d\n", rgid, egid, sgid, egid) fmt.Fprintf(buf, "FDSize:\t%d\n", fds) + buf.WriteString("Groups:\t ") + // There is a space between each pair of supplemental GIDs, as well as an + // unconditional trailing space that some applications actually depend on. + var sep string + for _, kgid := range creds.ExtraKGIDs { + fmt.Fprintf(buf, "%s%d", sep, kgid.In(s.userns).OrOverflow()) + sep = " " + } + buf.WriteString(" \n") + fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10) fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10) fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10) + fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count()) - creds := s.task.Credentials() fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps) fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps) fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps) |