diff options
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r-- | pkg/sentry/fsimpl/ext/dentry.go | 12 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/gofer.go | 60 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs.go | 14 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/directory.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/filesystem.go | 44 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/regular_file.go | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/tmpfs/tmpfs.go | 79 |
8 files changed, 187 insertions, 27 deletions
diff --git a/pkg/sentry/fsimpl/ext/dentry.go b/pkg/sentry/fsimpl/ext/dentry.go index bfbd7c3d4..6bd1a9fc6 100644 --- a/pkg/sentry/fsimpl/ext/dentry.go +++ b/pkg/sentry/fsimpl/ext/dentry.go @@ -60,3 +60,15 @@ func (d *dentry) DecRef() { // inode.decRef(). d.inode.decRef() } + +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) Watches() *vfs.Watches { + return nil +} diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 6295f6b54..3f3bd56f0 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -84,12 +84,6 @@ type filesystem struct { // devMinor is the filesystem's minor device number. devMinor is immutable. devMinor uint32 - // uid and gid are the effective KUID and KGID of the filesystem's creator, - // and are used as the owner and group for files that don't specify one. - // uid and gid are immutable. - uid auth.KUID - gid auth.KGID - // renameMu serves two purposes: // // - It synchronizes path resolution with renaming initiated by this @@ -122,6 +116,8 @@ type filesystemOptions struct { fd int aname string interop InteropMode // derived from the "cache" mount option + dfltuid auth.KUID + dfltgid auth.KGID msize uint32 version string @@ -230,6 +226,15 @@ type InternalFilesystemOptions struct { OpenSocketsByConnecting bool } +// _V9FS_DEFUID and _V9FS_DEFGID (from Linux's fs/9p/v9fs.h) are the default +// UIDs and GIDs used for files that do not provide a specific owner or group +// respectively. +const ( + // uint32(-2) doesn't work in Go. + _V9FS_DEFUID = auth.KUID(4294967294) + _V9FS_DEFGID = auth.KGID(4294967294) +) + // Name implements vfs.FilesystemType.Name. func (FilesystemType) Name() string { return Name @@ -315,6 +320,31 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } } + // Parse the default UID and GID. + fsopts.dfltuid = _V9FS_DEFUID + if dfltuidstr, ok := mopts["dfltuid"]; ok { + delete(mopts, "dfltuid") + dfltuid, err := strconv.ParseUint(dfltuidstr, 10, 32) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltuid=%s", dfltuidstr) + return nil, nil, syserror.EINVAL + } + // In Linux, dfltuid is interpreted as a UID and is converted to a KUID + // in the caller's user namespace, but goferfs isn't + // application-mountable. + fsopts.dfltuid = auth.KUID(dfltuid) + } + fsopts.dfltgid = _V9FS_DEFGID + if dfltgidstr, ok := mopts["dfltgid"]; ok { + delete(mopts, "dfltgid") + dfltgid, err := strconv.ParseUint(dfltgidstr, 10, 32) + if err != nil { + ctx.Warningf("gofer.FilesystemType.GetFilesystem: invalid default UID: dfltgid=%s", dfltgidstr) + return nil, nil, syserror.EINVAL + } + fsopts.dfltgid = auth.KGID(dfltgid) + } + // Parse the 9P message size. fsopts.msize = 1024 * 1024 // 1M, tested to give good enough performance up to 64M if msizestr, ok := mopts["msize"]; ok { @@ -422,8 +452,6 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt client: client, clock: ktime.RealtimeClockFromContext(ctx), devMinor: devMinor, - uid: creds.EffectiveKUID, - gid: creds.EffectiveKGID, syncableDentries: make(map[*dentry]struct{}), specialFileFDs: make(map[*specialFileFD]struct{}), } @@ -672,8 +700,8 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma file: file, ino: qid.Path, mode: uint32(attr.Mode), - uid: uint32(fs.uid), - gid: uint32(fs.gid), + uid: uint32(fs.opts.dfltuid), + gid: uint32(fs.opts.dfltgid), blockSize: usermem.PageSize, handle: handle{ fd: -1, @@ -1011,6 +1039,18 @@ func (d *dentry) decRefLocked() { } } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *dentry) Watches() *vfs.Watches { + return nil +} + // checkCachingLocked should be called after d's reference count becomes 0 or it // becomes disowned. // diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index a83151ad3..bbee8ccda 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -225,9 +225,21 @@ func (d *Dentry) destroy() { } } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *Dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) {} + +// Watches implements vfs.DentryImpl.Watches. +// +// TODO(gvisor.dev/issue/1479): Implement inotify. +func (d *Dentry) Watches() *vfs.Watches { + return nil +} + // InsertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on -// it's own isn't sufficient to insert a child into a directory. InsertChild +// its own isn't sufficient to insert a child into a directory. InsertChild // updates the link count on d if required. // // Precondition: d must represent a directory inode. diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index 007be1572..062321cbc 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -59,6 +59,7 @@ go_library( "//pkg/sentry/pgalloc", "//pkg/sentry/platform", "//pkg/sentry/socket/unix/transport", + "//pkg/sentry/uniqueid", "//pkg/sentry/usage", "//pkg/sentry/vfs", "//pkg/sentry/vfs/lock", diff --git a/pkg/sentry/fsimpl/tmpfs/directory.go b/pkg/sentry/fsimpl/tmpfs/directory.go index f2399981b..70387cb9c 100644 --- a/pkg/sentry/fsimpl/tmpfs/directory.go +++ b/pkg/sentry/fsimpl/tmpfs/directory.go @@ -79,6 +79,7 @@ func (dir *directory) removeChildLocked(child *dentry) { dir.iterMu.Lock() dir.childList.Remove(child) dir.iterMu.Unlock() + child.unlinked = true } type directoryFD struct { @@ -112,6 +113,7 @@ func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallba dir.iterMu.Lock() defer dir.iterMu.Unlock() + fd.dentry().InotifyWithParent(linux.IN_ACCESS, 0, vfs.PathEvent) fd.inode().touchAtime(fd.vfsfd.Mount()) if fd.off == 0 { diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 80fa7b29d..183eb975c 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -177,6 +177,12 @@ func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(pa if err := create(parentDir, name); err != nil { return err } + + ev := linux.IN_CREATE + if dir { + ev |= linux.IN_ISDIR + } + parentDir.inode.watches.Notify(name, uint32(ev), 0, vfs.InodeEvent) parentDir.inode.touchCMtime() return nil } @@ -241,6 +247,7 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EMLINK } d.inode.incLinksLocked() + d.inode.watches.Notify("", linux.IN_ATTRIB, 0, vfs.InodeEvent) parentDir.insertChildLocked(fs.newDentry(d.inode), name) return nil }) @@ -354,6 +361,7 @@ afterTrailingSymlink: if err != nil { return nil, err } + parentDir.inode.watches.Notify(name, linux.IN_CREATE, 0, vfs.PathEvent) parentDir.inode.touchCMtime() return fd, nil } @@ -559,6 +567,8 @@ func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa newParentDir.inode.touchCMtime() } renamed.inode.touchCtime() + + vfs.InotifyRename(ctx, &renamed.inode.watches, &oldParentDir.inode.watches, &newParentDir.inode.watches, oldName, newName, renamed.inode.isDir()) return nil } @@ -603,8 +613,11 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } parentDir.removeChildLocked(child) - parentDir.inode.decLinksLocked() // from child's ".." + parentDir.inode.watches.Notify(name, linux.IN_DELETE|linux.IN_ISDIR, 0, vfs.InodeEvent) + // Remove links for child, child/., and child/.. child.inode.decLinksLocked() + child.inode.decLinksLocked() + parentDir.inode.decLinksLocked() vfsObj.CommitDeleteDentry(&child.vfsd) parentDir.inode.touchCMtime() return nil @@ -618,7 +631,14 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts if err != nil { return err } - return d.inode.setStat(ctx, rp.Credentials(), &opts.Stat) + if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil { + return err + } + + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ev, 0, vfs.InodeEvent) + } + return nil } // StatAt implements vfs.FilesystemImpl.StatAt. @@ -698,6 +718,12 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { return err } + + // Generate inotify events. Note that this must take place before the link + // count of the child is decremented, or else the watches may be dropped + // before these events are added. + vfs.InotifyRemoveChild(&child.inode.watches, &parentDir.inode.watches, name) + parentDir.removeChildLocked(child) child.inode.decLinksLocked() vfsObj.CommitDeleteDentry(&child.vfsd) @@ -754,7 +780,12 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt if err != nil { return err } - return d.inode.setxattr(rp.Credentials(), &opts) + if err := d.inode.setxattr(rp.Credentials(), &opts); err != nil { + return err + } + + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. @@ -765,7 +796,12 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, if err != nil { return err } - return d.inode.removexattr(rp.Credentials(), name) + if err := d.inode.removexattr(rp.Credentials(), name); err != nil { + return err + } + + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // PrependPath implements vfs.FilesystemImpl.PrependPath. diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 3f433d666..fee174375 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -312,7 +312,7 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off f := fd.inode().impl.(*regularFile) if end := offset + srclen; end < offset { // Overflow. - return 0, syserror.EFBIG + return 0, syserror.EINVAL } var err error diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 1e781aecd..f0e098702 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -163,6 +163,11 @@ type dentry struct { // filesystem.mu. name string + // unlinked indicates whether this dentry has been unlinked from its parent. + // It is only set to true on an unlink operation, and never set from true to + // false. unlinked is protected by filesystem.mu. + unlinked bool + // dentryEntry (ugh) links dentries into their parent directory.childList. dentryEntry @@ -201,6 +206,26 @@ func (d *dentry) DecRef() { d.inode.decRef() } +// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. +func (d *dentry) InotifyWithParent(events uint32, cookie uint32, et vfs.EventType) { + if d.inode.isDir() { + events |= linux.IN_ISDIR + } + + // The ordering below is important, Linux always notifies the parent first. + if d.parent != nil { + // Note that d.parent or d.name may be stale if there is a concurrent + // rename operation. Inotify does not provide consistency guarantees. + d.parent.inode.watches.NotifyWithExclusions(d.name, events, cookie, et, d.unlinked) + } + d.inode.watches.Notify("", events, cookie, et) +} + +// Watches implements vfs.DentryImpl.Watches. +func (d *dentry) Watches() *vfs.Watches { + return &d.inode.watches +} + // inode represents a filesystem object. type inode struct { // fs is the owning filesystem. fs is immutable. @@ -209,11 +234,9 @@ type inode struct { // refs is a reference count. refs is accessed using atomic memory // operations. // - // A reference is held on all inodes that are reachable in the filesystem - // tree. For non-directories (which may have multiple hard links), this - // means that a reference is dropped when nlink reaches 0. For directories, - // nlink never reaches 0 due to the "." entry; instead, - // filesystem.RmdirAt() drops the reference. + // A reference is held on all inodes as long as they are reachable in the + // filesystem tree, i.e. nlink is nonzero. This reference is dropped when + // nlink reaches 0. refs int64 // xattrs implements extended attributes. @@ -238,6 +261,9 @@ type inode struct { // Advisory file locks, which lock at the inode level. locks lock.FileLocks + // Inotify watches for this inode. + watches vfs.Watches + impl interface{} // immutable } @@ -259,6 +285,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, i.ctime = now i.mtime = now // i.nlink initialized by caller + i.watches = vfs.Watches{} i.impl = impl } @@ -276,14 +303,17 @@ func (i *inode) incLinksLocked() { atomic.AddUint32(&i.nlink, 1) } -// decLinksLocked decrements i's link count. +// decLinksLocked decrements i's link count. If the link count reaches 0, we +// remove a reference on i as well. // // Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. func (i *inode) decLinksLocked() { if i.nlink == 0 { panic("tmpfs.inode.decLinksLocked() called with no existing links") } - atomic.AddUint32(&i.nlink, ^uint32(0)) + if atomic.AddUint32(&i.nlink, ^uint32(0)) == 0 { + i.decRef() + } } func (i *inode) incRef() { @@ -306,6 +336,7 @@ func (i *inode) tryIncRef() bool { func (i *inode) decRef() { if refs := atomic.AddInt64(&i.refs, -1); refs == 0 { + i.watches.HandleDeletion() if regFile, ok := i.impl.(*regularFile); ok { // Release memory used by regFile to store data. Since regFile is // no longer usable, we don't need to grab any locks or update any @@ -627,8 +658,12 @@ func (fd *fileDescription) filesystem() *filesystem { return fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) } +func (fd *fileDescription) dentry() *dentry { + return fd.vfsfd.Dentry().Impl().(*dentry) +} + func (fd *fileDescription) inode() *inode { - return fd.vfsfd.Dentry().Impl().(*dentry).inode + return fd.dentry().inode } // Stat implements vfs.FileDescriptionImpl.Stat. @@ -641,7 +676,15 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu // SetStat implements vfs.FileDescriptionImpl.SetStat. func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { creds := auth.CredentialsFromContext(ctx) - return fd.inode().setStat(ctx, creds, &opts.Stat) + d := fd.dentry() + if err := d.inode.setStat(ctx, creds, &opts.Stat); err != nil { + return err + } + + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ev, 0, vfs.InodeEvent) + } + return nil } // Listxattr implements vfs.FileDescriptionImpl.Listxattr. @@ -656,12 +699,26 @@ func (fd *fileDescription) Getxattr(ctx context.Context, opts vfs.GetxattrOption // Setxattr implements vfs.FileDescriptionImpl.Setxattr. func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOptions) error { - return fd.inode().setxattr(auth.CredentialsFromContext(ctx), &opts) + d := fd.dentry() + if err := d.inode.setxattr(auth.CredentialsFromContext(ctx), &opts); err != nil { + return err + } + + // Generate inotify events. + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // Removexattr implements vfs.FileDescriptionImpl.Removexattr. func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { - return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name) + d := fd.dentry() + if err := d.inode.removexattr(auth.CredentialsFromContext(ctx), name); err != nil { + return err + } + + // Generate inotify events. + d.InotifyWithParent(linux.IN_ATTRIB, 0, vfs.InodeEvent) + return nil } // NewMemfd creates a new tmpfs regular file and file description that can back |