diff options
Diffstat (limited to 'pkg/sentry/fsimpl/memfs')
-rw-r--r-- | pkg/sentry/fsimpl/memfs/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/benchmark_test.go | 27 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/filesystem.go | 667 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/memfs.go | 29 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/pipe_test.go | 18 |
5 files changed, 408 insertions, 335 deletions
diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD index 0cc751eb8..5689bed3b 100644 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ b/pkg/sentry/fsimpl/memfs/BUILD @@ -50,6 +50,7 @@ go_test( deps = [ ":memfs", "//pkg/abi/linux", + "//pkg/fspath", "//pkg/refs", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", @@ -68,6 +69,7 @@ go_test( embed = [":memfs"], deps = [ "//pkg/abi/linux", + "//pkg/fspath", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go index 4a7a94a52..6e987af88 100644 --- a/pkg/sentry/fsimpl/memfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/memfs/benchmark_test.go @@ -21,6 +21,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" @@ -193,9 +194,9 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, + Root: root, + Start: vd, + Path: fspath.Parse(name), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -216,7 +217,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: vd, - Pathname: filename, + Path: fspath.Parse(filename), FollowFinalSymlink: true, }, &vfs.OpenOptions{ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, @@ -237,7 +238,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: filePath, + Path: fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { @@ -378,9 +379,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { root := mntns.Root() defer root.DecRef() pop := vfs.PathOperation{ - Root: root, - Start: root, - Pathname: mountPointName, + Root: root, + Start: root, + Path: fspath.Parse(mountPointName), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -408,9 +409,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, + Root: root, + Start: vd, + Path: fspath.Parse(name), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -438,7 +439,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: vd, - Pathname: filename, + Path: fspath.Parse(filename), FollowFinalSymlink: true, }, &vfs.OpenOptions{ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, @@ -458,7 +459,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: filePath, + Path: fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go index af4389459..4a83f310c 100644 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ b/pkg/sentry/fsimpl/memfs/filesystem.go @@ -25,323 +25,283 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -// stepLocked resolves rp.Component() in parent directory vfsd. +// Sync implements vfs.FilesystemImpl.Sync. +func (fs *filesystem) Sync(ctx context.Context) error { + // All filesystem state is in-memory. + return nil +} + +// stepLocked resolves rp.Component() to an existing file, starting from the +// given directory. // // stepLocked is loosely analogous to fs/namei.c:walk_component(). // -// Preconditions: filesystem.mu must be locked. !rp.Done(). inode == -// vfsd.Impl().(*dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) { - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + if !d.inode.isDir() { + return nil, syserror.ENOTDIR } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, nil, err + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err } afterSymlink: - nextVFSD, err := rp.ResolveComponent(vfsd) + nextVFSD, err := rp.ResolveComponent(&d.vfsd) if err != nil { - return nil, nil, err + return nil, err } if nextVFSD == nil { // Since the Dentry tree is the sole source of truth for memfs, if it's // not in the Dentry tree, it doesn't exist. - return nil, nil, syserror.ENOENT + return nil, syserror.ENOENT } - nextInode := nextVFSD.Impl().(*dentry).inode - if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { + next := nextVFSD.Impl().(*dentry) + if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { // TODO: symlink traversals update access time if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, nil, err + return nil, err } goto afterSymlink // don't check the current directory again } rp.Advance() - return nextVFSD, nextInode, nil + return next, nil } -// walkExistingLocked resolves rp to an existing file. +// walkParentDirLocked resolves all but the last path component of rp to an +// existing directory, starting from the given directory (which is usually +// rp.Start().Impl().(*dentry)). It does not check that the returned directory +// is searchable by the provider of rp. // -// walkExistingLocked is loosely analogous to Linux's -// fs/namei.c:path_lookupat(). +// walkParentDirLocked is loosely analogous to Linux's +// fs/namei.c:path_parentat(). // -// Preconditions: filesystem.mu must be locked. -func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Done() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + for !rp.Final() { + next, err := stepLocked(rp, d) if err != nil { - return nil, nil, err + return nil, err } + d = next } - if rp.MustBeDir() && !inode.isDir() { - return nil, nil, syserror.ENOTDIR + if !d.inode.isDir() { + return nil, syserror.ENOTDIR } - return vfsd, inode, nil + return d, nil } -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. +// resolveLocked resolves rp to an existing file. // -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). +// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). // -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) +// Preconditions: filesystem.mu must be locked. +func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { + d := rp.Start().Impl().(*dentry) + for !rp.Done() { + next, err := stepLocked(rp, d) if err != nil { - return nil, nil, err + return nil, err } + d = next } - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR + if rp.MustBeDir() && !d.inode.isDir() { + return nil, syserror.ENOTDIR } - return vfsd, inode, nil + return d, nil } -// checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). +// doCreateAt checks that creating a file at rp is permitted, then invokes +// create to do so. // -// Preconditions: filesystem.mu must be locked. parentInode == -// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true. -func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) { - if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { - return "", err - } - pc := rp.Component() - if pc == "." || pc == ".." { - return "", syserror.EEXIST - } - childVFSD, err := rp.ResolveChild(parentVFSD, pc) +// doCreateAt is loosely analogous to a conjunction of Linux's +// fs/namei.c:filename_create() and done_path_create(). +// +// Preconditions: !rp.Done(). For the final path component in rp, +// !rp.ShouldFollowSymlink(). +func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { + fs.mu.Lock() + defer fs.mu.Unlock() + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { - return "", err + return err } - if childVFSD != nil { - return "", syserror.EEXIST + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err } - if parentVFSD.IsDisowned() { - return "", syserror.ENOENT + name := rp.Component() + if name == "." || name == ".." { + return syserror.EEXIST } - return pc, nil -} - -// checkDeleteLocked checks that the file represented by vfsd may be deleted. -func checkDeleteLocked(vfsd *vfs.Dentry) error { - parentVFSD := vfsd.Parent() - if parentVFSD == nil { - return syserror.EBUSY + // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the child exists we want to return EEXIST immediately instead + // of attempting symlink/mount traversal. + if parent.vfsd.Child(name) != nil { + return syserror.EEXIST } - if parentVFSD.IsDisowned() { + if !dir && rp.MustBeDir() { return syserror.ENOENT } - return nil + // In memfs, the only way to cause a dentry to be disowned is by removing + // it from the filesystem, so this check is equivalent to checking if + // parent has been removed. + if parent.vfsd.IsDisowned() { + return syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + return create(parent, name) } // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { fs.mu.RLock() defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } if opts.CheckSearchable { - if !inode.isDir() { + if !d.inode.isDir() { return nil, syserror.ENOTDIR } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil { return nil, err } } - inode.incRef() - return vfsd, nil + d.IncRef() + return &d.vfsd, nil } -// LinkAt implements vfs.FilesystemImpl.LinkAt. -func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { - return err - } - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { - return syserror.EPERM + return nil, err } - d.inode.incLinksLocked() - child := fs.newDentry(d.inode) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil + d.IncRef() + return &d.vfsd, nil +} + +// LinkAt implements vfs.FilesystemImpl.LinkAt. +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + if rp.Mount() != vd.Mount() { + return syserror.EXDEV + } + d := vd.Dentry().Impl().(*dentry) + if d.inode.isDir() { + return syserror.EPERM + } + if d.inode.nlink == 0 { + return syserror.ENOENT + } + if d.inode.nlink == maxLinks { + return syserror.EMLINK + } + d.inode.incLinksLocked() + child := fs.newDentry(d.inode) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // MkdirAt implements vfs.FilesystemImpl.MkdirAt. func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - parentInode.incLinksLocked() // from child's ".." - return nil + return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error { + if parent.inode.nlink == maxLinks { + return syserror.EMLINK + } + parent.inode.incLinksLocked() // from child's ".." + child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - - switch opts.Mode.FileType() { - case 0: - // "Zero file type is equivalent to type S_IFREG." - mknod(2) - fallthrough - case linux.ModeRegular: - // TODO(b/138862511): Implement. - return syserror.EINVAL - - case linux.ModeNamedPipe: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil - - case linux.ModeSocket: - // TODO(b/138862511): Implement. - return syserror.EINVAL - - case linux.ModeCharacterDevice: - fallthrough - case linux.ModeBlockDevice: - // TODO(b/72101894): We don't support creating block or character - // devices at the moment. - // - // When we start supporting block and character devices, we'll - // need to check for CAP_MKNOD here. - return syserror.EPERM - - default: - // "EINVAL - mode requested creation of something other than a - // regular file, device special file, FIFO or socket." - mknod(2) - return syserror.EINVAL - } + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + switch opts.Mode.FileType() { + case 0, linux.S_IFREG: + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFIFO: + child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: + // Not yet supported. + return syserror.EPERM + default: + return syserror.EINVAL + } + }) } // OpenAt implements vfs.FilesystemImpl.OpenAt. func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - // Filter out flags that are not supported by memfs. O_DIRECTORY and - // O_NOFOLLOW have no effect here (they're handled by VFS by setting - // appropriate bits in rp), but are visible in FD status flags. O_NONBLOCK - // is supported only by pipes. - opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK + if opts.Flags&linux.O_TMPFILE != 0 { + // Not yet supported. + return nil, syserror.EOPNOTSUPP + } + // Handle O_CREAT and !O_CREAT separately, since in the latter case we + // don't need fs.mu for writing. if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } - return inode.open(ctx, rp, vfsd, opts.Flags, false) + return d.open(ctx, rp, opts.Flags, false /* afterCreate */) } mustCreate := opts.Flags&linux.O_EXCL != 0 - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode + start := rp.Start().Impl().(*dentry) fs.mu.Lock() defer fs.mu.Unlock() if rp.Done() { + // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } if mustCreate { return nil, syserror.EEXIST } - return inode.open(ctx, rp, vfsd, opts.Flags, false) + return start.open(ctx, rp, opts.Flags, false /* afterCreate */) } afterTrailingSymlink: - // Walk to the parent directory of the last path component. - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) - if err != nil { - return nil, err - } - } - if !inode.isDir() { - return nil, syserror.ENOTDIR + parent, err := walkParentDirLocked(rp, start) + if err != nil { + return nil, err } // Check for search permission in the parent directory. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { return nil, err } // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } - pc := rp.Component() - if pc == "." || pc == ".." { + name := rp.Component() + if name == "." || name == ".." { return nil, syserror.EISDIR } // Determine whether or not we need to create a file. - childVFSD, err := rp.ResolveChild(vfsd, pc) - if err != nil { - return nil, err - } - if childVFSD == nil { + child, err := stepLocked(rp, parent) + if err == syserror.ENOENT { // Already checked for searchability above; now check for writability. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { return nil, err } if err := rp.Mount().CheckBeginWrite(); err != nil { @@ -349,38 +309,35 @@ afterTrailingSymlink: } defer rp.Mount().EndWrite() // Create and open the child. - childInode := fs.newRegularFile(rp.Credentials(), opts.Mode) - child := fs.newDentry(childInode) - vfsd.InsertChild(&child.vfsd, pc) - inode.impl.(*directory).childList.PushBack(child) - return childInode.open(ctx, rp, &child.vfsd, opts.Flags, true) + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return child.open(ctx, rp, opts.Flags, true) } - // Open existing file or follow symlink. - if mustCreate { - return nil, syserror.EEXIST + if err != nil { + return nil, err } - childInode := childVFSD.Impl().(*dentry).inode - if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, err - } - // rp.Final() may no longer be true since we now need to resolve the - // symlink target. + // Do we need to resolve a trailing symlink? + if !rp.Done() { + start = parent goto afterTrailingSymlink } - return childInode.open(ctx, rp, childVFSD, opts.Flags, false) + // Open existing file. + if mustCreate { + return nil, syserror.EEXIST + } + return child.open(ctx, rp, opts.Flags, false) } -func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { +func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { ats := vfs.AccessTypesForOpenFlags(flags) if !afterCreate { - if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil { + if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { return nil, err } } mnt := rp.Mount() - switch impl := i.impl.(type) { + switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD fd.readable = vfs.MayReadFileWithOpenFlags(flags) @@ -392,8 +349,8 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr // mnt.EndWrite() is called by regularFileFD.Release(). } mnt.IncRef() - vfsd.IncRef() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + d.IncRef() + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) if flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data = impl.data[:0] @@ -408,28 +365,28 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr } var fd directoryFD mnt.IncRef() - vfsd.IncRef() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + d.IncRef() + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) return &fd.vfsfd, nil case *symlink: // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, vfsd, flags) + return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) } } // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) if err != nil { return "", err } - symlink, ok := inode.impl.(*symlink) + symlink, ok := d.inode.impl.(*symlink) if !ok { return "", syserror.EINVAL } @@ -437,63 +394,172 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } // RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { - if rp.Done() { - return syserror.ENOENT +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + if opts.Flags != 0 { + // TODO(b/145974740): Support renameat2 flags. + return syserror.EINVAL } + + // Resolve newParent first to verify that it's on this Mount. fs.mu.Lock() defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) + newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - _, err = checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { + newName := rp.Component() + if newName == "." || newName == ".." { + return syserror.EBUSY + } + mnt := rp.Mount() + if mnt != oldParentVD.Mount() { + return syserror.EXDEV + } + if err := mnt.CheckBeginWrite(); err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + defer mnt.EndWrite() + + oldParent := oldParentVD.Dentry().Impl().(*dentry) + if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - // TODO: actually implement RenameAt - return syserror.EPERM + // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the existing child is a symlink or mount point then we want + // to rename over it rather than follow it. + renamedVFSD := oldParent.vfsd.Child(oldName) + if renamedVFSD == nil { + return syserror.ENOENT + } + renamed := renamedVFSD.Impl().(*dentry) + if renamed.inode.isDir() { + if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) { + return syserror.EINVAL + } + if oldParent != newParent { + // Writability is needed to change renamed's "..". + if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil { + return err + } + } + } else { + if opts.MustBeDir || rp.MustBeDir() { + return syserror.ENOTDIR + } + } + + if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + replacedVFSD := newParent.vfsd.Child(newName) + var replaced *dentry + if replacedVFSD != nil { + replaced = replacedVFSD.Impl().(*dentry) + if replaced.inode.isDir() { + if !renamed.inode.isDir() { + return syserror.EISDIR + } + if replaced.vfsd.HasChildren() { + return syserror.ENOTEMPTY + } + } else { + if rp.MustBeDir() { + return syserror.ENOTDIR + } + if renamed.inode.isDir() { + return syserror.ENOTDIR + } + } + } else { + if renamed.inode.isDir() && newParent.inode.nlink == maxLinks { + return syserror.EMLINK + } + } + if newParent.vfsd.IsDisowned() { + return syserror.ENOENT + } + + // Linux places this check before some of those above; we do it here for + // simplicity, under the assumption that applications are not intentionally + // doing noop renames expecting them to succeed where non-noop renames + // would fail. + if renamedVFSD == replacedVFSD { + return nil + } + vfsObj := rp.VirtualFilesystem() + oldParentDir := oldParent.inode.impl.(*directory) + newParentDir := newParent.inode.impl.(*directory) + if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { + return err + } + if replaced != nil { + newParentDir.childList.Remove(replaced) + if replaced.inode.isDir() { + newParent.inode.decLinksLocked() // from replaced's ".." + } + replaced.inode.decLinksLocked() + } + oldParentDir.childList.Remove(renamed) + newParentDir.childList.PushBack(renamed) + if renamed.inode.isDir() { + oldParent.inode.decLinksLocked() + newParent.inode.incLinksLocked() + } + // TODO: update timestamps and parent directory sizes + vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) + return nil } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err + name := rp.Component() + if name == "." { + return syserror.EINVAL } - if !inode.isDir() { + if name == ".." { + return syserror.ENOTEMPTY + } + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if !child.inode.isDir() { return syserror.ENOTDIR } - if vfsd.HasChildren() { + if childVFSD.HasChildren() { return syserror.ENOTEMPTY } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { return err } - // Remove from parent directory's childList. - vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decRef() + parent.inode.impl.(*directory).childList.Remove(child) + parent.inode.decLinksLocked() // from child's ".." + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) return nil } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) if err != nil { return err } @@ -507,21 +573,21 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts // StatAt implements vfs.FilesystemImpl.StatAt. func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) if err != nil { return linux.Statx{}, err } var stat linux.Statx - inode.statTo(&stat) + d.inode.statTo(&stat) return stat, nil } // StatFSAt implements vfs.FilesystemImpl.StatFSAt. func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) if err != nil { return linux.Statfs{}, err } @@ -531,53 +597,52 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err + name := rp.Component() + if name == "." || name == ".." { + return syserror.EISDIR } - if inode.isDir() { + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if child.inode.isDir() { return syserror.EISDIR } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + if !rp.MustBeDir() { + return syserror.ENOTDIR + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { return err } - // Remove from parent directory's childList. - vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decLinksLocked() + parent.inode.impl.(*directory).childList.Remove(child) + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) return nil } @@ -585,7 +650,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return nil, err } @@ -597,7 +662,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return "", err } @@ -609,7 +674,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, nam func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return err } @@ -621,7 +686,7 @@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return err } diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go index 9d509f6e4..8d0167c93 100644 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ b/pkg/sentry/fsimpl/memfs/memfs.go @@ -29,6 +29,7 @@ package memfs import ( "fmt" + "math" "sync" "sync/atomic" @@ -64,12 +65,6 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt func (fs *filesystem) Release() { } -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - // dentry implements vfs.DentryImpl. type dentry struct { vfsd vfs.Dentry @@ -137,6 +132,8 @@ type inode struct { impl interface{} // immutable } +const maxLinks = math.MaxUint32 + func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) { i.refs = 1 i.mode = uint32(mode) @@ -147,20 +144,28 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, i.impl = impl } -// Preconditions: filesystem.mu must be locked for writing. +// incLinksLocked increments i's link count. +// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. +// i.nlink < maxLinks. func (i *inode) incLinksLocked() { - if atomic.AddUint32(&i.nlink, 1) <= 1 { + if i.nlink == 0 { panic("memfs.inode.incLinksLocked() called with no existing links") } + if i.nlink == maxLinks { + panic("memfs.inode.incLinksLocked() called with maximum link count") + } + atomic.AddUint32(&i.nlink, 1) } -// Preconditions: filesystem.mu must be locked for writing. +// decLinksLocked decrements i's link count. +// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. func (i *inode) decLinksLocked() { - if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 { - i.decRef() - } else if nlink == ^uint32(0) { // negative overflow + if i.nlink == 0 { panic("memfs.inode.decLinksLocked() called with no existing links") } + atomic.AddUint32(&i.nlink, ^uint32(0)) } func (i *inode) incRef() { diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go index 5bf527c80..be917aeee 100644 --- a/pkg/sentry/fsimpl/memfs/pipe_test.go +++ b/pkg/sentry/fsimpl/memfs/pipe_test.go @@ -19,6 +19,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -38,7 +39,7 @@ func TestSeparateFDs(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } rfdchan := make(chan *vfs.FileDescription) @@ -76,7 +77,7 @@ func TestNonblockingRead(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} @@ -108,7 +109,7 @@ func TestNonblockingWriteError(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} @@ -126,7 +127,7 @@ func TestSingleFD(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} @@ -160,10 +161,9 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy // Create the pipe. root := mntns.Root() pop := vfs.PathOperation{ - Root: root, - Start: root, - Pathname: fileName, - FollowFinalSymlink: true, + Root: root, + Start: root, + Path: fspath.Parse(fileName), } mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { @@ -174,7 +174,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { |