diff options
author | Jamie Liu <jamieliu@google.com> | 2019-12-23 13:17:29 -0800 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2019-12-23 13:18:39 -0800 |
commit | f45df7505b0e7baf48a37f7c625f05051d144738 (patch) | |
tree | 3e97a9a8b6ff95b4bb7994f51d205f58529fb6b9 /pkg/sentry/fsimpl | |
parent | e548ce18051398fb3fe379326080411f59fda379 (diff) |
Clean up vfs.FilesystemImpl methods that operate on parent directories.
- Make FilesystemImpl methods that operate on parent directories require
!rp.Done() (i.e. there is at least one path component to resolve) as a
precondition and a postcondition (in cases where they do not finish path
resolution due to a mount boundary or an absolute symlink), and require that
they do not need to follow the last path component (the file being created /
deleted) as a symlink. Check for these in VFS.
- Add FilesystemImpl.GetParentDentryAt(), which is required to obtain the old
parent directory for VFS.RenameAt(). (Passing the Dentry to be renamed
instead has the wrong semantics if the file named by the old path is a mount
point since the Dentry will be on the wrong Mount.)
- Update memfs to implement these methods correctly (?), including RenameAt.
- Change fspath.Parse() to allow empty paths (to simplify implementation of
AT_EMPTY_PATH).
- Change vfs.PathOperation to take a fspath.Path instead of a raw pathname;
non-test callers will need to fspath.Parse() pathnames themselves anyway in
order to detect absolute paths and select PathOperation.Start accordingly.
PiperOrigin-RevId: 286934941
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r-- | pkg/sentry/fsimpl/ext/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/ext/benchmark/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go | 11 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/ext/ext_test.go | 9 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/ext/filesystem.go | 12 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/BUILD | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/filesystem.go | 138 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs_test.go | 7 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/BUILD | 2 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/benchmark_test.go | 27 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/filesystem.go | 667 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/memfs.go | 29 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/memfs/pipe_test.go | 18 |
13 files changed, 531 insertions, 392 deletions
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD index 880b7bcd3..bc90330bc 100644 --- a/pkg/sentry/fsimpl/ext/BUILD +++ b/pkg/sentry/fsimpl/ext/BUILD @@ -74,6 +74,7 @@ go_test( deps = [ "//pkg/abi/linux", "//pkg/binary", + "//pkg/fspath", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", "//pkg/sentry/fsimpl/ext/disklayout", diff --git a/pkg/sentry/fsimpl/ext/benchmark/BUILD b/pkg/sentry/fsimpl/ext/benchmark/BUILD index bfc46dfa6..4fc8296ef 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/BUILD +++ b/pkg/sentry/fsimpl/ext/benchmark/BUILD @@ -7,6 +7,7 @@ go_test( size = "small", srcs = ["benchmark_test.go"], deps = [ + "//pkg/fspath", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", "//pkg/sentry/fsimpl/ext", diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go index 177ce2cb9..2f46d2d13 100644 --- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go +++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go @@ -24,6 +24,7 @@ import ( "strings" "testing" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext" @@ -121,7 +122,7 @@ func BenchmarkVFS2Ext4fsStat(b *testing.B) { stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{ Root: *root, Start: *root, - Pathname: filePath, + Path: fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { @@ -150,9 +151,9 @@ func BenchmarkVFS2ExtfsMountStat(b *testing.B) { creds := auth.CredentialsFromContext(ctx) mountPointName := "/1/" pop := vfs.PathOperation{ - Root: *root, - Start: *root, - Pathname: mountPointName, + Root: *root, + Start: *root, + Path: fspath.Parse(mountPointName), } // Save the mount point for later use. 
@@ -181,7 +182,7 @@ func BenchmarkVFS2ExtfsMountStat(b *testing.B) { stat, err := vfsfs.StatAt(ctx, creds, &vfs.PathOperation{ Root: *root, Start: *root, - Pathname: filePath, + Path: fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { diff --git a/pkg/sentry/fsimpl/ext/ext_test.go b/pkg/sentry/fsimpl/ext/ext_test.go index e9f756732..5d6c999bd 100644 --- a/pkg/sentry/fsimpl/ext/ext_test.go +++ b/pkg/sentry/fsimpl/ext/ext_test.go @@ -25,6 +25,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/ext/disklayout" @@ -140,7 +141,7 @@ func TestSeek(t *testing.T) { fd, err := vfsfs.OpenAt( ctx, auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, &vfs.OpenOptions{}, ) if err != nil { @@ -359,7 +360,7 @@ func TestStatAt(t *testing.T) { got, err := vfsfs.StatAt(ctx, auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, &vfs.StatOptions{}, ) if err != nil { @@ -429,7 +430,7 @@ func TestRead(t *testing.T) { fd, err := vfsfs.OpenAt( ctx, auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.absPath}, + &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.absPath)}, &vfs.OpenOptions{}, ) if err != nil { @@ -565,7 +566,7 @@ func TestIterDirents(t *testing.T) { fd, err := vfsfs.OpenAt( ctx, auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: *root, Start: *root, Pathname: test.path}, + &vfs.PathOperation{Root: *root, Start: *root, Path: fspath.Parse(test.path)}, &vfs.OpenOptions{}, ) if err != nil { 
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go index d7e87979a..616fc002a 100644 --- a/pkg/sentry/fsimpl/ext/filesystem.go +++ b/pkg/sentry/fsimpl/ext/filesystem.go @@ -275,6 +275,16 @@ func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op return vfsd, nil } +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + vfsd, inode, err := fs.walk(rp, true) + if err != nil { + return nil, err + } + inode.incRef() + return vfsd, nil +} + // OpenAt implements vfs.FilesystemImpl.OpenAt. func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { vfsd, inode, err := fs.walk(rp, false) @@ -378,7 +388,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v } // RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { if rp.Done() { return syserror.ENOENT } diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD index 52596c090..59f7f39e2 100644 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ b/pkg/sentry/fsimpl/kernfs/BUILD @@ -49,6 +49,7 @@ go_test( deps = [ ":kernfs", "//pkg/abi/linux", + "//pkg/fspath", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 3cbbe4b20..a6f9fced5 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -44,39 +44,37 @@ func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingP 
return nil, err } afterSymlink: + name := rp.Component() + // Revalidation must be skipped if name is "." or ".."; d or its parent + // respectively can't be expected to transition from invalidated back to + // valid, so detecting invalidation and retrying would loop forever. This + // is consistent with Linux: fs/namei.c:walk_component() => lookup_fast() + // calls d_revalidate(), but walk_component() => handle_dots() does not. + if name == "." { + rp.Advance() + return vfsd, nil + } + if name == ".." { + nextVFSD, err := rp.ResolveParent(vfsd) + if err != nil { + return nil, err + } + rp.Advance() + return nextVFSD, nil + } d.dirMu.Lock() - nextVFSD, err := rp.ResolveComponent(vfsd) - d.dirMu.Unlock() + nextVFSD, err := rp.ResolveChild(vfsd, name) if err != nil { + d.dirMu.Unlock() return nil, err } - if nextVFSD != nil { - // Cached dentry exists, revalidate. - next := nextVFSD.Impl().(*Dentry) - if !next.inode.Valid(ctx) { - d.dirMu.Lock() - rp.VirtualFilesystem().ForceDeleteDentry(nextVFSD) - d.dirMu.Unlock() - fs.deferDecRef(nextVFSD) // Reference from Lookup. - nextVFSD = nil - } - } - if nextVFSD == nil { - // Dentry isn't cached; it either doesn't exist or failed - // revalidation. Attempt to resolve it via Lookup. - name := rp.Component() - var err error - nextVFSD, err = d.inode.Lookup(ctx, name) - // Reference on nextVFSD dropped by a corresponding Valid. - if err != nil { - return nil, err - } - d.InsertChild(name, nextVFSD) + next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, nextVFSD) + d.dirMu.Unlock() + if err != nil { + return nil, err } - next := nextVFSD.Impl().(*Dentry) - // Resolve any symlink at current path component. - if rp.ShouldFollowSymlink() && d.isSymlink() { + if rp.ShouldFollowSymlink() && next.isSymlink() { // TODO: VFS2 needs something extra for /proc/[pid]/fd/ "magic symlinks". 
target, err := next.inode.Readlink(ctx) if err != nil { @@ -89,7 +87,44 @@ afterSymlink: } rp.Advance() - return nextVFSD, nil + return &next.vfsd, nil +} + +// revalidateChildLocked must be called after a call to parent.vfsd.Child(name) +// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be +// nil) to verify that the returned child (or lack thereof) is correct. +// +// Preconditions: Filesystem.mu must be locked for at least reading. +// parent.dirMu must be locked. parent.isDir(). name is not "." or "..". +// +// Postconditions: Caller must call fs.processDeferredDecRefs*. +func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, childVFSD *vfs.Dentry) (*Dentry, error) { + if childVFSD != nil { + // Cached dentry exists, revalidate. + child := childVFSD.Impl().(*Dentry) + if !child.inode.Valid(ctx) { + vfsObj.ForceDeleteDentry(childVFSD) + fs.deferDecRef(childVFSD) // Reference from Lookup. + childVFSD = nil + } + } + if childVFSD == nil { + // Dentry isn't cached; it either doesn't exist or failed + // revalidation. Attempt to resolve it via Lookup. + // + // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry, + // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries + // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl + // casts accordingly. + var err error + childVFSD, err = parent.inode.Lookup(ctx, name) + if err != nil { + return nil, err + } + // Reference on childVFSD dropped by a corresponding Valid. + parent.InsertChild(name, childVFSD) + } + return childVFSD.Impl().(*Dentry), nil } // walkExistingLocked resolves rp to an existing file. @@ -242,6 +277,19 @@ func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, op return vfsd, nil } +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. 
+func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + fs.mu.RLock() + defer fs.processDeferredDecRefs() + defer fs.mu.RUnlock() + vfsd, _, err := fs.walkParentDirLocked(ctx, rp) + if err != nil { + return nil, err + } + vfsd.IncRef() // Ownership transferred to caller. + return vfsd, nil +} + // LinkAt implements vfs.FilesystemImpl.LinkAt. func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { if rp.Done() { @@ -459,40 +507,42 @@ func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } // RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { - noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 - exchange := opts.Flags&linux.RENAME_EXCHANGE != 0 - whiteout := opts.Flags&linux.RENAME_WHITEOUT != 0 - if exchange && (noReplace || whiteout) { - // Can't specify RENAME_NOREPLACE or RENAME_WHITEOUT with RENAME_EXCHANGE. - return syserror.EINVAL - } - if exchange || whiteout { - // Exchange and Whiteout flags are not supported on kernfs. +func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + // Only RENAME_NOREPLACE is supported. + if opts.Flags&^linux.RENAME_NOREPLACE != 0 { return syserror.EINVAL } + noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 fs.mu.Lock() defer fs.mu.Lock() + // Resolve the destination directory first to verify that it's on this + // Mount. 
+ dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp) + fs.processDeferredDecRefsLocked() + if err != nil { + return err + } mnt := rp.Mount() - if mnt != vd.Mount() { + if mnt != oldParentVD.Mount() { return syserror.EXDEV } - if err := mnt.CheckBeginWrite(); err != nil { return err } defer mnt.EndWrite() - dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp) + srcDirVFSD := oldParentVD.Dentry() + srcDir := srcDirVFSD.Impl().(*Dentry) + srcDir.dirMu.Lock() + src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDirVFSD.Child(oldName)) + srcDir.dirMu.Unlock() fs.processDeferredDecRefsLocked() if err != nil { return err } - - srcVFSD := vd.Dentry() - srcDirVFSD := srcVFSD.Parent() + srcVFSD := &src.vfsd // Can we remove the src dentry? if err := checkDeleteLocked(rp, srcVFSD); err != nil { diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go index f78bb7b04..73b6e43b5 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go @@ -24,6 +24,7 @@ import ( "github.com/google/go-cmp/cmp" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" @@ -82,9 +83,9 @@ func newTestSystem(t *testing.T, rootFn RootDentryFn) *TestSystem { // Precondition: path should be relative path. 
func (s *TestSystem) PathOpAtRoot(path string) vfs.PathOperation { return vfs.PathOperation{ - Root: s.root, - Start: s.root, - Pathname: path, + Root: s.root, + Start: s.root, + Path: fspath.Parse(path), } } diff --git a/pkg/sentry/fsimpl/memfs/BUILD b/pkg/sentry/fsimpl/memfs/BUILD index 0cc751eb8..5689bed3b 100644 --- a/pkg/sentry/fsimpl/memfs/BUILD +++ b/pkg/sentry/fsimpl/memfs/BUILD @@ -50,6 +50,7 @@ go_test( deps = [ ":memfs", "//pkg/abi/linux", + "//pkg/fspath", "//pkg/refs", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", @@ -68,6 +69,7 @@ go_test( embed = [":memfs"], deps = [ "//pkg/abi/linux", + "//pkg/fspath", "//pkg/sentry/context", "//pkg/sentry/context/contexttest", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/fsimpl/memfs/benchmark_test.go b/pkg/sentry/fsimpl/memfs/benchmark_test.go index 4a7a94a52..6e987af88 100644 --- a/pkg/sentry/fsimpl/memfs/benchmark_test.go +++ b/pkg/sentry/fsimpl/memfs/benchmark_test.go @@ -21,6 +21,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/refs" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" @@ -193,9 +194,9 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, + Root: root, + Start: vd, + Path: fspath.Parse(name), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -216,7 +217,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: vd, - Pathname: filename, + Path: fspath.Parse(filename), FollowFinalSymlink: true, }, &vfs.OpenOptions{ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, @@ -237,7 +238,7 @@ func BenchmarkVFS2MemfsStat(b *testing.B) { stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: filePath, + Path: 
fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { @@ -378,9 +379,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { root := mntns.Root() defer root.DecRef() pop := vfs.PathOperation{ - Root: root, - Start: root, - Pathname: mountPointName, + Root: root, + Start: root, + Path: fspath.Parse(mountPointName), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -408,9 +409,9 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { for i := depth; i > 0; i-- { name := fmt.Sprintf("%d", i) pop := vfs.PathOperation{ - Root: root, - Start: vd, - Pathname: name, + Root: root, + Start: vd, + Path: fspath.Parse(name), } if err := vfsObj.MkdirAt(ctx, creds, &pop, &vfs.MkdirOptions{ Mode: 0755, @@ -438,7 +439,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: vd, - Pathname: filename, + Path: fspath.Parse(filename), FollowFinalSymlink: true, }, &vfs.OpenOptions{ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, @@ -458,7 +459,7 @@ func BenchmarkVFS2MemfsMountStat(b *testing.B) { stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: filePath, + Path: fspath.Parse(filePath), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { diff --git a/pkg/sentry/fsimpl/memfs/filesystem.go b/pkg/sentry/fsimpl/memfs/filesystem.go index af4389459..4a83f310c 100644 --- a/pkg/sentry/fsimpl/memfs/filesystem.go +++ b/pkg/sentry/fsimpl/memfs/filesystem.go @@ -25,323 +25,283 @@ import ( "gvisor.dev/gvisor/pkg/syserror" ) -// stepLocked resolves rp.Component() in parent directory vfsd. +// Sync implements vfs.FilesystemImpl.Sync. +func (fs *filesystem) Sync(ctx context.Context) error { + // All filesystem state is in-memory. + return nil +} + +// stepLocked resolves rp.Component() to an existing file, starting from the +// given directory. 
// // stepLocked is loosely analogous to fs/namei.c:walk_component(). // -// Preconditions: filesystem.mu must be locked. !rp.Done(). inode == -// vfsd.Impl().(*dentry).inode. -func stepLocked(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, inode *inode) (*vfs.Dentry, *inode, error) { - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func stepLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + if !d.inode.isDir() { + return nil, syserror.ENOTDIR } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { - return nil, nil, err + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + return nil, err } afterSymlink: - nextVFSD, err := rp.ResolveComponent(vfsd) + nextVFSD, err := rp.ResolveComponent(&d.vfsd) if err != nil { - return nil, nil, err + return nil, err } if nextVFSD == nil { // Since the Dentry tree is the sole source of truth for memfs, if it's // not in the Dentry tree, it doesn't exist. - return nil, nil, syserror.ENOENT + return nil, syserror.ENOENT } - nextInode := nextVFSD.Impl().(*dentry).inode - if symlink, ok := nextInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { + next := nextVFSD.Impl().(*dentry) + if symlink, ok := next.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { // TODO: symlink traversals update access time if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, nil, err + return nil, err } goto afterSymlink // don't check the current directory again } rp.Advance() - return nextVFSD, nextInode, nil + return next, nil } -// walkExistingLocked resolves rp to an existing file. +// walkParentDirLocked resolves all but the last path component of rp to an +// existing directory, starting from the given directory (which is usually +// rp.Start().Impl().(*dentry)). It does not check that the returned directory +// is searchable by the provider of rp. 
// -// walkExistingLocked is loosely analogous to Linux's -// fs/namei.c:path_lookupat(). +// walkParentDirLocked is loosely analogous to Linux's +// fs/namei.c:path_parentat(). // -// Preconditions: filesystem.mu must be locked. -func walkExistingLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Done() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) +// Preconditions: filesystem.mu must be locked. !rp.Done(). +func walkParentDirLocked(rp *vfs.ResolvingPath, d *dentry) (*dentry, error) { + for !rp.Final() { + next, err := stepLocked(rp, d) if err != nil { - return nil, nil, err + return nil, err } + d = next } - if rp.MustBeDir() && !inode.isDir() { - return nil, nil, syserror.ENOTDIR + if !d.inode.isDir() { + return nil, syserror.ENOTDIR } - return vfsd, inode, nil + return d, nil } -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. +// resolveLocked resolves rp to an existing file. // -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). +// resolveLocked is loosely analogous to Linux's fs/namei.c:path_lookupat(). // -// Preconditions: filesystem.mu must be locked. !rp.Done(). -func walkParentDirLocked(rp *vfs.ResolvingPath) (*vfs.Dentry, *inode, error) { - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) +// Preconditions: filesystem.mu must be locked. 
+func resolveLocked(rp *vfs.ResolvingPath) (*dentry, error) { + d := rp.Start().Impl().(*dentry) + for !rp.Done() { + next, err := stepLocked(rp, d) if err != nil { - return nil, nil, err + return nil, err } + d = next } - if !inode.isDir() { - return nil, nil, syserror.ENOTDIR + if rp.MustBeDir() && !d.inode.isDir() { + return nil, syserror.ENOTDIR } - return vfsd, inode, nil + return d, nil } -// checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). +// doCreateAt checks that creating a file at rp is permitted, then invokes +// create to do so. // -// Preconditions: filesystem.mu must be locked. parentInode == -// parentVFSD.Impl().(*dentry).inode. parentInode.isDir() == true. -func checkCreateLocked(rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode *inode) (string, error) { - if err := parentInode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true); err != nil { - return "", err - } - pc := rp.Component() - if pc == "." || pc == ".." { - return "", syserror.EEXIST - } - childVFSD, err := rp.ResolveChild(parentVFSD, pc) +// doCreateAt is loosely analogous to a conjunction of Linux's +// fs/namei.c:filename_create() and done_path_create(). +// +// Preconditions: !rp.Done(). For the final path component in rp, +// !rp.ShouldFollowSymlink(). +func (fs *filesystem) doCreateAt(rp *vfs.ResolvingPath, dir bool, create func(parent *dentry, name string) error) error { + fs.mu.Lock() + defer fs.mu.Unlock() + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { - return "", err + return err } - if childVFSD != nil { - return "", syserror.EEXIST + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err } - if parentVFSD.IsDisowned() { - return "", syserror.ENOENT + name := rp.Component() + if name == "." || name == ".." 
{ + return syserror.EEXIST } - return pc, nil -} - -// checkDeleteLocked checks that the file represented by vfsd may be deleted. -func checkDeleteLocked(vfsd *vfs.Dentry) error { - parentVFSD := vfsd.Parent() - if parentVFSD == nil { - return syserror.EBUSY + // Call parent.vfsd.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the child exists we want to return EEXIST immediately instead + // of attempting symlink/mount traversal. + if parent.vfsd.Child(name) != nil { + return syserror.EEXIST } - if parentVFSD.IsDisowned() { + if !dir && rp.MustBeDir() { return syserror.ENOENT } - return nil + // In memfs, the only way to cause a dentry to be disowned is by removing + // it from the filesystem, so this check is equivalent to checking if + // parent has been removed. + if parent.vfsd.IsDisowned() { + return syserror.ENOENT + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + return create(parent, name) } // GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. func (fs *filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { fs.mu.RLock() defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } if opts.CheckSearchable { - if !inode.isDir() { + if !d.inode.isDir() { return nil, syserror.ENOTDIR } - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + if err := d.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true /* isDir */); err != nil { return nil, err } } - inode.incRef() - return vfsd, nil + d.IncRef() + return &d.vfsd, nil } -// LinkAt implements vfs.FilesystemImpl.LinkAt. 
-func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) +// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. +func (fs *filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { + fs.mu.RLock() + defer fs.mu.RUnlock() + d, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { - return err - } - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - d := vd.Dentry().Impl().(*dentry) - if d.inode.isDir() { - return syserror.EPERM + return nil, err } - d.inode.incLinksLocked() - child := fs.newDentry(d.inode) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil + d.IncRef() + return &d.vfsd, nil +} + +// LinkAt implements vfs.FilesystemImpl.LinkAt. +func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + if rp.Mount() != vd.Mount() { + return syserror.EXDEV + } + d := vd.Dentry().Impl().(*dentry) + if d.inode.isDir() { + return syserror.EPERM + } + if d.inode.nlink == 0 { + return syserror.ENOENT + } + if d.inode.nlink == maxLinks { + return syserror.EMLINK + } + d.inode.incLinksLocked() + child := fs.newDentry(d.inode) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // MkdirAt implements vfs.FilesystemImpl.MkdirAt. 
func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - parentInode.incLinksLocked() // from child's ".." - return nil + return fs.doCreateAt(rp, true /* dir */, func(parent *dentry, name string) error { + if parent.inode.nlink == maxLinks { + return syserror.EMLINK + } + parent.inode.incLinksLocked() // from child's ".." + child := fs.newDentry(fs.newDirectory(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // MknodAt implements vfs.FilesystemImpl.MknodAt. func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - - switch opts.Mode.FileType() { - case 0: - // "Zero file type is equivalent to type S_IFREG." - mknod(2) - fallthrough - case linux.ModeRegular: - // TODO(b/138862511): Implement. 
- return syserror.EINVAL - - case linux.ModeNamedPipe: - child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil - - case linux.ModeSocket: - // TODO(b/138862511): Implement. - return syserror.EINVAL - - case linux.ModeCharacterDevice: - fallthrough - case linux.ModeBlockDevice: - // TODO(b/72101894): We don't support creating block or character - // devices at the moment. - // - // When we start supporting block and character devices, we'll - // need to check for CAP_MKNOD here. - return syserror.EPERM - - default: - // "EINVAL - mode requested creation of something other than a - // regular file, device special file, FIFO or socket." - mknod(2) - return syserror.EINVAL - } + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + switch opts.Mode.FileType() { + case 0, linux.S_IFREG: + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFIFO: + child := fs.newDentry(fs.newNamedPipe(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + case linux.S_IFBLK, linux.S_IFCHR, linux.S_IFSOCK: + // Not yet supported. + return syserror.EPERM + default: + return syserror.EINVAL + } + }) } // OpenAt implements vfs.FilesystemImpl.OpenAt. func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - // Filter out flags that are not supported by memfs. O_DIRECTORY and - // O_NOFOLLOW have no effect here (they're handled by VFS by setting - // appropriate bits in rp), but are visible in FD status flags. O_NONBLOCK - // is supported only by pipes. 
- opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NONBLOCK + if opts.Flags&linux.O_TMPFILE != 0 { + // Not yet supported. + return nil, syserror.EOPNOTSUPP + } + // Handle O_CREAT and !O_CREAT separately, since in the latter case we + // don't need fs.mu for writing. if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() defer fs.mu.RUnlock() - vfsd, inode, err := walkExistingLocked(rp) + d, err := resolveLocked(rp) if err != nil { return nil, err } - return inode.open(ctx, rp, vfsd, opts.Flags, false) + return d.open(ctx, rp, opts.Flags, false /* afterCreate */) } mustCreate := opts.Flags&linux.O_EXCL != 0 - vfsd := rp.Start() - inode := vfsd.Impl().(*dentry).inode + start := rp.Start().Impl().(*dentry) fs.mu.Lock() defer fs.mu.Unlock() if rp.Done() { + // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } if mustCreate { return nil, syserror.EEXIST } - return inode.open(ctx, rp, vfsd, opts.Flags, false) + return start.open(ctx, rp, opts.Flags, false /* afterCreate */) } afterTrailingSymlink: - // Walk to the parent directory of the last path component. - for !rp.Final() { - var err error - vfsd, inode, err = stepLocked(rp, vfsd, inode) - if err != nil { - return nil, err - } - } - if !inode.isDir() { - return nil, syserror.ENOTDIR + parent, err := walkParentDirLocked(rp, start) + if err != nil { + return nil, err } // Check for search permission in the parent directory. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayExec, true); err != nil { return nil, err } // Reject attempts to open directories with O_CREAT. if rp.MustBeDir() { return nil, syserror.EISDIR } - pc := rp.Component() - if pc == "." || pc == ".." { + name := rp.Component() + if name == "." || name == ".." 
{ return nil, syserror.EISDIR } // Determine whether or not we need to create a file. - childVFSD, err := rp.ResolveChild(vfsd, pc) - if err != nil { - return nil, err - } - if childVFSD == nil { + child, err := stepLocked(rp, parent) + if err == syserror.ENOENT { // Already checked for searchability above; now check for writability. - if err := inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true); err != nil { return nil, err } if err := rp.Mount().CheckBeginWrite(); err != nil { @@ -349,38 +309,35 @@ afterTrailingSymlink: } defer rp.Mount().EndWrite() // Create and open the child. - childInode := fs.newRegularFile(rp.Credentials(), opts.Mode) - child := fs.newDentry(childInode) - vfsd.InsertChild(&child.vfsd, pc) - inode.impl.(*directory).childList.PushBack(child) - return childInode.open(ctx, rp, &child.vfsd, opts.Flags, true) + child := fs.newDentry(fs.newRegularFile(rp.Credentials(), opts.Mode)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return child.open(ctx, rp, opts.Flags, true) } - // Open existing file or follow symlink. - if mustCreate { - return nil, syserror.EEXIST + if err != nil { + return nil, err } - childInode := childVFSD.Impl().(*dentry).inode - if symlink, ok := childInode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO: symlink traversals update access time - if err := rp.HandleSymlink(symlink.target); err != nil { - return nil, err - } - // rp.Final() may no longer be true since we now need to resolve the - // symlink target. + // Do we need to resolve a trailing symlink? + if !rp.Done() { + start = parent goto afterTrailingSymlink } - return childInode.open(ctx, rp, childVFSD, opts.Flags, false) + // Open existing file. 
+ if mustCreate { + return nil, syserror.EEXIST + } + return child.open(ctx, rp, opts.Flags, false) } -func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { +func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, flags uint32, afterCreate bool) (*vfs.FileDescription, error) { ats := vfs.AccessTypesForOpenFlags(flags) if !afterCreate { - if err := i.checkPermissions(rp.Credentials(), ats, i.isDir()); err != nil { + if err := d.inode.checkPermissions(rp.Credentials(), ats, d.inode.isDir()); err != nil { return nil, err } } mnt := rp.Mount() - switch impl := i.impl.(type) { + switch impl := d.inode.impl.(type) { case *regularFile: var fd regularFileFD fd.readable = vfs.MayReadFileWithOpenFlags(flags) @@ -392,8 +349,8 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr // mnt.EndWrite() is called by regularFileFD.Release(). } mnt.IncRef() - vfsd.IncRef() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + d.IncRef() + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) if flags&linux.O_TRUNC != 0 { impl.mu.Lock() impl.data = impl.data[:0] @@ -408,28 +365,28 @@ func (i *inode) open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr } var fd directoryFD mnt.IncRef() - vfsd.IncRef() - fd.vfsfd.Init(&fd, flags, mnt, vfsd, &vfs.FileDescriptionOptions{}) + d.IncRef() + fd.vfsfd.Init(&fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}) return &fd.vfsfd, nil case *symlink: // Can't open symlinks without O_PATH (which is unimplemented). return nil, syserror.ELOOP case *namedPipe: - return newNamedPipeFD(ctx, impl, rp, vfsd, flags) + return newNamedPipeFD(ctx, impl, rp, &d.vfsd, flags) default: - panic(fmt.Sprintf("unknown inode type: %T", i.impl)) + panic(fmt.Sprintf("unknown inode type: %T", d.inode.impl)) } } // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 
func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) if err != nil { return "", err } - symlink, ok := inode.impl.(*symlink) + symlink, ok := d.inode.impl.(*symlink) if !ok { return "", syserror.EINVAL } @@ -437,63 +394,172 @@ func (fs *filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (st } // RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry, opts vfs.RenameOptions) error { - if rp.Done() { - return syserror.ENOENT +func (fs *filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { + if opts.Flags != 0 { + // TODO(b/145974740): Support renameat2 flags. + return syserror.EINVAL } + + // Resolve newParent first to verify that it's on this Mount. fs.mu.Lock() defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) + newParent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - _, err = checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { + newName := rp.Component() + if newName == "." || newName == ".." 
{ + return syserror.EBUSY + } + mnt := rp.Mount() + if mnt != oldParentVD.Mount() { + return syserror.EXDEV + } + if err := mnt.CheckBeginWrite(); err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + defer mnt.EndWrite() + + oldParent := oldParentVD.Dentry().Impl().(*dentry) + if err := oldParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - // TODO: actually implement RenameAt - return syserror.EPERM + // Call vfs.Dentry.Child() instead of stepLocked() or rp.ResolveChild(), + // because if the existing child is a symlink or mount point then we want + // to rename over it rather than follow it. + renamedVFSD := oldParent.vfsd.Child(oldName) + if renamedVFSD == nil { + return syserror.ENOENT + } + renamed := renamedVFSD.Impl().(*dentry) + if renamed.inode.isDir() { + if renamed == newParent || renamedVFSD.IsAncestorOf(&newParent.vfsd) { + return syserror.EINVAL + } + if oldParent != newParent { + // Writability is needed to change renamed's "..". 
+ if err := renamed.inode.checkPermissions(rp.Credentials(), vfs.MayWrite, true /* isDir */); err != nil { + return err + } + } + } else { + if opts.MustBeDir || rp.MustBeDir() { + return syserror.ENOTDIR + } + } + + if err := newParent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { + return err + } + replacedVFSD := newParent.vfsd.Child(newName) + var replaced *dentry + if replacedVFSD != nil { + replaced = replacedVFSD.Impl().(*dentry) + if replaced.inode.isDir() { + if !renamed.inode.isDir() { + return syserror.EISDIR + } + if replaced.vfsd.HasChildren() { + return syserror.ENOTEMPTY + } + } else { + if rp.MustBeDir() { + return syserror.ENOTDIR + } + if renamed.inode.isDir() { + return syserror.ENOTDIR + } + } + } else { + if renamed.inode.isDir() && newParent.inode.nlink == maxLinks { + return syserror.EMLINK + } + } + if newParent.vfsd.IsDisowned() { + return syserror.ENOENT + } + + // Linux places this check before some of those above; we do it here for + // simplicity, under the assumption that applications are not intentionally + // doing noop renames expecting them to succeed where non-noop renames + // would fail. + if renamedVFSD == replacedVFSD { + return nil + } + vfsObj := rp.VirtualFilesystem() + oldParentDir := oldParent.inode.impl.(*directory) + newParentDir := newParent.inode.impl.(*directory) + if err := vfsObj.PrepareRenameDentry(vfs.MountNamespaceFromContext(ctx), renamedVFSD, replacedVFSD); err != nil { + return err + } + if replaced != nil { + newParentDir.childList.Remove(replaced) + if replaced.inode.isDir() { + newParent.inode.decLinksLocked() // from replaced's ".." 
+ } + replaced.inode.decLinksLocked() + } + oldParentDir.childList.Remove(renamed) + newParentDir.childList.PushBack(renamed) + if renamed.inode.isDir() { + oldParent.inode.decLinksLocked() + newParent.inode.incLinksLocked() + } + // TODO: update timestamps and parent directory sizes + vfsObj.CommitRenameReplaceDentry(renamedVFSD, &newParent.vfsd, newName, replacedVFSD) + return nil } // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err + name := rp.Component() + if name == "." { + return syserror.EINVAL } - if !inode.isDir() { + if name == ".." { + return syserror.ENOTEMPTY + } + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if !child.inode.isDir() { return syserror.ENOTDIR } - if vfsd.HasChildren() { + if childVFSD.HasChildren() { return syserror.ENOTEMPTY } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { return err } - // Remove from parent directory's childList. 
- vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decRef() + parent.inode.impl.(*directory).childList.Remove(child) + parent.inode.decLinksLocked() // from child's ".." + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) return nil } // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) if err != nil { return err } @@ -507,21 +573,21 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts // StatAt implements vfs.FilesystemImpl.StatAt. func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() - _, inode, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + d, err := resolveLocked(rp) if err != nil { return linux.Statx{}, err } var stat linux.Statx - inode.statTo(&stat) + d.inode.statTo(&stat) return stat, nil } // StatFSAt implements vfs.FilesystemImpl.StatFSAt. func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() - _, _, err := walkExistingLocked(rp) - fs.mu.RUnlock() + defer fs.mu.RUnlock() + _, err := resolveLocked(rp) if err != nil { return linux.Statfs{}, err } @@ -531,53 +597,52 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. 
func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := walkParentDirLocked(rp) - if err != nil { - return err - } - pc, err := checkCreateLocked(rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) - parentVFSD.InsertChild(&child.vfsd, pc) - parentInode.impl.(*directory).childList.PushBack(child) - return nil + return fs.doCreateAt(rp, false /* dir */, func(parent *dentry, name string) error { + child := fs.newDentry(fs.newSymlink(rp.Credentials(), target)) + parent.vfsd.InsertChild(&child.vfsd, name) + parent.inode.impl.(*directory).childList.PushBack(child) + return nil + }) } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() defer fs.mu.Unlock() - vfsd, inode, err := walkExistingLocked(rp) + parent, err := walkParentDirLocked(rp, rp.Start().Impl().(*dentry)) if err != nil { return err } - if err := rp.Mount().CheckBeginWrite(); err != nil { + if err := parent.inode.checkPermissions(rp.Credentials(), vfs.MayWrite|vfs.MayExec, true /* isDir */); err != nil { return err } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(vfsd); err != nil { - return err + name := rp.Component() + if name == "." || name == ".." 
{ + return syserror.EISDIR } - if inode.isDir() { + childVFSD := parent.vfsd.Child(name) + if childVFSD == nil { + return syserror.ENOENT + } + child := childVFSD.Impl().(*dentry) + if child.inode.isDir() { return syserror.EISDIR } - if err := rp.VirtualFilesystem().DeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { + if rp.MustBeDir() { + return syserror.ENOTDIR + } + mnt := rp.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + vfsObj := rp.VirtualFilesystem() + if err := vfsObj.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), childVFSD); err != nil { return err } - // Remove from parent directory's childList. - vfsd.Parent().Impl().(*dentry).inode.impl.(*directory).childList.Remove(vfsd.Impl().(*dentry)) - inode.decLinksLocked() + parent.inode.impl.(*directory).childList.Remove(child) + child.inode.decLinksLocked() + vfsObj.CommitDeleteDentry(childVFSD) return nil } @@ -585,7 +650,7 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return nil, err } @@ -597,7 +662,7 @@ func (fs *filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return "", err } @@ -609,7 +674,7 @@ func (fs *filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, nam func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return err } @@ -621,7 +686,7
@@ func (fs *filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() defer fs.mu.RUnlock() - _, _, err := walkExistingLocked(rp) + _, err := resolveLocked(rp) if err != nil { return err } diff --git a/pkg/sentry/fsimpl/memfs/memfs.go b/pkg/sentry/fsimpl/memfs/memfs.go index 9d509f6e4..8d0167c93 100644 --- a/pkg/sentry/fsimpl/memfs/memfs.go +++ b/pkg/sentry/fsimpl/memfs/memfs.go @@ -29,6 +29,7 @@ package memfs import ( "fmt" + "math" "sync" "sync/atomic" @@ -64,12 +65,6 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt func (fs *filesystem) Release() { } -// Sync implements vfs.FilesystemImpl.Sync. -func (fs *filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - // dentry implements vfs.DentryImpl. type dentry struct { vfsd vfs.Dentry @@ -137,6 +132,8 @@ type inode struct { impl interface{} // immutable } +const maxLinks = math.MaxUint32 + func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, mode linux.FileMode) { i.refs = 1 i.mode = uint32(mode) @@ -147,20 +144,28 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials, i.impl = impl } -// Preconditions: filesystem.mu must be locked for writing. +// incLinksLocked increments i's link count. +// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. +// i.nlink < maxLinks. func (i *inode) incLinksLocked() { - if atomic.AddUint32(&i.nlink, 1) <= 1 { + if i.nlink == 0 { panic("memfs.inode.incLinksLocked() called with no existing links") } + if i.nlink == maxLinks { + panic("memfs.inode.incLinksLocked() called with maximum link count") + } + atomic.AddUint32(&i.nlink, 1) } -// Preconditions: filesystem.mu must be locked for writing. +// decLinksLocked decrements i's link count. 
+// +// Preconditions: filesystem.mu must be locked for writing. i.nlink != 0. func (i *inode) decLinksLocked() { - if nlink := atomic.AddUint32(&i.nlink, ^uint32(0)); nlink == 0 { - i.decRef() - } else if nlink == ^uint32(0) { // negative overflow + if i.nlink == 0 { panic("memfs.inode.decLinksLocked() called with no existing links") } + atomic.AddUint32(&i.nlink, ^uint32(0)) } func (i *inode) incRef() { diff --git a/pkg/sentry/fsimpl/memfs/pipe_test.go b/pkg/sentry/fsimpl/memfs/pipe_test.go index 5bf527c80..be917aeee 100644 --- a/pkg/sentry/fsimpl/memfs/pipe_test.go +++ b/pkg/sentry/fsimpl/memfs/pipe_test.go @@ -19,6 +19,7 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/context/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -38,7 +39,7 @@ func TestSeparateFDs(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } rfdchan := make(chan *vfs.FileDescription) @@ -76,7 +77,7 @@ func TestNonblockingRead(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_NONBLOCK} @@ -108,7 +109,7 @@ func TestNonblockingWriteError(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_WRONLY | linux.O_NONBLOCK} @@ -126,7 +127,7 @@ func TestSingleFD(t *testing.T) { pop := vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, } openOpts := vfs.OpenOptions{Flags: linux.O_RDWR} @@ -160,10 +161,9 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy // Create the pipe. 
root := mntns.Root() pop := vfs.PathOperation{ - Root: root, - Start: root, - Pathname: fileName, - FollowFinalSymlink: true, + Root: root, + Start: root, + Path: fspath.Parse(fileName), } mknodOpts := vfs.MknodOptions{Mode: linux.ModeNamedPipe | 0644} if err := vfsObj.MknodAt(ctx, creds, &pop, &mknodOpts); err != nil { @@ -174,7 +174,7 @@ func setup(t *testing.T) (context.Context, *auth.Credentials, *vfs.VirtualFilesy stat, err := vfsObj.StatAt(ctx, creds, &vfs.PathOperation{ Root: root, Start: root, - Pathname: fileName, + Path: fspath.Parse(fileName), FollowFinalSymlink: true, }, &vfs.StatOptions{}) if err != nil { |