diff options
Diffstat (limited to 'pkg/sentry/fsimpl/kernfs')
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/BUILD | 63 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go | 132 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 201 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/filesystem.go | 768 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 556 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs.go | 422 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs_test.go | 317 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/symlink.go | 54 |
8 files changed, 0 insertions, 2513 deletions
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD deleted file mode 100644 index 66d409785..000000000 --- a/pkg/sentry/fsimpl/kernfs/BUILD +++ /dev/null @@ -1,63 +0,0 @@ -load("//tools/go_stateify:defs.bzl", "go_library") -load("@io_bazel_rules_go//go:def.bzl", "go_test") -load("//tools/go_generics:defs.bzl", "go_template_instance") - -package(licenses = ["notice"]) - -go_template_instance( - name = "slot_list", - out = "slot_list.go", - package = "kernfs", - prefix = "slot", - template = "//pkg/ilist:generic_list", - types = { - "Element": "*slot", - "Linker": "*slot", - }, -) - -go_library( - name = "kernfs", - srcs = [ - "dynamic_bytes_file.go", - "fd_impl_util.go", - "filesystem.go", - "inode_impl_util.go", - "kernfs.go", - "slot_list.go", - "symlink.go", - ], - importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs", - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/fspath", - "//pkg/log", - "//pkg/refs", - "//pkg/sentry/context", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/memmap", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/sync", - "//pkg/syserror", - ], -) - -go_test( - name = "kernfs_test", - size = "small", - srcs = ["kernfs_test.go"], - deps = [ - ":kernfs", - "//pkg/abi/linux", - "//pkg/sentry/context", - "//pkg/sentry/context/contexttest", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/usermem", - "//pkg/sentry/vfs", - "//pkg/syserror", - "@com_github_google_go-cmp//cmp:go_default_library", - ], -) diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go deleted file mode 100644 index 75624e0b1..000000000 --- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernfs - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// DynamicBytesFile implements kernfs.Inode and represents a read-only -// file whose contents are backed by a vfs.DynamicBytesSource. -// -// Must be instantiated with NewDynamicBytesFile or initialized with Init -// before first use. -// -// +stateify savable -type DynamicBytesFile struct { - InodeAttrs - InodeNoopRefCount - InodeNotDirectory - InodeNotSymlink - - data vfs.DynamicBytesSource -} - -var _ Inode = (*DynamicBytesFile)(nil) - -// Init initializes a dynamic bytes file. -func (f *DynamicBytesFile) Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode) { - if perm&^linux.PermissionsMask != 0 { - panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask)) - } - f.InodeAttrs.Init(creds, ino, linux.ModeRegular|perm) - f.data = data -} - -// Open implements Inode.Open. -func (f *DynamicBytesFile) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - fd := &DynamicBytesFD{} - if err := fd.Init(rp.Mount(), vfsd, f.data, flags); err != nil { - return nil, err - } - return &fd.vfsfd, nil -} - -// SetStat implements Inode.SetStat. -func (f *DynamicBytesFile) SetStat(*vfs.Filesystem, vfs.SetStatOptions) error { - // DynamicBytesFiles are immutable. 
- return syserror.EPERM -} - -// DynamicBytesFD implements vfs.FileDescriptionImpl for an FD backed by a -// DynamicBytesFile. -// -// Must be initialized with Init before first use. -// -// +stateify savable -type DynamicBytesFD struct { - vfs.FileDescriptionDefaultImpl - vfs.DynamicBytesFileDescriptionImpl - - vfsfd vfs.FileDescription - inode Inode -} - -// Init initializes a DynamicBytesFD. -func (fd *DynamicBytesFD) Init(m *vfs.Mount, d *vfs.Dentry, data vfs.DynamicBytesSource, flags uint32) error { - if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { - return err - } - fd.inode = d.Impl().(*Dentry).inode - fd.SetDataSource(data) - return nil -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *DynamicBytesFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - return fd.DynamicBytesFileDescriptionImpl.Seek(ctx, offset, whence) -} - -// Read implmenets vfs.FileDescriptionImpl.Read. -func (fd *DynamicBytesFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return fd.DynamicBytesFileDescriptionImpl.Read(ctx, dst, opts) -} - -// PRead implmenets vfs.FileDescriptionImpl.PRead. -func (fd *DynamicBytesFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return fd.DynamicBytesFileDescriptionImpl.PRead(ctx, dst, offset, opts) -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *DynamicBytesFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return fd.FileDescriptionDefaultImpl.Write(ctx, src, opts) -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *DynamicBytesFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return fd.FileDescriptionDefaultImpl.PWrite(ctx, src, offset, opts) -} - -// Release implements vfs.FileDescriptionImpl.Release. 
-func (fd *DynamicBytesFD) Release() {} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - fs := fd.vfsfd.VirtualDentry().Mount().Filesystem() - return fd.inode.Stat(fs), nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *DynamicBytesFD) SetStat(context.Context, vfs.SetStatOptions) error { - // DynamicBytesFiles are immutable. - return syserror.EPERM -} diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go deleted file mode 100644 index 5fa1fa67b..000000000 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// GenericDirectoryFD implements vfs.FileDescriptionImpl for a generic directory -// inode that uses OrderChildren to track child nodes. GenericDirectoryFD is not -// compatible with dynamic directories. -// -// Note that GenericDirectoryFD holds a lock over OrderedChildren while calling -// IterDirents callback. 
The IterDirents callback therefore cannot hash or -// unhash children, or recursively call IterDirents on the same underlying -// inode. -// -// Must be initialize with Init before first use. -type GenericDirectoryFD struct { - vfs.FileDescriptionDefaultImpl - vfs.DirectoryFileDescriptionDefaultImpl - - vfsfd vfs.FileDescription - children *OrderedChildren - off int64 -} - -// Init initializes a GenericDirectoryFD. -func (fd *GenericDirectoryFD) Init(m *vfs.Mount, d *vfs.Dentry, children *OrderedChildren, flags uint32) error { - if vfs.AccessTypesForOpenFlags(flags)&vfs.MayWrite != 0 { - // Can't open directories for writing. - return syserror.EISDIR - } - if err := fd.vfsfd.Init(fd, flags, m, d, &vfs.FileDescriptionOptions{}); err != nil { - return err - } - fd.children = children - return nil -} - -// VFSFileDescription returns a pointer to the vfs.FileDescription representing -// this object. -func (fd *GenericDirectoryFD) VFSFileDescription() *vfs.FileDescription { - return &fd.vfsfd -} - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. -func (fd *GenericDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - return fd.FileDescriptionDefaultImpl.ConfigureMMap(ctx, opts) -} - -// Read implmenets vfs.FileDescriptionImpl.Read. -func (fd *GenericDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - return fd.DirectoryFileDescriptionDefaultImpl.Read(ctx, dst, opts) -} - -// PRead implmenets vfs.FileDescriptionImpl.PRead. -func (fd *GenericDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - return fd.DirectoryFileDescriptionDefaultImpl.PRead(ctx, dst, offset, opts) -} - -// Write implements vfs.FileDescriptionImpl.Write. 
-func (fd *GenericDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - return fd.DirectoryFileDescriptionDefaultImpl.Write(ctx, src, opts) -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *GenericDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - return fd.DirectoryFileDescriptionDefaultImpl.PWrite(ctx, src, offset, opts) -} - -// Release implements vfs.FileDecriptionImpl.Release. -func (fd *GenericDirectoryFD) Release() {} - -func (fd *GenericDirectoryFD) filesystem() *vfs.Filesystem { - return fd.vfsfd.VirtualDentry().Mount().Filesystem() -} - -func (fd *GenericDirectoryFD) inode() Inode { - return fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode -} - -// IterDirents implements vfs.FileDecriptionImpl.IterDirents. IterDirents holds -// o.mu when calling cb. -func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error { - vfsFS := fd.filesystem() - fs := vfsFS.Impl().(*Filesystem) - vfsd := fd.vfsfd.VirtualDentry().Dentry() - - fs.mu.Lock() - defer fs.mu.Unlock() - - // Handle ".". - if fd.off == 0 { - stat := fd.inode().Stat(vfsFS) - dirent := vfs.Dirent{ - Name: ".", - Type: linux.DT_DIR, - Ino: stat.Ino, - NextOff: 1, - } - if !cb.Handle(dirent) { - return nil - } - fd.off++ - } - - // Handle "..". - if fd.off == 1 { - parentInode := vfsd.ParentOrSelf().Impl().(*Dentry).inode - stat := parentInode.Stat(vfsFS) - dirent := vfs.Dirent{ - Name: "..", - Type: linux.FileMode(stat.Mode).DirentType(), - Ino: stat.Ino, - NextOff: 2, - } - if !cb.Handle(dirent) { - return nil - } - fd.off++ - } - - // Handle static children. - fd.children.mu.RLock() - defer fd.children.mu.RUnlock() - // fd.off accounts for "." and "..", but fd.children do not track - // these. 
- childIdx := fd.off - 2 - for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() { - inode := it.Dentry.Impl().(*Dentry).inode - stat := inode.Stat(vfsFS) - dirent := vfs.Dirent{ - Name: it.Name, - Type: linux.FileMode(stat.Mode).DirentType(), - Ino: stat.Ino, - NextOff: fd.off + 1, - } - if !cb.Handle(dirent) { - return nil - } - fd.off++ - } - - var err error - relOffset := fd.off - int64(len(fd.children.set)) - 2 - fd.off, err = fd.inode().IterDirents(ctx, cb, fd.off, relOffset) - return err -} - -// Seek implements vfs.FileDecriptionImpl.Seek. -func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - fs := fd.filesystem().Impl().(*Filesystem) - fs.mu.Lock() - defer fs.mu.Unlock() - - switch whence { - case linux.SEEK_SET: - // Use offset as given. - case linux.SEEK_CUR: - offset += fd.off - default: - return 0, syserror.EINVAL - } - if offset < 0 { - return 0, syserror.EINVAL - } - fd.off = offset - return offset, nil -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *GenericDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - fs := fd.filesystem() - inode := fd.inode() - return inode.Stat(fs), nil -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - fs := fd.filesystem() - inode := fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode - return inode.SetStat(fs, opts) -} diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go deleted file mode 100644 index a4600ad47..000000000 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ /dev/null @@ -1,768 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file implements vfs.FilesystemImpl for kernfs. - -package kernfs - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// stepExistingLocked resolves rp.Component() in parent directory vfsd. -// -// stepExistingLocked is loosely analogous to fs/namei.c:walk_component(). -// -// Preconditions: Filesystem.mu must be locked for at least reading. !rp.Done(). -// -// Postcondition: Caller must call fs.processDeferredDecRefs*. -func (fs *Filesystem) stepExistingLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry) (*vfs.Dentry, error) { - d := vfsd.Impl().(*Dentry) - if !d.isDir() { - return nil, syserror.ENOTDIR - } - // Directory searchable? - if err := d.inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { - return nil, err - } -afterSymlink: - name := rp.Component() - // Revalidation must be skipped if name is "." or ".."; d or its parent - // respectively can't be expected to transition from invalidated back to - // valid, so detecting invalidation and retrying would loop forever. This - // is consistent with Linux: fs/namei.c:walk_component() => lookup_fast() - // calls d_revalidate(), but walk_component() => handle_dots() does not. - if name == "." { - rp.Advance() - return vfsd, nil - } - if name == ".." 
{ - nextVFSD, err := rp.ResolveParent(vfsd) - if err != nil { - return nil, err - } - rp.Advance() - return nextVFSD, nil - } - d.dirMu.Lock() - nextVFSD, err := rp.ResolveChild(vfsd, name) - if err != nil { - d.dirMu.Unlock() - return nil, err - } - next, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), d, name, nextVFSD) - d.dirMu.Unlock() - if err != nil { - return nil, err - } - // Resolve any symlink at current path component. - if rp.ShouldFollowSymlink() && next.isSymlink() { - // TODO: VFS2 needs something extra for /proc/[pid]/fd/ "magic symlinks". - target, err := next.inode.Readlink(ctx) - if err != nil { - return nil, err - } - if err := rp.HandleSymlink(target); err != nil { - return nil, err - } - goto afterSymlink - - } - rp.Advance() - return &next.vfsd, nil -} - -// revalidateChildLocked must be called after a call to parent.vfsd.Child(name) -// or vfs.ResolvingPath.ResolveChild(name) returns childVFSD (which may be -// nil) to verify that the returned child (or lack thereof) is correct. -// -// Preconditions: Filesystem.mu must be locked for at least reading. -// parent.dirMu must be locked. parent.isDir(). name is not "." or "..". -// -// Postconditions: Caller must call fs.processDeferredDecRefs*. -func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *Dentry, name string, childVFSD *vfs.Dentry) (*Dentry, error) { - if childVFSD != nil { - // Cached dentry exists, revalidate. - child := childVFSD.Impl().(*Dentry) - if !child.inode.Valid(ctx) { - vfsObj.ForceDeleteDentry(childVFSD) - fs.deferDecRef(childVFSD) // Reference from Lookup. - childVFSD = nil - } - } - if childVFSD == nil { - // Dentry isn't cached; it either doesn't exist or failed - // revalidation. Attempt to resolve it via Lookup. 
- // - // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry, - // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries - // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl - // casts accordingly. - var err error - childVFSD, err = parent.inode.Lookup(ctx, name) - if err != nil { - return nil, err - } - // Reference on childVFSD dropped by a corresponding Valid. - parent.insertChildLocked(name, childVFSD) - } - return childVFSD.Impl().(*Dentry), nil -} - -// walkExistingLocked resolves rp to an existing file. -// -// walkExistingLocked is loosely analogous to Linux's -// fs/namei.c:path_lookupat(). -// -// Preconditions: Filesystem.mu must be locked for at least reading. -// -// Postconditions: Caller must call fs.processDeferredDecRefs*. -func (fs *Filesystem) walkExistingLocked(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, Inode, error) { - vfsd := rp.Start() - for !rp.Done() { - var err error - vfsd, err = fs.stepExistingLocked(ctx, rp, vfsd) - if err != nil { - return nil, nil, err - } - } - d := vfsd.Impl().(*Dentry) - if rp.MustBeDir() && !d.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, d.inode, nil -} - -// walkParentDirLocked resolves all but the last path component of rp to an -// existing directory. It does not check that the returned directory is -// searchable by the provider of rp. -// -// walkParentDirLocked is loosely analogous to Linux's -// fs/namei.c:path_parentat(). -// -// Preconditions: Filesystem.mu must be locked for at least reading. !rp.Done(). -// -// Postconditions: Caller must call fs.processDeferredDecRefs*. 
-func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, Inode, error) { - vfsd := rp.Start() - for !rp.Final() { - var err error - vfsd, err = fs.stepExistingLocked(ctx, rp, vfsd) - if err != nil { - return nil, nil, err - } - } - d := vfsd.Impl().(*Dentry) - if !d.isDir() { - return nil, nil, syserror.ENOTDIR - } - return vfsd, d.inode, nil -} - -// checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). -// -// Preconditions: Filesystem.mu must be locked for at least reading. parentInode -// == parentVFSD.Impl().(*Dentry).Inode. isDir(parentInode) == true. -func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parentVFSD *vfs.Dentry, parentInode Inode) (string, error) { - if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { - return "", err - } - pc := rp.Component() - if pc == "." || pc == ".." { - return "", syserror.EEXIST - } - childVFSD, err := rp.ResolveChild(parentVFSD, pc) - if err != nil { - return "", err - } - if childVFSD != nil { - return "", syserror.EEXIST - } - if parentVFSD.IsDisowned() { - return "", syserror.ENOENT - } - return pc, nil -} - -// checkDeleteLocked checks that the file represented by vfsd may be deleted. -// -// Preconditions: Filesystem.mu must be locked for at least reading. -func checkDeleteLocked(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry) error { - parentVFSD := vfsd.Parent() - if parentVFSD == nil { - return syserror.EBUSY - } - if parentVFSD.IsDisowned() { - return syserror.ENOENT - } - if err := parentVFSD.Impl().(*Dentry).inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite|vfs.MayExec); err != nil { - return err - } - return nil -} - -// Release implements vfs.FilesystemImpl.Release. -func (fs *Filesystem) Release() { -} - -// Sync implements vfs.FilesystemImpl.Sync. 
-func (fs *Filesystem) Sync(ctx context.Context) error { - // All filesystem state is in-memory. - return nil -} - -// GetDentryAt implements vfs.FilesystemImpl.GetDentryAt. -func (fs *Filesystem) GetDentryAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetDentryOptions) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.processDeferredDecRefs() - defer fs.mu.RUnlock() - vfsd, inode, err := fs.walkExistingLocked(ctx, rp) - if err != nil { - return nil, err - } - - if opts.CheckSearchable { - d := vfsd.Impl().(*Dentry) - if !d.isDir() { - return nil, syserror.ENOTDIR - } - if err := inode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { - return nil, err - } - } - vfsd.IncRef() // Ownership transferred to caller. - return vfsd, nil -} - -// GetParentDentryAt implements vfs.FilesystemImpl.GetParentDentryAt. -func (fs *Filesystem) GetParentDentryAt(ctx context.Context, rp *vfs.ResolvingPath) (*vfs.Dentry, error) { - fs.mu.RLock() - defer fs.processDeferredDecRefs() - defer fs.mu.RUnlock() - vfsd, _, err := fs.walkParentDirLocked(ctx, rp) - if err != nil { - return nil, err - } - vfsd.IncRef() // Ownership transferred to caller. - return vfsd, nil -} - -// LinkAt implements vfs.FilesystemImpl.LinkAt. 
-func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.VirtualDentry) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode) - if err != nil { - return err - } - if rp.Mount() != vd.Mount() { - return syserror.EXDEV - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - - d := vd.Dentry().Impl().(*Dentry) - if d.isDir() { - return syserror.EPERM - } - - child, err := parentInode.NewLink(ctx, pc, d.inode) - if err != nil { - return err - } - parentVFSD.Impl().(*Dentry).InsertChild(pc, child) - return nil -} - -// MkdirAt implements vfs.FilesystemImpl.MkdirAt. -func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MkdirOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child, err := parentInode.NewDir(ctx, pc, opts) - if err != nil { - return err - } - parentVFSD.Impl().(*Dentry).InsertChild(pc, child) - return nil -} - -// MknodAt implements vfs.FilesystemImpl.MknodAt. 
-func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.MknodOptions) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - new, err := parentInode.NewNode(ctx, pc, opts) - if err != nil { - return err - } - parentVFSD.Impl().(*Dentry).InsertChild(pc, new) - return nil -} - -// OpenAt implements vfs.FilesystemImpl.OpenAt. -func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - // Filter out flags that are not supported by kernfs. O_DIRECTORY and - // O_NOFOLLOW have no effect here (they're handled by VFS by setting - // appropriate bits in rp), but are returned by - // FileDescriptionImpl.StatusFlags(). - opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_TRUNC | linux.O_DIRECTORY | linux.O_NOFOLLOW - ats := vfs.AccessTypesForOpenFlags(opts.Flags) - - // Do not create new file. - if opts.Flags&linux.O_CREAT == 0 { - fs.mu.RLock() - defer fs.processDeferredDecRefs() - defer fs.mu.RUnlock() - vfsd, inode, err := fs.walkExistingLocked(ctx, rp) - if err != nil { - return nil, err - } - if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { - return nil, err - } - return inode.Open(rp, vfsd, opts.Flags) - } - - // May create new file. 
- mustCreate := opts.Flags&linux.O_EXCL != 0 - vfsd := rp.Start() - inode := vfsd.Impl().(*Dentry).inode - fs.mu.Lock() - defer fs.mu.Unlock() - if rp.Done() { - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - if mustCreate { - return nil, syserror.EEXIST - } - if err := inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { - return nil, err - } - return inode.Open(rp, vfsd, opts.Flags) - } -afterTrailingSymlink: - parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return nil, err - } - // Check for search permission in the parent directory. - if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayExec); err != nil { - return nil, err - } - // Reject attempts to open directories with O_CREAT. - if rp.MustBeDir() { - return nil, syserror.EISDIR - } - pc := rp.Component() - if pc == "." || pc == ".." { - return nil, syserror.EISDIR - } - // Determine whether or not we need to create a file. - childVFSD, err := rp.ResolveChild(parentVFSD, pc) - if err != nil { - return nil, err - } - if childVFSD == nil { - // Already checked for searchability above; now check for writability. - if err := parentInode.CheckPermissions(ctx, rp.Credentials(), vfs.MayWrite); err != nil { - return nil, err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return nil, err - } - defer rp.Mount().EndWrite() - // Create and open the child. - child, err := parentInode.NewFile(ctx, pc, opts) - if err != nil { - return nil, err - } - parentVFSD.Impl().(*Dentry).InsertChild(pc, child) - return child.Impl().(*Dentry).inode.Open(rp, child, opts.Flags) - } - // Open existing file or follow symlink. 
- if mustCreate { - return nil, syserror.EEXIST - } - childDentry := childVFSD.Impl().(*Dentry) - childInode := childDentry.inode - if rp.ShouldFollowSymlink() { - if childDentry.isSymlink() { - target, err := childInode.Readlink(ctx) - if err != nil { - return nil, err - } - if err := rp.HandleSymlink(target); err != nil { - return nil, err - } - // rp.Final() may no longer be true since we now need to resolve the - // symlink target. - goto afterTrailingSymlink - } - } - if err := childInode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { - return nil, err - } - return childInode.Open(rp, childVFSD, opts.Flags) -} - -// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. -func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { - fs.mu.RLock() - d, inode, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return "", err - } - if !d.Impl().(*Dentry).isSymlink() { - return "", syserror.EINVAL - } - return inode.Readlink(ctx) -} - -// RenameAt implements vfs.FilesystemImpl.RenameAt. -func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldParentVD vfs.VirtualDentry, oldName string, opts vfs.RenameOptions) error { - // Only RENAME_NOREPLACE is supported. - if opts.Flags&^linux.RENAME_NOREPLACE != 0 { - return syserror.EINVAL - } - noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 - - fs.mu.Lock() - defer fs.mu.Lock() - - // Resolve the destination directory first to verify that it's on this - // Mount. 
- dstDirVFSD, dstDirInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - mnt := rp.Mount() - if mnt != oldParentVD.Mount() { - return syserror.EXDEV - } - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - - srcDirVFSD := oldParentVD.Dentry() - srcDir := srcDirVFSD.Impl().(*Dentry) - srcDir.dirMu.Lock() - src, err := fs.revalidateChildLocked(ctx, rp.VirtualFilesystem(), srcDir, oldName, srcDirVFSD.Child(oldName)) - srcDir.dirMu.Unlock() - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - srcVFSD := &src.vfsd - - // Can we remove the src dentry? - if err := checkDeleteLocked(ctx, rp, srcVFSD); err != nil { - return err - } - - // Can we create the dst dentry? - var dstVFSD *vfs.Dentry - pc, err := checkCreateLocked(ctx, rp, dstDirVFSD, dstDirInode) - switch err { - case nil: - // Ok, continue with rename as replacement. - case syserror.EEXIST: - if noReplace { - // Won't overwrite existing node since RENAME_NOREPLACE was requested. - return syserror.EEXIST - } - dstVFSD, err = rp.ResolveChild(dstDirVFSD, pc) - if err != nil { - panic(fmt.Sprintf("Child %q for parent Dentry %+v disappeared inside atomic section?", pc, dstDirVFSD)) - } - default: - return err - } - - mntns := vfs.MountNamespaceFromContext(ctx) - virtfs := rp.VirtualFilesystem() - - srcDirDentry := srcDirVFSD.Impl().(*Dentry) - dstDirDentry := dstDirVFSD.Impl().(*Dentry) - - // We can't deadlock here due to lock ordering because we're protected from - // concurrent renames by fs.mu held for writing. 
- srcDirDentry.dirMu.Lock() - defer srcDirDentry.dirMu.Unlock() - dstDirDentry.dirMu.Lock() - defer dstDirDentry.dirMu.Unlock() - - if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil { - return err - } - srcDirInode := srcDirDentry.inode - replaced, err := srcDirInode.Rename(ctx, srcVFSD.Name(), pc, srcVFSD, dstDirVFSD) - if err != nil { - virtfs.AbortRenameDentry(srcVFSD, dstVFSD) - return err - } - virtfs.CommitRenameReplaceDentry(srcVFSD, dstDirVFSD, pc, replaced) - return nil -} - -// RmdirAt implements vfs.FilesystemImpl.RmdirAt. -func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - vfsd, inode, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(ctx, rp, vfsd); err != nil { - return err - } - if !vfsd.Impl().(*Dentry).isDir() { - return syserror.ENOTDIR - } - if inode.HasChildren() { - return syserror.ENOTEMPTY - } - virtfs := rp.VirtualFilesystem() - parentDentry := vfsd.Parent().Impl().(*Dentry) - parentDentry.dirMu.Lock() - defer parentDentry.dirMu.Unlock() - if err := virtfs.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { - return err - } - if err := parentDentry.inode.RmDir(ctx, rp.Component(), vfsd); err != nil { - virtfs.AbortDeleteDentry(vfsd) - return err - } - virtfs.CommitDeleteDentry(vfsd) - return nil -} - -// SetStatAt implements vfs.FilesystemImpl.SetStatAt. 
-func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { - fs.mu.RLock() - _, inode, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return err - } - if opts.Stat.Mask == 0 { - return nil - } - return inode.SetStat(fs.VFSFilesystem(), opts) -} - -// StatAt implements vfs.FilesystemImpl.StatAt. -func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { - fs.mu.RLock() - _, inode, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return linux.Statx{}, err - } - return inode.Stat(fs.VFSFilesystem()), nil -} - -// StatFSAt implements vfs.FilesystemImpl.StatFSAt. -func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { - fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return linux.Statfs{}, err - } - // TODO: actually implement statfs - return linux.Statfs{}, syserror.ENOSYS -} - -// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. -func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, target string) error { - if rp.Done() { - return syserror.EEXIST - } - fs.mu.Lock() - defer fs.mu.Unlock() - parentVFSD, parentInode, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - pc, err := checkCreateLocked(ctx, rp, parentVFSD, parentInode) - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - child, err := parentInode.NewSymlink(ctx, pc, target) - if err != nil { - return err - } - parentVFSD.Impl().(*Dentry).InsertChild(pc, child) - return nil -} - -// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. 
-func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { - fs.mu.Lock() - defer fs.mu.Unlock() - vfsd, _, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked() - if err != nil { - return err - } - if err := rp.Mount().CheckBeginWrite(); err != nil { - return err - } - defer rp.Mount().EndWrite() - if err := checkDeleteLocked(ctx, rp, vfsd); err != nil { - return err - } - if vfsd.Impl().(*Dentry).isDir() { - return syserror.EISDIR - } - virtfs := rp.VirtualFilesystem() - parentDentry := vfsd.Parent().Impl().(*Dentry) - parentDentry.dirMu.Lock() - defer parentDentry.dirMu.Unlock() - if err := virtfs.PrepareDeleteDentry(vfs.MountNamespaceFromContext(ctx), vfsd); err != nil { - return err - } - if err := parentDentry.inode.Unlink(ctx, rp.Component(), vfsd); err != nil { - virtfs.AbortDeleteDentry(vfsd) - return err - } - virtfs.CommitDeleteDentry(vfsd) - return nil -} - -// ListxattrAt implements vfs.FilesystemImpl.ListxattrAt. -func (fs *Filesystem) ListxattrAt(ctx context.Context, rp *vfs.ResolvingPath) ([]string, error) { - fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return nil, err - } - // kernfs currently does not support extended attributes. - return nil, syserror.ENOTSUP -} - -// GetxattrAt implements vfs.FilesystemImpl.GetxattrAt. -func (fs *Filesystem) GetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) (string, error) { - fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return "", err - } - // kernfs currently does not support extended attributes. - return "", syserror.ENOTSUP -} - -// SetxattrAt implements vfs.FilesystemImpl.SetxattrAt. 
-func (fs *Filesystem) SetxattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetxattrOptions) error { - fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return err - } - // kernfs currently does not support extended attributes. - return syserror.ENOTSUP -} - -// RemovexattrAt implements vfs.FilesystemImpl.RemovexattrAt. -func (fs *Filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { - fs.mu.RLock() - _, _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs() - if err != nil { - return err - } - // kernfs currently does not support extended attributes. - return syserror.ENOTSUP -} - -// PrependPath implements vfs.FilesystemImpl.PrependPath. -func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { - fs.mu.RLock() - defer fs.mu.RUnlock() - return vfs.GenericPrependPath(vfsroot, vd, b) -} diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go deleted file mode 100644 index 1700fffd9..000000000 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package kernfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" -) - -// InodeNoopRefCount partially implements the Inode interface, specifically the -// inodeRefs sub interface. InodeNoopRefCount implements a simple reference -// count for inodes, performing no extra actions when references are obtained or -// released. This is suitable for simple file inodes that don't reference any -// resources. -type InodeNoopRefCount struct { -} - -// IncRef implements Inode.IncRef. -func (n *InodeNoopRefCount) IncRef() { -} - -// DecRef implements Inode.DecRef. -func (n *InodeNoopRefCount) DecRef() { -} - -// TryIncRef implements Inode.TryIncRef. -func (n *InodeNoopRefCount) TryIncRef() bool { - return true -} - -// Destroy implements Inode.Destroy. -func (n *InodeNoopRefCount) Destroy() { -} - -// InodeDirectoryNoNewChildren partially implements the Inode interface. -// InodeDirectoryNoNewChildren represents a directory inode which does not -// support creation of new children. -type InodeDirectoryNoNewChildren struct{} - -// NewFile implements Inode.NewFile. -func (*InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -// NewDir implements Inode.NewDir. -func (*InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -// NewLink implements Inode.NewLink. -func (*InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -// NewSymlink implements Inode.NewSymlink. 
-func (*InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -// NewNode implements Inode.NewNode. -func (*InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -// InodeNotDirectory partially implements the Inode interface, specifically the -// inodeDirectory and inodeDynamicDirectory sub interfaces. Inodes that do not -// represent directories can embed this to provide no-op implementations for -// directory-related functions. -type InodeNotDirectory struct { -} - -// HasChildren implements Inode.HasChildren. -func (*InodeNotDirectory) HasChildren() bool { - return false -} - -// NewFile implements Inode.NewFile. -func (*InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*vfs.Dentry, error) { - panic("NewFile called on non-directory inode") -} - -// NewDir implements Inode.NewDir. -func (*InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*vfs.Dentry, error) { - panic("NewDir called on non-directory inode") -} - -// NewLink implements Inode.NewLinkink. -func (*InodeNotDirectory) NewLink(context.Context, string, Inode) (*vfs.Dentry, error) { - panic("NewLink called on non-directory inode") -} - -// NewSymlink implements Inode.NewSymlink. -func (*InodeNotDirectory) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { - panic("NewSymlink called on non-directory inode") -} - -// NewNode implements Inode.NewNode. -func (*InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { - panic("NewNode called on non-directory inode") -} - -// Unlink implements Inode.Unlink. -func (*InodeNotDirectory) Unlink(context.Context, string, *vfs.Dentry) error { - panic("Unlink called on non-directory inode") -} - -// RmDir implements Inode.RmDir. 
-func (*InodeNotDirectory) RmDir(context.Context, string, *vfs.Dentry) error { - panic("RmDir called on non-directory inode") -} - -// Rename implements Inode.Rename. -func (*InodeNotDirectory) Rename(context.Context, string, string, *vfs.Dentry, *vfs.Dentry) (*vfs.Dentry, error) { - panic("Rename called on non-directory inode") -} - -// Lookup implements Inode.Lookup. -func (*InodeNotDirectory) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - panic("Lookup called on non-directory inode") -} - -// IterDirents implements Inode.IterDirents. -func (*InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { - panic("IterDirents called on non-directory inode") -} - -// Valid implements Inode.Valid. -func (*InodeNotDirectory) Valid(context.Context) bool { - return true -} - -// InodeNoDynamicLookup partially implements the Inode interface, specifically -// the inodeDynamicLookup sub interface. Directory inodes that do not support -// dymanic entries (i.e. entries that are not "hashed" into the -// vfs.Dentry.children) can embed this to provide no-op implementations for -// functions related to dynamic entries. -type InodeNoDynamicLookup struct{} - -// Lookup implements Inode.Lookup. -func (*InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - return nil, syserror.ENOENT -} - -// IterDirents implements Inode.IterDirents. -func (*InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { - return offset, nil -} - -// Valid implements Inode.Valid. -func (*InodeNoDynamicLookup) Valid(ctx context.Context) bool { - return true -} - -// InodeNotSymlink partially implements the Inode interface, specifically the -// inodeSymlink sub interface. All inodes that are not symlinks may embed this -// to return the appropriate errors from symlink-related functions. 
-type InodeNotSymlink struct{} - -// Readlink implements Inode.Readlink. -func (*InodeNotSymlink) Readlink(context.Context) (string, error) { - return "", syserror.EINVAL -} - -// InodeAttrs partially implements the Inode interface, specifically the -// inodeMetadata sub interface. InodeAttrs provides functionality related to -// inode attributes. -// -// Must be initialized by Init prior to first use. -type InodeAttrs struct { - ino uint64 - mode uint32 - uid uint32 - gid uint32 - nlink uint32 -} - -// Init initializes this InodeAttrs. -func (a *InodeAttrs) Init(creds *auth.Credentials, ino uint64, mode linux.FileMode) { - if mode.FileType() == 0 { - panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode)) - } - - nlink := uint32(1) - if mode.FileType() == linux.ModeDirectory { - nlink = 2 - } - atomic.StoreUint64(&a.ino, ino) - atomic.StoreUint32(&a.mode, uint32(mode)) - atomic.StoreUint32(&a.uid, uint32(creds.EffectiveKUID)) - atomic.StoreUint32(&a.gid, uint32(creds.EffectiveKGID)) - atomic.StoreUint32(&a.nlink, nlink) -} - -// Mode implements Inode.Mode. -func (a *InodeAttrs) Mode() linux.FileMode { - return linux.FileMode(atomic.LoadUint32(&a.mode)) -} - -// Stat partially implements Inode.Stat. Note that this function doesn't provide -// all the stat fields, and the embedder should consider extending the result -// with filesystem-specific fields. -func (a *InodeAttrs) Stat(*vfs.Filesystem) linux.Statx { - var stat linux.Statx - stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_NLINK - stat.Ino = atomic.LoadUint64(&a.ino) - stat.Mode = uint16(a.Mode()) - stat.UID = atomic.LoadUint32(&a.uid) - stat.GID = atomic.LoadUint32(&a.gid) - stat.Nlink = atomic.LoadUint32(&a.nlink) - - // TODO: Implement other stat fields like timestamps. - - return stat -} - -// SetStat implements Inode.SetStat. 
-func (a *InodeAttrs) SetStat(_ *vfs.Filesystem, opts vfs.SetStatOptions) error { - stat := opts.Stat - if stat.Mask&linux.STATX_MODE != 0 { - for { - old := atomic.LoadUint32(&a.mode) - new := old | uint32(stat.Mode & ^uint16(linux.S_IFMT)) - if swapped := atomic.CompareAndSwapUint32(&a.mode, old, new); swapped { - break - } - } - } - - if stat.Mask&linux.STATX_UID != 0 { - atomic.StoreUint32(&a.uid, stat.UID) - } - if stat.Mask&linux.STATX_GID != 0 { - atomic.StoreUint32(&a.gid, stat.GID) - } - - // Note that not all fields are modifiable. For example, the file type and - // inode numbers are immutable after node creation. - - // TODO: Implement other stat fields like timestamps. - - return nil -} - -// CheckPermissions implements Inode.CheckPermissions. -func (a *InodeAttrs) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - mode := a.Mode() - return vfs.GenericCheckPermissions( - creds, - ats, - mode.FileType() == linux.ModeDirectory, - uint16(mode), - auth.KUID(atomic.LoadUint32(&a.uid)), - auth.KGID(atomic.LoadUint32(&a.gid)), - ) -} - -// IncLinks implements Inode.IncLinks. -func (a *InodeAttrs) IncLinks(n uint32) { - if atomic.AddUint32(&a.nlink, n) <= n { - panic("InodeLink.IncLinks called with no existing links") - } -} - -// DecLinks implements Inode.DecLinks. -func (a *InodeAttrs) DecLinks() { - if nlink := atomic.AddUint32(&a.nlink, ^uint32(0)); nlink == ^uint32(0) { - // Negative overflow - panic("Inode.DecLinks called at 0 links") - } -} - -type slot struct { - Name string - Dentry *vfs.Dentry - slotEntry -} - -// OrderedChildrenOptions contains initialization options for OrderedChildren. -type OrderedChildrenOptions struct { - // Writable indicates whether vfs.FilesystemImpl methods implemented by - // OrderedChildren may modify the tracked children. This applies to - // operations related to rename, unlink and rmdir. If an OrderedChildren is - // not writable, these operations all fail with EPERM. 
- Writable bool -} - -// OrderedChildren partially implements the Inode interface. OrderedChildren can -// be embedded in directory inodes to keep track of the children in the -// directory, and can then be used to implement a generic directory FD -- see -// GenericDirectoryFD. OrderedChildren is not compatible with dynamic -// directories. -// -// Must be initialize with Init before first use. -type OrderedChildren struct { - refs.AtomicRefCount - - // Can children be modified by user syscalls? It set to false, interface - // methods that would modify the children return EPERM. Immutable. - writable bool - - mu sync.RWMutex - order slotList - set map[string]*slot -} - -// Init initializes an OrderedChildren. -func (o *OrderedChildren) Init(opts OrderedChildrenOptions) { - o.writable = opts.Writable - o.set = make(map[string]*slot) -} - -// DecRef implements Inode.DecRef. -func (o *OrderedChildren) DecRef() { - o.AtomicRefCount.DecRefWithDestructor(o.Destroy) -} - -// Destroy cleans up resources referenced by this OrderedChildren. -func (o *OrderedChildren) Destroy() { - o.mu.Lock() - defer o.mu.Unlock() - o.order.Reset() - o.set = nil -} - -// Populate inserts children into this OrderedChildren, and d's dentry -// cache. Populate returns the number of directories inserted, which the caller -// may use to update the link count for the parent directory. -// -// Precondition: d.Impl() must be a kernfs Dentry. d must represent a directory -// inode. children must not contain any conflicting entries already in o. -func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 { - var links uint32 - for name, child := range children { - if child.isDir() { - links++ - } - if err := o.Insert(name, child.VFSDentry()); err != nil { - panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d)) - } - d.InsertChild(name, child.VFSDentry()) - } - return links -} - -// HasChildren implements Inode.HasChildren. 
-func (o *OrderedChildren) HasChildren() bool { - o.mu.RLock() - defer o.mu.RUnlock() - return len(o.set) > 0 -} - -// Insert inserts child into o. This ignores the writability of o, as this is -// not part of the vfs.FilesystemImpl interface, and is a lower-level operation. -func (o *OrderedChildren) Insert(name string, child *vfs.Dentry) error { - o.mu.Lock() - defer o.mu.Unlock() - if _, ok := o.set[name]; ok { - return syserror.EEXIST - } - s := &slot{ - Name: name, - Dentry: child, - } - o.order.PushBack(s) - o.set[name] = s - return nil -} - -// Precondition: caller must hold o.mu for writing. -func (o *OrderedChildren) removeLocked(name string) { - if s, ok := o.set[name]; ok { - delete(o.set, name) - o.order.Remove(s) - } -} - -// Precondition: caller must hold o.mu for writing. -func (o *OrderedChildren) replaceChildLocked(name string, new *vfs.Dentry) *vfs.Dentry { - if s, ok := o.set[name]; ok { - // Existing slot with given name, simply replace the dentry. - var old *vfs.Dentry - old, s.Dentry = s.Dentry, new - return old - } - - // No existing slot with given name, create and hash new slot. - s := &slot{ - Name: name, - Dentry: new, - } - o.order.PushBack(s) - o.set[name] = s - return nil -} - -// Precondition: caller must hold o.mu for reading or writing. -func (o *OrderedChildren) checkExistingLocked(name string, child *vfs.Dentry) error { - s, ok := o.set[name] - if !ok { - return syserror.ENOENT - } - if s.Dentry != child { - panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! OrderedChild: %+v, vfs: %+v", s.Dentry, child)) - } - return nil -} - -// Unlink implements Inode.Unlink. -func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *vfs.Dentry) error { - if !o.writable { - return syserror.EPERM - } - o.mu.Lock() - defer o.mu.Unlock() - if err := o.checkExistingLocked(name, child); err != nil { - return err - } - o.removeLocked(name) - return nil -} - -// Rmdir implements Inode.Rmdir. 
-func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *vfs.Dentry) error { - // We're not responsible for checking that child is a directory, that it's - // empty, or updating any link counts; so this is the same as unlink. - return o.Unlink(ctx, name, child) -} - -type renameAcrossDifferentImplementationsError struct{} - -func (renameAcrossDifferentImplementationsError) Error() string { - return "rename across inodes with different implementations" -} - -// Rename implements Inode.Rename. -// -// Precondition: Rename may only be called across two directory inodes with -// identical implementations of Rename. Practically, this means filesystems that -// implement Rename by embedding OrderedChildren for any directory -// implementation must use OrderedChildren for all directory implementations -// that will support Rename. -// -// Postcondition: reference on any replaced dentry transferred to caller. -func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *vfs.Dentry) (*vfs.Dentry, error) { - dst, ok := dstDir.Impl().(*Dentry).inode.(interface{}).(*OrderedChildren) - if !ok { - return nil, renameAcrossDifferentImplementationsError{} - } - if !o.writable || !dst.writable { - return nil, syserror.EPERM - } - // Note: There's a potential deadlock below if concurrent calls to Rename - // refer to the same src and dst directories in reverse. We avoid any - // ordering issues because the caller is required to serialize concurrent - // calls to Rename in accordance with the interface declaration. - o.mu.Lock() - defer o.mu.Unlock() - if dst != o { - dst.mu.Lock() - defer dst.mu.Unlock() - } - if err := o.checkExistingLocked(oldname, child); err != nil { - return nil, err - } - replaced := dst.replaceChildLocked(newname, child) - return replaced, nil -} - -// nthLocked returns an iterator to the nth child tracked by this object. The -// iterator is valid until the caller releases o.mu. 
Returns nil if the -// requested index falls out of bounds. -// -// Preconditon: Caller must hold o.mu for reading. -func (o *OrderedChildren) nthLocked(i int64) *slot { - for it := o.order.Front(); it != nil && i >= 0; it = it.Next() { - if i == 0 { - return it - } - i-- - } - return nil -} - -// InodeSymlink partially implements Inode interface for symlinks. -type InodeSymlink struct { - InodeNotDirectory -} - -// Open implements Inode.Open. -func (InodeSymlink) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - return nil, syserror.ELOOP -} - -// StaticDirectory is a standard implementation of a directory with static -// contents. -// -// +stateify savable -type StaticDirectory struct { - InodeNotSymlink - InodeDirectoryNoNewChildren - InodeAttrs - InodeNoDynamicLookup - OrderedChildren -} - -var _ Inode = (*StaticDirectory)(nil) - -// NewStaticDir creates a new static directory and returns its dentry. -func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry { - inode := &StaticDirectory{} - inode.Init(creds, ino, perm) - - dentry := &Dentry{} - dentry.Init(inode) - - inode.OrderedChildren.Init(OrderedChildrenOptions{}) - links := inode.OrderedChildren.Populate(dentry, children) - inode.IncLinks(links) - - return dentry -} - -// Init initializes StaticDirectory. -func (s *StaticDirectory) Init(creds *auth.Credentials, ino uint64, perm linux.FileMode) { - if perm&^linux.PermissionsMask != 0 { - panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask)) - } - s.InodeAttrs.Init(creds, ino, linux.ModeDirectory|perm) -} - -// Open implements kernfs.Inode. 
-func (s *StaticDirectory) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - fd := &GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &s.OrderedChildren, flags) - return fd.VFSFileDescription(), nil -} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go deleted file mode 100644 index 85bcdcc57..000000000 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ /dev/null @@ -1,422 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package kernfs provides the tools to implement inode-based filesystems. -// Kernfs has two main features: -// -// 1. The Inode interface, which maps VFS2's path-based filesystem operations to -// specific filesystem nodes. Kernfs uses the Inode interface to provide a -// blanket implementation for the vfs.FilesystemImpl. Kernfs also serves as -// the synchronization mechanism for all filesystem operations by holding a -// filesystem-wide lock across all operations. -// -// 2. Various utility types which provide generic implementations for various -// parts of the Inode and vfs.FileDescription interfaces. Client filesystems -// based on kernfs can embed the appropriate set of these to avoid having to -// reimplement common filesystem operations. See inode_impl_util.go and -// fd_impl_util.go. -// -// Reference Model: -// -// Kernfs dentries represents named pointers to inodes. 
Dentries and inode have -// independent lifetimes and reference counts. A child dentry unconditionally -// holds a reference on its parent directory's dentry. A dentry also holds a -// reference on the inode it points to. Multiple dentries can point to the same -// inode (for example, in the case of hardlinks). File descriptors hold a -// reference to the dentry they're opened on. -// -// Dentries are guaranteed to exist while holding Filesystem.mu for -// reading. Dropping dentries require holding Filesystem.mu for writing. To -// queue dentries for destruction from a read critical section, see -// Filesystem.deferDecRef. -// -// Lock ordering: -// -// kernfs.Filesystem.mu -// kernfs.Dentry.dirMu -// vfs.VirtualFilesystem.mountMu -// vfs.Dentry.mu -// kernfs.Filesystem.droppedDentriesMu -// (inode implementation locks, if any) -package kernfs - -import ( - "fmt" - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" -) - -// FilesystemType implements vfs.FilesystemType. -type FilesystemType struct{} - -// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory -// filesystem. Concrete implementations are expected to embed this in their own -// Filesystem type. -type Filesystem struct { - vfsfs vfs.Filesystem - - droppedDentriesMu sync.Mutex - - // droppedDentries is a list of dentries waiting to be DecRef()ed. This is - // used to defer dentry destruction until mu can be acquired for - // writing. Protected by droppedDentriesMu. - droppedDentries []*vfs.Dentry - - // mu synchronizes the lifetime of Dentries on this filesystem. Holding it - // for reading guarantees continued existence of any resolved dentries, but - // the dentry tree may be modified. - // - // Kernfs dentries can only be DecRef()ed while holding mu for writing. 
For - // example: - // - // fs.mu.Lock() - // defer fs.mu.Unlock() - // ... - // dentry1.DecRef() - // defer dentry2.DecRef() // Ok, will run before Unlock. - // - // If discarding dentries in a read context, use Filesystem.deferDecRef. For - // example: - // - // fs.mu.RLock() - // defer fs.processDeferredDecRefs() - // defer fs.mu.RUnlock() - // ... - // fs.deferDecRef(dentry) - mu sync.RWMutex - - // nextInoMinusOne is used to allocate inode numbers on this - // filesystem. Must be accessed by atomic operations. - nextInoMinusOne uint64 -} - -// deferDecRef defers dropping a dentry ref until the next call to -// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu. -// -// Precondition: d must not already be pending destruction. -func (fs *Filesystem) deferDecRef(d *vfs.Dentry) { - fs.droppedDentriesMu.Lock() - fs.droppedDentries = append(fs.droppedDentries, d) - fs.droppedDentriesMu.Unlock() -} - -// processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the -// droppedDentries list. See comment on Filesystem.mu. -func (fs *Filesystem) processDeferredDecRefs() { - fs.mu.Lock() - fs.processDeferredDecRefsLocked() - fs.mu.Unlock() -} - -// Precondition: fs.mu must be held for writing. -func (fs *Filesystem) processDeferredDecRefsLocked() { - fs.droppedDentriesMu.Lock() - for _, d := range fs.droppedDentries { - d.DecRef() - } - fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. - fs.droppedDentriesMu.Unlock() -} - -// Init initializes a kernfs filesystem. This should be called during -// vfs.FilesystemType.NewFilesystem for the concrete filesystem embedding -// kernfs. -func (fs *Filesystem) Init(vfsObj *vfs.VirtualFilesystem) { - fs.vfsfs.Init(vfsObj, fs) -} - -// VFSFilesystem returns the generic vfs filesystem object. -func (fs *Filesystem) VFSFilesystem() *vfs.Filesystem { - return &fs.vfsfs -} - -// NextIno allocates a new inode number on this filesystem.
-func (fs *Filesystem) NextIno() uint64 { - return atomic.AddUint64(&fs.nextInoMinusOne, 1) -} - -// These consts are used in the Dentry.flags field. -const ( - // Dentry points to a directory inode. - dflagsIsDir = 1 << iota - - // Dentry points to a symlink inode. - dflagsIsSymlink -) - -// Dentry implements vfs.DentryImpl. -// -// A kernfs dentry is similar to a dentry in a traditional filesystem: it's a -// named reference to an inode. A dentry generally lives as long as it's part of -// a mounted filesystem tree. Kernfs doesn't cache dentries once all references -// to them are removed. Dentries hold a single reference to the inode they point -// to, and child dentries hold a reference on their parent. -// -// Must be initialized by Init prior to first use. -type Dentry struct { - refs.AtomicRefCount - - vfsd vfs.Dentry - inode Inode - - refs uint64 - - // flags caches useful information about the dentry from the inode. See the - // dflags* consts above. Must be accessed by atomic ops. - flags uint32 - - // dirMu protects vfsd.children for directory dentries. - dirMu sync.Mutex -} - -// Init initializes this dentry. -// -// Precondition: Caller must hold a reference on inode. -// -// Postcondition: Caller's reference on inode is transferred to the dentry. -func (d *Dentry) Init(inode Inode) { - d.vfsd.Init(d) - d.inode = inode - ftype := inode.Mode().FileType() - if ftype == linux.ModeDirectory { - d.flags |= dflagsIsDir - } - if ftype == linux.ModeSymlink { - d.flags |= dflagsIsSymlink - } -} - -// VFSDentry returns the generic vfs dentry for this kernfs dentry. -func (d *Dentry) VFSDentry() *vfs.Dentry { - return &d.vfsd -} - -// isDir checks whether the dentry points to a directory inode. -func (d *Dentry) isDir() bool { - return atomic.LoadUint32(&d.flags)&dflagsIsDir != 0 -} - -// isSymlink checks whether the dentry points to a symlink inode. 
-func (d *Dentry) isSymlink() bool { - return atomic.LoadUint32(&d.flags)&dflagsIsSymlink != 0 -} - -// DecRef implements vfs.DentryImpl.DecRef. -func (d *Dentry) DecRef() { - d.AtomicRefCount.DecRefWithDestructor(d.destroy) -} - -// Precondition: Dentry must be removed from VFS' dentry cache. -func (d *Dentry) destroy() { - d.inode.DecRef() // IncRef from Init. - d.inode = nil - if parent := d.vfsd.Parent(); parent != nil { - parent.DecRef() // IncRef from Dentry.InsertChild. - } -} - -// InsertChild inserts child into the vfs dentry cache with the given name under -// this dentry. This does not update the directory inode, so calling this on -// it's own isn't sufficient to insert a child into a directory. InsertChild -// updates the link count on d if required. -// -// Precondition: d must represent a directory inode. -func (d *Dentry) InsertChild(name string, child *vfs.Dentry) { - d.dirMu.Lock() - d.insertChildLocked(name, child) - d.dirMu.Unlock() -} - -// insertChildLocked is equivalent to InsertChild, with additional -// preconditions. -// -// Precondition: d.dirMu must be locked. -func (d *Dentry) insertChildLocked(name string, child *vfs.Dentry) { - if !d.isDir() { - panic(fmt.Sprintf("InsertChild called on non-directory Dentry: %+v.", d)) - } - vfsDentry := d.VFSDentry() - vfsDentry.IncRef() // DecRef in child's Dentry.destroy. - vfsDentry.InsertChild(child, name) -} - -// The Inode interface maps filesystem-level operations that operate on paths to -// equivalent operations on specific filesystem nodes. -// -// The interface methods are groups into logical categories as sub interfaces -// below. Generally, an implementation for each sub interface can be provided by -// embedding an appropriate type from inode_impl_utils.go. The sub interfaces -// are purely organizational. Methods declared directly in the main interface -// have no generic implementations, and should be explicitly provided by the -// client filesystem. 
-// -// Generally, implementations are not responsible for tasks that are common to -// all filesystems. These include: -// -// - Checking that dentries passed to methods are of the appropriate file type. -// - Checking permissions. -// - Updating link and reference counts. -// -// Specific responsibilities of implementations are documented below. -type Inode interface { - // Methods related to reference counting. A generic implementation is - // provided by InodeNoopRefCount. These methods are generally called by the - // equivalent Dentry methods. - inodeRefs - - // Methods related to node metadata. A generic implementation is provided by - // InodeAttrs. - inodeMetadata - - // Method for inodes that represent symlink. InodeNotSymlink provides a - // blanket implementation for all non-symlink inodes. - inodeSymlink - - // Method for inodes that represent directories. InodeNotDirectory provides - // a blanket implementation for all non-directory inodes. - inodeDirectory - - // Method for inodes that represent dynamic directories and their - // children. InodeNoDynamicLookup provides a blanket implementation for all - // non-dynamic-directory inodes. - inodeDynamicLookup - - // Open creates a file description for the filesystem object represented by - // this inode. The returned file description should hold a reference on the - // inode for its lifetime. - // - // Precondition: !rp.Done(). vfsd.Impl() must be a kernfs Dentry. - Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) -} - -type inodeRefs interface { - IncRef() - DecRef() - TryIncRef() bool - // Destroy is called when the inode reaches zero references. Destroy release - // all resources (references) on objects referenced by the inode, including - // any child dentries. 
- Destroy() -} - -type inodeMetadata interface { - // CheckPermissions checks that creds may access this inode for the - // requested access type, per the the rules of - // fs/namei.c:generic_permission(). - CheckPermissions(ctx context.Context, creds *auth.Credentials, atx vfs.AccessTypes) error - - // Mode returns the (struct stat)::st_mode value for this inode. This is - // separated from Stat for performance. - Mode() linux.FileMode - - // Stat returns the metadata for this inode. This corresponds to - // vfs.FilesystemImpl.StatAt. - Stat(fs *vfs.Filesystem) linux.Statx - - // SetStat updates the metadata for this inode. This corresponds to - // vfs.FilesystemImpl.SetStatAt. - SetStat(fs *vfs.Filesystem, opts vfs.SetStatOptions) error -} - -// Precondition: All methods in this interface may only be called on directory -// inodes. -type inodeDirectory interface { - // The New{File,Dir,Node,Symlink} methods below should return a new inode - // hashed into this inode. - // - // These inode constructors are inode-level operations rather than - // filesystem-level operations to allow client filesystems to mix different - // implementations based on the new node's location in the - // filesystem. - - // HasChildren returns true if the directory inode has any children. - HasChildren() bool - - // NewFile creates a new regular file inode. - NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) - - // NewDir creates a new directory inode. - NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) - - // NewLink creates a new hardlink to a specified inode in this - // directory. Implementations should create a new kernfs Dentry pointing to - // target, and update target's link count. - NewLink(ctx context.Context, name string, target Inode) (*vfs.Dentry, error) - - // NewSymlink creates a new symbolic link inode. 
- NewSymlink(ctx context.Context, name, target string) (*vfs.Dentry, error) - - // NewNode creates a new filesystem node for a mknod syscall. - NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*vfs.Dentry, error) - - // Unlink removes a child dentry from this directory inode. - Unlink(ctx context.Context, name string, child *vfs.Dentry) error - - // RmDir removes an empty child directory from this directory - // inode. Implementations must update the parent directory's link count, - // if required. Implementations are not responsible for checking that child - // is a directory, checking for an empty directory. - RmDir(ctx context.Context, name string, child *vfs.Dentry) error - - // Rename is called on the source directory containing an inode being - // renamed. child should point to the resolved child in the source - // directory. If Rename replaces a dentry in the destination directory, it - // should return the replaced dentry or nil otherwise. - // - // Precondition: Caller must serialize concurrent calls to Rename. - Rename(ctx context.Context, oldname, newname string, child, dstDir *vfs.Dentry) (replaced *vfs.Dentry, err error) -} - -type inodeDynamicLookup interface { - // Lookup should return an appropriate dentry if name should resolve to a - // child of this dynamic directory inode. This gives the directory an - // opportunity on every lookup to resolve additional entries that aren't - // hashed into the directory. This is only called when the inode is a - // directory. If the inode is not a directory, or if the directory only - // contains a static set of children, the implementer can unconditionally - // return an appropriate error (ENOTDIR and ENOENT respectively). - // - // The child returned by Lookup will be hashed into the VFS dentry tree. Its - // lifetime can be controlled by the filesystem implementation with an - // appropriate implementation of Valid. 
- // - // Lookup returns the child with an extra reference and the caller owns this - // reference. - Lookup(ctx context.Context, name string) (*vfs.Dentry, error) - - // Valid should return true if this inode is still valid, or needs to - // be resolved again by a call to Lookup. - Valid(ctx context.Context) bool - - // IterDirents is used to iterate over dynamically created entries. It invokes - // cb on each entry in the directory represented by the FileDescription. - // 'offset' is the offset for the entire IterDirents call, which may include - // results from the caller. 'relOffset' is the offset inside the entries - // returned by this IterDirents invocation. In other words, - // 'offset+relOffset+1' is the value that should be set in vfs.Dirent.NextOff, - // while 'relOffset' is the place where iteration should start from. - IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) -} - -type inodeSymlink interface { - // Readlink resolves the target of a symbolic link. If an inode is not a - // symlink, the implementation should return EINVAL. - Readlink(ctx context.Context) (string, error) -} diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go deleted file mode 100644 index aa3fe76ee..000000000 --- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package kernfs_test - -import ( - "bytes" - "fmt" - "testing" - - "github.com/google/go-cmp/cmp" - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/usermem" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -const defaultMode linux.FileMode = 01777 -const staticFileContent = "This is sample content for a static test file." - -// RootDentryFn is a generator function for creating the root dentry of a test -// filesystem. See newTestSystem. -type RootDentryFn func(*auth.Credentials, *filesystem) *kernfs.Dentry - -// newTestSystem sets up a minimal environment for running a test, including an -// instance of a test filesystem. Tests can control the contents of the -// filesystem by providing an appropriate rootFn, which should return a -// pre-populated root dentry. 
-func newTestSystem(t *testing.T, rootFn RootDentryFn) *testutil.System { - ctx := contexttest.Context(t) - creds := auth.CredentialsFromContext(ctx) - v := vfs.New() - v.MustRegisterFilesystemType("testfs", &fsType{rootFn: rootFn}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mns, err := v.NewMountNamespace(ctx, creds, "", "testfs", &vfs.GetFilesystemOptions{}) - if err != nil { - t.Fatalf("Failed to create testfs root mount: %v", err) - } - return testutil.NewSystem(ctx, t, v, mns) -} - -type fsType struct { - rootFn RootDentryFn -} - -type filesystem struct { - kernfs.Filesystem -} - -type file struct { - kernfs.DynamicBytesFile - content string -} - -func (fs *filesystem) newFile(creds *auth.Credentials, content string) *kernfs.Dentry { - f := &file{} - f.content = content - f.DynamicBytesFile.Init(creds, fs.NextIno(), f, 0777) - - d := &kernfs.Dentry{} - d.Init(f) - return d -} - -func (f *file) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%s", f.content) - return nil -} - -type attrs struct { - kernfs.InodeAttrs -} - -func (a *attrs) SetStat(fs *vfs.Filesystem, opt vfs.SetStatOptions) error { - return syserror.EPERM -} - -type readonlyDir struct { - attrs - kernfs.InodeNotSymlink - kernfs.InodeNoDynamicLookup - kernfs.InodeDirectoryNoNewChildren - - kernfs.OrderedChildren - dentry kernfs.Dentry -} - -func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry { - dir := &readonlyDir{} - dir.attrs.Init(creds, fs.NextIno(), linux.ModeDirectory|mode) - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - dir.dentry.Init(dir) - - dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents)) - - return &dir.dentry -} - -func (d *readonlyDir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - if err := fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, 
flags); err != nil { - return nil, err - } - return fd.VFSFileDescription(), nil -} - -type dir struct { - attrs - kernfs.InodeNotSymlink - kernfs.InodeNoDynamicLookup - - fs *filesystem - dentry kernfs.Dentry - kernfs.OrderedChildren -} - -func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry { - dir := &dir{} - dir.fs = fs - dir.attrs.Init(creds, fs.NextIno(), linux.ModeDirectory|mode) - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) - dir.dentry.Init(dir) - - dir.IncLinks(dir.OrderedChildren.Populate(&dir.dentry, contents)) - - return &dir.dentry -} - -func (d *dir) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &d.OrderedChildren, flags) - return fd.VFSFileDescription(), nil -} - -func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*vfs.Dentry, error) { - creds := auth.CredentialsFromContext(ctx) - dir := d.fs.newDir(creds, opts.Mode, nil) - dirVFSD := dir.VFSDentry() - if err := d.OrderedChildren.Insert(name, dirVFSD); err != nil { - dir.DecRef() - return nil, err - } - d.IncLinks(1) - return dirVFSD, nil -} - -func (d *dir) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*vfs.Dentry, error) { - creds := auth.CredentialsFromContext(ctx) - f := d.fs.newFile(creds, "") - fVFSD := f.VFSDentry() - if err := d.OrderedChildren.Insert(name, fVFSD); err != nil { - f.DecRef() - return nil, err - } - return fVFSD, nil -} - -func (*dir) NewLink(context.Context, string, kernfs.Inode) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -func (*dir) NewSymlink(context.Context, string, string) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -func (*dir) NewNode(context.Context, string, vfs.MknodOptions) (*vfs.Dentry, error) { - return nil, syserror.EPERM -} - -func (fst *fsType) GetFilesystem(ctx context.Context, 
vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opt vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - fs := &filesystem{} - fs.Init(vfsObj) - root := fst.rootFn(creds, fs) - return fs.VFSFilesystem(), root.VFSDentry(), nil -} - -// -------------------- Remainder of the file are test cases -------------------- - -func TestBasic(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, map[string]*kernfs.Dentry{ - "file1": fs.newFile(creds, staticFileContent), - }) - }) - defer sys.Destroy() - sys.GetDentryOrDie(sys.PathOpAtRoot("file1")).DecRef() -} - -func TestMkdirGetDentry(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, map[string]*kernfs.Dentry{ - "dir1": fs.newDir(creds, 0755, nil), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("dir1/a new directory") - if err := sys.VFS.MkdirAt(sys.Ctx, sys.Creds, &pop, &vfs.MkdirOptions{Mode: 0755}); err != nil { - t.Fatalf("MkdirAt for PathOperation %+v failed: %v", pop, err) - } - sys.GetDentryOrDie(pop).DecRef() -} - -func TestReadStaticFile(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, map[string]*kernfs.Dentry{ - "file1": fs.newFile(creds, staticFileContent), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("file1") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) - } - defer fd.DecRef() - - content, err := sys.ReadToEnd(fd) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - if diff := cmp.Diff(staticFileContent, content); diff != "" { - t.Fatalf("Read returned unexpected data:\n--- want\n+++ got\n%v", diff) - } -} - -func 
TestCreateNewFileInStaticDir(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, map[string]*kernfs.Dentry{ - "dir1": fs.newDir(creds, 0755, nil), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("dir1/newfile") - opts := &vfs.OpenOptions{Flags: linux.O_CREAT | linux.O_EXCL, Mode: defaultMode} - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, opts) - if err != nil { - t.Fatalf("OpenAt(pop:%+v, opts:%+v) failed: %v", pop, opts, err) - } - - // Close the file. The file should persist. - fd.DecRef() - - fd, err = sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt(pop:%+v) = %+v failed: %v", pop, fd, err) - } - fd.DecRef() -} - -func TestDirFDReadWrite(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, nil) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("/") - fd, err := sys.VFS.OpenAt(sys.Ctx, sys.Creds, &pop, &vfs.OpenOptions{ - Flags: linux.O_RDONLY, - }) - if err != nil { - t.Fatalf("OpenAt for PathOperation %+v failed: %v", pop, err) - } - defer fd.DecRef() - - // Read/Write should fail for directory FDs. - if _, err := fd.Read(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.ReadOptions{}); err != syserror.EISDIR { - t.Fatalf("Read for directory FD failed with unexpected error: %v", err) - } - if _, err := fd.Write(sys.Ctx, usermem.BytesIOSequence([]byte{}), vfs.WriteOptions{}); err != syserror.EBADF { - t.Fatalf("Write for directory FD failed with unexpected error: %v", err) - } -} - -func TestDirFDIterDirents(t *testing.T) { - sys := newTestSystem(t, func(creds *auth.Credentials, fs *filesystem) *kernfs.Dentry { - return fs.newReadonlyDir(creds, 0755, map[string]*kernfs.Dentry{ - // Fill root with nodes backed by various inode implementations. 
- "dir1": fs.newReadonlyDir(creds, 0755, nil), - "dir2": fs.newDir(creds, 0755, map[string]*kernfs.Dentry{ - "dir3": fs.newDir(creds, 0755, nil), - }), - "file1": fs.newFile(creds, staticFileContent), - }) - }) - defer sys.Destroy() - - pop := sys.PathOpAtRoot("/") - sys.AssertDirectoryContains(&pop, map[string]testutil.DirentType{ - "dir1": linux.DT_DIR, - "dir2": linux.DT_DIR, - "file1": linux.DT_REG, - }) -} diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go deleted file mode 100644 index f19f12854..000000000 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package kernfs - -import ( - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" -) - -// StaticSymlink provides an Inode implementation for symlinks that point to -// a immutable target. -type StaticSymlink struct { - InodeAttrs - InodeNoopRefCount - InodeSymlink - - target string -} - -var _ Inode = (*StaticSymlink)(nil) - -// NewStaticSymlink creates a new symlink file pointing to 'target'. -func NewStaticSymlink(creds *auth.Credentials, ino uint64, target string) *Dentry { - inode := &StaticSymlink{} - inode.Init(creds, ino, target) - - d := &Dentry{} - d.Init(inode) - return d -} - -// Init initializes the instance. 
-func (s *StaticSymlink) Init(creds *auth.Credentials, ino uint64, target string) { - s.target = target - s.InodeAttrs.Init(creds, ino, linux.ModeSymlink|0777) -} - -// Readlink implements Inode. -func (s *StaticSymlink) Readlink(_ context.Context) (string, error) { - return s.target, nil -} |