// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package vfs import ( "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" ) // A Filesystem is a tree of nodes represented by Dentries, which forms part of // a VirtualFilesystem. // // Filesystems are reference-counted. Unless otherwise specified, all // Filesystem methods require that a reference is held. // // Filesystem is analogous to Linux's struct super_block. // // +stateify savable type Filesystem struct { // refs is the reference count. refs is accessed using atomic memory // operations. refs int64 // vfs is the VirtualFilesystem that uses this Filesystem. vfs is // immutable. vfs *VirtualFilesystem // impl is the FilesystemImpl associated with this Filesystem. impl is // immutable. This should be the last field in Dentry. impl FilesystemImpl } // Init must be called before first use of fs. func (fs *Filesystem) Init(vfsObj *VirtualFilesystem, impl FilesystemImpl) { fs.refs = 1 fs.vfs = vfsObj fs.impl = impl vfsObj.filesystemsMu.Lock() vfsObj.filesystems[fs] = struct{}{} vfsObj.filesystemsMu.Unlock() } // VirtualFilesystem returns the containing VirtualFilesystem. func (fs *Filesystem) VirtualFilesystem() *VirtualFilesystem { return fs.vfs } // Impl returns the FilesystemImpl associated with fs. func (fs *Filesystem) Impl() FilesystemImpl { return fs.impl } // IncRef increments fs' reference count. func (fs *Filesystem) IncRef() { if atomic.AddInt64(&fs.refs, 1) <= 1 { panic("Filesystem.IncRef() called without holding a reference") } } // TryIncRef increments fs' reference count and returns true. If fs' reference // count is zero, TryIncRef does nothing and returns false. // // TryIncRef does not require that a reference is held on fs. func (fs *Filesystem) TryIncRef() bool { for { refs := atomic.LoadInt64(&fs.refs) if refs <= 0 { return false } if atomic.CompareAndSwapInt64(&fs.refs, refs, refs+1) { return true } } } // DecRef decrements fs' reference count. func (fs *Filesystem) DecRef() { if refs := atomic.AddInt64(&fs.refs, -1); refs == 0 { fs.vfs.filesystemsMu.Lock() delete(fs.vfs.filesystems, fs) fs.vfs.filesystemsMu.Unlock() fs.impl.Release() } else if refs < 0 { panic("Filesystem.decRef() called without holding a reference") } } // FilesystemImpl contains implementation details for a Filesystem. // Implementations of FilesystemImpl should contain their associated Filesystem // by value as their first field. // // All methods that take a ResolvingPath must resolve the path before // performing any other checks, including rejection of the operation if not // supported by the FilesystemImpl. This is because the final FilesystemImpl // (responsible for actually implementing the operation) isn't known until path // resolution is complete. // // Unless otherwise specified, FilesystemImpl methods are responsible for // performing permission checks. In many cases, vfs package functions in // permissions.go may be used to help perform these checks. // // When multiple specified error conditions apply to a given method call, the // implementation may return any applicable errno unless otherwise specified, // but returning the earliest error specified is preferable to maximize // compatibility with Linux. // // All methods may return errors not specified, notably including: // // - ENOENT if a required path component does not exist. // // - ENOTDIR if an intermediate path component is not a directory. // // - Errors from vfs-package functions (ResolvingPath.Resolve*(), // Mount.CheckBeginWrite(), permission-checking functions, etc.) // // For all methods that take or return linux.Statx, Statx.Uid and Statx.Gid // should be interpreted as IDs in the root UserNamespace (i.e. as auth.KUID // and auth.KGID respectively). // // FilesystemImpl combines elements of Linux's struct super_operations and // struct inode_operations, for reasons described in the documentation for // Dentry. type FilesystemImpl interface { // Release is called when the associated Filesystem reaches zero // references. Release() // Sync "causes all pending modifications to filesystem metadata and cached // file data to be written to the underlying [filesystem]", as by syncfs(2). Sync(ctx context.Context) error // AccessAt checks whether a user with creds can access the file at rp. AccessAt(ctx context.Context, rp *ResolvingPath, creds *auth.Credentials, ats AccessTypes) error // GetDentryAt returns a Dentry representing the file at rp. A reference is // taken on the returned Dentry. // // GetDentryAt does not correspond directly to a Linux syscall; it is used // in the implementation of: // // - Syscalls that need to resolve two paths: link(), linkat(). // // - Syscalls that need to refer to a filesystem position outside the // context of a file description: chdir(), fchdir(), chroot(), mount(), // umount(). GetDentryAt(ctx context.Context, rp *ResolvingPath, opts GetDentryOptions) (*Dentry, error) // GetParentDentryAt returns a Dentry representing the directory at the // second-to-last path component in rp. (Note that, despite the name, this // is not necessarily the parent directory of the file at rp, since the // last path component in rp may be "." or "..".) A reference is taken on // the returned Dentry. // // GetParentDentryAt does not correspond directly to a Linux syscall; it is // used in the implementation of the rename() family of syscalls, which // must resolve the parent directories of two paths. // // Preconditions: !rp.Done(). // // Postconditions: If GetParentDentryAt returns a nil error, then // rp.Final(). If GetParentDentryAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). GetParentDentryAt(ctx context.Context, rp *ResolvingPath) (*Dentry, error) // LinkAt creates a hard link at rp representing the same file as vd. It // does not take ownership of references on vd. // // Errors: // // - If the last path component in rp is "." or "..", LinkAt returns // EEXIST. // // - If a file already exists at rp, LinkAt returns EEXIST. // // - If rp.MustBeDir(), LinkAt returns ENOENT. // // - If the directory in which the link would be created has been removed // by RmdirAt or RenameAt, LinkAt returns ENOENT. // // - If rp.Mount != vd.Mount(), LinkAt returns EXDEV. // // - If vd represents a directory, LinkAt returns EPERM. // // - If vd represents a file for which all existing links have been // removed, or a file created by open(O_TMPFILE|O_EXCL), LinkAt returns // ENOENT. Equivalently, if vd represents a file with a link count of 0 not // created by open(O_TMPFILE) without O_EXCL, LinkAt returns ENOENT. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If LinkAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). LinkAt(ctx context.Context, rp *ResolvingPath, vd VirtualDentry) error // MkdirAt creates a directory at rp. // // Errors: // // - If the last path component in rp is "." or "..", MkdirAt returns // EEXIST. // // - If a file already exists at rp, MkdirAt returns EEXIST. // // - If the directory in which the new directory would be created has been // removed by RmdirAt or RenameAt, MkdirAt returns ENOENT. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If MkdirAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). MkdirAt(ctx context.Context, rp *ResolvingPath, opts MkdirOptions) error // MknodAt creates a regular file, device special file, or named pipe at // rp. // // Errors: // // - If the last path component in rp is "." or "..", MknodAt returns // EEXIST. // // - If a file already exists at rp, MknodAt returns EEXIST. // // - If rp.MustBeDir(), MknodAt returns ENOENT. // // - If the directory in which the file would be created has been removed // by RmdirAt or RenameAt, MknodAt returns ENOENT. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If MknodAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). MknodAt(ctx context.Context, rp *ResolvingPath, opts MknodOptions) error // OpenAt returns an FileDescription providing access to the file at rp. A // reference is taken on the returned FileDescription. // // Errors: // // - If opts.Flags specifies O_TMPFILE and this feature is unsupported by // the implementation, OpenAt returns EOPNOTSUPP. (All other unsupported // features are silently ignored, consistently with Linux's open*(2).) OpenAt(ctx context.Context, rp *ResolvingPath, opts OpenOptions) (*FileDescription, error) // ReadlinkAt returns the target of the symbolic link at rp. // // Errors: // // - If the file at rp is not a symbolic link, ReadlinkAt returns EINVAL. ReadlinkAt(ctx context.Context, rp *ResolvingPath) (string, error) // RenameAt renames the file named oldName in directory oldParentVD to rp. // It does not take ownership of references on oldParentVD. // // Errors [1]: // // - If opts.Flags specifies unsupported options, RenameAt returns EINVAL. // // - If the last path component in rp is "." or "..", and opts.Flags // contains RENAME_NOREPLACE, RenameAt returns EEXIST. // // - If the last path component in rp is "." or "..", and opts.Flags does // not contain RENAME_NOREPLACE, RenameAt returns EBUSY. // // - If rp.Mount != oldParentVD.Mount(), RenameAt returns EXDEV. // // - If the renamed file is not a directory, and opts.MustBeDir is true, // RenameAt returns ENOTDIR. // // - If renaming would replace an existing file and opts.Flags contains // RENAME_NOREPLACE, RenameAt returns EEXIST. // // - If there is no existing file at rp and opts.Flags contains // RENAME_EXCHANGE, RenameAt returns ENOENT. // // - If there is an existing non-directory file at rp, and rp.MustBeDir() // is true, RenameAt returns ENOTDIR. // // - If the renamed file is not a directory, opts.Flags does not contain // RENAME_EXCHANGE, and rp.MustBeDir() is true, RenameAt returns ENOTDIR. // (This check is not subsumed by the check for directory replacement below // since it applies even if there is no file to replace.) // // - If the renamed file is a directory, and the new parent directory of // the renamed file is either the renamed directory or a descendant // subdirectory of the renamed directory, RenameAt returns EINVAL. // // - If renaming would exchange the renamed file with an ancestor directory // of the renamed file, RenameAt returns EINVAL. // // - If renaming would replace an ancestor directory of the renamed file, // RenameAt returns ENOTEMPTY. (This check would be subsumed by the // non-empty directory check below; however, this check takes place before // the self-rename check.) // // - If the renamed file would replace or exchange with itself (i.e. the // source and destination paths resolve to the same file), RenameAt returns // nil, skipping the checks described below. // // - If the source or destination directory is not writable by the provider // of rp.Credentials(), RenameAt returns EACCES. // // - If the renamed file is a directory, and renaming would replace a // non-directory file, RenameAt returns ENOTDIR. // // - If the renamed file is not a directory, and renaming would replace a // directory, RenameAt returns EISDIR. // // - If the new parent directory of the renamed file has been removed by // RmdirAt or a preceding call to RenameAt, RenameAt returns ENOENT. // // - If the renamed file is a directory, it is not writable by the // provider of rp.Credentials(), and the source and destination parent // directories are different, RenameAt returns EACCES. (This is nominally // required to change the ".." entry in the renamed directory.) // // - If renaming would replace a non-empty directory, RenameAt returns // ENOTEMPTY. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). oldName is not "." or "..". // // Postconditions: If RenameAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). // // [1] "The worst of all namespace operations - renaming directory. // "Perverted" doesn't even start to describe it. Somebody in UCB had a // heck of a trip..." - fs/namei.c:vfs_rename() RenameAt(ctx context.Context, rp *ResolvingPath, oldParentVD VirtualDentry, oldName string, opts RenameOptions) error // RmdirAt removes the directory at rp. // // Errors: // // - If the last path component in rp is ".", RmdirAt returns EINVAL. // // - If the last path component in rp is "..", RmdirAt returns ENOTEMPTY. // // - If no file exists at rp, RmdirAt returns ENOENT. // // - If the file at rp exists but is not a directory, RmdirAt returns // ENOTDIR. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If RmdirAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). RmdirAt(ctx context.Context, rp *ResolvingPath) error // SetStatAt updates metadata for the file at the given path. Implementations // are responsible for checking if the operation can be performed // (see vfs.CheckSetStat() for common checks). // // Errors: // // - If opts specifies unsupported options, SetStatAt returns EINVAL. SetStatAt(ctx context.Context, rp *ResolvingPath, opts SetStatOptions) error // StatAt returns metadata for the file at rp. StatAt(ctx context.Context, rp *ResolvingPath, opts StatOptions) (linux.Statx, error) // StatFSAt returns metadata for the filesystem containing the file at rp. // (This method takes a path because a FilesystemImpl may consist of any // number of constituent filesystems.) StatFSAt(ctx context.Context, rp *ResolvingPath) (linux.Statfs, error) // SymlinkAt creates a symbolic link at rp referring to the given target. // // Errors: // // - If the last path component in rp is "." or "..", SymlinkAt returns // EEXIST. // // - If a file already exists at rp, SymlinkAt returns EEXIST. // // - If rp.MustBeDir(), SymlinkAt returns ENOENT. // // - If the directory in which the symbolic link would be created has been // removed by RmdirAt or RenameAt, SymlinkAt returns ENOENT. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If SymlinkAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). SymlinkAt(ctx context.Context, rp *ResolvingPath, target string) error // UnlinkAt removes the file at rp. // // Errors: // // - If the last path component in rp is "." or "..", UnlinkAt returns // EISDIR. // // - If no file exists at rp, UnlinkAt returns ENOENT. // // - If rp.MustBeDir(), and the file at rp exists and is not a directory, // UnlinkAt returns ENOTDIR. // // - If the file at rp exists but is a directory, UnlinkAt returns EISDIR. // // Preconditions: !rp.Done(). For the final path component in rp, // !rp.ShouldFollowSymlink(). // // Postconditions: If UnlinkAt returns an error returned by // ResolvingPath.Resolve*(), then !rp.Done(). UnlinkAt(ctx context.Context, rp *ResolvingPath) error // ListxattrAt returns all extended attribute names for the file at rp. // // Errors: // // - If extended attributes are not supported by the filesystem, // ListxattrAt returns nil. (See FileDescription.Listxattr for an // explanation.) ListxattrAt(ctx context.Context, rp *ResolvingPath) ([]string, error) // GetxattrAt returns the value associated with the given extended // attribute for the file at rp. // // Errors: // // - If extended attributes are not supported by the filesystem, GetxattrAt // returns ENOTSUP. GetxattrAt(ctx context.Context, rp *ResolvingPath, name string) (string, error) // SetxattrAt changes the value associated with the given extended // attribute for the file at rp. // // Errors: // // - If extended attributes are not supported by the filesystem, SetxattrAt // returns ENOTSUP. SetxattrAt(ctx context.Context, rp *ResolvingPath, opts SetxattrOptions) error // RemovexattrAt removes the given extended attribute from the file at rp. // // Errors: // // - If extended attributes are not supported by the filesystem, // RemovexattrAt returns ENOTSUP. RemovexattrAt(ctx context.Context, rp *ResolvingPath, name string) error // BoundEndpointAt returns the Unix socket endpoint bound at the path rp. // // - If a non-socket file exists at rp, then BoundEndpointAt returns ECONNREFUSED. BoundEndpointAt(ctx context.Context, rp *ResolvingPath) (transport.BoundEndpoint, error) // PrependPath prepends a path from vd to vd.Mount().Root() to b. // // If vfsroot.Ok(), it is the contextual VFS root; if it is encountered // before vd.Mount().Root(), PrependPath should stop prepending path // components and return a PrependPathAtVFSRootError. // // If traversal of vd.Dentry()'s ancestors encounters an independent // ("root") Dentry that is not vd.Mount().Root() (i.e. vd.Dentry() is not a // descendant of vd.Mount().Root()), PrependPath should stop prepending // path components and return a PrependPathAtNonMountRootError. // // Filesystems for which Dentries do not have meaningful paths may prepend // an arbitrary descriptive string to b and then return a // PrependPathSyntheticError. // // Most implementations can acquire the appropriate locks to ensure that // Dentry.Name() and Dentry.Parent() are fixed for vd.Dentry() and all of // its ancestors, then call GenericPrependPath. // // Preconditions: vd.Mount().Filesystem().Impl() == this FilesystemImpl. PrependPath(ctx context.Context, vfsroot, vd VirtualDentry, b *fspath.Builder) error // TODO: inotify_add_watch() } // PrependPathAtVFSRootError is returned by implementations of // FilesystemImpl.PrependPath() when they encounter the contextual VFS root. type PrependPathAtVFSRootError struct{} // Error implements error.Error. func (PrependPathAtVFSRootError) Error() string { return "vfs.FilesystemImpl.PrependPath() reached VFS root" } // PrependPathAtNonMountRootError is returned by implementations of // FilesystemImpl.PrependPath() when they encounter an independent ancestor // Dentry that is not the Mount root. type PrependPathAtNonMountRootError struct{} // Error implements error.Error. func (PrependPathAtNonMountRootError) Error() string { return "vfs.FilesystemImpl.PrependPath() reached root other than Mount root" } // PrependPathSyntheticError is returned by implementations of // FilesystemImpl.PrependPath() for which prepended names do not represent real // paths. type PrependPathSyntheticError struct{} // Error implements error.Error. func (PrependPathSyntheticError) Error() string { return "vfs.FilesystemImpl.PrependPath() prepended synthetic name" }