// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package vfs implements a virtual filesystem layer. // // Lock order: // // EpollInstance.interestMu // FileDescription.epollMu // FilesystemImpl/FileDescriptionImpl locks // VirtualFilesystem.mountMu // Dentry.mu // Locks acquired by FilesystemImpls between Prepare{Delete,Rename}Dentry and Commit{Delete,Rename*}Dentry // VirtualFilesystem.filesystemsMu // fdnotifier.notifier.mu // EpollInstance.mu // Locks acquired by FileDescriptionImpl.Readiness // Inotify.mu // Watches.mu // Inotify.evMu // VirtualFilesystem.fsTypesMu // // Locking Dentry.mu in multiple Dentries requires holding // VirtualFilesystem.mountMu. Locking EpollInstance.interestMu in multiple // EpollInstances requires holding epollCycleMu. package vfs import ( "fmt" "path" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsmetric" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sync" ) // A VirtualFilesystem (VFS for short) combines Filesystems in trees of Mounts. // // There is no analogue to the VirtualFilesystem type in Linux, as the // equivalent state in Linux is global. // // +stateify savable type VirtualFilesystem struct { // mountMu serializes mount mutations. // // mountMu is analogous to Linux's namespace_sem. mountMu sync.Mutex `state:"nosave"` // mounts maps (mount parent, mount point) pairs to mounts. (Since mounts // are uniquely namespaced, including mount parent in the key correctly // handles both bind mounts and mount namespaces; Linux does the same.) // Synchronization between mutators and readers is provided by mounts.seq; // synchronization between mutators is provided by mountMu. // // mounts is used to follow mount points during path traversal. We use a // single table rather than per-Dentry tables to reduce size (and therefore // cache footprint) for the vast majority of Dentries that are not mount // points. // // mounts is analogous to Linux's mount_hashtable. mounts mountTable `state:".([]*Mount)"` // mountpoints maps mount points to mounts at those points in all // namespaces. mountpoints is protected by mountMu. // // mountpoints is used to find mounts that must be umounted due to // removal of a mount point Dentry from another mount namespace. ("A file // or directory that is a mount point in one namespace that is not a mount // point in another namespace, may be renamed, unlinked, or removed // (rmdir(2)) in the mount namespace in which it is not a mount point // (subject to the usual permission checks)." - mount_namespaces(7)) // // mountpoints is analogous to Linux's mountpoint_hashtable. mountpoints map[*Dentry]map[*Mount]struct{} // lastMountID is the last allocated mount ID. lastMountID is accessed // using atomic memory operations. lastMountID uint64 // anonMount is a Mount, not included in mounts or mountpoints, // representing an anonFilesystem. anonMount is used to back // VirtualDentries returned by VirtualFilesystem.NewAnonVirtualDentry(). // anonMount is immutable. // // anonMount is analogous to Linux's anon_inode_mnt. anonMount *Mount // devices contains all registered Devices. devices is protected by // devicesMu. devicesMu sync.RWMutex `state:"nosave"` devices map[devTuple]*registeredDevice // anonBlockDevMinor contains all allocated anonymous block device minor // numbers. anonBlockDevMinorNext is a lower bound for the smallest // unallocated anonymous block device number. anonBlockDevMinorNext and // anonBlockDevMinor are protected by anonBlockDevMinorMu. anonBlockDevMinorMu sync.Mutex `state:"nosave"` anonBlockDevMinorNext uint32 anonBlockDevMinor map[uint32]struct{} // fsTypes contains all registered FilesystemTypes. fsTypes is protected by // fsTypesMu. fsTypesMu sync.RWMutex `state:"nosave"` fsTypes map[string]*registeredFilesystemType // filesystems contains all Filesystems. filesystems is protected by // filesystemsMu. filesystemsMu sync.Mutex `state:"nosave"` filesystems map[*Filesystem]struct{} } // Init initializes a new VirtualFilesystem with no mounts or FilesystemTypes. func (vfs *VirtualFilesystem) Init(ctx context.Context) error { if vfs.mountpoints != nil { panic("VFS already initialized") } vfs.mountpoints = make(map[*Dentry]map[*Mount]struct{}) vfs.devices = make(map[devTuple]*registeredDevice) vfs.anonBlockDevMinorNext = 1 vfs.anonBlockDevMinor = make(map[uint32]struct{}) vfs.fsTypes = make(map[string]*registeredFilesystemType) vfs.filesystems = make(map[*Filesystem]struct{}) vfs.mounts.Init() // Construct vfs.anonMount. anonfsDevMinor, err := vfs.GetAnonBlockDevMinor() if err != nil { // This shouldn't be possible since anonBlockDevMinorNext was // initialized to 1 above (no device numbers have been allocated yet). panic(fmt.Sprintf("VirtualFilesystem.Init: device number allocation for anonfs failed: %v", err)) } anonfs := anonFilesystem{ devMinor: anonfsDevMinor, } anonfs.vfsfs.Init(vfs, &anonFilesystemType{}, &anonfs) defer anonfs.vfsfs.DecRef(ctx) anonMount := vfs.NewDisconnectedMount(&anonfs.vfsfs, nil, &MountOptions{}) vfs.anonMount = anonMount return nil } // Release drops references on filesystem objects held by vfs. // // Precondition: This must be called after VFS.Init() has succeeded. func (vfs *VirtualFilesystem) Release(ctx context.Context) { vfs.anonMount.DecRef(ctx) for _, fst := range vfs.fsTypes { fst.fsType.Release(ctx) } } // PathOperation specifies the path operated on by a VFS method. // // PathOperation is passed to VFS methods by pointer to reduce memory copying: // it's somewhat large and should never escape. (Options structs are passed by // pointer to VFS and FileDescription methods for the same reason.) // // +stateify savable type PathOperation struct { // Root is the VFS root. References on Root are borrowed from the provider // of the PathOperation. // // Invariants: Root.Ok(). Root VirtualDentry // Start is the starting point for the path traversal. References on Start // are borrowed from the provider of the PathOperation (i.e. the caller of // the VFS method to which the PathOperation was passed). // // Invariants: Start.Ok(). If Path.Absolute, then Start == Root. Start VirtualDentry // Path is the pathname traversed by this operation. Path fspath.Path // If FollowFinalSymlink is true, and the Dentry traversed by the final // path component represents a symbolic link, the symbolic link should be // followed. FollowFinalSymlink bool } // AccessAt checks whether a user with creds has access to the file at // the given path. func (vfs *VirtualFilesystem) AccessAt(ctx context.Context, creds *auth.Credentials, ats AccessTypes, pop *PathOperation) error { rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.AccessAt(ctx, rp, creds, ats) if err == nil { rp.Release(ctx) return nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // GetDentryAt returns a VirtualDentry representing the given path, at which a // file must exist. A reference is taken on the returned VirtualDentry. func (vfs *VirtualFilesystem) GetDentryAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetDentryOptions) (VirtualDentry, error) { rp := vfs.getResolvingPath(creds, pop) for { d, err := rp.mount.fs.impl.GetDentryAt(ctx, rp, *opts) if err == nil { vd := VirtualDentry{ mount: rp.mount, dentry: d, } rp.mount.IncRef() rp.Release(ctx) return vd, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return VirtualDentry{}, err } } } // Preconditions: pop.Path.Begin.Ok(). func (vfs *VirtualFilesystem) getParentDirAndName(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (VirtualDentry, string, error) { rp := vfs.getResolvingPath(creds, pop) for { parent, err := rp.mount.fs.impl.GetParentDentryAt(ctx, rp) if err == nil { parentVD := VirtualDentry{ mount: rp.mount, dentry: parent, } rp.mount.IncRef() name := rp.Component() rp.Release(ctx) return parentVD, name, nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.GetParentDentryAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return VirtualDentry{}, "", err } } } // LinkAt creates a hard link at newpop representing the existing file at // oldpop. func (vfs *VirtualFilesystem) LinkAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation) error { oldVD, err := vfs.GetDentryAt(ctx, creds, oldpop, &GetDentryOptions{}) if err != nil { return err } if !newpop.Path.Begin.Ok() { oldVD.DecRef(ctx) if newpop.Path.Absolute { return linuxerr.EEXIST } return linuxerr.ENOENT } if newpop.FollowFinalSymlink { oldVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.LinkAt: file creation paths can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, newpop) for { err := rp.mount.fs.impl.LinkAt(ctx, rp, oldVD) if err == nil { rp.Release(ctx) oldVD.DecRef(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.LinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) oldVD.DecRef(ctx) return err } } } // MkdirAt creates a directory at the given path. func (vfs *VirtualFilesystem) MkdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MkdirOptions) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. // This is consistent with mkdirat(dirfd, "", mode). if pop.Path.Absolute { return linuxerr.EEXIST } return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MkdirAt: file creation paths can't follow final symlink") return linuxerr.EINVAL } // "Under Linux, apart from the permission bits, the S_ISVTX mode bit is // also honored." - mkdir(2) opts.Mode &= 0777 | linux.S_ISVTX rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.MkdirAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.MkdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // MknodAt creates a file of the given mode at the given path. It returns an // error from the linuxerr package. func (vfs *VirtualFilesystem) MknodAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *MknodOptions) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. // This is consistent with mknodat(dirfd, "", mode, dev). if pop.Path.Absolute { return linuxerr.EEXIST } return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.MknodAt: file creation paths can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.MknodAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.MknodAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // OpenAt returns a FileDescription providing access to the file at the given // path. A reference is taken on the returned FileDescription. func (vfs *VirtualFilesystem) OpenAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *OpenOptions) (*FileDescription, error) { fsmetric.Opens.Increment() // Remove: // // - O_CLOEXEC, which affects file descriptors and therefore must be // handled outside of VFS. // // - Unknown flags. opts.Flags &= linux.O_ACCMODE | linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_APPEND | linux.O_NONBLOCK | linux.O_DSYNC | linux.O_ASYNC | linux.O_DIRECT | linux.O_LARGEFILE | linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_NOATIME | linux.O_SYNC | linux.O_PATH | linux.O_TMPFILE // Linux's __O_SYNC (which we call linux.O_SYNC) implies O_DSYNC. if opts.Flags&linux.O_SYNC != 0 { opts.Flags |= linux.O_DSYNC } // Linux's __O_TMPFILE (which we call linux.O_TMPFILE) must be specified // with O_DIRECTORY and a writable access mode (to ensure that it fails on // filesystem implementations that do not support it). if opts.Flags&linux.O_TMPFILE != 0 { if opts.Flags&linux.O_DIRECTORY == 0 { return nil, linuxerr.EINVAL } if opts.Flags&linux.O_CREAT != 0 { return nil, linuxerr.EINVAL } if opts.Flags&linux.O_ACCMODE == linux.O_RDONLY { return nil, linuxerr.EINVAL } } // O_PATH causes most other flags to be ignored. if opts.Flags&linux.O_PATH != 0 { opts.Flags &= linux.O_DIRECTORY | linux.O_NOFOLLOW | linux.O_PATH } // "On Linux, the following bits are also honored in mode: [S_ISUID, // S_ISGID, S_ISVTX]" - open(2) opts.Mode &= 0777 | linux.S_ISUID | linux.S_ISGID | linux.S_ISVTX if opts.Flags&linux.O_NOFOLLOW != 0 { pop.FollowFinalSymlink = false } rp := vfs.getResolvingPath(creds, pop) if opts.Flags&linux.O_DIRECTORY != 0 { rp.mustBeDir = true } if opts.Flags&linux.O_PATH != 0 { vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{}) if err != nil { return nil, err } fd := &opathFD{} if err := fd.vfsfd.Init(fd, opts.Flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{}); err != nil { return nil, err } vd.DecRef(ctx) return &fd.vfsfd, err } for { fd, err := rp.mount.fs.impl.OpenAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) if opts.FileExec { if fd.Mount().Flags.NoExec { fd.DecRef(ctx) return nil, linuxerr.EACCES } // Only a regular file can be executed. stat, err := fd.Stat(ctx, StatOptions{Mask: linux.STATX_TYPE}) if err != nil { fd.DecRef(ctx) return nil, err } if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.S_IFMT != linux.S_IFREG { fd.DecRef(ctx) return nil, linuxerr.EACCES } } fd.Dentry().InotifyWithParent(ctx, linux.IN_OPEN, 0, PathEvent) return fd, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return nil, err } } } // ReadlinkAt returns the target of the symbolic link at the given path. func (vfs *VirtualFilesystem) ReadlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (string, error) { rp := vfs.getResolvingPath(creds, pop) for { target, err := rp.mount.fs.impl.ReadlinkAt(ctx, rp) if err == nil { rp.Release(ctx) return target, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return "", err } } } // RenameAt renames the file at oldpop to newpop. func (vfs *VirtualFilesystem) RenameAt(ctx context.Context, creds *auth.Credentials, oldpop, newpop *PathOperation, opts *RenameOptions) error { if !oldpop.Path.Begin.Ok() { if oldpop.Path.Absolute { return linuxerr.EBUSY } return linuxerr.ENOENT } if oldpop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RenameAt: source path can't follow final symlink") return linuxerr.EINVAL } oldParentVD, oldName, err := vfs.getParentDirAndName(ctx, creds, oldpop) if err != nil { return err } if oldName == "." || oldName == ".." { oldParentVD.DecRef(ctx) return linuxerr.EBUSY } if !newpop.Path.Begin.Ok() { oldParentVD.DecRef(ctx) if newpop.Path.Absolute { return linuxerr.EBUSY } return linuxerr.ENOENT } if newpop.FollowFinalSymlink { oldParentVD.DecRef(ctx) ctx.Warningf("VirtualFilesystem.RenameAt: destination path can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, newpop) renameOpts := *opts if oldpop.Path.Dir { renameOpts.MustBeDir = true } for { err := rp.mount.fs.impl.RenameAt(ctx, rp, oldParentVD, oldName, renameOpts) if err == nil { rp.Release(ctx) oldParentVD.DecRef(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.RenameAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) oldParentVD.DecRef(ctx) return err } } } // RmdirAt removes the directory at the given path. func (vfs *VirtualFilesystem) RmdirAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. // This is consistent with unlinkat(dirfd, "", AT_REMOVEDIR). if pop.Path.Absolute { return linuxerr.EBUSY } return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.RmdirAt: file deletion paths can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.RmdirAt(ctx, rp) if err == nil { rp.Release(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.RmdirAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // SetStatAt changes metadata for the file at the given path. func (vfs *VirtualFilesystem) SetStatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetStatOptions) error { rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.SetStatAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // StatAt returns metadata for the file at the given path. func (vfs *VirtualFilesystem) StatAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *StatOptions) (linux.Statx, error) { rp := vfs.getResolvingPath(creds, pop) for { stat, err := rp.mount.fs.impl.StatAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return stat, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return linux.Statx{}, err } } } // StatFSAt returns metadata for the filesystem containing the file at the // given path. func (vfs *VirtualFilesystem) StatFSAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) (linux.Statfs, error) { rp := vfs.getResolvingPath(creds, pop) for { statfs, err := rp.mount.fs.impl.StatFSAt(ctx, rp) if err == nil { rp.Release(ctx) return statfs, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return linux.Statfs{}, err } } } // SymlinkAt creates a symbolic link at the given path with the given target. func (vfs *VirtualFilesystem) SymlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, target string) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. // This is consistent with symlinkat(oldpath, newdirfd, ""). if pop.Path.Absolute { return linuxerr.EEXIST } return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.SymlinkAt: file creation paths can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.SymlinkAt(ctx, rp, target) if err == nil { rp.Release(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.SymlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // UnlinkAt deletes the non-directory file at the given path. func (vfs *VirtualFilesystem) UnlinkAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation) error { if !pop.Path.Begin.Ok() { // pop.Path should not be empty in operations that create/delete files. // This is consistent with unlinkat(dirfd, "", 0). if pop.Path.Absolute { return linuxerr.EBUSY } return linuxerr.ENOENT } if pop.FollowFinalSymlink { ctx.Warningf("VirtualFilesystem.UnlinkAt: file deletion paths can't follow final symlink") return linuxerr.EINVAL } rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.UnlinkAt(ctx, rp) if err == nil { rp.Release(ctx) return nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.UnlinkAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // BoundEndpointAt gets the bound endpoint at the given path, if one exists. func (vfs *VirtualFilesystem) BoundEndpointAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *BoundEndpointOptions) (transport.BoundEndpoint, error) { rp := vfs.getResolvingPath(creds, pop) for { bep, err := rp.mount.fs.impl.BoundEndpointAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return bep, nil } if checkInvariants { if rp.canHandleError(err) && rp.Done() { panic(fmt.Sprintf("%T.BoundEndpointAt() consumed all path components and returned %v", rp.mount.fs.impl, err)) } } if !rp.handleError(ctx, err) { rp.Release(ctx) return nil, err } } } // ListXattrAt returns all extended attribute names for the file at the given // path. func (vfs *VirtualFilesystem) ListXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, size uint64) ([]string, error) { rp := vfs.getResolvingPath(creds, pop) for { names, err := rp.mount.fs.impl.ListXattrAt(ctx, rp, size) if err == nil { rp.Release(ctx) return names, nil } if linuxerr.Equals(linuxerr.EOPNOTSUPP, err) { // Linux doesn't actually return EOPNOTSUPP in this case; instead, // fs/xattr.c:vfs_listxattr() falls back to allowing the security // subsystem to return security extended attributes, which by // default don't exist. rp.Release(ctx) return nil, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return nil, err } } } // GetXattrAt returns the value associated with the given extended attribute // for the file at the given path. func (vfs *VirtualFilesystem) GetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *GetXattrOptions) (string, error) { rp := vfs.getResolvingPath(creds, pop) for { val, err := rp.mount.fs.impl.GetXattrAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return val, nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return "", err } } } // SetXattrAt changes the value associated with the given extended attribute // for the file at the given path. func (vfs *VirtualFilesystem) SetXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *SetXattrOptions) error { rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.SetXattrAt(ctx, rp, *opts) if err == nil { rp.Release(ctx) return nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // RemoveXattrAt removes the given extended attribute from the file at rp. func (vfs *VirtualFilesystem) RemoveXattrAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, name string) error { rp := vfs.getResolvingPath(creds, pop) for { err := rp.mount.fs.impl.RemoveXattrAt(ctx, rp, name) if err == nil { rp.Release(ctx) return nil } if !rp.handleError(ctx, err) { rp.Release(ctx) return err } } } // SyncAllFilesystems has the semantics of Linux's sync(2). func (vfs *VirtualFilesystem) SyncAllFilesystems(ctx context.Context) error { var retErr error for fs := range vfs.getFilesystems() { if err := fs.impl.Sync(ctx); err != nil && retErr == nil { retErr = err } fs.DecRef(ctx) } return retErr } func (vfs *VirtualFilesystem) getFilesystems() map[*Filesystem]struct{} { fss := make(map[*Filesystem]struct{}) vfs.filesystemsMu.Lock() defer vfs.filesystemsMu.Unlock() for fs := range vfs.filesystems { if !fs.TryIncRef() { continue } fss[fs] = struct{}{} } return fss } // MkdirAllAt recursively creates non-existent directories on the given path // (including the last component). func (vfs *VirtualFilesystem) MkdirAllAt(ctx context.Context, currentPath string, root VirtualDentry, creds *auth.Credentials, mkdirOpts *MkdirOptions) error { pop := &PathOperation{ Root: root, Start: root, Path: fspath.Parse(currentPath), } stat, err := vfs.StatAt(ctx, creds, pop, &StatOptions{Mask: linux.STATX_TYPE}) switch { case err == nil: if stat.Mask&linux.STATX_TYPE == 0 || stat.Mode&linux.FileTypeMask != linux.ModeDirectory { return linuxerr.ENOTDIR } // Directory already exists. return nil case linuxerr.Equals(linuxerr.ENOENT, err): // Expected, we will create the dir. default: return fmt.Errorf("stat failed for %q during directory creation: %w", currentPath, err) } // Recurse to ensure parent is created and then create the final directory. if err := vfs.MkdirAllAt(ctx, path.Dir(currentPath), root, creds, mkdirOpts); err != nil { return err } if err := vfs.MkdirAt(ctx, creds, pop, mkdirOpts); err != nil { return fmt.Errorf("failed to create directory %q: %w", currentPath, err) } return nil } // MakeSyntheticMountpoint creates parent directories of target if they do not // exist and attempts to create a directory for the mountpoint. If a // non-directory file already exists there then we allow it. func (vfs *VirtualFilesystem) MakeSyntheticMountpoint(ctx context.Context, target string, root VirtualDentry, creds *auth.Credentials) error { mkdirOpts := &MkdirOptions{Mode: 0777, ForSyntheticMountpoint: true} // Make sure the parent directory of target exists. if err := vfs.MkdirAllAt(ctx, path.Dir(target), root, creds, mkdirOpts); err != nil { return fmt.Errorf("failed to create parent directory of mountpoint %q: %w", target, err) } // Attempt to mkdir the final component. If a file (of any type) exists // then we let allow mounting on top of that because we do not require the // target to be an existing directory, unlike Linux mount(2). if err := vfs.MkdirAt(ctx, creds, &PathOperation{ Root: root, Start: root, Path: fspath.Parse(target), }, mkdirOpts); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { return fmt.Errorf("failed to create mountpoint %q: %w", target, err) } return nil } // A VirtualDentry represents a node in a VFS tree, by combining a Dentry // (which represents a node in a Filesystem's tree) and a Mount (which // represents the Filesystem's position in a VFS mount tree). // // VirtualDentry's semantics are similar to that of a Go interface object // representing a pointer: it is a copyable value type that represents // references to another entity. The zero value of VirtualDentry is an "empty // VirtualDentry", directly analogous to a nil interface object. // VirtualDentry.Ok() checks that a VirtualDentry is not zero-valued; unless // otherwise specified, all other VirtualDentry methods require // VirtualDentry.Ok() == true. // // Mounts and Dentries are reference-counted, requiring that users call // VirtualDentry.{Inc,Dec}Ref() as appropriate. We often colloquially refer to // references on the Mount and Dentry referred to by a VirtualDentry as // references on the VirtualDentry itself. Unless otherwise specified, all // VirtualDentry methods require that a reference is held on the VirtualDentry. // // VirtualDentry is analogous to Linux's struct path. // // +stateify savable type VirtualDentry struct { mount *Mount dentry *Dentry } // MakeVirtualDentry creates a VirtualDentry. func MakeVirtualDentry(mount *Mount, dentry *Dentry) VirtualDentry { return VirtualDentry{ mount: mount, dentry: dentry, } } // Ok returns true if vd is not empty. It does not require that a reference is // held. func (vd VirtualDentry) Ok() bool { return vd.mount != nil } // IncRef increments the reference counts on the Mount and Dentry represented // by vd. func (vd VirtualDentry) IncRef() { vd.mount.IncRef() vd.dentry.IncRef() } // DecRef decrements the reference counts on the Mount and Dentry represented // by vd. func (vd VirtualDentry) DecRef(ctx context.Context) { vd.dentry.DecRef(ctx) vd.mount.DecRef(ctx) } // Mount returns the Mount associated with vd. It does not take a reference on // the returned Mount. func (vd VirtualDentry) Mount() *Mount { return vd.mount } // Dentry returns the Dentry associated with vd. It does not take a reference // on the returned Dentry. func (vd VirtualDentry) Dentry() *Dentry { return vd.dentry }