From cd86bd493156f055aa09a5c23f33a8a432cb8d00 Mon Sep 17 00:00:00 2001 From: Jamie Liu Date: Mon, 19 Oct 2020 17:46:05 -0700 Subject: Fix runsc tests on VFS2 overlay. - Check the sticky bit in overlay.filesystem.UnlinkAt(). Fixes StickyTest.StickyBitPermDenied. - When configuring a VFS2 overlay in runsc, copy the lower layer's root owner/group/mode to the upper layer's root (as in the VFS1 equivalent, boot.addOverlay()). This makes the overlay root owned by UID/GID 65534 with mode 0755 rather than owned by UID/GID 0 with mode 01777. Fixes CreateTest.CreateFailsOnUnpermittedDir, which assumes that the test cannot create files in /. - MknodTest.UnimplementedTypesReturnError assumes that the creation of device special files is not supported. However, while the VFS2 gofer client still doesn't support device special files, VFS2 tmpfs does, and in the overlay test dimension mknod() targets a tmpfs upper layer. The test initially has all capabilities, including CAP_MKNOD, so its creation of these files succeeds. Constrain these tests to VFS1. - Rename overlay.nonDirectoryFD to overlay.regularFileFD and only use it for regular files, using the original FD for pipes and device special files. This is more consistent with Linux (which gets the original inode_operations, and therefore file_operations, for these file types from ovl_fill_inode() => init_special_inode()) and fixes remaining mknod and pipe tests. - Read/write 1KB at a time in PipeTest.Streaming, rather than 4 bytes. This isn't strictly necessary, but it makes the test less obnoxiously slow on ptrace. Fixes #4407 PiperOrigin-RevId: 337971042 --- pkg/sentry/fsimpl/overlay/BUILD | 5 +- pkg/sentry/fsimpl/overlay/copy_up.go | 70 +++-- pkg/sentry/fsimpl/overlay/filesystem.go | 52 +++- pkg/sentry/fsimpl/overlay/non_directory.go | 375 ------------------------ pkg/sentry/fsimpl/overlay/overlay.go | 4 +- pkg/sentry/fsimpl/overlay/regular_file.go | 456 +++++++++++++++++++++++++++++ 6 files changed, 537 insertions(+), 425 deletions(-) delete mode 100644 pkg/sentry/fsimpl/overlay/non_directory.go create mode 100644 pkg/sentry/fsimpl/overlay/regular_file.go (limited to 'pkg/sentry/fsimpl') diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD index 8cf5b35d3..1e11b0428 100644 --- a/pkg/sentry/fsimpl/overlay/BUILD +++ b/pkg/sentry/fsimpl/overlay/BUILD @@ -21,14 +21,16 @@ go_library( "directory.go", "filesystem.go", "fstree.go", - "non_directory.go", "overlay.go", + "regular_file.go", ], visibility = ["//pkg/sentry:internal"], deps = [ "//pkg/abi/linux", "//pkg/context", "//pkg/fspath", + "//pkg/log", + "//pkg/sentry/arch", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", "//pkg/sentry/memmap", @@ -37,5 +39,6 @@ go_library( "//pkg/sync", "//pkg/syserror", "//pkg/usermem", + "//pkg/waiter", ], ) diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go index 73b126669..4506642ca 100644 --- a/pkg/sentry/fsimpl/overlay/copy_up.go +++ b/pkg/sentry/fsimpl/overlay/copy_up.go @@ -75,8 +75,21 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { return syserror.ENOENT } - // Perform copy-up. + // Obtain settable timestamps from the lower layer. vfsObj := d.fs.vfsfs.VirtualFilesystem() + oldpop := vfs.PathOperation{ + Root: d.lowerVDs[0], + Start: d.lowerVDs[0], + } + const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME + oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{ + Mask: timestampsMask, + }) + if err != nil { + return err + } + + // Perform copy-up. newpop := vfs.PathOperation{ Root: d.parent.upperVD, Start: d.parent.upperVD, @@ -101,10 +114,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } switch ftype { case linux.S_IFREG: - oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVDs[0], - Start: d.lowerVDs[0], - }, &vfs.OpenOptions{ + oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{ Flags: linux.O_RDONLY, }) if err != nil { @@ -160,9 +170,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } if err := newFD.SetStat(ctx, vfs.SetStatOptions{ Stat: linux.Statx{ - Mask: linux.STATX_UID | linux.STATX_GID, - UID: d.uid, - GID: d.gid, + Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, + UID: d.uid, + GID: d.gid, + Atime: oldStat.Atime, + Mtime: oldStat.Mtime, }, }); err != nil { cleanupUndoCopyUp() @@ -179,9 +191,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ Stat: linux.Statx{ - Mask: linux.STATX_UID | linux.STATX_GID, - UID: d.uid, - GID: d.gid, + Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, + UID: d.uid, + GID: d.gid, + Atime: oldStat.Atime, + Mtime: oldStat.Mtime, }, }); err != nil { cleanupUndoCopyUp() @@ -195,10 +209,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { d.upperVD = upperVD case linux.S_IFLNK: - target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVDs[0], - Start: d.lowerVDs[0], - }) + target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop) if err != nil { return err } @@ -207,10 +218,12 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { } if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ Stat: linux.Statx{ - Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID, - Mode: uint16(d.mode), - UID: d.uid, - GID: d.gid, + Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, + Mode: uint16(d.mode), + UID: d.uid, + GID: d.gid, + Atime: oldStat.Atime, + Mtime: oldStat.Mtime, }, }); err != nil { cleanupUndoCopyUp() @@ -224,25 +237,20 @@ func (d *dentry) copyUpLocked(ctx context.Context) error { d.upperVD = upperVD case linux.S_IFBLK, linux.S_IFCHR: - lowerStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: d.lowerVDs[0], - Start: d.lowerVDs[0], - }, &vfs.StatOptions{}) - if err != nil { - return err - } if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{ Mode: linux.FileMode(d.mode), - DevMajor: lowerStat.RdevMajor, - DevMinor: lowerStat.RdevMinor, + DevMajor: oldStat.RdevMajor, + DevMinor: oldStat.RdevMinor, }); err != nil { return err } if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{ Stat: linux.Statx{ - Mask: linux.STATX_UID | linux.STATX_GID, - UID: d.uid, - GID: d.gid, + Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask×tampsMask, + UID: d.uid, + GID: d.gid, + Atime: oldStat.Atime, + Mtime: oldStat.Mtime, }, }); err != nil { cleanupUndoCopyUp() diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go index bd11372d5..78a01bbb7 100644 --- a/pkg/sentry/fsimpl/overlay/filesystem.go +++ b/pkg/sentry/fsimpl/overlay/filesystem.go @@ -765,7 +765,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if mustCreate { return nil, syserror.EEXIST } - if mayWrite { + if start.isRegularFile() && mayWrite { if err := start.copyUpLocked(ctx); err != nil { return nil, err } @@ -819,7 +819,7 @@ afterTrailingSymlink: if rp.MustBeDir() && !child.isDir() { return nil, syserror.ENOTDIR } - if mayWrite { + if child.isRegularFile() && mayWrite { if err := child.copyUpLocked(ctx); err != nil { return nil, err } @@ -872,8 +872,11 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts * if err != nil { return nil, err } + if ftype != linux.S_IFREG { + return layerFD, nil + } layerFlags := layerFD.StatusFlags() - fd := &nonDirectoryFD{ + fd := ®ularFileFD{ copiedUp: isUpper, cachedFD: layerFD, cachedFlags: layerFlags, @@ -969,7 +972,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving } // Finally construct the overlay FD. upperFlags := upperFD.StatusFlags() - fd := &nonDirectoryFD{ + fd := ®ularFileFD{ copiedUp: true, cachedFD: upperFD, cachedFlags: upperFlags, @@ -1293,6 +1296,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error if !child.isDir() { return syserror.ENOTDIR } + if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(atomic.LoadUint32(&parent.mode)), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil { + return err + } child.dirMu.Lock() defer child.dirMu.Unlock() whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx) @@ -1528,12 +1534,38 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } + parentMode := atomic.LoadUint32(&parent.mode) child := parent.children[name] var childLayer lookupLayer + if child == nil { + if parentMode&linux.S_ISVTX != 0 { + // If the parent's sticky bit is set, we need a child dentry to get + // its owner. + child, err = fs.getChildLocked(ctx, parent, name, &ds) + if err != nil { + return err + } + } else { + // Determine if the file being unlinked actually exists. Holding + // parent.dirMu prevents a dentry from being instantiated for the file, + // which in turn prevents it from being copied-up, so this result is + // stable. + childLayer, err = fs.lookupLayerLocked(ctx, parent, name) + if err != nil { + return err + } + if !childLayer.existsInOverlay() { + return syserror.ENOENT + } + } + } if child != nil { if child.isDir() { return syserror.EISDIR } + if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(parentMode), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil { + return err + } if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil { return err } @@ -1546,18 +1578,6 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error } else { childLayer = lookupLayerLower } - } else { - // Determine if the file being unlinked actually exists. Holding - // parent.dirMu prevents a dentry from being instantiated for the file, - // which in turn prevents it from being copied-up, so this result is - // stable. - childLayer, err = fs.lookupLayerLocked(ctx, parent, name) - if err != nil { - return err - } - if !childLayer.existsInOverlay() { - return syserror.ENOENT - } } pop := vfs.PathOperation{ diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go deleted file mode 100644 index 853aee951..000000000 --- a/pkg/sentry/fsimpl/overlay/non_directory.go +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package overlay - -import ( - "sync/atomic" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/memmap" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -func (d *dentry) isSymlink() bool { - return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK -} - -func (d *dentry) readlink(ctx context.Context) (string, error) { - layerVD := d.topLayer() - return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: layerVD, - Start: layerVD, - }) -} - -// +stateify savable -type nonDirectoryFD struct { - fileDescription - - // If copiedUp is false, cachedFD represents - // fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents - // fileDescription.dentry().upperVD. cachedFlags is the last known value of - // cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are - // protected by mu. - mu sync.Mutex `state:"nosave"` - copiedUp bool - cachedFD *vfs.FileDescription - cachedFlags uint32 -} - -func (fd *nonDirectoryFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) { - fd.mu.Lock() - defer fd.mu.Unlock() - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return nil, err - } - wrappedFD.IncRef() - return wrappedFD, nil -} - -func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) { - d := fd.dentry() - statusFlags := fd.vfsfd.StatusFlags() - if !fd.copiedUp && d.isCopiedUp() { - // Switch to the copied-up file. - upperVD := d.topLayer() - upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ - Root: upperVD, - Start: upperVD, - }, &vfs.OpenOptions{ - Flags: statusFlags, - }) - if err != nil { - return nil, err - } - oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR) - if oldOffErr == nil { - if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil { - upperFD.DecRef(ctx) - return nil, err - } - } - fd.cachedFD.DecRef(ctx) - fd.copiedUp = true - fd.cachedFD = upperFD - fd.cachedFlags = statusFlags - } else if fd.cachedFlags != statusFlags { - if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil { - return nil, err - } - fd.cachedFlags = statusFlags - } - return fd.cachedFD, nil -} - -// Release implements vfs.FileDescriptionImpl.Release. -func (fd *nonDirectoryFD) Release(ctx context.Context) { - fd.cachedFD.DecRef(ctx) - fd.cachedFD = nil -} - -// OnClose implements vfs.FileDescriptionImpl.OnClose. -func (fd *nonDirectoryFD) OnClose(ctx context.Context) error { - // Linux doesn't define ovl_file_operations.flush at all (i.e. its - // equivalent to OnClose is a no-op). We pass through to - // fd.cachedFD.OnClose() without upgrading if fd.dentry() has been - // copied-up, since OnClose is mostly used to define post-close writeback, - // and if fd.cachedFD hasn't been updated then it can't have been used to - // mutate fd.dentry() anyway. - fd.mu.Lock() - if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags { - if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil { - fd.mu.Unlock() - return err - } - fd.cachedFlags = statusFlags - } - wrappedFD := fd.cachedFD - fd.mu.Unlock() - return wrappedFD.OnClose(ctx) -} - -// Stat implements vfs.FileDescriptionImpl.Stat. -func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { - var stat linux.Statx - if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 { - wrappedFD, err := fd.getCurrentFD(ctx) - if err != nil { - return linux.Statx{}, err - } - stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{ - Mask: layerMask, - Sync: opts.Sync, - }) - wrappedFD.DecRef(ctx) - if err != nil { - return linux.Statx{}, err - } - } - fd.dentry().statInternalTo(ctx, &opts, &stat) - return stat, nil -} - -// Allocate implements vfs.FileDescriptionImpl.Allocate. -func (fd *nonDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error { - wrappedFD, err := fd.getCurrentFD(ctx) - if err != nil { - return err - } - defer wrappedFD.DecRef(ctx) - return wrappedFD.Allocate(ctx, mode, offset, length) -} - -// SetStat implements vfs.FileDescriptionImpl.SetStat. -func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { - d := fd.dentry() - mode := linux.FileMode(atomic.LoadUint32(&d.mode)) - if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil { - return err - } - mnt := fd.vfsfd.Mount() - if err := mnt.CheckBeginWrite(); err != nil { - return err - } - defer mnt.EndWrite() - if err := d.copyUpLocked(ctx); err != nil { - return err - } - // Changes to d's attributes are serialized by d.copyMu. - d.copyMu.Lock() - defer d.copyMu.Unlock() - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return err - } - if err := wrappedFD.SetStat(ctx, opts); err != nil { - return err - } - d.updateAfterSetStatLocked(&opts) - if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { - d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) - } - return nil -} - -// StatFS implements vfs.FileDescriptionImpl.StatFS. -func (fd *nonDirectoryFD) StatFS(ctx context.Context) (linux.Statfs, error) { - return fd.filesystem().statFS(ctx) -} - -// PRead implements vfs.FileDescriptionImpl.PRead. -func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { - wrappedFD, err := fd.getCurrentFD(ctx) - if err != nil { - return 0, err - } - defer wrappedFD.DecRef(ctx) - return wrappedFD.PRead(ctx, dst, offset, opts) -} - -// Read implements vfs.FileDescriptionImpl.Read. -func (fd *nonDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { - // Hold fd.mu during the read to serialize the file offset. - fd.mu.Lock() - defer fd.mu.Unlock() - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return 0, err - } - return wrappedFD.Read(ctx, dst, opts) -} - -// PWrite implements vfs.FileDescriptionImpl.PWrite. -func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { - wrappedFD, err := fd.getCurrentFD(ctx) - if err != nil { - return 0, err - } - defer wrappedFD.DecRef(ctx) - return wrappedFD.PWrite(ctx, src, offset, opts) -} - -// Write implements vfs.FileDescriptionImpl.Write. -func (fd *nonDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { - // Hold fd.mu during the write to serialize the file offset. - fd.mu.Lock() - defer fd.mu.Unlock() - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return 0, err - } - return wrappedFD.Write(ctx, src, opts) -} - -// Seek implements vfs.FileDescriptionImpl.Seek. -func (fd *nonDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { - // Hold fd.mu during the seek to serialize the file offset. - fd.mu.Lock() - defer fd.mu.Unlock() - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return 0, err - } - return wrappedFD.Seek(ctx, offset, whence) -} - -// Sync implements vfs.FileDescriptionImpl.Sync. -func (fd *nonDirectoryFD) Sync(ctx context.Context) error { - fd.mu.Lock() - if !fd.dentry().isCopiedUp() { - fd.mu.Unlock() - return nil - } - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - fd.mu.Unlock() - return err - } - wrappedFD.IncRef() - defer wrappedFD.DecRef(ctx) - fd.mu.Unlock() - return wrappedFD.Sync(ctx) -} - -// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. -func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { - if err := fd.ensureMappable(ctx, opts); err != nil { - return err - } - return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts) -} - -// ensureMappable ensures that fd.dentry().wrappedMappable is not nil. -func (fd *nonDirectoryFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error { - d := fd.dentry() - - // Fast path if we already have a Mappable for the current top layer. - if atomic.LoadUint32(&d.isMappable) != 0 { - return nil - } - - // Only permit mmap of regular files, since other file types may have - // unpredictable behavior when mmapped (e.g. /dev/zero). - if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG { - return syserror.ENODEV - } - - // Get a Mappable for the current top layer. - fd.mu.Lock() - defer fd.mu.Unlock() - d.copyMu.RLock() - defer d.copyMu.RUnlock() - if atomic.LoadUint32(&d.isMappable) != 0 { - return nil - } - wrappedFD, err := fd.currentFDLocked(ctx) - if err != nil { - return err - } - if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil { - return err - } - if opts.MappingIdentity != nil { - opts.MappingIdentity.DecRef(ctx) - opts.MappingIdentity = nil - } - // Use this Mappable for all mappings of this layer (unless we raced with - // another call to ensureMappable). - d.mapsMu.Lock() - defer d.mapsMu.Unlock() - d.dataMu.Lock() - defer d.dataMu.Unlock() - if d.wrappedMappable == nil { - d.wrappedMappable = opts.Mappable - atomic.StoreUint32(&d.isMappable, 1) - } - return nil -} - -// AddMapping implements memmap.Mappable.AddMapping. -func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { - d.mapsMu.Lock() - defer d.mapsMu.Unlock() - if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { - return err - } - if !d.isCopiedUp() { - d.lowerMappings.AddMapping(ms, ar, offset, writable) - } - return nil -} - -// RemoveMapping implements memmap.Mappable.RemoveMapping. -func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { - d.mapsMu.Lock() - defer d.mapsMu.Unlock() - d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) - if !d.isCopiedUp() { - d.lowerMappings.RemoveMapping(ms, ar, offset, writable) - } -} - -// CopyMapping implements memmap.Mappable.CopyMapping. -func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { - d.mapsMu.Lock() - defer d.mapsMu.Unlock() - if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { - return err - } - if !d.isCopiedUp() { - d.lowerMappings.AddMapping(ms, dstAR, offset, writable) - } - return nil -} - -// Translate implements memmap.Mappable.Translate. -func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { - d.dataMu.RLock() - defer d.dataMu.RUnlock() - return d.wrappedMappable.Translate(ctx, required, optional, at) -} - -// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. -func (d *dentry) InvalidateUnsavable(ctx context.Context) error { - d.mapsMu.Lock() - defer d.mapsMu.Unlock() - return d.wrappedMappable.InvalidateUnsavable(ctx) -} diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go index e5f506d2e..4c5de8d32 100644 --- a/pkg/sentry/fsimpl/overlay/overlay.go +++ b/pkg/sentry/fsimpl/overlay/overlay.go @@ -18,7 +18,7 @@ // // Lock order: // -// directoryFD.mu / nonDirectoryFD.mu +// directoryFD.mu / regularFileFD.mu // filesystem.renameMu // dentry.dirMu // dentry.copyMu @@ -453,7 +453,7 @@ type dentry struct { // - If this dentry is copied-up, then wrappedMappable is the Mappable // obtained from a call to the current top layer's // FileDescription.ConfigureMMap(). Once wrappedMappable becomes non-nil - // (from a call to nonDirectoryFD.ensureMappable()), it cannot become nil. + // (from a call to regularFileFD.ensureMappable()), it cannot become nil. // wrappedMappable is protected by mapsMu and dataMu. // // - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is diff --git a/pkg/sentry/fsimpl/overlay/regular_file.go b/pkg/sentry/fsimpl/overlay/regular_file.go new file mode 100644 index 000000000..2b89a7a6d --- /dev/null +++ b/pkg/sentry/fsimpl/overlay/regular_file.go @@ -0,0 +1,456 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package overlay + +import ( + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/memmap" + "gvisor.dev/gvisor/pkg/sentry/vfs" + "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" +) + +func (d *dentry) isRegularFile() bool { + return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFREG +} + +func (d *dentry) isSymlink() bool { + return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK +} + +func (d *dentry) readlink(ctx context.Context) (string, error) { + layerVD := d.topLayer() + return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ + Root: layerVD, + Start: layerVD, + }) +} + +// +stateify savable +type regularFileFD struct { + fileDescription + + // If copiedUp is false, cachedFD represents + // fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents + // fileDescription.dentry().upperVD. cachedFlags is the last known value of + // cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are + // protected by mu. + mu sync.Mutex `state:"nosave"` + copiedUp bool + cachedFD *vfs.FileDescription + cachedFlags uint32 + + // If copiedUp is false, lowerWaiters contains all waiter.Entries + // registered with cachedFD. lowerWaiters is protected by mu. + lowerWaiters map[*waiter.Entry]waiter.EventMask +} + +func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) { + fd.mu.Lock() + defer fd.mu.Unlock() + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return nil, err + } + wrappedFD.IncRef() + return wrappedFD, nil +} + +func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) { + d := fd.dentry() + statusFlags := fd.vfsfd.StatusFlags() + if !fd.copiedUp && d.isCopiedUp() { + // Switch to the copied-up file. + upperVD := d.topLayer() + upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ + Root: upperVD, + Start: upperVD, + }, &vfs.OpenOptions{ + Flags: statusFlags, + }) + if err != nil { + return nil, err + } + oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR) + if oldOffErr == nil { + if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil { + upperFD.DecRef(ctx) + return nil, err + } + } + if len(fd.lowerWaiters) != 0 { + ready := upperFD.Readiness(^waiter.EventMask(0)) + for e, mask := range fd.lowerWaiters { + fd.cachedFD.EventUnregister(e) + upperFD.EventRegister(e, mask) + if ready&mask != 0 { + e.Callback.Callback(e) + } + } + } + fd.cachedFD.DecRef(ctx) + fd.copiedUp = true + fd.cachedFD = upperFD + fd.cachedFlags = statusFlags + fd.lowerWaiters = nil + } else if fd.cachedFlags != statusFlags { + if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil { + return nil, err + } + fd.cachedFlags = statusFlags + } + return fd.cachedFD, nil +} + +// Release implements vfs.FileDescriptionImpl.Release. +func (fd *regularFileFD) Release(ctx context.Context) { + fd.cachedFD.DecRef(ctx) + fd.cachedFD = nil +} + +// OnClose implements vfs.FileDescriptionImpl.OnClose. +func (fd *regularFileFD) OnClose(ctx context.Context) error { + // Linux doesn't define ovl_file_operations.flush at all (i.e. its + // equivalent to OnClose is a no-op). We pass through to + // fd.cachedFD.OnClose() without upgrading if fd.dentry() has been + // copied-up, since OnClose is mostly used to define post-close writeback, + // and if fd.cachedFD hasn't been updated then it can't have been used to + // mutate fd.dentry() anyway. + fd.mu.Lock() + if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags { + if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil { + fd.mu.Unlock() + return err + } + fd.cachedFlags = statusFlags + } + wrappedFD := fd.cachedFD + fd.mu.Unlock() + return wrappedFD.OnClose(ctx) +} + +// Stat implements vfs.FileDescriptionImpl.Stat. +func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { + var stat linux.Statx + if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 { + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + return linux.Statx{}, err + } + stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{ + Mask: layerMask, + Sync: opts.Sync, + }) + wrappedFD.DecRef(ctx) + if err != nil { + return linux.Statx{}, err + } + } + fd.dentry().statInternalTo(ctx, &opts, &stat) + return stat, nil +} + +// Allocate implements vfs.FileDescriptionImpl.Allocate. +func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + return err + } + defer wrappedFD.DecRef(ctx) + return wrappedFD.Allocate(ctx, mode, offset, length) +} + +// SetStat implements vfs.FileDescriptionImpl.SetStat. +func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { + d := fd.dentry() + mode := linux.FileMode(atomic.LoadUint32(&d.mode)) + if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil { + return err + } + mnt := fd.vfsfd.Mount() + if err := mnt.CheckBeginWrite(); err != nil { + return err + } + defer mnt.EndWrite() + if err := d.copyUpLocked(ctx); err != nil { + return err + } + // Changes to d's attributes are serialized by d.copyMu. + d.copyMu.Lock() + defer d.copyMu.Unlock() + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return err + } + if err := wrappedFD.SetStat(ctx, opts); err != nil { + return err + } + d.updateAfterSetStatLocked(&opts) + if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 { + d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent) + } + return nil +} + +// StatFS implements vfs.FileDescriptionImpl.StatFS. +func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) { + return fd.filesystem().statFS(ctx) +} + +// Readiness implements waiter.Waitable.Readiness. +func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask { + ctx := context.Background() + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + // TODO(b/171089913): Just use fd.cachedFD since Readiness can't return + // an error. This is obviously wrong, but at least consistent with + // VFS1. + log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err) + fd.mu.Lock() + wrappedFD = fd.cachedFD + wrappedFD.IncRef() + fd.mu.Unlock() + } + defer wrappedFD.DecRef(ctx) + return wrappedFD.Readiness(mask) +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (fd *regularFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + fd.mu.Lock() + defer fd.mu.Unlock() + wrappedFD, err := fd.currentFDLocked(context.Background()) + if err != nil { + // TODO(b/171089913): Just use fd.cachedFD since EventRegister can't + // return an error. This is obviously wrong, but at least consistent + // with VFS1. + log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err) + wrappedFD = fd.cachedFD + } + wrappedFD.EventRegister(e, mask) + if !fd.copiedUp { + if fd.lowerWaiters == nil { + fd.lowerWaiters = make(map[*waiter.Entry]waiter.EventMask) + } + fd.lowerWaiters[e] = mask + } +} + +// EventUnregister implements waiter.Waitable.EventUnregister. +func (fd *regularFileFD) EventUnregister(e *waiter.Entry) { + fd.mu.Lock() + defer fd.mu.Unlock() + fd.cachedFD.EventUnregister(e) + if !fd.copiedUp { + delete(fd.lowerWaiters, e) + } +} + +// PRead implements vfs.FileDescriptionImpl.PRead. +func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + return 0, err + } + defer wrappedFD.DecRef(ctx) + return wrappedFD.PRead(ctx, dst, offset, opts) +} + +// Read implements vfs.FileDescriptionImpl.Read. +func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { + // Hold fd.mu during the read to serialize the file offset. + fd.mu.Lock() + defer fd.mu.Unlock() + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return 0, err + } + return wrappedFD.Read(ctx, dst, opts) +} + +// PWrite implements vfs.FileDescriptionImpl.PWrite. +func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + return 0, err + } + defer wrappedFD.DecRef(ctx) + return wrappedFD.PWrite(ctx, src, offset, opts) +} + +// Write implements vfs.FileDescriptionImpl.Write. +func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { + // Hold fd.mu during the write to serialize the file offset. + fd.mu.Lock() + defer fd.mu.Unlock() + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return 0, err + } + return wrappedFD.Write(ctx, src, opts) +} + +// Seek implements vfs.FileDescriptionImpl.Seek. +func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { + // Hold fd.mu during the seek to serialize the file offset. + fd.mu.Lock() + defer fd.mu.Unlock() + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return 0, err + } + return wrappedFD.Seek(ctx, offset, whence) +} + +// Sync implements vfs.FileDescriptionImpl.Sync. +func (fd *regularFileFD) Sync(ctx context.Context) error { + fd.mu.Lock() + if !fd.dentry().isCopiedUp() { + fd.mu.Unlock() + return nil + } + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + fd.mu.Unlock() + return err + } + wrappedFD.IncRef() + defer wrappedFD.DecRef(ctx) + fd.mu.Unlock() + return wrappedFD.Sync(ctx) +} + +// Ioctl implements vfs.FileDescriptionImpl.Ioctl. +func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) { + wrappedFD, err := fd.getCurrentFD(ctx) + if err != nil { + return 0, err + } + defer wrappedFD.DecRef(ctx) + return wrappedFD.Ioctl(ctx, uio, args) +} + +// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. +func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { + if err := fd.ensureMappable(ctx, opts); err != nil { + return err + } + return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts) +} + +// ensureMappable ensures that fd.dentry().wrappedMappable is not nil. +func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error { + d := fd.dentry() + + // Fast path if we already have a Mappable for the current top layer. + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + + // Only permit mmap of regular files, since other file types may have + // unpredictable behavior when mmapped (e.g. /dev/zero). + if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG { + return syserror.ENODEV + } + + // Get a Mappable for the current top layer. + fd.mu.Lock() + defer fd.mu.Unlock() + d.copyMu.RLock() + defer d.copyMu.RUnlock() + if atomic.LoadUint32(&d.isMappable) != 0 { + return nil + } + wrappedFD, err := fd.currentFDLocked(ctx) + if err != nil { + return err + } + if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil { + return err + } + if opts.MappingIdentity != nil { + opts.MappingIdentity.DecRef(ctx) + opts.MappingIdentity = nil + } + // Use this Mappable for all mappings of this layer (unless we raced with + // another call to ensureMappable). + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.dataMu.Lock() + defer d.dataMu.Unlock() + if d.wrappedMappable == nil { + d.wrappedMappable = opts.Mappable + atomic.StoreUint32(&d.isMappable, 1) + } + return nil +} + +// AddMapping implements memmap.Mappable.AddMapping. +func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, ar, offset, writable) + } + return nil +} + +// RemoveMapping implements memmap.Mappable.RemoveMapping. +func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar usermem.AddrRange, offset uint64, writable bool) { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) + if !d.isCopiedUp() { + d.lowerMappings.RemoveMapping(ms, ar, offset, writable) + } +} + +// CopyMapping implements memmap.Mappable.CopyMapping. +func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR usermem.AddrRange, offset uint64, writable bool) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { + return err + } + if !d.isCopiedUp() { + d.lowerMappings.AddMapping(ms, dstAR, offset, writable) + } + return nil +} + +// Translate implements memmap.Mappable.Translate. +func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at usermem.AccessType) ([]memmap.Translation, error) { + d.dataMu.RLock() + defer d.dataMu.RUnlock() + return d.wrappedMappable.Translate(ctx, required, optional, at) +} + +// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. +func (d *dentry) InvalidateUnsavable(ctx context.Context) error { + d.mapsMu.Lock() + defer d.mapsMu.Unlock() + return d.wrappedMappable.InvalidateUnsavable(ctx) +} -- cgit v1.2.3