summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--pkg/sentry/fsimpl/overlay/BUILD5
-rw-r--r--pkg/sentry/fsimpl/overlay/copy_up.go70
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go52
-rw-r--r--pkg/sentry/fsimpl/overlay/overlay.go4
-rw-r--r--pkg/sentry/fsimpl/overlay/regular_file.go (renamed from pkg/sentry/fsimpl/overlay/non_directory.go)115
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go3
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go20
-rw-r--r--runsc/boot/vfs.go32
-rw-r--r--test/syscalls/linux/mknod.cc16
-rw-r--r--test/syscalls/linux/pipe.cc30
10 files changed, 247 insertions, 100 deletions
diff --git a/pkg/sentry/fsimpl/overlay/BUILD b/pkg/sentry/fsimpl/overlay/BUILD
index 8cf5b35d3..1e11b0428 100644
--- a/pkg/sentry/fsimpl/overlay/BUILD
+++ b/pkg/sentry/fsimpl/overlay/BUILD
@@ -21,14 +21,16 @@ go_library(
"directory.go",
"filesystem.go",
"fstree.go",
- "non_directory.go",
"overlay.go",
+ "regular_file.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
"//pkg/fspath",
+ "//pkg/log",
+ "//pkg/sentry/arch",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/memmap",
@@ -37,5 +39,6 @@ go_library(
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
+ "//pkg/waiter",
],
)
diff --git a/pkg/sentry/fsimpl/overlay/copy_up.go b/pkg/sentry/fsimpl/overlay/copy_up.go
index 73b126669..4506642ca 100644
--- a/pkg/sentry/fsimpl/overlay/copy_up.go
+++ b/pkg/sentry/fsimpl/overlay/copy_up.go
@@ -75,8 +75,21 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
return syserror.ENOENT
}
- // Perform copy-up.
+ // Obtain settable timestamps from the lower layer.
vfsObj := d.fs.vfsfs.VirtualFilesystem()
+ oldpop := vfs.PathOperation{
+ Root: d.lowerVDs[0],
+ Start: d.lowerVDs[0],
+ }
+ const timestampsMask = linux.STATX_ATIME | linux.STATX_MTIME
+ oldStat, err := vfsObj.StatAt(ctx, d.fs.creds, &oldpop, &vfs.StatOptions{
+ Mask: timestampsMask,
+ })
+ if err != nil {
+ return err
+ }
+
+ // Perform copy-up.
newpop := vfs.PathOperation{
Root: d.parent.upperVD,
Start: d.parent.upperVD,
@@ -101,10 +114,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
switch ftype {
case linux.S_IFREG:
- oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVDs[0],
- Start: d.lowerVDs[0],
- }, &vfs.OpenOptions{
+ oldFD, err := vfsObj.OpenAt(ctx, d.fs.creds, &oldpop, &vfs.OpenOptions{
Flags: linux.O_RDONLY,
})
if err != nil {
@@ -160,9 +170,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := newFD.SetStat(ctx, vfs.SetStatOptions{
Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: d.uid,
- GID: d.gid,
+ Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
+ UID: d.uid,
+ GID: d.gid,
+ Atime: oldStat.Atime,
+ Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@@ -179,9 +191,11 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: d.uid,
- GID: d.gid,
+ Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
+ UID: d.uid,
+ GID: d.gid,
+ Atime: oldStat.Atime,
+ Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@@ -195,10 +209,7 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
d.upperVD = upperVD
case linux.S_IFLNK:
- target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVDs[0],
- Start: d.lowerVDs[0],
- })
+ target, err := vfsObj.ReadlinkAt(ctx, d.fs.creds, &oldpop)
if err != nil {
return err
}
@@ -207,10 +218,12 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
- Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID,
- Mode: uint16(d.mode),
- UID: d.uid,
- GID: d.gid,
+ Mask: linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
+ Mode: uint16(d.mode),
+ UID: d.uid,
+ GID: d.gid,
+ Atime: oldStat.Atime,
+ Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
@@ -224,25 +237,20 @@ func (d *dentry) copyUpLocked(ctx context.Context) error {
d.upperVD = upperVD
case linux.S_IFBLK, linux.S_IFCHR:
- lowerStat, err := vfsObj.StatAt(ctx, d.fs.creds, &vfs.PathOperation{
- Root: d.lowerVDs[0],
- Start: d.lowerVDs[0],
- }, &vfs.StatOptions{})
- if err != nil {
- return err
- }
if err := vfsObj.MknodAt(ctx, d.fs.creds, &newpop, &vfs.MknodOptions{
Mode: linux.FileMode(d.mode),
- DevMajor: lowerStat.RdevMajor,
- DevMinor: lowerStat.RdevMinor,
+ DevMajor: oldStat.RdevMajor,
+ DevMinor: oldStat.RdevMinor,
}); err != nil {
return err
}
if err := vfsObj.SetStatAt(ctx, d.fs.creds, &newpop, &vfs.SetStatOptions{
Stat: linux.Statx{
- Mask: linux.STATX_UID | linux.STATX_GID,
- UID: d.uid,
- GID: d.gid,
+ Mask: linux.STATX_UID | linux.STATX_GID | oldStat.Mask&timestampsMask,
+ UID: d.uid,
+ GID: d.gid,
+ Atime: oldStat.Atime,
+ Mtime: oldStat.Mtime,
},
}); err != nil {
cleanupUndoCopyUp()
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index bd11372d5..78a01bbb7 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -765,7 +765,7 @@ func (fs *filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if mustCreate {
return nil, syserror.EEXIST
}
- if mayWrite {
+ if start.isRegularFile() && mayWrite {
if err := start.copyUpLocked(ctx); err != nil {
return nil, err
}
@@ -819,7 +819,7 @@ afterTrailingSymlink:
if rp.MustBeDir() && !child.isDir() {
return nil, syserror.ENOTDIR
}
- if mayWrite {
+ if child.isRegularFile() && mayWrite {
if err := child.copyUpLocked(ctx); err != nil {
return nil, err
}
@@ -872,8 +872,11 @@ func (d *dentry) openCopiedUp(ctx context.Context, rp *vfs.ResolvingPath, opts *
if err != nil {
return nil, err
}
+ if ftype != linux.S_IFREG {
+ return layerFD, nil
+ }
layerFlags := layerFD.StatusFlags()
- fd := &nonDirectoryFD{
+ fd := &regularFileFD{
copiedUp: isUpper,
cachedFD: layerFD,
cachedFlags: layerFlags,
@@ -969,7 +972,7 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
}
// Finally construct the overlay FD.
upperFlags := upperFD.StatusFlags()
- fd := &nonDirectoryFD{
+ fd := &regularFileFD{
copiedUp: true,
cachedFD: upperFD,
cachedFlags: upperFlags,
@@ -1293,6 +1296,9 @@ func (fs *filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
if !child.isDir() {
return syserror.ENOTDIR
}
+ if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(atomic.LoadUint32(&parent.mode)), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
+ return err
+ }
child.dirMu.Lock()
defer child.dirMu.Unlock()
whiteouts, err := child.collectWhiteoutsForRmdirLocked(ctx)
@@ -1528,12 +1534,38 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
+ parentMode := atomic.LoadUint32(&parent.mode)
child := parent.children[name]
var childLayer lookupLayer
+ if child == nil {
+ if parentMode&linux.S_ISVTX != 0 {
+ // If the parent's sticky bit is set, we need a child dentry to get
+ // its owner.
+ child, err = fs.getChildLocked(ctx, parent, name, &ds)
+ if err != nil {
+ return err
+ }
+ } else {
+ // Determine if the file being unlinked actually exists. Holding
+ // parent.dirMu prevents a dentry from being instantiated for the file,
+ // which in turn prevents it from being copied-up, so this result is
+ // stable.
+ childLayer, err = fs.lookupLayerLocked(ctx, parent, name)
+ if err != nil {
+ return err
+ }
+ if !childLayer.existsInOverlay() {
+ return syserror.ENOENT
+ }
+ }
+ }
if child != nil {
if child.isDir() {
return syserror.EISDIR
}
+ if err := vfs.CheckDeleteSticky(rp.Credentials(), linux.FileMode(parentMode), auth.KUID(atomic.LoadUint32(&child.uid))); err != nil {
+ return err
+ }
if err := vfsObj.PrepareDeleteDentry(mntns, &child.vfsd); err != nil {
return err
}
@@ -1546,18 +1578,6 @@ func (fs *filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
} else {
childLayer = lookupLayerLower
}
- } else {
- // Determine if the file being unlinked actually exists. Holding
- // parent.dirMu prevents a dentry from being instantiated for the file,
- // which in turn prevents it from being copied-up, so this result is
- // stable.
- childLayer, err = fs.lookupLayerLocked(ctx, parent, name)
- if err != nil {
- return err
- }
- if !childLayer.existsInOverlay() {
- return syserror.ENOENT
- }
}
pop := vfs.PathOperation{
diff --git a/pkg/sentry/fsimpl/overlay/overlay.go b/pkg/sentry/fsimpl/overlay/overlay.go
index e5f506d2e..4c5de8d32 100644
--- a/pkg/sentry/fsimpl/overlay/overlay.go
+++ b/pkg/sentry/fsimpl/overlay/overlay.go
@@ -18,7 +18,7 @@
//
// Lock order:
//
-// directoryFD.mu / nonDirectoryFD.mu
+// directoryFD.mu / regularFileFD.mu
// filesystem.renameMu
// dentry.dirMu
// dentry.copyMu
@@ -453,7 +453,7 @@ type dentry struct {
// - If this dentry is copied-up, then wrappedMappable is the Mappable
// obtained from a call to the current top layer's
// FileDescription.ConfigureMMap(). Once wrappedMappable becomes non-nil
- // (from a call to nonDirectoryFD.ensureMappable()), it cannot become nil.
+ // (from a call to regularFileFD.ensureMappable()), it cannot become nil.
// wrappedMappable is protected by mapsMu and dataMu.
//
// - isMappable is non-zero iff wrappedMappable is non-nil. isMappable is
diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/regular_file.go
index 853aee951..2b89a7a6d 100644
--- a/pkg/sentry/fsimpl/overlay/non_directory.go
+++ b/pkg/sentry/fsimpl/overlay/regular_file.go
@@ -19,14 +19,21 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
)
+func (d *dentry) isRegularFile() bool {
+ return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFREG
+}
+
func (d *dentry) isSymlink() bool {
return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
}
@@ -40,7 +47,7 @@ func (d *dentry) readlink(ctx context.Context) (string, error) {
}
// +stateify savable
-type nonDirectoryFD struct {
+type regularFileFD struct {
fileDescription
// If copiedUp is false, cachedFD represents
@@ -52,9 +59,13 @@ type nonDirectoryFD struct {
copiedUp bool
cachedFD *vfs.FileDescription
cachedFlags uint32
+
+ // If copiedUp is false, lowerWaiters contains all waiter.Entries
+ // registered with cachedFD. lowerWaiters is protected by mu.
+ lowerWaiters map[*waiter.Entry]waiter.EventMask
}
-func (fd *nonDirectoryFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
+func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
fd.mu.Lock()
defer fd.mu.Unlock()
wrappedFD, err := fd.currentFDLocked(ctx)
@@ -65,7 +76,7 @@ func (fd *nonDirectoryFD) getCurrentFD(ctx context.Context) (*vfs.FileDescriptio
return wrappedFD, nil
}
-func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
+func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
d := fd.dentry()
statusFlags := fd.vfsfd.StatusFlags()
if !fd.copiedUp && d.isCopiedUp() {
@@ -87,10 +98,21 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip
return nil, err
}
}
+ if len(fd.lowerWaiters) != 0 {
+ ready := upperFD.Readiness(^waiter.EventMask(0))
+ for e, mask := range fd.lowerWaiters {
+ fd.cachedFD.EventUnregister(e)
+ upperFD.EventRegister(e, mask)
+ if ready&mask != 0 {
+ e.Callback.Callback(e)
+ }
+ }
+ }
fd.cachedFD.DecRef(ctx)
fd.copiedUp = true
fd.cachedFD = upperFD
fd.cachedFlags = statusFlags
+ fd.lowerWaiters = nil
} else if fd.cachedFlags != statusFlags {
if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil {
return nil, err
@@ -101,13 +123,13 @@ func (fd *nonDirectoryFD) currentFDLocked(ctx context.Context) (*vfs.FileDescrip
}
// Release implements vfs.FileDescriptionImpl.Release.
-func (fd *nonDirectoryFD) Release(ctx context.Context) {
+func (fd *regularFileFD) Release(ctx context.Context) {
fd.cachedFD.DecRef(ctx)
fd.cachedFD = nil
}
// OnClose implements vfs.FileDescriptionImpl.OnClose.
-func (fd *nonDirectoryFD) OnClose(ctx context.Context) error {
+func (fd *regularFileFD) OnClose(ctx context.Context) error {
// Linux doesn't define ovl_file_operations.flush at all (i.e. its
// equivalent to OnClose is a no-op). We pass through to
// fd.cachedFD.OnClose() without upgrading if fd.dentry() has been
@@ -128,7 +150,7 @@ func (fd *nonDirectoryFD) OnClose(ctx context.Context) error {
}
// Stat implements vfs.FileDescriptionImpl.Stat.
-func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
var stat linux.Statx
if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
wrappedFD, err := fd.getCurrentFD(ctx)
@@ -149,7 +171,7 @@ func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux
}
// Allocate implements vfs.FileDescriptionImpl.Allocate.
-func (fd *nonDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return err
@@ -159,7 +181,7 @@ func (fd *nonDirectoryFD) Allocate(ctx context.Context, mode, offset, length uin
}
// SetStat implements vfs.FileDescriptionImpl.SetStat.
-func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
+func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
d := fd.dentry()
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
@@ -191,12 +213,61 @@ func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions)
}
// StatFS implements vfs.FileDescriptionImpl.StatFS.
-func (fd *nonDirectoryFD) StatFS(ctx context.Context) (linux.Statfs, error) {
+func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) {
return fd.filesystem().statFS(ctx)
}
+// Readiness implements waiter.Waitable.Readiness.
+func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
+ ctx := context.Background()
+ wrappedFD, err := fd.getCurrentFD(ctx)
+ if err != nil {
+ // TODO(b/171089913): Just use fd.cachedFD since Readiness can't return
+ // an error. This is obviously wrong, but at least consistent with
+ // VFS1.
+ log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err)
+ fd.mu.Lock()
+ wrappedFD = fd.cachedFD
+ wrappedFD.IncRef()
+ fd.mu.Unlock()
+ }
+ defer wrappedFD.DecRef(ctx)
+ return wrappedFD.Readiness(mask)
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (fd *regularFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ wrappedFD, err := fd.currentFDLocked(context.Background())
+ if err != nil {
+ // TODO(b/171089913): Just use fd.cachedFD since EventRegister can't
+ // return an error. This is obviously wrong, but at least consistent
+ // with VFS1.
+ log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err)
+ wrappedFD = fd.cachedFD
+ }
+ wrappedFD.EventRegister(e, mask)
+ if !fd.copiedUp {
+ if fd.lowerWaiters == nil {
+ fd.lowerWaiters = make(map[*waiter.Entry]waiter.EventMask)
+ }
+ fd.lowerWaiters[e] = mask
+ }
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (fd *regularFileFD) EventUnregister(e *waiter.Entry) {
+ fd.mu.Lock()
+ defer fd.mu.Unlock()
+ fd.cachedFD.EventUnregister(e)
+ if !fd.copiedUp {
+ delete(fd.lowerWaiters, e)
+ }
+}
+
// PRead implements vfs.FileDescriptionImpl.PRead.
-func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return 0, err
@@ -206,7 +277,7 @@ func (fd *nonDirectoryFD) PRead(ctx context.Context, dst usermem.IOSequence, off
}
// Read implements vfs.FileDescriptionImpl.Read.
-func (fd *nonDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// Hold fd.mu during the read to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -218,7 +289,7 @@ func (fd *nonDirectoryFD) Read(ctx context.Context, dst usermem.IOSequence, opts
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
-func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
wrappedFD, err := fd.getCurrentFD(ctx)
if err != nil {
return 0, err
@@ -228,7 +299,7 @@ func (fd *nonDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence, of
}
// Write implements vfs.FileDescriptionImpl.Write.
-func (fd *nonDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// Hold fd.mu during the write to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -240,7 +311,7 @@ func (fd *nonDirectoryFD) Write(ctx context.Context, src usermem.IOSequence, opt
}
// Seek implements vfs.FileDescriptionImpl.Seek.
-func (fd *nonDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
// Hold fd.mu during the seek to serialize the file offset.
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -252,7 +323,7 @@ func (fd *nonDirectoryFD) Seek(ctx context.Context, offset int64, whence int32)
}
// Sync implements vfs.FileDescriptionImpl.Sync.
-func (fd *nonDirectoryFD) Sync(ctx context.Context) error {
+func (fd *regularFileFD) Sync(ctx context.Context) error {
fd.mu.Lock()
if !fd.dentry().isCopiedUp() {
fd.mu.Unlock()
@@ -269,8 +340,18 @@ func (fd *nonDirectoryFD) Sync(ctx context.Context) error {
return wrappedFD.Sync(ctx)
}
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
+func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
+ wrappedFD, err := fd.getCurrentFD(ctx)
+ if err != nil {
+ return 0, err
+ }
+ defer wrappedFD.DecRef(ctx)
+ return wrappedFD.Ioctl(ctx, uio, args)
+}
+
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
-func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
+func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
if err := fd.ensureMappable(ctx, opts); err != nil {
return err
}
@@ -278,7 +359,7 @@ func (fd *nonDirectoryFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOp
}
// ensureMappable ensures that fd.dentry().wrappedMappable is not nil.
-func (fd *nonDirectoryFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
+func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
d := fd.dentry()
// Fast path if we already have a Mappable for the current top layer.
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index f61039f5b..1a152142b 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -237,8 +237,7 @@ func (fd *VFSPipeFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
// PipeSize implements fcntl(F_GETPIPE_SZ).
func (fd *VFSPipeFD) PipeSize() int64 {
- // Inline Pipe.FifoSize() rather than calling it with nil Context and
- // fs.File and ignoring the returned error (which is always nil).
+ // Inline Pipe.FifoSize() since we don't have a fs.File.
fd.pipe.mu.Lock()
defer fd.pipe.mu.Unlock()
return fd.pipe.max
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index d8b8d9783..36e89700e 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -145,16 +145,6 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return uintptr(file.StatusFlags()), nil, nil
case linux.F_SETFL:
return 0, nil, file.SetStatusFlags(t, t.Credentials(), args[2].Uint())
- case linux.F_SETPIPE_SZ:
- pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
- if !ok {
- return 0, nil, syserror.EBADF
- }
- n, err := pipefile.SetPipeSize(int64(args[2].Int()))
- if err != nil {
- return 0, nil, err
- }
- return uintptr(n), nil, nil
case linux.F_GETOWN:
owner, hasOwner := getAsyncOwner(t, file)
if !hasOwner {
@@ -190,6 +180,16 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, err
}
return 0, nil, setAsyncOwner(t, file, owner.Type, owner.PID)
+ case linux.F_SETPIPE_SZ:
+ pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
+ if !ok {
+ return 0, nil, syserror.EBADF
+ }
+ n, err := pipefile.SetPipeSize(int64(args[2].Int()))
+ if err != nil {
+ return 0, nil, err
+ }
+ return uintptr(n), nil, nil
case linux.F_GETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {
diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go
index 82e459f46..004da5b40 100644
--- a/runsc/boot/vfs.go
+++ b/runsc/boot/vfs.go
@@ -264,10 +264,38 @@ func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Cre
}
cu.Add(func() { lower.DecRef(ctx) })
+ // Propagate the lower layer's root's owner, group, and mode to the upper
+ // layer's root for consistency with VFS1.
+ upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root())
+ lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root())
+ stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{
+ Root: lowerRootVD,
+ Start: lowerRootVD,
+ }, &vfs.StatOptions{
+ Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE,
+ })
+ if err != nil {
+ return nil, nil, err
+ }
+ err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{
+ Root: upperRootVD,
+ Start: upperRootVD,
+ }, &vfs.SetStatOptions{
+ Stat: linux.Statx{
+ Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask,
+ UID: stat.UID,
+ GID: stat.GID,
+ Mode: stat.Mode,
+ },
+ })
+ if err != nil {
+ return nil, nil, err
+ }
+
// Configure overlay with both layers.
overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{
- UpperRoot: vfs.MakeVirtualDentry(upper, upper.Root()),
- LowerRoots: []vfs.VirtualDentry{vfs.MakeVirtualDentry(lower, lower.Root())},
+ UpperRoot: upperRootVD,
+ LowerRoots: []vfs.VirtualDentry{lowerRootVD},
}
return &overlayOpts, cu.Release(), nil
}
diff --git a/test/syscalls/linux/mknod.cc b/test/syscalls/linux/mknod.cc
index ae65d366b..b96907b30 100644
--- a/test/syscalls/linux/mknod.cc
+++ b/test/syscalls/linux/mknod.cc
@@ -93,15 +93,15 @@ TEST(MknodTest, MknodOnExistingPathFails) {
}
TEST(MknodTest, UnimplementedTypesReturnError) {
- const std::string path = NewTempAbsPath();
+ // TODO(gvisor.dev/issue/1624): These file types are supported by some
+ // filesystems in VFS2, so this test should be deleted along with VFS1.
+ SKIP_IF(!IsRunningWithVFS1());
- if (IsRunningWithVFS1()) {
- ASSERT_THAT(mknod(path.c_str(), S_IFSOCK, 0),
- SyscallFailsWithErrno(EOPNOTSUPP));
- }
- // These will fail on linux as well since we don't have CAP_MKNOD.
- ASSERT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM));
- ASSERT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
+ const std::string path = NewTempAbsPath();
+ EXPECT_THAT(mknod(path.c_str(), S_IFSOCK, 0),
+ SyscallFailsWithErrno(EOPNOTSUPP));
+ EXPECT_THAT(mknod(path.c_str(), S_IFCHR, 0), SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(mknod(path.c_str(), S_IFBLK, 0), SyscallFailsWithErrno(EPERM));
}
TEST(MknodTest, Socket) {
diff --git a/test/syscalls/linux/pipe.cc b/test/syscalls/linux/pipe.cc
index c097c9187..06d9dbf65 100644
--- a/test/syscalls/linux/pipe.cc
+++ b/test/syscalls/linux/pipe.cc
@@ -569,30 +569,38 @@ TEST_P(PipeTest, Streaming) {
// Size() requires 2 syscalls, call it once and remember the value.
const int pipe_size = Size();
+ const size_t streamed_bytes = 4 * pipe_size;
absl::Notification notify;
- ScopedThread t([this, &notify, pipe_size]() {
+ ScopedThread t([&, this]() {
+ std::vector<char> buf(1024);
// Don't start until it's full.
notify.WaitForNotification();
- for (int i = 0; i < pipe_size; i++) {
- int rbuf;
- ASSERT_THAT(read(rfd_.get(), &rbuf, sizeof(rbuf)),
- SyscallSucceedsWithValue(sizeof(rbuf)));
- EXPECT_EQ(rbuf, i);
+ ssize_t total = 0;
+ while (total < streamed_bytes) {
+ ASSERT_THAT(read(rfd_.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ total += buf.size();
}
});
// Write 4 bytes * pipe_size. It will fill up the pipe once, notify the reader
// to start. Then we write pipe size worth 3 more times to ensure the reader
// can follow along.
+ //
+ // The size of each write (which is determined by buf.size()) must be smaller
+ // than the size of the pipe (which, in the "smallbuffer" configuration, is 1
+ // page) for the check for notify.Notify() below to be correct.
+ std::vector<char> buf(1024);
+ RandomizeBuffer(buf.data(), buf.size());
ssize_t total = 0;
- for (int i = 0; i < pipe_size; i++) {
- ssize_t written = write(wfd_.get(), &i, sizeof(i));
- ASSERT_THAT(written, SyscallSucceedsWithValue(sizeof(i)));
- total += written;
+ while (total < streamed_bytes) {
+ ASSERT_THAT(write(wfd_.get(), buf.data(), buf.size()),
+ SyscallSucceedsWithValue(buf.size()));
+ total += buf.size();
// Is the next write about to fill up the buffer? Wake up the reader once.
- if (total < pipe_size && (total + written) >= pipe_size) {
+ if (total < pipe_size && (total + buf.size()) >= pipe_size) {
notify.Notify();
}
}