summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl')
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go6
-rw-r--r--pkg/sentry/fsimpl/devpts/slave.go8
-rw-r--r--pkg/sentry/fsimpl/ext/BUILD4
-rw-r--r--pkg/sentry/fsimpl/fuse/BUILD5
-rw-r--r--pkg/sentry/fsimpl/fuse/dev.go33
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go200
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go9
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go28
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go87
-rw-r--r--pkg/sentry/fsimpl/gofer/handle.go5
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go70
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go92
-rw-r--r--pkg/sentry/fsimpl/host/BUILD1
-rw-r--r--pkg/sentry/fsimpl/host/host.go13
-rw-r--r--pkg/sentry/fsimpl/host/socket_iovec.go7
-rw-r--r--pkg/sentry/fsimpl/kernfs/BUILD2
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go14
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go2
-rw-r--r--pkg/sentry/fsimpl/overlay/filesystem.go2
-rw-r--r--pkg/sentry/fsimpl/overlay/non_directory.go4
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go2
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go6
-rw-r--r--pkg/sentry/fsimpl/proc/task.go4
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go2
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go4
-rw-r--r--pkg/sentry/fsimpl/sys/BUILD2
-rw-r--r--pkg/sentry/fsimpl/testutil/BUILD2
-rw-r--r--pkg/sentry/fsimpl/testutil/kernel.go8
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go35
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go7
34 files changed, 525 insertions, 151 deletions
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 69879498a..1081fff52 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -67,8 +67,8 @@ func (mi *masterInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vf
}
// Stat implements kernfs.Inode.Stat.
-func (mi *masterInode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- statx, err := mi.InodeAttrs.Stat(vfsfs, opts)
+func (mi *masterInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ statx, err := mi.InodeAttrs.Stat(ctx, vfsfs, opts)
if err != nil {
return linux.Statx{}, err
}
@@ -186,7 +186,7 @@ func (mfd *masterFileDescription) SetStat(ctx context.Context, opts vfs.SetStatO
// Stat implements vfs.FileDescriptionImpl.Stat.
func (mfd *masterFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
fs := mfd.vfsfd.VirtualDentry().Mount().Filesystem()
- return mfd.inode.Stat(fs, opts)
+ return mfd.inode.Stat(ctx, fs, opts)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go
index cf1a0f0ac..a91cae3ef 100644
--- a/pkg/sentry/fsimpl/devpts/slave.go
+++ b/pkg/sentry/fsimpl/devpts/slave.go
@@ -73,8 +73,8 @@ func (si *slaveInode) Valid(context.Context) bool {
}
// Stat implements kernfs.Inode.Stat.
-func (si *slaveInode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- statx, err := si.InodeAttrs.Stat(vfsfs, opts)
+func (si *slaveInode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ statx, err := si.InodeAttrs.Stat(ctx, vfsfs, opts)
if err != nil {
return linux.Statx{}, err
}
@@ -132,7 +132,7 @@ func (sfd *slaveFileDescription) Write(ctx context.Context, src usermem.IOSequen
return sfd.inode.t.ld.outputQueueWrite(ctx, src)
}
-// Ioctl implements vfs.FileDescripionImpl.Ioctl.
+// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
func (sfd *slaveFileDescription) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArguments) (uintptr, error) {
switch cmd := args[1].Uint(); cmd {
case linux.FIONREAD: // linux.FIONREAD == linux.TIOCINQ
@@ -183,7 +183,7 @@ func (sfd *slaveFileDescription) SetStat(ctx context.Context, opts vfs.SetStatOp
// Stat implements vfs.FileDescriptionImpl.Stat.
func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem()
- return sfd.inode.Stat(fs, opts)
+ return sfd.inode.Stat(ctx, fs, opts)
}
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
diff --git a/pkg/sentry/fsimpl/ext/BUILD b/pkg/sentry/fsimpl/ext/BUILD
index ef24f8159..abc610ef3 100644
--- a/pkg/sentry/fsimpl/ext/BUILD
+++ b/pkg/sentry/fsimpl/ext/BUILD
@@ -96,7 +96,7 @@ go_test(
"//pkg/syserror",
"//pkg/test/testutil",
"//pkg/usermem",
- "@com_github_google_go-cmp//cmp:go_default_library",
- "@com_github_google_go-cmp//cmp/cmpopts:go_default_library",
+ "@com_github_google_go_cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp/cmpopts:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/fuse/BUILD b/pkg/sentry/fsimpl/fuse/BUILD
index 41567967d..737007748 100644
--- a/pkg/sentry/fsimpl/fuse/BUILD
+++ b/pkg/sentry/fsimpl/fuse/BUILD
@@ -6,12 +6,17 @@ go_library(
name = "fuse",
srcs = [
"dev.go",
+ "fusefs.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/log",
"//pkg/sentry/fsimpl/devtmpfs",
+ "//pkg/sentry/fsimpl/kernfs",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/fuse/dev.go b/pkg/sentry/fsimpl/fuse/dev.go
index f6a67d005..c9e12a94f 100644
--- a/pkg/sentry/fsimpl/fuse/dev.go
+++ b/pkg/sentry/fsimpl/fuse/dev.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/devtmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -30,6 +31,10 @@ type fuseDevice struct{}
// Open implements vfs.Device.Open.
func (fuseDevice) Open(ctx context.Context, mnt *vfs.Mount, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ if !kernel.FUSEEnabled {
+ return nil, syserror.ENOENT
+ }
+
var fd DeviceFD
if err := fd.vfsfd.Init(&fd, opts.Flags, mnt, vfsd, &vfs.FileDescriptionOptions{
UseDentryMetadata: true,
@@ -46,6 +51,9 @@ type DeviceFD struct {
vfs.DentryMetadataFileDescriptionImpl
vfs.NoLockFD
+ // mounted specifies whether a FUSE filesystem was mounted using the DeviceFD.
+ mounted bool
+
// TODO(gvisor.dev/issue/2987): Add all the data structures needed to enqueue
// and deque requests, control synchronization and establish communication
// between the FUSE kernel module and the /dev/fuse character device.
@@ -56,26 +64,51 @@ func (fd *DeviceFD) Release() {}
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *DeviceFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
+ // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+ if !fd.mounted {
+ return 0, syserror.EPERM
+ }
+
return 0, syserror.ENOSYS
}
// Read implements vfs.FileDescriptionImpl.Read.
func (fd *DeviceFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+ if !fd.mounted {
+ return 0, syserror.EPERM
+ }
+
return 0, syserror.ENOSYS
}
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *DeviceFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+ if !fd.mounted {
+ return 0, syserror.EPERM
+ }
+
return 0, syserror.ENOSYS
}
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *DeviceFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+ if !fd.mounted {
+ return 0, syserror.EPERM
+ }
+
return 0, syserror.ENOSYS
}
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *DeviceFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ // Operations on /dev/fuse don't make sense until a FUSE filesystem is mounted.
+ if !fd.mounted {
+ return 0, syserror.EPERM
+ }
+
return 0, syserror.ENOSYS
}
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
new file mode 100644
index 000000000..f7775fb9b
--- /dev/null
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -0,0 +1,200 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package fuse implements fusefs.
+package fuse
+
+import (
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// Name is the default filesystem name.
+const Name = "fuse"
+
+// FilesystemType implements vfs.FilesystemType.
+type FilesystemType struct{}
+
+type filesystemOptions struct {
+ // userID specifies the numeric uid of the mount owner.
+ // This option should not be specified by the filesystem owner.
+ // It is set by libfuse (or, if libfuse is not used, must be set
+ // by the filesystem itself). For more information, see man page
+ // for fuse(8)
+ userID uint32
+
+ // groupID specifies the numeric gid of the mount owner.
+ // This option should not be specified by the filesystem owner.
+ // It is set by libfuse (or, if libfuse is not used, must be set
+ // by the filesystem itself). For more information, see man page
+ // for fuse(8)
+ groupID uint32
+
+ // rootMode specifies the the file mode of the filesystem's root.
+ rootMode linux.FileMode
+}
+
+// filesystem implements vfs.FilesystemImpl.
+type filesystem struct {
+ kernfs.Filesystem
+ devMinor uint32
+
+ // fuseFD is the FD returned when opening /dev/fuse. It is used for communication
+ // between the FUSE server daemon and the sentry fusefs.
+ fuseFD *DeviceFD
+
+ // opts is the options the fusefs is initialized with.
+ opts filesystemOptions
+}
+
+// Name implements vfs.FilesystemType.Name.
+func (FilesystemType) Name() string {
+ return Name
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ var fsopts filesystemOptions
+ mopts := vfs.GenericParseMountOptions(opts.Data)
+ deviceDescriptorStr, ok := mopts["fd"]
+ if !ok {
+ log.Warningf("%s.GetFilesystem: communication file descriptor N (obtained by opening /dev/fuse) must be specified as 'fd=N'", fsType.Name())
+ return nil, nil, syserror.EINVAL
+ }
+ delete(mopts, "fd")
+
+ deviceDescriptor, err := strconv.ParseInt(deviceDescriptorStr, 10 /* base */, 32 /* bitSize */)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ kernelTask := kernel.TaskFromContext(ctx)
+ if kernelTask == nil {
+ log.Warningf("%s.GetFilesystem: couldn't get kernel task from context", fsType.Name())
+ return nil, nil, syserror.EINVAL
+ }
+ fuseFd := kernelTask.GetFileVFS2(int32(deviceDescriptor))
+
+ // Parse and set all the other supported FUSE mount options.
+ // TODO: Expand the supported mount options.
+ if userIDStr, ok := mopts["user_id"]; ok {
+ delete(mopts, "user_id")
+ userID, err := strconv.ParseUint(userIDStr, 10, 32)
+ if err != nil {
+ log.Warningf("%s.GetFilesystem: invalid user_id: user_id=%s", fsType.Name(), userIDStr)
+ return nil, nil, syserror.EINVAL
+ }
+ fsopts.userID = uint32(userID)
+ }
+
+ if groupIDStr, ok := mopts["group_id"]; ok {
+ delete(mopts, "group_id")
+ groupID, err := strconv.ParseUint(groupIDStr, 10, 32)
+ if err != nil {
+ log.Warningf("%s.GetFilesystem: invalid group_id: group_id=%s", fsType.Name(), groupIDStr)
+ return nil, nil, syserror.EINVAL
+ }
+ fsopts.groupID = uint32(groupID)
+ }
+
+ rootMode := linux.FileMode(0777)
+ modeStr, ok := mopts["rootmode"]
+ if ok {
+ delete(mopts, "rootmode")
+ mode, err := strconv.ParseUint(modeStr, 8, 32)
+ if err != nil {
+ log.Warningf("%s.GetFilesystem: invalid mode: %q", fsType.Name(), modeStr)
+ return nil, nil, syserror.EINVAL
+ }
+ rootMode = linux.FileMode(mode)
+ }
+ fsopts.rootMode = rootMode
+
+ // Check for unparsed options.
+ if len(mopts) != 0 {
+ log.Warningf("%s.GetFilesystem: unknown options: %v", fsType.Name(), mopts)
+ return nil, nil, syserror.EINVAL
+ }
+
+ // Mark the device as ready so it can be used. /dev/fuse can only be used if the FD was used to
+ // mount a FUSE filesystem.
+ fuseFD := fuseFd.Impl().(*DeviceFD)
+ fuseFD.mounted = true
+
+ fs := &filesystem{
+ devMinor: devMinor,
+ fuseFD: fuseFD,
+ opts: fsopts,
+ }
+
+ fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
+
+ // TODO: dispatch a FUSE_INIT request to the FUSE daemon server before
+ // returning. Mount will not block on this dispatched request.
+
+ // root is the fusefs root directory.
+ root := fs.newInode(creds, fsopts.rootMode)
+
+ return fs.VFSFilesystem(), root.VFSDentry(), nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
+}
+
+// Inode implements kernfs.Inode.
+type Inode struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoDynamicLookup
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.OrderedChildren
+
+ locks vfs.FileLocks
+
+ dentry kernfs.Dentry
+}
+
+func (fs *filesystem) newInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
+ i := &Inode{}
+ i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
+ i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ i.dentry.Init(i)
+
+ return &i.dentry
+}
+
+// Open implements kernfs.Inode.Open.
+func (i *Inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), vfsd, &i.OrderedChildren, &i.locks, &opts)
+ if err != nil {
+ return nil, err
+ }
+ return fd.VFSFileDescription(), nil
+}
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index 5d83fe363..8c7c8e1b3 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -85,6 +85,7 @@ func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
d2 := &dentry{
refs: 1, // held by d
fs: d.fs,
+ ino: d.fs.nextSyntheticIno(),
mode: uint32(opts.mode),
uid: uint32(opts.kuid),
gid: uint32(opts.kgid),
@@ -184,13 +185,13 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
{
Name: ".",
Type: linux.DT_DIR,
- Ino: d.ino,
+ Ino: uint64(d.ino),
NextOff: 1,
},
{
Name: "..",
Type: uint8(atomic.LoadUint32(&parent.mode) >> 12),
- Ino: parent.ino,
+ Ino: uint64(parent.ino),
NextOff: 2,
},
}
@@ -226,7 +227,7 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
}
dirent := vfs.Dirent{
Name: p9d.Name,
- Ino: p9d.QID.Path,
+ Ino: uint64(inoFromPath(p9d.QID.Path)),
NextOff: int64(len(dirents) + 1),
}
// p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or
@@ -259,7 +260,7 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
dirents = append(dirents, vfs.Dirent{
Name: child.name,
Type: uint8(atomic.LoadUint32(&child.mode) >> 12),
- Ino: child.ino,
+ Ino: uint64(child.ino),
NextOff: int64(len(dirents) + 1),
})
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 7bcc99b29..00e3c99cd 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -150,11 +150,9 @@ afterSymlink:
return nil, err
}
if d != d.parent && !d.cachedMetadataAuthoritative() {
- _, attrMask, attr, err := d.parent.file.getAttr(ctx, dentryAttrMask())
- if err != nil {
+ if err := d.parent.updateFromGetattr(ctx); err != nil {
return nil, err
}
- d.parent.updateFromP9Attrs(attrMask, &attr)
}
rp.Advance()
return d.parent, nil
@@ -209,18 +207,28 @@ func (fs *filesystem) getChildLocked(ctx context.Context, vfsObj *vfs.VirtualFil
// Preconditions: As for getChildLocked. !parent.isSynthetic().
func (fs *filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.VirtualFilesystem, parent *dentry, name string, child *dentry, ds **[]*dentry) (*dentry, error) {
+ if child != nil {
+ // Need to lock child.metadataMu because we might be updating child
+ // metadata. We need to hold the lock *before* getting metadata from the
+ // server and release it after updating local metadata.
+ child.metadataMu.Lock()
+ }
qid, file, attrMask, attr, err := parent.file.walkGetAttrOne(ctx, name)
if err != nil && err != syserror.ENOENT {
+ if child != nil {
+ child.metadataMu.Unlock()
+ }
return nil, err
}
if child != nil {
- if !file.isNil() && qid.Path == child.ino {
- // The file at this path hasn't changed. Just update cached
- // metadata.
+ if !file.isNil() && inoFromPath(qid.Path) == child.ino {
+ // The file at this path hasn't changed. Just update cached metadata.
file.close(ctx)
- child.updateFromP9Attrs(attrMask, &attr)
+ child.updateFromP9AttrsLocked(attrMask, &attr)
+ child.metadataMu.Unlock()
return child, nil
}
+ child.metadataMu.Unlock()
if file.isNil() && child.isSynthetic() {
// We have a synthetic file, and no remote file has arisen to
// replace it.
@@ -1326,7 +1334,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
fs.renameMuRUnlockAndCheckCaching(&ds)
return err
}
- if err := d.setStat(ctx, rp.Credentials(), &opts.Stat, rp.Mount()); err != nil {
+ if err := d.setStat(ctx, rp.Credentials(), &opts, rp.Mount()); err != nil {
fs.renameMuRUnlockAndCheckCaching(&ds)
return err
}
@@ -1499,3 +1507,7 @@ func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
defer fs.renameMu.RUnlock()
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
}
+
+func (fs *filesystem) nextSyntheticIno() inodeNumber {
+ return inodeNumber(atomic.AddUint64(&fs.syntheticSeq, 1) | syntheticInoMask)
+}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 8e74e60a5..e20de84b5 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -110,6 +110,26 @@ type filesystem struct {
syncMu sync.Mutex
syncableDentries map[*dentry]struct{}
specialFileFDs map[*specialFileFD]struct{}
+
+ // syntheticSeq stores a counter to used to generate unique inodeNumber for
+ // synthetic dentries.
+ syntheticSeq uint64
+}
+
+// inodeNumber represents inode number reported in Dirent.Ino. For regular
+// dentries, it comes from QID.Path from the 9P server. Synthetic dentries
+// have have their inodeNumber generated sequentially, with the MSB reserved to
+// prevent conflicts with regular dentries.
+type inodeNumber uint64
+
+// Reserve MSB for synthetic mounts.
+const syntheticInoMask = uint64(1) << 63
+
+func inoFromPath(path uint64) inodeNumber {
+ if path&syntheticInoMask != 0 {
+ log.Warningf("Dropping MSB from ino, collision is possible. Original: %d, new: %d", path, path&^syntheticInoMask)
+ }
+ return inodeNumber(path &^ syntheticInoMask)
}
type filesystemOptions struct {
@@ -582,21 +602,27 @@ type dentry struct {
// returned by the server. dirents is protected by dirMu.
dirents []vfs.Dirent
- // Cached metadata; protected by metadataMu and accessed using atomic
- // memory operations unless otherwise specified.
+ // Cached metadata; protected by metadataMu.
+ // To access:
+ // - In situations where consistency is not required (like stat), these
+ // can be accessed using atomic operations only (without locking).
+ // - Lock metadataMu and can access without atomic operations.
+ // To mutate:
+ // - Lock metadataMu and use atomic operations to update because we might
+ // have atomic readers that don't hold the lock.
metadataMu sync.Mutex
- ino uint64 // immutable
- mode uint32 // type is immutable, perms are mutable
- uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
- gid uint32 // auth.KGID, but ...
- blockSize uint32 // 0 if unknown
+ ino inodeNumber // immutable
+ mode uint32 // type is immutable, perms are mutable
+ uid uint32 // auth.KUID, but stored as raw uint32 for sync/atomic
+ gid uint32 // auth.KGID, but ...
+ blockSize uint32 // 0 if unknown
// Timestamps, all nsecs from the Unix epoch.
atime int64
mtime int64
ctime int64
btime int64
// File size, protected by both metadataMu and dataMu (i.e. both must be
- // locked to mutate it).
+ // locked to mutate it; locking either is sufficient to access it).
size uint64
// nlink counts the number of hard links to this dentry. It's updated and
@@ -704,7 +730,7 @@ func (fs *filesystem) newDentry(ctx context.Context, file p9file, qid p9.QID, ma
d := &dentry{
fs: fs,
file: file,
- ino: qid.Path,
+ ino: inoFromPath(qid.Path),
mode: uint32(attr.Mode),
uid: uint32(fs.opts.dfltuid),
gid: uint32(fs.opts.dfltgid),
@@ -759,8 +785,8 @@ func (d *dentry) cachedMetadataAuthoritative() bool {
// updateFromP9Attrs is called to update d's metadata after an update from the
// remote filesystem.
-func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
- d.metadataMu.Lock()
+// Precondition: d.metadataMu must be locked.
+func (d *dentry) updateFromP9AttrsLocked(mask p9.AttrMask, attr *p9.Attr) {
if mask.Mode {
if got, want := uint32(attr.Mode.FileType()), d.fileType(); got != want {
d.metadataMu.Unlock()
@@ -796,7 +822,6 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
if mask.Size {
d.updateFileSizeLocked(attr.Size)
}
- d.metadataMu.Unlock()
}
// Preconditions: !d.isSynthetic()
@@ -808,6 +833,10 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
file p9file
handleMuRLocked bool
)
+ // d.metadataMu must be locked *before* we getAttr so that we do not end up
+ // updating stale attributes in d.updateFromP9AttrsLocked().
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
d.handleMu.RLock()
if !d.handle.file.isNil() {
file = d.handle.file
@@ -823,7 +852,7 @@ func (d *dentry) updateFromGetattr(ctx context.Context) error {
if err != nil {
return err
}
- d.updateFromP9Attrs(attrMask, &attr)
+ d.updateFromP9AttrsLocked(attrMask, &attr)
return nil
}
@@ -846,7 +875,7 @@ func (d *dentry) statTo(stat *linux.Statx) {
stat.UID = atomic.LoadUint32(&d.uid)
stat.GID = atomic.LoadUint32(&d.gid)
stat.Mode = uint16(atomic.LoadUint32(&d.mode))
- stat.Ino = d.ino
+ stat.Ino = uint64(d.ino)
stat.Size = atomic.LoadUint64(&d.size)
// This is consistent with regularFileFD.Seek(), which treats regular files
// as having no holes.
@@ -859,7 +888,8 @@ func (d *dentry) statTo(stat *linux.Statx) {
stat.DevMinor = d.fs.devMinor
}
-func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error {
+func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.SetStatOptions, mnt *vfs.Mount) error {
+ stat := &opts.Stat
if stat.Mask == 0 {
return nil
}
@@ -867,7 +897,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
return syserror.EPERM
}
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
- if err := vfs.CheckSetStat(ctx, creds, stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+ if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
return err
}
if err := mnt.CheckBeginWrite(); err != nil {
@@ -884,14 +914,14 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
// Prepare for truncate.
if stat.Mask&linux.STATX_SIZE != 0 {
- switch d.mode & linux.S_IFMT {
- case linux.S_IFREG:
+ switch mode.FileType() {
+ case linux.ModeRegular:
if !setLocalMtime {
// Truncate updates mtime.
setLocalMtime = true
stat.Mtime.Nsec = linux.UTIME_NOW
}
- case linux.S_IFDIR:
+ case linux.ModeDirectory:
return syserror.EISDIR
default:
return syserror.EINVAL
@@ -908,6 +938,17 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
}
if !d.isSynthetic() {
if stat.Mask != 0 {
+ if stat.Mask&linux.STATX_SIZE != 0 {
+ // Check whether to allow a truncate request to be made.
+ switch d.mode & linux.S_IFMT {
+ case linux.S_IFREG:
+ // Allow.
+ case linux.S_IFDIR:
+ return syserror.EISDIR
+ default:
+ return syserror.EINVAL
+ }
+ }
if err := d.file.setAttr(ctx, p9.SetAttrMask{
Permissions: stat.Mask&linux.STATX_MODE != 0,
UID: stat.Mask&linux.STATX_UID != 0,
@@ -974,7 +1015,7 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
func (d *dentry) updateFileSizeLocked(newSize uint64) {
d.dataMu.Lock()
oldSize := d.size
- d.size = newSize
+ atomic.StoreUint64(&d.size, newSize)
// d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
// below. This allows concurrent calls to Read/Translate/etc. These
// functions synchronize with truncation by refusing to use cache
@@ -1320,8 +1361,8 @@ func (d *dentry) removexattr(ctx context.Context, creds *auth.Credentials, name
// Extended attributes in the user.* namespace are only supported for regular
// files and directories.
func (d *dentry) userXattrSupported() bool {
- filetype := linux.S_IFMT & atomic.LoadUint32(&d.mode)
- return filetype == linux.S_IFREG || filetype == linux.S_IFDIR
+ filetype := linux.FileMode(atomic.LoadUint32(&d.mode)).FileType()
+ return filetype == linux.ModeRegular || filetype == linux.ModeDirectory
}
// Preconditions: !d.isSynthetic(). d.isRegularFile() || d.isDir().
@@ -1469,7 +1510,7 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
- if err := fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, fd.vfsfd.Mount()); err != nil {
+ if err := fd.dentry().setStat(ctx, auth.CredentialsFromContext(ctx), &opts, fd.vfsfd.Mount()); err != nil {
return err
}
if ev := vfs.InotifyEventFromStatMask(opts.Stat.Mask); ev != 0 {
diff --git a/pkg/sentry/fsimpl/gofer/handle.go b/pkg/sentry/fsimpl/gofer/handle.go
index 724a3f1f7..8792ca4f2 100644
--- a/pkg/sentry/fsimpl/gofer/handle.go
+++ b/pkg/sentry/fsimpl/gofer/handle.go
@@ -126,11 +126,16 @@ func (h *handle) writeFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, o
}
func (h *handle) sync(ctx context.Context) error {
+ // Handle most common case first.
if h.fd >= 0 {
ctx.UninterruptibleSleepStart(false)
err := syscall.Fsync(int(h.fd))
ctx.UninterruptibleSleepFinish(false)
return err
}
+ if h.file.isNil() {
+ // File hasn't been touched, there is nothing to sync.
+ return nil
+ }
return h.file.fsync(ctx)
}
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 3d2d3530a..02317a133 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -89,7 +89,9 @@ func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint
if err != nil {
return err
}
- d.size = size
+ d.dataMu.Lock()
+ atomic.StoreUint64(&d.size, size)
+ d.dataMu.Unlock()
if !d.cachedMetadataAuthoritative() {
d.touchCMtimeLocked()
}
@@ -153,26 +155,53 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ n, _, err := fd.pwrite(ctx, src, offset, opts)
+ return n, err
+}
+
+// pwrite returns the number of bytes written, final offset, error. The final
+// offset should be ignored by PWrite.
+func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, offset, syserror.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, offset, syserror.EOPNOTSUPP
}
+ d := fd.dentry()
+ // If the fd was opened with O_APPEND, make sure the file size is updated.
+ // There is a possible race here if size is modified externally after
+ // metadata cache is updated.
+ if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
+ if err := d.updateFromGetattr(ctx); err != nil {
+ return 0, offset, err
+ }
+ }
+
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+ // Set offset to file size if the fd was opened with O_APPEND.
+ if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+ // Holding d.metadataMu is sufficient for reading d.size.
+ offset = int64(d.size)
+ }
limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
if err != nil {
- return 0, err
+ return 0, offset, err
}
src = src.TakeFirst64(limit)
+ n, err := fd.pwriteLocked(ctx, src, offset, opts)
+ return n, offset + n, err
+}
+// Preconditions: fd.dentry().metatdataMu must be locked.
+func (fd *regularFileFD) pwriteLocked(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
d := fd.dentry()
- d.metadataMu.Lock()
- defer d.metadataMu.Unlock()
if d.fs.opts.interop != InteropModeShared {
// Compare Linux's mm/filemap.c:__generic_file_write_iter() =>
// file_update_time(). This is d.touchCMtime(), but without locking
@@ -235,8 +264,8 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
fd.mu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
+ n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+ fd.off = off
fd.mu.Unlock()
return n, err
}
@@ -582,20 +611,19 @@ func (fd *regularFileFD) Sync(ctx context.Context) error {
func (d *dentry) syncSharedHandle(ctx context.Context) error {
d.handleMu.RLock()
- if !d.handleWritable {
- d.handleMu.RUnlock()
- return nil
- }
- d.dataMu.Lock()
- // Write dirty cached data to the remote file.
- err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
- d.dataMu.Unlock()
- if err == nil {
- // Sync the remote file.
- err = d.handle.sync(ctx)
+ defer d.handleMu.RUnlock()
+
+ if d.handleWritable {
+ d.dataMu.Lock()
+ // Write dirty cached data to the remote file.
+ err := fsutil.SyncDirtyAll(ctx, &d.cache, &d.dirty, d.size, d.fs.mfp.MemoryFile(), d.handle.writeFromBlocksAt)
+ d.dataMu.Unlock()
+ if err != nil {
+ return err
+ }
}
- d.handleMu.RUnlock()
- return err
+ // Sync the remote file.
+ return d.handle.sync(ctx)
}
// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 3c4e7e2e4..811528982 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -16,6 +16,7 @@ package gofer
import (
"sync"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
@@ -28,9 +29,9 @@ import (
)
// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device
-// special files, and (when filesystemOptions.specialRegularFiles is in effect)
-// regular files. specialFileFD differs from regularFileFD by using per-FD
-// handles instead of shared per-dentry handles, and never buffering I/O.
+// special files, and (when filesystemOptions.regularFilesUseSpecialFileFD is
+// in effect) regular files. specialFileFD differs from regularFileFD by using
+// per-FD handles instead of shared per-dentry handles, and never buffering I/O.
type specialFileFD struct {
fileDescription
@@ -41,10 +42,10 @@ type specialFileFD struct {
// file offset is significant, i.e. a regular file. seekable is immutable.
seekable bool
- // mayBlock is true if this file description represents a file for which
- // queue may send I/O readiness events. mayBlock is immutable.
- mayBlock bool
- queue waiter.Queue
+ // haveQueue is true if this file description represents a file for which
+ // queue may send I/O readiness events. haveQueue is immutable.
+ haveQueue bool
+ queue waiter.Queue
// If seekable is true, off is the file offset. off is protected by mu.
mu sync.Mutex
@@ -54,14 +55,14 @@ type specialFileFD struct {
func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks, flags uint32) (*specialFileFD, error) {
ftype := d.fileType()
seekable := ftype == linux.S_IFREG
- mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK
+ haveQueue := (ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK) && h.fd >= 0
fd := &specialFileFD{
- handle: h,
- seekable: seekable,
- mayBlock: mayBlock,
+ handle: h,
+ seekable: seekable,
+ haveQueue: haveQueue,
}
fd.LockFD.Init(locks)
- if mayBlock && h.fd >= 0 {
+ if haveQueue {
if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil {
return nil, err
}
@@ -70,7 +71,7 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks,
DenyPRead: !seekable,
DenyPWrite: !seekable,
}); err != nil {
- if mayBlock && h.fd >= 0 {
+ if haveQueue {
fdnotifier.RemoveFD(h.fd)
}
return nil, err
@@ -80,7 +81,7 @@ func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, locks *vfs.FileLocks,
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *specialFileFD) Release() {
- if fd.mayBlock && fd.handle.fd >= 0 {
+ if fd.haveQueue {
fdnotifier.RemoveFD(fd.handle.fd)
}
fd.handle.close(context.Background())
@@ -100,7 +101,7 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error {
// Readiness implements waiter.Waitable.Readiness.
func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
- if fd.mayBlock {
+ if fd.haveQueue {
return fdnotifier.NonBlockingPoll(fd.handle.fd, mask)
}
return fd.fileDescription.Readiness(mask)
@@ -108,8 +109,9 @@ func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
// EventRegister implements waiter.Waitable.EventRegister.
func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
- if fd.mayBlock {
+ if fd.haveQueue {
fd.queue.EventRegister(e, mask)
+ fdnotifier.UpdateFD(fd.handle.fd)
return
}
fd.fileDescription.EventRegister(e, mask)
@@ -117,8 +119,9 @@ func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
// EventUnregister implements waiter.Waitable.EventUnregister.
func (fd *specialFileFD) EventUnregister(e *waiter.Entry) {
- if fd.mayBlock {
+ if fd.haveQueue {
fd.queue.EventUnregister(e)
+ fdnotifier.UpdateFD(fd.handle.fd)
return
}
fd.fileDescription.EventUnregister(e)
@@ -142,7 +145,7 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
// mmap due to lock ordering; MM locks precede dentry.dataMu. That doesn't
// hold here since specialFileFD doesn't client-cache data. Just buffer the
// read instead.
- if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+ if d := fd.dentry(); d.cachedMetadataAuthoritative() {
d.touchAtime(fd.vfsfd.Mount())
}
buf := make([]byte, dst.NumBytes())
@@ -174,39 +177,76 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ n, _, err := fd.pwrite(ctx, src, offset, opts)
+ return n, err
+}
+
+// pwrite returns the number of bytes written, final offset, error. The final
+// offset should be ignored by PWrite.
+func (fd *specialFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if fd.seekable && offset < 0 {
- return 0, syserror.EINVAL
+ return 0, offset, syserror.EINVAL
}
// Check that flags are supported.
//
// TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, offset, syserror.EOPNOTSUPP
+ }
+
+ d := fd.dentry()
+ // If the regular file fd was opened with O_APPEND, make sure the file size
+ // is updated. There is a possible race here if size is modified externally
+ // after metadata cache is updated.
+ if fd.seekable && fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 && !d.cachedMetadataAuthoritative() {
+ if err := d.updateFromGetattr(ctx); err != nil {
+ return 0, offset, err
+ }
}
if fd.seekable {
+ // We need to hold the metadataMu *while* writing to a regular file.
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+
+ // Set offset to file size if the regular file was opened with O_APPEND.
+ if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+ // Holding d.metadataMu is sufficient for reading d.size.
+ offset = int64(d.size)
+ }
limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
if err != nil {
- return 0, err
+ return 0, offset, err
}
src = src.TakeFirst64(limit)
}
// Do a buffered write. See rationale in PRead.
- if d := fd.dentry(); d.fs.opts.interop != InteropModeShared {
+ if d.cachedMetadataAuthoritative() {
d.touchCMtime()
}
buf := make([]byte, src.NumBytes())
// Don't do partial writes if we get a partial read from src.
if _, err := src.CopyIn(ctx, buf); err != nil {
- return 0, err
+ return 0, offset, err
}
n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset))
if err == syserror.EAGAIN {
err = syserror.ErrWouldBlock
}
- return int64(n), err
+ finalOff = offset
+ // Update file size for regular files.
+ if fd.seekable {
+ finalOff += int64(n)
+ // d.metadataMu is already locked at this point.
+ if uint64(finalOff) > d.size {
+ d.dataMu.Lock()
+ defer d.dataMu.Unlock()
+ atomic.StoreUint64(&d.size, uint64(finalOff))
+ }
+ }
+ return int64(n), finalOff, err
}
// Write implements vfs.FileDescriptionImpl.Write.
@@ -216,8 +256,8 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
}
fd.mu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
+ n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+ fd.off = off
fd.mu.Unlock()
return n, err
}
diff --git a/pkg/sentry/fsimpl/host/BUILD b/pkg/sentry/fsimpl/host/BUILD
index 44a09d87a..e86fbe2d5 100644
--- a/pkg/sentry/fsimpl/host/BUILD
+++ b/pkg/sentry/fsimpl/host/BUILD
@@ -22,6 +22,7 @@ go_library(
"//pkg/context",
"//pkg/fdnotifier",
"//pkg/fspath",
+ "//pkg/iovec",
"//pkg/log",
"//pkg/refs",
"//pkg/safemem",
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 1cd2982cb..c894f2ca0 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -259,7 +259,7 @@ func (i *inode) Mode() linux.FileMode {
}
// Stat implements kernfs.Inode.
-func (i *inode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+func (i *inode) Stat(ctx context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
if opts.Mask&linux.STATX__RESERVED != 0 {
return linux.Statx{}, syserror.EINVAL
}
@@ -373,7 +373,7 @@ func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
// SetStat implements kernfs.Inode.
func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
- s := opts.Stat
+ s := &opts.Stat
m := s.Mask
if m == 0 {
@@ -386,7 +386,7 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
if err := syscall.Fstat(i.hostFD, &hostStat); err != nil {
return err
}
- if err := vfs.CheckSetStat(ctx, creds, &s, linux.FileMode(hostStat.Mode&linux.PermissionsMask), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
+ if err := vfs.CheckSetStat(ctx, creds, &opts, linux.FileMode(hostStat.Mode), auth.KUID(hostStat.Uid), auth.KGID(hostStat.Gid)); err != nil {
return err
}
@@ -396,6 +396,9 @@ func (i *inode) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Cre
}
}
if m&linux.STATX_SIZE != 0 {
+ if hostStat.Mode&linux.S_IFMT != linux.S_IFREG {
+ return syserror.EINVAL
+ }
if err := syscall.Ftruncate(i.hostFD, int64(s.Size)); err != nil {
return err
}
@@ -534,8 +537,8 @@ func (f *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions)
}
// Stat implements vfs.FileDescriptionImpl.
-func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- return f.inode.Stat(f.vfsfd.Mount().Filesystem(), opts)
+func (f *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
+ return f.inode.Stat(ctx, f.vfsfd.Mount().Filesystem(), opts)
}
// Release implements vfs.FileDescriptionImpl.
diff --git a/pkg/sentry/fsimpl/host/socket_iovec.go b/pkg/sentry/fsimpl/host/socket_iovec.go
index 584c247d2..fc0d5fd38 100644
--- a/pkg/sentry/fsimpl/host/socket_iovec.go
+++ b/pkg/sentry/fsimpl/host/socket_iovec.go
@@ -17,13 +17,10 @@ package host
import (
"syscall"
- "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/iovec"
"gvisor.dev/gvisor/pkg/syserror"
)
-// maxIovs is the maximum number of iovecs to pass to the host.
-var maxIovs = linux.UIO_MAXIOV
-
// copyToMulti copies as many bytes from src to dst as possible.
func copyToMulti(dst [][]byte, src []byte) {
for _, d := range dst {
@@ -74,7 +71,7 @@ func buildIovec(bufs [][]byte, maxlen int64, truncate bool) (length int64, iovec
}
}
- if iovsRequired > maxIovs {
+ if iovsRequired > iovec.MaxIovs {
// The kernel will reject our call if we pass this many iovs.
// Use a single intermediate buffer instead.
b := make([]byte, stopLen)
diff --git a/pkg/sentry/fsimpl/kernfs/BUILD b/pkg/sentry/fsimpl/kernfs/BUILD
index 179df6c1e..3835557fe 100644
--- a/pkg/sentry/fsimpl/kernfs/BUILD
+++ b/pkg/sentry/fsimpl/kernfs/BUILD
@@ -70,6 +70,6 @@ go_test(
"//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/usermem",
- "@com_github_google_go-cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index 6886b0876..c6c4472e7 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -127,7 +127,7 @@ func (fd *DynamicBytesFD) Release() {}
// Stat implements vfs.FileDescriptionImpl.Stat.
func (fd *DynamicBytesFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
fs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
- return fd.inode.Stat(fs, opts)
+ return fd.inode.Stat(ctx, fs, opts)
}
// SetStat implements vfs.FileDescriptionImpl.SetStat.
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index ca8b8c63b..1d37ccb98 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -112,7 +112,7 @@ func (fd *GenericDirectoryFD) PWrite(ctx context.Context, src usermem.IOSequence
return fd.DirectoryFileDescriptionDefaultImpl.PWrite(ctx, src, offset, opts)
}
-// Release implements vfs.FileDecriptionImpl.Release.
+// Release implements vfs.FileDescriptionImpl.Release.
func (fd *GenericDirectoryFD) Release() {}
func (fd *GenericDirectoryFD) filesystem() *vfs.Filesystem {
@@ -123,7 +123,7 @@ func (fd *GenericDirectoryFD) inode() Inode {
return fd.vfsfd.VirtualDentry().Dentry().Impl().(*Dentry).inode
}
-// IterDirents implements vfs.FileDecriptionImpl.IterDirents. IterDirents holds
+// IterDirents implements vfs.FileDescriptionImpl.IterDirents. IterDirents holds
// o.mu when calling cb.
func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
fd.mu.Lock()
@@ -132,7 +132,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
opts := vfs.StatOptions{Mask: linux.STATX_INO}
// Handle ".".
if fd.off == 0 {
- stat, err := fd.inode().Stat(fd.filesystem(), opts)
+ stat, err := fd.inode().Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
@@ -152,7 +152,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
if fd.off == 1 {
vfsd := fd.vfsfd.VirtualDentry().Dentry()
parentInode := genericParentOrSelf(vfsd.Impl().(*Dentry)).inode
- stat, err := parentInode.Stat(fd.filesystem(), opts)
+ stat, err := parentInode.Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
@@ -176,7 +176,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
childIdx := fd.off - 2
for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() {
inode := it.Dentry.Impl().(*Dentry).inode
- stat, err := inode.Stat(fd.filesystem(), opts)
+ stat, err := inode.Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
@@ -198,7 +198,7 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
return err
}
-// Seek implements vfs.FileDecriptionImpl.Seek.
+// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
fd.mu.Lock()
defer fd.mu.Unlock()
@@ -226,7 +226,7 @@ func (fd *GenericDirectoryFD) Seek(ctx context.Context, offset int64, whence int
func (fd *GenericDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
fs := fd.filesystem()
inode := fd.inode()
- return inode.Stat(fs, opts)
+ return inode.Stat(ctx, fs, opts)
}
// SetStat implements vfs.FileDescriptionImpl.SetStat.
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 8939871c1..61a36cff9 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -684,7 +684,7 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err != nil {
return linux.Statx{}, err
}
- return inode.Stat(fs.VFSFilesystem(), opts)
+ return inode.Stat(ctx, fs.VFSFilesystem(), opts)
}
// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 4cb885d87..579e627f0 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -243,7 +243,7 @@ func (a *InodeAttrs) Mode() linux.FileMode {
// Stat partially implements Inode.Stat. Note that this function doesn't provide
// all the stat fields, and the embedder should consider extending the result
// with filesystem-specific fields.
-func (a *InodeAttrs) Stat(*vfs.Filesystem, vfs.StatOptions) (linux.Statx, error) {
+func (a *InodeAttrs) Stat(context.Context, *vfs.Filesystem, vfs.StatOptions) (linux.Statx, error) {
var stat linux.Statx
stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_NLINK
stat.DevMajor = a.devMajor
@@ -267,7 +267,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
if opts.Stat.Mask&^(linux.STATX_MODE|linux.STATX_UID|linux.STATX_GID) != 0 {
return syserror.EPERM
}
- if err := vfs.CheckSetStat(ctx, creds, &opts.Stat, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
+ if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil {
return err
}
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 596de1edf..46f207664 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -346,7 +346,7 @@ type inodeMetadata interface {
// Stat returns the metadata for this inode. This corresponds to
// vfs.FilesystemImpl.StatAt.
- Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error)
+ Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error)
// SetStat updates the metadata for this inode. This corresponds to
// vfs.FilesystemImpl.SetStatAt. Implementations are responsible for checking
diff --git a/pkg/sentry/fsimpl/overlay/filesystem.go b/pkg/sentry/fsimpl/overlay/filesystem.go
index ff82e1f20..6b705e955 100644
--- a/pkg/sentry/fsimpl/overlay/filesystem.go
+++ b/pkg/sentry/fsimpl/overlay/filesystem.go
@@ -1104,7 +1104,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
}
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
- if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts.Stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+ if err := vfs.CheckSetStat(ctx, rp.Credentials(), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
return err
}
mnt := rp.Mount()
diff --git a/pkg/sentry/fsimpl/overlay/non_directory.go b/pkg/sentry/fsimpl/overlay/non_directory.go
index a3c1f7a8d..c0749e711 100644
--- a/pkg/sentry/fsimpl/overlay/non_directory.go
+++ b/pkg/sentry/fsimpl/overlay/non_directory.go
@@ -151,7 +151,7 @@ func (fd *nonDirectoryFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux
func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
d := fd.dentry()
mode := linux.FileMode(atomic.LoadUint32(&d.mode))
- if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts.Stat, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
+ if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
return err
}
mnt := fd.vfsfd.Mount()
@@ -176,7 +176,7 @@ func (fd *nonDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptions)
return nil
}
-// StatFS implements vfs.FileDesciptionImpl.StatFS.
+// StatFS implements vfs.FileDescriptionImpl.StatFS.
func (fd *nonDirectoryFD) StatFS(ctx context.Context) (linux.Statfs, error) {
return fd.filesystem().statFS(ctx)
}
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index dd7eaf4a8..811f80a5f 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -115,7 +115,7 @@ func (i *inode) Mode() linux.FileMode {
}
// Stat implements kernfs.Inode.Stat.
-func (i *inode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+func (i *inode) Stat(_ context.Context, vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
ts := linux.NsecToStatxTimestamp(i.ctime.Nanoseconds())
return linux.Statx{
Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS,
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 36a89540c..79c2725f3 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -128,7 +128,7 @@ func (fd *subtasksFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallbac
return fd.GenericDirectoryFD.IterDirents(ctx, cb)
}
-// Seek implements vfs.FileDecriptionImpl.Seek.
+// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *subtasksFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
if fd.task.ExitState() >= kernel.TaskExitZombie {
return 0, syserror.ENOENT
@@ -165,8 +165,8 @@ func (i *subtasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *v
}
// Stat implements kernfs.Inode.
-func (i *subtasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- stat, err := i.InodeAttrs.Stat(vsfs, opts)
+func (i *subtasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts)
if err != nil {
return linux.Statx{}, err
}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 8bb2b0ce1..a5c7aa470 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -156,8 +156,8 @@ func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.
}
// Stat implements kernfs.Inode.
-func (i *taskOwnedInode) Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- stat, err := i.Inode.Stat(fs, opts)
+func (i *taskOwnedInode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.Inode.Stat(ctx, fs, opts)
if err != nil {
return linux.Statx{}, err
}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 9af43b859..859b7d727 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -876,7 +876,7 @@ var _ vfs.FileDescriptionImpl = (*namespaceFD)(nil)
// Stat implements FileDescriptionImpl.
func (fd *namespaceFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
vfs := fd.vfsfd.VirtualDentry().Mount().Filesystem()
- return fd.inode.Stat(vfs, opts)
+ return fd.inode.Stat(ctx, vfs, opts)
}
// SetStat implements FileDescriptionImpl.
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 2f214d0c2..6d2b90a8b 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -206,8 +206,8 @@ func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.
return fd.VFSFileDescription(), nil
}
-func (i *tasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- stat, err := i.InodeAttrs.Stat(vsfs, opts)
+func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts)
if err != nil {
return linux.Statx{}, err
}
diff --git a/pkg/sentry/fsimpl/sys/BUILD b/pkg/sentry/fsimpl/sys/BUILD
index a741e2bb6..1b548ccd4 100644
--- a/pkg/sentry/fsimpl/sys/BUILD
+++ b/pkg/sentry/fsimpl/sys/BUILD
@@ -29,6 +29,6 @@ go_test(
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
- "@com_github_google_go-cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/testutil/BUILD b/pkg/sentry/fsimpl/testutil/BUILD
index 0e4053a46..400a97996 100644
--- a/pkg/sentry/fsimpl/testutil/BUILD
+++ b/pkg/sentry/fsimpl/testutil/BUILD
@@ -32,6 +32,6 @@ go_library(
"//pkg/sentry/vfs",
"//pkg/sync",
"//pkg/usermem",
- "@com_github_google_go-cmp//cmp:go_default_library",
+ "@com_github_google_go_cmp//cmp:go_default_library",
],
)
diff --git a/pkg/sentry/fsimpl/testutil/kernel.go b/pkg/sentry/fsimpl/testutil/kernel.go
index c16a36cdb..e743e8114 100644
--- a/pkg/sentry/fsimpl/testutil/kernel.go
+++ b/pkg/sentry/fsimpl/testutil/kernel.go
@@ -62,6 +62,7 @@ func Boot() (*kernel.Kernel, error) {
return nil, fmt.Errorf("creating platform: %v", err)
}
+ kernel.VFS2Enabled = true
k := &kernel.Kernel{
Platform: plat,
}
@@ -73,7 +74,7 @@ func Boot() (*kernel.Kernel, error) {
k.SetMemoryFile(mf)
// Pass k as the platform since it is savable, unlike the actual platform.
- vdso, err := loader.PrepareVDSO(nil, k)
+ vdso, err := loader.PrepareVDSO(k)
if err != nil {
return nil, fmt.Errorf("creating vdso: %v", err)
}
@@ -103,11 +104,6 @@ func Boot() (*kernel.Kernel, error) {
return nil, fmt.Errorf("initializing kernel: %v", err)
}
- kernel.VFS2Enabled = true
-
- if err := k.VFS().Init(); err != nil {
- return nil, fmt.Errorf("VFS init: %v", err)
- }
k.VFS().MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
AllowUserMount: true,
AllowUserList: true,
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index ed40f6b52..ef210a69b 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -277,7 +277,7 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
creds := rp.Credentials()
var childInode *inode
switch opts.Mode.FileType() {
- case 0, linux.S_IFREG:
+ case linux.S_IFREG:
childInode = fs.newRegularFile(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode)
case linux.S_IFIFO:
childInode = fs.newNamedPipe(creds.EffectiveKUID, creds.EffectiveKGID, opts.Mode)
@@ -649,7 +649,7 @@ func (fs *filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
fs.mu.RUnlock()
return err
}
- if err := d.inode.setStat(ctx, rp.Credentials(), &opts.Stat); err != nil {
+ if err := d.inode.setStat(ctx, rp.Credentials(), &opts); err != nil {
fs.mu.RUnlock()
return err
}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 1cdb46e6f..abbaa5d60 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -325,8 +325,15 @@ func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
+ n, _, err := fd.pwrite(ctx, src, offset, opts)
+ return n, err
+}
+
+// pwrite returns the number of bytes written, final offset and error. The
+// final offset should be ignored by PWrite.
+func (fd *regularFileFD) pwrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (written, finalOff int64, err error) {
if offset < 0 {
- return 0, syserror.EINVAL
+ return 0, offset, syserror.EINVAL
}
// Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
@@ -334,40 +341,44 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
//
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
- return 0, syserror.EOPNOTSUPP
+ return 0, offset, syserror.EOPNOTSUPP
}
srclen := src.NumBytes()
if srclen == 0 {
- return 0, nil
+ return 0, offset, nil
}
f := fd.inode().impl.(*regularFile)
+ f.inode.mu.Lock()
+ defer f.inode.mu.Unlock()
+ // If the file is opened with O_APPEND, update offset to file size.
+ if fd.vfsfd.StatusFlags()&linux.O_APPEND != 0 {
+ // Locking f.inode.mu is sufficient for reading f.size.
+ offset = int64(f.size)
+ }
if end := offset + srclen; end < offset {
// Overflow.
- return 0, syserror.EINVAL
+ return 0, offset, syserror.EINVAL
}
- var err error
srclen, err = vfs.CheckLimit(ctx, offset, srclen)
if err != nil {
- return 0, err
+ return 0, offset, err
}
src = src.TakeFirst64(srclen)
- f.inode.mu.Lock()
rw := getRegularFileReadWriter(f, offset)
n, err := src.CopyInTo(ctx, rw)
- fd.inode().touchCMtimeLocked()
- f.inode.mu.Unlock()
+ f.inode.touchCMtimeLocked()
putRegularFileReadWriter(rw)
- return n, err
+ return n, n + offset, err
}
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
fd.offMu.Lock()
- n, err := fd.PWrite(ctx, src, fd.off, opts)
- fd.off += n
+ n, off, err := fd.pwrite(ctx, src, fd.off, opts)
+ fd.off = off
fd.offMu.Unlock()
return n, err
}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index d7f4f0779..2545d88e9 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -452,7 +452,8 @@ func (i *inode) statTo(stat *linux.Statx) {
}
}
-func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx) error {
+func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, opts *vfs.SetStatOptions) error {
+ stat := &opts.Stat
if stat.Mask == 0 {
return nil
}
@@ -460,7 +461,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu
return syserror.EPERM
}
mode := linux.FileMode(atomic.LoadUint32(&i.mode))
- if err := vfs.CheckSetStat(ctx, creds, stat, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
+ if err := vfs.CheckSetStat(ctx, creds, opts, mode, auth.KUID(atomic.LoadUint32(&i.uid)), auth.KGID(atomic.LoadUint32(&i.gid))); err != nil {
return err
}
i.mu.Lock()
@@ -695,7 +696,7 @@ func (fd *fileDescription) Stat(ctx context.Context, opts vfs.StatOptions) (linu
func (fd *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
creds := auth.CredentialsFromContext(ctx)
d := fd.dentry()
- if err := d.inode.setStat(ctx, creds, &opts.Stat); err != nil {
+ if err := d.inode.setStat(ctx, creds, &opts); err != nil {
return err
}