diff options
Diffstat (limited to 'pkg/sentry')
26 files changed, 513 insertions, 233 deletions
diff --git a/pkg/sentry/arch/syscalls_arm64.go b/pkg/sentry/arch/syscalls_arm64.go index 92d062513..95dfd1e90 100644 --- a/pkg/sentry/arch/syscalls_arm64.go +++ b/pkg/sentry/arch/syscalls_arm64.go @@ -23,7 +23,7 @@ const restartSyscallNr = uintptr(128) // // In linux, at the entry of the syscall handler(el0_svc_common()), value of R0 // is saved to the pt_regs.orig_x0 in kernel code. But currently, the orig_x0 -// was not accessible to the user space application, so we have to do the same +// was not accessible to the userspace application, so we have to do the same // operation in the sentry code to save the R0 value into the App context. func (c *context64) SyscallSaveOrig() { c.OrigR0 = c.Regs.Regs[0] diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go index 9d41fcbdb..8ae2d78d7 100644 --- a/pkg/sentry/fs/gofer/fs.go +++ b/pkg/sentry/fs/gofer/fs.go @@ -60,8 +60,7 @@ const ( limitHostFDTranslationKey = "limit_host_fd_translation" // overlayfsStaleRead if present closes cached readonly file after the first - // write. This is done to workaround a limitation of overlayfs in kernels - // before 4.19 where open FDs are not updated after the file is copied up. + // write. This is done to workaround a limitation of Linux overlayfs. overlayfsStaleRead = "overlayfs_stale_read" ) diff --git a/pkg/sentry/fsimpl/devpts/line_discipline.go b/pkg/sentry/fsimpl/devpts/line_discipline.go index e201801d6..f7bc325d1 100644 --- a/pkg/sentry/fsimpl/devpts/line_discipline.go +++ b/pkg/sentry/fsimpl/devpts/line_discipline.go @@ -27,8 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) -// LINT.IfChange - const ( // canonMaxBytes is the number of bytes that fit into a single line of // terminal input in canonical mode. This corresponds to N_TTY_BUF_SIZE @@ -445,5 +443,3 @@ func (l *lineDiscipline) peek(b []byte) int { } return size } - -// LINT.ThenChange(../../fs/tty/line_discipline.go) diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 04a292927..7a7ce5d81 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -27,8 +27,6 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) -// LINT.IfChange - // masterInode is the inode for the master end of the Terminal. type masterInode struct { kernfs.InodeAttrs @@ -222,5 +220,3 @@ func maybeEmitUnimplementedEvent(ctx context.Context, cmd uint32) { unimpl.EmitUnimplementedEvent(ctx) } } - -// LINT.ThenChange(../../fs/tty/master.go) diff --git a/pkg/sentry/fsimpl/devpts/queue.go b/pkg/sentry/fsimpl/devpts/queue.go index 29a6be858..dffb4232c 100644 --- a/pkg/sentry/fsimpl/devpts/queue.go +++ b/pkg/sentry/fsimpl/devpts/queue.go @@ -25,8 +25,6 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) -// LINT.IfChange - // waitBufMaxBytes is the maximum size of a wait buffer. It is based on // TTYB_DEFAULT_MEM_LIMIT. const waitBufMaxBytes = 131072 @@ -236,5 +234,3 @@ func (q *queue) waitBufAppend(b []byte) { q.waitBuf = append(q.waitBuf, b) q.waitBufLen += uint64(len(b)) } - -// LINT.ThenChange(../../fs/tty/queue.go) diff --git a/pkg/sentry/fsimpl/devpts/slave.go b/pkg/sentry/fsimpl/devpts/slave.go index 0a98dc896..526cd406c 100644 --- a/pkg/sentry/fsimpl/devpts/slave.go +++ b/pkg/sentry/fsimpl/devpts/slave.go @@ -26,8 +26,6 @@ import ( "gvisor.dev/gvisor/pkg/waiter" ) -// LINT.IfChange - // slaveInode is the inode for the slave end of the Terminal. type slaveInode struct { kernfs.InodeAttrs @@ -182,5 +180,3 @@ func (sfd *slaveFileDescription) Stat(ctx context.Context, opts vfs.StatOptions) fs := sfd.vfsfd.VirtualDentry().Mount().Filesystem() return sfd.inode.Stat(fs, opts) } - -// LINT.ThenChange(../../fs/tty/slave.go) diff --git a/pkg/sentry/fsimpl/devpts/terminal.go b/pkg/sentry/fsimpl/devpts/terminal.go index b44e673d8..7d2781c54 100644 --- a/pkg/sentry/fsimpl/devpts/terminal.go +++ b/pkg/sentry/fsimpl/devpts/terminal.go @@ -22,8 +22,6 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -// LINT.IfChanges - // Terminal is a pseudoterminal. // // +stateify savable @@ -120,5 +118,3 @@ func (tm *Terminal) tty(isMaster bool) *kernel.TTY { } return tm.slaveKTTY } - -// LINT.ThenChange(../../fs/tty/terminal.go) diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go index 1da8d5d82..ebf063a58 100644 --- a/pkg/sentry/fsimpl/gofer/gofer.go +++ b/pkg/sentry/fsimpl/gofer/gofer.go @@ -143,9 +143,12 @@ type filesystemOptions struct { // If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote // filesystem may not be coherent with writable host FDs opened later, so - // mappings of the former must be replaced by mappings of the latter. This - // is usually only the case when the remote filesystem is an overlayfs - // mount on Linux < 4.19. + // all uses of the former must be replaced by uses of the latter. This is + // usually only the case when the remote filesystem is a Linux overlayfs + // mount. (Prior to Linux 4.18, patch series centered on commit + // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were + // incoherent between pre-copy-up and post-copy-up FDs; after that patch + // series, only memory mappings are incoherent.) overlayfsStaleRead bool // If regularFilesUseSpecialFileFD is true, application FDs representing @@ -866,8 +869,8 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin Size: stat.Mask&linux.STATX_SIZE != 0, ATime: stat.Mask&linux.STATX_ATIME != 0, MTime: stat.Mask&linux.STATX_MTIME != 0, - ATimeNotSystemTime: stat.Atime.Nsec != linux.UTIME_NOW, - MTimeNotSystemTime: stat.Mtime.Nsec != linux.UTIME_NOW, + ATimeNotSystemTime: stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW, + MTimeNotSystemTime: stat.Mask&linux.STATX_MTIME != 0 && stat.Mtime.Nsec != linux.UTIME_NOW, }, p9.SetAttr{ Permissions: p9.FileMode(stat.Mode), UID: p9.UID(stat.UID), diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD index a2d9649e7..007be1572 100644 --- a/pkg/sentry/fsimpl/tmpfs/BUILD +++ b/pkg/sentry/fsimpl/tmpfs/BUILD @@ -52,7 +52,6 @@ go_library( "//pkg/sentry/fs", "//pkg/sentry/fs/fsutil", "//pkg/sentry/fs/lock", - "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/pipe", "//pkg/sentry/kernel/time", @@ -96,6 +95,7 @@ go_test( "pipe_test.go", "regular_file_test.go", "stat_test.go", + "tmpfs_test.go", ], library = ":tmpfs", deps = [ @@ -105,7 +105,6 @@ go_test( "//pkg/sentry/contexttest", "//pkg/sentry/fs/lock", "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/contexttest", "//pkg/sentry/vfs", "//pkg/syserror", "//pkg/usermem", diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go index 36ffcb592..80fa7b29d 100644 --- a/pkg/sentry/fsimpl/tmpfs/filesystem.go +++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go @@ -16,6 +16,7 @@ package tmpfs import ( "fmt" + "sync/atomic" "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" @@ -24,6 +25,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" + "gvisor.dev/gvisor/pkg/usermem" ) // Sync implements vfs.FilesystemImpl.Sync. @@ -76,8 +78,8 @@ afterSymlink: return nil, err } if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issue/1197): Symlink traversals updates - // access time. + // Symlink traversal updates access time. + atomic.StoreInt64(&d.inode.atime, d.inode.fs.clock.Now().Nanoseconds()) if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err } @@ -361,8 +363,8 @@ afterTrailingSymlink: } // Do we need to resolve a trailing symlink? if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() { - // TODO(gvisor.dev/issue/1197): Symlink traversals updates - // access time. + // Symlink traversal updates access time. + atomic.StoreInt64(&child.inode.atime, child.inode.fs.clock.Now().Nanoseconds()) if err := rp.HandleSymlink(symlink.target); err != nil { return nil, err } @@ -636,12 +638,19 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() defer fs.mu.RUnlock() - _, err := resolveLocked(rp) - if err != nil { + if _, err := resolveLocked(rp); err != nil { return linux.Statfs{}, err } - // TODO(gvisor.dev/issue/1197): Actually implement statfs. - return linux.Statfs{}, syserror.ENOSYS + statfs := linux.Statfs{ + Type: linux.TMPFS_MAGIC, + BlockSize: usermem.PageSize, + FragmentSize: usermem.PageSize, + NameLength: linux.NAME_MAX, + // TODO(b/29637826): Allow configuring a tmpfs size and enforce it. + Blocks: 0, + BlocksFree: 0, + } + return statfs, nil } // SymlinkAt implements vfs.FilesystemImpl.SymlinkAt. @@ -763,5 +772,24 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath, func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error { fs.mu.RLock() defer fs.mu.RUnlock() - return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b) + mnt := vd.Mount() + d := vd.Dentry().Impl().(*dentry) + for { + if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() { + return vfs.PrependPathAtVFSRootError{} + } + if &d.vfsd == mnt.Root() { + return nil + } + if d.parent == nil { + if d.name != "" { + // This must be an anonymous memfd file. + b.PrependComponent("/" + d.name) + return vfs.PrependPathSyntheticError{} + } + return vfs.PrependPathAtNonMountRootError{} + } + b.PrependComponent(d.name) + d = d.parent + } } diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go index 57e5e28ec..3f433d666 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go @@ -88,6 +88,7 @@ type regularFile struct { func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode { file := ®ularFile{ memFile: fs.memFile, + seals: linux.F_SEAL_SEAL, } file.inode.init(file, fs, creds, linux.S_IFREG|mode) file.inode.nlink = 1 // from parent directory @@ -577,3 +578,44 @@ exitLoop: return done, retErr } + +// GetSeals returns the current set of seals on a memfd inode. +func GetSeals(fd *vfs.FileDescription) (uint32, error) { + f, ok := fd.Impl().(*regularFileFD) + if !ok { + return 0, syserror.EINVAL + } + rf := f.inode().impl.(*regularFile) + rf.dataMu.RLock() + defer rf.dataMu.RUnlock() + return rf.seals, nil +} + +// AddSeals adds new file seals to a memfd inode. +func AddSeals(fd *vfs.FileDescription, val uint32) error { + f, ok := fd.Impl().(*regularFileFD) + if !ok { + return syserror.EINVAL + } + rf := f.inode().impl.(*regularFile) + rf.mapsMu.Lock() + defer rf.mapsMu.Unlock() + rf.dataMu.RLock() + defer rf.dataMu.RUnlock() + + if rf.seals&linux.F_SEAL_SEAL != 0 { + // Seal applied which prevents addition of any new seals. + return syserror.EPERM + } + + // F_SEAL_WRITE can only be added if there are no active writable maps. + if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 { + if rf.writableMappingPages > 0 { + return syserror.EBUSY + } + } + + // Seals can only be added, never removed. + rf.seals |= val + return nil +} diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go index 0399725cf..64e1c40ad 100644 --- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go +++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go @@ -18,152 +18,16 @@ import ( "bytes" "fmt" "io" - "sync/atomic" "testing" "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/fs/lock" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" ) -// nextFileID is used to generate unique file names. -var nextFileID int64 - -// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error -// is not nil, then cleanup should be called when the root is no longer needed. -func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) { - creds := auth.CredentialsFromContext(ctx) - - vfsObj := &vfs.VirtualFilesystem{} - if err := vfsObj.Init(); err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) - } - - vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) - if err != nil { - return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) - } - root := mntns.Root() - return vfsObj, root, func() { - root.DecRef() - mntns.DecRef() - }, nil -} - -// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If -// the returned err is not nil, then cleanup should be called when the FD is no -// longer needed. -func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the file that will be write/read. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(filename), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, - Mode: linux.ModeRegular | mode, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) - } - - return fd, cleanup, nil -} - -// newDirFD is like newFileFD, but for directories. -func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the dir. - if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.MkdirOptions{ - Mode: linux.ModeDirectory | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err) - } - - // Open the dir and return it. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(dirname), - }, &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_DIRECTORY, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err) - } - - return fd, cleanup, nil -} - -// newPipeFD is like newFileFD, but for pipes. -func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { - creds := auth.CredentialsFromContext(ctx) - vfsObj, root, cleanup, err := newTmpfsRoot(ctx) - if err != nil { - return nil, nil, err - } - - pipename := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1)) - - // Create the pipe. - if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(pipename), - }, &vfs.MknodOptions{ - Mode: linux.ModeNamedPipe | mode, - }); err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to create pipe %q: %v", pipename, err) - } - - // Open the pipe and return it. - fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ - Root: root, - Start: root, - Path: fspath.Parse(pipename), - }, &vfs.OpenOptions{ - Flags: linux.O_RDWR, - }) - if err != nil { - cleanup() - return nil, nil, fmt.Errorf("failed to open pipe %q: %v", pipename, err) - } - - return fd, cleanup, nil -} - // Test that we can write some data to a file and read it back.` func TestSimpleWriteRead(t *testing.T) { ctx := contexttest.Context(t) diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go index 60c2c980e..f7ee4aab2 100644 --- a/pkg/sentry/fsimpl/tmpfs/stat_test.go +++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go @@ -19,8 +19,8 @@ import ( "testing" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/contexttest" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest" "gvisor.dev/gvisor/pkg/sentry/vfs" ) @@ -29,7 +29,6 @@ func TestStatAfterCreate(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( @@ -175,7 +174,6 @@ func TestSetStat(t *testing.T) { mode := linux.FileMode(0644) // Run with different file types. - // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets. for _, typ := range []string{"file", "dir", "pipe"} { t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) { var ( diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go index 405928bd0..1e781aecd 100644 --- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go @@ -94,7 +94,7 @@ type FilesystemOpts struct { } // GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { +func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, _ string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx) if memFileProvider == nil { panic("MemoryFileProviderFromContext returned nil") @@ -139,6 +139,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt return &fs.vfsfs, &root.vfsd, nil } +// NewFilesystem returns a new tmpfs filesystem. +func NewFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*vfs.Filesystem, *vfs.Dentry, error) { + return FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "", vfs.GetFilesystemOptions{}) +} + // Release implements vfs.FilesystemImpl.Release. func (fs *filesystem) Release() { fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) @@ -658,3 +663,34 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption func (fd *fileDescription) Removexattr(ctx context.Context, name string) error { return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name) } + +// NewMemfd creates a new tmpfs regular file and file description that can back +// an anonymous fd created by memfd_create. +func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name string) (*vfs.FileDescription, error) { + fs, ok := mount.Filesystem().Impl().(*filesystem) + if !ok { + panic("NewMemfd() called with non-tmpfs mount") + } + + // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with + // S_IRWXUGO. + mode := linux.FileMode(0777) + inode := fs.newRegularFile(creds, mode) + rf := inode.impl.(*regularFile) + if allowSeals { + rf.seals = 0 + } + + d := fs.newDentry(inode) + defer d.DecRef() + d.name = name + + // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with + // FMODE_READ | FMODE_WRITE. + var fd regularFileFD + flags := uint32(linux.O_RDWR) + if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil { + return nil, err + } + return &fd.vfsfd, nil +} diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go new file mode 100644 index 000000000..a240fb276 --- /dev/null +++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go @@ -0,0 +1,156 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tmpfs + +import ( + "fmt" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fspath" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// nextFileID is used to generate unique file names. +var nextFileID int64 + +// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error +// is not nil, then cleanup should be called when the root is no longer needed. +func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) { + creds := auth.CredentialsFromContext(ctx) + + vfsObj := &vfs.VirtualFilesystem{} + if err := vfsObj.Init(); err != nil { + return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err) + } + + vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ + AllowUserMount: true, + }) + mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{}) + if err != nil { + return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err) + } + root := mntns.Root() + return vfsObj, root, func() { + root.DecRef() + mntns.DecRef() + }, nil +} + +// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If +// the returned err is not nil, then cleanup should be called when the FD is no +// longer needed. +func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1)) + + // Create the file that will be write/read. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(filename), + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL, + Mode: linux.ModeRegular | mode, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err) + } + + return fd, cleanup, nil +} + +// newDirFD is like newFileFD, but for directories. +func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1)) + + // Create the dir. + if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(dirname), + }, &vfs.MkdirOptions{ + Mode: linux.ModeDirectory | mode, + }); err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err) + } + + // Open the dir and return it. + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(dirname), + }, &vfs.OpenOptions{ + Flags: linux.O_RDONLY | linux.O_DIRECTORY, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err) + } + + return fd, cleanup, nil +} + +// newPipeFD is like newFileFD, but for pipes. +func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) { + creds := auth.CredentialsFromContext(ctx) + vfsObj, root, cleanup, err := newTmpfsRoot(ctx) + if err != nil { + return nil, nil, err + } + + name := fmt.Sprintf("tmpfs-test-%d", atomic.AddInt64(&nextFileID, 1)) + + if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(name), + }, &vfs.MknodOptions{ + Mode: linux.ModeNamedPipe | mode, + }); err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, err) + } + + fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse(name), + }, &vfs.OpenOptions{ + Flags: linux.O_RDWR, + }) + if err != nil { + cleanup() + return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, err) + } + + return fd, cleanup, nil +} diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD index 8104f50f3..a28eab8b8 100644 --- a/pkg/sentry/kernel/BUILD +++ b/pkg/sentry/kernel/BUILD @@ -173,6 +173,7 @@ go_library( "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/fsimpl/sockfs", "//pkg/sentry/fsimpl/timerfd", + "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/hostcpu", "//pkg/sentry/inet", "//pkg/sentry/kernel/auth", diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go index 3617da8c6..5efeb3767 100644 --- a/pkg/sentry/kernel/kernel.go +++ b/pkg/sentry/kernel/kernel.go @@ -53,6 +53,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs" "gvisor.dev/gvisor/pkg/sentry/fsimpl/timerfd" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/hostcpu" "gvisor.dev/gvisor/pkg/sentry/inet" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -259,6 +260,10 @@ type Kernel struct { // syscalls (as opposed to named pipes created by mknod()). pipeMount *vfs.Mount + // shmMount is the Mount used for anonymous files created by the + // memfd_create() syscalls. It is analagous to Linux's shm_mnt. + shmMount *vfs.Mount + // socketMount is the Mount used for sockets created by the socket() and // socketpair() syscalls. There are several cases where a socket dentry will // not be contained in socketMount: @@ -330,6 +335,9 @@ func (k *Kernel) Init(args InitKernelArgs) error { if args.Timekeeper == nil { return fmt.Errorf("Timekeeper is nil") } + if args.Timekeeper.clocks == nil { + return fmt.Errorf("Must call Timekeeper.SetClocks() before Kernel.Init()") + } if args.RootUserNamespace == nil { return fmt.Errorf("RootUserNamespace is nil") } @@ -384,6 +392,18 @@ func (k *Kernel) Init(args InitKernelArgs) error { } k.pipeMount = pipeMount + tmpfsFilesystem, tmpfsRoot, err := tmpfs.NewFilesystem(k.SupervisorContext(), &k.vfs, auth.NewRootCredentials(k.rootUserNamespace)) + if err != nil { + return fmt.Errorf("failed to create tmpfs filesystem: %v", err) + } + defer tmpfsFilesystem.DecRef() + defer tmpfsRoot.DecRef() + shmMount, err := k.vfs.NewDisconnectedMount(tmpfsFilesystem, tmpfsRoot, &vfs.MountOptions{}) + if err != nil { + return fmt.Errorf("failed to create tmpfs mount: %v", err) + } + k.shmMount = shmMount + socketFilesystem, err := sockfs.NewFilesystem(&k.vfs) if err != nil { return fmt.Errorf("failed to create sockfs filesystem: %v", err) @@ -1656,6 +1676,11 @@ func (k *Kernel) PipeMount() *vfs.Mount { return k.pipeMount } +// ShmMount returns the tmpfs mount. +func (k *Kernel) ShmMount() *vfs.Mount { + return k.shmMount +} + // SocketMount returns the sockfs mount. func (k *Kernel) SocketMount() *vfs.Mount { return k.socketMount diff --git a/pkg/sentry/kernel/pipe/pipe_util.go b/pkg/sentry/kernel/pipe/pipe_util.go index 5a1d4fd57..aacf28da2 100644 --- a/pkg/sentry/kernel/pipe/pipe_util.go +++ b/pkg/sentry/kernel/pipe/pipe_util.go @@ -144,7 +144,7 @@ func (p *Pipe) Ioctl(ctx context.Context, io usermem.IO, args arch.SyscallArgume if v > math.MaxInt32 { v = math.MaxInt32 // Silently truncate. } - // Copy result to user-space. + // Copy result to userspace. _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ AddressSpaceActive: true, }) diff --git a/pkg/sentry/kernel/task_syscall.go b/pkg/sentry/kernel/task_syscall.go index c9db78e06..a5903b0b5 100644 --- a/pkg/sentry/kernel/task_syscall.go +++ b/pkg/sentry/kernel/task_syscall.go @@ -199,10 +199,10 @@ func (t *Task) doSyscall() taskRunState { // // On x86, register rax was shared by syscall number and return // value, and at the entry of the syscall handler, the rax was - // saved to regs.orig_rax which was exposed to user space. + // saved to regs.orig_rax which was exposed to userspace. // But on arm64, syscall number was passed through X8, and the X0 // was shared by the first syscall argument and return value. The - // X0 was saved to regs.orig_x0 which was not exposed to user space. + // X0 was saved to regs.orig_x0 which was not exposed to userspace. // So we have to do the same operation here to save the X0 value // into the task context. t.Arch().SyscallSaveOrig() diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go index 9d032f052..60df51dae 100644 --- a/pkg/sentry/socket/netstack/netstack.go +++ b/pkg/sentry/socket/netstack/netstack.go @@ -1321,6 +1321,29 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa return int32(time.Duration(v) / time.Second), nil + case linux.TCP_SYNCNT: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil + + case linux.TCP_WINDOW_CLAMP: + if outLen < sizeOfInt32 { + return nil, syserr.ErrInvalidArgument + } + + v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption) + if err != nil { + return nil, syserr.TranslateNetstackError(err) + } + + return int32(v), nil default: emitUnimplementedEventTCP(t, name) } @@ -1790,6 +1813,22 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) * } return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v)))) + case linux.TCP_SYNCNT: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + v := usermem.ByteOrder.Uint32(optVal) + + return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v))) + + case linux.TCP_WINDOW_CLAMP: + if len(optVal) < sizeOfInt32 { + return syserr.ErrInvalidArgument + } + v := usermem.ByteOrder.Uint32(optVal) + + return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v))) + case linux.TCP_REPAIR_OPTIONS: t.Kernel().EmitUnimplementedEvent(t) @@ -2679,7 +2718,7 @@ func (s *socketOpsCommon) ioctl(ctx context.Context, io usermem.IO, args arch.Sy v = math.MaxInt32 } - // Copy result to user-space. + // Copy result to userspace. _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ AddressSpaceActive: true, }) @@ -2748,7 +2787,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc if v > math.MaxInt32 { v = math.MaxInt32 } - // Copy result to user-space. + // Copy result to userspace. _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ AddressSpaceActive: true, }) @@ -2764,7 +2803,7 @@ func Ioctl(ctx context.Context, ep commonEndpoint, io usermem.IO, args arch.Sysc v = math.MaxInt32 } - // Copy result to user-space. + // Copy result to userspace. _, err := usermem.CopyObjectOut(ctx, io, args[2].Pointer(), int32(v), usermem.IOOpts{ AddressSpaceActive: true, }) diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index df0d0f461..39f2b79ec 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -16,7 +16,6 @@ package linux import ( "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/log" "gvisor.dev/gvisor/pkg/sentry/arch" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -26,7 +25,6 @@ import ( // doSplice implements a blocking splice operation. func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) { - log.Infof("NLAC: doSplice opts: %+v", opts) if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 || (opts.SrcStart+opts.Length < 0) { return 0, syserror.EINVAL } diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD index c32f942fb..f882ef840 100644 --- a/pkg/sentry/syscalls/linux/vfs2/BUILD +++ b/pkg/sentry/syscalls/linux/vfs2/BUILD @@ -13,6 +13,7 @@ go_library( "fscontext.go", "getdents.go", "ioctl.go", + "memfd.go", "mmap.go", "path.go", "pipe.go", @@ -43,6 +44,7 @@ go_library( "//pkg/sentry/fsimpl/pipefs", "//pkg/sentry/fsimpl/signalfd", "//pkg/sentry/fsimpl/timerfd", + "//pkg/sentry/fsimpl/tmpfs", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", "//pkg/sentry/kernel/pipe", diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go index 8181d80f4..ca0f7fd1e 100644 --- a/pkg/sentry/syscalls/linux/vfs2/fd.go +++ b/pkg/sentry/syscalls/linux/vfs2/fd.go @@ -17,6 +17,7 @@ package vfs2 import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/pipe" slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux" @@ -157,6 +158,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall return 0, nil, syserror.EBADF } return uintptr(pipefile.PipeSize()), nil, nil + case linux.F_GET_SEALS: + val, err := tmpfs.GetSeals(file) + return uintptr(val), nil, err + case linux.F_ADD_SEALS: + if !file.IsWritable() { + return 0, nil, syserror.EPERM + } + err := tmpfs.AddSeals(file, args[2].Uint()) + return 0, nil, err default: // TODO(gvisor.dev/issue/1623): Everything else is not yet supported. return 0, nil, syserror.EINVAL diff --git a/pkg/sentry/syscalls/linux/vfs2/memfd.go b/pkg/sentry/syscalls/linux/vfs2/memfd.go new file mode 100644 index 000000000..bbe248d17 --- /dev/null +++ b/pkg/sentry/syscalls/linux/vfs2/memfd.go @@ -0,0 +1,63 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package vfs2 + +import ( + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/arch" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/syserror" +) + +const ( + memfdPrefix = "memfd:" + memfdMaxNameLen = linux.NAME_MAX - len(memfdPrefix) + memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING) +) + +// MemfdCreate implements the linux syscall memfd_create(2). +func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + addr := args[0].Pointer() + flags := args[1].Uint() + + if flags&^memfdAllFlags != 0 { + // Unknown bits in flags. + return 0, nil, syserror.EINVAL + } + + allowSeals := flags&linux.MFD_ALLOW_SEALING != 0 + cloExec := flags&linux.MFD_CLOEXEC != 0 + + name, err := t.CopyInString(addr, memfdMaxNameLen) + if err != nil { + return 0, nil, err + } + + shmMount := t.Kernel().ShmMount() + file, err := tmpfs.NewMemfd(shmMount, t.Credentials(), allowSeals, memfdPrefix+name) + if err != nil { + return 0, nil, err + } + + fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{ + CloseOnExec: cloExec, + }) + if err != nil { + return 0, nil, err + } + + return uintptr(fd), nil, nil +} diff --git a/pkg/sentry/syscalls/linux/vfs2/setstat.go b/pkg/sentry/syscalls/linux/vfs2/setstat.go index 4e61f1452..09ecfed26 100644 --- a/pkg/sentry/syscalls/linux/vfs2/setstat.go +++ b/pkg/sentry/syscalls/linux/vfs2/setstat.go @@ -246,73 +246,104 @@ func Utimes(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal return 0, nil, err } - opts := vfs.SetStatOptions{ - Stat: linux.Statx{ - Mask: linux.STATX_ATIME | linux.STATX_MTIME, - }, - } - if timesAddr == 0 { - opts.Stat.Atime.Nsec = linux.UTIME_NOW - opts.Stat.Mtime.Nsec = linux.UTIME_NOW - } else { - var times [2]linux.Timeval - if _, err := t.CopyIn(timesAddr, ×); err != nil { - return 0, nil, err - } - opts.Stat.Atime = linux.StatxTimestamp{ - Sec: times[0].Sec, - Nsec: uint32(times[0].Usec * 1000), - } - opts.Stat.Mtime = linux.StatxTimestamp{ - Sec: times[1].Sec, - Nsec: uint32(times[1].Usec * 1000), - } + var opts vfs.SetStatOptions + if err := populateSetStatOptionsForUtimes(t, timesAddr, &opts); err != nil { + return 0, nil, err } return 0, nil, setstatat(t, linux.AT_FDCWD, path, disallowEmptyPath, followFinalSymlink, &opts) } -// Utimensat implements Linux syscall utimensat(2). -func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { +// Futimesat implements Linux syscall futimesat(2). +func Futimesat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { dirfd := args[0].Int() pathAddr := args[1].Pointer() timesAddr := args[2].Pointer() - flags := args[3].Int() - if flags&^linux.AT_SYMLINK_NOFOLLOW != 0 { - return 0, nil, syserror.EINVAL - } - - path, err := copyInPath(t, pathAddr) - if err != nil { - return 0, nil, err + // "If filename is NULL and dfd refers to an open file, then operate on the + // file. Otherwise look up filename, possibly using dfd as a starting + // point." - fs/utimes.c + var path fspath.Path + shouldAllowEmptyPath := allowEmptyPath + if dirfd == linux.AT_FDCWD || pathAddr != 0 { + var err error + path, err = copyInPath(t, pathAddr) + if err != nil { + return 0, nil, err + } + shouldAllowEmptyPath = disallowEmptyPath } var opts vfs.SetStatOptions - if err := populateSetStatOptionsForUtimens(t, timesAddr, &opts); err != nil { + if err := populateSetStatOptionsForUtimes(t, timesAddr, &opts); err != nil { return 0, nil, err } - return 0, nil, setstatat(t, dirfd, path, disallowEmptyPath, followFinalSymlink, &opts) + return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, followFinalSymlink, &opts) } -// Futimens implements Linux syscall futimens(2). -func Futimens(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { - fd := args[0].Int() - timesAddr := args[1].Pointer() - - file := t.GetFileVFS2(fd) - if file == nil { - return 0, nil, syserror.EBADF +func populateSetStatOptionsForUtimes(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { + if timesAddr == 0 { + opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME + opts.Stat.Atime.Nsec = linux.UTIME_NOW + opts.Stat.Mtime.Nsec = linux.UTIME_NOW + return nil } - defer file.DecRef() + var times [2]linux.Timeval + if _, err := t.CopyIn(timesAddr, ×); err != nil { + return err + } + if times[0].Usec < 0 || times[0].Usec > 999999 || times[1].Usec < 0 || times[1].Usec > 999999 { + return syserror.EINVAL + } + opts.Stat.Mask = linux.STATX_ATIME | linux.STATX_MTIME + opts.Stat.Atime = linux.StatxTimestamp{ + Sec: times[0].Sec, + Nsec: uint32(times[0].Usec * 1000), + } + opts.Stat.Mtime = linux.StatxTimestamp{ + Sec: times[1].Sec, + Nsec: uint32(times[1].Usec * 1000), + } + return nil +} +// Utimensat implements Linux syscall utimensat(2). +func Utimensat(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + dirfd := args[0].Int() + pathAddr := args[1].Pointer() + timesAddr := args[2].Pointer() + flags := args[3].Int() + + // Linux requires that the UTIME_OMIT check occur before checking path or + // flags. var opts vfs.SetStatOptions if err := populateSetStatOptionsForUtimens(t, timesAddr, &opts); err != nil { return 0, nil, err } + if opts.Stat.Mask == 0 { + return 0, nil, nil + } - return 0, nil, file.SetStat(t, opts) + if flags&^linux.AT_SYMLINK_NOFOLLOW != 0 { + return 0, nil, syserror.EINVAL + } + + // "If filename is NULL and dfd refers to an open file, then operate on the + // file. Otherwise look up filename, possibly using dfd as a starting + // point." - fs/utimes.c + var path fspath.Path + shouldAllowEmptyPath := allowEmptyPath + if dirfd == linux.AT_FDCWD || pathAddr != 0 { + var err error + path, err = copyInPath(t, pathAddr) + if err != nil { + return 0, nil, err + } + shouldAllowEmptyPath = disallowEmptyPath + } + + return 0, nil, setstatat(t, dirfd, path, shouldAllowEmptyPath, shouldFollowFinalSymlink(flags&linux.AT_SYMLINK_NOFOLLOW == 0), &opts) } func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, opts *vfs.SetStatOptions) error { @@ -327,6 +358,9 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, op return err } if times[0].Nsec != linux.UTIME_OMIT { + if times[0].Nsec != linux.UTIME_NOW && (times[0].Nsec < 0 || times[0].Nsec > 999999999) { + return syserror.EINVAL + } opts.Stat.Mask |= linux.STATX_ATIME opts.Stat.Atime = linux.StatxTimestamp{ Sec: times[0].Sec, @@ -334,6 +368,9 @@ func populateSetStatOptionsForUtimens(t *kernel.Task, timesAddr usermem.Addr, op } } if times[1].Nsec != linux.UTIME_OMIT { + if times[1].Nsec != linux.UTIME_NOW && (times[1].Nsec < 0 || times[1].Nsec > 999999999) { + return syserror.EINVAL + } opts.Stat.Mask |= linux.STATX_MTIME opts.Stat.Mtime = linux.StatxTimestamp{ Sec: times[1].Sec, diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go index 9c04677f1..a332d01bd 100644 --- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go +++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go @@ -123,7 +123,7 @@ func Override() { s.Table[258] = syscalls.Supported("mkdirat", Mkdirat) s.Table[259] = syscalls.Supported("mknodat", Mknodat) s.Table[260] = syscalls.Supported("fchownat", Fchownat) - s.Table[261] = syscalls.Supported("futimens", Futimens) + s.Table[261] = syscalls.Supported("futimesat", Futimesat) s.Table[262] = syscalls.Supported("newfstatat", Newfstatat) s.Table[263] = syscalls.Supported("unlinkat", Unlinkat) s.Table[264] = syscalls.Supported("renameat", Renameat) @@ -158,7 +158,7 @@ func Override() { s.Table[306] = syscalls.Supported("syncfs", Syncfs) s.Table[307] = syscalls.Supported("sendmmsg", SendMMsg) s.Table[316] = syscalls.Supported("renameat2", Renameat2) - delete(s.Table, 319) // memfd_create + s.Table[319] = syscalls.Supported("memfd_create", MemfdCreate) s.Table[322] = syscalls.Supported("execveat", Execveat) s.Table[327] = syscalls.Supported("preadv2", Preadv2) s.Table[328] = syscalls.Supported("pwritev2", Pwritev2) |