diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/BUILD | 3 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/filesystem.go | 65 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/host_named_pipe.go | 97 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/gofer/special_file.go | 77 | ||||
-rw-r--r-- | pkg/sentry/platform/kvm/machine_arm64.go | 6 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_splice.go | 6 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/sys_xattr.go | 12 | ||||
-rw-r--r-- | pkg/sentry/syscalls/linux/vfs2/splice.go | 5 | ||||
-rw-r--r-- | runsc/boot/filter/config.go | 8 | ||||
-rw-r--r-- | runsc/boot/fs.go | 18 | ||||
-rw-r--r-- | runsc/boot/vfs.go | 130 | ||||
-rw-r--r-- | runsc/cmd/boot.go | 2 | ||||
-rw-r--r-- | runsc/container/BUILD | 2 | ||||
-rw-r--r-- | runsc/container/console_test.go | 2 | ||||
-rw-r--r-- | runsc/container/container_test.go | 25 | ||||
-rw-r--r-- | runsc/container/multi_container_test.go | 2 | ||||
-rw-r--r-- | runsc/specutils/namespace.go | 16 | ||||
-rw-r--r-- | test/runner/defs.bzl | 49 | ||||
-rw-r--r-- | test/runner/runner.go | 4 | ||||
-rw-r--r-- | test/syscalls/BUILD | 494 | ||||
-rw-r--r-- | test/syscalls/linux/inotify.cc | 100 | ||||
-rw-r--r-- | test/syscalls/linux/pty.cc | 6 | ||||
-rw-r--r-- | test/syscalls/linux/socket_unix.cc | 7 |
24 files changed, 945 insertions, 192 deletions
@@ -219,6 +219,7 @@ dev: ## Installs a set of local runtimes. Requires sudo. @$(MAKE) configure RUNTIME="$(RUNTIME)" ARGS="--net-raw" @$(MAKE) configure RUNTIME="$(RUNTIME)-d" ARGS="--net-raw --debug --strace --log-packets" @$(MAKE) configure RUNTIME="$(RUNTIME)-p" ARGS="--net-raw --profile" + @$(MAKE) configure RUNTIME="$(RUNTIME)-vfs2-d" ARGS="--net-raw --debug --strace --log-packets --vfs2" @sudo systemctl restart docker .PHONY: dev diff --git a/pkg/sentry/fsimpl/gofer/BUILD b/pkg/sentry/fsimpl/gofer/BUILD index 67e916525..f5f35a3bc 100644 --- a/pkg/sentry/fsimpl/gofer/BUILD +++ b/pkg/sentry/fsimpl/gofer/BUILD @@ -35,6 +35,7 @@ go_library( "fstree.go", "gofer.go", "handle.go", + "host_named_pipe.go", "p9file.go", "regular_file.go", "socket.go", @@ -47,6 +48,7 @@ go_library( "//pkg/abi/linux", "//pkg/context", "//pkg/fd", + "//pkg/fdnotifier", "//pkg/fspath", "//pkg/log", "//pkg/p9", @@ -71,6 +73,7 @@ go_library( "//pkg/unet", "//pkg/usermem", "//pkg/waiter", + "@org_golang_x_sys//unix:go_default_library", ], ) diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go index 7f2181216..36e0e1856 100644 --- a/pkg/sentry/fsimpl/gofer/filesystem.go +++ b/pkg/sentry/fsimpl/gofer/filesystem.go @@ -760,7 +760,7 @@ afterTrailingSymlink: parent.dirMu.Unlock() return nil, syserror.EPERM } - fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts) + fd, err := parent.createAndOpenChildLocked(ctx, rp, &opts, &ds) parent.dirMu.Unlock() return fd, err } @@ -873,19 +873,37 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts if opts.Flags&linux.O_DIRECT != 0 { return nil, syserror.EINVAL } - h, err := openHandle(ctx, d.file, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0) + // We assume that the server silently inserts O_NONBLOCK in the open flags + // for all named pipes (because all existing gofers do this). 
+ // + // NOTE(b/133875563): This makes named pipe opens racy, because the + // mechanisms for translating nonblocking to blocking opens can only detect + // the instantaneous presence of a peer holding the other end of the pipe + // open, not whether the pipe was *previously* opened by a peer that has + // since closed its end. + isBlockingOpenOfNamedPipe := d.fileType() == linux.S_IFIFO && opts.Flags&linux.O_NONBLOCK == 0 +retry: + h, err := openHandle(ctx, d.file, ats.MayRead(), ats.MayWrite(), opts.Flags&linux.O_TRUNC != 0) if err != nil { + if isBlockingOpenOfNamedPipe && ats == vfs.MayWrite && err == syserror.ENXIO { + // An attempt to open a named pipe with O_WRONLY|O_NONBLOCK fails + // with ENXIO if opening the same named pipe with O_WRONLY would + // block because there are no readers of the pipe. + if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil { + return nil, err + } + goto retry + } return nil, err } - seekable := d.fileType() == linux.S_IFREG - fd := &specialFileFD{ - handle: h, - seekable: seekable, + if isBlockingOpenOfNamedPipe && ats == vfs.MayRead && h.fd >= 0 { + if err := blockUntilNonblockingPipeHasWriter(ctx, h.fd); err != nil { + h.close(ctx) + return nil, err + } } - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ - DenyPRead: !seekable, - DenyPWrite: !seekable, - }); err != nil { + fd, err := newSpecialFileFD(h, mnt, d, opts.Flags) + if err != nil { h.close(ctx) return nil, err } @@ -894,7 +912,7 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts // Preconditions: d.fs.renameMu must be locked. d.dirMu must be locked. // !d.isSynthetic(). 
-func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions) (*vfs.FileDescription, error) { +func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.OpenOptions, ds **[]*dentry) (*vfs.FileDescription, error) { if err := d.checkPermissions(rp.Credentials(), vfs.MayWrite); err != nil { return nil, err } @@ -947,6 +965,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } return nil, err } + *ds = appendDentry(*ds, child) // Incorporate the fid that was opened by lcreate. useRegularFileFD := child.fileType() == linux.S_IFREG && !d.fs.opts.regularFilesUseSpecialFileFD if useRegularFileFD { @@ -959,10 +978,6 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving child.handleWritable = vfs.MayWriteFileWithOpenFlags(opts.Flags) child.handleMu.Unlock() } - // Take a reference on the new dentry to be held by the new file - // description. (This reference also means that the new dentry is not - // eligible for caching yet, so we don't need to append to a dentry slice.) - child.refs = 1 // Insert the dentry into the tree. 
d.cacheNewChildLocked(child, name) if d.cachedMetadataAuthoritative() { @@ -981,22 +996,16 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving } childVFSFD = &fd.vfsfd } else { - seekable := child.fileType() == linux.S_IFREG - fd := &specialFileFD{ - handle: handle{ - file: openFile, - fd: -1, - }, - seekable: seekable, + h := handle{ + file: openFile, + fd: -1, } if fdobj != nil { - fd.handle.fd = int32(fdobj.Release()) + h.fd = int32(fdobj.Release()) } - if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{ - DenyPRead: !seekable, - DenyPWrite: !seekable, - }); err != nil { - fd.handle.close(ctx) + fd, err := newSpecialFileFD(h, mnt, child, opts.Flags) + if err != nil { + h.close(ctx) return nil, err } childVFSFD = &fd.vfsfd diff --git a/pkg/sentry/fsimpl/gofer/host_named_pipe.go b/pkg/sentry/fsimpl/gofer/host_named_pipe.go new file mode 100644 index 000000000..7294de7d6 --- /dev/null +++ b/pkg/sentry/fsimpl/gofer/host_named_pipe.go @@ -0,0 +1,97 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gofer + +import ( + "fmt" + "sync" + "time" + + "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/syserror" +) + +// Global pipe used by blockUntilNonblockingPipeHasWriter since we can't create +// pipes after sentry initialization due to syscall filters. 
+var ( + tempPipeMu sync.Mutex + tempPipeReadFD int + tempPipeWriteFD int + tempPipeBuf [1]byte +) + +func init() { + var pipeFDs [2]int + if err := unix.Pipe(pipeFDs[:]); err != nil { + panic(fmt.Sprintf("failed to create pipe for gofer.blockUntilNonblockingPipeHasWriter: %v", err)) + } + tempPipeReadFD = pipeFDs[0] + tempPipeWriteFD = pipeFDs[1] +} + +func blockUntilNonblockingPipeHasWriter(ctx context.Context, fd int32) error { + for { + ok, err := nonblockingPipeHasWriter(fd) + if err != nil { + return err + } + if ok { + return nil + } + if err := sleepBetweenNamedPipeOpenChecks(ctx); err != nil { + return err + } + } +} + +func nonblockingPipeHasWriter(fd int32) (bool, error) { + tempPipeMu.Lock() + defer tempPipeMu.Unlock() + // Copy 1 byte from fd into the temporary pipe. + n, err := unix.Tee(int(fd), tempPipeWriteFD, 1, unix.SPLICE_F_NONBLOCK) + if err == syserror.EAGAIN { + // The pipe represented by fd is empty, but has a writer. + return true, nil + } + if err != nil { + return false, err + } + if n == 0 { + // The pipe represented by fd is empty and has no writer. + return false, nil + } + // The pipe represented by fd is non-empty, so it either has, or has + // previously had, a writer. Remove the byte copied to the temporary pipe + // before returning. 
+ if n, err := unix.Read(tempPipeReadFD, tempPipeBuf[:]); err != nil || n != 1 { + panic(fmt.Sprintf("failed to drain pipe for gofer.blockUntilNonblockingPipeHasWriter: got (%d, %v), wanted (1, nil)", n, err)) + } + return true, nil +} + +func sleepBetweenNamedPipeOpenChecks(ctx context.Context) error { + t := time.NewTimer(100 * time.Millisecond) + defer t.Stop() + cancel := ctx.SleepStart() + select { + case <-t.C: + ctx.SleepFinish(true) + return nil + case <-cancel: + ctx.SleepFinish(false) + return syserror.ErrInterrupted + } +} diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go index a464e6a94..ff6126b87 100644 --- a/pkg/sentry/fsimpl/gofer/special_file.go +++ b/pkg/sentry/fsimpl/gofer/special_file.go @@ -19,17 +19,18 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" + "gvisor.dev/gvisor/pkg/fdnotifier" "gvisor.dev/gvisor/pkg/safemem" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/syserror" "gvisor.dev/gvisor/pkg/usermem" + "gvisor.dev/gvisor/pkg/waiter" ) -// specialFileFD implements vfs.FileDescriptionImpl for files other than -// regular files, directories, and symlinks: pipes, sockets, etc. It is also -// used for regular files when filesystemOptions.specialRegularFiles is in -// effect. specialFileFD differs from regularFileFD by using per-FD handles -// instead of shared per-dentry handles, and never buffering I/O. +// specialFileFD implements vfs.FileDescriptionImpl for pipes, sockets, device +// special files, and (when filesystemOptions.specialRegularFiles is in effect) +// regular files. specialFileFD differs from regularFileFD by using per-FD +// handles instead of shared per-dentry handles, and never buffering I/O. type specialFileFD struct { fileDescription @@ -40,13 +41,47 @@ type specialFileFD struct { // file offset is significant, i.e. a regular file. seekable is immutable. 
seekable bool + // mayBlock is true if this file description represents a file for which + // queue may send I/O readiness events. mayBlock is immutable. + mayBlock bool + queue waiter.Queue + // If seekable is true, off is the file offset. off is protected by mu. mu sync.Mutex off int64 } +func newSpecialFileFD(h handle, mnt *vfs.Mount, d *dentry, flags uint32) (*specialFileFD, error) { + ftype := d.fileType() + seekable := ftype == linux.S_IFREG + mayBlock := ftype == linux.S_IFIFO || ftype == linux.S_IFSOCK + fd := &specialFileFD{ + handle: h, + seekable: seekable, + mayBlock: mayBlock, + } + if mayBlock && h.fd >= 0 { + if err := fdnotifier.AddFD(h.fd, &fd.queue); err != nil { + return nil, err + } + } + if err := fd.vfsfd.Init(fd, flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{ + DenyPRead: !seekable, + DenyPWrite: !seekable, + }); err != nil { + if mayBlock && h.fd >= 0 { + fdnotifier.RemoveFD(h.fd) + } + return nil, err + } + return fd, nil +} + // Release implements vfs.FileDescriptionImpl.Release. func (fd *specialFileFD) Release() { + if fd.mayBlock && fd.handle.fd >= 0 { + fdnotifier.RemoveFD(fd.handle.fd) + } fd.handle.close(context.Background()) fs := fd.vfsfd.Mount().Filesystem().Impl().(*filesystem) fs.syncMu.Lock() @@ -62,6 +97,32 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error { return fd.handle.file.flush(ctx) } +// Readiness implements waiter.Waitable.Readiness. +func (fd *specialFileFD) Readiness(mask waiter.EventMask) waiter.EventMask { + if fd.mayBlock { + return fdnotifier.NonBlockingPoll(fd.handle.fd, mask) + } + return fd.fileDescription.Readiness(mask) +} + +// EventRegister implements waiter.Waitable.EventRegister. +func (fd *specialFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) { + if fd.mayBlock { + fd.queue.EventRegister(e, mask) + return + } + fd.fileDescription.EventRegister(e, mask) +} + +// EventUnregister implements waiter.Waitable.EventUnregister. 
+func (fd *specialFileFD) EventUnregister(e *waiter.Entry) { + if fd.mayBlock { + fd.queue.EventUnregister(e) + return + } + fd.fileDescription.EventUnregister(e) +} + // PRead implements vfs.FileDescriptionImpl.PRead. func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { if fd.seekable && offset < 0 { @@ -81,6 +142,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs } buf := make([]byte, dst.NumBytes()) n, err := fd.handle.readToBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + if err == syserror.EAGAIN { + err = syserror.ErrWouldBlock + } if n == 0 { return 0, err } @@ -130,6 +194,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off return 0, err } n, err := fd.handle.writeFromBlocksAt(ctx, safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)), uint64(offset)) + if err == syserror.EAGAIN { + err = syserror.ErrWouldBlock + } return int64(n), err } diff --git a/pkg/sentry/platform/kvm/machine_arm64.go b/pkg/sentry/platform/kvm/machine_arm64.go index e42505542..750751aa3 100644 --- a/pkg/sentry/platform/kvm/machine_arm64.go +++ b/pkg/sentry/platform/kvm/machine_arm64.go @@ -60,6 +60,12 @@ func rdonlyRegionsForSetMem() (phyRegions []physicalRegion) { if !vr.accessType.Write && vr.accessType.Read { rdonlyRegions = append(rdonlyRegions, vr.region) } + + // TODO(gvisor.dev/issue/2686): PROT_NONE should be specially treated. + // Workaround: treated as rdonly temporarily. 
+ if !vr.accessType.Write && !vr.accessType.Read && !vr.accessType.Execute { + rdonlyRegions = append(rdonlyRegions, vr.region) + } }) for _, r := range rdonlyRegions { diff --git a/pkg/sentry/syscalls/linux/sys_splice.go b/pkg/sentry/syscalls/linux/sys_splice.go index 39f2b79ec..77c78889d 100644 --- a/pkg/sentry/syscalls/linux/sys_splice.go +++ b/pkg/sentry/syscalls/linux/sys_splice.go @@ -80,6 +80,12 @@ func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonB } } + if total > 0 { + // On Linux, inotify behavior is not very consistent with splice(2). We try + // our best to emulate Linux for very basic calls to splice, where for some + // reason, events are generated for output files, but not input files. + outFile.Dirent.InotifyEvent(linux.IN_MODIFY, 0) + } return total, err } diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go index 2de5e3422..c24946160 100644 --- a/pkg/sentry/syscalls/linux/sys_xattr.go +++ b/pkg/sentry/syscalls/linux/sys_xattr.go @@ -207,7 +207,11 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si return syserror.EOPNOTSUPP } - return d.Inode.SetXattr(t, d, name, value, flags) + if err := d.Inode.SetXattr(t, d, name, value, flags); err != nil { + return err + } + d.InotifyEvent(linux.IN_ATTRIB, 0) + return nil } func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) { @@ -418,7 +422,11 @@ func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error { return syserror.EOPNOTSUPP } - return d.Inode.RemoveXattr(t, d, name) + if err := d.Inode.RemoveXattr(t, d, name); err != nil { + return err + } + d.InotifyEvent(linux.IN_ATTRIB, 0) + return nil } // LINT.ThenChange(vfs2/xattr.go) diff --git a/pkg/sentry/syscalls/linux/vfs2/splice.go b/pkg/sentry/syscalls/linux/vfs2/splice.go index 8f3c22a02..945a364a7 100644 --- a/pkg/sentry/syscalls/linux/vfs2/splice.go +++ b/pkg/sentry/syscalls/linux/vfs2/splice.go @@ -187,6 
+187,11 @@ func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscal if n == 0 { return 0, nil, err } + + // On Linux, inotify behavior is not very consistent with splice(2). We try + // our best to emulate Linux for very basic calls to splice, where for some + // reason, events are generated for output files, but not input files. + outFile.Dentry().InotifyWithParent(linux.IN_MODIFY, 0, vfs.PathEvent) return uintptr(n), nil, nil } diff --git a/runsc/boot/filter/config.go b/runsc/boot/filter/config.go index 98cdd90dd..60e33425f 100644 --- a/runsc/boot/filter/config.go +++ b/runsc/boot/filter/config.go @@ -288,6 +288,14 @@ var allowedSyscalls = seccomp.SyscallRules{ syscall.SYS_SIGALTSTACK: {}, unix.SYS_STATX: {}, syscall.SYS_SYNC_FILE_RANGE: {}, + syscall.SYS_TEE: []seccomp.Rule{ + { + seccomp.AllowAny{}, + seccomp.AllowAny{}, + seccomp.AllowValue(1), /* len */ + seccomp.AllowValue(unix.SPLICE_F_NONBLOCK), /* flags */ + }, + }, syscall.SYS_TGKILL: []seccomp.Rule{ { seccomp.AllowValue(uint64(os.Getpid())), diff --git a/runsc/boot/fs.go b/runsc/boot/fs.go index 52f8344ca..b98a1eb50 100644 --- a/runsc/boot/fs.go +++ b/runsc/boot/fs.go @@ -63,7 +63,7 @@ const ( ) // tmpfs has some extra supported options that we must pass through. -var tmpfsAllowedOptions = []string{"mode", "uid", "gid"} +var tmpfsAllowedData = []string{"mode", "uid", "gid"} func addOverlay(ctx context.Context, conf *Config, lower *fs.Inode, name string, lowerFlags fs.MountSourceFlags) (*fs.Inode, error) { // Upper layer uses the same flags as lower, but it must be read-write. @@ -154,8 +154,8 @@ func compileMounts(spec *specs.Spec) []specs.Mount { return mounts } -// p9MountOptions creates a slice of options for a p9 mount. -func p9MountOptions(fd int, fa FileAccessType, vfs2 bool) []string { +// p9MountData creates a slice of p9 mount data. 
+func p9MountData(fd int, fa FileAccessType, vfs2 bool) []string { opts := []string{ "trans=fd", "rfdno=" + strconv.Itoa(fd), @@ -235,7 +235,7 @@ func isSupportedMountFlag(fstype, opt string) bool { return true } if fstype == tmpfsvfs2.Name { - ok, err := parseMountOption(opt, tmpfsAllowedOptions...) + ok, err := parseMountOption(opt, tmpfsAllowedData...) return ok && err == nil } return false @@ -716,7 +716,7 @@ func (c *containerMounter) createRootMount(ctx context.Context, conf *Config) (* fd := c.fds.remove() log.Infof("Mounting root over 9P, ioFD: %d", fd) p9FS := mustFindFilesystem("9p") - opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) if conf.OverlayfsStaleRead { // We can't check for overlayfs here because sandbox is chroot'ed and gofer @@ -770,7 +770,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { return "", nil, false, err } @@ -778,7 +778,7 @@ func (c *containerMounter) getMountNameAndOptions(conf *Config, m specs.Mount) ( case bind: fd := c.fds.remove() fsName = gofervfs2.Name - opts = p9MountOptions(fd, c.getMountAccessType(m), conf.VFS2) + opts = p9MountData(fd, c.getMountAccessType(m), conf.VFS2) // If configured, add overlay to all writable mounts. useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly @@ -931,7 +931,7 @@ func (c *containerMounter) createRestoreEnvironment(conf *Config) (*fs.RestoreEn // Add root mount. 
fd := c.fds.remove() - opts := p9MountOptions(fd, conf.FileAccess, false /* vfs2 */) + opts := p9MountData(fd, conf.FileAccess, false /* vfs2 */) mf := fs.MountSourceFlags{} if c.root.Readonly || conf.Overlay { @@ -1019,7 +1019,7 @@ func (c *containerMounter) mountTmp(ctx context.Context, conf *Config, mns *fs.M Destination: "/tmp", // Sticky bit is added to prevent accidental deletion of files from // another user. This is normally done for /tmp. - Options: []string{"mode=1777"}, + Options: []string{"mode=01777"}, } return c.mountSubmount(ctx, conf, mns, root, tmpMount) diff --git a/runsc/boot/vfs.go b/runsc/boot/vfs.go index f48e6b0f1..6c84f0794 100644 --- a/runsc/boot/vfs.go +++ b/runsc/boot/vfs.go @@ -136,7 +136,7 @@ func (c *containerMounter) setupVFS2(ctx context.Context, conf *Config, procArgs func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { fd := c.fds.remove() - opts := strings.Join(p9MountOptions(fd, conf.FileAccess, true /* vfs2 */), ",") + opts := strings.Join(p9MountData(fd, conf.FileAccess, true /* vfs2 */), ",") log.Infof("Mounting root over 9P, ioFD: %d", fd) mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", gofer.Name, &vfs.GetFilesystemOptions{Data: opts}) @@ -160,8 +160,9 @@ func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *Config, } } - // TODO(gvisor.dev/issue/1487): implement mountTmp from fs.go. - + if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil { + return fmt.Errorf(`mount submount "\tmp": %w`, err) + } return nil } @@ -199,8 +200,6 @@ func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { return mounts, nil } -// TODO(gvisor.dev/issue/1487): Implement submount options similar to the VFS1 -// version. 
func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) error { root := mns.Root() defer root.DecRef() @@ -209,12 +208,11 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, Start: root, Path: fspath.Parse(submount.Destination), } - - fsName, options, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) + fsName, opts, err := c.getMountNameAndOptionsVFS2(conf, submount) if err != nil { return fmt.Errorf("mountOptions failed: %w", err) } - if fsName == "" { + if len(fsName) == 0 { // Filesystem is not supported (e.g. cgroup), just skip it. return nil } @@ -222,17 +220,6 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, if err := c.makeSyntheticMount(ctx, submount.Destination, root, creds); err != nil { return err } - - opts := &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - Data: strings.Join(options, ","), - }, - InternalMount: true, - } - - // All writes go to upper, be paranoid and make lower readonly. - opts.ReadOnly = useOverlay - if err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts); err != nil { return fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.Destination, submount.Type, err, opts) } @@ -242,13 +229,13 @@ func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *Config, // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values // used for mounts. -func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, []string, bool, error) { +func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndFD) (string, *vfs.MountOptions, error) { var ( - fsName string - opts []string - useOverlay bool + fsName string + data []string ) + // Find filesystem name and FS specific data field. 
switch m.Type { case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: fsName = m.Type @@ -258,21 +245,46 @@ func (c *containerMounter) getMountNameAndOptionsVFS2(conf *Config, m *mountAndF fsName = m.Type var err error - opts, err = parseAndFilterOptions(m.Options, tmpfsAllowedOptions...) + data, err = parseAndFilterOptions(m.Options, tmpfsAllowedData...) if err != nil { - return "", nil, false, err + return "", nil, err } case bind: fsName = gofer.Name - opts = p9MountOptions(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) - // If configured, add overlay to all writable mounts. - useOverlay = conf.Overlay && !mountFlags(m.Options).ReadOnly + data = p9MountData(m.fd, c.getMountAccessType(m.Mount), true /* vfs2 */) default: log.Warningf("ignoring unknown filesystem type %q", m.Type) } - return fsName, opts, useOverlay, nil + + opts := &vfs.MountOptions{ + GetFilesystemOptions: vfs.GetFilesystemOptions{ + Data: strings.Join(data, ","), + }, + InternalMount: true, + } + + for _, o := range m.Options { + switch o { + case "rw": + opts.ReadOnly = false + case "ro": + opts.ReadOnly = true + case "noatime": + // TODO(gvisor.dev/issue/1193): Implement MS_NOATIME. + case "noexec": + opts.Flags.NoExec = true + default: + log.Warningf("ignoring unknown mount option %q", o) + } + } + + if conf.Overlay { + // All writes go to upper, be paranoid and make lower readonly. + opts.ReadOnly = true + } + return fsName, opts, nil } func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath string, root vfs.VirtualDentry, creds *auth.Credentials) error { @@ -301,3 +313,63 @@ func (c *containerMounter) makeSyntheticMount(ctx context.Context, currentPath s } return nil } + +// mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. +// Technically we don't have to mount tmpfs at /tmp, as we could just rely on +// the host /tmp, but this is a nice optimization, and fixes some apps that call +// mknod in /tmp. It's unsafe to mount tmpfs if: +// 1. 
/tmp is mounted explicitly: we should not override user's wish +// 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp +// +// Note that when there are submounts inside of '/tmp', directories for the +// mount points must be present, making '/tmp' not empty anymore. +func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *Config, creds *auth.Credentials, mns *vfs.MountNamespace) error { + for _, m := range c.mounts { + // m.Destination has been cleaned, so it's to use equality here. + if m.Destination == "/tmp" { + log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m) + return nil + } + } + + root := mns.Root() + defer root.DecRef() + pop := vfs.PathOperation{ + Root: root, + Start: root, + Path: fspath.Parse("/tmp"), + } + // TODO(gvisor.dev/issue/2782): Use O_PATH when available. + statx, err := c.k.VFS().StatAt(ctx, creds, &pop, &vfs.StatOptions{}) + switch err { + case nil: + // Found '/tmp' in filesystem, check if it's empty. + if linux.FileMode(statx.Mode).FileType() != linux.ModeDirectory { + // Not a dir?! Leave it be. + return nil + } + if statx.Nlink > 2 { + // If more than "." and ".." is found, skip internal tmpfs to prevent + // hiding existing files. + log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) + return nil + } + log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) + fallthrough + + case syserror.ENOENT: + // No '/tmp' found (or fallthrough from above). It's safe to mount internal + // tmpfs. + tmpMount := specs.Mount{ + Type: tmpfs.Name, + Destination: "/tmp", + // Sticky bit is added to prevent accidental deletion of files from + // another user. This is normally done for /tmp. 
+ Options: []string{"mode=01777"}, + } + return c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{Mount: tmpMount}) + + default: + return fmt.Errorf(`stating "/tmp" inside container: %w`, err) + } +} diff --git a/runsc/cmd/boot.go b/runsc/cmd/boot.go index 4c2ac6ff0..01204ab4d 100644 --- a/runsc/cmd/boot.go +++ b/runsc/cmd/boot.go @@ -136,7 +136,7 @@ func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) } // Ensure that if there is a panic, all goroutine stacks are printed. - debug.SetTraceback("all") + debug.SetTraceback("system") conf := args[0].(*boot.Config) diff --git a/runsc/container/BUILD b/runsc/container/BUILD index 9a856d65c..49cfb0837 100644 --- a/runsc/container/BUILD +++ b/runsc/container/BUILD @@ -47,7 +47,7 @@ go_test( "//test/cmd/test_app", ], library = ":container", - shard_count = 5, + shard_count = 10, tags = [ "requires-kvm", ], diff --git a/runsc/container/console_test.go b/runsc/container/console_test.go index 294dca5e7..3813c6b93 100644 --- a/runsc/container/console_test.go +++ b/runsc/container/console_test.go @@ -119,7 +119,7 @@ func receiveConsolePTY(srv *unet.ServerSocket) (*os.File, error) { // Test that an pty FD is sent over the console socket if one is provided. func TestConsoleSocket(t *testing.T) { - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("true") _, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go index d59a1d97e..e7715b6f7 100644 --- a/runsc/container/container_test.go +++ b/runsc/container/container_test.go @@ -256,8 +256,6 @@ var ( func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { // Always load the default config. 
cs := make(map[string]*boot.Config) - cs["default"] = testutil.TestConfig(t) - for _, o := range opts { switch o { case overlay: @@ -285,9 +283,16 @@ func configs(t *testing.T, opts ...configOption) map[string]*boot.Config { func configsWithVFS2(t *testing.T, opts ...configOption) map[string]*boot.Config { vfs1 := configs(t, opts...) - vfs2 := configs(t, opts...) - for key, value := range vfs2 { + var optsVFS2 []configOption + for _, opt := range opts { + // TODO(gvisor.dev/issue/1487): Enable overlay tests. + if opt != overlay { + optsVFS2 = append(optsVFS2, opt) + } + } + + for key, value := range configs(t, optsVFS2...) { value.VFS2 = true vfs1[key+"VFS2"] = value } @@ -603,7 +608,7 @@ func doAppExitStatus(t *testing.T, vfs2 bool) { // TestExec verifies that a container can exec a new program. func TestExec(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { const uid = 343 spec := testutil.NewSpecWithArgs("sleep", "100") @@ -695,7 +700,7 @@ func TestExec(t *testing.T) { // TestKillPid verifies that we can signal individual exec'd processes. func TestKillPid(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { app, err := testutil.FindFile("test/cmd/test_app/test_app") if err != nil { @@ -1211,7 +1216,7 @@ func TestCapabilities(t *testing.T) { uid := auth.KUID(os.Getuid() + 1) gid := auth.KGID(os.Getgid() + 1) - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) { t.Run(name, func(t *testing.T) { spec := testutil.NewSpecWithArgs("sleep", "100") rootDir, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf) @@ -1409,7 +1414,7 @@ func TestReadonlyRoot(t *testing.T) { } func TestUIDMap(t *testing.T) { - for name, conf := range configs(t, noOverlay...) { + for name, conf := range configsWithVFS2(t, noOverlay...) 
{ t.Run(name, func(t *testing.T) { testDir, err := ioutil.TempDir(testutil.TmpDir(), "test-mount") if err != nil { @@ -1886,7 +1891,7 @@ func doDestroyStartingTest(t *testing.T, vfs2 bool) { } func TestCreateWorkingDir(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { tmpDir, err := ioutil.TempDir(testutil.TmpDir(), "cwd-create") if err != nil { @@ -2009,7 +2014,7 @@ func TestMountPropagation(t *testing.T) { } func TestMountSymlink(t *testing.T) { - for name, conf := range configs(t, overlay) { + for name, conf := range configsWithVFS2(t, overlay) { t.Run(name, func(t *testing.T) { dir, err := ioutil.TempDir(testutil.TmpDir(), "mount-symlink") if err != nil { diff --git a/runsc/container/multi_container_test.go b/runsc/container/multi_container_test.go index dc825abd9..207206dd2 100644 --- a/runsc/container/multi_container_test.go +++ b/runsc/container/multi_container_test.go @@ -129,7 +129,7 @@ func createSharedMount(mount specs.Mount, name string, pod ...*specs.Spec) { // TestMultiContainerSanity checks that it is possible to run 2 dead-simple // containers in the same sandbox. func TestMultiContainerSanity(t *testing.T) { - for name, conf := range configs(t, all...) { + for name, conf := range configsWithVFS2(t, all...) 
{ t.Run(name, func(t *testing.T) { rootDir, cleanup, err := testutil.SetupRootDir() if err != nil { diff --git a/runsc/specutils/namespace.go b/runsc/specutils/namespace.go index 60bb7b7ee..23001d67c 100644 --- a/runsc/specutils/namespace.go +++ b/runsc/specutils/namespace.go @@ -18,6 +18,7 @@ import ( "fmt" "os" "os/exec" + "os/signal" "path/filepath" "runtime" "syscall" @@ -261,7 +262,18 @@ func MaybeRunAsRoot() error { cmd.Stdin = os.Stdin cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { + if err := cmd.Start(); err != nil { + return fmt.Errorf("re-executing self: %w", err) + } + ch := make(chan os.Signal, 1) + signal.Notify(ch) + go func() { + for { + // Forward all signals to child process. + cmd.Process.Signal(<-ch) + } + }() + if err := cmd.Wait(); err != nil { if exit, ok := err.(*exec.ExitError); ok { if ws, ok := exit.Sys().(syscall.WaitStatus); ok { os.Exit(ws.ExitStatus()) @@ -269,7 +281,7 @@ func MaybeRunAsRoot() error { log.Warningf("No wait status provided, exiting with -1: %v", err) os.Exit(-1) } - return fmt.Errorf("re-executing self: %v", err) + return err } // Child completed with success. os.Exit(0) diff --git a/test/runner/defs.bzl b/test/runner/defs.bzl index 0a75b158f..402ba4064 100644 --- a/test/runner/defs.bzl +++ b/test/runner/defs.bzl @@ -60,7 +60,8 @@ def _syscall_test( network = "none", file_access = "exclusive", overlay = False, - add_uds_tree = False): + add_uds_tree = False, + vfs2 = False): # Prepend "runsc" to non-native platform names. full_platform = platform if platform == "native" else "runsc_" + platform @@ -70,6 +71,8 @@ def _syscall_test( name += "_shared" if overlay: name += "_overlay" + if vfs2: + name += "_vfs2" if network != "none": name += "_" + network + "net" @@ -102,6 +105,7 @@ def _syscall_test( "--file-access=" + file_access, "--overlay=" + str(overlay), "--add-uds-tree=" + str(add_uds_tree), + "--vfs2=" + str(vfs2), ] # Call the rule above. 
@@ -123,6 +127,7 @@ def syscall_test( add_overlay = False, add_uds_tree = False, add_hostinet = False, + vfs2 = False, tags = None): """syscall_test is a macro that will create targets for all platforms. @@ -160,6 +165,29 @@ def syscall_test( tags = platform_tags + tags, ) + vfs2_tags = list(tags) + if vfs2: + # Add tag to easily run VFS2 tests with --test_tag_filters=vfs2 + vfs2_tags.append("vfs2") + + else: + # Don't automatically run tests not yet passing. + vfs2_tags.append("manual") + vfs2_tags.append("noguitar") + vfs2_tags.append("notap") + + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = default_platform, + use_tmpfs = use_tmpfs, + add_uds_tree = add_uds_tree, + tags = platforms[default_platform] + vfs2_tags, + vfs2 = True, + ) + + # TODO(gvisor.dev/issue/1487): Enable VFS2 overlay tests. if add_overlay: _syscall_test( test = test, @@ -172,6 +200,18 @@ def syscall_test( overlay = True, ) + if add_hostinet: + _syscall_test( + test = test, + shard_count = shard_count, + size = size, + platform = default_platform, + use_tmpfs = use_tmpfs, + network = "host", + add_uds_tree = add_uds_tree, + tags = platforms[default_platform] + tags, + ) + if not use_tmpfs: # Also test shared gofer access. 
_syscall_test( @@ -184,15 +224,14 @@ def syscall_test( tags = platforms[default_platform] + tags, file_access = "shared", ) - - if add_hostinet: _syscall_test( test = test, shard_count = shard_count, size = size, platform = default_platform, use_tmpfs = use_tmpfs, - network = "host", add_uds_tree = add_uds_tree, - tags = platforms[default_platform] + tags, + tags = platforms[default_platform] + vfs2_tags, + file_access = "shared", + vfs2 = True, ) diff --git a/test/runner/runner.go b/test/runner/runner.go index e048e5a9c..948e3a8ef 100644 --- a/test/runner/runner.go +++ b/test/runner/runner.go @@ -46,6 +46,7 @@ var ( useTmpfs = flag.Bool("use-tmpfs", false, "mounts tmpfs for /tmp") fileAccess = flag.String("file-access", "exclusive", "mounts root in exclusive or shared mode") overlay = flag.Bool("overlay", false, "wrap filesystem mounts with writable tmpfs overlay") + vfs2 = flag.Bool("vfs2", false, "enable VFS2") parallel = flag.Bool("parallel", false, "run tests in parallel") runscPath = flag.String("runsc", "", "path to runsc binary") @@ -146,6 +147,9 @@ func runRunsc(tc gtest.TestCase, spec *specs.Spec) error { if *overlay { args = append(args, "-overlay") } + if *vfs2 { + args = append(args, "-vfs2") + } if *debug { args = append(args, "-debug", "-log-packets=true") } diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD index 9800a0cdf..3406a2de8 100644 --- a/test/syscalls/BUILD +++ b/test/syscalls/BUILD @@ -2,22 +2,33 @@ load("//test/runner:defs.bzl", "syscall_test") package(licenses = ["notice"]) -syscall_test(test = "//test/syscalls/linux:32bit_test") +syscall_test( + test = "//test/syscalls/linux:32bit_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:accept_bind_stream_test") +syscall_test( + test = "//test/syscalls/linux:accept_bind_stream_test", + vfs2 = "True", +) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:accept_bind_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = 
"//test/syscalls/linux:access_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:affinity_test") +syscall_test( + test = "//test/syscalls/linux:affinity_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -28,11 +39,18 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:alarm_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:arch_prctl_test") +syscall_test( + test = "//test/syscalls/linux:arch_prctl_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:bad_test") +syscall_test( + test = "//test/syscalls/linux:bad_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -40,9 +58,15 @@ syscall_test( test = "//test/syscalls/linux:bind_test", ) -syscall_test(test = "//test/syscalls/linux:brk_test") +syscall_test( + test = "//test/syscalls/linux:brk_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_test") +syscall_test( + test = "//test/syscalls/linux:socket_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -51,16 +75,19 @@ syscall_test( # involve much concurrency, TSAN's usefulness here is limited anyway. tags = ["nogotsan"], test = "//test/syscalls/linux:socket_stress_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chdir_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:chmod_test", + vfs2 = "True", ) syscall_test( @@ -68,6 +95,7 @@ syscall_test( add_overlay = True, test = "//test/syscalls/linux:chown_test", use_tmpfs = True, # chwon tests require gofer to be running as root. 
+ vfs2 = "True", ) syscall_test( @@ -75,45 +103,70 @@ syscall_test( test = "//test/syscalls/linux:chroot_test", ) -syscall_test(test = "//test/syscalls/linux:clock_getres_test") +syscall_test( + test = "//test/syscalls/linux:clock_getres_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:clock_gettime_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:clock_nanosleep_test") +syscall_test( + test = "//test/syscalls/linux:clock_nanosleep_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:concurrency_test") +syscall_test( + test = "//test/syscalls/linux:concurrency_test", + vfs2 = "True", +) syscall_test( add_uds_tree = True, test = "//test/syscalls/linux:connect_external_test", use_tmpfs = True, + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:creat_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:dev_test") +syscall_test( + test = "//test/syscalls/linux:dev_test", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:dup_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:epoll_test") +syscall_test( + test = "//test/syscalls/linux:epoll_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:eventfd_test") +syscall_test( + test = "//test/syscalls/linux:eventfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:exceptions_test") +syscall_test( + test = "//test/syscalls/linux:exceptions_test", + vfs2 = "True", +) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:exec_test", + vfs2 = "True", ) syscall_test( @@ -122,7 +175,10 @@ syscall_test( test = "//test/syscalls/linux:exec_binary_test", ) -syscall_test(test = "//test/syscalls/linux:exit_test") +syscall_test( + test = "//test/syscalls/linux:exit_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -134,11 +190,15 @@ syscall_test( test = "//test/syscalls/linux:fallocate_test", ) 
-syscall_test(test = "//test/syscalls/linux:fault_test") +syscall_test( + test = "//test/syscalls/linux:fault_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:fchdir_test", + vfs2 = "True", ) syscall_test( @@ -152,11 +212,20 @@ syscall_test( test = "//test/syscalls/linux:flock_test", ) -syscall_test(test = "//test/syscalls/linux:fork_test") +syscall_test( + test = "//test/syscalls/linux:fork_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:fpsig_fork_test") +syscall_test( + test = "//test/syscalls/linux:fpsig_fork_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:fpsig_nested_test") +syscall_test( + test = "//test/syscalls/linux:fpsig_nested_test", + vfs2 = "True", +) syscall_test( add_overlay = True, @@ -167,20 +236,33 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:futex_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:getcpu_host_test") +syscall_test( + test = "//test/syscalls/linux:getcpu_host_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:getcpu_test") +syscall_test( + test = "//test/syscalls/linux:getcpu_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:getdents_test", ) -syscall_test(test = "//test/syscalls/linux:getrandom_test") +syscall_test( + test = "//test/syscalls/linux:getrandom_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:getrusage_test") +syscall_test( + test = "//test/syscalls/linux:getrusage_test", + vfs2 = "True", +) syscall_test( size = "medium", @@ -196,15 +278,20 @@ syscall_test( syscall_test( test = "//test/syscalls/linux:iptables_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 5, test = "//test/syscalls/linux:itimer_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:kill_test") +syscall_test( + test = "//test/syscalls/linux:kill_test", + vfs2 = "True", +) syscall_test( 
add_overlay = True, @@ -215,19 +302,33 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:lseek_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:madvise_test") +syscall_test( + test = "//test/syscalls/linux:madvise_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:memory_accounting_test") +syscall_test( + test = "//test/syscalls/linux:memory_accounting_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:mempolicy_test") +syscall_test( + test = "//test/syscalls/linux:mempolicy_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:mincore_test") +syscall_test( + test = "//test/syscalls/linux:mincore_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:mkdir_test", + vfs2 = "True", ) syscall_test( @@ -249,20 +350,29 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:mremap_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:msync_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:munmap_test") +syscall_test( + test = "//test/syscalls/linux:munmap_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:network_namespace_test") +syscall_test( + test = "//test/syscalls/linux:network_namespace_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:open_create_test", + vfs2 = "True", ) syscall_test( @@ -270,40 +380,65 @@ syscall_test( test = "//test/syscalls/linux:open_test", ) -syscall_test(test = "//test/syscalls/linux:packet_socket_raw_test") +syscall_test( + test = "//test/syscalls/linux:packet_socket_raw_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:packet_socket_test") +syscall_test( + test = "//test/syscalls/linux:packet_socket_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:partial_bad_buffer_test") +syscall_test( + test = 
"//test/syscalls/linux:partial_bad_buffer_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:pause_test") +syscall_test( + test = "//test/syscalls/linux:pause_test", + vfs2 = "True", +) syscall_test( size = "large", add_overlay = True, shard_count = 5, test = "//test/syscalls/linux:pipe_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:poll_test") +syscall_test( + test = "//test/syscalls/linux:poll_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:ppoll_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:prctl_setuid_test") +syscall_test( + test = "//test/syscalls/linux:prctl_setuid_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:prctl_test") +syscall_test( + test = "//test/syscalls/linux:prctl_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:pread64_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:preadv_test", + vfs2 = "True", ) syscall_test( @@ -311,36 +446,56 @@ syscall_test( test = "//test/syscalls/linux:preadv2_test", ) -syscall_test(test = "//test/syscalls/linux:priority_test") +syscall_test( + test = "//test/syscalls/linux:priority_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:proc_test", ) -syscall_test(test = "//test/syscalls/linux:proc_net_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_oomscore_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_oomscore_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_smaps_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_smaps_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_pid_uid_gid_map_test") +syscall_test( + test = "//test/syscalls/linux:proc_pid_uid_gid_map_test", +) syscall_test( size = "medium", 
test = "//test/syscalls/linux:pselect_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:ptrace_test") +syscall_test( + test = "//test/syscalls/linux:ptrace_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:pty_test", + vfs2 = "True", ) syscall_test( test = "//test/syscalls/linux:pty_root_test", + vfs2 = "True", ) syscall_test( @@ -351,17 +506,28 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:pwrite64_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:raw_socket_hdrincl_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_hdrincl_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:raw_socket_icmp_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_icmp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:raw_socket_ipv4_test") +syscall_test( + test = "//test/syscalls/linux:raw_socket_ipv4_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:read_test", + vfs2 = "True", ) syscall_test( @@ -373,12 +539,14 @@ syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:readv_socket_test", + vfs2 = "True", ) syscall_test( size = "medium", add_overlay = True, test = "//test/syscalls/linux:readv_test", + vfs2 = "True", ) syscall_test( @@ -387,25 +555,50 @@ syscall_test( test = "//test/syscalls/linux:rename_test", ) -syscall_test(test = "//test/syscalls/linux:rlimits_test") +syscall_test( + test = "//test/syscalls/linux:rlimits_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:rseq_test") +syscall_test( + test = "//test/syscalls/linux:rseq_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:rtsignal_test") +syscall_test( + test = "//test/syscalls/linux:rtsignal_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:signalfd_test") +syscall_test( + test = 
"//test/syscalls/linux:signalfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sched_test") +syscall_test( + test = "//test/syscalls/linux:sched_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sched_yield_test") +syscall_test( + test = "//test/syscalls/linux:sched_yield_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:seccomp_test") +syscall_test( + test = "//test/syscalls/linux:seccomp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:select_test") +syscall_test( + test = "//test/syscalls/linux:select_test", + vfs2 = "True", +) syscall_test( shard_count = 20, test = "//test/syscalls/linux:semaphore_test", + vfs2 = "True", ) syscall_test( @@ -421,49 +614,68 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:splice_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:sigaction_test") +syscall_test( + test = "//test/syscalls/linux:sigaction_test", + vfs2 = "True", +) # TODO(b/119826902): Enable once the test passes in runsc. 
-# syscall_test(test = "//test/syscalls/linux:sigaltstack_test") +# syscall_test(vfs2="True",test = "//test/syscalls/linux:sigaltstack_test") -syscall_test(test = "//test/syscalls/linux:sigiret_test") +syscall_test( + test = "//test/syscalls/linux:sigiret_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sigprocmask_test") +syscall_test( + test = "//test/syscalls/linux:sigprocmask_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:sigstop_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:sigtimedwait_test") +syscall_test( + test = "//test/syscalls/linux:sigtimedwait_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:shm_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_abstract_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_abstract_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_domain_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_domain_test", + vfs2 = "True", ) syscall_test( @@ -489,58 +701,90 @@ syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_generic_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ip_tcp_loopback_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", shard_count = 50, test = "//test/syscalls/linux:socket_ip_tcp_udp_generic_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ip_udp_loopback_non_blocking_test", + vfs2 = "True", ) syscall_test( size = "large", shard_count = 50, test = 
"//test/syscalls/linux:socket_ip_udp_loopback_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_ipv4_udp_unbound_loopback_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:socket_ip_unbound_test") +syscall_test( + test = "//test/syscalls/linux:socket_ip_unbound_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netdevice_test") +syscall_test( + test = "//test/syscalls/linux:socket_netdevice_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_route_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_route_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_netlink_uevent_test") +syscall_test( + test = "//test/syscalls/linux:socket_netlink_uevent_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_blocking_local_test") +syscall_test( + test = "//test/syscalls/linux:socket_blocking_local_test", +) -syscall_test(test = "//test/syscalls/linux:socket_blocking_ip_test") +syscall_test( + test = "//test/syscalls/linux:socket_blocking_ip_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_local_test") +syscall_test( + test = "//test/syscalls/linux:socket_non_stream_blocking_local_test", +) -syscall_test(test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test") +syscall_test( + test = "//test/syscalls/linux:socket_non_stream_blocking_udp_test", + vfs2 = "True", +) syscall_test( size = "large", @@ -550,6 +794,7 @@ syscall_test( syscall_test( size = "large", test = "//test/syscalls/linux:socket_stream_blocking_tcp_test", + vfs2 = "True", ) syscall_test( @@ -572,6 +817,7 @@ syscall_test( syscall_test( size = "medium", test = 
"//test/syscalls/linux:socket_unix_dgram_non_blocking_test", + vfs2 = "True", ) syscall_test( @@ -579,6 +825,7 @@ syscall_test( add_overlay = True, shard_count = 50, test = "//test/syscalls/linux:socket_unix_pair_test", + vfs2 = "True", ) syscall_test( @@ -596,11 +843,13 @@ syscall_test( syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_abstract_test", + vfs2 = "True", ) syscall_test( size = "medium", test = "//test/syscalls/linux:socket_unix_unbound_dgram_test", + vfs2 = "True", ) syscall_test( @@ -612,6 +861,7 @@ syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:socket_unix_unbound_seqpacket_test", + vfs2 = "True", ) syscall_test( @@ -623,6 +873,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:statfs_test", + vfs2 = "True", ) syscall_test( @@ -633,6 +884,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:stat_times_test", + vfs2 = "True", ) syscall_test( @@ -648,6 +900,7 @@ syscall_test( syscall_test( add_overlay = True, test = "//test/syscalls/linux:sync_test", + vfs2 = "True", ) syscall_test( @@ -655,86 +908,151 @@ syscall_test( test = "//test/syscalls/linux:sync_file_range_test", ) -syscall_test(test = "//test/syscalls/linux:sysinfo_test") +syscall_test( + test = "//test/syscalls/linux:sysinfo_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:syslog_test") +syscall_test( + test = "//test/syscalls/linux:syslog_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:sysret_test") +syscall_test( + test = "//test/syscalls/linux:sysret_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 10, test = "//test/syscalls/linux:tcp_socket_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:tgkill_test") +syscall_test( + test = "//test/syscalls/linux:tgkill_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:timerfd_test") +syscall_test( + test = 
"//test/syscalls/linux:timerfd_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:timers_test") +syscall_test( + test = "//test/syscalls/linux:timers_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:time_test") +syscall_test( + test = "//test/syscalls/linux:time_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:tkill_test") +syscall_test( + test = "//test/syscalls/linux:tkill_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:truncate_test", ) -syscall_test(test = "//test/syscalls/linux:tuntap_test") +syscall_test( + test = "//test/syscalls/linux:tuntap_test", +) syscall_test( add_hostinet = True, test = "//test/syscalls/linux:tuntap_hostinet_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:udp_bind_test") +syscall_test( + test = "//test/syscalls/linux:udp_bind_test", + vfs2 = "True", +) syscall_test( size = "medium", add_hostinet = True, shard_count = 10, test = "//test/syscalls/linux:udp_socket_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:uidgid_test") +syscall_test( + test = "//test/syscalls/linux:uidgid_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:uname_test") +syscall_test( + test = "//test/syscalls/linux:uname_test", + vfs2 = "True", +) syscall_test( add_overlay = True, test = "//test/syscalls/linux:unlink_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:unshare_test") +syscall_test( + test = "//test/syscalls/linux:unshare_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:utimes_test") +syscall_test( + test = "//test/syscalls/linux:utimes_test", + vfs2 = "True", +) syscall_test( size = "medium", test = "//test/syscalls/linux:vdso_clock_gettime_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:vdso_test") +syscall_test( + test = "//test/syscalls/linux:vdso_test", + vfs2 = "True", +) -syscall_test(test = 
"//test/syscalls/linux:vsyscall_test") +syscall_test( + test = "//test/syscalls/linux:vsyscall_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:vfork_test") +syscall_test( + test = "//test/syscalls/linux:vfork_test", + vfs2 = "True", +) syscall_test( size = "medium", shard_count = 5, test = "//test/syscalls/linux:wait_test", + vfs2 = "True", ) syscall_test( add_overlay = True, test = "//test/syscalls/linux:write_test", + vfs2 = "True", ) -syscall_test(test = "//test/syscalls/linux:proc_net_unix_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_unix_test", +) -syscall_test(test = "//test/syscalls/linux:proc_net_tcp_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_tcp_test", + vfs2 = "True", +) -syscall_test(test = "//test/syscalls/linux:proc_net_udp_test") +syscall_test( + test = "//test/syscalls/linux:proc_net_udp_test", + vfs2 = "True", +) diff --git a/test/syscalls/linux/inotify.cc b/test/syscalls/linux/inotify.cc index 2306d9cab..1d1a7171d 100644 --- a/test/syscalls/linux/inotify.cc +++ b/test/syscalls/linux/inotify.cc @@ -19,6 +19,7 @@ #include <sys/inotify.h> #include <sys/ioctl.h> #include <sys/time.h> +#include <sys/xattr.h> #include <atomic> #include <list> @@ -593,12 +594,12 @@ TEST(Inotify, SizeZeroReadWriteGeneratesNothing) { TEST(Inotify, FailedFileCreationGeneratesNoEvents) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const std::string dir_path = dir.path(); const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); - ASSERT_NO_ERRNO_AND_VALUE( - InotifyAddWatch(fd.get(), dir.path(), IN_ALL_EVENTS)); + ASSERT_NO_ERRNO_AND_VALUE(InotifyAddWatch(fd.get(), dir_path, IN_ALL_EVENTS)); - const char* p = dir.path().c_str(); + const char* p = dir_path.c_str(); ASSERT_THAT(mkdir(p, 0777), SyscallFails()); ASSERT_THAT(mknod(p, S_IFIFO, 0777), SyscallFails()); ASSERT_THAT(symlink(p, p), SyscallFails()); @@ -1655,9 +1656,44 @@ TEST(Inotify, EpollNoDeadlock) { } } 
-TEST(Inotify, SpliceEvent) { - // TODO(gvisor.dev/issue/138): Implement splice in VFS2. - SKIP_IF(IsRunningOnGvisor() && !IsRunningWithVFS1()); +// On Linux, inotify behavior is not very consistent with splice(2). We try our +// best to emulate Linux for very basic calls to splice. +TEST(Inotify, SpliceOnWatchTarget) { + int pipes[2]; + ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); + + const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith( + dir.path(), "some content", TempPath::kDefaultFileMode)); + + const FileDescriptor fd = + ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR)); + const int dir_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), dir.path(), IN_ALL_EVENTS)); + const int file_wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), file.path(), IN_ALL_EVENTS)); + + EXPECT_THAT(splice(fd.get(), nullptr, pipes[1], nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + // Surprisingly, events are not generated in Linux if we read from a file. + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({})); + + EXPECT_THAT(splice(pipes[0], nullptr, fd.get(), nullptr, 1, /*flags=*/0), + SyscallSucceedsWithValue(1)); + + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + ASSERT_THAT(events, Are({ + Event(IN_MODIFY, dir_wd, Basename(file.path())), + Event(IN_MODIFY, file_wd), + })); +} + +TEST(Inotify, SpliceOnInotifyFD) { int pipes[2]; ASSERT_THAT(pipe2(pipes, O_NONBLOCK), SyscallSucceeds()); @@ -1719,6 +1755,58 @@ TEST(Inotify, LinkOnOtherParent) { EXPECT_THAT(events, Are({})); } +TEST(Inotify, Xattr) { + // TODO(gvisor.dev/issue/1636): Support extended attributes in runsc gofer. 
+ SKIP_IF(IsRunningOnGvisor()); + + const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFile()); + const std::string path = file.path(); + const FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(path, O_RDWR)); + const FileDescriptor inotify_fd = + ASSERT_NO_ERRNO_AND_VALUE(InotifyInit1(IN_NONBLOCK)); + const int wd = ASSERT_NO_ERRNO_AND_VALUE( + InotifyAddWatch(inotify_fd.get(), path, IN_ALL_EVENTS)); + + const char* cpath = path.c_str(); + const char* name = "user.test"; + int val = 123; + ASSERT_THAT(setxattr(cpath, name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + std::vector<Event> events = + ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(getxattr(cpath, name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + char list[100]; + ASSERT_THAT(listxattr(cpath, list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(removexattr(cpath, name), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fsetxattr(fd.get(), name, &val, sizeof(val), /*flags=*/0), + SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); + + ASSERT_THAT(fgetxattr(fd.get(), name, &val, sizeof(val)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(flistxattr(fd.get(), list, sizeof(list)), SyscallSucceeds()); + events = ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({})); + + ASSERT_THAT(fremovexattr(fd.get(), name), SyscallSucceeds()); + events = 
ASSERT_NO_ERRNO_AND_VALUE(DrainEvents(inotify_fd.get())); + EXPECT_THAT(events, Are({Event(IN_ATTRIB, wd)})); +} + TEST(Inotify, Exec) { const TempPath dir = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir()); const TempPath bin = ASSERT_NO_ERRNO_AND_VALUE( diff --git a/test/syscalls/linux/pty.cc b/test/syscalls/linux/pty.cc index b8a0159ba..aabfa6955 100644 --- a/test/syscalls/linux/pty.cc +++ b/test/syscalls/linux/pty.cc @@ -364,6 +364,12 @@ PosixErrorOr<size_t> PollAndReadFd(int fd, void* buf, size_t count, ssize_t n = ReadFd(fd, static_cast<char*>(buf) + completed, count - completed); if (n < 0) { + if (errno == EAGAIN) { + // Linux sometimes returns EAGAIN from this read, despite the fact that + // poll returned success. Let's just do as we are told and try + // again. + continue; + } return PosixError(errno, "read failed"); } completed += n; diff --git a/test/syscalls/linux/socket_unix.cc b/test/syscalls/linux/socket_unix.cc index 8bf663e8b..591cab3fd 100644 --- a/test/syscalls/linux/socket_unix.cc +++ b/test/syscalls/linux/socket_unix.cc @@ -256,10 +256,9 @@ TEST_P(UnixSocketPairTest, ShutdownWrite) { } TEST_P(UnixSocketPairTest, SocketReopenFromProcfs) { - // TODO(b/122310852): We should be returning ENXIO and NOT EIO. - // TODO(github.dev/issue/1624): This should be resolved in VFS2. Verify - // that this is the case and delete the SKIP_IF once we delete VFS1. - SKIP_IF(IsRunningOnGvisor()); + // TODO(gvisor.dev/issue/1624): In VFS1, we return EIO instead of ENXIO (see + // b/122310852). Remove this skip once VFS1 is deleted. + SKIP_IF(IsRunningWithVFS1()); auto sockets = ASSERT_NO_ERRNO_AND_VALUE(NewSocketPair()); // Opening a socket pair via /proc/self/fd/X is a ENXIO. |