summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--benchmarks/workloads/httpd/BUILD1
-rw-r--r--benchmarks/workloads/httpd/Dockerfile10
-rw-r--r--benchmarks/workloads/httpd/apache2-tmpdir.conf5
-rw-r--r--g3doc/BUILD9
-rw-r--r--g3doc/user_guide/BUILD2
-rw-r--r--pkg/abi/linux/file.go5
-rw-r--r--pkg/p9/p9.go13
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go35
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go74
-rw-r--r--pkg/sentry/fsimpl/gofer/regular_file.go37
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go13
-rw-r--r--pkg/sentry/fsimpl/gofer/time.go12
-rw-r--r--pkg/sentry/fsimpl/host/host.go14
-rw-r--r--pkg/sentry/fsimpl/host/socket.go9
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go5
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go8
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go2
-rw-r--r--pkg/sentry/fsimpl/sys/sys_test.go2
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go6
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go29
-rw-r--r--pkg/sentry/kernel/fasync/fasync.go10
-rw-r--r--pkg/sentry/kernel/pipe/vfs.go5
-rw-r--r--pkg/sentry/socket/hostinet/socket_vfs2.go7
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go54
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/filesystem.go50
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/ioctl.go43
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/sync.go42
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/vfs2.go2
-rw-r--r--pkg/sentry/vfs/file_description.go7
-rw-r--r--pkg/sentry/vfs/file_description_impl_util.go11
-rw-r--r--pkg/sentry/vfs/inotify.go5
-rw-r--r--pkg/tcpip/transport/tcp/connect.go23
-rw-r--r--pkg/tcpip/transport/tcp/dispatcher.go148
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go12
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go7
-rw-r--r--pkg/tcpip/transport/tcp/snd.go63
-rw-r--r--test/packetimpact/runner/defs.bzl6
-rw-r--r--test/packetimpact/runner/packetimpact_test.go8
-rw-r--r--test/packetimpact/tests/BUILD12
-rw-r--r--test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go131
-rw-r--r--test/packetimpact/tests/tcp_splitseg_mss_test.go71
-rw-r--r--test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go4
-rw-r--r--test/syscalls/BUILD5
-rw-r--r--test/syscalls/linux/BUILD5
-rw-r--r--test/syscalls/linux/fallocate.cc45
-rw-r--r--test/syscalls/linux/fcntl.cc60
-rw-r--r--test/syscalls/linux/pwritev2.cc59
-rw-r--r--test/syscalls/linux/socket_netlink_route.cc2
-rw-r--r--test/syscalls/linux/sticky.cc77
-rw-r--r--test/util/test_util.cc11
-rw-r--r--test/util/test_util.h3
-rw-r--r--website/BUILD1
-rw-r--r--website/_includes/footer-links.html2
-rw-r--r--website/cmd/server/main.go4
54 files changed, 830 insertions, 456 deletions
diff --git a/benchmarks/workloads/httpd/BUILD b/benchmarks/workloads/httpd/BUILD
index a70873065..83450d190 100644
--- a/benchmarks/workloads/httpd/BUILD
+++ b/benchmarks/workloads/httpd/BUILD
@@ -9,5 +9,6 @@ pkg_tar(
name = "tar",
srcs = [
"Dockerfile",
+ "apache2-tmpdir.conf",
],
)
diff --git a/benchmarks/workloads/httpd/Dockerfile b/benchmarks/workloads/httpd/Dockerfile
index 5259c8f4f..52a550678 100644
--- a/benchmarks/workloads/httpd/Dockerfile
+++ b/benchmarks/workloads/httpd/Dockerfile
@@ -6,16 +6,16 @@ RUN set -x \
apache2 \
&& rm -rf /var/lib/apt/lists/*
-# Link the htdoc directory to tmp.
-RUN mkdir -p /usr/local/apache2/htdocs && \
- cd /usr/local/apache2 && ln -s /tmp htdocs
-
# Generate a bunch of relevant files.
RUN mkdir -p /local && \
for size in 1 10 100 1000 1024 10240; do \
dd if=/dev/zero of=/local/latin${size}k.txt count=${size} bs=1024; \
done
+# Rewrite DocumentRoot to point to /tmp/html instead of the default path.
+RUN sed -i 's/DocumentRoot.*\/var\/www\/html$/DocumentRoot \/tmp\/html/' /etc/apache2/sites-enabled/000-default.conf
+COPY ./apache2-tmpdir.conf /etc/apache2/sites-enabled/apache2-tmpdir.conf
+
# Standard settings.
ENV APACHE_RUN_DIR /tmp
ENV APACHE_RUN_USER nobody
@@ -24,4 +24,4 @@ ENV APACHE_LOG_DIR /tmp
ENV APACHE_PID_FILE /tmp/apache.pid
# Copy on start-up; serve everything from /tmp (including the configuration).
-CMD ["sh", "-c", "cp -a /local/* /tmp && apache2 -c \"ServerName localhost\" -c \"DocumentRoot /tmp\" -X"]
+CMD ["sh", "-c", "mkdir -p /tmp/html && cp -a /local/* /tmp/html && apache2 -X"]
diff --git a/benchmarks/workloads/httpd/apache2-tmpdir.conf b/benchmarks/workloads/httpd/apache2-tmpdir.conf
new file mode 100644
index 000000000..e33f8d9bb
--- /dev/null
+++ b/benchmarks/workloads/httpd/apache2-tmpdir.conf
@@ -0,0 +1,5 @@
+<Directory /tmp/html/>
+ Options Indexes FollowSymLinks
+ AllowOverride None
+ Require all granted
+</Directory> \ No newline at end of file
diff --git a/g3doc/BUILD b/g3doc/BUILD
index dbbf96204..c315d38be 100644
--- a/g3doc/BUILD
+++ b/g3doc/BUILD
@@ -33,3 +33,12 @@ doc(
subcategory = "Community",
weight = "95",
)
+
+doc(
+ name = "style",
+ src = "style.md",
+ category = "Project",
+ permalink = "/community/style_guide/",
+ subcategory = "Community",
+ weight = "10",
+)
diff --git a/g3doc/user_guide/BUILD b/g3doc/user_guide/BUILD
index 5568e1ba4..b69aee12c 100644
--- a/g3doc/user_guide/BUILD
+++ b/g3doc/user_guide/BUILD
@@ -33,7 +33,7 @@ doc(
name = "FAQ",
src = "FAQ.md",
category = "User Guide",
- permalink = "/docs/user_guide/FAQ/",
+ permalink = "/docs/user_guide/faq/",
weight = "90",
)
diff --git a/pkg/abi/linux/file.go b/pkg/abi/linux/file.go
index 055ac1d7c..e11ca2d62 100644
--- a/pkg/abi/linux/file.go
+++ b/pkg/abi/linux/file.go
@@ -191,8 +191,9 @@ var DirentType = abi.ValueSet{
// Values for preadv2/pwritev2.
const (
- // Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
- // accepted as a valid flag argument for preadv2/pwritev2.
+ // NOTE(b/120162627): gVisor does not implement the RWF_HIPRI feature, but
+ // the flag is accepted as a valid flag argument for preadv2/pwritev2 and
+ // silently ignored.
RWF_HIPRI = 0x00000001
RWF_DSYNC = 0x00000002
RWF_SYNC = 0x00000004
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index 28d851ff5..122c457d2 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -1091,6 +1091,19 @@ type AllocateMode struct {
Unshare bool
}
+// ToAllocateMode returns an AllocateMode from a fallocate(2) mode.
+func ToAllocateMode(mode uint64) AllocateMode {
+ return AllocateMode{
+ KeepSize: mode&unix.FALLOC_FL_KEEP_SIZE != 0,
+ PunchHole: mode&unix.FALLOC_FL_PUNCH_HOLE != 0,
+ NoHideStale: mode&unix.FALLOC_FL_NO_HIDE_STALE != 0,
+ CollapseRange: mode&unix.FALLOC_FL_COLLAPSE_RANGE != 0,
+ ZeroRange: mode&unix.FALLOC_FL_ZERO_RANGE != 0,
+ InsertRange: mode&unix.FALLOC_FL_INSERT_RANGE != 0,
+ Unshare: mode&unix.FALLOC_FL_UNSHARE_RANGE != 0,
+ }
+}
+
// ToLinux converts to a value compatible with fallocate(2)'s mode.
func (a *AllocateMode) ToLinux() uint32 {
rv := uint32(0)
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 51082359d..7bcc99b29 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -869,11 +869,22 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
if err := d.checkPermissions(rp.Credentials(), ats); err != nil {
return nil, err
}
+
+ trunc := opts.Flags&linux.O_TRUNC != 0 && d.fileType() == linux.S_IFREG
+ if trunc {
+ // Lock metadataMu *while* we open a regular file with O_TRUNC because
+ // open(2) will change the file size on server.
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+ }
+
+ var vfd *vfs.FileDescription
+ var err error
mnt := rp.Mount()
switch d.fileType() {
case linux.S_IFREG:
if !d.fs.opts.regularFilesUseSpecialFileFD {
- if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, opts.Flags&linux.O_TRUNC != 0); err != nil {
+ if err := d.ensureSharedHandle(ctx, ats&vfs.MayRead != 0, ats&vfs.MayWrite != 0, trunc); err != nil {
return nil, err
}
fd := &regularFileFD{}
@@ -883,7 +894,7 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
}); err != nil {
return nil, err
}
- return &fd.vfsfd, nil
+ vfd = &fd.vfsfd
}
case linux.S_IFDIR:
// Can't open directories with O_CREAT.
@@ -923,7 +934,25 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags, &d.locks)
}
}
- return d.openSpecialFileLocked(ctx, mnt, opts)
+
+ if vfd == nil {
+ if vfd, err = d.openSpecialFileLocked(ctx, mnt, opts); err != nil {
+ return nil, err
+ }
+ }
+
+ if trunc {
+ // If no errors occured so far then update file size in memory. This
+ // step is required even if !d.cachedMetadataAuthoritative() because
+ // d.mappings has to be updated.
+ // d.metadataMu has already been acquired if trunc == true.
+ d.updateFileSizeLocked(0)
+
+ if d.cachedMetadataAuthoritative() {
+ d.touchCMtimeLocked()
+ }
+ }
+ return vfd, err
}
func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 71c8d3ae1..8e74e60a5 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -794,9 +794,7 @@ func (d *dentry) updateFromP9Attrs(mask p9.AttrMask, attr *p9.Attr) {
atomic.StoreUint32(&d.nlink, uint32(attr.NLink))
}
if mask.Size {
- d.dataMu.Lock()
- atomic.StoreUint64(&d.size, attr.Size)
- d.dataMu.Unlock()
+ d.updateFileSizeLocked(attr.Size)
}
d.metadataMu.Unlock()
}
@@ -902,6 +900,12 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
}
d.metadataMu.Lock()
defer d.metadataMu.Unlock()
+ if stat.Mask&linux.STATX_SIZE != 0 {
+ // The size needs to be changed even when
+ // !d.cachedMetadataAuthoritative() because d.mappings has to be
+ // updated.
+ d.updateFileSizeLocked(stat.Size)
+ }
if !d.isSynthetic() {
if stat.Mask != 0 {
if err := d.file.setAttr(ctx, p9.SetAttrMask{
@@ -963,40 +967,42 @@ func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *lin
stat.Mask |= linux.STATX_MTIME
}
atomic.StoreInt64(&d.ctime, now)
- if stat.Mask&linux.STATX_SIZE != 0 {
+ return nil
+}
+
+// Preconditions: d.metadataMu must be locked.
+func (d *dentry) updateFileSizeLocked(newSize uint64) {
+ d.dataMu.Lock()
+ oldSize := d.size
+ d.size = newSize
+ // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
+ // below. This allows concurrent calls to Read/Translate/etc. These
+ // functions synchronize with truncation by refusing to use cache
+ // contents beyond the new d.size. (We are still holding d.metadataMu,
+ // so we can't race with Write or another truncate.)
+ d.dataMu.Unlock()
+ if d.size < oldSize {
+ oldpgend, _ := usermem.PageRoundUp(oldSize)
+ newpgend, _ := usermem.PageRoundUp(d.size)
+ if oldpgend != newpgend {
+ d.mapsMu.Lock()
+ d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
+ // Compare Linux's mm/truncate.c:truncate_setsize() =>
+ // truncate_pagecache() =>
+ // mm/memory.c:unmap_mapping_range(evencows=1).
+ InvalidatePrivate: true,
+ })
+ d.mapsMu.Unlock()
+ }
+ // We are now guaranteed that there are no translations of
+ // truncated pages, and can remove them from the cache. Since
+ // truncated pages have been removed from the remote file, they
+ // should be dropped without being written back.
d.dataMu.Lock()
- oldSize := d.size
- d.size = stat.Size
- // d.dataMu must be unlocked to lock d.mapsMu and invalidate mappings
- // below. This allows concurrent calls to Read/Translate/etc. These
- // functions synchronize with truncation by refusing to use cache
- // contents beyond the new d.size. (We are still holding d.metadataMu,
- // so we can't race with Write or another truncate.)
+ d.cache.Truncate(d.size, d.fs.mfp.MemoryFile())
+ d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend})
d.dataMu.Unlock()
- if d.size < oldSize {
- oldpgend, _ := usermem.PageRoundUp(oldSize)
- newpgend, _ := usermem.PageRoundUp(d.size)
- if oldpgend != newpgend {
- d.mapsMu.Lock()
- d.mappings.Invalidate(memmap.MappableRange{newpgend, oldpgend}, memmap.InvalidateOpts{
- // Compare Linux's mm/truncate.c:truncate_setsize() =>
- // truncate_pagecache() =>
- // mm/memory.c:unmap_mapping_range(evencows=1).
- InvalidatePrivate: true,
- })
- d.mapsMu.Unlock()
- }
- // We are now guaranteed that there are no translations of
- // truncated pages, and can remove them from the cache. Since
- // truncated pages have been removed from the remote file, they
- // should be dropped without being written back.
- d.dataMu.Lock()
- d.cache.Truncate(d.size, d.fs.mfp.MemoryFile())
- d.dirty.KeepClean(memmap.MappableRange{d.size, oldpgend})
- d.dataMu.Unlock()
- }
}
- return nil
}
func (d *dentry) checkPermissions(creds *auth.Credentials, ats vfs.AccessTypes) error {
diff --git a/pkg/sentry/fsimpl/gofer/regular_file.go b/pkg/sentry/fsimpl/gofer/regular_file.go
index 404a452c4..3d2d3530a 100644
--- a/pkg/sentry/fsimpl/gofer/regular_file.go
+++ b/pkg/sentry/fsimpl/gofer/regular_file.go
@@ -24,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/p9"
"gvisor.dev/gvisor/pkg/safemem"
"gvisor.dev/gvisor/pkg/sentry/fs/fsutil"
"gvisor.dev/gvisor/pkg/sentry/memmap"
@@ -67,13 +68,43 @@ func (fd *regularFileFD) OnClose(ctx context.Context) error {
return d.handle.file.flush(ctx)
}
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+
+ d := fd.dentry()
+ d.metadataMu.Lock()
+ defer d.metadataMu.Unlock()
+
+ size := offset + length
+
+ // Allocating a smaller size is a noop.
+ if size <= d.size {
+ return nil
+ }
+
+ d.handleMu.Lock()
+ defer d.handleMu.Unlock()
+
+ err := d.handle.file.allocate(ctx, p9.ToAllocateMode(mode), offset, length)
+ if err != nil {
+ return err
+ }
+ d.size = size
+ if !d.cachedMetadataAuthoritative() {
+ d.touchCMtimeLocked()
+ }
+ return nil
+}
+
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
if offset < 0 {
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
+ // Check that flags are supported.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -126,7 +157,9 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
+ // Check that flags are supported.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index a016cbae1..3c4e7e2e4 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -130,7 +130,9 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
+ // Check that flags are supported.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -176,7 +178,9 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
+ // Check that flags are supported.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select pwritev2 flags.
if opts.Flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -235,8 +239,5 @@ func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (
// Sync implements vfs.FileDescriptionImpl.Sync.
func (fd *specialFileFD) Sync(ctx context.Context) error {
- if !fd.vfsfd.IsWritable() {
- return nil
- }
- return fd.handle.sync(ctx)
+ return fd.dentry().syncSharedHandle(ctx)
}
diff --git a/pkg/sentry/fsimpl/gofer/time.go b/pkg/sentry/fsimpl/gofer/time.go
index 1d5aa82dc..0eef4e16e 100644
--- a/pkg/sentry/fsimpl/gofer/time.go
+++ b/pkg/sentry/fsimpl/gofer/time.go
@@ -36,7 +36,7 @@ func statxTimestampFromDentry(ns int64) linux.StatxTimestamp {
}
}
-// Preconditions: fs.interop != InteropModeShared.
+// Preconditions: d.cachedMetadataAuthoritative() == true.
func (d *dentry) touchAtime(mnt *vfs.Mount) {
if mnt.Flags.NoATime {
return
@@ -51,8 +51,8 @@ func (d *dentry) touchAtime(mnt *vfs.Mount) {
mnt.EndWrite()
}
-// Preconditions: fs.interop != InteropModeShared. The caller has successfully
-// called vfs.Mount.CheckBeginWrite().
+// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
+// successfully called vfs.Mount.CheckBeginWrite().
func (d *dentry) touchCtime() {
now := d.fs.clock.Now().Nanoseconds()
d.metadataMu.Lock()
@@ -60,8 +60,8 @@ func (d *dentry) touchCtime() {
d.metadataMu.Unlock()
}
-// Preconditions: fs.interop != InteropModeShared. The caller has successfully
-// called vfs.Mount.CheckBeginWrite().
+// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
+// successfully called vfs.Mount.CheckBeginWrite().
func (d *dentry) touchCMtime() {
now := d.fs.clock.Now().Nanoseconds()
d.metadataMu.Lock()
@@ -70,6 +70,8 @@ func (d *dentry) touchCMtime() {
d.metadataMu.Unlock()
}
+// Preconditions: d.cachedMetadataAuthoritative() == true. The caller has
+// locked d.metadataMu.
func (d *dentry) touchCMtimeLocked() {
now := d.fs.clock.Now().Nanoseconds()
atomic.StoreInt64(&d.mtime, now)
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index d5e73ddae..1cd2982cb 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -543,6 +543,16 @@ func (f *fileDescription) Release() {
// noop
}
+// Allocate implements vfs.FileDescriptionImpl.
+func (f *fileDescription) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ if !f.inode.seekable {
+ return syserror.ESPIPE
+ }
+
+ // TODO(gvisor.dev/issue/2923): Implement Allocate for non-pipe hostfds.
+ return syserror.EOPNOTSUPP
+}
+
// PRead implements FileDescriptionImpl.
func (f *fileDescription) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
i := f.inode
@@ -578,8 +588,10 @@ func (f *fileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts
}
func readFromHostFD(ctx context.Context, hostFD int, dst usermem.IOSequence, offset int64, flags uint32) (int64, error) {
+ // Check that flags are supported.
+ //
// TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
- if flags != 0 {
+ if flags&^linux.RWF_HIPRI != 0 {
return 0, syserror.EOPNOTSUPP
}
reader := hostfd.GetReadWriterAt(int32(hostFD), offset, flags)
diff --git a/pkg/sentry/fsimpl/host/socket.go b/pkg/sentry/fsimpl/host/socket.go
index 38f1fbfba..fd16bd92d 100644
--- a/pkg/sentry/fsimpl/host/socket.go
+++ b/pkg/sentry/fsimpl/host/socket.go
@@ -47,11 +47,6 @@ func newEndpoint(ctx context.Context, hostFD int, queue *waiter.Queue) (transpor
return ep, nil
}
-// maxSendBufferSize is the maximum host send buffer size allowed for endpoint.
-//
-// N.B. 8MB is the default maximum on Linux (2 * sysctl_wmem_max).
-const maxSendBufferSize = 8 << 20
-
// ConnectedEndpoint is an implementation of transport.ConnectedEndpoint and
// transport.Receiver. It is backed by a host fd that was imported at sentry
// startup. This fd is shared with a hostfs inode, which retains ownership of
@@ -114,10 +109,6 @@ func (c *ConnectedEndpoint) init() *syserr.Error {
if err != nil {
return syserr.FromError(err)
}
- if sndbuf > maxSendBufferSize {
- log.Warningf("Socket send buffer too large: %d", sndbuf)
- return syserr.ErrInvalidEndpointState
- }
c.stype = linux.SockType(stype)
c.sndbuf = int64(sndbuf)
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 5f7853a2a..ca8b8c63b 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -236,6 +236,11 @@ func (fd *GenericDirectoryFD) SetStat(ctx context.Context, opts vfs.SetStatOptio
return inode.SetStat(ctx, fd.filesystem(), creds, opts)
}
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (fd *GenericDirectoryFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return fd.DirectoryFileDescriptionDefaultImpl.Allocate(ctx, mode, offset, length)
+}
+
// LockPOSIX implements vfs.FileDescriptionImpl.LockPOSIX.
func (fd *GenericDirectoryFD) LockPOSIX(ctx context.Context, uid fslock.UniqueID, t fslock.LockType, start, length uint64, whence int16, block fslock.Blocker) error {
return fd.Locks().LockPOSIX(ctx, &fd.vfsfd, uid, t, start, length, whence, block)
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 286c23f01..9af43b859 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -231,8 +231,9 @@ func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
// Linux will return envp up to and including the first NULL character,
// so find it.
- if end := bytes.IndexByte(buf.Bytes()[ar.Length():], 0); end != -1 {
- buf.Truncate(end)
+ envStart := int(ar.Length())
+ if nullIdx := bytes.IndexByte(buf.Bytes()[envStart:], 0); nullIdx != -1 {
+ buf.Truncate(envStart + nullIdx)
}
}
@@ -300,7 +301,7 @@ type idMapData struct {
var _ dynamicInode = (*idMapData)(nil)
-// Generate implements vfs.DynamicBytesSource.Generate.
+// Generate implements vfs.WritableDynamicBytesSource.Generate.
func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
var entries []auth.IDMapEntry
if d.gids {
@@ -314,6 +315,7 @@ func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
return nil
}
+// Write implements vfs.WritableDynamicBytesSource.Write.
func (d *idMapData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
// "In addition, the number of bytes written to the file must be less than
// the system page size, and the write must be performed at the start of
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index fe02f7ee9..01ce30a4d 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -138,7 +138,7 @@ type cpuFile struct {
// Generate implements vfs.DynamicBytesSource.Generate.
func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "0-%d", c.maxCores-1)
+ fmt.Fprintf(buf, "0-%d\n", c.maxCores-1)
return nil
}
diff --git a/pkg/sentry/fsimpl/sys/sys_test.go b/pkg/sentry/fsimpl/sys/sys_test.go
index 4b3602d47..242d5fd12 100644
--- a/pkg/sentry/fsimpl/sys/sys_test.go
+++ b/pkg/sentry/fsimpl/sys/sys_test.go
@@ -51,7 +51,7 @@ func TestReadCPUFile(t *testing.T) {
k := kernel.KernelFromContext(s.Ctx)
maxCPUCores := k.ApplicationCores()
- expected := fmt.Sprintf("0-%d", maxCPUCores-1)
+ expected := fmt.Sprintf("0-%d\n", maxCPUCores-1)
for _, fname := range []string{"online", "possible", "present"} {
pop := s.PathOpAtRoot(fmt.Sprintf("devices/system/cpu/%s", fname))
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 71ac7b8e6..ed40f6b52 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -407,10 +407,10 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
case *regularFile:
var fd regularFileFD
fd.LockFD.Init(&d.inode.locks)
- if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{AllowDirectIO: true}); err != nil {
return nil, err
}
- if opts.Flags&linux.O_TRUNC != 0 {
+ if !afterCreate && opts.Flags&linux.O_TRUNC != 0 {
if _, err := impl.truncate(0); err != nil {
return nil, err
}
@@ -423,7 +423,7 @@ func (d *dentry) open(ctx context.Context, rp *vfs.ResolvingPath, opts *vfs.Open
}
var fd directoryFD
fd.LockFD.Init(&d.inode.locks)
- if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(&fd, opts.Flags, rp.Mount(), &d.vfsd, &vfs.FileDescriptionOptions{AllowDirectIO: true}); err != nil {
return nil, err
}
return &fd.vfsfd, nil
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index b805aadd0..1cdb46e6f 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -274,14 +274,32 @@ func (fd *regularFileFD) Release() {
// noop
}
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ f := fd.inode().impl.(*regularFile)
+
+ f.inode.mu.Lock()
+ defer f.inode.mu.Unlock()
+ oldSize := f.size
+ size := offset + length
+ if oldSize >= size {
+ return nil
+ }
+ _, err := f.truncateLocked(size)
+ return err
+}
+
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
if offset < 0 {
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
- if opts.Flags&^linux.RWF_HIPRI != 0 {
+ // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
+ // all state is in-memory.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
+ if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
return 0, syserror.EOPNOTSUPP
}
@@ -311,8 +329,11 @@ func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
return 0, syserror.EINVAL
}
- // Check that flags are supported. Silently ignore RWF_HIPRI.
- if opts.Flags&^linux.RWF_HIPRI != 0 {
+ // Check that flags are supported. RWF_DSYNC/RWF_SYNC can be ignored since
+ // all state is in-memory.
+ //
+ // TODO(gvisor.dev/issue/2601): Support select preadv2 flags.
+ if opts.Flags&^(linux.RWF_HIPRI|linux.RWF_DSYNC|linux.RWF_SYNC) != 0 {
return 0, syserror.EOPNOTSUPP
}
diff --git a/pkg/sentry/kernel/fasync/fasync.go b/pkg/sentry/kernel/fasync/fasync.go
index 323f1dfa5..153d2cd9b 100644
--- a/pkg/sentry/kernel/fasync/fasync.go
+++ b/pkg/sentry/kernel/fasync/fasync.go
@@ -176,3 +176,13 @@ func (a *FileAsync) SetOwnerProcessGroup(requester *kernel.Task, recipient *kern
a.recipientTG = nil
a.recipientPG = recipient
}
+
+// ClearOwner unsets the current signal recipient.
+func (a *FileAsync) ClearOwner() {
+ a.mu.Lock()
+ defer a.mu.Unlock()
+ a.requester = nil
+ a.recipientT = nil
+ a.recipientTG = nil
+ a.recipientPG = nil
+}
diff --git a/pkg/sentry/kernel/pipe/vfs.go b/pkg/sentry/kernel/pipe/vfs.go
index a4519363f..45d4c5fc1 100644
--- a/pkg/sentry/kernel/pipe/vfs.go
+++ b/pkg/sentry/kernel/pipe/vfs.go
@@ -200,6 +200,11 @@ func (fd *VFSPipeFD) Readiness(mask waiter.EventMask) waiter.EventMask {
}
}
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (fd *VFSPipeFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.ESPIPE
+}
+
// EventRegister implements waiter.Waitable.EventRegister.
func (fd *VFSPipeFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
fd.pipe.EventRegister(e, mask)
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index ad5f64799..8f192c62f 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -96,7 +96,12 @@ func (s *socketVFS2) Ioctl(ctx context.Context, uio usermem.IO, args arch.Syscal
return ioctl(ctx, s.fd, uio, args)
}
-// PRead implements vfs.FileDescriptionImpl.
+// Allocate implements vfs.FileDescriptionImpl.Allocate.
+func (s *socketVFS2) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.ENODEV
+}
+
+// PRead implements vfs.FileDescriptionImpl.PRead.
func (s *socketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
return 0, syserror.ESPIPE
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 7e4c6a56e..517394ba9 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -156,11 +156,10 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
return uintptr(n), nil, nil
case linux.F_GETOWN:
- a := file.AsyncHandler()
- if a == nil {
+ owner, hasOwner := getAsyncOwner(t, file)
+ if !hasOwner {
return 0, nil, nil
}
- owner := getAsyncOwner(t, a.(*fasync.FileAsync))
if owner.Type == linux.F_OWNER_PGRP {
return uintptr(-owner.PID), nil, nil
}
@@ -176,26 +175,21 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
ownerType = linux.F_OWNER_PGRP
who = -who
}
- a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
- return 0, nil, setAsyncOwner(t, a, ownerType, who)
+ return 0, nil, setAsyncOwner(t, file, ownerType, who)
case linux.F_GETOWN_EX:
- a := file.AsyncHandler()
- if a == nil {
+ owner, hasOwner := getAsyncOwner(t, file)
+ if !hasOwner {
return 0, nil, nil
}
- addr := args[2].Pointer()
- owner := getAsyncOwner(t, a.(*fasync.FileAsync))
- _, err := t.CopyOut(addr, &owner)
+ _, err := t.CopyOut(args[2].Pointer(), &owner)
return 0, nil, err
case linux.F_SETOWN_EX:
- addr := args[2].Pointer()
var owner linux.FOwnerEx
- n, err := t.CopyIn(addr, &owner)
+ n, err := t.CopyIn(args[2].Pointer(), &owner)
if err != nil {
return 0, nil, err
}
- a := file.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
- return uintptr(n), nil, setAsyncOwner(t, a, owner.Type, owner.PID)
+ return uintptr(n), nil, setAsyncOwner(t, file, owner.Type, owner.PID)
case linux.F_GETPIPE_SZ:
pipefile, ok := file.Impl().(*pipe.VFSPipeFD)
if !ok {
@@ -219,30 +213,48 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
}
-func getAsyncOwner(t *kernel.Task, a *fasync.FileAsync) linux.FOwnerEx {
- ot, otg, opg := a.Owner()
+func getAsyncOwner(t *kernel.Task, fd *vfs.FileDescription) (ownerEx linux.FOwnerEx, hasOwner bool) {
+ a := fd.AsyncHandler()
+ if a == nil {
+ return linux.FOwnerEx{}, false
+ }
+
+ ot, otg, opg := a.(*fasync.FileAsync).Owner()
switch {
case ot != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_TID,
PID: int32(t.PIDNamespace().IDOfTask(ot)),
- }
+ }, true
case otg != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_PID,
PID: int32(t.PIDNamespace().IDOfThreadGroup(otg)),
- }
+ }, true
case opg != nil:
return linux.FOwnerEx{
Type: linux.F_OWNER_PGRP,
PID: int32(t.PIDNamespace().IDOfProcessGroup(opg)),
- }
+ }, true
default:
- return linux.FOwnerEx{}
+ return linux.FOwnerEx{}, true
}
}
-func setAsyncOwner(t *kernel.Task, a *fasync.FileAsync, ownerType, pid int32) error {
+func setAsyncOwner(t *kernel.Task, fd *vfs.FileDescription, ownerType, pid int32) error {
+ switch ownerType {
+ case linux.F_OWNER_TID, linux.F_OWNER_PID, linux.F_OWNER_PGRP:
+ // Acceptable type.
+ default:
+ return syserror.EINVAL
+ }
+
+ a := fd.SetAsyncHandler(fasync.NewVFS2).(*fasync.FileAsync)
+ if pid == 0 {
+ a.ClearOwner()
+ return nil
+ }
+
switch ownerType {
case linux.F_OWNER_TID:
task := t.PIDNamespace().TaskWithID(kernel.ThreadID(pid))
diff --git a/pkg/sentry/syscalls/linux/vfs2/filesystem.go b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
index 5dac77e4d..b12b5967b 100644
--- a/pkg/sentry/syscalls/linux/vfs2/filesystem.go
+++ b/pkg/sentry/syscalls/linux/vfs2/filesystem.go
@@ -18,6 +18,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
@@ -239,6 +240,55 @@ func renameat(t *kernel.Task, olddirfd int32, oldpathAddr usermem.Addr, newdirfd
})
}
+// Fallocate implements linux system call fallocate(2).
+func Fallocate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ mode := args[1].Uint64()
+ offset := args[2].Int64()
+ length := args[3].Int64()
+
+ file := t.GetFileVFS2(fd)
+
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+
+ if !file.IsWritable() {
+ return 0, nil, syserror.EBADF
+ }
+
+ if mode != 0 {
+ return 0, nil, syserror.ENOTSUP
+ }
+
+ if offset < 0 || length <= 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ size := offset + length
+
+ if size < 0 {
+ return 0, nil, syserror.EFBIG
+ }
+
+ limit := limits.FromContext(t).Get(limits.FileSize).Cur
+
+ if uint64(size) >= limit {
+ t.SendSignal(&arch.SignalInfo{
+ Signo: int32(linux.SIGXFSZ),
+ Code: arch.SignalInfoUser,
+ })
+ return 0, nil, syserror.EFBIG
+ }
+
+ return 0, nil, file.Impl().Allocate(t, mode, uint64(offset), uint64(length))
+
+ // File length modified, generate notification.
+ // TODO(gvisor.dev/issue/1479): Reenable when Inotify is ported.
+ // file.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
+}
+
// Rmdir implements Linux syscall rmdir(2).
func Rmdir(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
pathAddr := args[0].Pointer()
diff --git a/pkg/sentry/syscalls/linux/vfs2/ioctl.go b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
index 0399c0db4..fd6ab94b2 100644
--- a/pkg/sentry/syscalls/linux/vfs2/ioctl.go
+++ b/pkg/sentry/syscalls/linux/vfs2/ioctl.go
@@ -57,6 +57,49 @@ func Ioctl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
flags &^= linux.O_NONBLOCK
}
return 0, nil, file.SetStatusFlags(t, t.Credentials(), flags)
+
+ case linux.FIOASYNC:
+ var set int32
+ if _, err := t.CopyIn(args[2].Pointer(), &set); err != nil {
+ return 0, nil, err
+ }
+ flags := file.StatusFlags()
+ if set != 0 {
+ flags |= linux.O_ASYNC
+ } else {
+ flags &^= linux.O_ASYNC
+ }
+ file.SetStatusFlags(t, t.Credentials(), flags)
+ return 0, nil, nil
+
+ case linux.FIOGETOWN, linux.SIOCGPGRP:
+ var who int32
+ owner, hasOwner := getAsyncOwner(t, file)
+ if hasOwner {
+ if owner.Type == linux.F_OWNER_PGRP {
+ who = -owner.PID
+ } else {
+ who = owner.PID
+ }
+ }
+ _, err := t.CopyOut(args[2].Pointer(), &who)
+ return 0, nil, err
+
+ case linux.FIOSETOWN, linux.SIOCSPGRP:
+ var who int32
+ if _, err := t.CopyIn(args[2].Pointer(), &who); err != nil {
+ return 0, nil, err
+ }
+ ownerType := int32(linux.F_OWNER_PID)
+ if who < 0 {
+ // Check for overflow before flipping the sign.
+ if who-1 > who {
+ return 0, nil, syserror.EINVAL
+ }
+ ownerType = linux.F_OWNER_PGRP
+ who = -who
+ }
+ return 0, nil, setAsyncOwner(t, file, ownerType, who)
}
ret, err := file.Ioctl(t, t.MemoryManager(), args)
diff --git a/pkg/sentry/syscalls/linux/vfs2/sync.go b/pkg/sentry/syscalls/linux/vfs2/sync.go
index 365250b0b..0d0ebf46a 100644
--- a/pkg/sentry/syscalls/linux/vfs2/sync.go
+++ b/pkg/sentry/syscalls/linux/vfs2/sync.go
@@ -65,10 +65,8 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
nbytes := args[2].Int64()
flags := args[3].Uint()
- if offset < 0 {
- return 0, nil, syserror.EINVAL
- }
- if nbytes < 0 {
+ // Check for negative values and overflow.
+ if offset < 0 || offset+nbytes < 0 {
return 0, nil, syserror.EINVAL
}
if flags&^(linux.SYNC_FILE_RANGE_WAIT_BEFORE|linux.SYNC_FILE_RANGE_WRITE|linux.SYNC_FILE_RANGE_WAIT_AFTER) != 0 {
@@ -81,7 +79,37 @@ func SyncFileRange(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
}
defer file.DecRef()
- // TODO(gvisor.dev/issue/1897): Avoid writeback of data ranges outside of
- // [offset, offset+nbytes).
- return 0, nil, file.Sync(t)
+ // TODO(gvisor.dev/issue/1897): Currently, the only file syncing we support
+ // is a full-file sync, i.e. fsync(2). As a result, there are severe
+ // limitations on how much we support sync_file_range:
+ // - In Linux, sync_file_range(2) doesn't write out the file's metadata, even
+ // if the file size is changed. We do.
+ // - We always sync the entire file instead of [offset, offset+nbytes).
+ // - We do not support the use of WAIT_BEFORE without WAIT_AFTER. For
+ // correctness, we would have to perform a write-out every time WAIT_BEFORE
+ // was used, but this would be much more expensive than expected if there
+ // were no write-out operations in progress.
+ // - Whenever WAIT_AFTER is used, we sync the file.
+ // - Ignore WRITE. If this flag is used with WAIT_AFTER, then the file will
+ // be synced anyway. If this flag is used without WAIT_AFTER, then it is
+ // safe (and less expensive) to do nothing, because the syscall will not
+ // wait for the write-out to complete--we only need to make sure that the
+ // next time WAIT_BEFORE or WAIT_AFTER are used, the write-out completes.
+ // - According to fs/sync.c, WAIT_BEFORE|WAIT_AFTER "will detect any I/O
+ // errors or ENOSPC conditions and will return those to the caller, after
+ // clearing the EIO and ENOSPC flags in the address_space." We don't do
+ // this.
+
+ if flags&linux.SYNC_FILE_RANGE_WAIT_BEFORE != 0 &&
+ flags&linux.SYNC_FILE_RANGE_WAIT_AFTER == 0 {
+ t.Kernel().EmitUnimplementedEvent(t)
+ return 0, nil, syserror.ENOSYS
+ }
+
+ if flags&linux.SYNC_FILE_RANGE_WAIT_AFTER != 0 {
+ if err := file.Sync(t); err != nil {
+ return 0, nil, syserror.ConvertIntr(err, kernel.ERESTARTSYS)
+ }
+ }
+ return 0, nil, nil
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 9e60c4a1c..8f497ecc7 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -138,7 +138,7 @@ func Override() {
s.Table[282] = syscalls.Supported("signalfd", Signalfd)
s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate)
s.Table[284] = syscalls.Supported("eventfd", Eventfd)
- delete(s.Table, 285) // fallocate
+ s.Table[285] = syscalls.PartiallySupported("fallocate", Fallocate, "Not all options are supported.", nil)
s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime)
s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime)
s.Table[288] = syscalls.Supported("accept4", Accept4)
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index cd1db14ac..0c42574db 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -91,8 +91,7 @@ type FileDescription struct {
// FileDescriptionOptions contains options to FileDescription.Init().
type FileDescriptionOptions struct {
- // If AllowDirectIO is true, allow O_DIRECT to be set on the file. This is
- // usually only the case if O_DIRECT would actually have an effect.
+ // If AllowDirectIO is true, allow O_DIRECT to be set on the file.
AllowDirectIO bool
// If DenyPRead is true, calls to FileDescription.PRead() return ESPIPE.
@@ -355,6 +354,10 @@ type FileDescriptionImpl interface {
// represented by the FileDescription.
StatFS(ctx context.Context) (linux.Statfs, error)
+ // Allocate grows file represented by FileDescription to offset + length bytes.
+ // Only mode == 0 is supported currently.
+ Allocate(ctx context.Context, mode, offset, length uint64) error
+
// waiter.Waitable methods may be used to poll for I/O events.
waiter.Waitable
diff --git a/pkg/sentry/vfs/file_description_impl_util.go b/pkg/sentry/vfs/file_description_impl_util.go
index 3fec0d6d6..6b8b4ad49 100644
--- a/pkg/sentry/vfs/file_description_impl_util.go
+++ b/pkg/sentry/vfs/file_description_impl_util.go
@@ -56,6 +56,12 @@ func (FileDescriptionDefaultImpl) StatFS(ctx context.Context) (linux.Statfs, err
return linux.Statfs{}, syserror.ENOSYS
}
+// Allocate implements FileDescriptionImpl.Allocate analogously to
+// fallocate called on regular file, directory or FIFO in Linux.
+func (FileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.ENODEV
+}
+
// Readiness implements waiter.Waitable.Readiness analogously to
// file_operations::poll == NULL in Linux.
func (FileDescriptionDefaultImpl) Readiness(mask waiter.EventMask) waiter.EventMask {
@@ -158,6 +164,11 @@ func (FileDescriptionDefaultImpl) Removexattr(ctx context.Context, name string)
// implementations of non-directory I/O methods that return EISDIR.
type DirectoryFileDescriptionDefaultImpl struct{}
+// Allocate implements DirectoryFileDescriptionDefaultImpl.Allocate.
+func (DirectoryFileDescriptionDefaultImpl) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ return syserror.EISDIR
+}
+
// PRead implements FileDescriptionImpl.PRead.
func (DirectoryFileDescriptionDefaultImpl) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts ReadOptions) (int64, error) {
return 0, syserror.EISDIR
diff --git a/pkg/sentry/vfs/inotify.go b/pkg/sentry/vfs/inotify.go
index 509034531..c2e21ac5f 100644
--- a/pkg/sentry/vfs/inotify.go
+++ b/pkg/sentry/vfs/inotify.go
@@ -148,6 +148,11 @@ func (i *Inotify) Release() {
}
}
+// Allocate implements FileDescription.Allocate.
+func (i *Inotify) Allocate(ctx context.Context, mode, offset, length uint64) error {
+ panic("Allocate should not be called on read-only inotify fds")
+}
+
// EventRegister implements waiter.Waitable.
func (i *Inotify) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
i.queue.EventRegister(e, mask)
diff --git a/pkg/tcpip/transport/tcp/connect.go b/pkg/tcpip/transport/tcp/connect.go
index 9d4dce826..81b740115 100644
--- a/pkg/tcpip/transport/tcp/connect.go
+++ b/pkg/tcpip/transport/tcp/connect.go
@@ -509,9 +509,7 @@ func (h *handshake) execute() *tcpip.Error {
// Initialize the resend timer.
resendWaker := sleep.Waker{}
timeOut := time.Duration(time.Second)
- rt := time.AfterFunc(timeOut, func() {
- resendWaker.Assert()
- })
+ rt := time.AfterFunc(timeOut, resendWaker.Assert)
defer rt.Stop()
// Set up the wakers.
@@ -1050,8 +1048,8 @@ func (e *endpoint) tryDeliverSegmentFromClosedEndpoint(s *segment) {
panic("current endpoint not removed from demuxer, enqueing segments to itself")
}
- if ep.(*endpoint).enqueueSegment(s) {
- ep.(*endpoint).newSegmentWaker.Assert()
+ if ep := ep.(*endpoint); ep.enqueueSegment(s) {
+ ep.newSegmentWaker.Assert()
}
}
@@ -1120,7 +1118,7 @@ func (e *endpoint) handleReset(s *segment) (ok bool, err *tcpip.Error) {
func (e *endpoint) handleSegments(fastPath bool) *tcpip.Error {
checkRequeue := true
for i := 0; i < maxSegmentsPerWake; i++ {
- if e.EndpointState() == StateClose || e.EndpointState() == StateError {
+ if e.EndpointState().closed() {
return nil
}
s := e.segmentQueue.dequeue()
@@ -1440,9 +1438,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
if e.EndpointState() == StateFinWait2 && e.closed {
// The socket has been closed and we are in FIN_WAIT2
// so start the FIN_WAIT2 timer.
- closeTimer = time.AfterFunc(e.tcpLingerTimeout, func() {
- closeWaker.Assert()
- })
+ closeTimer = time.AfterFunc(e.tcpLingerTimeout, closeWaker.Assert)
e.waiterQueue.Notify(waiter.EventHUp | waiter.EventErr | waiter.EventIn | waiter.EventOut)
}
}
@@ -1460,7 +1456,7 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
return err
}
}
- if e.EndpointState() != StateClose && e.EndpointState() != StateError {
+ if !e.EndpointState().closed() {
// Only block the worker if the endpoint
// is not in closed state or error state.
close(e.drainDone)
@@ -1526,7 +1522,12 @@ func (e *endpoint) protocolMainLoop(handshake bool, wakerInitDone chan<- struct{
}
loop:
- for e.EndpointState() != StateTimeWait && e.EndpointState() != StateClose && e.EndpointState() != StateError {
+ for {
+ switch e.EndpointState() {
+ case StateTimeWait, StateClose, StateError:
+ break loop
+ }
+
e.mu.Unlock()
v, _ := s.Fetch(true)
e.mu.Lock()
diff --git a/pkg/tcpip/transport/tcp/dispatcher.go b/pkg/tcpip/transport/tcp/dispatcher.go
index 047704c80..98aecab9e 100644
--- a/pkg/tcpip/transport/tcp/dispatcher.go
+++ b/pkg/tcpip/transport/tcp/dispatcher.go
@@ -15,6 +15,8 @@
package tcp
import (
+ "encoding/binary"
+
"gvisor.dev/gvisor/pkg/rand"
"gvisor.dev/gvisor/pkg/sleep"
"gvisor.dev/gvisor/pkg/sync"
@@ -66,89 +68,68 @@ func (q *epQueue) empty() bool {
// processor is responsible for processing packets queued to a tcp endpoint.
type processor struct {
epQ epQueue
+ sleeper sleep.Sleeper
newEndpointWaker sleep.Waker
closeWaker sleep.Waker
- id int
- wg sync.WaitGroup
-}
-
-func newProcessor(id int) *processor {
- p := &processor{
- id: id,
- }
- p.wg.Add(1)
- go p.handleSegments()
- return p
}
func (p *processor) close() {
p.closeWaker.Assert()
}
-func (p *processor) wait() {
- p.wg.Wait()
-}
-
func (p *processor) queueEndpoint(ep *endpoint) {
// Queue an endpoint for processing by the processor goroutine.
p.epQ.enqueue(ep)
p.newEndpointWaker.Assert()
}
-func (p *processor) handleSegments() {
- const newEndpointWaker = 1
- const closeWaker = 2
- s := sleep.Sleeper{}
- s.AddWaker(&p.newEndpointWaker, newEndpointWaker)
- s.AddWaker(&p.closeWaker, closeWaker)
- defer s.Done()
+const (
+ newEndpointWaker = 1
+ closeWaker = 2
+)
+
+func (p *processor) start(wg *sync.WaitGroup) {
+ defer wg.Done()
+ defer p.sleeper.Done()
+
for {
- id, ok := s.Fetch(true)
- if ok && id == closeWaker {
- p.wg.Done()
- return
+ if id, _ := p.sleeper.Fetch(true); id == closeWaker {
+ break
}
- for ep := p.epQ.dequeue(); ep != nil; ep = p.epQ.dequeue() {
+ for {
+ ep := p.epQ.dequeue()
+ if ep == nil {
+ break
+ }
if ep.segmentQueue.empty() {
continue
}
- // If socket has transitioned out of connected state
- // then just let the worker handle the packet.
+ // If socket has transitioned out of connected state then just let the
+ // worker handle the packet.
//
- // NOTE: We read this outside of e.mu lock which means
- // that by the time we get to handleSegments the
- // endpoint may not be in ESTABLISHED. But this should
- // be fine as all normal shutdown states are handled by
- // handleSegments and if the endpoint moves to a
- // CLOSED/ERROR state then handleSegments is a noop.
- if ep.EndpointState() != StateEstablished {
- ep.newSegmentWaker.Assert()
- continue
- }
-
- if !ep.mu.TryLock() {
- ep.newSegmentWaker.Assert()
- continue
- }
- // If the endpoint is in a connected state then we do
- // direct delivery to ensure low latency and avoid
- // scheduler interactions.
- if err := ep.handleSegments(true /* fastPath */); err != nil || ep.EndpointState() == StateClose {
- // Send any active resets if required.
- if err != nil {
+ // NOTE: We read this outside of e.mu lock which means that by the time
+ // we get to handleSegments the endpoint may not be in ESTABLISHED. But
+ // this should be fine as all normal shutdown states are handled by
+ // handleSegments and if the endpoint moves to a CLOSED/ERROR state
+ // then handleSegments is a noop.
+ if ep.EndpointState() == StateEstablished && ep.mu.TryLock() {
+ // If the endpoint is in a connected state then we do direct delivery
+ // to ensure low latency and avoid scheduler interactions.
+ switch err := ep.handleSegments(true /* fastPath */); {
+ case err != nil:
+ // Send any active resets if required.
ep.resetConnectionLocked(err)
+ fallthrough
+ case ep.EndpointState() == StateClose:
+ ep.notifyProtocolGoroutine(notifyTickleWorker)
+ case !ep.segmentQueue.empty():
+ p.epQ.enqueue(ep)
}
- ep.notifyProtocolGoroutine(notifyTickleWorker)
ep.mu.Unlock()
- continue
- }
-
- if !ep.segmentQueue.empty() {
- p.epQ.enqueue(ep)
+ } else {
+ ep.newSegmentWaker.Assert()
}
-
- ep.mu.Unlock()
}
}
}
@@ -159,31 +140,36 @@ func (p *processor) handleSegments() {
// hash of the endpoint id to ensure that delivery for the same endpoint happens
// in-order.
type dispatcher struct {
- processors []*processor
+ processors []processor
seed uint32
-}
-
-func newDispatcher(nProcessors int) *dispatcher {
- processors := []*processor{}
- for i := 0; i < nProcessors; i++ {
- processors = append(processors, newProcessor(i))
- }
- return &dispatcher{
- processors: processors,
- seed: generateRandUint32(),
+ wg sync.WaitGroup
+}
+
+func (d *dispatcher) init(nProcessors int) {
+ d.close()
+ d.wait()
+ d.processors = make([]processor, nProcessors)
+ d.seed = generateRandUint32()
+ for i := range d.processors {
+ p := &d.processors[i]
+ p.sleeper.AddWaker(&p.newEndpointWaker, newEndpointWaker)
+ p.sleeper.AddWaker(&p.closeWaker, closeWaker)
+ d.wg.Add(1)
+ // NB: sleeper-waker registration must happen synchronously to avoid races
+ // with `close`. It's possible to pull all this logic into `start`, but
+ // that results in a heap-allocated function literal.
+ go p.start(&d.wg)
}
}
func (d *dispatcher) close() {
- for _, p := range d.processors {
- p.close()
+ for i := range d.processors {
+ d.processors[i].close()
}
}
func (d *dispatcher) wait() {
- for _, p := range d.processors {
- p.wait()
- }
+ d.wg.Wait()
}
func (d *dispatcher) queuePacket(r *stack.Route, stackEP stack.TransportEndpoint, id stack.TransportEndpointID, pkt *stack.PacketBuffer) {
@@ -231,20 +217,18 @@ func generateRandUint32() uint32 {
if _, err := rand.Read(b); err != nil {
panic(err)
}
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+ return binary.LittleEndian.Uint32(b)
}
func (d *dispatcher) selectProcessor(id stack.TransportEndpointID) *processor {
- payload := []byte{
- byte(id.LocalPort),
- byte(id.LocalPort >> 8),
- byte(id.RemotePort),
- byte(id.RemotePort >> 8)}
+ var payload [4]byte
+ binary.LittleEndian.PutUint16(payload[0:], id.LocalPort)
+ binary.LittleEndian.PutUint16(payload[2:], id.RemotePort)
h := jenkins.Sum32(d.seed)
- h.Write(payload)
+ h.Write(payload[:])
h.Write([]byte(id.LocalAddress))
h.Write([]byte(id.RemoteAddress))
- return d.processors[h.Sum32()%uint32(len(d.processors))]
+ return &d.processors[h.Sum32()%uint32(len(d.processors))]
}
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 99a691815..bd3ec5a8d 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -1212,6 +1212,16 @@ func (e *endpoint) SetOwner(owner tcpip.PacketOwner) {
// Read reads data from the endpoint.
func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages, *tcpip.Error) {
e.LockUser()
+ defer e.UnlockUser()
+
+ // When in SYN-SENT state, let the caller block on the receive.
+ // An application can initiate a non-blocking connect and then block
+ // on a receive. It can expect to read any data after the handshake
+ // is complete. RFC793, section 3.9, p58.
+ if e.EndpointState() == StateSynSent {
+ return buffer.View{}, tcpip.ControlMessages{}, tcpip.ErrWouldBlock
+ }
+
// The endpoint can be read if it's connected, or if it's already closed
// but has some pending unread data. Also note that a RST being received
// would cause the state to become StateError so we should allow the
@@ -1221,7 +1231,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages,
if s := e.EndpointState(); !s.connected() && s != StateClose && bufUsed == 0 {
e.rcvListMu.Unlock()
he := e.HardError
- e.UnlockUser()
if s == StateError {
return buffer.View{}, tcpip.ControlMessages{}, he
}
@@ -1231,7 +1240,6 @@ func (e *endpoint) Read(*tcpip.FullAddress) (buffer.View, tcpip.ControlMessages,
v, err := e.readLocked()
e.rcvListMu.Unlock()
- e.UnlockUser()
if err == tcpip.ErrClosedForReceive {
e.stats.ReadErrors.ReadClosed.Increment()
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index f2ae6ce50..b34e47bbd 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -174,7 +174,7 @@ type protocol struct {
maxRetries uint32
synRcvdCount synRcvdCounter
synRetries uint8
- dispatcher *dispatcher
+ dispatcher dispatcher
}
// Number returns the tcp protocol number.
@@ -515,7 +515,7 @@ func (*protocol) Parse(pkt *stack.PacketBuffer) bool {
// NewProtocol returns a TCP transport protocol.
func NewProtocol() stack.TransportProtocol {
- return &protocol{
+ p := protocol{
sendBufferSize: SendBufferSizeOption{
Min: MinBufferSize,
Default: DefaultSendBufferSize,
@@ -531,10 +531,11 @@ func NewProtocol() stack.TransportProtocol {
tcpLingerTimeout: DefaultTCPLingerTimeout,
tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout,
synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold},
- dispatcher: newDispatcher(runtime.GOMAXPROCS(0)),
synRetries: DefaultSynRetries,
minRTO: MinRTO,
maxRTO: MaxRTO,
maxRetries: MaxRetries,
}
+ p.dispatcher.init(runtime.GOMAXPROCS(0))
+ return &p
}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index acacb42e4..5862c32f2 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -833,25 +833,6 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
panic("Netstack queues FIN segments without data.")
}
- segEnd = seg.sequenceNumber.Add(seqnum.Size(seg.data.Size()))
- // If the entire segment cannot be accomodated in the receiver
- // advertized window, skip splitting and sending of the segment.
- // ref: net/ipv4/tcp_output.c::tcp_snd_wnd_test()
- //
- // Linux checks this for all segment transmits not triggered
- // by a probe timer. On this condition, it defers the segment
- // split and transmit to a short probe timer.
- // ref: include/net/tcp.h::tcp_check_probe_timer()
- // ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
- //
- // Instead of defining a new transmit timer, we attempt to split the
- // segment right here if there are no pending segments.
- // If there are pending segments, segment transmits are deferred
- // to the retransmit timer handler.
- if s.sndUna != s.sndNxt && !segEnd.LessThan(end) {
- return false
- }
-
if !seg.sequenceNumber.LessThan(end) {
return false
}
@@ -861,14 +842,48 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
return false
}
- // The segment size limit is computed as a function of sender congestion
- // window and MSS. When sender congestion window is > 1, this limit can
- // be larger than MSS. Ensure that the currently available send space
- // is not greater than minimum of this limit and MSS.
+ // If the whole segment or at least 1MSS sized segment cannot
+ // be accomodated in the receiver advertized window, skip
+ // splitting and sending of the segment. ref:
+ // net/ipv4/tcp_output.c::tcp_snd_wnd_test()
+ //
+ // Linux checks this for all segment transmits not triggered by
+ // a probe timer. On this condition, it defers the segment split
+ // and transmit to a short probe timer.
+ //
+ // ref: include/net/tcp.h::tcp_check_probe_timer()
+ // ref: net/ipv4/tcp_output.c::tcp_write_wakeup()
+ //
+ // Instead of defining a new transmit timer, we attempt to split
+ // the segment right here if there are no pending segments. If
+ // there are pending segments, segment transmits are deferred to
+ // the retransmit timer handler.
+ if s.sndUna != s.sndNxt {
+ switch {
+ case available >= seg.data.Size():
+ // OK to send, the whole segments fits in the
+ // receiver's advertised window.
+ case available >= s.maxPayloadSize:
+ // OK to send, at least 1 MSS sized segment fits
+ // in the receiver's advertised window.
+ default:
+ return false
+ }
+ }
+
+ // The segment size limit is computed as a function of sender
+ // congestion window and MSS. When sender congestion window is >
+ // 1, this limit can be larger than MSS. Ensure that the
+ // currently available send space is not greater than minimum of
+ // this limit and MSS.
if available > limit {
available = limit
}
- if available > s.maxPayloadSize {
+
+ // If GSO is not in use then cap available to
+ // maxPayloadSize. When GSO is in use the gVisor GSO logic or
+ // the host GSO logic will cap the segment to the correct size.
+ if s.ep.gso == nil && available > s.maxPayloadSize {
available = s.maxPayloadSize
}
diff --git a/test/packetimpact/runner/defs.bzl b/test/packetimpact/runner/defs.bzl
index ea66b9756..77cdfea12 100644
--- a/test/packetimpact/runner/defs.bzl
+++ b/test/packetimpact/runner/defs.bzl
@@ -20,12 +20,12 @@ def _packetimpact_test_impl(ctx):
])
ctx.actions.write(bench, bench_content, is_executable = True)
- transitive_files = depset()
+ transitive_files = []
if hasattr(ctx.attr._test_runner, "data_runfiles"):
- transitive_files = depset(ctx.attr._test_runner.data_runfiles.files)
+ transitive_files.append(ctx.attr._test_runner.data_runfiles.files)
runfiles = ctx.runfiles(
files = [test_runner] + ctx.files.testbench_binary + ctx.files._posix_server_binary,
- transitive_files = transitive_files,
+ transitive_files = depset(transitive = transitive_files),
collect_default = True,
collect_data = True,
)
diff --git a/test/packetimpact/runner/packetimpact_test.go b/test/packetimpact/runner/packetimpact_test.go
index 3cac5915f..c0a2620de 100644
--- a/test/packetimpact/runner/packetimpact_test.go
+++ b/test/packetimpact/runner/packetimpact_test.go
@@ -242,7 +242,13 @@ func TestOne(t *testing.T) {
}
// Kill so that it will flush output.
- defer testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+ defer func() {
+ // Wait 1 second before killing tcpdump to give it time to flush
+ // any packets. On linux tests killing it immediately can
+ // sometimes result in partial pcaps.
+ time.Sleep(1 * time.Second)
+ testbench.Exec(dockerutil.RunOpts{}, "killall", snifferArgs[0])
+ }()
if _, err := testbench.WaitForOutput(snifferRegex, 60*time.Second); err != nil {
t.Fatalf("sniffer on %s never listened: %s", dut.Name, err)
diff --git a/test/packetimpact/tests/BUILD b/test/packetimpact/tests/BUILD
index 1c9f59df5..85749c559 100644
--- a/test/packetimpact/tests/BUILD
+++ b/test/packetimpact/tests/BUILD
@@ -183,8 +183,6 @@ packetimpact_go_test(
packetimpact_go_test(
name = "tcp_queue_receive_in_syn_sent",
srcs = ["tcp_queue_receive_in_syn_sent_test.go"],
- # TODO(b/157658105): Fix netstack then remove the line below.
- expect_netstack_failure = True,
deps = [
"//pkg/tcpip/header",
"//test/packetimpact/testbench",
@@ -213,16 +211,6 @@ packetimpact_go_test(
)
packetimpact_go_test(
- name = "tcp_splitseg_mss",
- srcs = ["tcp_splitseg_mss_test.go"],
- deps = [
- "//pkg/tcpip/header",
- "//test/packetimpact/testbench",
- "@org_golang_x_sys//unix:go_default_library",
- ],
-)
-
-packetimpact_go_test(
name = "tcp_cork_mss",
srcs = ["tcp_cork_mss_test.go"],
deps = [
diff --git a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
index b640d8673..8fbec893b 100644
--- a/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
+++ b/test/packetimpact/tests/tcp_queue_receive_in_syn_sent_test.go
@@ -35,53 +35,98 @@ func init() {
testbench.RegisterFlags(flag.CommandLine)
}
+// TestQueueReceiveInSynSent tests receive behavior when the TCP state
+// is SYN-SENT.
+// It tests for 2 variants where the receive is blocked and:
+// (1) we complete handshake and send sample data.
+// (2) we send a TCP RST.
func TestQueueReceiveInSynSent(t *testing.T) {
- dut := testbench.NewDUT(t)
- defer dut.TearDown()
+ for _, tt := range []struct {
+ description string
+ reset bool
+ }{
+ {description: "Send DATA", reset: false},
+ {description: "Send RST", reset: true},
+ } {
+ t.Run(tt.description, func(t *testing.T) {
+ dut := testbench.NewDUT(t)
+ defer dut.TearDown()
- socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
- conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
- defer conn.Close()
+ socket, remotePort := dut.CreateBoundSocket(unix.SOCK_STREAM, unix.IPPROTO_TCP, net.ParseIP(testbench.RemoteIPv4))
+ conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
+ defer conn.Close()
- sampleData := []byte("Sample Data")
+ sampleData := []byte("Sample Data")
- dut.SetNonBlocking(socket, true)
- if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) {
- t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err)
- }
- if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil {
- t.Fatalf("expected a SYN from DUT, but got none: %s", err)
- }
+ dut.SetNonBlocking(socket, true)
+ if _, err := dut.ConnectWithErrno(context.Background(), socket, conn.LocalAddr()); !errors.Is(err, syscall.EINPROGRESS) {
+ t.Fatalf("failed to bring DUT to SYN-SENT, got: %s, want EINPROGRESS", err)
+ }
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn)}, time.Second); err != nil {
+ t.Fatalf("expected a SYN from DUT, but got none: %s", err)
+ }
- // Issue RECEIVE call in SYN-SENT, this should be queued for process until the connection
- // is established.
- dut.SetNonBlocking(socket, false)
- var wg sync.WaitGroup
- defer wg.Wait()
- wg.Add(1)
- go func() {
- defer wg.Done()
- ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
- defer cancel()
- n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0)
- if n == -1 {
- t.Fatalf("failed to recv on DUT: %s", err)
- }
- if got := buff[:n]; !bytes.Equal(got, sampleData) {
- t.Fatalf("received data don't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData))
- }
- }()
-
- // The following sleep is used to prevent the connection from being established while the
- // RPC is in flight.
- time.Sleep(time.Second)
-
- // Bring the connection to Established.
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)})
- if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
- t.Fatalf("expected an ACK from DUT, but got none: %s", err)
- }
+ if _, _, err := dut.RecvWithErrno(context.Background(), socket, int32(len(sampleData)), 0); err != syscall.Errno(unix.EWOULDBLOCK) {
+ t.Fatalf("expected error %s, got %s", syscall.Errno(unix.EWOULDBLOCK), err)
+ }
+
+ // Test blocking read.
+ dut.SetNonBlocking(socket, false)
+
+ var wg sync.WaitGroup
+ defer wg.Wait()
+ wg.Add(1)
+ var block sync.WaitGroup
+ block.Add(1)
+ go func() {
+ defer wg.Done()
+ ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
+ defer cancel()
- // Send sample data to DUT.
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData})
+ block.Done()
+ // Issue RECEIVE call in SYN-SENT, this should be queued for
+ // process until the connection is established.
+ n, buff, err := dut.RecvWithErrno(ctx, socket, int32(len(sampleData)), 0)
+ if tt.reset {
+ if err != syscall.Errno(unix.ECONNREFUSED) {
+ t.Errorf("expected error %s, got %s", syscall.Errno(unix.ECONNREFUSED), err)
+ }
+ if n != -1 {
+ t.Errorf("expected return value %d, got %d", -1, n)
+ }
+ return
+ }
+ if n == -1 {
+ t.Errorf("failed to recv on DUT: %s", err)
+ }
+ if got := buff[:n]; !bytes.Equal(got, sampleData) {
+ t.Errorf("received data doesn't match, got:\n%s, want:\n%s", hex.Dump(got), hex.Dump(sampleData))
+ }
+ }()
+
+ // Wait for the goroutine to be scheduled and before it
+ // blocks on endpoint receive.
+ block.Wait()
+ // The following sleep is used to prevent the connection
+ // from being established before we are blocked on Recv.
+ time.Sleep(100 * time.Millisecond)
+
+ if tt.reset {
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagRst | header.TCPFlagAck)})
+ return
+ }
+
+ // Bring the connection to Established.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagSyn | header.TCPFlagAck)})
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+ t.Fatalf("expected an ACK from DUT, but got none: %s", err)
+ }
+
+ // Send sample payload and expect an ACK.
+ conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: sampleData})
+ if _, err := conn.Expect(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, time.Second); err != nil {
+ t.Fatalf("expected an ACK from DUT, but got none: %s", err)
+ }
+ })
+ }
}
diff --git a/test/packetimpact/tests/tcp_splitseg_mss_test.go b/test/packetimpact/tests/tcp_splitseg_mss_test.go
deleted file mode 100644
index 9350d0988..000000000
--- a/test/packetimpact/tests/tcp_splitseg_mss_test.go
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package tcp_splitseg_mss_test
-
-import (
- "flag"
- "testing"
- "time"
-
- "golang.org/x/sys/unix"
- "gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/test/packetimpact/testbench"
-)
-
-func init() {
- testbench.RegisterFlags(flag.CommandLine)
-}
-
-// TestTCPSplitSegMSS lets the dut try to send segments larger than MSS.
-// It tests if the transmitted segments are capped at MSS and are split.
-func TestTCPSplitSegMSS(t *testing.T) {
- dut := testbench.NewDUT(t)
- defer dut.TearDown()
- listenFD, remotePort := dut.CreateListener(unix.SOCK_STREAM, unix.IPPROTO_TCP, 1)
- defer dut.Close(listenFD)
- conn := testbench.NewTCPIPv4(t, testbench.TCP{DstPort: &remotePort}, testbench.TCP{SrcPort: &remotePort})
- defer conn.Close()
-
- const mss = uint32(header.TCPDefaultMSS)
- options := make([]byte, header.TCPOptionMSSLength)
- header.EncodeMSSOption(mss, options)
- conn.ConnectWithOptions(options)
-
- acceptFD, _ := dut.Accept(listenFD)
- defer dut.Close(acceptFD)
-
- // Let the dut send a segment larger than MSS.
- largeData := make([]byte, mss+1)
- for i := 0; i < 2; i++ {
- dut.Send(acceptFD, largeData, 0)
- if i == 0 {
- // On Linux, the initial segment goes out beyond MSS and the segment
- // split occurs on retransmission. Call ExpectData to wait to
- // receive the split segment.
- if _, err := conn.ExpectData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- } else {
- if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)}, &testbench.Payload{Bytes: largeData[:mss]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- }
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
- if _, err := conn.ExpectNextData(&testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck | header.TCPFlagPsh)}, &testbench.Payload{Bytes: largeData[mss:]}, time.Second); err != nil {
- t.Fatalf("expected payload was not received: %s", err)
- }
- conn.Send(testbench.TCP{Flags: testbench.Uint8(header.TCPFlagAck)})
- }
-}
diff --git a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
index 5ab193181..8c89d57c9 100644
--- a/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
+++ b/test/packetimpact/tests/tcp_zero_window_probe_retransmit_test.go
@@ -88,8 +88,8 @@ func TestZeroWindowProbeRetransmit(t *testing.T) {
continue
}
// Check if the probes came at exponentially increasing intervals.
- if p := time.Since(start); p < current-startProbeDuration {
- t.Fatalf("zero probe came sooner interval %d probe %d\n", p, i)
+ if got, want := time.Since(start), current-startProbeDuration; got < want {
+ t.Errorf("got zero probe %d after %s, want >= %s", i, got, want)
}
// Acknowledge the zero-window probes from the dut.
conn.Send(testbench.TCP{AckNum: ackProbe, Flags: testbench.Uint8(header.TCPFlagAck), WindowSize: testbench.Uint16(0)})
diff --git a/test/syscalls/BUILD b/test/syscalls/BUILD
index 36c178e4a..7c4cd8192 100644
--- a/test/syscalls/BUILD
+++ b/test/syscalls/BUILD
@@ -199,6 +199,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:fallocate_test",
+ vfs2 = "True",
)
syscall_test(
@@ -288,6 +289,7 @@ syscall_test(
size = "medium",
add_overlay = True,
test = "//test/syscalls/linux:ioctl_test",
+ vfs2 = "True",
)
syscall_test(
@@ -355,6 +357,7 @@ syscall_test(
size = "medium",
shard_count = 5,
test = "//test/syscalls/linux:mmap_test",
+ vfs2 = "True",
)
syscall_test(
@@ -528,6 +531,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:pwritev2_test",
+ vfs2 = "True",
)
syscall_test(
@@ -960,6 +964,7 @@ syscall_test(
syscall_test(
add_overlay = True,
test = "//test/syscalls/linux:sync_file_range_test",
+ vfs2 = "True",
)
syscall_test(
diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD
index 8c7d54b21..9e097c888 100644
--- a/test/syscalls/linux/BUILD
+++ b/test/syscalls/linux/BUILD
@@ -748,9 +748,14 @@ cc_binary(
linkstatic = 1,
deps = [
":file_base",
+ ":socket_test_util",
"//test/util:cleanup",
+ "//test/util:eventfd_util",
"//test/util:file_descriptor",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
gtest,
+ "//test/util:posix_error",
"//test/util:temp_path",
"//test/util:test_main",
"//test/util:test_util",
diff --git a/test/syscalls/linux/fallocate.cc b/test/syscalls/linux/fallocate.cc
index 7819f4ac3..cabc2b751 100644
--- a/test/syscalls/linux/fallocate.cc
+++ b/test/syscalls/linux/fallocate.cc
@@ -15,16 +15,27 @@
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
+#include <sys/eventfd.h>
#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/timerfd.h>
#include <syscall.h>
#include <time.h>
#include <unistd.h>
+#include <ctime>
+
#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/time.h"
#include "test/syscalls/linux/file_base.h"
+#include "test/syscalls/linux/socket_test_util.h"
#include "test/util/cleanup.h"
+#include "test/util/eventfd_util.h"
#include "test/util/file_descriptor.h"
+#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/test_util.h"
@@ -70,6 +81,12 @@ TEST_F(AllocateTest, Fallocate) {
ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 39, 1), SyscallSucceeds());
ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
EXPECT_EQ(buf.st_size, 40);
+
+ // Given length 0 should fail with EINVAL.
+ ASSERT_THAT(fallocate(test_file_fd_.get(), 0, 50, 0),
+ SyscallFailsWithErrno(EINVAL));
+ ASSERT_THAT(fstat(test_file_fd_.get(), &buf), SyscallSucceeds());
+ EXPECT_EQ(buf.st_size, 40);
}
TEST_F(AllocateTest, FallocateInvalid) {
@@ -136,6 +153,34 @@ TEST_F(AllocateTest, FallocateRlimit) {
ASSERT_THAT(sigprocmask(SIG_UNBLOCK, &new_mask, nullptr), SyscallSucceeds());
}
+TEST_F(AllocateTest, FallocateOtherFDs) {
+ int fd;
+ ASSERT_THAT(fd = timerfd_create(CLOCK_MONOTONIC, 0), SyscallSucceeds());
+ auto timer_fd = FileDescriptor(fd);
+ EXPECT_THAT(fallocate(timer_fd.get(), 0, 0, 10),
+ SyscallFailsWithErrno(ENODEV));
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ ASSERT_THAT(fd = signalfd(-1, &mask, 0), SyscallSucceeds());
+ auto sfd = FileDescriptor(fd);
+ EXPECT_THAT(fallocate(sfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+ auto efd =
+ ASSERT_NO_ERRNO_AND_VALUE(NewEventFD(0, EFD_NONBLOCK | EFD_SEMAPHORE));
+ EXPECT_THAT(fallocate(efd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+ auto sockfd = ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
+ EXPECT_THAT(fallocate(sockfd.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+
+ int socks[2];
+ ASSERT_THAT(socketpair(AF_UNIX, SOCK_STREAM, PF_UNIX, socks),
+ SyscallSucceeds());
+ auto sock0 = FileDescriptor(socks[0]);
+ auto sock1 = FileDescriptor(socks[1]);
+ EXPECT_THAT(fallocate(sock0.get(), 0, 0, 10), SyscallFailsWithErrno(ENODEV));
+}
+
} // namespace
} // namespace testing
} // namespace gvisor
diff --git a/test/syscalls/linux/fcntl.cc b/test/syscalls/linux/fcntl.cc
index 9130618fa..5467fa2c8 100644
--- a/test/syscalls/linux/fcntl.cc
+++ b/test/syscalls/linux/fcntl.cc
@@ -18,7 +18,10 @@
#include <syscall.h>
#include <unistd.h>
+#include <iostream>
+#include <list>
#include <string>
+#include <vector>
#include "gtest/gtest.h"
#include "absl/base/macros.h"
@@ -1028,6 +1031,30 @@ TEST(FcntlTest, SetOwnPgrp) {
MaybeSave();
}
+TEST(FcntlTest, SetOwnUnset) {
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ // Set and unset pid.
+ pid_t pid;
+ EXPECT_THAT(pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, pid), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds());
+
+ EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+ SyscallSucceedsWithValue(0));
+
+ // Set and unset pgid.
+ pid_t pgid;
+ EXPECT_THAT(pgid = getpgrp(), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, -pgid), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN, 0), SyscallSucceeds());
+
+ EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+ SyscallSucceedsWithValue(0));
+ MaybeSave();
+}
+
// F_SETOWN flips the sign of negative values, an operation that is guarded
// against overflow.
TEST(FcntlTest, SetOwnOverflow) {
@@ -1138,6 +1165,39 @@ TEST(FcntlTest, SetOwnExPgrp) {
MaybeSave();
}
+TEST(FcntlTest, SetOwnExUnset) {
+ SKIP_IF(IsRunningWithVFS1());
+
+ FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
+ Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
+
+ // Set and unset pid.
+ f_owner_ex owner = {};
+ owner.type = F_OWNER_PID;
+ EXPECT_THAT(owner.pid = getpid(), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+ SyscallSucceeds());
+ owner.pid = 0;
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+ SyscallSucceeds());
+
+ EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+ SyscallSucceedsWithValue(0));
+
+ // Set and unset pgid.
+ owner.type = F_OWNER_PGRP;
+ EXPECT_THAT(owner.pid = getpgrp(), SyscallSucceeds());
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+ SyscallSucceeds());
+ owner.pid = 0;
+ ASSERT_THAT(syscall(__NR_fcntl, s.get(), F_SETOWN_EX, &owner),
+ SyscallSucceeds());
+
+ EXPECT_THAT(syscall(__NR_fcntl, s.get(), F_GETOWN),
+ SyscallSucceedsWithValue(0));
+ MaybeSave();
+}
+
TEST(FcntlTest, GetOwnExTid) {
FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
Socket(AF_UNIX, SOCK_SEQPACKET | SOCK_NONBLOCK | SOCK_CLOEXEC, 0));
diff --git a/test/syscalls/linux/pwritev2.cc b/test/syscalls/linux/pwritev2.cc
index 3fe5a600f..63b686c62 100644
--- a/test/syscalls/linux/pwritev2.cc
+++ b/test/syscalls/linux/pwritev2.cc
@@ -69,7 +69,7 @@ ssize_t pwritev2(unsigned long fd, const struct iovec* iov,
}
// This test is the base case where we call pwritev (no offset, no flags).
-TEST(Writev2Test, TestBaseCall) {
+TEST(Writev2Test, BaseCall) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
@@ -97,7 +97,7 @@ TEST(Writev2Test, TestBaseCall) {
}
// This test is where we call pwritev2 with a positive offset and no flags.
-TEST(Pwritev2Test, TestValidPositiveOffset) {
+TEST(Pwritev2Test, ValidPositiveOffset) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
std::string prefix(kBufSize, '0');
@@ -129,7 +129,7 @@ TEST(Pwritev2Test, TestValidPositiveOffset) {
// This test is the base case where we call writev by using -1 as the offset.
// The write should use the file offset, so the test increments the file offset
// prior to call pwritev2.
-TEST(Pwritev2Test, TestNegativeOneOffset) {
+TEST(Pwritev2Test, NegativeOneOffset) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const std::string prefix = "00";
@@ -164,7 +164,7 @@ TEST(Pwritev2Test, TestNegativeOneOffset) {
// pwritev2 requires if the RWF_HIPRI flag is passed, the fd must be opened with
// O_DIRECT. This test implements a correct call with the RWF_HIPRI flag.
-TEST(Pwritev2Test, TestCallWithRWF_HIPRI) {
+TEST(Pwritev2Test, CallWithRWF_HIPRI) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
@@ -189,47 +189,8 @@ TEST(Pwritev2Test, TestCallWithRWF_HIPRI) {
EXPECT_EQ(buf, content);
}
-// This test checks that pwritev2 can be called with valid flags
-TEST(Pwritev2Test, TestCallWithValidFlags) {
- SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
-
- const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
- GetAbsoluteTestTmpdir(), "", TempPath::kDefaultFileMode));
- const FileDescriptor fd =
- ASSERT_NO_ERRNO_AND_VALUE(Open(file.path(), O_RDWR));
-
- std::vector<char> content(kBufSize, '0');
- struct iovec iov;
- iov.iov_base = content.data();
- iov.iov_len = content.size();
-
- EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
- /*offset=*/0, /*flags=*/RWF_DSYNC),
- SyscallSucceedsWithValue(kBufSize));
-
- std::vector<char> buf(content.size());
- EXPECT_THAT(read(fd.get(), buf.data(), buf.size()),
- SyscallSucceedsWithValue(buf.size()));
-
- EXPECT_EQ(buf, content);
-
- SetContent(content);
-
- EXPECT_THAT(pwritev2(fd.get(), &iov, /*iovcnt=*/1,
- /*offset=*/0, /*flags=*/0x4),
- SyscallSucceedsWithValue(kBufSize));
-
- ASSERT_THAT(lseek(fd.get(), 0, SEEK_CUR),
- SyscallSucceedsWithValue(content.size()));
-
- EXPECT_THAT(pread(fd.get(), buf.data(), buf.size(), /*offset=*/0),
- SyscallSucceedsWithValue(buf.size()));
-
- EXPECT_EQ(buf, content);
-}
-
// This test calls pwritev2 with a bad file descriptor.
-TEST(Writev2Test, TestBadFile) {
+TEST(Writev2Test, BadFile) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
ASSERT_THAT(pwritev2(/*fd=*/-1, /*iov=*/nullptr, /*iovcnt=*/0,
/*offset=*/0, /*flags=*/0),
@@ -237,7 +198,7 @@ TEST(Writev2Test, TestBadFile) {
}
// This test calls pwrite2 with an invalid offset.
-TEST(Pwritev2Test, TestInvalidOffset) {
+TEST(Pwritev2Test, InvalidOffset) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
@@ -255,7 +216,7 @@ TEST(Pwritev2Test, TestInvalidOffset) {
SyscallFailsWithErrno(EINVAL));
}
-TEST(Pwritev2Test, TestUnseekableFileValid) {
+TEST(Pwritev2Test, UnseekableFileValid) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
int pipe_fds[2];
@@ -285,7 +246,7 @@ TEST(Pwritev2Test, TestUnseekableFileValid) {
// Calling pwritev2 with a non-negative offset calls pwritev. Calling pwritev
// with an unseekable file is not allowed. A pipe is used for an unseekable
// file.
-TEST(Pwritev2Test, TestUnseekableFileInValid) {
+TEST(Pwritev2Test, UnseekableFileInvalid) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
int pipe_fds[2];
@@ -304,7 +265,7 @@ TEST(Pwritev2Test, TestUnseekableFileInValid) {
EXPECT_THAT(close(pipe_fds[1]), SyscallSucceeds());
}
-TEST(Pwritev2Test, TestReadOnlyFile) {
+TEST(Pwritev2Test, ReadOnlyFile) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
@@ -323,7 +284,7 @@ TEST(Pwritev2Test, TestReadOnlyFile) {
}
// This test calls pwritev2 with an invalid flag.
-TEST(Pwritev2Test, TestInvalidFlag) {
+TEST(Pwritev2Test, InvalidFlag) {
SKIP_IF(pwritev2(-1, nullptr, 0, 0, 0) < 0 && errno == ENOSYS);
const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateFileWith(
diff --git a/test/syscalls/linux/socket_netlink_route.cc b/test/syscalls/linux/socket_netlink_route.cc
index fbe61c5a0..e6647a1c3 100644
--- a/test/syscalls/linux/socket_netlink_route.cc
+++ b/test/syscalls/linux/socket_netlink_route.cc
@@ -595,7 +595,7 @@ TEST(NetlinkRouteTest, GetRouteRequest) {
ASSERT_NO_ERRNO_AND_VALUE(NetlinkBoundSocket(NETLINK_ROUTE));
uint32_t port = ASSERT_NO_ERRNO_AND_VALUE(NetlinkPortID(fd.get()));
- struct __attribute__((__packed__)) request {
+ struct request {
struct nlmsghdr hdr;
struct rtmsg rtm;
struct nlattr nla;
diff --git a/test/syscalls/linux/sticky.cc b/test/syscalls/linux/sticky.cc
index 39f4fb801..4afed6d08 100644
--- a/test/syscalls/linux/sticky.cc
+++ b/test/syscalls/linux/sticky.cc
@@ -42,12 +42,15 @@ TEST(StickyTest, StickyBitPermDenied) {
const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateFileWith(parent.path(), "some content", 0755));
- const TempPath dir =
- ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
- const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateSymlinkTo(parent.path(), file.path()));
+
+ // After changing credentials below, we need to use an open fd to make
+ // modifications in the parent dir, because there is no guarantee that we will
+ // still have the ability to open it.
+ const FileDescriptor parent_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+ ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+ ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+ ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -65,12 +68,14 @@ TEST(StickyTest, StickyBitPermDenied) {
syscall(SYS_setresuid, -1, absl::GetFlag(FLAGS_scratch_uid), -1),
SyscallSucceeds());
- std::string new_path = NewTempAbsPath();
- EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
+ EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+ SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlinkat(parent_fd.get(), "file", 0),
+ SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
+ SyscallFailsWithErrno(EPERM));
+ EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0),
SyscallFailsWithErrno(EPERM));
- EXPECT_THAT(unlink(file.path().c_str()), SyscallFailsWithErrno(EPERM));
- EXPECT_THAT(rmdir(dir.path().c_str()), SyscallFailsWithErrno(EPERM));
- EXPECT_THAT(unlink(link.path().c_str()), SyscallFailsWithErrno(EPERM));
});
}
@@ -79,12 +84,15 @@ TEST(StickyTest, StickyBitSameUID) {
const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateFileWith(parent.path(), "some content", 0755));
- const TempPath dir =
- ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
- const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateSymlinkTo(parent.path(), file.path()));
+
+ // After changing credentials below, we need to use an open fd to make
+ // modifications in the parent dir, because there is no guarantee that we will
+ // still have the ability to open it.
+ const FileDescriptor parent_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+ ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+ ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+ ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -100,12 +108,12 @@ TEST(StickyTest, StickyBitSameUID) {
SyscallSucceeds());
// We still have the same EUID.
- std::string new_path = NewTempAbsPath();
- EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
+ EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+ SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
SyscallSucceeds());
- EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds());
- EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds());
- EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds());
});
}
@@ -114,12 +122,15 @@ TEST(StickyTest, StickyBitCapFOWNER) {
const TempPath parent = ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDir());
EXPECT_THAT(chmod(parent.path().c_str(), 0777 | S_ISVTX), SyscallSucceeds());
- const TempPath file = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateFileWith(parent.path(), "some content", 0755));
- const TempPath dir =
- ASSERT_NO_ERRNO_AND_VALUE(TempPath::CreateDirWith(parent.path(), 0755));
- const TempPath link = ASSERT_NO_ERRNO_AND_VALUE(
- TempPath::CreateSymlinkTo(parent.path(), file.path()));
+
+ // After changing credentials below, we need to use an open fd to make
+ // modifications in the parent dir, because there is no guarantee that we will
+ // still have the ability to open it.
+ const FileDescriptor parent_fd =
+ ASSERT_NO_ERRNO_AND_VALUE(Open(parent.path(), O_DIRECTORY));
+ ASSERT_THAT(openat(parent_fd.get(), "file", O_CREAT), SyscallSucceeds());
+ ASSERT_THAT(mkdirat(parent_fd.get(), "dir", 0777), SyscallSucceeds());
+ ASSERT_THAT(symlinkat("xyz", parent_fd.get(), "link"), SyscallSucceeds());
// Drop privileges and change IDs only in child thread, or else this parent
// thread won't be able to open some log files after the test ends.
@@ -136,12 +147,12 @@ TEST(StickyTest, StickyBitCapFOWNER) {
SyscallSucceeds());
EXPECT_NO_ERRNO(SetCapability(CAP_FOWNER, true));
- std::string new_path = NewTempAbsPath();
- EXPECT_THAT(rename(file.path().c_str(), new_path.c_str()),
+ EXPECT_THAT(renameat(parent_fd.get(), "file", parent_fd.get(), "file2"),
+ SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "file2", 0), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "dir", AT_REMOVEDIR),
SyscallSucceeds());
- EXPECT_THAT(unlink(new_path.c_str()), SyscallSucceeds());
- EXPECT_THAT(rmdir(dir.path().c_str()), SyscallSucceeds());
- EXPECT_THAT(unlink(link.path().c_str()), SyscallSucceeds());
+ EXPECT_THAT(unlinkat(parent_fd.get(), "link", 0), SyscallSucceeds());
});
}
} // namespace
diff --git a/test/util/test_util.cc b/test/util/test_util.cc
index b20758626..8a037f45f 100644
--- a/test/util/test_util.cc
+++ b/test/util/test_util.cc
@@ -40,15 +40,14 @@
namespace gvisor {
namespace testing {
-#define TEST_ON_GVISOR "TEST_ON_GVISOR"
-#define GVISOR_NETWORK "GVISOR_NETWORK"
-#define GVISOR_VFS "GVISOR_VFS"
+constexpr char kGvisorNetwork[] = "GVISOR_NETWORK";
+constexpr char kGvisorVfs[] = "GVISOR_VFS";
bool IsRunningOnGvisor() { return GvisorPlatform() != Platform::kNative; }
const std::string GvisorPlatform() {
// Set by runner.go.
- const char* env = getenv(TEST_ON_GVISOR);
+ const char* env = getenv(kTestOnGvisor);
if (!env) {
return Platform::kNative;
}
@@ -56,12 +55,12 @@ const std::string GvisorPlatform() {
}
bool IsRunningWithHostinet() {
- const char* env = getenv(GVISOR_NETWORK);
+ const char* env = getenv(kGvisorNetwork);
return env && strcmp(env, "host") == 0;
}
bool IsRunningWithVFS1() {
- const char* env = getenv(GVISOR_VFS);
+ const char* env = getenv(kGvisorVfs);
if (env == nullptr) {
// If not set, it's running on Linux.
return false;
diff --git a/test/util/test_util.h b/test/util/test_util.h
index e635827e6..109078fc7 100644
--- a/test/util/test_util.h
+++ b/test/util/test_util.h
@@ -195,6 +195,8 @@
namespace gvisor {
namespace testing {
+constexpr char kTestOnGvisor[] = "TEST_ON_GVISOR";
+
// TestInit must be called prior to RUN_ALL_TESTS.
//
// This parses all arguments and adjusts argc and argv appropriately.
@@ -215,6 +217,7 @@ namespace Platform {
constexpr char kNative[] = "native";
constexpr char kPtrace[] = "ptrace";
constexpr char kKVM[] = "kvm";
+constexpr char kFuchsia[] = "fuchsia";
} // namespace Platform
bool IsRunningOnGvisor();
diff --git a/website/BUILD b/website/BUILD
index c97b2560b..4488cb543 100644
--- a/website/BUILD
+++ b/website/BUILD
@@ -138,6 +138,7 @@ docs(
"//g3doc:community",
"//g3doc:index",
"//g3doc:roadmap",
+ "//g3doc:style",
"//g3doc/architecture_guide:performance",
"//g3doc/architecture_guide:platforms",
"//g3doc/architecture_guide:resources",
diff --git a/website/_includes/footer-links.html b/website/_includes/footer-links.html
index 10c28ead4..2036dbaa9 100644
--- a/website/_includes/footer-links.html
+++ b/website/_includes/footer-links.html
@@ -15,7 +15,7 @@
<ul class="list-unstyled">
<li><a href="https://github.com/google/gvisor/issues">Issues</a></li>
<li><a href="/docs">Documentation</a></li>
- <li><a href="/docs/user_guide/FAQ">FAQ</a></li>
+ <li><a href="/docs/user_guide/faq">FAQ</a></li>
</ul>
</div>
<div class="col-sm-3 col-md-2">
diff --git a/website/cmd/server/main.go b/website/cmd/server/main.go
index 7c8bc9bfa..c401b6abd 100644
--- a/website/cmd/server/main.go
+++ b/website/cmd/server/main.go
@@ -35,6 +35,10 @@ var redirects = map[string]string{
// For links.
"/faq": "/docs/user_guide/faq/",
+ // From 2020-05-12 to 2020-06-30, the FAQ URL was uppercase. Redirect that
+ // back to maintain any links.
+ "/docs/user_guide/FAQ/": "/docs/user_guide/faq/",
+
// Redirects to compatibility docs.
"/c": "/docs/user_guide/compatibility/",
"/c/linux/amd64": "/docs/user_guide/compatibility/linux/amd64/",