summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/amutex/BUILD1
-rw-r--r--pkg/amutex/amutex.go17
-rw-r--r--pkg/sentry/arch/signal_arm64.go22
-rw-r--r--pkg/sentry/fs/gofer/fs.go3
-rw-r--r--pkg/sentry/fs/gofer/path.go24
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go11
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go62
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go13
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go28
-rw-r--r--pkg/sentry/fsimpl/host/host.go41
-rw-r--r--pkg/sentry/fsimpl/tmpfs/BUILD3
-rw-r--r--pkg/sentry/fsimpl/tmpfs/filesystem.go46
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file.go42
-rw-r--r--pkg/sentry/fsimpl/tmpfs/regular_file_test.go138
-rw-r--r--pkg/sentry/fsimpl/tmpfs/stat_test.go4
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go38
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs_test.go156
-rw-r--r--pkg/sentry/kernel/BUILD1
-rw-r--r--pkg/sentry/kernel/kernel.go25
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go22
-rw-r--r--pkg/sentry/socket/netfilter/owner_matcher.go81
-rw-r--r--pkg/sentry/socket/netstack/BUILD1
-rw-r--r--pkg/sentry/socket/netstack/netstack.go52
-rw-r--r--pkg/sentry/socket/netstack/netstack_vfs2.go17
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/BUILD2
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/fd.go10
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/memfd.go63
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/vfs2.go2
-rw-r--r--pkg/tcpip/buffer/view.go6
-rw-r--r--pkg/tcpip/buffer/view_test.go36
-rw-r--r--pkg/tcpip/link/qdisc/fifo/endpoint.go2
-rw-r--r--pkg/tcpip/tcpip.go25
-rw-r--r--pkg/tcpip/transport/tcp/endpoint.go64
-rw-r--r--pkg/tcpip/transport/tcp/protocol.go52
-rw-r--r--pkg/tcpip/transport/tcp/snd.go59
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go114
36 files changed, 992 insertions, 291 deletions
diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD
index 9612f072e..ffc918846 100644
--- a/pkg/amutex/BUILD
+++ b/pkg/amutex/BUILD
@@ -6,6 +6,7 @@ go_library(
name = "amutex",
srcs = ["amutex.go"],
visibility = ["//:sandbox"],
+ deps = ["//pkg/syserror"],
)
go_test(
diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go
index 1c4fd1784..a078a31db 100644
--- a/pkg/amutex/amutex.go
+++ b/pkg/amutex/amutex.go
@@ -18,6 +18,8 @@ package amutex
import (
"sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Sleeper must be implemented by users of the abortable mutex to allow for
@@ -53,6 +55,21 @@ func (NoopSleeper) SleepFinish(success bool) {}
// Interrupted implements Sleeper.Interrupted.
func (NoopSleeper) Interrupted() bool { return false }
+// Block blocks until either receiving from ch succeeds (in which case it
+// returns nil) or sleeper is interrupted (in which case it returns
+// syserror.ErrInterrupted).
+func Block(sleeper Sleeper, ch <-chan struct{}) error {
+ cancel := sleeper.SleepStart()
+ select {
+ case <-ch:
+ sleeper.SleepFinish(true)
+ return nil
+ case <-cancel:
+ sleeper.SleepFinish(false)
+ return syserror.ErrInterrupted
+ }
+}
+
// AbortableMutex is an abortable mutex. It allows Lock() to be aborted while it
// waits to acquire the mutex.
type AbortableMutex struct {
diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go
index 1cb1adf8c..642c79dda 100644
--- a/pkg/sentry/arch/signal_arm64.go
+++ b/pkg/sentry/arch/signal_arm64.go
@@ -19,6 +19,7 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -134,6 +135,11 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
c.Regs.Regs[1] = uint64(infoAddr)
c.Regs.Regs[2] = uint64(ucAddr)
c.Regs.Regs[30] = uint64(act.Restorer)
+
+ // Save the thread's floating point state.
+ c.sigFPState = append(c.sigFPState, c.aarch64FPState)
+ // Signal handler gets a clean floating point state.
+ c.aarch64FPState = newAarch64FPState()
return nil
}
@@ -155,5 +161,21 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt
c.Regs.Sp = uc.MContext.Sp
c.Regs.Pstate = uc.MContext.Pstate
+ // Restore floating point state.
+ l := len(c.sigFPState)
+ if l > 0 {
+ c.aarch64FPState = c.sigFPState[l-1]
+ // NOTE(cl/133042258): State save requires that any slice
+ // elements from '[len:cap]' to be zero value.
+ c.sigFPState[l-1] = nil
+ c.sigFPState = c.sigFPState[0 : l-1]
+ } else {
+ // This might happen if sigreturn(2) calls are unbalanced with
+ // respect to signal handler entries. This is not expected so
+ // don't bother to do anything fancy with the floating point
+ // state.
+ log.Warningf("sigreturn unable to restore application fpstate")
+ }
+
return uc.Sigset, uc.Stack, nil
}
diff --git a/pkg/sentry/fs/gofer/fs.go b/pkg/sentry/fs/gofer/fs.go
index 9d41fcbdb..8ae2d78d7 100644
--- a/pkg/sentry/fs/gofer/fs.go
+++ b/pkg/sentry/fs/gofer/fs.go
@@ -60,8 +60,7 @@ const (
limitHostFDTranslationKey = "limit_host_fd_translation"
// overlayfsStaleRead if present closes cached readonly file after the first
- // write. This is done to workaround a limitation of overlayfs in kernels
- // before 4.19 where open FDs are not updated after the file is copied up.
+ // write. This is done to workaround a limitation of Linux overlayfs.
overlayfsStaleRead = "overlayfs_stale_read"
)
diff --git a/pkg/sentry/fs/gofer/path.go b/pkg/sentry/fs/gofer/path.go
index a35c3a23d..cf9800100 100644
--- a/pkg/sentry/fs/gofer/path.go
+++ b/pkg/sentry/fs/gofer/path.go
@@ -16,7 +16,6 @@ package gofer
import (
"fmt"
- "syscall"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
@@ -68,7 +67,7 @@ func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string
// Get a p9.File for name.
qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name})
if err != nil {
- if err == syscall.ENOENT {
+ if err == syserror.ENOENT {
if cp.cacheNegativeDirents() {
// Return a negative Dirent. It will stay cached until something
// is created over it.
@@ -207,7 +206,7 @@ func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, t
targetOpts, ok := target.InodeOperations.(*inodeOperations)
if !ok {
- return syscall.EXDEV
+ return syserror.EXDEV
}
if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil {
@@ -251,7 +250,7 @@ func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string,
}
if i.session().overrides == nil {
- return nil, syscall.EOPNOTSUPP
+ return nil, syserror.EOPNOTSUPP
}
// Stabilize the override map while creation is in progress.
@@ -280,7 +279,7 @@ func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name st
// N.B. FIFOs use major/minor numbers 0.
if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
- if i.session().overrides == nil || err != syscall.EPERM {
+ if i.session().overrides == nil || err != syserror.EPERM {
return err
}
// If gofer doesn't support mknod, check if we can create an internal fifo.
@@ -427,17 +426,16 @@ func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent
return syserror.ENAMETOOLONG
}
- // Unwrap the new parent to a *inodeOperations.
- newParentInodeOperations, ok := newParent.InodeOperations.(*inodeOperations)
- if !ok {
- return syscall.EXDEV
+ // Don't allow renames across different mounts.
+ if newParent.MountSource != oldParent.MountSource {
+ return syserror.EXDEV
}
+ // Unwrap the new parent to a *inodeOperations.
+ newParentInodeOperations := newParent.InodeOperations.(*inodeOperations)
+
// Unwrap the old parent to a *inodeOperations.
- oldParentInodeOperations, ok := oldParent.InodeOperations.(*inodeOperations)
- if !ok {
- return syscall.EXDEV
- }
+ oldParentInodeOperations := oldParent.InodeOperations.(*inodeOperations)
// Do the rename.
if err := i.fileState.file.rename(ctx, newParentInodeOperations.fileState.file, newName); err != nil {
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index 3c2b583ae..b095312fe 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -39,14 +39,13 @@ var fsInfo = fs.Info{
// rename implements fs.InodeOperations.Rename for tmpfs nodes.
func rename(ctx context.Context, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
- op, ok := oldParent.InodeOperations.(*Dir)
- if !ok {
- return syserror.EXDEV
- }
- np, ok := newParent.InodeOperations.(*Dir)
- if !ok {
+ // Don't allow renames across different mounts.
+ if newParent.MountSource != oldParent.MountSource {
return syserror.EXDEV
}
+
+ op := oldParent.InodeOperations.(*Dir)
+ np := newParent.InodeOperations.(*Dir)
return ramfs.Rename(ctx, op.ramfsDir, oldName, np.ramfsDir, newName, replacement)
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 4a32821bd..7f2181216 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -21,6 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
@@ -835,6 +837,9 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
if d.isSynthetic() {
return nil, syserror.ENXIO
}
+ if d.fs.iopts.OpenSocketsByConnecting {
+ return d.connectSocketLocked(ctx, opts)
+ }
case linux.S_IFIFO:
if d.isSynthetic() {
return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags)
@@ -843,10 +848,28 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
return d.openSpecialFileLocked(ctx, mnt, opts)
}
+func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+ if opts.Flags&linux.O_DIRECT != 0 {
+ return nil, syserror.EINVAL
+ }
+ fdObj, err := d.file.connect(ctx, p9.AnonymousSocket)
+ if err != nil {
+ return nil, err
+ }
+ fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fdObj.FD(), &host.NewFDOptions{
+ HaveFlags: true,
+ Flags: opts.Flags,
+ })
+ if err != nil {
+ fdObj.Close()
+ return nil, err
+ }
+ fdObj.Release()
+ return fd, nil
+}
+
func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
ats := vfs.AccessTypesForOpenFlags(opts)
- // Treat as a special file. This is done for non-synthetic pipes as well as
- // regular files when d.fs.opts.regularFilesUseSpecialFileFD is true.
if opts.Flags&linux.O_DIRECT != 0 {
return nil, syserror.EINVAL
}
@@ -854,10 +877,15 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts
if err != nil {
return nil, err
}
+ seekable := d.fileType() == linux.S_IFREG
fd := &specialFileFD{
- handle: h,
+ handle: h,
+ seekable: seekable,
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
+ DenyPRead: !seekable,
+ DenyPWrite: !seekable,
+ }); err != nil {
h.close(ctx)
return nil, err
}
@@ -888,7 +916,11 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
creds := rp.Credentials()
name := rp.Component()
- fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ // Filter file creation flags and O_LARGEFILE out; the create RPC already
+ // has the semantics of O_CREAT|O_EXCL, while some servers will choke on
+ // O_LARGEFILE.
+ createFlags := p9.OpenFlags(opts.Flags &^ (linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_LARGEFILE))
+ fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
if err != nil {
dirfile.close(ctx)
return nil, err
@@ -896,7 +928,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
// Then we need to walk to the file we just created to get a non-open fid
// representing it, and to get its metadata. This must use d.file since, as
// explained above, dirfile was invalidated by dirfile.Create().
- walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
+ _, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
if err != nil {
openFile.close(ctx)
if fdobj != nil {
@@ -904,17 +936,6 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
return nil, err
}
- // Sanity-check that we walked to the file we created.
- if createQID.Path != walkQID.Path {
- // Probably due to concurrent remote filesystem mutation?
- ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop)
- nonOpenFile.close(ctx)
- openFile.close(ctx)
- if fdobj != nil {
- fdobj.Close()
- }
- return nil, syserror.EAGAIN
- }
// Construct the new dentry.
child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
@@ -960,16 +981,21 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
childVFSFD = &fd.vfsfd
} else {
+ seekable := child.fileType() == linux.S_IFREG
fd := &specialFileFD{
handle: handle{
file: openFile,
fd: -1,
},
+ seekable: seekable,
}
if fdobj != nil {
fd.handle.fd = int32(fdobj.Release())
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{
+ DenyPRead: !seekable,
+ DenyPWrite: !seekable,
+ }); err != nil {
fd.handle.close(ctx)
return nil, err
}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index e68e37ebc..353e2cf5b 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -143,9 +143,12 @@ type filesystemOptions struct {
// If overlayfsStaleRead is true, O_RDONLY host FDs provided by the remote
// filesystem may not be coherent with writable host FDs opened later, so
- // mappings of the former must be replaced by mappings of the latter. This
- // is usually only the case when the remote filesystem is an overlayfs
- // mount on Linux < 4.19.
+ // all uses of the former must be replaced by uses of the latter. This is
+ // usually only the case when the remote filesystem is a Linux overlayfs
+ // mount. (Prior to Linux 4.18, patch series centered on commit
+ // d1d04ef8572b "ovl: stack file ops", both I/O and memory mappings were
+ // incoherent between pre-copy-up and post-copy-up FDs; after that patch
+ // series, only memory mappings are incoherent.)
overlayfsStaleRead bool
// If regularFilesUseSpecialFileFD is true, application FDs representing
@@ -221,6 +224,10 @@ type InternalFilesystemOptions struct {
// which servers can handle only a single client and report failure if that
// client disconnects.
LeakConnection bool
+
+ // If OpenSocketsByConnecting is true, silently translate attempts to open
+ // files identifying as sockets to connect RPCs.
+ OpenSocketsByConnecting bool
}
// Name implements vfs.FilesystemType.Name.
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 507e0e276..a464e6a94 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -33,13 +33,14 @@ import (
type specialFileFD struct {
fileDescription
- // handle is immutable.
+ // handle is used for file I/O. handle is immutable.
handle handle
- // off is the file offset. off is protected by mu. (POSIX 2.9.7 only
- // requires operations using the file offset to be atomic for regular files
- // and symlinks; however, since specialFileFD may be used for regular
- // files, we apply this atomicity unconditionally.)
+ // seekable is true if this file description represents a file for which
+ // file offset is significant, i.e. a regular file. seekable is immutable.
+ seekable bool
+
+ // If seekable is true, off is the file offset. off is protected by mu.
mu sync.Mutex
off int64
}
@@ -63,7 +64,7 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error {
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if offset < 0 {
+ if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
if opts.Flags != 0 {
@@ -91,6 +92,10 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
// Read implements vfs.FileDescriptionImpl.Read.
func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ if !fd.seekable {
+ return fd.PRead(ctx, dst, -1, opts)
+ }
+
fd.mu.Lock()
n, err := fd.PRead(ctx, dst, fd.off, opts)
fd.off += n
@@ -100,14 +105,14 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if offset < 0 {
+ if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
if opts.Flags != 0 {
return 0, syserror.EOPNOTSUPP
}
- if fd.dentry().fileType() == linux.S_IFREG {
+ if fd.seekable {
limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
if err != nil {
return 0, err
@@ -130,6 +135,10 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ if !fd.seekable {
+ return fd.PWrite(ctx, src, -1, opts)
+ }
+
fd.mu.Lock()
n, err := fd.PWrite(ctx, src, fd.off, opts)
fd.off += n
@@ -139,6 +148,9 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ if !fd.seekable {
+ return 0, syserror.ESPIPE
+ }
fd.mu.Lock()
defer fd.mu.Unlock()
switch whence {
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 36bceeaa4..8caf55a1b 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -40,8 +40,20 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
+// NewFDOptions contains options to NewFD.
+type NewFDOptions struct {
+ // If IsTTY is true, the file descriptor is a TTY.
+ IsTTY bool
+
+ // If HaveFlags is true, use Flags for the new file description. Otherwise,
+ // the new file description will inherit flags from hostFD.
+ HaveFlags bool
+ Flags uint32
+}
+
+// NewFD returns a vfs.FileDescription representing the given host file
+// descriptor. mnt must be Kernel.HostMount().
+func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) (*vfs.FileDescription, error) {
fs, ok := mnt.Filesystem().Impl().(*filesystem)
if !ok {
return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl())
@@ -53,10 +65,14 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
return nil, err
}
- // Get flags for the imported FD.
- flags, err := unix.FcntlInt(uintptr(hostFD), syscall.F_GETFL, 0)
- if err != nil {
- return nil, err
+ flags := opts.Flags
+ if !opts.HaveFlags {
+ // Get flags for the imported FD.
+ flagsInt, err := unix.FcntlInt(uintptr(hostFD), syscall.F_GETFL, 0)
+ if err != nil {
+ return nil, err
+ }
+ flags = uint32(flagsInt)
}
fileMode := linux.FileMode(s.Mode)
@@ -65,13 +81,13 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
// Determine if hostFD is seekable. If not, this syscall will return ESPIPE
// (see fs/read_write.c:llseek), e.g. for pipes, sockets, and some character
// devices.
- _, err = unix.Seek(hostFD, 0, linux.SEEK_CUR)
+ _, err := unix.Seek(hostFD, 0, linux.SEEK_CUR)
seekable := err != syserror.ESPIPE
i := &inode{
hostFD: hostFD,
seekable: seekable,
- isTTY: isTTY,
+ isTTY: opts.IsTTY,
canMap: canMap(uint32(fileType)),
wouldBlock: wouldBlock(uint32(fileType)),
ino: fs.NextIno(),
@@ -101,7 +117,14 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
// i.open will take a reference on d.
defer d.DecRef()
- return i.open(ctx, d.VFSDentry(), mnt, uint32(flags))
+ return i.open(ctx, d.VFSDentry(), mnt, flags)
+}
+
+// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
+func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
+ return NewFD(ctx, mnt, hostFD, &NewFDOptions{
+ IsTTY: isTTY,
+ })
}
// filesystemType implements vfs.FilesystemType.
diff --git a/pkg/sentry/fsimpl/tmpfs/BUILD b/pkg/sentry/fsimpl/tmpfs/BUILD
index a2d9649e7..007be1572 100644
--- a/pkg/sentry/fsimpl/tmpfs/BUILD
+++ b/pkg/sentry/fsimpl/tmpfs/BUILD
@@ -52,7 +52,6 @@ go_library(
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
"//pkg/sentry/fs/lock",
- "//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
"//pkg/sentry/kernel/time",
@@ -96,6 +95,7 @@ go_test(
"pipe_test.go",
"regular_file_test.go",
"stat_test.go",
+ "tmpfs_test.go",
],
library = ":tmpfs",
deps = [
@@ -105,7 +105,6 @@ go_test(
"//pkg/sentry/contexttest",
"//pkg/sentry/fs/lock",
"//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/contexttest",
"//pkg/sentry/vfs",
"//pkg/syserror",
"//pkg/usermem",
diff --git a/pkg/sentry/fsimpl/tmpfs/filesystem.go b/pkg/sentry/fsimpl/tmpfs/filesystem.go
index 36ffcb592..80fa7b29d 100644
--- a/pkg/sentry/fsimpl/tmpfs/filesystem.go
+++ b/pkg/sentry/fsimpl/tmpfs/filesystem.go
@@ -16,6 +16,7 @@ package tmpfs
import (
"fmt"
+ "sync/atomic"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
@@ -24,6 +25,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
)
// Sync implements vfs.FilesystemImpl.Sync.
@@ -76,8 +78,8 @@ afterSymlink:
return nil, err
}
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO(gvisor.dev/issue/1197): Symlink traversals updates
- // access time.
+ // Symlink traversal updates access time.
+ atomic.StoreInt64(&d.inode.atime, d.inode.fs.clock.Now().Nanoseconds())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -361,8 +363,8 @@ afterTrailingSymlink:
}
// Do we need to resolve a trailing symlink?
if symlink, ok := child.inode.impl.(*symlink); ok && rp.ShouldFollowSymlink() {
- // TODO(gvisor.dev/issue/1197): Symlink traversals updates
- // access time.
+ // Symlink traversal updates access time.
+ atomic.StoreInt64(&child.inode.atime, child.inode.fs.clock.Now().Nanoseconds())
if err := rp.HandleSymlink(symlink.target); err != nil {
return nil, err
}
@@ -636,12 +638,19 @@ func (fs *filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
fs.mu.RLock()
defer fs.mu.RUnlock()
- _, err := resolveLocked(rp)
- if err != nil {
+ if _, err := resolveLocked(rp); err != nil {
return linux.Statfs{}, err
}
- // TODO(gvisor.dev/issue/1197): Actually implement statfs.
- return linux.Statfs{}, syserror.ENOSYS
+ statfs := linux.Statfs{
+ Type: linux.TMPFS_MAGIC,
+ BlockSize: usermem.PageSize,
+ FragmentSize: usermem.PageSize,
+ NameLength: linux.NAME_MAX,
+ // TODO(b/29637826): Allow configuring a tmpfs size and enforce it.
+ Blocks: 0,
+ BlocksFree: 0,
+ }
+ return statfs, nil
}
// SymlinkAt implements vfs.FilesystemImpl.SymlinkAt.
@@ -763,5 +772,24 @@ func (fs *filesystem) RemovexattrAt(ctx context.Context, rp *vfs.ResolvingPath,
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
fs.mu.RLock()
defer fs.mu.RUnlock()
- return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*dentry), b)
+ mnt := vd.Mount()
+ d := vd.Dentry().Impl().(*dentry)
+ for {
+ if mnt == vfsroot.Mount() && &d.vfsd == vfsroot.Dentry() {
+ return vfs.PrependPathAtVFSRootError{}
+ }
+ if &d.vfsd == mnt.Root() {
+ return nil
+ }
+ if d.parent == nil {
+ if d.name != "" {
+ // This must be an anonymous memfd file.
+ b.PrependComponent("/" + d.name)
+ return vfs.PrependPathSyntheticError{}
+ }
+ return vfs.PrependPathAtNonMountRootError{}
+ }
+ b.PrependComponent(d.name)
+ d = d.parent
+ }
}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file.go b/pkg/sentry/fsimpl/tmpfs/regular_file.go
index 57e5e28ec..3f433d666 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file.go
@@ -88,6 +88,7 @@ type regularFile struct {
func (fs *filesystem) newRegularFile(creds *auth.Credentials, mode linux.FileMode) *inode {
file := &regularFile{
memFile: fs.memFile,
+ seals: linux.F_SEAL_SEAL,
}
file.inode.init(file, fs, creds, linux.S_IFREG|mode)
file.inode.nlink = 1 // from parent directory
@@ -577,3 +578,44 @@ exitLoop:
return done, retErr
}
+
+// GetSeals returns the current set of seals on a memfd inode.
+func GetSeals(fd *vfs.FileDescription) (uint32, error) {
+ f, ok := fd.Impl().(*regularFileFD)
+ if !ok {
+ return 0, syserror.EINVAL
+ }
+ rf := f.inode().impl.(*regularFile)
+ rf.dataMu.RLock()
+ defer rf.dataMu.RUnlock()
+ return rf.seals, nil
+}
+
+// AddSeals adds new file seals to a memfd inode.
+func AddSeals(fd *vfs.FileDescription, val uint32) error {
+ f, ok := fd.Impl().(*regularFileFD)
+ if !ok {
+ return syserror.EINVAL
+ }
+ rf := f.inode().impl.(*regularFile)
+ rf.mapsMu.Lock()
+ defer rf.mapsMu.Unlock()
+ rf.dataMu.RLock()
+ defer rf.dataMu.RUnlock()
+
+ if rf.seals&linux.F_SEAL_SEAL != 0 {
+ // Seal applied which prevents addition of any new seals.
+ return syserror.EPERM
+ }
+
+ // F_SEAL_WRITE can only be added if there are no active writable maps.
+ if rf.seals&linux.F_SEAL_WRITE == 0 && val&linux.F_SEAL_WRITE != 0 {
+ if rf.writableMappingPages > 0 {
+ return syserror.EBUSY
+ }
+ }
+
+ // Seals can only be added, never removed.
+ rf.seals |= val
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
index 0399725cf..64e1c40ad 100644
--- a/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/regular_file_test.go
@@ -18,152 +18,16 @@ import (
"bytes"
"fmt"
"io"
- "sync/atomic"
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/fs/lock"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
)
-// nextFileID is used to generate unique file names.
-var nextFileID int64
-
-// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
-// is not nil, then cleanup should be called when the root is no longer needed.
-func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
-
- vfsObj := &vfs.VirtualFilesystem{}
- if err := vfsObj.Init(); err != nil {
- return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err)
- }
-
- vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
- AllowUserMount: true,
- })
- mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
- if err != nil {
- return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
- }
- root := mntns.Root()
- return vfsObj, root, func() {
- root.DecRef()
- mntns.DecRef()
- }, nil
-}
-
-// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
-// the returned err is not nil, then cleanup should be called when the FD is no
-// longer needed.
-func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the file that will be write/read.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(filename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
- Mode: linux.ModeRegular | mode,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newDirFD is like newFileFD, but for directories.
-func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the dir.
- if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.MkdirOptions{
- Mode: linux.ModeDirectory | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err)
- }
-
- // Open the dir and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(dirname),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDONLY | linux.O_DIRECTORY,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err)
- }
-
- return fd, cleanup, nil
-}
-
-// newPipeFD is like newFileFD, but for pipes.
-func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
- creds := auth.CredentialsFromContext(ctx)
- vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
- if err != nil {
- return nil, nil, err
- }
-
- pipename := fmt.Sprintf("tmpfs-test-pipe-%d", atomic.AddInt64(&nextFileID, 1))
-
- // Create the pipe.
- if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.MknodOptions{
- Mode: linux.ModeNamedPipe | mode,
- }); err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to create pipe %q: %v", pipename, err)
- }
-
- // Open the pipe and return it.
- fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
- Root: root,
- Start: root,
- Path: fspath.Parse(pipename),
- }, &vfs.OpenOptions{
- Flags: linux.O_RDWR,
- })
- if err != nil {
- cleanup()
- return nil, nil, fmt.Errorf("failed to open pipe %q: %v", pipename, err)
- }
-
- return fd, cleanup, nil
-}
-
// Test that we can write some data to a file and read it back.`
func TestSimpleWriteRead(t *testing.T) {
ctx := contexttest.Context(t)
diff --git a/pkg/sentry/fsimpl/tmpfs/stat_test.go b/pkg/sentry/fsimpl/tmpfs/stat_test.go
index 60c2c980e..f7ee4aab2 100644
--- a/pkg/sentry/fsimpl/tmpfs/stat_test.go
+++ b/pkg/sentry/fsimpl/tmpfs/stat_test.go
@@ -19,8 +19,8 @@ import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/contexttest"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/contexttest"
"gvisor.dev/gvisor/pkg/sentry/vfs"
)
@@ -29,7 +29,6 @@ func TestStatAfterCreate(t *testing.T) {
mode := linux.FileMode(0644)
// Run with different file types.
- // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets.
for _, typ := range []string{"file", "dir", "pipe"} {
t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
var (
@@ -175,7 +174,6 @@ func TestSetStat(t *testing.T) {
mode := linux.FileMode(0644)
// Run with different file types.
- // TODO(gvisor.dev/issue/1197): Also test symlinks and sockets.
for _, typ := range []string{"file", "dir", "pipe"} {
t.Run(fmt.Sprintf("type=%q", typ), func(t *testing.T) {
var (
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index 405928bd0..1e781aecd 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -94,7 +94,7 @@ type FilesystemOpts struct {
}
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
-func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, _ string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
memFileProvider := pgalloc.MemoryFileProviderFromContext(ctx)
if memFileProvider == nil {
panic("MemoryFileProviderFromContext returned nil")
@@ -139,6 +139,11 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return &fs.vfsfs, &root.vfsd, nil
}
+// NewFilesystem returns a new tmpfs filesystem.
+func NewFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*vfs.Filesystem, *vfs.Dentry, error) {
+ return FilesystemType{}.GetFilesystem(ctx, vfsObj, creds, "", vfs.GetFilesystemOptions{})
+}
+
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release() {
fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
@@ -658,3 +663,34 @@ func (fd *fileDescription) Setxattr(ctx context.Context, opts vfs.SetxattrOption
func (fd *fileDescription) Removexattr(ctx context.Context, name string) error {
return fd.inode().removexattr(auth.CredentialsFromContext(ctx), name)
}
+
+// NewMemfd creates a new tmpfs regular file and file description that can back
+// an anonymous fd created by memfd_create.
+func NewMemfd(mount *vfs.Mount, creds *auth.Credentials, allowSeals bool, name string) (*vfs.FileDescription, error) {
+ fs, ok := mount.Filesystem().Impl().(*filesystem)
+ if !ok {
+ panic("NewMemfd() called with non-tmpfs mount")
+ }
+
+ // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd inodes are set up with
+ // S_IRWXUGO.
+ mode := linux.FileMode(0777)
+ inode := fs.newRegularFile(creds, mode)
+ rf := inode.impl.(*regularFile)
+ if allowSeals {
+ rf.seals = 0
+ }
+
+ d := fs.newDentry(inode)
+ defer d.DecRef()
+ d.name = name
+
+ // Per Linux, mm/shmem.c:__shmem_file_setup(), memfd files are set up with
+ // FMODE_READ | FMODE_WRITE.
+ var fd regularFileFD
+ flags := uint32(linux.O_RDWR)
+ if err := fd.vfsfd.Init(&fd, flags, mount, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ return nil, err
+ }
+ return &fd.vfsfd, nil
+}
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
new file mode 100644
index 000000000..a240fb276
--- /dev/null
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs_test.go
@@ -0,0 +1,156 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package tmpfs
+
+import (
+ "fmt"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// nextFileID is used to generate unique file names.
+var nextFileID int64
+
+// newTmpfsRoot creates a new tmpfs mount, and returns the root. If the error
+// is not nil, then cleanup should be called when the root is no longer needed.
+func newTmpfsRoot(ctx context.Context) (*vfs.VirtualFilesystem, vfs.VirtualDentry, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+
+ vfsObj := &vfs.VirtualFilesystem{}
+ if err := vfsObj.Init(); err != nil {
+ return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("VFS init: %v", err)
+ }
+
+ vfsObj.MustRegisterFilesystemType("tmpfs", FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
+ AllowUserMount: true,
+ })
+ mntns, err := vfsObj.NewMountNamespace(ctx, creds, "", "tmpfs", &vfs.GetFilesystemOptions{})
+ if err != nil {
+ return nil, vfs.VirtualDentry{}, nil, fmt.Errorf("failed to create tmpfs root mount: %v", err)
+ }
+ root := mntns.Root()
+ return vfsObj, root, func() {
+ root.DecRef()
+ mntns.DecRef()
+ }, nil
+}
+
+// newFileFD creates a new file in a new tmpfs mount, and returns the FD. If
+// the returned err is not nil, then cleanup should be called when the FD is no
+// longer needed.
+func newFileFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ filename := fmt.Sprintf("tmpfs-test-file-%d", atomic.AddInt64(&nextFileID, 1))
+
+ // Create the file that will be write/read.
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(filename),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR | linux.O_CREAT | linux.O_EXCL,
+ Mode: linux.ModeRegular | mode,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create file %q: %v", filename, err)
+ }
+
+ return fd, cleanup, nil
+}
+
+// newDirFD is like newFileFD, but for directories.
+func newDirFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ dirname := fmt.Sprintf("tmpfs-test-dir-%d", atomic.AddInt64(&nextFileID, 1))
+
+ // Create the dir.
+ if err := vfsObj.MkdirAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(dirname),
+ }, &vfs.MkdirOptions{
+ Mode: linux.ModeDirectory | mode,
+ }); err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create directory %q: %v", dirname, err)
+ }
+
+ // Open the dir and return it.
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(dirname),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDONLY | linux.O_DIRECTORY,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to open directory %q: %v", dirname, err)
+ }
+
+ return fd, cleanup, nil
+}
+
+// newPipeFD is like newFileFD, but for pipes.
+func newPipeFD(ctx context.Context, mode linux.FileMode) (*vfs.FileDescription, func(), error) {
+ creds := auth.CredentialsFromContext(ctx)
+ vfsObj, root, cleanup, err := newTmpfsRoot(ctx)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ name := fmt.Sprintf("tmpfs-test-%d", atomic.AddInt64(&nextFileID, 1))
+
+ if err := vfsObj.MknodAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(name),
+ }, &vfs.MknodOptions{
+ Mode: linux.ModeNamedPipe | mode,
+ }); err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to create pipe %q: %v", name, err)
+ }
+
+ fd, err := vfsObj.OpenAt(ctx, creds, &vfs.PathOperation{
+ Root: root,
+ Start: root,
+ Path: fspath.Parse(name),
+ }, &vfs.OpenOptions{
+ Flags: linux.O_RDWR,
+ })
+ if err != nil {
+ cleanup()
+ return nil, nil, fmt.Errorf("failed to open pipe %q: %v", name, err)
+ }
+
+ return fd, cleanup, nil
+}
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index 8104f50f3..a28eab8b8 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -173,6 +173,7 @@ go_library(
"//pkg/sentry/fsimpl/pipefs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/fsimpl/timerfd",
+ "//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/hostcpu",
"//pkg/sentry/inet",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index 3617da8c6..5efeb3767 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -53,6 +53,7 @@ import (
"gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/timerfd"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -259,6 +260,10 @@ type Kernel struct {
// syscalls (as opposed to named pipes created by mknod()).
pipeMount *vfs.Mount
+ // shmMount is the Mount used for anonymous files created by the
+ // memfd_create() syscalls. It is analagous to Linux's shm_mnt.
+ shmMount *vfs.Mount
+
// socketMount is the Mount used for sockets created by the socket() and
// socketpair() syscalls. There are several cases where a socket dentry will
// not be contained in socketMount:
@@ -330,6 +335,9 @@ func (k *Kernel) Init(args InitKernelArgs) error {
if args.Timekeeper == nil {
return fmt.Errorf("Timekeeper is nil")
}
+ if args.Timekeeper.clocks == nil {
+ return fmt.Errorf("Must call Timekeeper.SetClocks() before Kernel.Init()")
+ }
if args.RootUserNamespace == nil {
return fmt.Errorf("RootUserNamespace is nil")
}
@@ -384,6 +392,18 @@ func (k *Kernel) Init(args InitKernelArgs) error {
}
k.pipeMount = pipeMount
+ tmpfsFilesystem, tmpfsRoot, err := tmpfs.NewFilesystem(k.SupervisorContext(), &k.vfs, auth.NewRootCredentials(k.rootUserNamespace))
+ if err != nil {
+ return fmt.Errorf("failed to create tmpfs filesystem: %v", err)
+ }
+ defer tmpfsFilesystem.DecRef()
+ defer tmpfsRoot.DecRef()
+ shmMount, err := k.vfs.NewDisconnectedMount(tmpfsFilesystem, tmpfsRoot, &vfs.MountOptions{})
+ if err != nil {
+ return fmt.Errorf("failed to create tmpfs mount: %v", err)
+ }
+ k.shmMount = shmMount
+
socketFilesystem, err := sockfs.NewFilesystem(&k.vfs)
if err != nil {
return fmt.Errorf("failed to create sockfs filesystem: %v", err)
@@ -1656,6 +1676,11 @@ func (k *Kernel) PipeMount() *vfs.Mount {
return k.pipeMount
}
+// ShmMount returns the tmpfs mount.
+func (k *Kernel) ShmMount() *vfs.Mount {
+ return k.shmMount
+}
+
// SocketMount returns the sockfs mount.
func (k *Kernel) SocketMount() *vfs.Mount {
return k.socketMount
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 41a1ce031..789bb94c8 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -59,6 +59,13 @@ type metadata struct {
// developing iptables, but can pollute sentry logs otherwise.
const enableLogging = false
+// emptyFilter is for comparison with a rule's filters to determine whether it
+// is also empty. It is immutable.
+var emptyFilter = stack.IPHeaderFilter{
+ Dst: "\x00\x00\x00\x00",
+ DstMask: "\x00\x00\x00\x00",
+}
+
// nflog logs messages related to the writing and reading of iptables.
func nflog(format string, args ...interface{}) {
if enableLogging && log.IsLogging(log.Debug) {
@@ -484,7 +491,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
}
if offset == replace.Underflow[hook] {
if !validUnderflow(table.Rules[ruleIdx]) {
- nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP")
+ nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx)
return syserr.ErrInvalidArgument
}
table.Underflows[hk] = ruleIdx
@@ -547,7 +554,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
// make sure all other chains point to ACCEPT rules.
for hook, ruleIdx := range table.BuiltinChains {
if hook == stack.Forward || hook == stack.Postrouting {
- if _, ok := table.Rules[ruleIdx].Target.(stack.AcceptTarget); !ok {
+ if !isUnconditionalAccept(table.Rules[ruleIdx]) {
nflog("hook %d is unsupported.", hook)
return syserr.ErrInvalidArgument
}
@@ -776,6 +783,9 @@ func validUnderflow(rule stack.Rule) bool {
if len(rule.Matchers) != 0 {
return false
}
+ if rule.Filter != emptyFilter {
+ return false
+ }
switch rule.Target.(type) {
case stack.AcceptTarget, stack.DropTarget:
return true
@@ -784,6 +794,14 @@ func validUnderflow(rule stack.Rule) bool {
}
}
+func isUnconditionalAccept(rule stack.Rule) bool {
+ if !validUnderflow(rule) {
+ return false
+ }
+ _, ok := rule.Target.(stack.AcceptTarget)
+ return ok
+}
+
func hookFromLinux(hook int) stack.Hook {
switch hook {
case linux.NF_INET_PRE_ROUTING:
diff --git a/pkg/sentry/socket/netfilter/owner_matcher.go b/pkg/sentry/socket/netfilter/owner_matcher.go
index 5949a7c29..3863293c7 100644
--- a/pkg/sentry/socket/netfilter/owner_matcher.go
+++ b/pkg/sentry/socket/netfilter/owner_matcher.go
@@ -45,14 +45,18 @@ func (ownerMarshaler) marshal(mr stack.Matcher) []byte {
GID: matcher.gid,
}
- // Support for UID match.
- // TODO(gvisor.dev/issue/170): Need to support gid match.
+ // Support for UID and GID match.
if matcher.matchUID {
iptOwnerInfo.Match = linux.XT_OWNER_UID
- } else if matcher.matchGID {
- panic("GID match is not supported.")
- } else {
- panic("UID match is not set.")
+ if matcher.invertUID {
+ iptOwnerInfo.Invert = linux.XT_OWNER_UID
+ }
+ }
+ if matcher.matchGID {
+ iptOwnerInfo.Match |= linux.XT_OWNER_GID
+ if matcher.invertGID {
+ iptOwnerInfo.Invert |= linux.XT_OWNER_GID
+ }
}
buf := make([]byte, 0, linux.SizeOfIPTOwnerInfo)
@@ -71,31 +75,34 @@ func (ownerMarshaler) unmarshal(buf []byte, filter stack.IPHeaderFilter) (stack.
binary.Unmarshal(buf[:linux.SizeOfIPTOwnerInfo], usermem.ByteOrder, &matchData)
nflog("parseMatchers: parsed IPTOwnerInfo: %+v", matchData)
- if matchData.Invert != 0 {
- return nil, fmt.Errorf("invert flag is not supported for owner match")
- }
-
- // Support for UID match.
- // TODO(gvisor.dev/issue/170): Need to support gid match.
- if matchData.Match&linux.XT_OWNER_UID != linux.XT_OWNER_UID {
- return nil, fmt.Errorf("owner match is only supported for uid")
- }
-
- // Check Flags.
var owner OwnerMatcher
owner.uid = matchData.UID
owner.gid = matchData.GID
- owner.matchUID = true
+
+ // Check flags.
+ if matchData.Match&linux.XT_OWNER_UID != 0 {
+ owner.matchUID = true
+ if matchData.Invert&linux.XT_OWNER_UID != 0 {
+ owner.invertUID = true
+ }
+ }
+ if matchData.Match&linux.XT_OWNER_GID != 0 {
+ owner.matchGID = true
+ if matchData.Invert&linux.XT_OWNER_GID != 0 {
+ owner.invertGID = true
+ }
+ }
return &owner, nil
}
type OwnerMatcher struct {
- uid uint32
- gid uint32
- matchUID bool
- matchGID bool
- invert uint8
+ uid uint32
+ gid uint32
+ matchUID bool
+ matchGID bool
+ invertUID bool
+ invertGID bool
}
// Name implements Matcher.Name.
@@ -112,16 +119,30 @@ func (om *OwnerMatcher) Match(hook stack.Hook, pkt stack.PacketBuffer, interface
}
// If the packet owner is not set, drop the packet.
- // Support for uid match.
- // TODO(gvisor.dev/issue/170): Need to support gid match.
- if pkt.Owner == nil || !om.matchUID {
+ if pkt.Owner == nil {
return false, true
}
- // TODO(gvisor.dev/issue/170): Need to add tests to verify
- // drop rule when packet UID does not match owner matcher UID.
- if pkt.Owner.UID() != om.uid {
- return false, false
+ var matches bool
+ // Check for UID match.
+ if om.matchUID {
+ if pkt.Owner.UID() == om.uid {
+ matches = true
+ }
+ if matches == om.invertUID {
+ return false, false
+ }
+ }
+
+ // Check for GID match.
+ if om.matchGID {
+ matches = false
+ if pkt.Owner.GID() == om.gid {
+ matches = true
+ }
+ if matches == om.invertGID {
+ return false, false
+ }
}
return true, false
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index 6129fb83d..333e0042e 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -18,6 +18,7 @@ go_library(
],
deps = [
"//pkg/abi/linux",
+ "//pkg/amutex",
"//pkg/binary",
"//pkg/context",
"//pkg/log",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 81053d8ef..9dea2b5ff 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -34,6 +34,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
@@ -553,11 +554,9 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{})
}
@@ -626,11 +625,9 @@ func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{
Atomic: true, // See above.
})
@@ -1324,6 +1321,29 @@ func getSockOptTCP(t *kernel.Task, ep commonEndpoint, name, outLen int) (interfa
return int32(time.Duration(v) / time.Second), nil
+ case linux.TCP_SYNCNT:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptInt(tcpip.TCPSynCountOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
+
+ case linux.TCP_WINDOW_CLAMP:
+ if outLen < sizeOfInt32 {
+ return nil, syserr.ErrInvalidArgument
+ }
+
+ v, err := ep.GetSockOptInt(tcpip.TCPWindowClampOption)
+ if err != nil {
+ return nil, syserr.TranslateNetstackError(err)
+ }
+
+ return int32(v), nil
default:
emitUnimplementedEventTCP(t, name)
}
@@ -1793,6 +1813,22 @@ func setSockOptTCP(t *kernel.Task, ep commonEndpoint, name int, optVal []byte) *
}
return syserr.TranslateNetstackError(ep.SetSockOpt(tcpip.TCPDeferAcceptOption(time.Second * time.Duration(v))))
+ case linux.TCP_SYNCNT:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ v := usermem.ByteOrder.Uint32(optVal)
+
+ return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPSynCountOption, int(v)))
+
+ case linux.TCP_WINDOW_CLAMP:
+ if len(optVal) < sizeOfInt32 {
+ return syserr.ErrInvalidArgument
+ }
+ v := usermem.ByteOrder.Uint32(optVal)
+
+ return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.TCPWindowClampOption, int(v)))
+
case linux.TCP_REPAIR_OPTIONS:
t.Kernel().EmitUnimplementedEvent(t)
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index 191970d41..fcd8013c0 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -16,6 +16,7 @@ package netstack
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
@@ -89,11 +90,6 @@ func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
s.socketOpsCommon.EventUnregister(e)
}
-// PRead implements vfs.FileDescriptionImpl.
-func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.ESPIPE
-}
-
// Read implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
@@ -115,11 +111,6 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.
return int64(n), nil
}
-// PWrite implements vfs.FileDescriptionImpl.
-func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.ESPIPE
-}
-
// Write implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
@@ -135,11 +126,9 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{})
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index c32f942fb..f882ef840 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -13,6 +13,7 @@ go_library(
"fscontext.go",
"getdents.go",
"ioctl.go",
+ "memfd.go",
"mmap.go",
"path.go",
"pipe.go",
@@ -43,6 +44,7 @@ go_library(
"//pkg/sentry/fsimpl/pipefs",
"//pkg/sentry/fsimpl/signalfd",
"//pkg/sentry/fsimpl/timerfd",
+ "//pkg/sentry/fsimpl/tmpfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
diff --git a/pkg/sentry/syscalls/linux/vfs2/fd.go b/pkg/sentry/syscalls/linux/vfs2/fd.go
index 8181d80f4..ca0f7fd1e 100644
--- a/pkg/sentry/syscalls/linux/vfs2/fd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/fd.go
@@ -17,6 +17,7 @@ package vfs2
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
@@ -157,6 +158,15 @@ func Fcntl(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
return 0, nil, syserror.EBADF
}
return uintptr(pipefile.PipeSize()), nil, nil
+ case linux.F_GET_SEALS:
+ val, err := tmpfs.GetSeals(file)
+ return uintptr(val), nil, err
+ case linux.F_ADD_SEALS:
+ if !file.IsWritable() {
+ return 0, nil, syserror.EPERM
+ }
+ err := tmpfs.AddSeals(file, args[2].Uint())
+ return 0, nil, err
default:
// TODO(gvisor.dev/issue/1623): Everything else is not yet supported.
return 0, nil, syserror.EINVAL
diff --git a/pkg/sentry/syscalls/linux/vfs2/memfd.go b/pkg/sentry/syscalls/linux/vfs2/memfd.go
new file mode 100644
index 000000000..bbe248d17
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/vfs2/memfd.go
@@ -0,0 +1,63 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs2
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+const (
+ memfdPrefix = "memfd:"
+ memfdMaxNameLen = linux.NAME_MAX - len(memfdPrefix)
+ memfdAllFlags = uint32(linux.MFD_CLOEXEC | linux.MFD_ALLOW_SEALING)
+)
+
+// MemfdCreate implements the linux syscall memfd_create(2).
+func MemfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ addr := args[0].Pointer()
+ flags := args[1].Uint()
+
+ if flags&^memfdAllFlags != 0 {
+ // Unknown bits in flags.
+ return 0, nil, syserror.EINVAL
+ }
+
+ allowSeals := flags&linux.MFD_ALLOW_SEALING != 0
+ cloExec := flags&linux.MFD_CLOEXEC != 0
+
+ name, err := t.CopyInString(addr, memfdMaxNameLen)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ shmMount := t.Kernel().ShmMount()
+ file, err := tmpfs.NewMemfd(shmMount, t.Credentials(), allowSeals, memfdPrefix+name)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ fd, err := t.NewFDFromVFS2(0, file, kernel.FDFlags{
+ CloseOnExec: cloExec,
+ })
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(fd), nil, nil
+}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index 9c04677f1..ec8da7f06 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -158,7 +158,7 @@ func Override() {
s.Table[306] = syscalls.Supported("syncfs", Syncfs)
s.Table[307] = syscalls.Supported("sendmmsg", SendMMsg)
s.Table[316] = syscalls.Supported("renameat2", Renameat2)
- delete(s.Table, 319) // memfd_create
+ s.Table[319] = syscalls.Supported("memfd_create", MemfdCreate)
s.Table[322] = syscalls.Supported("execveat", Execveat)
s.Table[327] = syscalls.Supported("preadv2", Preadv2)
s.Table[328] = syscalls.Supported("pwritev2", Pwritev2)
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index f01217c91..9a3c5d6c3 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -59,6 +59,9 @@ func (v *View) Reader() bytes.Reader {
// ToVectorisedView returns a VectorisedView containing the receiver.
func (v View) ToVectorisedView() VectorisedView {
+ if len(v) == 0 {
+ return VectorisedView{}
+ }
return NewVectorisedView(len(v), []View{v})
}
@@ -229,6 +232,9 @@ func (vv *VectorisedView) Append(vv2 VectorisedView) {
// AppendView appends the given view into this vectorised view.
func (vv *VectorisedView) AppendView(v View) {
+ if len(v) == 0 {
+ return
+ }
vv.views = append(vv.views, v)
vv.size += len(v)
}
diff --git a/pkg/tcpip/buffer/view_test.go b/pkg/tcpip/buffer/view_test.go
index c56795c7b..726e54de9 100644
--- a/pkg/tcpip/buffer/view_test.go
+++ b/pkg/tcpip/buffer/view_test.go
@@ -483,3 +483,39 @@ func TestPullUp(t *testing.T) {
}
}
}
+
+func TestToVectorisedView(t *testing.T) {
+ testCases := []struct {
+ in View
+ want VectorisedView
+ }{
+ {nil, VectorisedView{}},
+ {View{}, VectorisedView{}},
+ {View{'a'}, VectorisedView{size: 1, views: []View{{'a'}}}},
+ }
+ for _, tc := range testCases {
+ if got, want := tc.in.ToVectorisedView(), tc.want; !reflect.DeepEqual(got, want) {
+ t.Errorf("(%v).ToVectorisedView failed got: %+v, want: %+v", tc.in, got, want)
+ }
+ }
+}
+
+func TestAppendView(t *testing.T) {
+ testCases := []struct {
+ vv VectorisedView
+ in View
+ want VectorisedView
+ }{
+ {VectorisedView{}, nil, VectorisedView{}},
+ {VectorisedView{}, View{}, VectorisedView{}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, nil, VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, View{}, VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, View{'e'}, VectorisedView{[]View{{'a', 'b', 'c', 'd'}, {'e'}}, 5}},
+ }
+ for _, tc := range testCases {
+ tc.vv.AppendView(tc.in)
+ if got, want := tc.vv, tc.want; !reflect.DeepEqual(got, want) {
+ t.Errorf("(%v).ToVectorisedView failed got: %+v, want: %+v", tc.in, got, want)
+ }
+ }
+}
diff --git a/pkg/tcpip/link/qdisc/fifo/endpoint.go b/pkg/tcpip/link/qdisc/fifo/endpoint.go
index be9fec3b3..54432194d 100644
--- a/pkg/tcpip/link/qdisc/fifo/endpoint.go
+++ b/pkg/tcpip/link/qdisc/fifo/endpoint.go
@@ -163,7 +163,7 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, protocol tcpip.Ne
// WritePackets implements stack.LinkEndpoint.WritePackets.
//
-// Being a batch API each packet in pkts should have the following fields
+// Being a batch API, each packet in pkts should have the following fields
// populated:
// - pkt.EgressRoute
// - pkt.GSOOptions
diff --git a/pkg/tcpip/tcpip.go b/pkg/tcpip/tcpip.go
index 1ca4088c9..45e930ad8 100644
--- a/pkg/tcpip/tcpip.go
+++ b/pkg/tcpip/tcpip.go
@@ -622,6 +622,19 @@ const (
//
// A zero value indicates the default.
TTLOption
+
+ // TCPSynCountOption is used by SetSockOpt/GetSockOpt to specify the number of
+ // SYN retransmits that TCP should send before aborting the attempt to
+ // connect. It cannot exceed 255.
+ //
+ // NOTE: This option is currently only stubbed out and is no-op.
+ TCPSynCountOption
+
+ // TCPWindowClampOption is used by SetSockOpt/GetSockOpt to bound the size
+ // of the advertised window to this value.
+ //
+ // NOTE: This option is currently only stubed out and is a no-op
+ TCPWindowClampOption
)
// ErrorOption is used in GetSockOpt to specify that the last error reported by
@@ -685,11 +698,23 @@ type TCPDeferAcceptOption time.Duration
// default MinRTO used by the Stack.
type TCPMinRTOOption time.Duration
+// TCPMaxRTOOption is use by SetSockOpt/GetSockOpt to allow overriding
+// default MaxRTO used by the Stack.
+type TCPMaxRTOOption time.Duration
+
+// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
+// maximum number of retransmits after which we time out the connection.
+type TCPMaxRetriesOption uint64
+
// TCPSynRcvdCountThresholdOption is used by SetSockOpt/GetSockOpt to specify
// the number of endpoints that can be in SYN-RCVD state before the stack
// switches to using SYN cookies.
type TCPSynRcvdCountThresholdOption uint64
+// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify stack-wide
+// default for number of times SYN is retransmitted before aborting a connect.
+type TCPSynRetriesOption uint8
+
// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast.
type MulticastInterfaceOption struct {
diff --git a/pkg/tcpip/transport/tcp/endpoint.go b/pkg/tcpip/transport/tcp/endpoint.go
index 07d3e64c8..71735029e 100644
--- a/pkg/tcpip/transport/tcp/endpoint.go
+++ b/pkg/tcpip/transport/tcp/endpoint.go
@@ -470,6 +470,17 @@ type endpoint struct {
// for this endpoint using the TCP_MAXSEG setsockopt.
userMSS uint16
+ // maxSynRetries is the maximum number of SYN retransmits that TCP should
+ // send before aborting the attempt to connect. It cannot exceed 255.
+ //
+ // NOTE: This is currently a no-op and does not change the SYN
+ // retransmissions.
+ maxSynRetries uint8
+
+ // windowClamp is used to bound the size of the advertised window to
+ // this value.
+ windowClamp uint32
+
// The following fields are used to manage the send buffer. When
// segments are ready to be sent, they are added to sndQueue and the
// protocol goroutine is signaled via sndWaker.
@@ -795,8 +806,10 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
interval: 75 * time.Second,
count: 9,
},
- uniqueID: s.UniqueID(),
- txHash: s.Rand().Uint32(),
+ uniqueID: s.UniqueID(),
+ txHash: s.Rand().Uint32(),
+ windowClamp: DefaultReceiveBufferSize,
+ maxSynRetries: DefaultSynRetries,
}
var ss SendBufferSizeOption
@@ -829,6 +842,11 @@ func newEndpoint(s *stack.Stack, netProto tcpip.NetworkProtocolNumber, waiterQue
e.tcpLingerTimeout = time.Duration(tcpLT)
}
+ var synRetries tcpip.TCPSynRetriesOption
+ if err := s.TransportProtocolOption(ProtocolNumber, &synRetries); err == nil {
+ e.maxSynRetries = uint8(synRetries)
+ }
+
if p := s.GetTCPProbe(); p != nil {
e.probe = p
}
@@ -1603,6 +1621,36 @@ func (e *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) *tcpip.Error {
e.ttl = uint8(v)
e.UnlockUser()
+ case tcpip.TCPSynCountOption:
+ if v < 1 || v > 255 {
+ return tcpip.ErrInvalidOptionValue
+ }
+ e.LockUser()
+ e.maxSynRetries = uint8(v)
+ e.UnlockUser()
+
+ case tcpip.TCPWindowClampOption:
+ if v == 0 {
+ e.LockUser()
+ switch e.EndpointState() {
+ case StateClose, StateInitial:
+ e.windowClamp = 0
+ e.UnlockUser()
+ return nil
+ default:
+ e.UnlockUser()
+ return tcpip.ErrInvalidOptionValue
+ }
+ }
+ var rs ReceiveBufferSizeOption
+ if err := e.stack.TransportProtocolOption(ProtocolNumber, &rs); err == nil {
+ if v < rs.Min/2 {
+ v = rs.Min / 2
+ }
+ }
+ e.LockUser()
+ e.windowClamp = uint32(v)
+ e.UnlockUser()
}
return nil
}
@@ -1826,6 +1874,18 @@ func (e *endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, *tcpip.Error) {
e.UnlockUser()
return v, nil
+ case tcpip.TCPSynCountOption:
+ e.LockUser()
+ v := int(e.maxSynRetries)
+ e.UnlockUser()
+ return v, nil
+
+ case tcpip.TCPWindowClampOption:
+ e.LockUser()
+ v := int(e.windowClamp)
+ e.UnlockUser()
+ return v, nil
+
default:
return -1, tcpip.ErrUnknownProtocolOption
}
diff --git a/pkg/tcpip/transport/tcp/protocol.go b/pkg/tcpip/transport/tcp/protocol.go
index cfd9a4e8e..2a2a7ddeb 100644
--- a/pkg/tcpip/transport/tcp/protocol.go
+++ b/pkg/tcpip/transport/tcp/protocol.go
@@ -64,6 +64,10 @@ const (
// DefaultTCPTimeWaitTimeout is the amount of time that sockets linger
// in TIME_WAIT state before being marked closed.
DefaultTCPTimeWaitTimeout = 60 * time.Second
+
+ // DefaultSynRetries is the default value for the number of SYN retransmits
+ // before a connect is aborted.
+ DefaultSynRetries = 6
)
// SACKEnabled option can be used to enable SACK support in the TCP
@@ -163,7 +167,10 @@ type protocol struct {
tcpLingerTimeout time.Duration
tcpTimeWaitTimeout time.Duration
minRTO time.Duration
+ maxRTO time.Duration
+ maxRetries uint32
synRcvdCount synRcvdCounter
+ synRetries uint8
dispatcher *dispatcher
}
@@ -340,12 +347,36 @@ func (p *protocol) SetOption(option interface{}) *tcpip.Error {
p.mu.Unlock()
return nil
+ case tcpip.TCPMaxRTOOption:
+ if v < 0 {
+ v = tcpip.TCPMaxRTOOption(MaxRTO)
+ }
+ p.mu.Lock()
+ p.maxRTO = time.Duration(v)
+ p.mu.Unlock()
+ return nil
+
+ case tcpip.TCPMaxRetriesOption:
+ p.mu.Lock()
+ p.maxRetries = uint32(v)
+ p.mu.Unlock()
+ return nil
+
case tcpip.TCPSynRcvdCountThresholdOption:
p.mu.Lock()
p.synRcvdCount.SetThreshold(uint64(v))
p.mu.Unlock()
return nil
+ case tcpip.TCPSynRetriesOption:
+ if v < 1 || v > 255 {
+ return tcpip.ErrInvalidOptionValue
+ }
+ p.mu.Lock()
+ p.synRetries = uint8(v)
+ p.mu.Unlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -414,12 +445,30 @@ func (p *protocol) Option(option interface{}) *tcpip.Error {
p.mu.RUnlock()
return nil
+ case *tcpip.TCPMaxRTOOption:
+ p.mu.RLock()
+ *v = tcpip.TCPMaxRTOOption(p.maxRTO)
+ p.mu.RUnlock()
+ return nil
+
+ case *tcpip.TCPMaxRetriesOption:
+ p.mu.RLock()
+ *v = tcpip.TCPMaxRetriesOption(p.maxRetries)
+ p.mu.RUnlock()
+ return nil
+
case *tcpip.TCPSynRcvdCountThresholdOption:
p.mu.RLock()
*v = tcpip.TCPSynRcvdCountThresholdOption(p.synRcvdCount.Threshold())
p.mu.RUnlock()
return nil
+ case *tcpip.TCPSynRetriesOption:
+ p.mu.RLock()
+ *v = tcpip.TCPSynRetriesOption(p.synRetries)
+ p.mu.RUnlock()
+ return nil
+
default:
return tcpip.ErrUnknownProtocolOption
}
@@ -452,6 +501,9 @@ func NewProtocol() stack.TransportProtocol {
tcpTimeWaitTimeout: DefaultTCPTimeWaitTimeout,
synRcvdCount: synRcvdCounter{threshold: SynRcvdCountThreshold},
dispatcher: newDispatcher(runtime.GOMAXPROCS(0)),
+ synRetries: DefaultSynRetries,
minRTO: MinRTO,
+ maxRTO: MaxRTO,
+ maxRetries: MaxRetries,
}
}
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index 9e547a221..06dc9b7d7 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -43,7 +43,8 @@ const (
nDupAckThreshold = 3
// MaxRetries is the maximum number of probe retries sender does
- // before timing out the connection, Linux default TCP_RETR2.
+ // before timing out the connection.
+ // Linux default TCP_RETR2, net.ipv4.tcp_retries2.
MaxRetries = 15
)
@@ -165,6 +166,12 @@ type sender struct {
// minRTO is the minimum permitted value for sender.rto.
minRTO time.Duration
+ // maxRTO is the maximum permitted value for sender.rto.
+ maxRTO time.Duration
+
+ // maxRetries is the maximum permitted retransmissions.
+ maxRetries uint32
+
// maxPayloadSize is the maximum size of the payload of a given segment.
// It is initialized on demand.
maxPayloadSize int
@@ -276,12 +283,24 @@ func newSender(ep *endpoint, iss, irs seqnum.Value, sndWnd seqnum.Size, mss uint
// etc.
s.ep.scoreboard = NewSACKScoreboard(uint16(s.maxPayloadSize), iss)
- // Get Stack wide minRTO.
- var v tcpip.TCPMinRTOOption
- if err := ep.stack.TransportProtocolOption(ProtocolNumber, &v); err != nil {
+ // Get Stack wide config.
+ var minRTO tcpip.TCPMinRTOOption
+ if err := ep.stack.TransportProtocolOption(ProtocolNumber, &minRTO); err != nil {
panic(fmt.Sprintf("unable to get minRTO from stack: %s", err))
}
- s.minRTO = time.Duration(v)
+ s.minRTO = time.Duration(minRTO)
+
+ var maxRTO tcpip.TCPMaxRTOOption
+ if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRTO); err != nil {
+ panic(fmt.Sprintf("unable to get maxRTO from stack: %s", err))
+ }
+ s.maxRTO = time.Duration(maxRTO)
+
+ var maxRetries tcpip.TCPMaxRetriesOption
+ if err := ep.stack.TransportProtocolOption(ProtocolNumber, &maxRetries); err != nil {
+ panic(fmt.Sprintf("unable to get maxRetries from stack: %s", err))
+ }
+ s.maxRetries = uint32(maxRetries)
return s
}
@@ -485,7 +504,7 @@ func (s *sender) retransmitTimerExpired() bool {
}
elapsed := time.Since(s.firstRetransmittedSegXmitTime)
- remaining := MaxRTO
+ remaining := s.maxRTO
if uto != 0 {
// Cap to the user specified timeout if one is specified.
remaining = uto - elapsed
@@ -494,24 +513,17 @@ func (s *sender) retransmitTimerExpired() bool {
// Always honor the user-timeout irrespective of whether the zero
// window probes were acknowledged.
// net/ipv4/tcp_timer.c::tcp_probe_timer()
- if remaining <= 0 || s.unackZeroWindowProbes >= MaxRetries {
+ if remaining <= 0 || s.unackZeroWindowProbes >= s.maxRetries {
return false
}
- if s.rto >= MaxRTO {
- // RFC 1122 section: 4.2.2.17
- // A TCP MAY keep its offered receive window closed
- // indefinitely. As long as the receiving TCP continues to
- // send acknowledgments in response to the probe segments, the
- // sending TCP MUST allow the connection to stay open.
- if !(s.zeroWindowProbing && s.unackZeroWindowProbes == 0) {
- return false
- }
- }
-
// Set new timeout. The timer will be restarted by the call to sendData
// below.
s.rto *= 2
+ // Cap the RTO as per RFC 1122 4.2.3.1, RFC 6298 5.5
+ if s.rto > s.maxRTO {
+ s.rto = s.maxRTO
+ }
// Cap RTO to remaining time.
if s.rto > remaining {
@@ -565,9 +577,20 @@ func (s *sender) retransmitTimerExpired() bool {
// send.
if s.zeroWindowProbing {
s.sendZeroWindowProbe()
+ // RFC 1122 4.2.2.17: A TCP MAY keep its offered receive window closed
+ // indefinitely. As long as the receiving TCP continues to send
+ // acknowledgments in response to the probe segments, the sending TCP
+ // MUST allow the connection to stay open.
return true
}
+ seg := s.writeNext
+ // RFC 1122 4.2.3.5: Close the connection when the number of
+ // retransmissions for this segment is beyond a limit.
+ if seg != nil && seg.xmitCount > s.maxRetries {
+ return false
+ }
+
s.sendData()
return true
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index d2c90ebd5..0b4512c65 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -2994,6 +2994,101 @@ func TestSendOnResetConnection(t *testing.T) {
}
}
+// TestMaxRetransmitsTimeout tests if the connection is timed out after
+// a segment has been retransmitted MaxRetries times.
+func TestMaxRetransmitsTimeout(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ defer c.Cleanup()
+
+ const numRetries = 2
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRetriesOption(numRetries)); err != nil {
+ t.Fatalf("could not set protocol option MaxRetries.\n")
+ }
+
+ c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
+
+ waitEntry, notifyCh := waiter.NewChannelEntry(nil)
+ c.WQ.EventRegister(&waitEntry, waiter.EventHUp)
+ defer c.WQ.EventUnregister(&waitEntry)
+
+ _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(1)), tcpip.WriteOptions{})
+ if err != nil {
+ t.Fatalf("Write failed: %v", err)
+ }
+
+ // Expect first transmit and MaxRetries retransmits.
+ for i := 0; i < numRetries+1; i++ {
+ checker.IPv4(t, c.GetPacket(),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlags(header.TCPFlagAck|header.TCPFlagPsh),
+ ),
+ )
+ }
+ // Wait for the connection to timeout after MaxRetries retransmits.
+ initRTO := 1 * time.Second
+ select {
+ case <-notifyCh:
+ case <-time.After((2 << numRetries) * initRTO):
+ t.Fatalf("connection still alive after maximum retransmits.\n")
+ }
+
+ // Send an ACK and expect a RST as the connection would have been closed.
+ c.SendPacket(nil, &context.Headers{
+ SrcPort: context.TestPort,
+ DstPort: c.Port,
+ Flags: header.TCPFlagAck,
+ })
+
+ checker.IPv4(t, c.GetPacket(),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlags(header.TCPFlagRst),
+ ),
+ )
+
+ if got := c.Stack().Stats().TCP.EstablishedTimedout.Value(); got != 1 {
+ t.Errorf("got c.Stack().Stats().TCP.EstablishedTimedout.Value() = %v, want = 1", got)
+ }
+}
+
+// TestMaxRTO tests if the retransmit interval caps to MaxRTO.
+func TestMaxRTO(t *testing.T) {
+ c := context.New(t, defaultMTU)
+ defer c.Cleanup()
+
+ rto := 1 * time.Second
+ if err := c.Stack().SetTransportProtocolOption(tcp.ProtocolNumber, tcpip.TCPMaxRTOOption(rto)); err != nil {
+ t.Fatalf("c.stack.SetTransportProtocolOption(tcp, tcpip.TCPMaxRTO(%d) failed: %s", rto, err)
+ }
+
+ c.CreateConnected(789 /* iss */, 30000 /* rcvWnd */, -1 /* epRcvBuf */)
+
+ _, _, err := c.EP.Write(tcpip.SlicePayload(buffer.NewView(1)), tcpip.WriteOptions{})
+ if err != nil {
+ t.Fatalf("Write failed: %v", err)
+ }
+ checker.IPv4(t, c.GetPacket(),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
+ ),
+ )
+ const numRetransmits = 2
+ for i := 0; i < numRetransmits; i++ {
+ start := time.Now()
+ checker.IPv4(t, c.GetPacket(),
+ checker.TCP(
+ checker.DstPort(context.TestPort),
+ checker.TCPFlagsMatch(header.TCPFlagAck, ^uint8(header.TCPFlagPsh)),
+ ),
+ )
+ if time.Since(start).Round(time.Second).Seconds() != rto.Seconds() {
+ t.Errorf("Retransmit interval not capped to MaxRTO.\n")
+ }
+ }
+}
+
func TestFinImmediately(t *testing.T) {
c := context.New(t, defaultMTU)
defer c.Cleanup()
@@ -6605,9 +6700,16 @@ func TestTCPUserTimeout(t *testing.T) {
c.CreateConnected(789, 30000, -1 /* epRcvBuf */)
+ waitEntry, notifyCh := waiter.NewChannelEntry(nil)
+ c.WQ.EventRegister(&waitEntry, waiter.EventHUp)
+ defer c.WQ.EventUnregister(&waitEntry)
+
origEstablishedTimedout := c.Stack().Stats().TCP.EstablishedTimedout.Value()
- userTimeout := 50 * time.Millisecond
+ // Ensure that on the next retransmit timer fire, the user timeout has
+ // expired.
+ initRTO := 1 * time.Second
+ userTimeout := initRTO / 2
c.EP.SetSockOpt(tcpip.TCPUserTimeoutOption(userTimeout))
// Send some data and wait before ACKing it.
@@ -6627,9 +6729,13 @@ func TestTCPUserTimeout(t *testing.T) {
),
)
- // Wait for a little over the minimum retransmit timeout of 200ms for
- // the retransmitTimer to fire and close the connection.
- time.Sleep(tcp.MinRTO + 10*time.Millisecond)
+ // Wait for the retransmit timer to be fired and the user timeout to cause
+ // close of the connection.
+ select {
+ case <-notifyCh:
+ case <-time.After(2 * initRTO):
+ t.Fatalf("connection still alive after %s, should have been closed after :%s", 2*initRTO, userTimeout)
+ }
// No packet should be received as the connection should be silently
// closed due to timeout.