summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/abi/linux/dev.go4
-rw-r--r--pkg/abi/linux/netfilter.go2
-rw-r--r--pkg/amutex/BUILD1
-rw-r--r--pkg/amutex/amutex.go17
-rw-r--r--pkg/sentry/arch/arch_aarch64.go4
-rw-r--r--pkg/sentry/arch/signal_arm64.go22
-rw-r--r--pkg/sentry/fs/gofer/session.go6
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go40
-rw-r--r--pkg/sentry/fsimpl/eventfd/BUILD33
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd.go (renamed from pkg/sentry/vfs/eventfd.go)24
-rw-r--r--pkg/sentry/fsimpl/eventfd/eventfd_test.go (renamed from pkg/sentry/vfs/eventfd_test.go)19
-rw-r--r--pkg/sentry/fsimpl/ext/ext.go16
-rw-r--r--pkg/sentry/fsimpl/ext/filesystem.go8
-rw-r--r--pkg/sentry/fsimpl/ext/inode.go2
-rw-r--r--pkg/sentry/fsimpl/gofer/filesystem.go62
-rw-r--r--pkg/sentry/fsimpl/gofer/gofer.go23
-rw-r--r--pkg/sentry/fsimpl/gofer/special_file.go28
-rw-r--r--pkg/sentry/fsimpl/host/host.go205
-rw-r--r--pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go36
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_test.go6
-rw-r--r--pkg/sentry/fsimpl/kernfs/symlink.go8
-rw-r--r--pkg/sentry/fsimpl/pipefs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go79
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go29
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go12
-rw-r--r--pkg/sentry/fsimpl/proc/task.go64
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go28
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go16
-rw-r--r--pkg/sentry/fsimpl/proc/task_net.go38
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go47
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go8
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go108
-rw-r--r--pkg/sentry/fsimpl/signalfd/BUILD20
-rw-r--r--pkg/sentry/fsimpl/signalfd/signalfd.go135
-rw-r--r--pkg/sentry/fsimpl/sockfs/BUILD1
-rw-r--r--pkg/sentry/fsimpl/sockfs/sockfs.go46
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go21
-rw-r--r--pkg/sentry/fsimpl/timerfd/BUILD17
-rw-r--r--pkg/sentry/fsimpl/timerfd/timerfd.go (renamed from pkg/sentry/vfs/timerfd.go)23
-rw-r--r--pkg/sentry/fsimpl/tmpfs/tmpfs.go39
-rw-r--r--pkg/sentry/hostfd/hostfd_unsafe.go46
-rw-r--r--pkg/sentry/kernel/BUILD1
-rw-r--r--pkg/sentry/kernel/kernel.go21
-rw-r--r--pkg/sentry/socket/hostinet/BUILD1
-rw-r--r--pkg/sentry/socket/hostinet/socket_vfs2.go4
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go62
-rw-r--r--pkg/sentry/socket/netlink/BUILD1
-rw-r--r--pkg/sentry/socket/netlink/provider_vfs2.go4
-rw-r--r--pkg/sentry/socket/netstack/BUILD2
-rw-r--r--pkg/sentry/socket/netstack/netstack.go13
-rw-r--r--pkg/sentry/socket/netstack/netstack_vfs2.go21
-rw-r--r--pkg/sentry/socket/unix/BUILD1
-rw-r--r--pkg/sentry/socket/unix/unix_vfs2.go4
-rw-r--r--pkg/sentry/syscalls/linux/BUILD3
-rw-r--r--pkg/sentry/syscalls/linux/sigset.go8
-rw-r--r--pkg/sentry/syscalls/linux/sys_epoll.go2
-rw-r--r--pkg/sentry/syscalls/linux/sys_poll.go4
-rw-r--r--pkg/sentry/syscalls/linux/sys_signal.go6
-rw-r--r--pkg/sentry/syscalls/linux/sys_tls_amd64.go (renamed from pkg/sentry/syscalls/linux/sys_tls.go)0
-rw-r--r--pkg/sentry/syscalls/linux/sys_tls_arm64.go28
-rw-r--r--pkg/sentry/syscalls/linux/sys_utsname.go2
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/BUILD4
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/eventfd.go4
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/signal.go100
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/timerfd.go16
-rw-r--r--pkg/sentry/syscalls/linux/vfs2/vfs2.go4
-rw-r--r--pkg/sentry/vfs/BUILD5
-rw-r--r--pkg/sentry/vfs/anonfs.go4
-rw-r--r--pkg/sentry/vfs/device.go2
-rw-r--r--pkg/sentry/vfs/file_description.go7
-rw-r--r--pkg/tcpip/buffer/view.go6
-rw-r--r--pkg/tcpip/buffer/view_test.go36
-rw-r--r--pkg/tcpip/header/tcp.go5
-rw-r--r--pkg/tcpip/network/ipv4/BUILD1
-rw-r--r--pkg/tcpip/network/ipv4/ipv4.go8
-rw-r--r--pkg/tcpip/network/ipv4/ipv4_test.go250
-rw-r--r--pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go11
-rw-r--r--pkg/tcpip/stack/iptables.go42
-rw-r--r--pkg/tcpip/stack/iptables_types.go13
-rw-r--r--pkg/tcpip/stack/ndp.go9
-rw-r--r--pkg/tcpip/stack/ndp_test.go24
-rw-r--r--pkg/tcpip/stack/nic.go2
-rw-r--r--pkg/tcpip/stack/stack.go16
-rw-r--r--pkg/tcpip/transport/tcp/rcv.go36
-rw-r--r--pkg/tcpip/transport/tcp/rcv_test.go4
-rw-r--r--pkg/tcpip/transport/tcp/segment.go2
-rw-r--r--pkg/tcpip/transport/tcp/snd.go208
-rw-r--r--pkg/tcpip/transport/tcp/tcp_test.go4
89 files changed, 1703 insertions, 656 deletions
diff --git a/pkg/abi/linux/dev.go b/pkg/abi/linux/dev.go
index 89f9a793f..fa3ae5f18 100644
--- a/pkg/abi/linux/dev.go
+++ b/pkg/abi/linux/dev.go
@@ -36,6 +36,10 @@ func DecodeDeviceID(rdev uint32) (uint16, uint32) {
//
// See Documentations/devices.txt and uapi/linux/major.h.
const (
+ // UNNAMED_MAJOR is the major device number for "unnamed" devices, whose
+ // minor numbers are dynamically allocated by the kernel.
+ UNNAMED_MAJOR = 0
+
// MEM_MAJOR is the major device number for "memory" character devices.
MEM_MAJOR = 1
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index a8d4f9d69..46d8b0b42 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -146,7 +146,7 @@ type IPTIP struct {
// OutputInterface is the output network interface.
OutputInterface [IFNAMSIZ]byte
- // InputInterfaceMask is the intput interface mask.
+ // InputInterfaceMask is the input interface mask.
InputInterfaceMask [IFNAMSIZ]byte
// OuputInterfaceMask is the output interface mask.
diff --git a/pkg/amutex/BUILD b/pkg/amutex/BUILD
index 9612f072e..ffc918846 100644
--- a/pkg/amutex/BUILD
+++ b/pkg/amutex/BUILD
@@ -6,6 +6,7 @@ go_library(
name = "amutex",
srcs = ["amutex.go"],
visibility = ["//:sandbox"],
+ deps = ["//pkg/syserror"],
)
go_test(
diff --git a/pkg/amutex/amutex.go b/pkg/amutex/amutex.go
index 1c4fd1784..a078a31db 100644
--- a/pkg/amutex/amutex.go
+++ b/pkg/amutex/amutex.go
@@ -18,6 +18,8 @@ package amutex
import (
"sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/syserror"
)
// Sleeper must be implemented by users of the abortable mutex to allow for
@@ -53,6 +55,21 @@ func (NoopSleeper) SleepFinish(success bool) {}
// Interrupted implements Sleeper.Interrupted.
func (NoopSleeper) Interrupted() bool { return false }
+// Block blocks until either receiving from ch succeeds (in which case it
+// returns nil) or sleeper is interrupted (in which case it returns
+// syserror.ErrInterrupted).
+func Block(sleeper Sleeper, ch <-chan struct{}) error {
+ cancel := sleeper.SleepStart()
+ select {
+ case <-ch:
+ sleeper.SleepFinish(true)
+ return nil
+ case <-cancel:
+ sleeper.SleepFinish(false)
+ return syserror.ErrInterrupted
+ }
+}
+
// AbortableMutex is an abortable mutex. It allows Lock() to be aborted while it
// waits to acquire the mutex.
type AbortableMutex struct {
diff --git a/pkg/sentry/arch/arch_aarch64.go b/pkg/sentry/arch/arch_aarch64.go
index 529980267..343f81f59 100644
--- a/pkg/sentry/arch/arch_aarch64.go
+++ b/pkg/sentry/arch/arch_aarch64.go
@@ -274,7 +274,7 @@ const (
func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int, error) {
switch regset {
case _NT_PRSTATUS:
- if maxlen < ptraceRegsSize {
+ if maxlen < registersSize {
return 0, syserror.EFAULT
}
return s.PtraceGetRegs(dst)
@@ -287,7 +287,7 @@ func (s *State) PtraceGetRegSet(regset uintptr, dst io.Writer, maxlen int) (int,
func (s *State) PtraceSetRegSet(regset uintptr, src io.Reader, maxlen int) (int, error) {
switch regset {
case _NT_PRSTATUS:
- if maxlen < ptraceRegsSize {
+ if maxlen < registersSize {
return 0, syserror.EFAULT
}
return s.PtraceSetRegs(src)
diff --git a/pkg/sentry/arch/signal_arm64.go b/pkg/sentry/arch/signal_arm64.go
index 1cb1adf8c..642c79dda 100644
--- a/pkg/sentry/arch/signal_arm64.go
+++ b/pkg/sentry/arch/signal_arm64.go
@@ -19,6 +19,7 @@ import (
"syscall"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/usermem"
)
@@ -134,6 +135,11 @@ func (c *context64) SignalSetup(st *Stack, act *SignalAct, info *SignalInfo, alt
c.Regs.Regs[1] = uint64(infoAddr)
c.Regs.Regs[2] = uint64(ucAddr)
c.Regs.Regs[30] = uint64(act.Restorer)
+
+ // Save the thread's floating point state.
+ c.sigFPState = append(c.sigFPState, c.aarch64FPState)
+ // Signal handler gets a clean floating point state.
+ c.aarch64FPState = newAarch64FPState()
return nil
}
@@ -155,5 +161,21 @@ func (c *context64) SignalRestore(st *Stack, rt bool) (linux.SignalSet, SignalSt
c.Regs.Sp = uc.MContext.Sp
c.Regs.Pstate = uc.MContext.Pstate
+ // Restore floating point state.
+ l := len(c.sigFPState)
+ if l > 0 {
+ c.aarch64FPState = c.sigFPState[l-1]
+ // NOTE(cl/133042258): State save requires that any slice
+ // elements from '[len:cap]' to be zero value.
+ c.sigFPState[l-1] = nil
+ c.sigFPState = c.sigFPState[0 : l-1]
+ } else {
+ // This might happen if sigreturn(2) calls are unbalanced with
+ // respect to signal handler entries. This is not expected so
+ // don't bother to do anything fancy with the floating point
+ // state.
+ log.Warningf("sigreturn unable to restore application fpstate")
+ }
+
return uc.Sigset, uc.Stack, nil
}
diff --git a/pkg/sentry/fs/gofer/session.go b/pkg/sentry/fs/gofer/session.go
index f6b3ef178..b5efc86f2 100644
--- a/pkg/sentry/fs/gofer/session.go
+++ b/pkg/sentry/fs/gofer/session.go
@@ -190,9 +190,9 @@ type session struct {
// be socket/pipe files. This allows unix domain sockets and named pipes to
// be used with paths that belong to a gofer.
//
- // TODO(gvisor.dev/issue/1200): there are few possible races with someone
- // stat'ing the file and another deleting it concurrently, where the file
- // will not be reported as socket file.
+ // There are a few possible races with someone stat'ing the file and another
+ // deleting it concurrently, where the file will not be reported as socket
+ // file.
overrides *overrideMaps `state:"wait"`
}
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 94db8fe5c..c03c65445 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -51,21 +51,37 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return nil, nil, syserror.EINVAL
}
- fs, root := fstype.newFilesystem(vfsObj, creds)
- return fs.VFSFilesystem(), root.VFSDentry(), nil
+ fs, root, err := fstype.newFilesystem(vfsObj, creds)
+ if err != nil {
+ return nil, nil, err
+ }
+ return fs.Filesystem.VFSFilesystem(), root.VFSDentry(), nil
+}
+
+type filesystem struct {
+ kernfs.Filesystem
+
+ devMinor uint32
}
// newFilesystem creates a new devpts filesystem with root directory and ptmx
// master inode. It returns the filesystem and root Dentry.
-func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*kernfs.Filesystem, *kernfs.Dentry) {
- fs := &kernfs.Filesystem{}
- fs.VFSFilesystem().Init(vfsObj, fstype, fs)
+func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials) (*filesystem, *kernfs.Dentry, error) {
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ fs := &filesystem{
+ devMinor: devMinor,
+ }
+ fs.Filesystem.VFSFilesystem().Init(vfsObj, fstype, fs)
// Construct the root directory. This is always inode id 1.
root := &rootInode{
slaves: make(map[uint32]*slaveInode),
}
- root.InodeAttrs.Init(creds, 1, linux.ModeDirectory|0555)
+ root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555)
root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
root.dentry.Init(root)
@@ -74,7 +90,7 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
master := &masterInode{
root: root,
}
- master.InodeAttrs.Init(creds, 2, linux.ModeCharacterDevice|0666)
+ master.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 2, linux.ModeCharacterDevice|0666)
master.dentry.Init(master)
// Add the master as a child of the root.
@@ -83,7 +99,13 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
})
root.IncLinks(links)
- return fs, &root.dentry
+ return fs, &root.dentry, nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
}
// rootInode is the root directory inode for the devpts mounts.
@@ -140,7 +162,7 @@ func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error)
}
// Linux always uses pty index + 3 as the inode id. See
// fs/devpts/inode.c:devpts_pty_new().
- slave.InodeAttrs.Init(creds, uint64(idx+3), linux.ModeCharacterDevice|0600)
+ slave.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600)
slave.dentry.Init(slave)
i.slaves[idx] = slave
diff --git a/pkg/sentry/fsimpl/eventfd/BUILD b/pkg/sentry/fsimpl/eventfd/BUILD
new file mode 100644
index 000000000..ea167d38c
--- /dev/null
+++ b/pkg/sentry/fsimpl/eventfd/BUILD
@@ -0,0 +1,33 @@
+load("//tools:defs.bzl", "go_library", "go_test")
+
+licenses(["notice"])
+
+go_library(
+ name = "eventfd",
+ srcs = ["eventfd.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/context",
+ "//pkg/fdnotifier",
+ "//pkg/log",
+ "//pkg/sentry/vfs",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ "//pkg/waiter",
+ ],
+)
+
+go_test(
+ name = "eventfd_test",
+ size = "small",
+ srcs = ["eventfd_test.go"],
+ library = ":eventfd",
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/sentry/contexttest",
+ "//pkg/sentry/vfs",
+ "//pkg/usermem",
+ "//pkg/waiter",
+ ],
+)
diff --git a/pkg/sentry/vfs/eventfd.go b/pkg/sentry/fsimpl/eventfd/eventfd.go
index f39dacacf..c573d7935 100644
--- a/pkg/sentry/vfs/eventfd.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd.go
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package vfs
+// Package eventfd implements event fds.
+package eventfd
import (
"math"
@@ -23,6 +24,7 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -32,9 +34,9 @@ import (
// notification (eventfd). Eventfds are usually internal to the Sentry but in
// certain situations they may be converted into a host-backed eventfd.
type EventFileDescription struct {
- vfsfd FileDescription
- FileDescriptionDefaultImpl
- DentryMetadataFileDescriptionImpl
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+ vfs.DentryMetadataFileDescriptionImpl
// queue is used to notify interested parties when the event object
// becomes readable or writable.
@@ -53,18 +55,18 @@ type EventFileDescription struct {
hostfd int
}
-var _ FileDescriptionImpl = (*EventFileDescription)(nil)
+var _ vfs.FileDescriptionImpl = (*EventFileDescription)(nil)
-// NewEventFD creates a new event fd.
-func (vfs *VirtualFilesystem) NewEventFD(initVal uint64, semMode bool, flags uint32) (*FileDescription, error) {
- vd := vfs.NewAnonVirtualDentry("[eventfd]")
+// New creates a new event fd.
+func New(vfsObj *vfs.VirtualFilesystem, initVal uint64, semMode bool, flags uint32) (*vfs.FileDescription, error) {
+ vd := vfsObj.NewAnonVirtualDentry("[eventfd]")
defer vd.DecRef()
efd := &EventFileDescription{
val: initVal,
semMode: semMode,
hostfd: -1,
}
- if err := efd.vfsfd.Init(efd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{
+ if err := efd.vfsfd.Init(efd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{
UseDentryMetadata: true,
DenyPRead: true,
DenyPWrite: true,
@@ -117,7 +119,7 @@ func (efd *EventFileDescription) Release() {
}
// Read implements FileDescriptionImpl.Read.
-func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ ReadOptions) (int64, error) {
+func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
if dst.NumBytes() < 8 {
return 0, syscall.EINVAL
}
@@ -128,7 +130,7 @@ func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequenc
}
// Write implements FileDescriptionImpl.Write.
-func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ WriteOptions) (int64, error) {
+func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
if src.NumBytes() < 8 {
return 0, syscall.EINVAL
}
diff --git a/pkg/sentry/vfs/eventfd_test.go b/pkg/sentry/fsimpl/eventfd/eventfd_test.go
index 2dff2d10b..20e3adffc 100644
--- a/pkg/sentry/vfs/eventfd_test.go
+++ b/pkg/sentry/fsimpl/eventfd/eventfd_test.go
@@ -12,13 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package vfs
+package eventfd
import (
"testing"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/contexttest"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
)
@@ -34,15 +35,15 @@ func TestEventFD(t *testing.T) {
for _, initVal := range initVals {
ctx := contexttest.Context(t)
- vfsObj := &VirtualFilesystem{}
+ vfsObj := &vfs.VirtualFilesystem{}
if err := vfsObj.Init(); err != nil {
t.Fatalf("VFS init: %v", err)
}
// Make a new eventfd that is writable.
- eventfd, err := vfsObj.NewEventFD(initVal, false, linux.O_RDWR)
+ eventfd, err := New(vfsObj, initVal, false, linux.O_RDWR)
if err != nil {
- t.Fatalf("NewEventFD failed: %v", err)
+ t.Fatalf("New() failed: %v", err)
}
defer eventfd.DecRef()
@@ -53,7 +54,7 @@ func TestEventFD(t *testing.T) {
data := []byte("00000124")
// Create and submit a write request.
- n, err := eventfd.Write(ctx, usermem.BytesIOSequence(data), WriteOptions{})
+ n, err := eventfd.Write(ctx, usermem.BytesIOSequence(data), vfs.WriteOptions{})
if err != nil {
t.Fatal(err)
}
@@ -72,19 +73,19 @@ func TestEventFD(t *testing.T) {
func TestEventFDStat(t *testing.T) {
ctx := contexttest.Context(t)
- vfsObj := &VirtualFilesystem{}
+ vfsObj := &vfs.VirtualFilesystem{}
if err := vfsObj.Init(); err != nil {
t.Fatalf("VFS init: %v", err)
}
// Make a new eventfd that is writable.
- eventfd, err := vfsObj.NewEventFD(0, false, linux.O_RDWR)
+ eventfd, err := New(vfsObj, 0, false, linux.O_RDWR)
if err != nil {
- t.Fatalf("NewEventFD failed: %v", err)
+ t.Fatalf("New() failed: %v", err)
}
defer eventfd.DecRef()
- statx, err := eventfd.Stat(ctx, StatOptions{
+ statx, err := eventfd.Stat(ctx, vfs.StatOptions{
Mask: linux.STATX_BASIC_STATS,
})
if err != nil {
diff --git a/pkg/sentry/fsimpl/ext/ext.go b/pkg/sentry/fsimpl/ext/ext.go
index 7176af6d1..dac6effbf 100644
--- a/pkg/sentry/fsimpl/ext/ext.go
+++ b/pkg/sentry/fsimpl/ext/ext.go
@@ -105,36 +105,50 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// EACCESS should be returned according to mount(2). Filesystem independent
// flags (like readonly) are currently not available in pkg/sentry/vfs.
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+
dev, err := getDeviceFd(source, opts)
if err != nil {
return nil, nil, err
}
- fs := filesystem{dev: dev, inodeCache: make(map[uint32]*inode)}
+ fs := filesystem{
+ dev: dev,
+ inodeCache: make(map[uint32]*inode),
+ devMinor: devMinor,
+ }
fs.vfsfs.Init(vfsObj, &fsType, &fs)
fs.sb, err = readSuperBlock(dev)
if err != nil {
+ fs.vfsfs.DecRef()
return nil, nil, err
}
if fs.sb.Magic() != linux.EXT_SUPER_MAGIC {
// mount(2) specifies that EINVAL should be returned if the superblock is
// invalid.
+ fs.vfsfs.DecRef()
return nil, nil, syserror.EINVAL
}
// Refuse to mount if the filesystem is incompatible.
if !isCompatible(fs.sb) {
+ fs.vfsfs.DecRef()
return nil, nil, syserror.EINVAL
}
fs.bgs, err = readBlockGroups(dev, fs.sb)
if err != nil {
+ fs.vfsfs.DecRef()
return nil, nil, err
}
rootInode, err := fs.getOrCreateInodeLocked(disklayout.RootDirInode)
if err != nil {
+ fs.vfsfs.DecRef()
return nil, nil, err
}
rootInode.incRef()
diff --git a/pkg/sentry/fsimpl/ext/filesystem.go b/pkg/sentry/fsimpl/ext/filesystem.go
index 77b644275..557963e03 100644
--- a/pkg/sentry/fsimpl/ext/filesystem.go
+++ b/pkg/sentry/fsimpl/ext/filesystem.go
@@ -64,6 +64,10 @@ type filesystem struct {
// bgs represents all the block group descriptors for the filesystem.
// Immutable after initialization.
bgs []disklayout.BlockGroup
+
+ // devMinor is this filesystem's device minor number. Immutable after
+ // initialization.
+ devMinor uint32
}
// Compiles only if filesystem implements vfs.FilesystemImpl.
@@ -366,7 +370,9 @@ func (fs *filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linu
}
// Release implements vfs.FilesystemImpl.Release.
-func (fs *filesystem) Release() {}
+func (fs *filesystem) Release() {
+ fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+}
// Sync implements vfs.FilesystemImpl.Sync.
func (fs *filesystem) Sync(ctx context.Context) error {
diff --git a/pkg/sentry/fsimpl/ext/inode.go b/pkg/sentry/fsimpl/ext/inode.go
index a98512350..485f86f4b 100644
--- a/pkg/sentry/fsimpl/ext/inode.go
+++ b/pkg/sentry/fsimpl/ext/inode.go
@@ -204,6 +204,8 @@ func (in *inode) statTo(stat *linux.Statx) {
stat.Atime = in.diskInode.AccessTime().StatxTimestamp()
stat.Ctime = in.diskInode.ChangeTime().StatxTimestamp()
stat.Mtime = in.diskInode.ModificationTime().StatxTimestamp()
+ stat.DevMajor = linux.UNNAMED_MAJOR
+ stat.DevMinor = in.fs.devMinor
// TODO(b/134676337): Set stat.Blocks which is the number of 512 byte blocks
// (including metadata blocks) required to represent this file.
}
diff --git a/pkg/sentry/fsimpl/gofer/filesystem.go b/pkg/sentry/fsimpl/gofer/filesystem.go
index 4a32821bd..7f2181216 100644
--- a/pkg/sentry/fsimpl/gofer/filesystem.go
+++ b/pkg/sentry/fsimpl/gofer/filesystem.go
@@ -21,6 +21,8 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/p9"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/host"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
"gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
@@ -835,6 +837,9 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
if d.isSynthetic() {
return nil, syserror.ENXIO
}
+ if d.fs.iopts.OpenSocketsByConnecting {
+ return d.connectSocketLocked(ctx, opts)
+ }
case linux.S_IFIFO:
if d.isSynthetic() {
return d.pipe.Open(ctx, mnt, &d.vfsd, opts.Flags)
@@ -843,10 +848,28 @@ func (d *dentry) openLocked(ctx context.Context, rp *vfs.ResolvingPath, opts *vf
return d.openSpecialFileLocked(ctx, mnt, opts)
}
+func (d *dentry) connectSocketLocked(ctx context.Context, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
+ if opts.Flags&linux.O_DIRECT != 0 {
+ return nil, syserror.EINVAL
+ }
+ fdObj, err := d.file.connect(ctx, p9.AnonymousSocket)
+ if err != nil {
+ return nil, err
+ }
+ fd, err := host.NewFD(ctx, kernel.KernelFromContext(ctx).HostMount(), fdObj.FD(), &host.NewFDOptions{
+ HaveFlags: true,
+ Flags: opts.Flags,
+ })
+ if err != nil {
+ fdObj.Close()
+ return nil, err
+ }
+ fdObj.Release()
+ return fd, nil
+}
+
func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts *vfs.OpenOptions) (*vfs.FileDescription, error) {
ats := vfs.AccessTypesForOpenFlags(opts)
- // Treat as a special file. This is done for non-synthetic pipes as well as
- // regular files when d.fs.opts.regularFilesUseSpecialFileFD is true.
if opts.Flags&linux.O_DIRECT != 0 {
return nil, syserror.EINVAL
}
@@ -854,10 +877,15 @@ func (d *dentry) openSpecialFileLocked(ctx context.Context, mnt *vfs.Mount, opts
if err != nil {
return nil, err
}
+ seekable := d.fileType() == linux.S_IFREG
fd := &specialFileFD{
- handle: h,
+ handle: h,
+ seekable: seekable,
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &d.vfsd, &vfs.FileDescriptionOptions{
+ DenyPRead: !seekable,
+ DenyPWrite: !seekable,
+ }); err != nil {
h.close(ctx)
return nil, err
}
@@ -888,7 +916,11 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
creds := rp.Credentials()
name := rp.Component()
- fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, (p9.OpenFlags)(opts.Flags), (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
+ // Filter file creation flags and O_LARGEFILE out; the create RPC already
+ // has the semantics of O_CREAT|O_EXCL, while some servers will choke on
+ // O_LARGEFILE.
+ createFlags := p9.OpenFlags(opts.Flags &^ (linux.O_CREAT | linux.O_EXCL | linux.O_NOCTTY | linux.O_TRUNC | linux.O_LARGEFILE))
+ fdobj, openFile, createQID, _, err := dirfile.create(ctx, name, createFlags, (p9.FileMode)(opts.Mode), (p9.UID)(creds.EffectiveKUID), (p9.GID)(creds.EffectiveKGID))
if err != nil {
dirfile.close(ctx)
return nil, err
@@ -896,7 +928,7 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
// Then we need to walk to the file we just created to get a non-open fid
// representing it, and to get its metadata. This must use d.file since, as
// explained above, dirfile was invalidated by dirfile.Create().
- walkQID, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
+ _, nonOpenFile, attrMask, attr, err := d.file.walkGetAttrOne(ctx, name)
if err != nil {
openFile.close(ctx)
if fdobj != nil {
@@ -904,17 +936,6 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
return nil, err
}
- // Sanity-check that we walked to the file we created.
- if createQID.Path != walkQID.Path {
- // Probably due to concurrent remote filesystem mutation?
- ctx.Warningf("gofer.dentry.createAndOpenChildLocked: created file has QID %v before walk, QID %v after (interop=%v)", createQID, walkQID, d.fs.opts.interop)
- nonOpenFile.close(ctx)
- openFile.close(ctx)
- if fdobj != nil {
- fdobj.Close()
- }
- return nil, syserror.EAGAIN
- }
// Construct the new dentry.
child, err := d.fs.newDentry(ctx, nonOpenFile, createQID, attrMask, &attr)
@@ -960,16 +981,21 @@ func (d *dentry) createAndOpenChildLocked(ctx context.Context, rp *vfs.Resolving
}
childVFSFD = &fd.vfsfd
} else {
+ seekable := child.fileType() == linux.S_IFREG
fd := &specialFileFD{
handle: handle{
file: openFile,
fd: -1,
},
+ seekable: seekable,
}
if fdobj != nil {
fd.handle.fd = int32(fdobj.Release())
}
- if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{}); err != nil {
+ if err := fd.vfsfd.Init(fd, opts.Flags, mnt, &child.vfsd, &vfs.FileDescriptionOptions{
+ DenyPRead: !seekable,
+ DenyPWrite: !seekable,
+ }); err != nil {
fd.handle.close(ctx)
return nil, err
}
diff --git a/pkg/sentry/fsimpl/gofer/gofer.go b/pkg/sentry/fsimpl/gofer/gofer.go
index 9ab8fdc65..1da8d5d82 100644
--- a/pkg/sentry/fsimpl/gofer/gofer.go
+++ b/pkg/sentry/fsimpl/gofer/gofer.go
@@ -81,6 +81,9 @@ type filesystem struct {
// clock is a realtime clock used to set timestamps in file operations.
clock ktime.Clock
+ // devMinor is the filesystem's minor device number. devMinor is immutable.
+ devMinor uint32
+
// uid and gid are the effective KUID and KGID of the filesystem's creator,
// and are used as the owner and group for files that don't specify one.
// uid and gid are immutable.
@@ -218,6 +221,10 @@ type InternalFilesystemOptions struct {
// which servers can handle only a single client and report failure if that
// client disconnects.
LeakConnection bool
+
+ // If OpenSocketsByConnecting is true, silently translate attempts to open
+ // files identifying as sockets to connect RPCs.
+ OpenSocketsByConnecting bool
}
// Name implements vfs.FilesystemType.Name.
@@ -399,14 +406,21 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
// Construct the filesystem object.
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ attachFile.close(ctx)
+ client.Close()
+ return nil, nil, err
+ }
fs := &filesystem{
mfp: mfp,
opts: fsopts,
iopts: iopts,
- uid: creds.EffectiveKUID,
- gid: creds.EffectiveKGID,
client: client,
clock: ktime.RealtimeClockFromContext(ctx),
+ devMinor: devMinor,
+ uid: creds.EffectiveKUID,
+ gid: creds.EffectiveKGID,
syncableDentries: make(map[*dentry]struct{}),
specialFileFDs: make(map[*specialFileFD]struct{}),
}
@@ -464,6 +478,8 @@ func (fs *filesystem) Release() {
// Close the connection to the server. This implicitly clunks all fids.
fs.client.Close()
}
+
+ fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
}
// dentry implements vfs.DentryImpl.
@@ -796,7 +812,8 @@ func (d *dentry) statTo(stat *linux.Statx) {
stat.Btime = statxTimestampFromDentry(atomic.LoadInt64(&d.btime))
stat.Ctime = statxTimestampFromDentry(atomic.LoadInt64(&d.ctime))
stat.Mtime = statxTimestampFromDentry(atomic.LoadInt64(&d.mtime))
- // TODO(gvisor.dev/issue/1198): device number
+ stat.DevMajor = linux.UNNAMED_MAJOR
+ stat.DevMinor = d.fs.devMinor
}
func (d *dentry) setStat(ctx context.Context, creds *auth.Credentials, stat *linux.Statx, mnt *vfs.Mount) error {
diff --git a/pkg/sentry/fsimpl/gofer/special_file.go b/pkg/sentry/fsimpl/gofer/special_file.go
index 507e0e276..a464e6a94 100644
--- a/pkg/sentry/fsimpl/gofer/special_file.go
+++ b/pkg/sentry/fsimpl/gofer/special_file.go
@@ -33,13 +33,14 @@ import (
type specialFileFD struct {
fileDescription
- // handle is immutable.
+ // handle is used for file I/O. handle is immutable.
handle handle
- // off is the file offset. off is protected by mu. (POSIX 2.9.7 only
- // requires operations using the file offset to be atomic for regular files
- // and symlinks; however, since specialFileFD may be used for regular
- // files, we apply this atomicity unconditionally.)
+ // seekable is true if this file description represents a file for which
+ // file offset is significant, i.e. a regular file. seekable is immutable.
+ seekable bool
+
+ // If seekable is true, off is the file offset. off is protected by mu.
mu sync.Mutex
off int64
}
@@ -63,7 +64,7 @@ func (fd *specialFileFD) OnClose(ctx context.Context) error {
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- if offset < 0 {
+ if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
if opts.Flags != 0 {
@@ -91,6 +92,10 @@ func (fd *specialFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offs
// Read implements vfs.FileDescriptionImpl.Read.
func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
+ if !fd.seekable {
+ return fd.PRead(ctx, dst, -1, opts)
+ }
+
fd.mu.Lock()
n, err := fd.PRead(ctx, dst, fd.off, opts)
fd.off += n
@@ -100,14 +105,14 @@ func (fd *specialFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts
// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- if offset < 0 {
+ if fd.seekable && offset < 0 {
return 0, syserror.EINVAL
}
if opts.Flags != 0 {
return 0, syserror.EOPNOTSUPP
}
- if fd.dentry().fileType() == linux.S_IFREG {
+ if fd.seekable {
limit, err := vfs.CheckLimit(ctx, offset, src.NumBytes())
if err != nil {
return 0, err
@@ -130,6 +135,10 @@ func (fd *specialFileFD) PWrite(ctx context.Context, src usermem.IOSequence, off
// Write implements vfs.FileDescriptionImpl.Write.
func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
+ if !fd.seekable {
+ return fd.PWrite(ctx, src, -1, opts)
+ }
+
fd.mu.Lock()
n, err := fd.PWrite(ctx, src, fd.off, opts)
fd.off += n
@@ -139,6 +148,9 @@ func (fd *specialFileFD) Write(ctx context.Context, src usermem.IOSequence, opts
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *specialFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
+ if !fd.seekable {
+ return 0, syserror.ESPIPE
+ }
fd.mu.Lock()
defer fd.mu.Unlock()
switch whence {
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index 144e04905..8caf55a1b 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -40,8 +40,20 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)
-// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
-func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
+// NewFDOptions contains options to NewFD.
+type NewFDOptions struct {
+ // If IsTTY is true, the file descriptor is a TTY.
+ IsTTY bool
+
+ // If HaveFlags is true, use Flags for the new file description. Otherwise,
+ // the new file description will inherit flags from hostFD.
+ HaveFlags bool
+ Flags uint32
+}
+
+// NewFD returns a vfs.FileDescription representing the given host file
+// descriptor. mnt must be Kernel.HostMount().
+func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) (*vfs.FileDescription, error) {
fs, ok := mnt.Filesystem().Impl().(*filesystem)
if !ok {
return nil, fmt.Errorf("can't import host FDs into filesystems of type %T", mnt.Filesystem().Impl())
@@ -53,6 +65,16 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
return nil, err
}
+ flags := opts.Flags
+ if !opts.HaveFlags {
+ // Get flags for the imported FD.
+ flagsInt, err := unix.FcntlInt(uintptr(hostFD), syscall.F_GETFL, 0)
+ if err != nil {
+ return nil, err
+ }
+ flags = uint32(flagsInt)
+ }
+
fileMode := linux.FileMode(s.Mode)
fileType := fileMode.FileType()
@@ -65,7 +87,7 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
i := &inode{
hostFD: hostFD,
seekable: seekable,
- isTTY: isTTY,
+ isTTY: opts.IsTTY,
canMap: canMap(uint32(fileType)),
wouldBlock: wouldBlock(uint32(fileType)),
ino: fs.NextIno(),
@@ -95,7 +117,14 @@ func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs
// i.open will take a reference on d.
defer d.DecRef()
- return i.open(ctx, d.VFSDentry(), mnt)
+ return i.open(ctx, d.VFSDentry(), mnt, flags)
+}
+
+// ImportFD sets up and returns a vfs.FileDescription from a donated fd.
+func ImportFD(ctx context.Context, mnt *vfs.Mount, hostFD int, isTTY bool) (*vfs.FileDescription, error) {
+ return NewFD(ctx, mnt, hostFD, &NewFDOptions{
+ IsTTY: isTTY,
+ })
}
// filesystemType implements vfs.FilesystemType.
@@ -115,15 +144,28 @@ func (filesystemType) Name() string {
//
// Note that there should only ever be one instance of host.filesystem,
// a global mount for host fds.
-func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem {
- fs := &filesystem{}
+func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) {
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, err
+ }
+ fs := &filesystem{
+ devMinor: devMinor,
+ }
fs.VFSFilesystem().Init(vfsObj, filesystemType{}, fs)
- return fs.VFSFilesystem()
+ return fs.VFSFilesystem(), nil
}
// filesystem implements vfs.FilesystemImpl.
type filesystem struct {
kernfs.Filesystem
+
+ devMinor uint32
+}
+
+func (fs *filesystem) Release() {
+ fs.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
}
func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
@@ -185,19 +227,6 @@ type inode struct {
queue waiter.Queue
}
-// Note that these flags may become out of date, since they can be modified
-// on the host, e.g. with fcntl.
-func fileFlagsFromHostFD(fd int) (uint32, error) {
- flags, err := unix.FcntlInt(uintptr(fd), syscall.F_GETFL, 0)
- if err != nil {
- log.Warningf("Failed to get file flags for donated FD %d: %v", fd, err)
- return 0, err
- }
- // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
- flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
- return uint32(flags), nil
-}
-
// CheckPermissions implements kernfs.Inode.
func (i *inode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
var s syscall.Stat_t
@@ -219,7 +248,7 @@ func (i *inode) Mode() linux.FileMode {
}
// Stat implements kernfs.Inode.
-func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+func (i *inode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
if opts.Mask&linux.STATX__RESERVED != 0 {
return linux.Statx{}, syserror.EINVAL
}
@@ -227,73 +256,73 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro
return linux.Statx{}, syserror.EINVAL
}
+ fs := vfsfs.Impl().(*filesystem)
+
// Limit our host call only to known flags.
mask := opts.Mask & linux.STATX_ALL
var s unix.Statx_t
err := unix.Statx(i.hostFD, "", int(unix.AT_EMPTY_PATH|opts.Sync), int(mask), &s)
- // Fallback to fstat(2), if statx(2) is not supported on the host.
- //
- // TODO(b/151263641): Remove fallback.
if err == syserror.ENOSYS {
- return i.fstat(opts)
- } else if err != nil {
+ // Fallback to fstat(2), if statx(2) is not supported on the host.
+ //
+ // TODO(b/151263641): Remove fallback.
+ return i.fstat(fs)
+ }
+ if err != nil {
return linux.Statx{}, err
}
- ls := linux.Statx{Mask: mask}
- // Unconditionally fill blksize, attributes, and device numbers, as indicated
- // by /include/uapi/linux/stat.h.
- //
- // RdevMajor/RdevMinor are left as zero, so as not to expose host device
- // numbers.
- //
- // TODO(gvisor.dev/issue/1672): Use kernfs-specific, internally defined
- // device numbers. If we use the device number from the host, it may collide
- // with another sentry-internal device number. We handle device/inode
- // numbers without relying on the host to prevent collisions.
- ls.Blksize = s.Blksize
- ls.Attributes = s.Attributes
- ls.AttributesMask = s.Attributes_mask
-
- if mask&linux.STATX_TYPE != 0 {
+ // Unconditionally fill blksize, attributes, and device numbers, as
+ // indicated by /include/uapi/linux/stat.h. Inode number is always
+ // available, since we use our own rather than the host's.
+ ls := linux.Statx{
+ Mask: linux.STATX_INO,
+ Blksize: s.Blksize,
+ Attributes: s.Attributes,
+ Ino: i.ino,
+ AttributesMask: s.Attributes_mask,
+ DevMajor: linux.UNNAMED_MAJOR,
+ DevMinor: fs.devMinor,
+ }
+
+ // Copy other fields that were returned by the host. RdevMajor/RdevMinor
+ // are never copied (and therefore left as zero), so as not to expose host
+ // device numbers.
+ ls.Mask |= s.Mask & linux.STATX_ALL
+ if s.Mask&linux.STATX_TYPE != 0 {
ls.Mode |= s.Mode & linux.S_IFMT
}
- if mask&linux.STATX_MODE != 0 {
+ if s.Mask&linux.STATX_MODE != 0 {
ls.Mode |= s.Mode &^ linux.S_IFMT
}
- if mask&linux.STATX_NLINK != 0 {
+ if s.Mask&linux.STATX_NLINK != 0 {
ls.Nlink = s.Nlink
}
- if mask&linux.STATX_UID != 0 {
+ if s.Mask&linux.STATX_UID != 0 {
ls.UID = s.Uid
}
- if mask&linux.STATX_GID != 0 {
+ if s.Mask&linux.STATX_GID != 0 {
ls.GID = s.Gid
}
- if mask&linux.STATX_ATIME != 0 {
+ if s.Mask&linux.STATX_ATIME != 0 {
ls.Atime = unixToLinuxStatxTimestamp(s.Atime)
}
- if mask&linux.STATX_BTIME != 0 {
+ if s.Mask&linux.STATX_BTIME != 0 {
ls.Btime = unixToLinuxStatxTimestamp(s.Btime)
}
- if mask&linux.STATX_CTIME != 0 {
+ if s.Mask&linux.STATX_CTIME != 0 {
ls.Ctime = unixToLinuxStatxTimestamp(s.Ctime)
}
- if mask&linux.STATX_MTIME != 0 {
+ if s.Mask&linux.STATX_MTIME != 0 {
ls.Mtime = unixToLinuxStatxTimestamp(s.Mtime)
}
- if mask&linux.STATX_SIZE != 0 {
+ if s.Mask&linux.STATX_SIZE != 0 {
ls.Size = s.Size
}
- if mask&linux.STATX_BLOCKS != 0 {
+ if s.Mask&linux.STATX_BLOCKS != 0 {
ls.Blocks = s.Blocks
}
- // Use our own internal inode number.
- if mask&linux.STATX_INO != 0 {
- ls.Ino = i.ino
- }
-
return ls, nil
}
@@ -305,36 +334,30 @@ func (i *inode) Stat(_ *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, erro
// of a mask or sync flags. fstat(2) does not provide any metadata
// equivalent to Statx.Attributes, Statx.AttributesMask, or Statx.Btime, so
// those fields remain empty.
-func (i *inode) fstat(opts vfs.StatOptions) (linux.Statx, error) {
+func (i *inode) fstat(fs *filesystem) (linux.Statx, error) {
var s unix.Stat_t
if err := unix.Fstat(i.hostFD, &s); err != nil {
return linux.Statx{}, err
}
- // Note that rdev numbers are left as 0; do not expose host device numbers.
- ls := linux.Statx{
- Mask: linux.STATX_BASIC_STATS,
- Blksize: uint32(s.Blksize),
- Nlink: uint32(s.Nlink),
- UID: s.Uid,
- GID: s.Gid,
- Mode: uint16(s.Mode),
- Size: uint64(s.Size),
- Blocks: uint64(s.Blocks),
- Atime: timespecToStatxTimestamp(s.Atim),
- Ctime: timespecToStatxTimestamp(s.Ctim),
- Mtime: timespecToStatxTimestamp(s.Mtim),
- }
-
- // Use our own internal inode number.
- //
- // TODO(gvisor.dev/issue/1672): Use a kernfs-specific device number as well.
- // If we use the device number from the host, it may collide with another
- // sentry-internal device number. We handle device/inode numbers without
- // relying on the host to prevent collisions.
- ls.Ino = i.ino
-
- return ls, nil
+ // As with inode.Stat(), we always use internal device and inode numbers,
+ // and never expose the host's represented device numbers.
+ return linux.Statx{
+ Mask: linux.STATX_BASIC_STATS,
+ Blksize: uint32(s.Blksize),
+ Nlink: uint32(s.Nlink),
+ UID: s.Uid,
+ GID: s.Gid,
+ Mode: uint16(s.Mode),
+ Ino: i.ino,
+ Size: uint64(s.Size),
+ Blocks: uint64(s.Blocks),
+ Atime: timespecToStatxTimestamp(s.Atim),
+ Ctime: timespecToStatxTimestamp(s.Ctim),
+ Mtime: timespecToStatxTimestamp(s.Mtim),
+ DevMajor: linux.UNNAMED_MAJOR,
+ DevMinor: fs.devMinor,
+ }, nil
}
// SetStat implements kernfs.Inode.
@@ -399,19 +422,19 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
if i.Mode().FileType() == linux.S_IFSOCK {
return nil, syserror.ENXIO
}
- return i.open(ctx, vfsd, rp.Mount())
+ return i.open(ctx, vfsd, rp.Mount(), opts.Flags)
}
-func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.FileDescription, error) {
+func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, error) {
var s syscall.Stat_t
if err := syscall.Fstat(i.hostFD, &s); err != nil {
return nil, err
}
fileType := s.Mode & linux.FileTypeMask
- flags, err := fileFlagsFromHostFD(i.hostFD)
- if err != nil {
- return nil, err
- }
+
+ // Constrain flags to a subset we can handle.
+ // TODO(gvisor.dev/issue/1672): implement behavior corresponding to these allowed flags.
+ flags &= syscall.O_ACCMODE | syscall.O_DIRECT | syscall.O_NONBLOCK | syscall.O_DSYNC | syscall.O_SYNC | syscall.O_APPEND
if fileType == syscall.S_IFSOCK {
if i.isTTY {
@@ -453,8 +476,6 @@ func (i *inode) open(ctx context.Context, d *vfs.Dentry, mnt *vfs.Mount) (*vfs.F
}
// fileDescription is embedded by host fd implementations of FileDescriptionImpl.
-//
-// TODO(gvisor.dev/issue/1672): Implement Waitable interface.
type fileDescription struct {
vfsfd vfs.FileDescription
vfs.FileDescriptionDefaultImpl
@@ -471,12 +492,12 @@ type fileDescription struct {
// SetStat implements vfs.FileDescriptionImpl.
func (f *fileDescription) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
creds := auth.CredentialsFromContext(ctx)
- return f.inode.SetStat(ctx, nil, creds, opts)
+ return f.inode.SetStat(ctx, f.vfsfd.Mount().Filesystem(), creds, opts)
}
// Stat implements vfs.FileDescriptionImpl.
func (f *fileDescription) Stat(_ context.Context, opts vfs.StatOptions) (linux.Statx, error) {
- return f.inode.Stat(nil, opts)
+ return f.inode.Stat(f.vfsfd.Mount().Filesystem(), opts)
}
// Release implements vfs.FileDescriptionImpl.
diff --git a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
index c7779fc11..1568a9d49 100644
--- a/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
+++ b/pkg/sentry/fsimpl/kernfs/dynamic_bytes_file.go
@@ -44,11 +44,11 @@ type DynamicBytesFile struct {
var _ Inode = (*DynamicBytesFile)(nil)
// Init initializes a dynamic bytes file.
-func (f *DynamicBytesFile) Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode) {
+func (f *DynamicBytesFile) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode) {
if perm&^linux.PermissionsMask != 0 {
panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
}
- f.InodeAttrs.Init(creds, ino, linux.ModeRegular|perm)
+ f.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeRegular|perm)
f.data = data
}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 615592d5f..982daa2e6 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -192,15 +192,17 @@ func (InodeNotSymlink) Getlink(context.Context, *vfs.Mount) (vfs.VirtualDentry,
//
// Must be initialized by Init prior to first use.
type InodeAttrs struct {
- ino uint64
- mode uint32
- uid uint32
- gid uint32
- nlink uint32
+ devMajor uint32
+ devMinor uint32
+ ino uint64
+ mode uint32
+ uid uint32
+ gid uint32
+ nlink uint32
}
// Init initializes this InodeAttrs.
-func (a *InodeAttrs) Init(creds *auth.Credentials, ino uint64, mode linux.FileMode) {
+func (a *InodeAttrs) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, mode linux.FileMode) {
if mode.FileType() == 0 {
panic(fmt.Sprintf("No file type specified in 'mode' for InodeAttrs.Init(): mode=0%o", mode))
}
@@ -209,6 +211,8 @@ func (a *InodeAttrs) Init(creds *auth.Credentials, ino uint64, mode linux.FileMo
if mode.FileType() == linux.ModeDirectory {
nlink = 2
}
+ a.devMajor = devMajor
+ a.devMinor = devMinor
atomic.StoreUint64(&a.ino, ino)
atomic.StoreUint32(&a.mode, uint32(mode))
atomic.StoreUint32(&a.uid, uint32(creds.EffectiveKUID))
@@ -216,6 +220,16 @@ func (a *InodeAttrs) Init(creds *auth.Credentials, ino uint64, mode linux.FileMo
atomic.StoreUint32(&a.nlink, nlink)
}
+// DevMajor returns the device major number.
+func (a *InodeAttrs) DevMajor() uint32 {
+ return a.devMajor
+}
+
+// DevMinor returns the device minor number.
+func (a *InodeAttrs) DevMinor() uint32 {
+ return a.devMinor
+}
+
// Ino returns the inode id.
func (a *InodeAttrs) Ino() uint64 {
return atomic.LoadUint64(&a.ino)
@@ -232,6 +246,8 @@ func (a *InodeAttrs) Mode() linux.FileMode {
func (a *InodeAttrs) Stat(*vfs.Filesystem, vfs.StatOptions) (linux.Statx, error) {
var stat linux.Statx
stat.Mask = linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_UID | linux.STATX_GID | linux.STATX_INO | linux.STATX_NLINK
+ stat.DevMajor = a.devMajor
+ stat.DevMinor = a.devMinor
stat.Ino = atomic.LoadUint64(&a.ino)
stat.Mode = uint16(a.Mode())
stat.UID = atomic.LoadUint32(&a.uid)
@@ -544,9 +560,9 @@ type StaticDirectory struct {
var _ Inode = (*StaticDirectory)(nil)
// NewStaticDir creates a new static directory and returns its dentry.
-func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry {
+func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry {
inode := &StaticDirectory{}
- inode.Init(creds, ino, perm)
+ inode.Init(creds, devMajor, devMinor, ino, perm)
dentry := &Dentry{}
dentry.Init(inode)
@@ -559,11 +575,11 @@ func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, chil
}
// Init initializes StaticDirectory.
-func (s *StaticDirectory) Init(creds *auth.Credentials, ino uint64, perm linux.FileMode) {
+func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
if perm&^linux.PermissionsMask != 0 {
panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
}
- s.InodeAttrs.Init(creds, ino, linux.ModeDirectory|perm)
+ s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm)
}
// Open implements kernfs.Inode.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_test.go b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
index 1c5d3e7e7..412cf6ac9 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_test.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_test.go
@@ -75,7 +75,7 @@ type file struct {
func (fs *filesystem) newFile(creds *auth.Credentials, content string) *kernfs.Dentry {
f := &file{}
f.content = content
- f.DynamicBytesFile.Init(creds, fs.NextIno(), f, 0777)
+ f.DynamicBytesFile.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), f, 0777)
d := &kernfs.Dentry{}
d.Init(f)
@@ -107,7 +107,7 @@ type readonlyDir struct {
func (fs *filesystem) newReadonlyDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry {
dir := &readonlyDir{}
- dir.attrs.Init(creds, fs.NextIno(), linux.ModeDirectory|mode)
+ dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode)
dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
dir.dentry.Init(dir)
@@ -137,7 +137,7 @@ type dir struct {
func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry {
dir := &dir{}
dir.fs = fs
- dir.attrs.Init(creds, fs.NextIno(), linux.ModeDirectory|mode)
+ dir.attrs.Init(creds, 0 /* devMajor */, 0 /* devMinor */, fs.NextIno(), linux.ModeDirectory|mode)
dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true})
dir.dentry.Init(dir)
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 0aa6dc979..2ab3f53fd 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -35,9 +35,9 @@ type StaticSymlink struct {
var _ Inode = (*StaticSymlink)(nil)
// NewStaticSymlink creates a new symlink file pointing to 'target'.
-func NewStaticSymlink(creds *auth.Credentials, ino uint64, target string) *Dentry {
+func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) *Dentry {
inode := &StaticSymlink{}
- inode.Init(creds, ino, target)
+ inode.Init(creds, devMajor, devMinor, ino, target)
d := &Dentry{}
d.Init(inode)
@@ -45,9 +45,9 @@ func NewStaticSymlink(creds *auth.Credentials, ino uint64, target string) *Dentr
}
// Init initializes the instance.
-func (s *StaticSymlink) Init(creds *auth.Credentials, ino uint64, target string) {
+func (s *StaticSymlink) Init(creds *auth.Credentials, devMajor uint32, devMinor uint32, ino uint64, target string) {
s.target = target
- s.InodeAttrs.Init(creds, ino, linux.ModeSymlink|0777)
+ s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeSymlink|0777)
}
// Readlink implements Inode.
diff --git a/pkg/sentry/fsimpl/pipefs/BUILD b/pkg/sentry/fsimpl/pipefs/BUILD
index 0d411606f..5950a2d59 100644
--- a/pkg/sentry/fsimpl/pipefs/BUILD
+++ b/pkg/sentry/fsimpl/pipefs/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/fspath",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 5375e5e75..cab771211 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -17,8 +17,11 @@
package pipefs
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/kernel/pipe"
@@ -40,20 +43,36 @@ func (filesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFile
panic("pipefs.filesystemType.GetFilesystem should never be called")
}
-// TODO(gvisor.dev/issue/1193):
-//
-// - kernfs does not provide a way to implement statfs, from which we
-// should indicate PIPEFS_MAGIC.
-//
-// - kernfs does not provide a way to override names for
-// vfs.FilesystemImpl.PrependPath(); pipefs inodes should use synthetic
-// name fmt.Sprintf("pipe:[%d]", inode.ino).
+type filesystem struct {
+ kernfs.Filesystem
+
+ devMinor uint32
+}
// NewFilesystem sets up and returns a new vfs.Filesystem implemented by pipefs.
-func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem {
- fs := &kernfs.Filesystem{}
- fs.VFSFilesystem().Init(vfsObj, filesystemType{}, fs)
- return fs.VFSFilesystem()
+func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) {
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, err
+ }
+ fs := &filesystem{
+ devMinor: devMinor,
+ }
+ fs.Filesystem.VFSFilesystem().Init(vfsObj, filesystemType{}, fs)
+ return fs.Filesystem.VFSFilesystem(), nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+ inode := vd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode)
+ b.PrependComponent(fmt.Sprintf("pipe:[%d]", inode.ino))
+ return vfs.PrependPathSyntheticError{}
}
// inode implements kernfs.Inode.
@@ -71,11 +90,11 @@ type inode struct {
ctime ktime.Time
}
-func newInode(ctx context.Context, fs *kernfs.Filesystem) *inode {
+func newInode(ctx context.Context, fs *filesystem) *inode {
creds := auth.CredentialsFromContext(ctx)
return &inode{
pipe: pipe.NewVFSPipe(false /* isNamed */, pipe.DefaultPipeSize, usermem.PageSize),
- ino: fs.NextIno(),
+ ino: fs.Filesystem.NextIno(),
uid: creds.EffectiveKUID,
gid: creds.EffectiveKGID,
ctime: ktime.NowFromContext(ctx),
@@ -98,19 +117,20 @@ func (i *inode) Mode() linux.FileMode {
func (i *inode) Stat(vfsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
ts := linux.NsecToStatxTimestamp(i.ctime.Nanoseconds())
return linux.Statx{
- Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS,
- Blksize: usermem.PageSize,
- Nlink: 1,
- UID: uint32(i.uid),
- GID: uint32(i.gid),
- Mode: pipeMode,
- Ino: i.ino,
- Size: 0,
- Blocks: 0,
- Atime: ts,
- Ctime: ts,
- Mtime: ts,
- // TODO(gvisor.dev/issue/1197): Device number.
+ Mask: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_NLINK | linux.STATX_UID | linux.STATX_GID | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME | linux.STATX_INO | linux.STATX_SIZE | linux.STATX_BLOCKS,
+ Blksize: usermem.PageSize,
+ Nlink: 1,
+ UID: uint32(i.uid),
+ GID: uint32(i.gid),
+ Mode: pipeMode,
+ Ino: i.ino,
+ Size: 0,
+ Blocks: 0,
+ Atime: ts,
+ Ctime: ts,
+ Mtime: ts,
+ DevMajor: linux.UNNAMED_MAJOR,
+ DevMinor: vfsfs.Impl().(*filesystem).devMinor,
}, nil
}
@@ -122,6 +142,9 @@ func (i *inode) SetStat(ctx context.Context, vfsfs *vfs.Filesystem, creds *auth.
return syserror.EPERM
}
+// TODO(gvisor.dev/issue/1193): kernfs does not provide a way to implement
+// statfs, from which we should indicate PIPEFS_MAGIC.
+
// Open implements kernfs.Inode.Open.
func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
return i.pipe.Open(ctx, rp.Mount(), vfsd, opts.Flags)
@@ -132,7 +155,7 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
//
// Preconditions: mnt.Filesystem() must have been returned by NewFilesystem().
func NewConnectedPipeFDs(ctx context.Context, mnt *vfs.Mount, flags uint32) (*vfs.FileDescription, *vfs.FileDescription) {
- fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
+ fs := mnt.Filesystem().Impl().(*filesystem)
inode := newInode(ctx, fs)
var d kernfs.Dentry
d.Init(inode)
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 104fc9030..609210253 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -41,6 +41,12 @@ func (FilesystemType) Name() string {
return Name
}
+type filesystem struct {
+ kernfs.Filesystem
+
+ devMinor uint32
+}
+
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
k := kernel.KernelFromContext(ctx)
@@ -51,8 +57,13 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
if pidns == nil {
return nil, nil, fmt.Errorf("procfs requires a PID namespace")
}
-
- procfs := &kernfs.Filesystem{}
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+ procfs := &filesystem{
+ devMinor: devMinor,
+ }
procfs.VFSFilesystem().Init(vfsObj, &ft, procfs)
var cgroups map[string]string
@@ -61,21 +72,27 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
cgroups = data.Cgroups
}
- _, dentry := newTasksInode(procfs, k, pidns, cgroups)
+ _, dentry := procfs.newTasksInode(k, pidns, cgroups)
return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
}
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
+}
+
// dynamicInode is an overfitted interface for common Inodes with
// dynamicByteSource types used in procfs.
type dynamicInode interface {
kernfs.Inode
vfs.DynamicBytesSource
- Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode)
+ Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode)
}
-func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
- inode.Init(creds, ino, inode, perm)
+func (fs *filesystem) newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
+ inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm)
d := &kernfs.Dentry{}
d.Init(inode)
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index a5cfa8333..36a911db4 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -37,23 +37,23 @@ type subtasksInode struct {
kernfs.OrderedChildren
kernfs.AlwaysValid
+ fs *filesystem
task *kernel.Task
pidns *kernel.PIDNamespace
- inoGen InoGenerator
cgroupControllers map[string]string
}
var _ kernfs.Inode = (*subtasksInode)(nil)
-func newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, inoGen InoGenerator, cgroupControllers map[string]string) *kernfs.Dentry {
+func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *kernfs.Dentry {
subInode := &subtasksInode{
+ fs: fs,
task: task,
pidns: pidns,
- inoGen: inoGen,
cgroupControllers: cgroupControllers,
}
// Note: credentials are overridden by taskOwnedInode.
- subInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+ subInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
inode := &taskOwnedInode{Inode: subInode, owner: task}
@@ -78,7 +78,7 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, e
return nil, syserror.ENOENT
}
- subTaskDentry := newTaskInode(i.inoGen, subTask, i.pidns, false, i.cgroupControllers)
+ subTaskDentry := i.fs.newTaskInode(subTask, i.pidns, false, i.cgroupControllers)
return subTaskDentry.VFSDentry(), nil
}
@@ -102,7 +102,7 @@ func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallb
dirent := vfs.Dirent{
Name: strconv.FormatUint(uint64(tid), 10),
Type: linux.DT_DIR,
- Ino: i.inoGen.NextIno(),
+ Ino: i.fs.NextIno(),
NextOff: offset + 1,
}
if err := cb.Handle(dirent); err != nil {
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index 66419d91b..482055db1 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -43,45 +43,45 @@ type taskInode struct {
var _ kernfs.Inode = (*taskInode)(nil)
-func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
+func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
// TODO(gvisor.dev/issue/164): Fail with ESRCH if task exited.
contents := map[string]*kernfs.Dentry{
- "auxv": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &auxvData{task: task}),
- "cmdline": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
- "comm": newComm(task, inoGen.NextIno(), 0444),
- "environ": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
- "exe": newExeSymlink(task, inoGen.NextIno()),
- "fd": newFDDirInode(task, inoGen),
- "fdinfo": newFDInfoDirInode(task, inoGen),
- "gid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: true}),
- "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)),
- "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}),
- "mountinfo": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountInfoData{task: task}),
- "mounts": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountsData{task: task}),
- "net": newTaskNetDir(task, inoGen),
- "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{
- "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"),
- "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"),
- "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"),
+ "auxv": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &auxvData{task: task}),
+ "cmdline": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
+ "comm": fs.newComm(task, fs.NextIno(), 0444),
+ "environ": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
+ "exe": fs.newExeSymlink(task, fs.NextIno()),
+ "fd": fs.newFDDirInode(task),
+ "fdinfo": fs.newFDInfoDirInode(task),
+ "gid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}),
+ "io": fs.newTaskOwnedFile(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
+ "maps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mapsData{task: task}),
+ "mountinfo": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountInfoData{task: task}),
+ "mounts": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountsData{task: task}),
+ "net": fs.newTaskNetDir(task),
+ "ns": fs.newTaskOwnedDir(task, fs.NextIno(), 0511, map[string]*kernfs.Dentry{
+ "net": fs.newNamespaceSymlink(task, fs.NextIno(), "net"),
+ "pid": fs.newNamespaceSymlink(task, fs.NextIno(), "pid"),
+ "user": fs.newNamespaceSymlink(task, fs.NextIno(), "user"),
}),
- "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")),
- "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}),
- "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}),
- "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
- "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}),
- "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
- "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}),
+ "oom_score": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newStaticFile("0\n")),
+ "oom_score_adj": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &oomScoreAdj{task: task}),
+ "smaps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &smapsData{task: task}),
+ "stat": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
+ "statm": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statmData{task: task}),
+ "status": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
+ "uid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
}
if isThreadGroup {
- contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers)
+ contents["task"] = fs.newSubtasks(task, pidns, cgroupControllers)
}
if len(cgroupControllers) > 0 {
- contents["cgroup"] = newTaskOwnedFile(task, inoGen.NextIno(), 0444, newCgroupData(cgroupControllers))
+ contents["cgroup"] = fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newCgroupData(cgroupControllers))
}
taskInode := &taskInode{task: task}
// Note: credentials are overridden by taskOwnedInode.
- taskInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+ taskInode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
inode := &taskOwnedInode{Inode: taskInode, owner: task}
dentry := &kernfs.Dentry{}
@@ -126,9 +126,9 @@ type taskOwnedInode struct {
var _ kernfs.Inode = (*taskOwnedInode)(nil)
-func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
+func (fs *filesystem) newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
// Note: credentials are overridden by taskOwnedInode.
- inode.Init(task.Credentials(), ino, inode, perm)
+ inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm)
taskInode := &taskOwnedInode{Inode: inode, owner: task}
d := &kernfs.Dentry{}
@@ -136,11 +136,11 @@ func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode
return d
}
-func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
+func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
dir := &kernfs.StaticDirectory{}
// Note: credentials are overridden by taskOwnedInode.
- dir.Init(task.Credentials(), ino, perm)
+ dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm)
inode := &taskOwnedInode{Inode: dir, owner: task}
d := &kernfs.Dentry{}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index 8ad976073..44ccc9e4a 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -53,8 +53,8 @@ func taskFDExists(t *kernel.Task, fd int32) bool {
}
type fdDir struct {
- inoGen InoGenerator
- task *kernel.Task
+ fs *filesystem
+ task *kernel.Task
// When produceSymlinks is set, dirents produces for the FDs are reported
// as symlink. Otherwise, they are reported as regular files.
@@ -85,7 +85,7 @@ func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, abs
dirent := vfs.Dirent{
Name: strconv.FormatUint(uint64(fd), 10),
Type: typ,
- Ino: i.inoGen.NextIno(),
+ Ino: i.fs.NextIno(),
NextOff: offset + 1,
}
if err := cb.Handle(dirent); err != nil {
@@ -110,15 +110,15 @@ type fdDirInode struct {
var _ kernfs.Inode = (*fdDirInode)(nil)
-func newFDDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry {
inode := &fdDirInode{
fdDir: fdDir{
- inoGen: inoGen,
+ fs: fs,
task: task,
produceSymlink: true,
},
}
- inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+ inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -137,7 +137,7 @@ func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
if !taskFDExists(i.task, fd) {
return nil, syserror.ENOENT
}
- taskDentry := newFDSymlink(i.task, fd, i.inoGen.NextIno())
+ taskDentry := i.fs.newFDSymlink(i.task, fd, i.fs.NextIno())
return taskDentry.VFSDentry(), nil
}
@@ -186,12 +186,12 @@ type fdSymlink struct {
var _ kernfs.Inode = (*fdSymlink)(nil)
-func newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry {
+func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry {
inode := &fdSymlink{
task: task,
fd: fd,
}
- inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777)
+ inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
@@ -234,14 +234,14 @@ type fdInfoDirInode struct {
var _ kernfs.Inode = (*fdInfoDirInode)(nil)
-func newFDInfoDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry {
inode := &fdInfoDirInode{
fdDir: fdDir{
- inoGen: inoGen,
- task: task,
+ fs: fs,
+ task: task,
},
}
- inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+ inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -264,7 +264,7 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry,
task: i.task,
fd: fd,
}
- dentry := newTaskOwnedFile(i.task, i.inoGen.NextIno(), 0444, data)
+ dentry := i.fs.newTaskOwnedFile(i.task, i.fs.NextIno(), 0444, data)
return dentry.VFSDentry(), nil
}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 515f25327..2f297e48a 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -241,9 +241,9 @@ type commInode struct {
task *kernel.Task
}
-func newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
+func (fs *filesystem) newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
inode := &commInode{task: task}
- inode.DynamicBytesFile.Init(task.Credentials(), ino, &commData{task: task}, perm)
+ inode.DynamicBytesFile.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, &commData{task: task}, perm)
d := &kernfs.Dentry{}
d.Init(inode)
@@ -596,9 +596,9 @@ type exeSymlink struct {
var _ kernfs.Inode = (*exeSymlink)(nil)
-func newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
+func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
inode := &exeSymlink{task: task}
- inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777)
+ inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
@@ -729,7 +729,7 @@ type namespaceSymlink struct {
task *kernel.Task
}
-func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
+func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
// Namespace symlinks should contain the namespace name and the inode number
// for the namespace instance, so for example user:[123456]. We currently fake
// the inode number by sticking the symlink inode in its place.
@@ -737,7 +737,7 @@ func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentr
inode := &namespaceSymlink{task: task}
// Note: credentials are overridden by taskOwnedInode.
- inode.Init(task.Credentials(), ino, target)
+ inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, target)
taskInode := &taskOwnedInode{Inode: inode, owner: task}
d := &kernfs.Dentry{}
@@ -780,11 +780,11 @@ type namespaceInode struct {
var _ kernfs.Inode = (*namespaceInode)(nil)
// Init initializes a namespace inode.
-func (i *namespaceInode) Init(creds *auth.Credentials, ino uint64, perm linux.FileMode) {
+func (i *namespaceInode) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
if perm&^linux.PermissionsMask != 0 {
panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask))
}
- i.InodeAttrs.Init(creds, ino, linux.ModeRegular|perm)
+ i.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeRegular|perm)
}
// Open implements Inode.Open.
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index 9c329341a..6bde27376 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -37,7 +37,7 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+func (fs *filesystem) newTaskNetDir(task *kernel.Task) *kernfs.Dentry {
k := task.Kernel()
pidns := task.PIDNamespace()
root := auth.NewRootCredentials(pidns.UserNamespace())
@@ -57,37 +57,37 @@ func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
// TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task
// network namespace.
contents = map[string]*kernfs.Dentry{
- "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}),
- "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}),
+ "dev": fs.newDentry(root, fs.NextIno(), 0444, &netDevData{stack: stack}),
+ "snmp": fs.newDentry(root, fs.NextIno(), 0444, &netSnmpData{stack: stack}),
// The following files are simple stubs until they are implemented in
// netstack, if the file contains a header the stub is just the header
// otherwise it is an empty file.
- "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)),
- "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)),
- "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}),
- "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)),
- "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)),
+ "arp": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(arp)),
+ "netlink": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(netlink)),
+ "netstat": fs.newDentry(root, fs.NextIno(), 0444, &netStatData{}),
+ "packet": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(packet)),
+ "protocols": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(protocols)),
// Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
// high res timer ticks per sec (ClockGetres returns 1ns resolution).
- "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)),
- "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)),
- "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}),
- "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}),
- "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}),
- "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}),
+ "psched": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(psched)),
+ "ptype": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(ptype)),
+ "route": fs.newDentry(root, fs.NextIno(), 0444, &netRouteData{stack: stack}),
+ "tcp": fs.newDentry(root, fs.NextIno(), 0444, &netTCPData{kernel: k}),
+ "udp": fs.newDentry(root, fs.NextIno(), 0444, &netUDPData{kernel: k}),
+ "unix": fs.newDentry(root, fs.NextIno(), 0444, &netUnixData{kernel: k}),
}
if stack.SupportsIPv6() {
- contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack})
- contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(""))
- contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k})
- contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6))
+ contents["if_inet6"] = fs.newDentry(root, fs.NextIno(), 0444, &ifinet6{stack: stack})
+ contents["ipv6_route"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(""))
+ contents["tcp6"] = fs.newDentry(root, fs.NextIno(), 0444, &netTCP6Data{kernel: k})
+ contents["udp6"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(upd6))
}
}
- return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents)
+ return fs.newTaskOwnedDir(task, fs.NextIno(), 0555, contents)
}
// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 5aeda8c9b..b51d43954 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -33,11 +33,6 @@ const (
threadSelfName = "thread-self"
)
-// InoGenerator generates unique inode numbers for a given filesystem.
-type InoGenerator interface {
- NextIno() uint64
-}
-
// tasksInode represents the inode for /proc/ directory.
//
// +stateify savable
@@ -48,8 +43,8 @@ type tasksInode struct {
kernfs.OrderedChildren
kernfs.AlwaysValid
- inoGen InoGenerator
- pidns *kernel.PIDNamespace
+ fs *filesystem
+ pidns *kernel.PIDNamespace
// '/proc/self' and '/proc/thread-self' have custom directory offsets in
// Linux. So handle them outside of OrderedChildren.
@@ -64,29 +59,29 @@ type tasksInode struct {
var _ kernfs.Inode = (*tasksInode)(nil)
-func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
+func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
root := auth.NewRootCredentials(pidns.UserNamespace())
contents := map[string]*kernfs.Dentry{
- "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))),
- "filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}),
- "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}),
- "sys": newSysDir(root, inoGen, k),
- "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}),
- "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"),
- "net": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/net"),
- "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}),
- "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}),
- "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}),
+ "cpuinfo": fs.newDentry(root, fs.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))),
+ "filesystems": fs.newDentry(root, fs.NextIno(), 0444, &filesystemsData{}),
+ "loadavg": fs.newDentry(root, fs.NextIno(), 0444, &loadavgData{}),
+ "sys": fs.newSysDir(root, k),
+ "meminfo": fs.newDentry(root, fs.NextIno(), 0444, &meminfoData{}),
+ "mounts": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"),
+ "net": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"),
+ "stat": fs.newDentry(root, fs.NextIno(), 0444, &statData{}),
+ "uptime": fs.newDentry(root, fs.NextIno(), 0444, &uptimeData{}),
+ "version": fs.newDentry(root, fs.NextIno(), 0444, &versionData{}),
}
inode := &tasksInode{
pidns: pidns,
- inoGen: inoGen,
- selfSymlink: newSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
- threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
+ fs: fs,
+ selfSymlink: fs.newSelfSymlink(root, fs.NextIno(), pidns).VFSDentry(),
+ threadSelfSymlink: fs.newThreadSelfSymlink(root, fs.NextIno(), pidns).VFSDentry(),
cgroupControllers: cgroupControllers,
}
- inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555)
+ inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
dentry := &kernfs.Dentry{}
dentry.Init(inode)
@@ -118,7 +113,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
return nil, syserror.ENOENT
}
- taskDentry := newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers)
+ taskDentry := i.fs.newTaskInode(task, i.pidns, true, i.cgroupControllers)
return taskDentry.VFSDentry(), nil
}
@@ -144,7 +139,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback
dirent := vfs.Dirent{
Name: selfName,
Type: linux.DT_LNK,
- Ino: i.inoGen.NextIno(),
+ Ino: i.fs.NextIno(),
NextOff: offset + 1,
}
if err := cb.Handle(dirent); err != nil {
@@ -156,7 +151,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback
dirent := vfs.Dirent{
Name: threadSelfName,
Type: linux.DT_LNK,
- Ino: i.inoGen.NextIno(),
+ Ino: i.fs.NextIno(),
NextOff: offset + 1,
}
if err := cb.Handle(dirent); err != nil {
@@ -189,7 +184,7 @@ func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback
dirent := vfs.Dirent{
Name: strconv.FormatUint(uint64(tid), 10),
Type: linux.DT_DIR,
- Ino: i.inoGen.NextIno(),
+ Ino: i.fs.NextIno(),
NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
}
if err := cb.Handle(dirent); err != nil {
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index e5f13b69e..7d8983aa5 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -41,9 +41,9 @@ type selfSymlink struct {
var _ kernfs.Inode = (*selfSymlink)(nil)
-func newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
+func (fs *filesystem) newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
inode := &selfSymlink{pidns: pidns}
- inode.Init(creds, ino, linux.ModeSymlink|0777)
+ inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
@@ -83,9 +83,9 @@ type threadSelfSymlink struct {
var _ kernfs.Inode = (*threadSelfSymlink)(nil)
-func newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
+func (fs *filesystem) newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
inode := &threadSelfSymlink{pidns: pidns}
- inode.Init(creds, ino, linux.ModeSymlink|0777)
+ inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
d := &kernfs.Dentry{}
d.Init(inode)
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 0e90e02fe..6dac2afa4 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -30,89 +30,89 @@ import (
)
// newSysDir returns the dentry corresponding to /proc/sys directory.
-func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
- return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
- "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
- "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}),
- "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)),
- "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)),
- "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)),
+func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
+ return kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "kernel": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "hostname": fs.newDentry(root, fs.NextIno(), 0444, &hostnameData{}),
+ "shmall": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMALL)),
+ "shmmax": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMAX)),
+ "shmmni": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMNI)),
}),
- "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
- "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{k: k}),
- "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")),
+ "vm": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "mmap_min_addr": fs.newDentry(root, fs.NextIno(), 0444, &mmapMinAddrData{k: k}),
+ "overcommit_memory": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0\n")),
}),
- "net": newSysNetDir(root, inoGen, k),
+ "net": fs.newSysNetDir(root, k),
})
}
// newSysNetDir returns the dentry corresponding to /proc/sys/net directory.
-func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
+func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
var contents map[string]*kernfs.Dentry
// TODO(gvisor.dev/issue/1833): Support for using the network stack in the
// network namespace of the calling process.
if stack := k.RootNetworkNamespace().Stack(); stack != nil {
contents = map[string]*kernfs.Dentry{
- "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
- "tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}),
+ "ipv4": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
// The following files are simple stubs until they are implemented in
// netstack, most of these files are configuration related. We use the
// value closest to the actual netstack behavior or any empty file, all
// of these files will have mode 0444 (read-only for all users).
- "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")),
- "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
- "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")),
- "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "ip_local_port_range": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("16000 65535")),
+ "ip_local_reserved_ports": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
+ "ipfrag_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("30")),
+ "ip_nonlocal_bind": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "ip_no_pmtu_disc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
// tcp_allowed_congestion_control tell the user what they are able to
// do as an unprivledged process so we leave it empty.
- "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
- "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
- "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
+ "tcp_allowed_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
+ "tcp_available_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")),
+ "tcp_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")),
// Many of the following stub files are features netstack doesn't
// support. The unsupported features return "0" to indicate they are
// disabled.
- "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")),
- "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
- "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")),
- "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
- "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
- "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")),
- "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
- "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
- "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
- "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
- "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_base_mss": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1280")),
+ "tcp_dsack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_early_retrans": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen_key": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
+ "tcp_invalid_ratelimit": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_intvl": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_probes": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("7200")),
+ "tcp_mtu_probing": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_no_metrics_save": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
+ "tcp_probe_interval": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_probe_threshold": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "tcp_retries1": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")),
+ "tcp_retries2": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("15")),
+ "tcp_rfc1337": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
+ "tcp_slow_start_after_idle": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
+ "tcp_synack_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")),
+ "tcp_syn_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")),
+ "tcp_timestamps": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
}),
- "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
- "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")),
- "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")),
- "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
- "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
- "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
- "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
- "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")),
- "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
- "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "core": kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "default_qdisc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("pfifo_fast")),
+ "message_burst": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("10")),
+ "message_cost": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")),
+ "optmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
+ "rmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
+ "rmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
+ "somaxconn": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("128")),
+ "wmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
+ "wmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
}),
}
}
- return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, contents)
+ return kernfs.NewStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, contents)
}
// mmapMinAddrData implements vfs.DynamicBytesSource for
diff --git a/pkg/sentry/fsimpl/signalfd/BUILD b/pkg/sentry/fsimpl/signalfd/BUILD
new file mode 100644
index 000000000..067c1657f
--- /dev/null
+++ b/pkg/sentry/fsimpl/signalfd/BUILD
@@ -0,0 +1,20 @@
+load("//tools:defs.bzl", "go_library")
+
+package(licenses = ["notice"])
+
+go_library(
+ name = "signalfd",
+ srcs = ["signalfd.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/abi/linux",
+ "//pkg/binary",
+ "//pkg/context",
+ "//pkg/sentry/kernel",
+ "//pkg/sentry/vfs",
+ "//pkg/sync",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ "//pkg/waiter",
+ ],
+)
diff --git a/pkg/sentry/fsimpl/signalfd/signalfd.go b/pkg/sentry/fsimpl/signalfd/signalfd.go
new file mode 100644
index 000000000..d29ef3f83
--- /dev/null
+++ b/pkg/sentry/fsimpl/signalfd/signalfd.go
@@ -0,0 +1,135 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package signalfd
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/binary"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/sync"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+ "gvisor.dev/gvisor/pkg/waiter"
+)
+
+// SignalFileDescription implements FileDescriptionImpl for signal fds.
+type SignalFileDescription struct {
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+ vfs.DentryMetadataFileDescriptionImpl
+
+ // target is the original signal target task.
+ //
+ // The semantics here are a bit broken. Linux will always use current
+ // for all reads, regardless of where the signalfd originated. We can't
+ // do exactly that because we need to plumb the context through
+ // EventRegister in order to support proper blocking behavior. This
+ // will undoubtedly become very complicated quickly.
+ target *kernel.Task
+
+ // mu protects mask.
+ mu sync.Mutex
+
+ // mask is the signal mask. Protected by mu.
+ mask linux.SignalSet
+}
+
+var _ vfs.FileDescriptionImpl = (*SignalFileDescription)(nil)
+
+// New creates a new signal fd.
+func New(vfsObj *vfs.VirtualFilesystem, target *kernel.Task, mask linux.SignalSet, flags uint32) (*vfs.FileDescription, error) {
+ vd := vfsObj.NewAnonVirtualDentry("[signalfd]")
+ defer vd.DecRef()
+ sfd := &SignalFileDescription{
+ target: target,
+ mask: mask,
+ }
+ if err := sfd.vfsfd.Init(sfd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{
+ UseDentryMetadata: true,
+ DenyPRead: true,
+ DenyPWrite: true,
+ }); err != nil {
+ return nil, err
+ }
+ return &sfd.vfsfd, nil
+}
+
+// Mask returns the signal mask.
+func (sfd *SignalFileDescription) Mask() linux.SignalSet {
+ sfd.mu.Lock()
+ defer sfd.mu.Unlock()
+ return sfd.mask
+}
+
+// SetMask sets the signal mask.
+func (sfd *SignalFileDescription) SetMask(mask linux.SignalSet) {
+ sfd.mu.Lock()
+ defer sfd.mu.Unlock()
+ sfd.mask = mask
+}
+
+// Read implements FileDescriptionImpl.Read.
+func (sfd *SignalFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
+ // Attempt to dequeue relevant signals.
+ info, err := sfd.target.Sigtimedwait(sfd.Mask(), 0)
+ if err != nil {
+ // There must be no signal available.
+ return 0, syserror.ErrWouldBlock
+ }
+
+ // Copy out the signal info using the specified format.
+ var buf [128]byte
+ binary.Marshal(buf[:0], usermem.ByteOrder, &linux.SignalfdSiginfo{
+ Signo: uint32(info.Signo),
+ Errno: info.Errno,
+ Code: info.Code,
+ PID: uint32(info.Pid()),
+ UID: uint32(info.Uid()),
+ Status: info.Status(),
+ Overrun: uint32(info.Overrun()),
+ Addr: info.Addr(),
+ })
+ n, err := dst.CopyOut(ctx, buf[:])
+ return int64(n), err
+}
+
+// Readiness implements waiter.Waitable.Readiness.
+func (sfd *SignalFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
+ sfd.mu.Lock()
+ defer sfd.mu.Unlock()
+ if mask&waiter.EventIn != 0 && sfd.target.PendingSignals()&sfd.mask != 0 {
+ return waiter.EventIn // Pending signals.
+ }
+ return 0
+}
+
+// EventRegister implements waiter.Waitable.EventRegister.
+func (sfd *SignalFileDescription) EventRegister(entry *waiter.Entry, _ waiter.EventMask) {
+ sfd.mu.Lock()
+ defer sfd.mu.Unlock()
+ // Register for the signal set; ignore the passed events.
+ sfd.target.SignalRegister(entry, waiter.EventMask(sfd.mask))
+}
+
+// EventUnregister implements waiter.Waitable.EventUnregister.
+func (sfd *SignalFileDescription) EventUnregister(entry *waiter.Entry) {
+ // Unregister the original entry.
+ sfd.target.SignalUnregister(entry)
+}
+
+// Release implements FileDescriptionImpl.Release()
+func (sfd *SignalFileDescription) Release() {}
diff --git a/pkg/sentry/fsimpl/sockfs/BUILD b/pkg/sentry/fsimpl/sockfs/BUILD
index 52084ddb5..9453277b8 100644
--- a/pkg/sentry/fsimpl/sockfs/BUILD
+++ b/pkg/sentry/fsimpl/sockfs/BUILD
@@ -9,6 +9,7 @@ go_library(
deps = [
"//pkg/abi/linux",
"//pkg/context",
+ "//pkg/fspath",
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/vfs",
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index 239a9f4b4..ee0828a15 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -16,8 +16,11 @@
package sockfs
import (
+ "fmt"
+
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -41,19 +44,42 @@ func (filesystemType) Name() string {
return "sockfs"
}
+type filesystem struct {
+ kernfs.Filesystem
+
+ devMinor uint32
+}
+
// NewFilesystem sets up and returns a new sockfs filesystem.
//
// Note that there should only ever be one instance of sockfs.Filesystem,
// backing a global socket mount.
-func NewFilesystem(vfsObj *vfs.VirtualFilesystem) *vfs.Filesystem {
- fs := &kernfs.Filesystem{}
- fs.VFSFilesystem().Init(vfsObj, filesystemType{}, fs)
- return fs.VFSFilesystem()
+func NewFilesystem(vfsObj *vfs.VirtualFilesystem) (*vfs.Filesystem, error) {
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, err
+ }
+ fs := &filesystem{
+ devMinor: devMinor,
+ }
+ fs.Filesystem.VFSFilesystem().Init(vfsObj, filesystemType{}, fs)
+ return fs.Filesystem.VFSFilesystem(), nil
+}
+
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
+}
+
+// PrependPath implements vfs.FilesystemImpl.PrependPath.
+func (fs *filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDentry, b *fspath.Builder) error {
+ inode := vd.Dentry().Impl().(*kernfs.Dentry).Inode().(*inode)
+ b.PrependComponent(fmt.Sprintf("socket:[%d]", inode.InodeAttrs.Ino()))
+ return vfs.PrependPathSyntheticError{}
}
// inode implements kernfs.Inode.
-//
-// TODO(gvisor.dev/issue/1193): Device numbers.
type inode struct {
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
@@ -67,11 +93,15 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, vfsd *vfs.Dentr
}
// NewDentry constructs and returns a sockfs dentry.
-func NewDentry(creds *auth.Credentials, ino uint64) *vfs.Dentry {
+//
+// Preconditions: mnt.Filesystem() must have been returned by NewFilesystem().
+func NewDentry(creds *auth.Credentials, mnt *vfs.Mount) *vfs.Dentry {
+ fs := mnt.Filesystem().Impl().(*filesystem)
+
// File mode matches net/socket.c:sock_alloc.
filemode := linux.FileMode(linux.S_IFSOCK | 0600)
i := &inode{}
- i.Init(creds, ino, filemode)
+ i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.Filesystem.NextIno(), filemode)
d := &kernfs.Dentry{}
d.Init(i)
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 00f7d6214..0af373604 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -37,6 +37,8 @@ type FilesystemType struct{}
// filesystem implements vfs.FilesystemImpl.
type filesystem struct {
kernfs.Filesystem
+
+ devMinor uint32
}
// Name implements vfs.FilesystemType.Name.
@@ -46,7 +48,14 @@ func (FilesystemType) Name() string {
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- fs := &filesystem{}
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ fs := &filesystem{
+ devMinor: devMinor,
+ }
fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
k := kernel.KernelFromContext(ctx)
maxCPUCores := k.ApplicationCores()
@@ -77,6 +86,12 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), root.VFSDentry(), nil
}
+// Release implements vfs.FilesystemImpl.Release.
+func (fs *filesystem) Release() {
+ fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
+ fs.Filesystem.Release()
+}
+
// dir implements kernfs.Inode.
type dir struct {
kernfs.InodeAttrs
@@ -90,7 +105,7 @@ type dir struct {
func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry {
d := &dir{}
- d.InodeAttrs.Init(creds, fs.NextIno(), linux.ModeDirectory|0755)
+ d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
d.dentry.Init(d)
@@ -127,7 +142,7 @@ func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) *kernfs.Dentry {
c := &cpuFile{maxCores: maxCores}
- c.DynamicBytesFile.Init(creds, fs.NextIno(), c, mode)
+ c.DynamicBytesFile.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), c, mode)
d := &kernfs.Dentry{}
d.Init(c)
return d
diff --git a/pkg/sentry/fsimpl/timerfd/BUILD b/pkg/sentry/fsimpl/timerfd/BUILD
new file mode 100644
index 000000000..fbb02a271
--- /dev/null
+++ b/pkg/sentry/fsimpl/timerfd/BUILD
@@ -0,0 +1,17 @@
+load("//tools:defs.bzl", "go_library")
+
+licenses(["notice"])
+
+go_library(
+ name = "timerfd",
+ srcs = ["timerfd.go"],
+ visibility = ["//pkg/sentry:internal"],
+ deps = [
+ "//pkg/context",
+ "//pkg/sentry/kernel/time",
+ "//pkg/sentry/vfs",
+ "//pkg/syserror",
+ "//pkg/usermem",
+ "//pkg/waiter",
+ ],
+)
diff --git a/pkg/sentry/vfs/timerfd.go b/pkg/sentry/fsimpl/timerfd/timerfd.go
index 42b880656..60c92d626 100644
--- a/pkg/sentry/vfs/timerfd.go
+++ b/pkg/sentry/fsimpl/timerfd/timerfd.go
@@ -12,13 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package vfs
+// Package timerfd implements timer fds.
+package timerfd
import (
"sync/atomic"
"gvisor.dev/gvisor/pkg/context"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
"gvisor.dev/gvisor/pkg/usermem"
"gvisor.dev/gvisor/pkg/waiter"
@@ -27,9 +29,9 @@ import (
// TimerFileDescription implements FileDescriptionImpl for timer fds. It also
// implements ktime.TimerListener.
type TimerFileDescription struct {
- vfsfd FileDescription
- FileDescriptionDefaultImpl
- DentryMetadataFileDescriptionImpl
+ vfsfd vfs.FileDescription
+ vfs.FileDescriptionDefaultImpl
+ vfs.DentryMetadataFileDescriptionImpl
events waiter.Queue
timer *ktime.Timer
@@ -40,20 +42,19 @@ type TimerFileDescription struct {
val uint64
}
-var _ FileDescriptionImpl = (*TimerFileDescription)(nil)
+var _ vfs.FileDescriptionImpl = (*TimerFileDescription)(nil)
var _ ktime.TimerListener = (*TimerFileDescription)(nil)
-// NewTimerFD returns a new timer fd.
-func (vfs *VirtualFilesystem) NewTimerFD(clock ktime.Clock, flags uint32) (*FileDescription, error) {
- vd := vfs.NewAnonVirtualDentry("[timerfd]")
+// New returns a new timer fd.
+func New(vfsObj *vfs.VirtualFilesystem, clock ktime.Clock, flags uint32) (*vfs.FileDescription, error) {
+ vd := vfsObj.NewAnonVirtualDentry("[timerfd]")
defer vd.DecRef()
tfd := &TimerFileDescription{}
tfd.timer = ktime.NewTimer(clock, tfd)
- if err := tfd.vfsfd.Init(tfd, flags, vd.Mount(), vd.Dentry(), &FileDescriptionOptions{
+ if err := tfd.vfsfd.Init(tfd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{
UseDentryMetadata: true,
DenyPRead: true,
DenyPWrite: true,
- InvalidWrite: true,
}); err != nil {
return nil, err
}
@@ -61,7 +62,7 @@ func (vfs *VirtualFilesystem) NewTimerFD(clock ktime.Clock, flags uint32) (*File
}
// Read implements FileDescriptionImpl.Read.
-func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts ReadOptions) (int64, error) {
+func (tfd *TimerFileDescription) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
const sizeofUint64 = 8
if dst.NumBytes() < sizeofUint64 {
return 0, syserror.EINVAL
diff --git a/pkg/sentry/fsimpl/tmpfs/tmpfs.go b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
index efc931468..405928bd0 100644
--- a/pkg/sentry/fsimpl/tmpfs/tmpfs.go
+++ b/pkg/sentry/fsimpl/tmpfs/tmpfs.go
@@ -63,6 +63,9 @@ type filesystem struct {
// clock is a realtime clock used to set timestamps in file operations.
clock time.Clock
+ // devMinor is the filesystem's minor device number. devMinor is immutable.
+ devMinor uint32
+
// mu serializes changes to the Dentry tree.
mu sync.RWMutex
@@ -96,11 +99,6 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
if memFileProvider == nil {
panic("MemoryFileProviderFromContext returned nil")
}
- clock := time.RealtimeClockFromContext(ctx)
- fs := filesystem{
- memFile: memFileProvider.MemoryFile(),
- clock: clock,
- }
rootFileType := uint16(linux.S_IFDIR)
newFSType := vfs.FilesystemType(&fstype)
@@ -114,6 +112,16 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
}
+ devMinor, err := vfsObj.GetAnonBlockDevMinor()
+ if err != nil {
+ return nil, nil, err
+ }
+ clock := time.RealtimeClockFromContext(ctx)
+ fs := filesystem{
+ memFile: memFileProvider.MemoryFile(),
+ clock: clock,
+ devMinor: devMinor,
+ }
fs.vfsfs.Init(vfsObj, newFSType, &fs)
var root *dentry
@@ -125,6 +133,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
case linux.S_IFDIR:
root = &fs.newDirectory(creds, 01777).dentry
default:
+ fs.vfsfs.DecRef()
return nil, nil, fmt.Errorf("invalid tmpfs root file type: %#o", rootFileType)
}
return &fs.vfsfs, &root.vfsd, nil
@@ -132,6 +141,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
// Release implements vfs.FilesystemImpl.Release.
func (fs *filesystem) Release() {
+ fs.vfsfs.VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
}
// dentry implements vfs.DentryImpl.
@@ -188,8 +198,8 @@ func (d *dentry) DecRef() {
// inode represents a filesystem object.
type inode struct {
- // clock is a realtime clock used to set timestamps in file operations.
- clock time.Clock
+ // fs is the owning filesystem. fs is immutable.
+ fs *filesystem
// refs is a reference count. refs is accessed using atomic memory
// operations.
@@ -232,7 +242,7 @@ func (i *inode) init(impl interface{}, fs *filesystem, creds *auth.Credentials,
if mode.FileType() == 0 {
panic("file type is required in FileMode")
}
- i.clock = fs.clock
+ i.fs = fs
i.refs = 1
i.mode = uint32(mode)
i.uid = uint32(creds.EffectiveKUID)
@@ -327,7 +337,8 @@ func (i *inode) statTo(stat *linux.Statx) {
stat.Atime = linux.NsecToStatxTimestamp(i.atime)
stat.Ctime = linux.NsecToStatxTimestamp(i.ctime)
stat.Mtime = linux.NsecToStatxTimestamp(i.mtime)
- // TODO(gvisor.dev/issue/1197): Device number.
+ stat.DevMajor = linux.UNNAMED_MAJOR
+ stat.DevMinor = i.fs.devMinor
switch impl := i.impl.(type) {
case *regularFile:
stat.Mask |= linux.STATX_SIZE | linux.STATX_BLOCKS
@@ -401,7 +412,7 @@ func (i *inode) setStat(ctx context.Context, creds *auth.Credentials, stat *linu
return syserror.EINVAL
}
}
- now := i.clock.Now().Nanoseconds()
+ now := i.fs.clock.Now().Nanoseconds()
if mask&linux.STATX_ATIME != 0 {
if stat.Atime.Nsec == linux.UTIME_NOW {
atomic.StoreInt64(&i.atime, now)
@@ -518,7 +529,7 @@ func (i *inode) touchAtime(mnt *vfs.Mount) {
if err := mnt.CheckBeginWrite(); err != nil {
return
}
- now := i.clock.Now().Nanoseconds()
+ now := i.fs.clock.Now().Nanoseconds()
i.mu.Lock()
atomic.StoreInt64(&i.atime, now)
i.mu.Unlock()
@@ -527,7 +538,7 @@ func (i *inode) touchAtime(mnt *vfs.Mount) {
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite().
func (i *inode) touchCtime() {
- now := i.clock.Now().Nanoseconds()
+ now := i.fs.clock.Now().Nanoseconds()
i.mu.Lock()
atomic.StoreInt64(&i.ctime, now)
i.mu.Unlock()
@@ -535,7 +546,7 @@ func (i *inode) touchCtime() {
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite().
func (i *inode) touchCMtime() {
- now := i.clock.Now().Nanoseconds()
+ now := i.fs.clock.Now().Nanoseconds()
i.mu.Lock()
atomic.StoreInt64(&i.mtime, now)
atomic.StoreInt64(&i.ctime, now)
@@ -545,7 +556,7 @@ func (i *inode) touchCMtime() {
// Preconditions: The caller has called vfs.Mount.CheckBeginWrite() and holds
// inode.mu.
func (i *inode) touchCMtimeLocked() {
- now := i.clock.Now().Nanoseconds()
+ now := i.fs.clock.Now().Nanoseconds()
atomic.StoreInt64(&i.mtime, now)
atomic.StoreInt64(&i.ctime, now)
}
diff --git a/pkg/sentry/hostfd/hostfd_unsafe.go b/pkg/sentry/hostfd/hostfd_unsafe.go
index 5e9e60fc4..cd4dc67fb 100644
--- a/pkg/sentry/hostfd/hostfd_unsafe.go
+++ b/pkg/sentry/hostfd/hostfd_unsafe.go
@@ -34,24 +34,13 @@ func Preadv2(fd int32, dsts safemem.BlockSeq, offset int64, flags uint32) (uint6
n uintptr
e syscall.Errno
)
- // Avoid preadv2(2) if possible, since it's relatively new and thus least
- // likely to be supported by the host kernel.
- if flags == 0 {
- if dsts.NumBlocks() == 1 {
- // Use read() or pread() to avoid iovec allocation and copying.
- dst := dsts.Head()
- if offset == -1 {
- n, _, e = syscall.Syscall(unix.SYS_READ, uintptr(fd), dst.Addr(), uintptr(dst.Len()))
- } else {
- n, _, e = syscall.Syscall6(unix.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
- }
+ if flags == 0 && dsts.NumBlocks() == 1 {
+ // Use read() or pread() to avoid iovec allocation and copying.
+ dst := dsts.Head()
+ if offset == -1 {
+ n, _, e = syscall.Syscall(unix.SYS_READ, uintptr(fd), dst.Addr(), uintptr(dst.Len()))
} else {
- iovs := safemem.IovecsFromBlockSeq(dsts)
- if offset == -1 {
- n, _, e = syscall.Syscall(unix.SYS_READV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)))
- } else {
- n, _, e = syscall.Syscall6(unix.SYS_PREADV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
- }
+ n, _, e = syscall.Syscall6(unix.SYS_PREAD64, uintptr(fd), dst.Addr(), uintptr(dst.Len()), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
}
} else {
iovs := safemem.IovecsFromBlockSeq(dsts)
@@ -77,24 +66,13 @@ func Pwritev2(fd int32, srcs safemem.BlockSeq, offset int64, flags uint32) (uint
n uintptr
e syscall.Errno
)
- // Avoid pwritev2(2) if possible, since it's relatively new and thus least
- // likely to be supported by the host kernel.
- if flags == 0 {
- if srcs.NumBlocks() == 1 {
- // Use write() or pwrite() to avoid iovec allocation and copying.
- src := srcs.Head()
- if offset == -1 {
- n, _, e = syscall.Syscall(unix.SYS_WRITE, uintptr(fd), src.Addr(), uintptr(src.Len()))
- } else {
- n, _, e = syscall.Syscall6(unix.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
- }
+ if flags == 0 && srcs.NumBlocks() == 1 {
+ // Use write() or pwrite() to avoid iovec allocation and copying.
+ src := srcs.Head()
+ if offset == -1 {
+ n, _, e = syscall.Syscall(unix.SYS_WRITE, uintptr(fd), src.Addr(), uintptr(src.Len()))
} else {
- iovs := safemem.IovecsFromBlockSeq(srcs)
- if offset == -1 {
- n, _, e = syscall.Syscall(unix.SYS_WRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)))
- } else {
- n, _, e = syscall.Syscall6(unix.SYS_PWRITEV, uintptr(fd), uintptr((unsafe.Pointer)(&iovs[0])), uintptr(len(iovs)), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
- }
+ n, _, e = syscall.Syscall6(unix.SYS_PWRITE64, uintptr(fd), src.Addr(), uintptr(src.Len()), uintptr(offset), 0 /* pos_h */, 0 /* unused */)
}
} else {
iovs := safemem.IovecsFromBlockSeq(srcs)
diff --git a/pkg/sentry/kernel/BUILD b/pkg/sentry/kernel/BUILD
index e47af66d6..8104f50f3 100644
--- a/pkg/sentry/kernel/BUILD
+++ b/pkg/sentry/kernel/BUILD
@@ -172,6 +172,7 @@ go_library(
"//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/pipefs",
"//pkg/sentry/fsimpl/sockfs",
+ "//pkg/sentry/fsimpl/timerfd",
"//pkg/sentry/hostcpu",
"//pkg/sentry/inet",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/kernel/kernel.go b/pkg/sentry/kernel/kernel.go
index c91b9dce2..3617da8c6 100644
--- a/pkg/sentry/kernel/kernel.go
+++ b/pkg/sentry/kernel/kernel.go
@@ -48,10 +48,11 @@ import (
"gvisor.dev/gvisor/pkg/refs"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
+ oldtimerfd "gvisor.dev/gvisor/pkg/sentry/fs/timerfd"
"gvisor.dev/gvisor/pkg/sentry/fsbridge"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/pipefs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/timerfd"
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -372,7 +373,10 @@ func (k *Kernel) Init(args InitKernelArgs) error {
return fmt.Errorf("failed to initialize VFS: %v", err)
}
- pipeFilesystem := pipefs.NewFilesystem(&k.vfs)
+ pipeFilesystem, err := pipefs.NewFilesystem(&k.vfs)
+ if err != nil {
+ return fmt.Errorf("failed to create pipefs filesystem: %v", err)
+ }
defer pipeFilesystem.DecRef()
pipeMount, err := k.vfs.NewDisconnectedMount(pipeFilesystem, nil, &vfs.MountOptions{})
if err != nil {
@@ -380,7 +384,10 @@ func (k *Kernel) Init(args InitKernelArgs) error {
}
k.pipeMount = pipeMount
- socketFilesystem := sockfs.NewFilesystem(&k.vfs)
+ socketFilesystem, err := sockfs.NewFilesystem(&k.vfs)
+ if err != nil {
+ return fmt.Errorf("failed to create sockfs filesystem: %v", err)
+ }
defer socketFilesystem.DecRef()
socketMount, err := k.vfs.NewDisconnectedMount(socketFilesystem, nil, &vfs.MountOptions{})
if err != nil {
@@ -1068,11 +1075,11 @@ func (k *Kernel) pauseTimeLocked() {
if t.fdTable != nil {
t.fdTable.forEach(func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) {
if VFS2Enabled {
- if tfd, ok := fd.Impl().(*vfs.TimerFileDescription); ok {
+ if tfd, ok := fd.Impl().(*timerfd.TimerFileDescription); ok {
tfd.PauseTimer()
}
} else {
- if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
+ if tfd, ok := file.FileOperations.(*oldtimerfd.TimerOperations); ok {
tfd.PauseTimer()
}
}
@@ -1104,11 +1111,11 @@ func (k *Kernel) resumeTimeLocked() {
if t.fdTable != nil {
t.fdTable.forEach(func(_ int32, file *fs.File, fd *vfs.FileDescription, _ FDFlags) {
if VFS2Enabled {
- if tfd, ok := fd.Impl().(*vfs.TimerFileDescription); ok {
+ if tfd, ok := fd.Impl().(*timerfd.TimerFileDescription); ok {
tfd.ResumeTimer()
}
} else {
- if tfd, ok := file.FileOperations.(*timerfd.TimerOperations); ok {
+ if tfd, ok := file.FileOperations.(*oldtimerfd.TimerOperations); ok {
tfd.ResumeTimer()
}
}
diff --git a/pkg/sentry/socket/hostinet/BUILD b/pkg/sentry/socket/hostinet/BUILD
index deedd35f7..e82d6cd1e 100644
--- a/pkg/sentry/socket/hostinet/BUILD
+++ b/pkg/sentry/socket/hostinet/BUILD
@@ -26,7 +26,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/hostfd",
"//pkg/sentry/inet",
diff --git a/pkg/sentry/socket/hostinet/socket_vfs2.go b/pkg/sentry/socket/hostinet/socket_vfs2.go
index a8278bffc..677743113 100644
--- a/pkg/sentry/socket/hostinet/socket_vfs2.go
+++ b/pkg/sentry/socket/hostinet/socket_vfs2.go
@@ -21,7 +21,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fdnotifier"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/hostfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -50,8 +49,7 @@ var _ = socket.SocketVFS2(&socketVFS2{})
func newVFS2Socket(t *kernel.Task, family int, stype linux.SockType, protocol int, fd int, flags uint32) (*vfs.FileDescription, *syserr.Error) {
mnt := t.Kernel().SocketMount()
- fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
- d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+ d := sockfs.NewDentry(t.Credentials(), mnt)
s := &socketVFS2{
socketOpsCommon: socketOpsCommon{
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index 40736fb38..789bb94c8 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -17,6 +17,7 @@
package netfilter
import (
+ "bytes"
"errors"
"fmt"
@@ -58,6 +59,13 @@ type metadata struct {
// developing iptables, but can pollute sentry logs otherwise.
const enableLogging = false
+// emptyFilter is for comparison with a rule's filters to determine whether it
+// is also empty. It is immutable.
+var emptyFilter = stack.IPHeaderFilter{
+ Dst: "\x00\x00\x00\x00",
+ DstMask: "\x00\x00\x00\x00",
+}
+
// nflog logs messages related to the writing and reading of iptables.
func nflog(format string, args ...interface{}) {
if enableLogging && log.IsLogging(log.Debug) {
@@ -204,6 +212,16 @@ func convertNetstackToBinary(tablename string, table stack.Table) (linux.KernelI
TargetOffset: linux.SizeOfIPTEntry,
},
}
+ copy(entry.IPTEntry.IP.Dst[:], rule.Filter.Dst)
+ copy(entry.IPTEntry.IP.DstMask[:], rule.Filter.DstMask)
+ copy(entry.IPTEntry.IP.OutputInterface[:], rule.Filter.OutputInterface)
+ copy(entry.IPTEntry.IP.OutputInterfaceMask[:], rule.Filter.OutputInterfaceMask)
+ if rule.Filter.DstInvert {
+ entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_DSTIP
+ }
+ if rule.Filter.OutputInterfaceInvert {
+ entry.IPTEntry.IP.InverseFlags |= linux.IPT_INV_VIA_OUT
+ }
for _, matcher := range rule.Matchers {
// Serialize the matcher and add it to the
@@ -473,7 +491,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
}
if offset == replace.Underflow[hook] {
if !validUnderflow(table.Rules[ruleIdx]) {
- nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP")
+ nflog("underflow for hook %d isn't an unconditional ACCEPT or DROP", ruleIdx)
return syserr.ErrInvalidArgument
}
table.Underflows[hk] = ruleIdx
@@ -536,7 +554,7 @@ func SetEntries(stk *stack.Stack, optVal []byte) *syserr.Error {
// make sure all other chains point to ACCEPT rules.
for hook, ruleIdx := range table.BuiltinChains {
if hook == stack.Forward || hook == stack.Postrouting {
- if _, ok := table.Rules[ruleIdx].Target.(stack.AcceptTarget); !ok {
+ if !isUnconditionalAccept(table.Rules[ruleIdx]) {
nflog("hook %d is unsupported.", hook)
return syserr.ErrInvalidArgument
}
@@ -719,11 +737,27 @@ func filterFromIPTIP(iptip linux.IPTIP) (stack.IPHeaderFilter, error) {
if len(iptip.Dst) != header.IPv4AddressSize || len(iptip.DstMask) != header.IPv4AddressSize {
return stack.IPHeaderFilter{}, fmt.Errorf("incorrect length of destination (%d) and/or destination mask (%d) fields", len(iptip.Dst), len(iptip.DstMask))
}
+
+ n := bytes.IndexByte([]byte(iptip.OutputInterface[:]), 0)
+ if n == -1 {
+ n = len(iptip.OutputInterface)
+ }
+ ifname := string(iptip.OutputInterface[:n])
+
+ n = bytes.IndexByte([]byte(iptip.OutputInterfaceMask[:]), 0)
+ if n == -1 {
+ n = len(iptip.OutputInterfaceMask)
+ }
+ ifnameMask := string(iptip.OutputInterfaceMask[:n])
+
return stack.IPHeaderFilter{
- Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
- Dst: tcpip.Address(iptip.Dst[:]),
- DstMask: tcpip.Address(iptip.DstMask[:]),
- DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
+ Protocol: tcpip.TransportProtocolNumber(iptip.Protocol),
+ Dst: tcpip.Address(iptip.Dst[:]),
+ DstMask: tcpip.Address(iptip.DstMask[:]),
+ DstInvert: iptip.InverseFlags&linux.IPT_INV_DSTIP != 0,
+ OutputInterface: ifname,
+ OutputInterfaceMask: ifnameMask,
+ OutputInterfaceInvert: iptip.InverseFlags&linux.IPT_INV_VIA_OUT != 0,
}, nil
}
@@ -732,16 +766,15 @@ func containsUnsupportedFields(iptip linux.IPTIP) bool {
// - Protocol
// - Dst and DstMask
// - The inverse destination IP check flag
+ // - OutputInterface, OutputInterfaceMask and its inverse.
var emptyInetAddr = linux.InetAddr{}
var emptyInterface = [linux.IFNAMSIZ]byte{}
// Disable any supported inverse flags.
- inverseMask := uint8(linux.IPT_INV_DSTIP)
+ inverseMask := uint8(linux.IPT_INV_DSTIP) | uint8(linux.IPT_INV_VIA_OUT)
return iptip.Src != emptyInetAddr ||
iptip.SrcMask != emptyInetAddr ||
iptip.InputInterface != emptyInterface ||
- iptip.OutputInterface != emptyInterface ||
iptip.InputInterfaceMask != emptyInterface ||
- iptip.OutputInterfaceMask != emptyInterface ||
iptip.Flags != 0 ||
iptip.InverseFlags&^inverseMask != 0
}
@@ -750,6 +783,9 @@ func validUnderflow(rule stack.Rule) bool {
if len(rule.Matchers) != 0 {
return false
}
+ if rule.Filter != emptyFilter {
+ return false
+ }
switch rule.Target.(type) {
case stack.AcceptTarget, stack.DropTarget:
return true
@@ -758,6 +794,14 @@ func validUnderflow(rule stack.Rule) bool {
}
}
+func isUnconditionalAccept(rule stack.Rule) bool {
+ if !validUnderflow(rule) {
+ return false
+ }
+ _, ok := rule.Target.(stack.AcceptTarget)
+ return ok
+}
+
func hookFromLinux(hook int) stack.Hook {
switch hook {
case linux.NF_INET_PRE_ROUTING:
diff --git a/pkg/sentry/socket/netlink/BUILD b/pkg/sentry/socket/netlink/BUILD
index 09ca00a4a..7212d8644 100644
--- a/pkg/sentry/socket/netlink/BUILD
+++ b/pkg/sentry/socket/netlink/BUILD
@@ -20,7 +20,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
diff --git a/pkg/sentry/socket/netlink/provider_vfs2.go b/pkg/sentry/socket/netlink/provider_vfs2.go
index dcd92b5cd..bb205be0d 100644
--- a/pkg/sentry/socket/netlink/provider_vfs2.go
+++ b/pkg/sentry/socket/netlink/provider_vfs2.go
@@ -16,7 +16,6 @@ package netlink
import (
"gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/vfs"
@@ -52,8 +51,7 @@ func (*socketProviderVFS2) Socket(t *kernel.Task, stype linux.SockType, protocol
vfsfd := &s.vfsfd
mnt := t.Kernel().SocketMount()
- fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
- d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+ d := sockfs.NewDentry(t.Credentials(), mnt)
if err := vfsfd.Init(s, linux.O_RDWR, mnt, d, &vfs.FileDescriptionOptions{
DenyPRead: true,
DenyPWrite: true,
diff --git a/pkg/sentry/socket/netstack/BUILD b/pkg/sentry/socket/netstack/BUILD
index ccf9fcf5c..333e0042e 100644
--- a/pkg/sentry/socket/netstack/BUILD
+++ b/pkg/sentry/socket/netstack/BUILD
@@ -18,6 +18,7 @@ go_library(
],
deps = [
"//pkg/abi/linux",
+ "//pkg/amutex",
"//pkg/binary",
"//pkg/context",
"//pkg/log",
@@ -27,7 +28,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/inet",
"//pkg/sentry/kernel",
diff --git a/pkg/sentry/socket/netstack/netstack.go b/pkg/sentry/socket/netstack/netstack.go
index 81053d8ef..9d032f052 100644
--- a/pkg/sentry/socket/netstack/netstack.go
+++ b/pkg/sentry/socket/netstack/netstack.go
@@ -34,6 +34,7 @@ import (
"time"
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/binary"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/log"
@@ -553,11 +554,9 @@ func (s *SocketOperations) Write(ctx context.Context, _ *fs.File, src usermem.IO
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{})
}
@@ -626,11 +625,9 @@ func (s *SocketOperations) ReadFrom(ctx context.Context, _ *fs.File, r io.Reader
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{
Atomic: true, // See above.
})
diff --git a/pkg/sentry/socket/netstack/netstack_vfs2.go b/pkg/sentry/socket/netstack/netstack_vfs2.go
index f7d9b2ff4..fcd8013c0 100644
--- a/pkg/sentry/socket/netstack/netstack_vfs2.go
+++ b/pkg/sentry/socket/netstack/netstack_vfs2.go
@@ -16,9 +16,9 @@ package netstack
import (
"gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/amutex"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/inet"
"gvisor.dev/gvisor/pkg/sentry/kernel"
@@ -53,8 +53,7 @@ func NewVFS2(t *kernel.Task, family int, skType linux.SockType, protocol int, qu
}
mnt := t.Kernel().SocketMount()
- fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
- d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+ d := sockfs.NewDentry(t.Credentials(), mnt)
s := &SocketVFS2{
socketOpsCommon: socketOpsCommon{
@@ -91,11 +90,6 @@ func (s *SocketVFS2) EventUnregister(e *waiter.Entry) {
s.socketOpsCommon.EventUnregister(e)
}
-// PRead implements vfs.FileDescriptionImpl.
-func (s *SocketVFS2) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
- return 0, syserror.ESPIPE
-}
-
// Read implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
@@ -117,11 +111,6 @@ func (s *SocketVFS2) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.
return int64(n), nil
}
-// PWrite implements vfs.FileDescriptionImpl.
-func (s *SocketVFS2) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
- return 0, syserror.ESPIPE
-}
-
// Write implements vfs.FileDescriptionImpl.
func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
// All flags other than RWF_NOWAIT should be ignored.
@@ -137,11 +126,9 @@ func (s *SocketVFS2) Write(ctx context.Context, src usermem.IOSequence, opts vfs
}
if resCh != nil {
- t := kernel.TaskFromContext(ctx)
- if err := t.Block(resCh); err != nil {
- return 0, syserr.FromError(err).ToError()
+ if err := amutex.Block(ctx, resCh); err != nil {
+ return 0, err
}
-
n, _, err = s.Endpoint.Write(f, tcpip.WriteOptions{})
}
diff --git a/pkg/sentry/socket/unix/BUILD b/pkg/sentry/socket/unix/BUILD
index 941a91097..de2cc4bdf 100644
--- a/pkg/sentry/socket/unix/BUILD
+++ b/pkg/sentry/socket/unix/BUILD
@@ -21,7 +21,6 @@ go_library(
"//pkg/sentry/device",
"//pkg/sentry/fs",
"//pkg/sentry/fs/fsutil",
- "//pkg/sentry/fsimpl/kernfs",
"//pkg/sentry/fsimpl/sockfs",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/time",
diff --git a/pkg/sentry/socket/unix/unix_vfs2.go b/pkg/sentry/socket/unix/unix_vfs2.go
index 06d838868..45e109361 100644
--- a/pkg/sentry/socket/unix/unix_vfs2.go
+++ b/pkg/sentry/socket/unix/unix_vfs2.go
@@ -19,7 +19,6 @@ import (
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/arch"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/sockfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
@@ -50,8 +49,7 @@ var _ = socket.SocketVFS2(&SocketVFS2{})
// returns a corresponding file description.
func NewSockfsFile(t *kernel.Task, ep transport.Endpoint, stype linux.SockType) (*vfs.FileDescription, *syserr.Error) {
mnt := t.Kernel().SocketMount()
- fs := mnt.Filesystem().Impl().(*kernfs.Filesystem)
- d := sockfs.NewDentry(t.Credentials(), fs.NextIno())
+ d := sockfs.NewDentry(t.Credentials(), mnt)
fd, err := NewFileDescription(ep, stype, linux.O_RDWR, mnt, d)
if err != nil {
diff --git a/pkg/sentry/syscalls/linux/BUILD b/pkg/sentry/syscalls/linux/BUILD
index 245e8fe1e..217fcfef2 100644
--- a/pkg/sentry/syscalls/linux/BUILD
+++ b/pkg/sentry/syscalls/linux/BUILD
@@ -49,7 +49,8 @@ go_library(
"sys_time.go",
"sys_timer.go",
"sys_timerfd.go",
- "sys_tls.go",
+ "sys_tls_amd64.go",
+ "sys_tls_arm64.go",
"sys_utsname.go",
"sys_write.go",
"sys_xattr.go",
diff --git a/pkg/sentry/syscalls/linux/sigset.go b/pkg/sentry/syscalls/linux/sigset.go
index 2ddb2b146..434559b80 100644
--- a/pkg/sentry/syscalls/linux/sigset.go
+++ b/pkg/sentry/syscalls/linux/sigset.go
@@ -21,9 +21,13 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-// copyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and
+// CopyInSigSet copies in a sigset_t, checks its size, and ensures that KILL and
// STOP are clear.
-func copyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.SignalSet, error) {
+//
+// TODO(gvisor.dev/issue/1624): This is only exported because
+// syscalls/vfs2/signal.go depends on it. Once vfs1 is deleted and the vfs2
+// syscalls are moved into this package, then they can be unexported.
+func CopyInSigSet(t *kernel.Task, sigSetAddr usermem.Addr, size uint) (linux.SignalSet, error) {
if size != linux.SignalSetSize {
return 0, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/sys_epoll.go b/pkg/sentry/syscalls/linux/sys_epoll.go
index 51bf205cf..7f460d30b 100644
--- a/pkg/sentry/syscalls/linux/sys_epoll.go
+++ b/pkg/sentry/syscalls/linux/sys_epoll.go
@@ -131,7 +131,7 @@ func EpollPwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sy
maskSize := uint(args[5].Uint())
if maskAddr != 0 {
- mask, err := copyInSigSet(t, maskAddr, maskSize)
+ mask, err := CopyInSigSet(t, maskAddr, maskSize)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_poll.go b/pkg/sentry/syscalls/linux/sys_poll.go
index 4f8762d7d..f0198141c 100644
--- a/pkg/sentry/syscalls/linux/sys_poll.go
+++ b/pkg/sentry/syscalls/linux/sys_poll.go
@@ -443,7 +443,7 @@ func Ppoll(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Syscall
}
if maskAddr != 0 {
- mask, err := copyInSigSet(t, maskAddr, maskSize)
+ mask, err := CopyInSigSet(t, maskAddr, maskSize)
if err != nil {
return 0, nil, err
}
@@ -525,7 +525,7 @@ func Pselect(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysca
}
if maskAddr != 0 {
- mask, err := copyInSigSet(t, maskAddr, size)
+ mask, err := CopyInSigSet(t, maskAddr, size)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_signal.go b/pkg/sentry/syscalls/linux/sys_signal.go
index 582d37e03..d2b0012ae 100644
--- a/pkg/sentry/syscalls/linux/sys_signal.go
+++ b/pkg/sentry/syscalls/linux/sys_signal.go
@@ -295,7 +295,7 @@ func RtSigprocmask(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
}
oldmask := t.SignalMask()
if setaddr != 0 {
- mask, err := copyInSigSet(t, setaddr, sigsetsize)
+ mask, err := CopyInSigSet(t, setaddr, sigsetsize)
if err != nil {
return 0, nil, err
}
@@ -366,7 +366,7 @@ func RtSigtimedwait(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
timespec := args[2].Pointer()
sigsetsize := args[3].SizeT()
- mask, err := copyInSigSet(t, sigset, sigsetsize)
+ mask, err := CopyInSigSet(t, sigset, sigsetsize)
if err != nil {
return 0, nil, err
}
@@ -518,7 +518,7 @@ func RestartSyscall(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
// sharedSignalfd is shared between the two calls.
func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
// Copy in the signal mask.
- mask, err := copyInSigSet(t, sigset, sigsetsize)
+ mask, err := CopyInSigSet(t, sigset, sigsetsize)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/sys_tls.go b/pkg/sentry/syscalls/linux/sys_tls_amd64.go
index b3eb96a1c..b3eb96a1c 100644
--- a/pkg/sentry/syscalls/linux/sys_tls.go
+++ b/pkg/sentry/syscalls/linux/sys_tls_amd64.go
diff --git a/pkg/sentry/syscalls/linux/sys_tls_arm64.go b/pkg/sentry/syscalls/linux/sys_tls_arm64.go
new file mode 100644
index 000000000..fb08a356e
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/sys_tls_arm64.go
@@ -0,0 +1,28 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//+build arm64
+
+package linux
+
+import (
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// ArchPrctl is not defined for ARM64.
+func ArchPrctl(*kernel.Task, arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return 0, nil, syserror.ENOSYS
+}
diff --git a/pkg/sentry/syscalls/linux/sys_utsname.go b/pkg/sentry/syscalls/linux/sys_utsname.go
index a393e28c1..e9d702e8e 100644
--- a/pkg/sentry/syscalls/linux/sys_utsname.go
+++ b/pkg/sentry/syscalls/linux/sys_utsname.go
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// +build amd64 arm64
-
package linux
import (
diff --git a/pkg/sentry/syscalls/linux/vfs2/BUILD b/pkg/sentry/syscalls/linux/vfs2/BUILD
index 4c7b8f819..c32f942fb 100644
--- a/pkg/sentry/syscalls/linux/vfs2/BUILD
+++ b/pkg/sentry/syscalls/linux/vfs2/BUILD
@@ -19,6 +19,7 @@ go_library(
"poll.go",
"read_write.go",
"setstat.go",
+ "signal.go",
"socket.go",
"stat.go",
"stat_amd64.go",
@@ -38,7 +39,10 @@ go_library(
"//pkg/gohacks",
"//pkg/sentry/arch",
"//pkg/sentry/fsbridge",
+ "//pkg/sentry/fsimpl/eventfd",
"//pkg/sentry/fsimpl/pipefs",
+ "//pkg/sentry/fsimpl/signalfd",
+ "//pkg/sentry/fsimpl/timerfd",
"//pkg/sentry/kernel",
"//pkg/sentry/kernel/auth",
"//pkg/sentry/kernel/pipe",
diff --git a/pkg/sentry/syscalls/linux/vfs2/eventfd.go b/pkg/sentry/syscalls/linux/vfs2/eventfd.go
index bd2194972..aff1a2070 100644
--- a/pkg/sentry/syscalls/linux/vfs2/eventfd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/eventfd.go
@@ -17,6 +17,7 @@ package vfs2
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/eventfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -31,12 +32,13 @@ func Eventfd2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sysc
return 0, nil, syserror.EINVAL
}
+ vfsObj := t.Kernel().VFS()
fileFlags := uint32(linux.O_RDWR)
if flags&linux.EFD_NONBLOCK != 0 {
fileFlags |= linux.O_NONBLOCK
}
semMode := flags&linux.EFD_SEMAPHORE != 0
- eventfd, err := t.Kernel().VFS().NewEventFD(initVal, semMode, fileFlags)
+ eventfd, err := eventfd.New(vfsObj, initVal, semMode, fileFlags)
if err != nil {
return 0, nil, err
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/signal.go b/pkg/sentry/syscalls/linux/vfs2/signal.go
new file mode 100644
index 000000000..623992f6f
--- /dev/null
+++ b/pkg/sentry/syscalls/linux/vfs2/signal.go
@@ -0,0 +1,100 @@
+// Copyright 2018 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package vfs2
+
+import (
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/signalfd"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ slinux "gvisor.dev/gvisor/pkg/sentry/syscalls/linux"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// sharedSignalfd is shared between the two calls.
+func sharedSignalfd(t *kernel.Task, fd int32, sigset usermem.Addr, sigsetsize uint, flags int32) (uintptr, *kernel.SyscallControl, error) {
+ // Copy in the signal mask.
+ mask, err := slinux.CopyInSigSet(t, sigset, sigsetsize)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // Always check for valid flags, even if not creating.
+ if flags&^(linux.SFD_NONBLOCK|linux.SFD_CLOEXEC) != 0 {
+ return 0, nil, syserror.EINVAL
+ }
+
+ // Is this a change to an existing signalfd?
+ //
+ // The spec indicates that this should adjust the mask.
+ if fd != -1 {
+ file := t.GetFileVFS2(fd)
+ if file == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer file.DecRef()
+
+ // Is this a signalfd?
+ if sfd, ok := file.Impl().(*signalfd.SignalFileDescription); ok {
+ sfd.SetMask(mask)
+ return 0, nil, nil
+ }
+
+ // Not a signalfd.
+ return 0, nil, syserror.EINVAL
+ }
+
+ fileFlags := uint32(linux.O_RDWR)
+ if flags&linux.SFD_NONBLOCK != 0 {
+ fileFlags |= linux.O_NONBLOCK
+ }
+
+ // Create a new file.
+ vfsObj := t.Kernel().VFS()
+ file, err := signalfd.New(vfsObj, t, mask, fileFlags)
+ if err != nil {
+ return 0, nil, err
+ }
+ defer file.DecRef()
+
+ // Create a new descriptor.
+ fd, err = t.NewFDFromVFS2(0, file, kernel.FDFlags{
+ CloseOnExec: flags&linux.SFD_CLOEXEC != 0,
+ })
+ if err != nil {
+ return 0, nil, err
+ }
+
+ // Done.
+ return uintptr(fd), nil, nil
+}
+
+// Signalfd implements the linux syscall signalfd(2).
+func Signalfd(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ sigset := args[1].Pointer()
+ sigsetsize := args[2].SizeT()
+ return sharedSignalfd(t, fd, sigset, sigsetsize, 0)
+}
+
+// Signalfd4 implements the linux syscall signalfd4(2).
+func Signalfd4(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ sigset := args[1].Pointer()
+ sigsetsize := args[2].SizeT()
+ flags := args[3].Int()
+ return sharedSignalfd(t, fd, sigset, sigsetsize, flags)
+}
diff --git a/pkg/sentry/syscalls/linux/vfs2/timerfd.go b/pkg/sentry/syscalls/linux/vfs2/timerfd.go
index 7938a5249..5ac79bc09 100644
--- a/pkg/sentry/syscalls/linux/vfs2/timerfd.go
+++ b/pkg/sentry/syscalls/linux/vfs2/timerfd.go
@@ -17,9 +17,9 @@ package vfs2
import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/sentry/arch"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/timerfd"
"gvisor.dev/gvisor/pkg/sentry/kernel"
ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/syserror"
)
@@ -32,9 +32,12 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
return 0, nil, syserror.EINVAL
}
- var fileFlags uint32
+ // Timerfds aren't writable per se (their implementation of Write just
+ // returns EINVAL), but they are "opened for writing", which is necessary
+ // to actually reach said implementation of Write.
+ fileFlags := uint32(linux.O_RDWR)
if flags&linux.TFD_NONBLOCK != 0 {
- fileFlags = linux.O_NONBLOCK
+ fileFlags |= linux.O_NONBLOCK
}
var clock ktime.Clock
@@ -46,7 +49,8 @@ func TimerfdCreate(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel
default:
return 0, nil, syserror.EINVAL
}
- file, err := t.Kernel().VFS().NewTimerFD(clock, fileFlags)
+ vfsObj := t.Kernel().VFS()
+ file, err := timerfd.New(vfsObj, clock, fileFlags)
if err != nil {
return 0, nil, err
}
@@ -77,7 +81,7 @@ func TimerfdSettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
}
defer file.DecRef()
- tfd, ok := file.Impl().(*vfs.TimerFileDescription)
+ tfd, ok := file.Impl().(*timerfd.TimerFileDescription)
if !ok {
return 0, nil, syserror.EINVAL
}
@@ -111,7 +115,7 @@ func TimerfdGettime(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kerne
}
defer file.DecRef()
- tfd, ok := file.Impl().(*vfs.TimerFileDescription)
+ tfd, ok := file.Impl().(*timerfd.TimerFileDescription)
if !ok {
return 0, nil, syserror.EINVAL
}
diff --git a/pkg/sentry/syscalls/linux/vfs2/vfs2.go b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
index f1b697844..9c04677f1 100644
--- a/pkg/sentry/syscalls/linux/vfs2/vfs2.go
+++ b/pkg/sentry/syscalls/linux/vfs2/vfs2.go
@@ -139,14 +139,14 @@ func Override() {
s.Table[277] = syscalls.Supported("sync_file_range", SyncFileRange)
s.Table[280] = syscalls.Supported("utimensat", Utimensat)
s.Table[281] = syscalls.Supported("epoll_pwait", EpollPwait)
- delete(s.Table, 282) // signalfd
+ s.Table[282] = syscalls.Supported("signalfd", Signalfd)
s.Table[283] = syscalls.Supported("timerfd_create", TimerfdCreate)
s.Table[284] = syscalls.Supported("eventfd", Eventfd)
delete(s.Table, 285) // fallocate
s.Table[286] = syscalls.Supported("timerfd_settime", TimerfdSettime)
s.Table[287] = syscalls.Supported("timerfd_gettime", TimerfdGettime)
s.Table[288] = syscalls.Supported("accept4", Accept4)
- delete(s.Table, 289) // signalfd4
+ s.Table[289] = syscalls.Supported("signalfd4", Signalfd4)
s.Table[290] = syscalls.Supported("eventfd2", Eventfd2)
s.Table[291] = syscalls.Supported("epoll_create1", EpollCreate1)
s.Table[292] = syscalls.Supported("dup3", Dup3)
diff --git a/pkg/sentry/vfs/BUILD b/pkg/sentry/vfs/BUILD
index c62505fe2..94d69c1cc 100644
--- a/pkg/sentry/vfs/BUILD
+++ b/pkg/sentry/vfs/BUILD
@@ -25,7 +25,6 @@ go_library(
"device.go",
"epoll.go",
"epoll_interest_list.go",
- "eventfd.go",
"file_description.go",
"file_description_impl_util.go",
"filesystem.go",
@@ -37,12 +36,12 @@ go_library(
"pathname.go",
"permissions.go",
"resolving_path.go",
- "timerfd.go",
"vfs.go",
],
visibility = ["//pkg/sentry:internal"],
deps = [
"//pkg/abi/linux",
+ "//pkg/binary",
"//pkg/context",
"//pkg/fd",
"//pkg/fdnotifier",
@@ -70,7 +69,6 @@ go_test(
name = "vfs_test",
size = "small",
srcs = [
- "eventfd_test.go",
"file_description_impl_util_test.go",
"mount_test.go",
],
@@ -82,6 +80,5 @@ go_test(
"//pkg/sync",
"//pkg/syserror",
"//pkg/usermem",
- "//pkg/waiter",
],
)
diff --git a/pkg/sentry/vfs/anonfs.go b/pkg/sentry/vfs/anonfs.go
index 981bd8caa..caf770fd5 100644
--- a/pkg/sentry/vfs/anonfs.go
+++ b/pkg/sentry/vfs/anonfs.go
@@ -91,8 +91,6 @@ func (fs *anonFilesystem) Sync(ctx context.Context) error {
}
// AccessAt implements vfs.Filesystem.Impl.AccessAt.
-//
-// TODO(gvisor.dev/issue/1965): Implement access permissions.
func (fs *anonFilesystem) AccessAt(ctx context.Context, rp *ResolvingPath, creds *auth.Credentials, ats AccessTypes) error {
if !rp.Done() {
return syserror.ENOTDIR
@@ -204,7 +202,7 @@ func (fs *anonFilesystem) StatAt(ctx context.Context, rp *ResolvingPath, opts St
Ino: 1,
Size: 0,
Blocks: 0,
- DevMajor: 0,
+ DevMajor: linux.UNNAMED_MAJOR,
DevMinor: fs.devMinor,
}, nil
}
diff --git a/pkg/sentry/vfs/device.go b/pkg/sentry/vfs/device.go
index bda5576fa..1e9dffc8f 100644
--- a/pkg/sentry/vfs/device.go
+++ b/pkg/sentry/vfs/device.go
@@ -103,7 +103,7 @@ func (vfs *VirtualFilesystem) OpenDeviceSpecialFile(ctx context.Context, mnt *Mo
}
// GetAnonBlockDevMinor allocates and returns an unused minor device number for
-// an "anonymous" block device with major number 0.
+// an "anonymous" block device with major number UNNAMED_MAJOR.
func (vfs *VirtualFilesystem) GetAnonBlockDevMinor() (uint32, error) {
vfs.anonBlockDevMinorMu.Lock()
defer vfs.anonBlockDevMinorMu.Unlock()
diff --git a/pkg/sentry/vfs/file_description.go b/pkg/sentry/vfs/file_description.go
index 418d69b96..cfabd936c 100644
--- a/pkg/sentry/vfs/file_description.go
+++ b/pkg/sentry/vfs/file_description.go
@@ -91,10 +91,6 @@ type FileDescriptionOptions struct {
// ESPIPE.
DenyPWrite bool
- // if InvalidWrite is true, calls to FileDescription.Write() return
- // EINVAL.
- InvalidWrite bool
-
// If UseDentryMetadata is true, calls to FileDescription methods that
// interact with file and filesystem metadata (Stat, SetStat, StatFS,
// Listxattr, Getxattr, Setxattr, Removexattr) are implemented by calling
@@ -570,9 +566,6 @@ func (fd *FileDescription) PWrite(ctx context.Context, src usermem.IOSequence, o
// Write is similar to PWrite, but does not specify an offset.
func (fd *FileDescription) Write(ctx context.Context, src usermem.IOSequence, opts WriteOptions) (int64, error) {
- if fd.opts.InvalidWrite {
- return 0, syserror.EINVAL
- }
if !fd.writable {
return 0, syserror.EBADF
}
diff --git a/pkg/tcpip/buffer/view.go b/pkg/tcpip/buffer/view.go
index f01217c91..9a3c5d6c3 100644
--- a/pkg/tcpip/buffer/view.go
+++ b/pkg/tcpip/buffer/view.go
@@ -59,6 +59,9 @@ func (v *View) Reader() bytes.Reader {
// ToVectorisedView returns a VectorisedView containing the receiver.
func (v View) ToVectorisedView() VectorisedView {
+ if len(v) == 0 {
+ return VectorisedView{}
+ }
return NewVectorisedView(len(v), []View{v})
}
@@ -229,6 +232,9 @@ func (vv *VectorisedView) Append(vv2 VectorisedView) {
// AppendView appends the given view into this vectorised view.
func (vv *VectorisedView) AppendView(v View) {
+ if len(v) == 0 {
+ return
+ }
vv.views = append(vv.views, v)
vv.size += len(v)
}
diff --git a/pkg/tcpip/buffer/view_test.go b/pkg/tcpip/buffer/view_test.go
index c56795c7b..726e54de9 100644
--- a/pkg/tcpip/buffer/view_test.go
+++ b/pkg/tcpip/buffer/view_test.go
@@ -483,3 +483,39 @@ func TestPullUp(t *testing.T) {
}
}
}
+
+func TestToVectorisedView(t *testing.T) {
+ testCases := []struct {
+ in View
+ want VectorisedView
+ }{
+ {nil, VectorisedView{}},
+ {View{}, VectorisedView{}},
+ {View{'a'}, VectorisedView{size: 1, views: []View{{'a'}}}},
+ }
+ for _, tc := range testCases {
+ if got, want := tc.in.ToVectorisedView(), tc.want; !reflect.DeepEqual(got, want) {
+ t.Errorf("(%v).ToVectorisedView failed got: %+v, want: %+v", tc.in, got, want)
+ }
+ }
+}
+
+func TestAppendView(t *testing.T) {
+ testCases := []struct {
+ vv VectorisedView
+ in View
+ want VectorisedView
+ }{
+ {VectorisedView{}, nil, VectorisedView{}},
+ {VectorisedView{}, View{}, VectorisedView{}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, nil, VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, View{}, VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}},
+ {VectorisedView{[]View{{'a', 'b', 'c', 'd'}}, 4}, View{'e'}, VectorisedView{[]View{{'a', 'b', 'c', 'd'}, {'e'}}, 5}},
+ }
+ for _, tc := range testCases {
+ tc.vv.AppendView(tc.in)
+ if got, want := tc.vv, tc.want; !reflect.DeepEqual(got, want) {
+ t.Errorf("(%v).ToVectorisedView failed got: %+v, want: %+v", tc.in, got, want)
+ }
+ }
+}
diff --git a/pkg/tcpip/header/tcp.go b/pkg/tcpip/header/tcp.go
index 21581257b..29454c4b9 100644
--- a/pkg/tcpip/header/tcp.go
+++ b/pkg/tcpip/header/tcp.go
@@ -609,5 +609,8 @@ func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.V
}
// Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming
// the payload, so we'll accept any payload that overlaps the receieve window.
- return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThan(rcvAcc)
+ // segSeq < rcvAcc is more correct according to RFC, however, Linux does it
+ // differently, it uses segSeq <= rcvAcc, we'd want to keep the same behavior
+ // as Linux.
+ return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThanEq(rcvAcc)
}
diff --git a/pkg/tcpip/network/ipv4/BUILD b/pkg/tcpip/network/ipv4/BUILD
index 880ea7de2..78420d6e6 100644
--- a/pkg/tcpip/network/ipv4/BUILD
+++ b/pkg/tcpip/network/ipv4/BUILD
@@ -34,5 +34,6 @@ go_test(
"//pkg/tcpip/transport/tcp",
"//pkg/tcpip/transport/udp",
"//pkg/waiter",
+ "@com_github_google_go-cmp//cmp:go_default_library",
],
)
diff --git a/pkg/tcpip/network/ipv4/ipv4.go b/pkg/tcpip/network/ipv4/ipv4.go
index 9db42b2a4..64046cbbf 100644
--- a/pkg/tcpip/network/ipv4/ipv4.go
+++ b/pkg/tcpip/network/ipv4/ipv4.go
@@ -249,10 +249,11 @@ func (e *endpoint) WritePacket(r *stack.Route, gso *stack.GSO, params stack.Netw
ip := e.addIPHeader(r, &pkt.Header, pkt.Data.Size(), params)
pkt.NetworkHeader = buffer.View(ip)
+ nicName := e.stack.FindNICNameFromID(e.NICID())
// iptables filtering. All packets that reach here are locally
// generated.
ipt := e.stack.IPTables()
- if ok := ipt.Check(stack.Output, &pkt, gso, r, ""); !ok {
+ if ok := ipt.Check(stack.Output, &pkt, gso, r, "", nicName); !ok {
// iptables is telling us to drop the packet.
return nil
}
@@ -319,10 +320,11 @@ func (e *endpoint) WritePackets(r *stack.Route, gso *stack.GSO, pkts stack.Packe
pkt = pkt.Next()
}
+ nicName := e.stack.FindNICNameFromID(e.NICID())
// iptables filtering. All packets that reach here are locally
// generated.
ipt := e.stack.IPTables()
- dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r)
+ dropped, natPkts := ipt.CheckPackets(stack.Output, pkts, gso, r, nicName)
if len(dropped) == 0 && len(natPkts) == 0 {
// Fast path: If no packets are to be dropped then we can just invoke the
// faster WritePackets API directly.
@@ -445,7 +447,7 @@ func (e *endpoint) HandlePacket(r *stack.Route, pkt stack.PacketBuffer) {
// iptables filtering. All packets that reach here are intended for
// this machine and will not be forwarded.
ipt := e.stack.IPTables()
- if ok := ipt.Check(stack.Input, &pkt, nil, nil, ""); !ok {
+ if ok := ipt.Check(stack.Input, &pkt, nil, nil, "", ""); !ok {
// iptables is telling us to drop the packet.
return
}
diff --git a/pkg/tcpip/network/ipv4/ipv4_test.go b/pkg/tcpip/network/ipv4/ipv4_test.go
index 5a864d832..36035c820 100644
--- a/pkg/tcpip/network/ipv4/ipv4_test.go
+++ b/pkg/tcpip/network/ipv4/ipv4_test.go
@@ -20,6 +20,7 @@ import (
"math/rand"
"testing"
+ "github.com/google/go-cmp/cmp"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/buffer"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -473,3 +474,252 @@ func TestInvalidFragments(t *testing.T) {
})
}
}
+
+// TestReceiveFragments feeds fragments in through the incoming packet path to
+// test reassembly
+func TestReceiveFragments(t *testing.T) {
+ const addr1 = "\x0c\xa8\x00\x01" // 192.168.0.1
+ const addr2 = "\x0c\xa8\x00\x02" // 192.168.0.2
+ const nicID = 1
+
+ // Build and return a UDP header containing payload.
+ udpGen := func(payloadLen int, multiplier uint8) buffer.View {
+ payload := buffer.NewView(payloadLen)
+ for i := 0; i < len(payload); i++ {
+ payload[i] = uint8(i) * multiplier
+ }
+
+ udpLength := header.UDPMinimumSize + len(payload)
+
+ hdr := buffer.NewPrependable(udpLength)
+ u := header.UDP(hdr.Prepend(udpLength))
+ u.Encode(&header.UDPFields{
+ SrcPort: 5555,
+ DstPort: 80,
+ Length: uint16(udpLength),
+ })
+ copy(u.Payload(), payload)
+ sum := header.PseudoHeaderChecksum(udp.ProtocolNumber, addr1, addr2, uint16(udpLength))
+ sum = header.Checksum(payload, sum)
+ u.SetChecksum(^u.CalculateChecksum(sum))
+ return hdr.View()
+ }
+
+ // UDP header plus a payload of 0..256
+ ipv4Payload1 := udpGen(256, 1)
+ udpPayload1 := ipv4Payload1[header.UDPMinimumSize:]
+ // UDP header plus a payload of 0..256 in increments of 2.
+ ipv4Payload2 := udpGen(128, 2)
+ udpPayload2 := ipv4Payload2[header.UDPMinimumSize:]
+
+ type fragmentData struct {
+ id uint16
+ flags uint8
+ fragmentOffset uint16
+ payload buffer.View
+ }
+
+ tests := []struct {
+ name string
+ fragments []fragmentData
+ expectedPayloads [][]byte
+ }{
+ {
+ name: "No fragmentation",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: 0,
+ fragmentOffset: 0,
+ payload: ipv4Payload1,
+ },
+ },
+ expectedPayloads: [][]byte{udpPayload1},
+ },
+ {
+ name: "More fragments without payload",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload1,
+ },
+ },
+ expectedPayloads: nil,
+ },
+ {
+ name: "Non-zero fragment offset without payload",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: 0,
+ fragmentOffset: 8,
+ payload: ipv4Payload1,
+ },
+ },
+ expectedPayloads: nil,
+ },
+ {
+ name: "Two fragments",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload1[:64],
+ },
+ {
+ id: 1,
+ flags: 0,
+ fragmentOffset: 64,
+ payload: ipv4Payload1[64:],
+ },
+ },
+ expectedPayloads: [][]byte{udpPayload1},
+ },
+ {
+ name: "Second fragment has MoreFlags set",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload1[:64],
+ },
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 64,
+ payload: ipv4Payload1[64:],
+ },
+ },
+ expectedPayloads: nil,
+ },
+ {
+ name: "Two fragments with different IDs",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload1[:64],
+ },
+ {
+ id: 2,
+ flags: 0,
+ fragmentOffset: 64,
+ payload: ipv4Payload1[64:],
+ },
+ },
+ expectedPayloads: nil,
+ },
+ {
+ name: "Two interleaved fragmented packets",
+ fragments: []fragmentData{
+ {
+ id: 1,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload1[:64],
+ },
+ {
+ id: 2,
+ flags: header.IPv4FlagMoreFragments,
+ fragmentOffset: 0,
+ payload: ipv4Payload2[:64],
+ },
+ {
+ id: 1,
+ flags: 0,
+ fragmentOffset: 64,
+ payload: ipv4Payload1[64:],
+ },
+ {
+ id: 2,
+ flags: 0,
+ fragmentOffset: 64,
+ payload: ipv4Payload2[64:],
+ },
+ },
+ expectedPayloads: [][]byte{udpPayload1, udpPayload2},
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ // Setup a stack and endpoint.
+ s := stack.New(stack.Options{
+ NetworkProtocols: []stack.NetworkProtocol{ipv4.NewProtocol()},
+ TransportProtocols: []stack.TransportProtocol{udp.NewProtocol()},
+ })
+ e := channel.New(0, 1280, tcpip.LinkAddress("\xf0\x00"))
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+ }
+ if err := s.AddAddress(nicID, header.IPv4ProtocolNumber, addr2); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv4ProtocolNumber, addr2, err)
+ }
+
+ wq := waiter.Queue{}
+ we, ch := waiter.NewChannelEntry(nil)
+ wq.EventRegister(&we, waiter.EventIn)
+ defer wq.EventUnregister(&we)
+ defer close(ch)
+ ep, err := s.NewEndpoint(udp.ProtocolNumber, header.IPv4ProtocolNumber, &wq)
+ if err != nil {
+ t.Fatalf("NewEndpoint(%d, %d, _): %s", udp.ProtocolNumber, header.IPv4ProtocolNumber, err)
+ }
+ defer ep.Close()
+
+ bindAddr := tcpip.FullAddress{Addr: addr2, Port: 80}
+ if err := ep.Bind(bindAddr); err != nil {
+ t.Fatalf("Bind(%+v): %s", bindAddr, err)
+ }
+
+ // Prepare and send the fragments.
+ for _, frag := range test.fragments {
+ hdr := buffer.NewPrependable(header.IPv4MinimumSize)
+
+ // Serialize IPv4 fixed header.
+ ip := header.IPv4(hdr.Prepend(header.IPv4MinimumSize))
+ ip.Encode(&header.IPv4Fields{
+ IHL: header.IPv4MinimumSize,
+ TotalLength: header.IPv4MinimumSize + uint16(len(frag.payload)),
+ ID: frag.id,
+ Flags: frag.flags,
+ FragmentOffset: frag.fragmentOffset,
+ TTL: 64,
+ Protocol: uint8(header.UDPProtocolNumber),
+ SrcAddr: addr1,
+ DstAddr: addr2,
+ })
+
+ vv := hdr.View().ToVectorisedView()
+ vv.AppendView(frag.payload)
+
+ e.InjectInbound(header.IPv4ProtocolNumber, stack.PacketBuffer{
+ Data: vv,
+ })
+ }
+
+ if got, want := s.Stats().UDP.PacketsReceived.Value(), uint64(len(test.expectedPayloads)); got != want {
+ t.Errorf("got UDP Rx Packets = %d, want = %d", got, want)
+ }
+
+ for i, expectedPayload := range test.expectedPayloads {
+ gotPayload, _, err := ep.Read(nil)
+ if err != nil {
+ t.Fatalf("(i=%d) Read(nil): %s", i, err)
+ }
+ if diff := cmp.Diff(buffer.View(expectedPayload), gotPayload); diff != "" {
+ t.Errorf("(i=%d) got UDP payload mismatch (-want +got):\n%s", i, diff)
+ }
+ }
+
+ if gotPayload, _, err := ep.Read(nil); err != tcpip.ErrWouldBlock {
+ t.Fatalf("(last) got Read(nil) = (%x, _, %v), want = (_, _, %s)", gotPayload, err, tcpip.ErrWouldBlock)
+ }
+ })
+ }
+}
diff --git a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go b/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go
index 8b4213eec..d199ded6a 100644
--- a/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go
+++ b/pkg/tcpip/stack/dhcpv6configurationfromndpra_string.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Code generated by "stringer -type=DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT.
+// Code generated by "stringer -type DHCPv6ConfigurationFromNDPRA"; DO NOT EDIT.
package stack
@@ -22,9 +22,9 @@ func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
- _ = x[DHCPv6NoConfiguration-0]
- _ = x[DHCPv6ManagedAddress-1]
- _ = x[DHCPv6OtherConfigurations-2]
+ _ = x[DHCPv6NoConfiguration-1]
+ _ = x[DHCPv6ManagedAddress-2]
+ _ = x[DHCPv6OtherConfigurations-3]
}
const _DHCPv6ConfigurationFromNDPRA_name = "DHCPv6NoConfigurationDHCPv6ManagedAddressDHCPv6OtherConfigurations"
@@ -32,8 +32,9 @@ const _DHCPv6ConfigurationFromNDPRA_name = "DHCPv6NoConfigurationDHCPv6ManagedAd
var _DHCPv6ConfigurationFromNDPRA_index = [...]uint8{0, 21, 41, 66}
func (i DHCPv6ConfigurationFromNDPRA) String() string {
+ i -= 1
if i < 0 || i >= DHCPv6ConfigurationFromNDPRA(len(_DHCPv6ConfigurationFromNDPRA_index)-1) {
- return "DHCPv6ConfigurationFromNDPRA(" + strconv.FormatInt(int64(i), 10) + ")"
+ return "DHCPv6ConfigurationFromNDPRA(" + strconv.FormatInt(int64(i+1), 10) + ")"
}
return _DHCPv6ConfigurationFromNDPRA_name[_DHCPv6ConfigurationFromNDPRA_index[i]:_DHCPv6ConfigurationFromNDPRA_index[i+1]]
}
diff --git a/pkg/tcpip/stack/iptables.go b/pkg/tcpip/stack/iptables.go
index 7c3c47d50..443423b3c 100644
--- a/pkg/tcpip/stack/iptables.go
+++ b/pkg/tcpip/stack/iptables.go
@@ -16,6 +16,7 @@ package stack
import (
"fmt"
+ "strings"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
@@ -178,7 +179,7 @@ const (
// dropped.
//
// Precondition: pkt.NetworkHeader is set.
-func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address) bool {
+func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, address tcpip.Address, nicName string) bool {
// Packets are manipulated only if connection and matching
// NAT rule exists.
it.connections.HandlePacket(pkt, hook, gso, r)
@@ -187,7 +188,7 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
for _, tablename := range it.Priorities[hook] {
table := it.Tables[tablename]
ruleIdx := table.BuiltinChains[hook]
- switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address); verdict {
+ switch verdict := it.checkChain(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict {
// If the table returns Accept, move on to the next table.
case chainAccept:
continue
@@ -228,10 +229,10 @@ func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, gso *GSO, r *Route, addr
//
// NOTE: unlike the Check API the returned map contains packets that should be
// dropped.
-func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
+func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *Route, nicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
if !pkt.NatDone {
- if ok := it.Check(hook, pkt, gso, r, ""); !ok {
+ if ok := it.Check(hook, pkt, gso, r, "", nicName); !ok {
if drop == nil {
drop = make(map[*PacketBuffer]struct{})
}
@@ -251,11 +252,11 @@ func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, gso *GSO, r *
// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a
// precondition.
-func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) chainVerdict {
+func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) chainVerdict {
// Start from ruleIdx and walk the list of rules until a rule gives us
// a verdict.
for ruleIdx < len(table.Rules) {
- switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, address); verdict {
+ switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, gso, r, address, nicName); verdict {
case RuleAccept:
return chainAccept
@@ -272,7 +273,7 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId
ruleIdx++
continue
}
- switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, address); verdict {
+ switch verdict := it.checkChain(hook, pkt, table, jumpTo, gso, r, address, nicName); verdict {
case chainAccept:
return chainAccept
case chainDrop:
@@ -298,7 +299,7 @@ func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleId
// Precondition: pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
// TODO(gvisor.dev/issue/170): pkt.NetworkHeader will always be set as a
// precondition.
-func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address) (RuleVerdict, int) {
+func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, gso *GSO, r *Route, address tcpip.Address, nicName string) (RuleVerdict, int) {
rule := table.Rules[ruleIdx]
// If pkt.NetworkHeader hasn't been set yet, it will be contained in
@@ -313,7 +314,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
}
// Check whether the packet matches the IP header filter.
- if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader)) {
+ if !filterMatch(rule.Filter, header.IPv4(pkt.NetworkHeader), hook, nicName) {
// Continue on to the next rule.
return RuleJump, ruleIdx + 1
}
@@ -335,7 +336,7 @@ func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx
return rule.Target.Action(pkt, &it.connections, hook, gso, r, address)
}
-func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool {
+func filterMatch(filter IPHeaderFilter, hdr header.IPv4, hook Hook, nicName string) bool {
// TODO(gvisor.dev/issue/170): Support other fields of the filter.
// Check the transport protocol.
if filter.Protocol != 0 && filter.Protocol != hdr.TransportProtocol() {
@@ -355,5 +356,26 @@ func filterMatch(filter IPHeaderFilter, hdr header.IPv4) bool {
return false
}
+ // Check the output interface.
+ // TODO(gvisor.dev/issue/170): Add the check for FORWARD and POSTROUTING
+ // hooks after supported.
+ if hook == Output {
+ n := len(filter.OutputInterface)
+ if n == 0 {
+ return true
+ }
+
+ // If the interface name ends with '+', any interface which begins
+ // with the name should be matched.
+ ifName := filter.OutputInterface
+ matches = true
+ if strings.HasSuffix(ifName, "+") {
+ matches = strings.HasPrefix(nicName, ifName[:n-1])
+ } else {
+ matches = nicName == ifName
+ }
+ return filter.OutputInterfaceInvert != matches
+ }
+
return true
}
diff --git a/pkg/tcpip/stack/iptables_types.go b/pkg/tcpip/stack/iptables_types.go
index 1bb0ba1bd..fe06007ae 100644
--- a/pkg/tcpip/stack/iptables_types.go
+++ b/pkg/tcpip/stack/iptables_types.go
@@ -158,6 +158,19 @@ type IPHeaderFilter struct {
// true the filter will match packets that fail the destination
// comparison.
DstInvert bool
+
+ // OutputInterface matches the name of the outgoing interface for the
+ // packet.
+ OutputInterface string
+
+ // OutputInterfaceMask masks the characters of the interface name when
+ // comparing with OutputInterface.
+ OutputInterfaceMask string
+
+ // OutputInterfaceInvert inverts the meaning of outgoing interface check,
+ // i.e. when true the filter will match packets that fail the outgoing
+ // interface comparison.
+ OutputInterfaceInvert bool
}
// A Matcher is the interface for matching packets.
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 15343acbc..526c7d6ff 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -199,9 +199,11 @@ var (
type DHCPv6ConfigurationFromNDPRA int
const (
+ _ DHCPv6ConfigurationFromNDPRA = iota
+
// DHCPv6NoConfiguration indicates that no configurations are available via
// DHCPv6.
- DHCPv6NoConfiguration DHCPv6ConfigurationFromNDPRA = iota
+ DHCPv6NoConfiguration
// DHCPv6ManagedAddress indicates that addresses are available via DHCPv6.
//
@@ -315,9 +317,6 @@ type NDPDispatcher interface {
// OnDHCPv6Configuration will be called with an updated configuration that is
// available via DHCPv6 for a specified NIC.
//
- // NDPDispatcher assumes that the initial configuration available by DHCPv6 is
- // DHCPv6NoConfiguration.
- //
// This function is not permitted to block indefinitely. It must not
// call functions on the stack itself.
OnDHCPv6Configuration(tcpip.NICID, DHCPv6ConfigurationFromNDPRA)
@@ -1808,6 +1807,8 @@ func (ndp *ndpState) cleanupState(hostOnly bool) {
if got := len(ndp.defaultRouters); got != 0 {
panic(fmt.Sprintf("ndp: still have discovered default routers after cleaning up; found = %d", got))
}
+
+ ndp.dhcpv6Configuration = 0
}
// startSolicitingRouters starts soliciting routers, as per RFC 4861 section
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 67f012840..b3d174cdd 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -4888,7 +4888,12 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
}
}
- // The initial DHCPv6 configuration should be stack.DHCPv6NoConfiguration.
+ // Even if the first RA reports no DHCPv6 configurations are available, the
+ // dispatcher should get an event.
+ e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
+ expectDHCPv6Event(stack.DHCPv6NoConfiguration)
+ // Receiving the same update again should not result in an event to the
+ // dispatcher.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, false))
expectNoDHCPv6Event()
@@ -4896,8 +4901,6 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
// Configurations.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
- // Receiving the same update again should not result in an event to the
- // NDPDispatcher.
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectNoDHCPv6Event()
@@ -4933,6 +4936,21 @@ func TestDHCPv6ConfigurationFromNDPDA(t *testing.T) {
expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
expectNoDHCPv6Event()
+
+ // Cycling the NIC should cause the last DHCPv6 configuration to be cleared.
+ if err := s.DisableNIC(nicID); err != nil {
+ t.Fatalf("s.DisableNIC(%d): %s", nicID, err)
+ }
+ if err := s.EnableNIC(nicID); err != nil {
+ t.Fatalf("s.EnableNIC(%d): %s", nicID, err)
+ }
+
+ // Receive an RA that updates the DHCPv6 configuration to Other
+ // Configurations.
+ e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
+ expectDHCPv6Event(stack.DHCPv6OtherConfigurations)
+ e.InjectInbound(header.IPv6ProtocolNumber, raBufWithDHCPv6(llAddr2, false, true))
+ expectNoDHCPv6Event()
}
// TestRouterSolicitation tests the initial Router Solicitations that are sent
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 8f4c1fe42..54103fdb3 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -1233,7 +1233,7 @@ func (n *NIC) DeliverNetworkPacket(linkEP LinkEndpoint, remote, local tcpip.Link
// iptables filtering.
ipt := n.stack.IPTables()
address := n.primaryAddress(protocol)
- if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address); !ok {
+ if ok := ipt.Check(Prerouting, &pkt, nil, nil, address.Address, ""); !ok {
// iptables is telling us to drop the packet.
return
}
diff --git a/pkg/tcpip/stack/stack.go b/pkg/tcpip/stack/stack.go
index e33fae4eb..b39ffa9fb 100644
--- a/pkg/tcpip/stack/stack.go
+++ b/pkg/tcpip/stack/stack.go
@@ -1898,9 +1898,23 @@ func (s *Stack) FindNetworkEndpoint(netProto tcpip.NetworkProtocolNumber, addres
nic.mu.RLock()
defer nic.mu.RUnlock()
- // An endpoint with this id exists, check if it can be used and return it.
+ // An endpoint with this id exists, check if it can be
+ // used and return it.
return ref.ep, nil
}
}
return nil, tcpip.ErrBadAddress
}
+
+// FindNICNameFromID returns the name of the nic for the given NICID.
+func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+
+ nic, ok := s.nics[id]
+ if !ok {
+ return ""
+ }
+
+ return nic.Name()
+}
diff --git a/pkg/tcpip/transport/tcp/rcv.go b/pkg/tcpip/transport/tcp/rcv.go
index 6fe97fefd..dd89a292a 100644
--- a/pkg/tcpip/transport/tcp/rcv.go
+++ b/pkg/tcpip/transport/tcp/rcv.go
@@ -70,7 +70,16 @@ func newReceiver(ep *endpoint, irs seqnum.Value, rcvWnd seqnum.Size, rcvWndScale
// acceptable checks if the segment sequence number range is acceptable
// according to the table on page 26 of RFC 793.
func (r *receiver) acceptable(segSeq seqnum.Value, segLen seqnum.Size) bool {
- return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvAcc)
+ // r.rcvWnd could be much larger than the window size we advertised in our
+ // outgoing packets, we should use what we have advertised for acceptability
+ // test.
+ scaledWindowSize := r.rcvWnd >> r.rcvWndScale
+ if scaledWindowSize > 0xffff {
+ // This is what we actually put in the Window field.
+ scaledWindowSize = 0xffff
+ }
+ advertisedWindowSize := scaledWindowSize << r.rcvWndScale
+ return header.Acceptable(segSeq, segLen, r.rcvNxt, r.rcvNxt.Add(advertisedWindowSize))
}
// getSendParams returns the parameters needed by the sender when building
@@ -259,7 +268,14 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
// If we are in one of the shutdown states then we need to do
// additional checks before we try and process the segment.
switch state {
- case StateCloseWait, StateClosing, StateLastAck:
+ case StateCloseWait:
+ // If the ACK acks something not yet sent then we send an ACK.
+ if r.ep.snd.sndNxt.LessThan(s.ackNumber) {
+ r.ep.snd.sendAck()
+ return true, nil
+ }
+ fallthrough
+ case StateClosing, StateLastAck:
if !s.sequenceNumber.LessThanEq(r.rcvNxt) {
// Just drop the segment as we have
// already received a FIN and this
@@ -276,7 +292,7 @@ func (r *receiver) handleRcvdSegmentClosing(s *segment, state EndpointState, clo
// SHUT_RD) then any data past the rcvNxt should
// trigger a RST.
endDataSeq := s.sequenceNumber.Add(seqnum.Size(s.data.Size()))
- if rcvClosed && r.rcvNxt.LessThan(endDataSeq) {
+ if state != StateCloseWait && rcvClosed && r.rcvNxt.LessThan(endDataSeq) {
return true, tcpip.ErrConnectionAborted
}
if state == StateFinWait1 {
@@ -329,13 +345,6 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
state := r.ep.EndpointState()
closed := r.ep.closed
- if state != StateEstablished {
- drop, err := r.handleRcvdSegmentClosing(s, state, closed)
- if drop || err != nil {
- return drop, err
- }
- }
-
segLen := seqnum.Size(s.data.Size())
segSeq := s.sequenceNumber
@@ -347,6 +356,13 @@ func (r *receiver) handleRcvdSegment(s *segment) (drop bool, err *tcpip.Error) {
return true, nil
}
+ if state != StateEstablished {
+ drop, err := r.handleRcvdSegmentClosing(s, state, closed)
+ if drop || err != nil {
+ return drop, err
+ }
+ }
+
// Store the time of the last ack.
r.lastRcvdAckTime = time.Now()
diff --git a/pkg/tcpip/transport/tcp/rcv_test.go b/pkg/tcpip/transport/tcp/rcv_test.go
index c9eeff935..8a026ec46 100644
--- a/pkg/tcpip/transport/tcp/rcv_test.go
+++ b/pkg/tcpip/transport/tcp/rcv_test.go
@@ -30,7 +30,7 @@ func TestAcceptable(t *testing.T) {
}{
// The segment is smaller than the window.
{105, 2, 100, 104, false},
- {105, 2, 101, 105, false},
+ {105, 2, 101, 105, true},
{105, 2, 102, 106, true},
{105, 2, 103, 107, true},
{105, 2, 104, 108, true},
@@ -39,7 +39,7 @@ func TestAcceptable(t *testing.T) {
{105, 2, 107, 111, false},
// The segment is larger than the window.
- {105, 4, 103, 105, false},
+ {105, 4, 103, 105, true},
{105, 4, 104, 106, true},
{105, 4, 105, 107, true},
{105, 4, 106, 108, true},
diff --git a/pkg/tcpip/transport/tcp/segment.go b/pkg/tcpip/transport/tcp/segment.go
index 7712ce652..074edded6 100644
--- a/pkg/tcpip/transport/tcp/segment.go
+++ b/pkg/tcpip/transport/tcp/segment.go
@@ -96,6 +96,8 @@ func (s *segment) clone() *segment {
route: s.route.Clone(),
viewToDeliver: s.viewToDeliver,
rcvdTime: s.rcvdTime,
+ xmitTime: s.xmitTime,
+ xmitCount: s.xmitCount,
}
t.data = s.data.Clone(t.views[:])
return t
diff --git a/pkg/tcpip/transport/tcp/snd.go b/pkg/tcpip/transport/tcp/snd.go
index a3018914b..9e547a221 100644
--- a/pkg/tcpip/transport/tcp/snd.go
+++ b/pkg/tcpip/transport/tcp/snd.go
@@ -598,22 +598,34 @@ func (s *sender) splitSeg(seg *segment, size int) {
seg.data.CapLength(size)
}
-// NextSeg implements the RFC6675 NextSeg() operation. It returns segments that
-// match rule 1, 3 and 4 of the NextSeg() operation defined in RFC6675. Rule 2
-// is handled by the normal send logic.
-func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
+// NextSeg implements the RFC6675 NextSeg() operation.
+//
+// NextSeg starts scanning the writeList starting from nextSegHint and returns
+// the hint to be passed on the next call to NextSeg. This is required to avoid
+// iterating the write list repeatedly when NextSeg is invoked in a loop during
+// recovery. The returned hint will be nil if there are no more segments that
+// can match rules defined by NextSeg operation in RFC6675.
+//
+// rescueRtx will be true only if nextSeg is a rescue retransmission as
+// described by Step 4) of the NextSeg algorithm.
+func (s *sender) NextSeg(nextSegHint *segment) (nextSeg, hint *segment, rescueRtx bool) {
var s3 *segment
var s4 *segment
- smss := s.ep.scoreboard.SMSS()
// Step 1.
- for seg := s.writeList.Front(); seg != nil; seg = seg.Next() {
- if !s.isAssignedSequenceNumber(seg) {
+ for seg := nextSegHint; seg != nil; seg = seg.Next() {
+ // Stop iteration if we hit a segment that has never been
+ // transmitted (i.e. either it has no assigned sequence number
+ // or if it does have one, it's >= the next sequence number
+ // to be sent [i.e. >= s.sndNxt]).
+ if !s.isAssignedSequenceNumber(seg) || s.sndNxt.LessThanEq(seg.sequenceNumber) {
+ hint = nil
break
}
segSeq := seg.sequenceNumber
- if seg.data.Size() > int(smss) {
+ if smss := s.ep.scoreboard.SMSS(); seg.data.Size() > int(smss) {
s.splitSeg(seg, int(smss))
}
+
// See RFC 6675 Section 4
//
// 1. If there exists a smallest unSACKED sequence number
@@ -630,8 +642,9 @@ func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
// NextSeg():
// (1.c) IsLost(S2) returns true.
if s.ep.scoreboard.IsLost(segSeq) {
- return seg, s3, s4
+ return seg, seg.Next(), false
}
+
// NextSeg():
//
// (3): If the conditions for rules (1) and (2)
@@ -643,6 +656,7 @@ func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
// SHOULD be returned.
if s3 == nil {
s3 = seg
+ hint = seg.Next()
}
}
// NextSeg():
@@ -651,10 +665,12 @@ func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
// but there exists outstanding unSACKED data, we
// provide the opportunity for a single "rescue"
// retransmission per entry into loss recovery. If
- // HighACK is greater than RescueRxt, the one
- // segment of upto SMSS octects that MUST include
- // the highest outstanding unSACKed sequence number
- // SHOULD be returned.
+ // HighACK is greater than RescueRxt (or RescueRxt
+ // is undefined), then one segment of upto SMSS
+ // octects that MUST include the highest outstanding
+ // unSACKed sequence number SHOULD be returned, and
+ // RescueRxt set to RecoveryPoint. HighRxt MUST NOT
+ // be updated.
if s.fr.rescueRxt.LessThan(s.sndUna - 1) {
if s4 != nil {
if s4.sequenceNumber.LessThan(segSeq) {
@@ -663,12 +679,31 @@ func (s *sender) NextSeg() (nextSeg1, nextSeg3, nextSeg4 *segment) {
} else {
s4 = seg
}
- s.fr.rescueRxt = s.fr.last
}
}
}
- return nil, s3, s4
+ // If we got here then no segment matched step (1).
+ // Step (2): "If no sequence number 'S2' per rule (1)
+ // exists but there exists available unsent data and the
+ // receiver's advertised window allows, the sequence
+ // range of one segment of up to SMSS octets of
+ // previously unsent data starting with sequence number
+ // HighData+1 MUST be returned."
+ for seg := s.writeNext; seg != nil; seg = seg.Next() {
+ if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) {
+ continue
+ }
+ // We do not split the segment here to <= smss as it has
+ // potentially not been assigned a sequence number yet.
+ return seg, nil, false
+ }
+
+ if s3 != nil {
+ return s3, hint, false
+ }
+
+ return s4, nil, true
}
// maybeSendSegment tries to send the specified segment and either coalesces
@@ -792,64 +827,47 @@ func (s *sender) maybeSendSegment(seg *segment, limit int, end seqnum.Value) (se
// section 5, step C.
func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool) {
s.SetPipe()
+
+ if smss := int(s.ep.scoreboard.SMSS()); limit > smss {
+ // Cap segment size limit to s.smss as SACK recovery requires
+ // that all retransmissions or new segments send during recovery
+ // be of <= SMSS.
+ limit = smss
+ }
+
+ nextSegHint := s.writeList.Front()
for s.outstanding < s.sndCwnd {
- nextSeg, s3, s4 := s.NextSeg()
- if nextSeg == nil {
- // NextSeg():
- //
- // Step (2): "If no sequence number 'S2' per rule (1)
- // exists but there exists available unsent data and the
- // receiver's advertised window allows, the sequence
- // range of one segment of up to SMSS octets of
- // previously unsent data starting with sequence number
- // HighData+1 MUST be returned."
- for seg := s.writeNext; seg != nil; seg = seg.Next() {
- if s.isAssignedSequenceNumber(seg) && seg.sequenceNumber.LessThan(s.sndNxt) {
- continue
- }
- // Step C.3 described below is handled by
- // maybeSendSegment which increments sndNxt when
- // a segment is transmitted.
- //
- // Step C.3 "If any of the data octets sent in
- // (C.1) are above HighData, HighData must be
- // updated to reflect the transmission of
- // previously unsent data."
- if sent := s.maybeSendSegment(seg, limit, end); !sent {
- break
- }
- dataSent = true
- s.outstanding++
- s.writeNext = seg.Next()
- nextSeg = seg
- break
- }
- if nextSeg != nil {
- continue
- }
- }
- rescueRtx := false
- if nextSeg == nil && s3 != nil {
- nextSeg = s3
- }
- if nextSeg == nil && s4 != nil {
- nextSeg = s4
- rescueRtx = true
- }
+ var nextSeg *segment
+ var rescueRtx bool
+ nextSeg, nextSegHint, rescueRtx = s.NextSeg(nextSegHint)
if nextSeg == nil {
- break
+ return dataSent
}
- segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
- if !rescueRtx && nextSeg.sequenceNumber.LessThan(s.sndNxt) {
- // RFC 6675, Step C.2
+ if !s.isAssignedSequenceNumber(nextSeg) || s.sndNxt.LessThanEq(nextSeg.sequenceNumber) {
+ // New data being sent.
+
+ // Step C.3 described below is handled by
+ // maybeSendSegment which increments sndNxt when
+ // a segment is transmitted.
//
- // "If any of the data octets sent in (C.1) are below
- // HighData, HighRxt MUST be set to the highest sequence
- // number of the retransmitted segment unless NextSeg ()
- // rule (4) was invoked for this retransmission."
- s.fr.highRxt = segEnd - 1
+ // Step C.3 "If any of the data octets sent in
+ // (C.1) are above HighData, HighData must be
+ // updated to reflect the transmission of
+ // previously unsent data."
+ //
+ // We pass s.smss as the limit as the Step 2) requires that
+ // new data sent should be of size s.smss or less.
+ if sent := s.maybeSendSegment(nextSeg, limit, end); !sent {
+ return dataSent
+ }
+ dataSent = true
+ s.outstanding++
+ s.writeNext = nextSeg.Next()
+ continue
}
+ // Now handle the retransmission case where we matched either step 1,3 or 4
+ // of the NextSeg algorithm.
// RFC 6675, Step C.4.
//
// "The estimate of the amount of data outstanding in the network
@@ -858,6 +876,22 @@ func (s *sender) handleSACKRecovery(limit int, end seqnum.Value) (dataSent bool)
s.outstanding++
dataSent = true
s.sendSegment(nextSeg)
+
+ segEnd := nextSeg.sequenceNumber.Add(nextSeg.logicalLen())
+ if rescueRtx {
+ // We do the last part of rule (4) of NextSeg here to update
+ // RescueRxt as until this point we don't know if we are going
+ // to use the rescue transmission.
+ s.fr.rescueRxt = s.fr.last
+ } else {
+ // RFC 6675, Step C.2
+ //
+ // "If any of the data octets sent in (C.1) are below
+ // HighData, HighRxt MUST be set to the highest sequence
+ // number of the retransmitted segment unless NextSeg ()
+ // rule (4) was invoked for this retransmission."
+ s.fr.highRxt = segEnd - 1
+ }
}
return dataSent
}
@@ -903,7 +937,7 @@ func (s *sender) sendData() {
// "A TCP SHOULD set cwnd to no more than RW before beginning
// transmission if the TCP has not sent data in the interval exceeding
// the retrasmission timeout."
- if !s.fr.active && time.Now().Sub(s.lastSendTime) > s.rto {
+ if !s.fr.active && s.state != RTORecovery && time.Now().Sub(s.lastSendTime) > s.rto {
if s.sndCwnd > InitialCwnd {
s.sndCwnd = InitialCwnd
}
@@ -921,6 +955,9 @@ func (s *sender) sendData() {
limit = cwndLimit
}
if s.isAssignedSequenceNumber(seg) && s.ep.sackPermitted && s.ep.scoreboard.IsSACKED(seg.sackBlock()) {
+ // Move writeNext along so that we don't try and scan data that
+ // has already been SACKED.
+ s.writeNext = seg.Next()
continue
}
if sent := s.maybeSendSegment(seg, limit, end); !sent {
@@ -966,6 +1003,8 @@ func (s *sender) enterFastRecovery() {
s.fr.first = s.sndUna
s.fr.last = s.sndNxt - 1
s.fr.maxCwnd = s.sndCwnd + s.outstanding
+ s.fr.highRxt = s.sndUna
+ s.fr.rescueRxt = s.sndUna
if s.ep.sackPermitted {
s.state = SACKRecovery
s.ep.stack.Stats().TCP.SACKRecovery.Increment()
@@ -1258,6 +1297,7 @@ func (s *sender) handleRcvdSegment(seg *segment) {
if s.writeNext == seg {
s.writeNext = seg.Next()
}
+
s.writeList.Remove(seg)
// if SACK is enabled then Only reduce outstanding if
@@ -1329,7 +1369,23 @@ func (s *sender) sendSegment(seg *segment) *tcpip.Error {
}
seg.xmitTime = time.Now()
seg.xmitCount++
- return s.sendSegmentFromView(seg.data, seg.flags, seg.sequenceNumber)
+ err := s.sendSegmentFromView(seg.data, seg.flags, seg.sequenceNumber)
+
+ // Every time a packet containing data is sent (including a
+ // retransmission), if SACK is enabled and we are retransmitting data
+ // then use the conservative timer described in RFC6675 Section 6.0,
+ // otherwise follow the standard time described in RFC6298 Section 5.1.
+ if err != nil && seg.data.Size() != 0 {
+ if s.fr.active && seg.xmitCount > 1 && s.ep.sackPermitted {
+ s.resendTimer.enable(s.rto)
+ } else {
+ if !s.resendTimer.enabled() {
+ s.resendTimer.enable(s.rto)
+ }
+ }
+ }
+
+ return err
}
// sendSegmentFromView sends a new segment containing the given payload, flags
@@ -1345,19 +1401,5 @@ func (s *sender) sendSegmentFromView(data buffer.VectorisedView, flags byte, seq
// Remember the max sent ack.
s.maxSentAck = rcvNxt
- // Every time a packet containing data is sent (including a
- // retransmission), if SACK is enabled then use the conservative timer
- // described in RFC6675 Section 4.0, otherwise follow the standard time
- // described in RFC6298 Section 5.2.
- if data.Size() != 0 {
- if s.ep.sackPermitted {
- s.resendTimer.enable(s.rto)
- } else {
- if !s.resendTimer.enabled() {
- s.resendTimer.enable(s.rto)
- }
- }
- }
-
return s.ep.sendRaw(data, flags, seq, rcvNxt, rcvWnd)
}
diff --git a/pkg/tcpip/transport/tcp/tcp_test.go b/pkg/tcpip/transport/tcp/tcp_test.go
index 49e4ba214..d2c90ebd5 100644
--- a/pkg/tcpip/transport/tcp/tcp_test.go
+++ b/pkg/tcpip/transport/tcp/tcp_test.go
@@ -4905,6 +4905,8 @@ func TestListenNoAcceptNonUnicastV4(t *testing.T) {
}
for _, test := range tests {
+ test := test // capture range variable
+
t.Run(test.name, func(t *testing.T) {
t.Parallel()
@@ -5007,6 +5009,8 @@ func TestListenNoAcceptNonUnicastV6(t *testing.T) {
}
for _, test := range tests {
+ test := test // capture range variable
+
t.Run(test.name, func(t *testing.T) {
t.Parallel()